From 666cba599e073a2c55dbb8665d24cb757aeca86c Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 17:05:07 -0600 Subject: [PATCH 01/14] =?UTF-8?q?Phase=202:=20Code=20agent=20commands=20?= =?UTF-8?q?=E2=80=94=20Rust=20foundation=20+=20TS=20commands=20+=20ts-rs?= =?UTF-8?q?=20type=20gen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rust Foundation (continuum-core/src/code/): - FileEngine: read/write/edit/delete with per-persona workspace scoping - ChangeGraph: DAG of ChangeNodes with undo via reverse diff - DiffEngine: unified diff computation (similar crate) - PathSecurity: workspace isolation, path traversal guard, extension allowlist - CodeSearch: regex + glob search with .gitignore support (ignore crate) - Tree: recursive directory tree generation - GitBridge: git status and diff operations - IPC handlers for all 12 code/* endpoints (359 tests passing) TypeScript Commands (8 generated via CommandGenerator): - code/read, code/write, code/edit, code/diff - code/search, code/tree, code/undo, code/history - Each with Types.ts, ServerCommand.ts, BrowserCommand.ts, README, tests Type Safety (ts-rs single source of truth): - 14 Rust types exported via #[derive(TS)] β†’ shared/generated/code/ - Zero hand-written wire type duplicates - All object/any casts eliminated from code/* commands - CommandParams.userId used as canonical identity field RAG Integration: - CodeToolSource: dynamic coding workflow guidance in persona system prompts - Only shows tools persona has permission to use - Budget-aware with minimal fallback - 15 unit tests passing Infrastructure fixes: - PersonaToolExecutor now injects userId (standard CommandParams field) - CLAUDE.md documents ts-rs pattern and regeneration workflow --- CLAUDE.md | 86 ++ src/debug/jtag/commands/code/diff/.npmignore | 20 + src/debug/jtag/commands/code/diff/README.md | 159 +++ .../diff/browser/CodeDiffBrowserCommand.ts | 21 + .../jtag/commands/code/diff/package.json | 35 + .../code/diff/server/CodeDiffServerCommand.ts | 80 ++ .../code/diff/shared/CodeDiffTypes.ts | 128 +++ .../integration/CodeDiffIntegration.test.ts | 196 ++++ .../diff/test/unit/CodeDiffCommand.test.ts | 259 +++++ src/debug/jtag/commands/code/edit/.npmignore | 20 + src/debug/jtag/commands/code/edit/README.md | 180 ++++ .../edit/browser/CodeEditBrowserCommand.ts | 21 + .../jtag/commands/code/edit/package.json | 35 + .../code/edit/server/CodeEditServerCommand.ts | 84 ++ .../code/edit/shared/CodeEditTypes.ts | 143 +++ .../integration/CodeEditIntegration.test.ts | 196 ++++ .../edit/test/unit/CodeEditCommand.test.ts | 259 +++++ .../jtag/commands/code/history/.npmignore | 20 + .../jtag/commands/code/history/README.md | 158 +++ .../browser/CodeHistoryBrowserCommand.ts | 21 + .../jtag/commands/code/history/package.json | 35 + .../server/CodeHistoryServerCommand.ts | 38 + .../code/history/shared/CodeHistoryTypes.ts | 96 ++ .../CodeHistoryIntegration.test.ts | 196 ++++ .../test/unit/CodeHistoryCommand.test.ts | 259 +++++ src/debug/jtag/commands/code/read/.npmignore | 20 + src/debug/jtag/commands/code/read/README.md | 164 ++++ .../read/browser/CodeReadBrowserCommand.ts | 21 + .../jtag/commands/code/read/package.json | 35 + .../code/read/server/CodeReadServerCommand.ts | 51 + .../code/read/shared/CodeReadTypes.ts | 124 +++ .../integration/CodeReadIntegration.test.ts | 196 ++++ .../read/test/unit/CodeReadCommand.test.ts | 259 +++++ .../jtag/commands/code/search/.npmignore | 20 + src/debug/jtag/commands/code/search/README.md | 160 ++++ .../browser/CodeSearchBrowserCommand.ts | 21 + .../jtag/commands/code/search/package.json | 35 + .../search/server/CodeSearchServerCommand.ts | 48 + .../code/search/shared/CodeSearchTypes.ts | 105 ++ .../integration/CodeSearchIntegration.test.ts | 196 ++++ .../test/unit/CodeSearchCommand.test.ts | 259 +++++ src/debug/jtag/commands/code/tree/.npmignore | 20 + src/debug/jtag/commands/code/tree/README.md | 160 ++++ .../tree/browser/CodeTreeBrowserCommand.ts | 21 + .../jtag/commands/code/tree/package.json | 35 + .../code/tree/server/CodeTreeServerCommand.ts | 40 + .../code/tree/shared/CodeTreeTypes.ts | 106 ++ .../integration/CodeTreeIntegration.test.ts | 196 ++++ .../tree/test/unit/CodeTreeCommand.test.ts | 259 +++++ src/debug/jtag/commands/code/undo/.npmignore | 20 + src/debug/jtag/commands/code/undo/README.md | 163 ++++ .../undo/browser/CodeUndoBrowserCommand.ts | 21 + .../jtag/commands/code/undo/package.json | 35 + .../code/undo/server/CodeUndoServerCommand.ts | 43 + .../code/undo/shared/CodeUndoTypes.ts | 91 ++ .../integration/CodeUndoIntegration.test.ts | 196 ++++ .../undo/test/unit/CodeUndoCommand.test.ts | 259 +++++ src/debug/jtag/commands/code/write/.npmignore | 20 + src/debug/jtag/commands/code/write/README.md | 154 +++ .../write/browser/CodeWriteBrowserCommand.ts | 21 + .../jtag/commands/code/write/package.json | 35 + .../write/server/CodeWriteServerCommand.ts | 54 ++ .../code/write/shared/CodeWriteTypes.ts | 103 ++ .../integration/CodeWriteIntegration.test.ts | 196 ++++ .../write/test/unit/CodeWriteCommand.test.ts | 259 +++++ .../code-daemon/server/CodeDaemonServer.ts | 73 +- .../daemons/code-daemon/shared/CodeDaemon.ts | 99 +- .../code-daemon/shared/CodeDaemonTypes.ts | 17 + src/debug/jtag/generated-command-schemas.json | 2 +- .../jtag/shared/generated/code/ChangeNode.ts | 44 + .../jtag/shared/generated/code/DiffHunk.ts | 10 + .../jtag/shared/generated/code/EditMode.ts | 6 + .../jtag/shared/generated/code/FileDiff.ts | 15 + .../shared/generated/code/FileOperation.ts | 6 + .../shared/generated/code/GitStatusInfo.ts | 6 + .../shared/generated/code/HistoryResult.ts | 7 + .../jtag/shared/generated/code/ReadResult.ts | 6 + .../jtag/shared/generated/code/SearchMatch.ts | 6 + .../shared/generated/code/SearchResult.ts | 7 + .../jtag/shared/generated/code/TreeNode.ts | 6 + .../jtag/shared/generated/code/TreeResult.ts | 7 + .../jtag/shared/generated/code/UndoResult.ts | 7 + .../jtag/shared/generated/code/WriteResult.ts | 10 + src/debug/jtag/shared/generated/code/index.ts | 28 + src/debug/jtag/shared/generated/index.ts | 3 + .../system/rag/builders/ChatRAGBuilder.ts | 26 +- .../jtag/system/rag/sources/CodeToolSource.ts | 209 ++++ src/debug/jtag/system/rag/sources/index.ts | 1 + .../jtag/system/user/server/PersonaUser.ts | 6 +- .../server/modules/PersonaMessageEvaluator.ts | 193 ++-- .../modules/PersonaResponseGenerator.ts | 59 +- .../server/modules/PersonaToolExecutor.ts | 34 +- .../tests/unit/rag/CodeToolSource.test.ts | 307 ++++++ .../jtag/workers/continuum-core/Cargo.toml | 6 + .../continuum-core/bindings/RustCoreIPC.ts | 295 ++++++ .../continuum-core/src/code/change_graph.rs | 427 +++++++++ .../continuum-core/src/code/diff_engine.rs | 175 ++++ .../continuum-core/src/code/file_engine.rs | 905 ++++++++++++++++++ .../continuum-core/src/code/git_bridge.rs | 204 ++++ .../workers/continuum-core/src/code/mod.rs | 27 + .../continuum-core/src/code/path_security.rs | 420 ++++++++ .../workers/continuum-core/src/code/search.rs | 221 +++++ .../workers/continuum-core/src/code/tree.rs | 305 ++++++ .../workers/continuum-core/src/code/types.rs | 239 +++++ .../workers/continuum-core/src/ipc/mod.rs | 382 ++++++++ .../jtag/workers/continuum-core/src/lib.rs | 1 + 106 files changed, 11625 insertions(+), 148 deletions(-) create mode 100644 src/debug/jtag/commands/code/diff/.npmignore create mode 100644 src/debug/jtag/commands/code/diff/README.md create mode 100644 src/debug/jtag/commands/code/diff/browser/CodeDiffBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/diff/package.json create mode 100644 src/debug/jtag/commands/code/diff/server/CodeDiffServerCommand.ts create mode 100644 src/debug/jtag/commands/code/diff/shared/CodeDiffTypes.ts create mode 100644 src/debug/jtag/commands/code/diff/test/integration/CodeDiffIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/diff/test/unit/CodeDiffCommand.test.ts create mode 100644 src/debug/jtag/commands/code/edit/.npmignore create mode 100644 src/debug/jtag/commands/code/edit/README.md create mode 100644 src/debug/jtag/commands/code/edit/browser/CodeEditBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/edit/package.json create mode 100644 src/debug/jtag/commands/code/edit/server/CodeEditServerCommand.ts create mode 100644 src/debug/jtag/commands/code/edit/shared/CodeEditTypes.ts create mode 100644 src/debug/jtag/commands/code/edit/test/integration/CodeEditIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/edit/test/unit/CodeEditCommand.test.ts create mode 100644 src/debug/jtag/commands/code/history/.npmignore create mode 100644 src/debug/jtag/commands/code/history/README.md create mode 100644 src/debug/jtag/commands/code/history/browser/CodeHistoryBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/history/package.json create mode 100644 src/debug/jtag/commands/code/history/server/CodeHistoryServerCommand.ts create mode 100644 src/debug/jtag/commands/code/history/shared/CodeHistoryTypes.ts create mode 100644 src/debug/jtag/commands/code/history/test/integration/CodeHistoryIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/history/test/unit/CodeHistoryCommand.test.ts create mode 100644 src/debug/jtag/commands/code/read/.npmignore create mode 100644 src/debug/jtag/commands/code/read/README.md create mode 100644 src/debug/jtag/commands/code/read/browser/CodeReadBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/read/package.json create mode 100644 src/debug/jtag/commands/code/read/server/CodeReadServerCommand.ts create mode 100644 src/debug/jtag/commands/code/read/shared/CodeReadTypes.ts create mode 100644 src/debug/jtag/commands/code/read/test/integration/CodeReadIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/read/test/unit/CodeReadCommand.test.ts create mode 100644 src/debug/jtag/commands/code/search/.npmignore create mode 100644 src/debug/jtag/commands/code/search/README.md create mode 100644 src/debug/jtag/commands/code/search/browser/CodeSearchBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/search/package.json create mode 100644 src/debug/jtag/commands/code/search/server/CodeSearchServerCommand.ts create mode 100644 src/debug/jtag/commands/code/search/shared/CodeSearchTypes.ts create mode 100644 src/debug/jtag/commands/code/search/test/integration/CodeSearchIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/search/test/unit/CodeSearchCommand.test.ts create mode 100644 src/debug/jtag/commands/code/tree/.npmignore create mode 100644 src/debug/jtag/commands/code/tree/README.md create mode 100644 src/debug/jtag/commands/code/tree/browser/CodeTreeBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/tree/package.json create mode 100644 src/debug/jtag/commands/code/tree/server/CodeTreeServerCommand.ts create mode 100644 src/debug/jtag/commands/code/tree/shared/CodeTreeTypes.ts create mode 100644 src/debug/jtag/commands/code/tree/test/integration/CodeTreeIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/tree/test/unit/CodeTreeCommand.test.ts create mode 100644 src/debug/jtag/commands/code/undo/.npmignore create mode 100644 src/debug/jtag/commands/code/undo/README.md create mode 100644 src/debug/jtag/commands/code/undo/browser/CodeUndoBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/undo/package.json create mode 100644 src/debug/jtag/commands/code/undo/server/CodeUndoServerCommand.ts create mode 100644 src/debug/jtag/commands/code/undo/shared/CodeUndoTypes.ts create mode 100644 src/debug/jtag/commands/code/undo/test/integration/CodeUndoIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/undo/test/unit/CodeUndoCommand.test.ts create mode 100644 src/debug/jtag/commands/code/write/.npmignore create mode 100644 src/debug/jtag/commands/code/write/README.md create mode 100644 src/debug/jtag/commands/code/write/browser/CodeWriteBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/write/package.json create mode 100644 src/debug/jtag/commands/code/write/server/CodeWriteServerCommand.ts create mode 100644 src/debug/jtag/commands/code/write/shared/CodeWriteTypes.ts create mode 100644 src/debug/jtag/commands/code/write/test/integration/CodeWriteIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/write/test/unit/CodeWriteCommand.test.ts create mode 100644 src/debug/jtag/shared/generated/code/ChangeNode.ts create mode 100644 src/debug/jtag/shared/generated/code/DiffHunk.ts create mode 100644 src/debug/jtag/shared/generated/code/EditMode.ts create mode 100644 src/debug/jtag/shared/generated/code/FileDiff.ts create mode 100644 src/debug/jtag/shared/generated/code/FileOperation.ts create mode 100644 src/debug/jtag/shared/generated/code/GitStatusInfo.ts create mode 100644 src/debug/jtag/shared/generated/code/HistoryResult.ts create mode 100644 src/debug/jtag/shared/generated/code/ReadResult.ts create mode 100644 src/debug/jtag/shared/generated/code/SearchMatch.ts create mode 100644 src/debug/jtag/shared/generated/code/SearchResult.ts create mode 100644 src/debug/jtag/shared/generated/code/TreeNode.ts create mode 100644 src/debug/jtag/shared/generated/code/TreeResult.ts create mode 100644 src/debug/jtag/shared/generated/code/UndoResult.ts create mode 100644 src/debug/jtag/shared/generated/code/WriteResult.ts create mode 100644 src/debug/jtag/shared/generated/code/index.ts create mode 100644 src/debug/jtag/system/rag/sources/CodeToolSource.ts create mode 100644 src/debug/jtag/tests/unit/rag/CodeToolSource.test.ts create mode 100644 src/debug/jtag/workers/continuum-core/src/code/change_graph.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/diff_engine.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/file_engine.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/mod.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/path_security.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/search.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/tree.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/types.rs diff --git a/CLAUDE.md b/CLAUDE.md index 76c1dfc1d..a1988cfca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -619,6 +619,92 @@ const result = await this.executeCommand>('data/list' --- +## πŸ¦€ RUST β†’ TYPESCRIPT TYPE BOUNDARIES (ts-rs) + +**Single source of truth: Rust defines wire types, ts-rs generates TypeScript. NEVER hand-write duplicate types.** + +### How It Works + +1. **Rust struct** with `#[derive(TS)]` defines the canonical type +2. **ts-rs macro** generates TypeScript `export type` at compile time +3. **TypeScript** imports from `shared/generated/` β€” no manual duplication +4. **Serde** handles JSON serialization on both sides + +### Pattern + +```rust +// Rust (source of truth) +use ts_rs::TS; + +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/WriteResult.ts")] +pub struct WriteResult { + pub success: bool, + #[ts(optional)] + pub change_id: Option, + pub file_path: String, + #[ts(type = "number")] // u64 β†’ number (not bigint) + pub bytes_written: u64, + #[ts(optional)] + pub error: Option, +} +``` + +```typescript +// TypeScript (generated β€” DO NOT EDIT) +export type WriteResult = { success: boolean, change_id?: string, file_path: string, bytes_written: number, error?: string }; + +// Consuming code imports from generated barrel +import type { WriteResult, ReadResult, EditMode } from '@shared/generated/code'; +``` + +### ts-rs Attribute Reference + +| Attribute | Purpose | Example | +|-----------|---------|---------| +| `#[ts(export)]` | Mark for TS generation | `#[derive(TS)] #[ts(export)]` | +| `#[ts(export_to = "path")]` | Output file path (relative to `bindings/`) | `"../../../shared/generated/code/X.ts"` | +| `#[ts(type = "string")]` | Override TS type for field | Uuid β†’ string | +| `#[ts(type = "number")]` | Override TS type for field | u64 β†’ number | +| `#[ts(optional)]` | Mark as optional in TS | Option β†’ `field?: T` | +| `#[ts(type = "Array")]` | Complex type mapping | Vec β†’ Array | + +### Regenerating Bindings + +```bash +cargo test --package continuum-core --lib # Generates all *.ts in shared/generated/ +``` + +### Generated Output Structure + +``` +shared/generated/ +β”œβ”€β”€ index.ts # Barrel export (re-exports all modules) +β”œβ”€β”€ code/ # Code module (file ops, change graph, search, tree) +β”‚ β”œβ”€β”€ index.ts +β”‚ β”œβ”€β”€ ChangeNode.ts, EditMode.ts, WriteResult.ts, ReadResult.ts, ... +β”œβ”€β”€ persona/ # Persona cognition (state, inbox, channels) +β”‚ β”œβ”€β”€ index.ts +β”‚ β”œβ”€β”€ PersonaState.ts, InboxMessage.ts, CognitionDecision.ts, ... +β”œβ”€β”€ rag/ # RAG pipeline (context, messages, options) +β”‚ β”œβ”€β”€ index.ts +β”‚ β”œβ”€β”€ RagContext.ts, LlmMessage.ts, ... +└── ipc/ # IPC protocol types + β”œβ”€β”€ index.ts + └── InboxMessageRequest.ts +``` + +### Rules (Non-Negotiable) + +1. **NEVER hand-write types that cross the Rust↔TS boundary** β€” add `#[derive(TS)]` to the Rust struct +2. **NEVER use `object`, `any`, `unknown`, or `Record`** for Rust wire types β€” import the generated type +3. **IDs are `UUID`** (from `CrossPlatformUUID`) β€” never plain `string` for identity fields +4. **Use `CommandParams.userId`** for caller identity β€” it's already on the base type, auto-injected by infrastructure +5. **Barrel exports** β€” every generated module has an `index.ts`; import from the barrel, not individual files +6. **Regenerate after Rust changes** β€” `cargo test` triggers ts-rs macro; commit both Rust and generated TS + +--- + ## πŸ“ PATH ALIASES (New! Use These Going Forward) **TypeScript path aliases are now configured** to eliminate relative import hell (`../../../../`). diff --git a/src/debug/jtag/commands/code/diff/.npmignore b/src/debug/jtag/commands/code/diff/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/diff/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/diff/README.md b/src/debug/jtag/commands/code/diff/README.md new file mode 100644 index 000000000..859e3a386 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/README.md @@ -0,0 +1,159 @@ +# Code Diff Command + +Preview an edit as a unified diff without applying it. Useful for reviewing changes before committing them. Uses the same edit modes as code/edit. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/diff --filePath= --editType= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/diff', { + // your parameters here +}); +``` + +## Parameters + +- **filePath** (required): `string` - Relative path to file within workspace +- **editType** (required): `string` - Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' +- **search** (optional): `string` - Text to find (for search_replace mode) +- **replace** (optional): `string` - Replacement text (for search_replace mode) +- **replaceAll** (optional): `boolean` - Replace all occurrences (for search_replace mode) +- **startLine** (optional): `number` - Start line (for line_range mode) +- **endLine** (optional): `number` - End line (for line_range mode) +- **newContent** (optional): `string` - New content (for line_range mode) +- **line** (optional): `number` - Line number (for insert_at mode) +- **content** (optional): `string` - Content to insert or append + +## Result + +Returns `CodeDiffResult` with: + +Returns CommandResult with: +- **unified**: `string` - Unified diff text showing the proposed changes + +## Examples + +### Preview a search-replace diff + +```bash +./jtag code/diff --filePath="src/main.ts" --editType="search_replace" --search="console.log" --replace="logger.info" +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/diff +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/diff' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/diff +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/diff' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Diff/test/unit/CodeDiffCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Diff/test/integration/CodeDiffIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeDiffTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeDiffBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeDiffServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeDiffCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeDiffIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/diff/browser/CodeDiffBrowserCommand.ts b/src/debug/jtag/commands/code/diff/browser/CodeDiffBrowserCommand.ts new file mode 100644 index 000000000..9457b4a3f --- /dev/null +++ b/src/debug/jtag/commands/code/diff/browser/CodeDiffBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Diff Command - Browser Implementation + * + * Preview an edit as a unified diff without applying it. Useful for reviewing changes before committing them. Uses the same edit modes as code/edit. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeDiffParams, CodeDiffResult } from '../shared/CodeDiffTypes'; + +export class CodeDiffBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/diff', context, subpath, commander); + } + + async execute(params: CodeDiffParams): Promise { + console.log('🌐 BROWSER: Delegating Code Diff to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/diff/package.json b/src/debug/jtag/commands/code/diff/package.json new file mode 100644 index 000000000..6f042bfc9 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/diff", + "version": "1.0.0", + "description": "Preview an edit as a unified diff without applying it. Useful for reviewing changes before committing them. Uses the same edit modes as code/edit.", + "main": "server/CodeDiffServerCommand.ts", + "types": "shared/CodeDiffTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeDiffIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/diff" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/diff/server/CodeDiffServerCommand.ts b/src/debug/jtag/commands/code/diff/server/CodeDiffServerCommand.ts new file mode 100644 index 000000000..e61522918 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/server/CodeDiffServerCommand.ts @@ -0,0 +1,80 @@ +/** + * Code Diff Command - Server Implementation + * + * Preview an edit as a unified diff without applying it. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeDiffParams, CodeDiffResult } from '../shared/CodeDiffTypes'; +import { createCodeDiffResultFromParams } from '../shared/CodeDiffTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; +import type { WorkspaceEditMode } from '@daemons/code-daemon/shared/CodeDaemonTypes'; + +export class CodeDiffServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/diff', context, subpath, commander); + } + + async execute(params: CodeDiffParams): Promise { + if (!params.filePath || params.filePath.trim() === '') { + throw new ValidationError( + 'filePath', + `Missing required parameter 'filePath'. See the code/diff README for usage.` + ); + } + if (!params.editType) { + throw new ValidationError( + 'editType', + `Missing required parameter 'editType'. Must be 'search_replace', 'line_range', 'insert_at', or 'append'.` + ); + } + + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const editMode = this.buildEditMode(params); + + const result = await CodeDaemon.workspaceDiff( + personaId, + params.filePath, + editMode + ); + + return createCodeDiffResultFromParams(params, { + success: result.success, + unified: result.unified, + }); + } + + private buildEditMode(params: CodeDiffParams): WorkspaceEditMode { + switch (params.editType) { + case 'search_replace': + if (!params.search) throw new ValidationError('search', `'search' is required for search_replace mode.`); + if (params.replace === undefined) throw new ValidationError('replace', `'replace' is required for search_replace mode.`); + return { type: 'search_replace', search: params.search, replace: params.replace, all: params.replaceAll ?? false }; + + case 'line_range': + if (!params.startLine) throw new ValidationError('startLine', `'startLine' is required for line_range mode.`); + if (!params.endLine) throw new ValidationError('endLine', `'endLine' is required for line_range mode.`); + if (params.newContent === undefined) throw new ValidationError('newContent', `'newContent' is required for line_range mode.`); + return { type: 'line_range', start_line: params.startLine, end_line: params.endLine, new_content: params.newContent }; + + case 'insert_at': + if (!params.line) throw new ValidationError('line', `'line' is required for insert_at mode.`); + if (params.content === undefined) throw new ValidationError('content', `'content' is required for insert_at mode.`); + return { type: 'insert_at', line: params.line, content: params.content }; + + case 'append': + if (params.content === undefined) throw new ValidationError('content', `'content' is required for append mode.`); + return { type: 'append', content: params.content }; + + default: + throw new ValidationError('editType', `Invalid editType '${params.editType}'.`); + } + } +} diff --git a/src/debug/jtag/commands/code/diff/shared/CodeDiffTypes.ts b/src/debug/jtag/commands/code/diff/shared/CodeDiffTypes.ts new file mode 100644 index 000000000..dd99414c6 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/shared/CodeDiffTypes.ts @@ -0,0 +1,128 @@ +/** + * Code Diff Command - Shared Types + * + * Preview an edit as a unified diff without applying it. Useful for reviewing changes before committing them. Uses the same edit modes as code/edit. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Code Diff Command Parameters + */ +export interface CodeDiffParams extends CommandParams { + // Relative path to file within workspace + filePath: string; + // Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' + editType: string; + // Text to find (for search_replace mode) + search?: string; + // Replacement text (for search_replace mode) + replace?: string; + // Replace all occurrences (for search_replace mode) + replaceAll?: boolean; + // Start line (for line_range mode) + startLine?: number; + // End line (for line_range mode) + endLine?: number; + // New content (for line_range mode) + newContent?: string; + // Line number (for insert_at mode) + line?: number; + // Content to insert or append + content?: string; +} + +/** + * Factory function for creating CodeDiffParams + */ +export const createCodeDiffParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Relative path to file within workspace + filePath: string; + // Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' + editType: string; + // Text to find (for search_replace mode) + search?: string; + // Replacement text (for search_replace mode) + replace?: string; + // Replace all occurrences (for search_replace mode) + replaceAll?: boolean; + // Start line (for line_range mode) + startLine?: number; + // End line (for line_range mode) + endLine?: number; + // New content (for line_range mode) + newContent?: string; + // Line number (for insert_at mode) + line?: number; + // Content to insert or append + content?: string; + } +): CodeDiffParams => createPayload(context, sessionId, { + search: data.search ?? '', + replace: data.replace ?? '', + replaceAll: data.replaceAll ?? false, + startLine: data.startLine ?? 0, + endLine: data.endLine ?? 0, + newContent: data.newContent ?? '', + line: data.line ?? 0, + content: data.content ?? '', + ...data +}); + +/** + * Code Diff Command Result + */ +export interface CodeDiffResult extends CommandResult { + success: boolean; + // Unified diff text showing the proposed changes + unified: string; + error?: JTAGError; +} + +/** + * Factory function for creating CodeDiffResult with defaults + */ +export const createCodeDiffResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Unified diff text showing the proposed changes + unified?: string; + error?: JTAGError; + } +): CodeDiffResult => createPayload(context, sessionId, { + unified: data.unified ?? '', + ...data +}); + +/** + * Smart Code Diff-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeDiffResultFromParams = ( + params: CodeDiffParams, + differences: Omit +): CodeDiffResult => transformPayload(params, differences); + +/** + * Code Diff β€” Type-safe command executor + * + * Usage: + * import { CodeDiff } from '...shared/CodeDiffTypes'; + * const result = await CodeDiff.execute({ ... }); + */ +export const CodeDiff = { + execute(params: CommandInput): Promise { + return Commands.execute('code/diff', params as Partial); + }, + commandName: 'code/diff' as const, +} as const; diff --git a/src/debug/jtag/commands/code/diff/test/integration/CodeDiffIntegration.test.ts b/src/debug/jtag/commands/code/diff/test/integration/CodeDiffIntegration.test.ts new file mode 100644 index 000000000..bf1bd05c4 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/test/integration/CodeDiffIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeDiff Command Integration Tests + * + * Tests Code Diff command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Diff/test/integration/CodeDiffIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeDiff Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Diff command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Diff command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Diff']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Diff returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Diff succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Diff']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Diff']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Diff']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Diff']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Diff']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeDiffIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeDiff Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeDiff INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeDiff integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeDiffIntegrationTests(); +} else { + module.exports = { runAllCodeDiffIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/diff/test/unit/CodeDiffCommand.test.ts b/src/debug/jtag/commands/code/diff/test/unit/CodeDiffCommand.test.ts new file mode 100644 index 000000000..a4527d409 --- /dev/null +++ b/src/debug/jtag/commands/code/diff/test/unit/CodeDiffCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeDiff Command Unit Tests + * + * Tests Code Diff command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Diff/test/unit/CodeDiffCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeDiffParams, CodeDiffResult } from '../../shared/CodeDiffTypes'; + +console.log('πŸ§ͺ CodeDiff Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Diff logic for testing + */ +async function mockCodeDiffCommand(params: CodeDiffParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Diff' or see the Code Diff README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeDiffResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeDiffCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeDiff command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Diff command + const validParams: CodeDiffParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeDiffExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Diff command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeDiffParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeDiffCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeDiffRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeDiffParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeDiffParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeDiffCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeDiffOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeDiffParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeDiffCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeDiffParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeDiffCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeDiffPerformance(): Promise { + console.log('\n⚑ Test 5: CodeDiff performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeDiffCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeDiffParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeDiff completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeDiffResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeDiff result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeDiffCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeDiffParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeDiffUnitTests(): Promise { + console.log('πŸš€ Starting CodeDiff Command Unit Tests\n'); + + try { + testCodeDiffCommandStructure(); + await testMockCodeDiffExecution(); + await testCodeDiffRequiredParams(); + await testCodeDiffOptionalParams(); + await testCodeDiffPerformance(); + await testCodeDiffResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeDiff UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeDiff unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeDiffUnitTests(); +} else { + module.exports = { runAllCodeDiffUnitTests }; +} diff --git a/src/debug/jtag/commands/code/edit/.npmignore b/src/debug/jtag/commands/code/edit/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/edit/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/edit/README.md b/src/debug/jtag/commands/code/edit/README.md new file mode 100644 index 000000000..7b690ba83 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/README.md @@ -0,0 +1,180 @@ +# Code Edit Command + +Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/edit --filePath= --editType= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/edit', { + // your parameters here +}); +``` + +## Parameters + +- **filePath** (required): `string` - Relative path to file within workspace +- **editType** (required): `string` - Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' +- **search** (optional): `string` - Text to find (for search_replace mode) +- **replace** (optional): `string` - Replacement text (for search_replace mode) +- **replaceAll** (optional): `boolean` - Replace all occurrences (for search_replace mode, default: false) +- **startLine** (optional): `number` - Start line (for line_range mode, 1-indexed) +- **endLine** (optional): `number` - End line (for line_range mode, 1-indexed, inclusive) +- **newContent** (optional): `string` - New content (for line_range mode) +- **line** (optional): `number` - Line number to insert at (for insert_at mode) +- **content** (optional): `string` - Content to insert or append +- **description** (optional): `string` - Description of what this change does + +## Result + +Returns `CodeEditResult` with: + +Returns CommandResult with: +- **changeId**: `string` - UUID of the ChangeNode created (for undo) +- **filePath**: `string` - Resolved file path +- **bytesWritten**: `number` - New file size in bytes + +## Examples + +### Search and replace + +```bash +./jtag code/edit --filePath="src/main.ts" --editType="search_replace" --search="old text" --replace="new text" +``` + +### Replace line range + +```bash +./jtag code/edit --filePath="src/main.ts" --editType="line_range" --startLine=5 --endLine=10 --newContent="replacement content" +``` + +### Insert at line + +```bash +./jtag code/edit --filePath="src/main.ts" --editType="insert_at" --line=1 --content="// Header comment" +``` + +### Append to file + +```bash +./jtag code/edit --filePath="src/main.ts" --editType="append" --content="// Footer" +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/edit +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/edit' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/edit +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/edit' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Edit/test/unit/CodeEditCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Edit/test/integration/CodeEditIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeEditTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeEditBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeEditServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeEditCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeEditIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/edit/browser/CodeEditBrowserCommand.ts b/src/debug/jtag/commands/code/edit/browser/CodeEditBrowserCommand.ts new file mode 100644 index 000000000..dea1109a0 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/browser/CodeEditBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Edit Command - Browser Implementation + * + * Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeEditParams, CodeEditResult } from '../shared/CodeEditTypes'; + +export class CodeEditBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/edit', context, subpath, commander); + } + + async execute(params: CodeEditParams): Promise { + console.log('🌐 BROWSER: Delegating Code Edit to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/edit/package.json b/src/debug/jtag/commands/code/edit/package.json new file mode 100644 index 000000000..1f148eb54 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/edit", + "version": "1.0.0", + "description": "Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications.", + "main": "server/CodeEditServerCommand.ts", + "types": "shared/CodeEditTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeEditIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/edit" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/edit/server/CodeEditServerCommand.ts b/src/debug/jtag/commands/code/edit/server/CodeEditServerCommand.ts new file mode 100644 index 000000000..b83d39e08 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/server/CodeEditServerCommand.ts @@ -0,0 +1,84 @@ +/** + * Code Edit Command - Server Implementation + * + * Edits a file using search-replace, line-range, insert-at, or append. + * Creates a ChangeNode for undo support. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeEditParams, CodeEditResult } from '../shared/CodeEditTypes'; +import { createCodeEditResultFromParams } from '../shared/CodeEditTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; +import type { WorkspaceEditMode } from '@daemons/code-daemon/shared/CodeDaemonTypes'; + +export class CodeEditServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/edit', context, subpath, commander); + } + + async execute(params: CodeEditParams): Promise { + if (!params.filePath || params.filePath.trim() === '') { + throw new ValidationError( + 'filePath', + `Missing required parameter 'filePath'. See the code/edit README for usage.` + ); + } + if (!params.editType) { + throw new ValidationError( + 'editType', + `Missing required parameter 'editType'. Must be 'search_replace', 'line_range', 'insert_at', or 'append'.` + ); + } + + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const editMode = this.buildEditMode(params); + + const result = await CodeDaemon.workspaceEdit( + personaId, + params.filePath, + editMode, + params.description + ); + + return createCodeEditResultFromParams(params, { + success: result.success, + changeId: result.change_id || '', + filePath: result.file_path, + bytesWritten: result.bytes_written, + }); + } + + private buildEditMode(params: CodeEditParams): WorkspaceEditMode { + switch (params.editType) { + case 'search_replace': + if (!params.search) throw new ValidationError('search', `'search' is required for search_replace edit mode.`); + if (params.replace === undefined) throw new ValidationError('replace', `'replace' is required for search_replace edit mode.`); + return { type: 'search_replace', search: params.search, replace: params.replace, all: params.replaceAll ?? false }; + + case 'line_range': + if (!params.startLine) throw new ValidationError('startLine', `'startLine' is required for line_range edit mode.`); + if (!params.endLine) throw new ValidationError('endLine', `'endLine' is required for line_range edit mode.`); + if (params.newContent === undefined) throw new ValidationError('newContent', `'newContent' is required for line_range edit mode.`); + return { type: 'line_range', start_line: params.startLine, end_line: params.endLine, new_content: params.newContent }; + + case 'insert_at': + if (!params.line) throw new ValidationError('line', `'line' is required for insert_at edit mode.`); + if (params.content === undefined) throw new ValidationError('content', `'content' is required for insert_at edit mode.`); + return { type: 'insert_at', line: params.line, content: params.content }; + + case 'append': + if (params.content === undefined) throw new ValidationError('content', `'content' is required for append edit mode.`); + return { type: 'append', content: params.content }; + + default: + throw new ValidationError('editType', `Invalid editType '${params.editType}'. Must be 'search_replace', 'line_range', 'insert_at', or 'append'.`); + } + } +} diff --git a/src/debug/jtag/commands/code/edit/shared/CodeEditTypes.ts b/src/debug/jtag/commands/code/edit/shared/CodeEditTypes.ts new file mode 100644 index 000000000..b6af24c4f --- /dev/null +++ b/src/debug/jtag/commands/code/edit/shared/CodeEditTypes.ts @@ -0,0 +1,143 @@ +/** + * Code Edit Command - Shared Types + * + * Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Code Edit Command Parameters + */ +export interface CodeEditParams extends CommandParams { + // Relative path to file within workspace + filePath: string; + // Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' + editType: string; + // Text to find (for search_replace mode) + search?: string; + // Replacement text (for search_replace mode) + replace?: string; + // Replace all occurrences (for search_replace mode, default: false) + replaceAll?: boolean; + // Start line (for line_range mode, 1-indexed) + startLine?: number; + // End line (for line_range mode, 1-indexed, inclusive) + endLine?: number; + // New content (for line_range mode) + newContent?: string; + // Line number to insert at (for insert_at mode) + line?: number; + // Content to insert or append + content?: string; + // Description of what this change does + description?: string; +} + +/** + * Factory function for creating CodeEditParams + */ +export const createCodeEditParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Relative path to file within workspace + filePath: string; + // Edit mode: 'search_replace', 'line_range', 'insert_at', or 'append' + editType: string; + // Text to find (for search_replace mode) + search?: string; + // Replacement text (for search_replace mode) + replace?: string; + // Replace all occurrences (for search_replace mode, default: false) + replaceAll?: boolean; + // Start line (for line_range mode, 1-indexed) + startLine?: number; + // End line (for line_range mode, 1-indexed, inclusive) + endLine?: number; + // New content (for line_range mode) + newContent?: string; + // Line number to insert at (for insert_at mode) + line?: number; + // Content to insert or append + content?: string; + // Description of what this change does + description?: string; + } +): CodeEditParams => createPayload(context, sessionId, { + search: data.search ?? '', + replace: data.replace ?? '', + replaceAll: data.replaceAll ?? false, + startLine: data.startLine ?? 0, + endLine: data.endLine ?? 0, + newContent: data.newContent ?? '', + line: data.line ?? 0, + content: data.content ?? '', + description: data.description ?? '', + ...data +}); + +/** + * Code Edit Command Result + */ +export interface CodeEditResult extends CommandResult { + success: boolean; + // UUID of the ChangeNode created (for undo) + changeId: string; + // Resolved file path + filePath: string; + // New file size in bytes + bytesWritten: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeEditResult with defaults + */ +export const createCodeEditResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // UUID of the ChangeNode created (for undo) + changeId?: string; + // Resolved file path + filePath?: string; + // New file size in bytes + bytesWritten?: number; + error?: JTAGError; + } +): CodeEditResult => createPayload(context, sessionId, { + changeId: data.changeId ?? '', + filePath: data.filePath ?? '', + bytesWritten: data.bytesWritten ?? 0, + ...data +}); + +/** + * Smart Code Edit-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeEditResultFromParams = ( + params: CodeEditParams, + differences: Omit +): CodeEditResult => transformPayload(params, differences); + +/** + * Code Edit β€” Type-safe command executor + * + * Usage: + * import { CodeEdit } from '...shared/CodeEditTypes'; + * const result = await CodeEdit.execute({ ... }); + */ +export const CodeEdit = { + execute(params: CommandInput): Promise { + return Commands.execute('code/edit', params as Partial); + }, + commandName: 'code/edit' as const, +} as const; diff --git a/src/debug/jtag/commands/code/edit/test/integration/CodeEditIntegration.test.ts b/src/debug/jtag/commands/code/edit/test/integration/CodeEditIntegration.test.ts new file mode 100644 index 000000000..0818db946 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/test/integration/CodeEditIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeEdit Command Integration Tests + * + * Tests Code Edit command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Edit/test/integration/CodeEditIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeEdit Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Edit command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Edit command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Edit']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Edit returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Edit succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Edit']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Edit']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Edit']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Edit']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Edit']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeEditIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeEdit Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeEdit INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeEdit integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeEditIntegrationTests(); +} else { + module.exports = { runAllCodeEditIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/edit/test/unit/CodeEditCommand.test.ts b/src/debug/jtag/commands/code/edit/test/unit/CodeEditCommand.test.ts new file mode 100644 index 000000000..9d79e2ea1 --- /dev/null +++ b/src/debug/jtag/commands/code/edit/test/unit/CodeEditCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeEdit Command Unit Tests + * + * Tests Code Edit command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Edit/test/unit/CodeEditCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeEditParams, CodeEditResult } from '../../shared/CodeEditTypes'; + +console.log('πŸ§ͺ CodeEdit Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Edit logic for testing + */ +async function mockCodeEditCommand(params: CodeEditParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Edit' or see the Code Edit README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeEditResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeEditCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeEdit command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Edit command + const validParams: CodeEditParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeEditExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Edit command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeEditParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeEditCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeEditRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeEditParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeEditParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeEditCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeEditOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeEditParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeEditCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeEditParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeEditCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeEditPerformance(): Promise { + console.log('\n⚑ Test 5: CodeEdit performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeEditCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeEditParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeEdit completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeEditResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeEdit result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeEditCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeEditParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeEditUnitTests(): Promise { + console.log('πŸš€ Starting CodeEdit Command Unit Tests\n'); + + try { + testCodeEditCommandStructure(); + await testMockCodeEditExecution(); + await testCodeEditRequiredParams(); + await testCodeEditOptionalParams(); + await testCodeEditPerformance(); + await testCodeEditResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeEdit UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeEdit unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeEditUnitTests(); +} else { + module.exports = { runAllCodeEditUnitTests }; +} diff --git a/src/debug/jtag/commands/code/history/.npmignore b/src/debug/jtag/commands/code/history/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/history/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/history/README.md b/src/debug/jtag/commands/code/history/README.md new file mode 100644 index 000000000..36accb805 --- /dev/null +++ b/src/debug/jtag/commands/code/history/README.md @@ -0,0 +1,158 @@ +# Code History Command + +Get change history for a specific file or the entire workspace. Returns change graph nodes with diffs, timestamps, and descriptions. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/history [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/history', { + // your parameters here +}); +``` + +## Parameters + +- **filePath** (optional): `string` - Filter history to a specific file (optional, defaults to all) +- **limit** (optional): `number` - Maximum number of history entries to return (default: 50) + +## Result + +Returns `CodeHistoryResult` with: + +Returns CommandResult with: +- **nodes**: `object[]` - Array of ChangeNode objects with id, filePath, operation, timestamp, description, and diffs +- **totalCount**: `number` - Total number of changes in history + +## Examples + +### Get all workspace history + +```bash +./jtag code/history +``` + +### Get history for specific file + +```bash +./jtag code/history --filePath="src/main.ts" --limit=10 +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/history +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/history' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/history +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/history' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code History/test/unit/CodeHistoryCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code History/test/integration/CodeHistoryIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeHistoryTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeHistoryBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeHistoryServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeHistoryCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeHistoryIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/history/browser/CodeHistoryBrowserCommand.ts b/src/debug/jtag/commands/code/history/browser/CodeHistoryBrowserCommand.ts new file mode 100644 index 000000000..895dc3ee0 --- /dev/null +++ b/src/debug/jtag/commands/code/history/browser/CodeHistoryBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code History Command - Browser Implementation + * + * Get change history for a specific file or the entire workspace. Returns change graph nodes with diffs, timestamps, and descriptions. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeHistoryParams, CodeHistoryResult } from '../shared/CodeHistoryTypes'; + +export class CodeHistoryBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/history', context, subpath, commander); + } + + async execute(params: CodeHistoryParams): Promise { + console.log('🌐 BROWSER: Delegating Code History to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/history/package.json b/src/debug/jtag/commands/code/history/package.json new file mode 100644 index 000000000..070a1ffae --- /dev/null +++ b/src/debug/jtag/commands/code/history/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/history", + "version": "1.0.0", + "description": "Get change history for a specific file or the entire workspace. Returns change graph nodes with diffs, timestamps, and descriptions.", + "main": "server/CodeHistoryServerCommand.ts", + "types": "shared/CodeHistoryTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeHistoryIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/history" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/history/server/CodeHistoryServerCommand.ts b/src/debug/jtag/commands/code/history/server/CodeHistoryServerCommand.ts new file mode 100644 index 000000000..5c9ae90a2 --- /dev/null +++ b/src/debug/jtag/commands/code/history/server/CodeHistoryServerCommand.ts @@ -0,0 +1,38 @@ +/** + * Code History Command - Server Implementation + * + * Get change history for a specific file or the entire workspace. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeHistoryParams, CodeHistoryResult } from '../shared/CodeHistoryTypes'; +import { createCodeHistoryResultFromParams } from '../shared/CodeHistoryTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeHistoryServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/history', context, subpath, commander); + } + + async execute(params: CodeHistoryParams): Promise { + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceHistory( + personaId, + params.filePath, + params.limit + ); + + return createCodeHistoryResultFromParams(params, { + success: result.success, + nodes: result.nodes, + totalCount: result.total_count, + }); + } +} diff --git a/src/debug/jtag/commands/code/history/shared/CodeHistoryTypes.ts b/src/debug/jtag/commands/code/history/shared/CodeHistoryTypes.ts new file mode 100644 index 000000000..712685a69 --- /dev/null +++ b/src/debug/jtag/commands/code/history/shared/CodeHistoryTypes.ts @@ -0,0 +1,96 @@ +/** + * Code History Command - Shared Types + * + * Get change history for a specific file or the entire workspace. Returns change graph nodes with diffs, timestamps, and descriptions. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { ChangeNode } from '@shared/generated/code/ChangeNode'; + +/** + * Code History Command Parameters + */ +export interface CodeHistoryParams extends CommandParams { + // Filter history to a specific file (optional, defaults to all) + filePath?: string; + // Maximum number of history entries to return (default: 50) + limit?: number; +} + +/** + * Factory function for creating CodeHistoryParams + */ +export const createCodeHistoryParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Filter history to a specific file (optional, defaults to all) + filePath?: string; + // Maximum number of history entries to return (default: 50) + limit?: number; + } +): CodeHistoryParams => createPayload(context, sessionId, { + filePath: data.filePath ?? '', + limit: data.limit ?? 0, + ...data +}); + +/** + * Code History Command Result + */ +export interface CodeHistoryResult extends CommandResult { + success: boolean; + // Change graph nodes from Rust (generated type via ts-rs) + nodes: ChangeNode[]; + // Total number of changes in history + totalCount: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeHistoryResult with defaults + */ +export const createCodeHistoryResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Change graph nodes from Rust (generated type via ts-rs) + nodes?: ChangeNode[]; + // Total number of changes in history + totalCount?: number; + error?: JTAGError; + } +): CodeHistoryResult => createPayload(context, sessionId, { + nodes: data.nodes ?? [], + totalCount: data.totalCount ?? 0, + ...data +}); + +/** + * Smart Code History-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeHistoryResultFromParams = ( + params: CodeHistoryParams, + differences: Omit +): CodeHistoryResult => transformPayload(params, differences); + +/** + * Code History β€” Type-safe command executor + * + * Usage: + * import { CodeHistory } from '...shared/CodeHistoryTypes'; + * const result = await CodeHistory.execute({ ... }); + */ +export const CodeHistory = { + execute(params: CommandInput): Promise { + return Commands.execute('code/history', params as Partial); + }, + commandName: 'code/history' as const, +} as const; diff --git a/src/debug/jtag/commands/code/history/test/integration/CodeHistoryIntegration.test.ts b/src/debug/jtag/commands/code/history/test/integration/CodeHistoryIntegration.test.ts new file mode 100644 index 000000000..39131e7c7 --- /dev/null +++ b/src/debug/jtag/commands/code/history/test/integration/CodeHistoryIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeHistory Command Integration Tests + * + * Tests Code History command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code History/test/integration/CodeHistoryIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeHistory Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code History command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code History command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code History']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code History returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code History succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code History']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code History']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code History']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code History']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code History']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeHistoryIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeHistory Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeHistory INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeHistory integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeHistoryIntegrationTests(); +} else { + module.exports = { runAllCodeHistoryIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/history/test/unit/CodeHistoryCommand.test.ts b/src/debug/jtag/commands/code/history/test/unit/CodeHistoryCommand.test.ts new file mode 100644 index 000000000..9e6c2fdc2 --- /dev/null +++ b/src/debug/jtag/commands/code/history/test/unit/CodeHistoryCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeHistory Command Unit Tests + * + * Tests Code History command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code History/test/unit/CodeHistoryCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeHistoryParams, CodeHistoryResult } from '../../shared/CodeHistoryTypes'; + +console.log('πŸ§ͺ CodeHistory Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code History logic for testing + */ +async function mockCodeHistoryCommand(params: CodeHistoryParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code History' or see the Code History README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeHistoryResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeHistoryCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeHistory command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code History command + const validParams: CodeHistoryParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeHistoryExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code History command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeHistoryParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeHistoryCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeHistoryRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeHistoryParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeHistoryParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeHistoryCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeHistoryOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeHistoryParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeHistoryCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeHistoryParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeHistoryCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeHistoryPerformance(): Promise { + console.log('\n⚑ Test 5: CodeHistory performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeHistoryCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeHistoryParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeHistory completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeHistoryResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeHistory result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeHistoryCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeHistoryParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeHistoryUnitTests(): Promise { + console.log('πŸš€ Starting CodeHistory Command Unit Tests\n'); + + try { + testCodeHistoryCommandStructure(); + await testMockCodeHistoryExecution(); + await testCodeHistoryRequiredParams(); + await testCodeHistoryOptionalParams(); + await testCodeHistoryPerformance(); + await testCodeHistoryResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeHistory UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeHistory unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeHistoryUnitTests(); +} else { + module.exports = { runAllCodeHistoryUnitTests }; +} diff --git a/src/debug/jtag/commands/code/read/.npmignore b/src/debug/jtag/commands/code/read/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/read/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/read/README.md b/src/debug/jtag/commands/code/read/README.md new file mode 100644 index 000000000..aa9eba939 --- /dev/null +++ b/src/debug/jtag/commands/code/read/README.md @@ -0,0 +1,164 @@ +# Code Read Command + +Read a file or line range from the persona's workspace. Returns content with line numbers and metadata. Supports partial reads via start/end line parameters. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/read --filePath= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/read', { + // your parameters here +}); +``` + +## Parameters + +- **filePath** (required): `string` - Relative path to file within workspace +- **startLine** (optional): `number` - First line to read (1-indexed, inclusive) +- **endLine** (optional): `number` - Last line to read (1-indexed, inclusive) + +## Result + +Returns `CodeReadResult` with: + +Returns CommandResult with: +- **content**: `string` - File content (or line range) +- **filePath**: `string` - Resolved file path +- **totalLines**: `number` - Total lines in file +- **linesReturned**: `number` - Number of lines returned +- **startLine**: `number` - Start line of returned content +- **endLine**: `number` - End line of returned content +- **sizeBytes**: `number` - File size in bytes + +## Examples + +### Read entire file + +```bash +./jtag code/read --filePath="src/main.ts" +``` + +### Read line range + +```bash +./jtag code/read --filePath="src/main.ts" --startLine=10 --endLine=25 +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/read +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/read' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/read +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/read' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Read/test/unit/CodeReadCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Read/test/integration/CodeReadIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeReadTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeReadBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeReadServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeReadCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeReadIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/read/browser/CodeReadBrowserCommand.ts b/src/debug/jtag/commands/code/read/browser/CodeReadBrowserCommand.ts new file mode 100644 index 000000000..7891cfed4 --- /dev/null +++ b/src/debug/jtag/commands/code/read/browser/CodeReadBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Read Command - Browser Implementation + * + * Read a file or line range from the persona's workspace. Returns content with line numbers and metadata. Supports partial reads via start/end line parameters. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeReadParams, CodeReadResult } from '../shared/CodeReadTypes'; + +export class CodeReadBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/read', context, subpath, commander); + } + + async execute(params: CodeReadParams): Promise { + console.log('🌐 BROWSER: Delegating Code Read to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/read/package.json b/src/debug/jtag/commands/code/read/package.json new file mode 100644 index 000000000..3b21b4bf5 --- /dev/null +++ b/src/debug/jtag/commands/code/read/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/read", + "version": "1.0.0", + "description": "Read a file or line range from the persona's workspace. Returns content with line numbers and metadata. Supports partial reads via start/end line parameters.", + "main": "server/CodeReadServerCommand.ts", + "types": "shared/CodeReadTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeReadIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/read" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/read/server/CodeReadServerCommand.ts b/src/debug/jtag/commands/code/read/server/CodeReadServerCommand.ts new file mode 100644 index 000000000..ed400f981 --- /dev/null +++ b/src/debug/jtag/commands/code/read/server/CodeReadServerCommand.ts @@ -0,0 +1,51 @@ +/** + * Code Read Command - Server Implementation + * + * Reads a file or line range from the persona's workspace via Rust IPC. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeReadParams, CodeReadResult } from '../shared/CodeReadTypes'; +import { createCodeReadResultFromParams } from '../shared/CodeReadTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeReadServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/read', context, subpath, commander); + } + + async execute(params: CodeReadParams): Promise { + if (!params.filePath || params.filePath.trim() === '') { + throw new ValidationError( + 'filePath', + `Missing required parameter 'filePath'. See the code/read README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceRead( + personaId, + params.filePath, + params.startLine, + params.endLine + ); + + return createCodeReadResultFromParams(params, { + success: result.success, + content: result.content || '', + filePath: result.file_path, + totalLines: result.total_lines, + linesReturned: result.lines_returned, + startLine: result.start_line, + endLine: result.end_line, + sizeBytes: result.size_bytes, + }); + } +} diff --git a/src/debug/jtag/commands/code/read/shared/CodeReadTypes.ts b/src/debug/jtag/commands/code/read/shared/CodeReadTypes.ts new file mode 100644 index 000000000..b832ab970 --- /dev/null +++ b/src/debug/jtag/commands/code/read/shared/CodeReadTypes.ts @@ -0,0 +1,124 @@ +/** + * Code Read Command - Shared Types + * + * Read a file or line range from the persona's workspace. Returns content with line numbers and metadata. Supports partial reads via start/end line parameters. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Code Read Command Parameters + */ +export interface CodeReadParams extends CommandParams { + // Relative path to file within workspace + filePath: string; + // First line to read (1-indexed, inclusive) + startLine?: number; + // Last line to read (1-indexed, inclusive) + endLine?: number; +} + +/** + * Factory function for creating CodeReadParams + */ +export const createCodeReadParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Relative path to file within workspace + filePath: string; + // First line to read (1-indexed, inclusive) + startLine?: number; + // Last line to read (1-indexed, inclusive) + endLine?: number; + } +): CodeReadParams => createPayload(context, sessionId, { + startLine: data.startLine ?? 0, + endLine: data.endLine ?? 0, + ...data +}); + +/** + * Code Read Command Result + */ +export interface CodeReadResult extends CommandResult { + success: boolean; + // File content (or line range) + content: string; + // Resolved file path + filePath: string; + // Total lines in file + totalLines: number; + // Number of lines returned + linesReturned: number; + // Start line of returned content + startLine: number; + // End line of returned content + endLine: number; + // File size in bytes + sizeBytes: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeReadResult with defaults + */ +export const createCodeReadResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // File content (or line range) + content?: string; + // Resolved file path + filePath?: string; + // Total lines in file + totalLines?: number; + // Number of lines returned + linesReturned?: number; + // Start line of returned content + startLine?: number; + // End line of returned content + endLine?: number; + // File size in bytes + sizeBytes?: number; + error?: JTAGError; + } +): CodeReadResult => createPayload(context, sessionId, { + content: data.content ?? '', + filePath: data.filePath ?? '', + totalLines: data.totalLines ?? 0, + linesReturned: data.linesReturned ?? 0, + startLine: data.startLine ?? 0, + endLine: data.endLine ?? 0, + sizeBytes: data.sizeBytes ?? 0, + ...data +}); + +/** + * Smart Code Read-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeReadResultFromParams = ( + params: CodeReadParams, + differences: Omit +): CodeReadResult => transformPayload(params, differences); + +/** + * Code Read β€” Type-safe command executor + * + * Usage: + * import { CodeRead } from '...shared/CodeReadTypes'; + * const result = await CodeRead.execute({ ... }); + */ +export const CodeRead = { + execute(params: CommandInput): Promise { + return Commands.execute('code/read', params as Partial); + }, + commandName: 'code/read' as const, +} as const; diff --git a/src/debug/jtag/commands/code/read/test/integration/CodeReadIntegration.test.ts b/src/debug/jtag/commands/code/read/test/integration/CodeReadIntegration.test.ts new file mode 100644 index 000000000..a11b3807f --- /dev/null +++ b/src/debug/jtag/commands/code/read/test/integration/CodeReadIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeRead Command Integration Tests + * + * Tests Code Read command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Read/test/integration/CodeReadIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeRead Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Read command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Read command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Read']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Read returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Read succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Read']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Read']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Read']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Read']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Read']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeReadIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeRead Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeRead INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeRead integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeReadIntegrationTests(); +} else { + module.exports = { runAllCodeReadIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/read/test/unit/CodeReadCommand.test.ts b/src/debug/jtag/commands/code/read/test/unit/CodeReadCommand.test.ts new file mode 100644 index 000000000..45c9d1f9e --- /dev/null +++ b/src/debug/jtag/commands/code/read/test/unit/CodeReadCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeRead Command Unit Tests + * + * Tests Code Read command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Read/test/unit/CodeReadCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeReadParams, CodeReadResult } from '../../shared/CodeReadTypes'; + +console.log('πŸ§ͺ CodeRead Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Read logic for testing + */ +async function mockCodeReadCommand(params: CodeReadParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Read' or see the Code Read README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeReadResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeReadCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeRead command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Read command + const validParams: CodeReadParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeReadExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Read command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeReadParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeReadCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeReadRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeReadParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeReadParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeReadCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeReadOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeReadParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeReadCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeReadParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeReadCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeReadPerformance(): Promise { + console.log('\n⚑ Test 5: CodeRead performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeReadCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeReadParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeRead completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeReadResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeRead result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeReadCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeReadParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeReadUnitTests(): Promise { + console.log('πŸš€ Starting CodeRead Command Unit Tests\n'); + + try { + testCodeReadCommandStructure(); + await testMockCodeReadExecution(); + await testCodeReadRequiredParams(); + await testCodeReadOptionalParams(); + await testCodeReadPerformance(); + await testCodeReadResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeRead UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeRead unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeReadUnitTests(); +} else { + module.exports = { runAllCodeReadUnitTests }; +} diff --git a/src/debug/jtag/commands/code/search/.npmignore b/src/debug/jtag/commands/code/search/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/search/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/search/README.md b/src/debug/jtag/commands/code/search/README.md new file mode 100644 index 000000000..96ddfff04 --- /dev/null +++ b/src/debug/jtag/commands/code/search/README.md @@ -0,0 +1,160 @@ +# Code Search Command + +Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/search --pattern= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/search', { + // your parameters here +}); +``` + +## Parameters + +- **pattern** (required): `string` - Regex pattern to search for +- **fileGlob** (optional): `string` - Glob pattern to filter files (e.g., '*.ts', 'src/**/*.rs') +- **maxResults** (optional): `number` - Maximum number of matches to return (default: 100) + +## Result + +Returns `CodeSearchResult` with: + +Returns CommandResult with: +- **matches**: `object[]` - Array of SearchMatch objects with filePath, lineNumber, lineContent, matchStart, matchEnd +- **totalMatches**: `number` - Total number of matches found +- **filesSearched**: `number` - Number of files searched + +## Examples + +### Search for function definitions + +```bash +./jtag code/search --pattern="function\s+\w+" --fileGlob="*.ts" +``` + +### Search for TODO comments + +```bash +./jtag code/search --pattern="TODO|FIXME|HACK" --maxResults=50 +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/search +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/search' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/search +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/search' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Search/test/unit/CodeSearchCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Search/test/integration/CodeSearchIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeSearchTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeSearchBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeSearchServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeSearchCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeSearchIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/search/browser/CodeSearchBrowserCommand.ts b/src/debug/jtag/commands/code/search/browser/CodeSearchBrowserCommand.ts new file mode 100644 index 000000000..edf04edd3 --- /dev/null +++ b/src/debug/jtag/commands/code/search/browser/CodeSearchBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Search Command - Browser Implementation + * + * Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeSearchParams, CodeSearchResult } from '../shared/CodeSearchTypes'; + +export class CodeSearchBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/search', context, subpath, commander); + } + + async execute(params: CodeSearchParams): Promise { + console.log('🌐 BROWSER: Delegating Code Search to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/search/package.json b/src/debug/jtag/commands/code/search/package.json new file mode 100644 index 000000000..050922766 --- /dev/null +++ b/src/debug/jtag/commands/code/search/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/search", + "version": "1.0.0", + "description": "Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context.", + "main": "server/CodeSearchServerCommand.ts", + "types": "shared/CodeSearchTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeSearchIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/search" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/search/server/CodeSearchServerCommand.ts b/src/debug/jtag/commands/code/search/server/CodeSearchServerCommand.ts new file mode 100644 index 000000000..8551e31c4 --- /dev/null +++ b/src/debug/jtag/commands/code/search/server/CodeSearchServerCommand.ts @@ -0,0 +1,48 @@ +/** + * Code Search Command - Server Implementation + * + * Search for a regex pattern across workspace files. + * Respects .gitignore, supports glob-based file filtering. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeSearchParams, CodeSearchResult } from '../shared/CodeSearchTypes'; +import { createCodeSearchResultFromParams } from '../shared/CodeSearchTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeSearchServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/search', context, subpath, commander); + } + + async execute(params: CodeSearchParams): Promise { + if (!params.pattern || params.pattern.trim() === '') { + throw new ValidationError( + 'pattern', + `Missing required parameter 'pattern'. See the code/search README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceSearch( + personaId, + params.pattern, + params.fileGlob, + params.maxResults + ); + + return createCodeSearchResultFromParams(params, { + success: result.success, + matches: result.matches, + totalMatches: result.total_matches, + filesSearched: result.files_searched, + }); + } +} diff --git a/src/debug/jtag/commands/code/search/shared/CodeSearchTypes.ts b/src/debug/jtag/commands/code/search/shared/CodeSearchTypes.ts new file mode 100644 index 000000000..f0144f9b2 --- /dev/null +++ b/src/debug/jtag/commands/code/search/shared/CodeSearchTypes.ts @@ -0,0 +1,105 @@ +/** + * Code Search Command - Shared Types + * + * Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { SearchMatch } from '@shared/generated/code/SearchMatch'; + +/** + * Code Search Command Parameters + */ +export interface CodeSearchParams extends CommandParams { + // Regex pattern to search for + pattern: string; + // Glob pattern to filter files (e.g., '*.ts', 'src/**/*.rs') + fileGlob?: string; + // Maximum number of matches to return (default: 100) + maxResults?: number; +} + +/** + * Factory function for creating CodeSearchParams + */ +export const createCodeSearchParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Regex pattern to search for + pattern: string; + // Glob pattern to filter files (e.g., '*.ts', 'src/**/*.rs') + fileGlob?: string; + // Maximum number of matches to return (default: 100) + maxResults?: number; + } +): CodeSearchParams => createPayload(context, sessionId, { + fileGlob: data.fileGlob ?? '', + maxResults: data.maxResults ?? 0, + ...data +}); + +/** + * Code Search Command Result + */ +export interface CodeSearchResult extends CommandResult { + success: boolean; + // Search matches from Rust (generated type via ts-rs) + matches: SearchMatch[]; + // Total number of matches found + totalMatches: number; + // Number of files searched + filesSearched: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeSearchResult with defaults + */ +export const createCodeSearchResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Search matches from Rust (generated type via ts-rs) + matches?: SearchMatch[]; + // Total number of matches found + totalMatches?: number; + // Number of files searched + filesSearched?: number; + error?: JTAGError; + } +): CodeSearchResult => createPayload(context, sessionId, { + matches: data.matches ?? [], + totalMatches: data.totalMatches ?? 0, + filesSearched: data.filesSearched ?? 0, + ...data +}); + +/** + * Smart Code Search-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeSearchResultFromParams = ( + params: CodeSearchParams, + differences: Omit +): CodeSearchResult => transformPayload(params, differences); + +/** + * Code Search β€” Type-safe command executor + * + * Usage: + * import { CodeSearch } from '...shared/CodeSearchTypes'; + * const result = await CodeSearch.execute({ ... }); + */ +export const CodeSearch = { + execute(params: CommandInput): Promise { + return Commands.execute('code/search', params as Partial); + }, + commandName: 'code/search' as const, +} as const; diff --git a/src/debug/jtag/commands/code/search/test/integration/CodeSearchIntegration.test.ts b/src/debug/jtag/commands/code/search/test/integration/CodeSearchIntegration.test.ts new file mode 100644 index 000000000..fefa00c92 --- /dev/null +++ b/src/debug/jtag/commands/code/search/test/integration/CodeSearchIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeSearch Command Integration Tests + * + * Tests Code Search command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Search/test/integration/CodeSearchIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeSearch Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Search command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Search command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Search']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Search returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Search succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Search']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Search']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Search']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Search']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Search']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeSearchIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeSearch Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeSearch INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeSearch integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeSearchIntegrationTests(); +} else { + module.exports = { runAllCodeSearchIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/search/test/unit/CodeSearchCommand.test.ts b/src/debug/jtag/commands/code/search/test/unit/CodeSearchCommand.test.ts new file mode 100644 index 000000000..de77f515f --- /dev/null +++ b/src/debug/jtag/commands/code/search/test/unit/CodeSearchCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeSearch Command Unit Tests + * + * Tests Code Search command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Search/test/unit/CodeSearchCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeSearchParams, CodeSearchResult } from '../../shared/CodeSearchTypes'; + +console.log('πŸ§ͺ CodeSearch Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Search logic for testing + */ +async function mockCodeSearchCommand(params: CodeSearchParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Search' or see the Code Search README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeSearchResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeSearchCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeSearch command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Search command + const validParams: CodeSearchParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeSearchExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Search command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeSearchParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeSearchCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeSearchRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeSearchParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeSearchParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeSearchCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeSearchOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeSearchParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeSearchCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeSearchParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeSearchCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeSearchPerformance(): Promise { + console.log('\n⚑ Test 5: CodeSearch performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeSearchCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeSearchParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeSearch completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeSearchResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeSearch result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeSearchCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeSearchParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeSearchUnitTests(): Promise { + console.log('πŸš€ Starting CodeSearch Command Unit Tests\n'); + + try { + testCodeSearchCommandStructure(); + await testMockCodeSearchExecution(); + await testCodeSearchRequiredParams(); + await testCodeSearchOptionalParams(); + await testCodeSearchPerformance(); + await testCodeSearchResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeSearch UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeSearch unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeSearchUnitTests(); +} else { + module.exports = { runAllCodeSearchUnitTests }; +} diff --git a/src/debug/jtag/commands/code/tree/.npmignore b/src/debug/jtag/commands/code/tree/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/tree/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/tree/README.md b/src/debug/jtag/commands/code/tree/README.md new file mode 100644 index 000000000..d51d89afd --- /dev/null +++ b/src/debug/jtag/commands/code/tree/README.md @@ -0,0 +1,160 @@ +# Code Tree Command + +Generate a directory tree for the workspace or a subdirectory. Shows file/directory structure with sizes. Skips common ignored directories (node_modules, .git, etc). + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/tree [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/tree', { + // your parameters here +}); +``` + +## Parameters + +- **path** (optional): `string` - Subdirectory to tree (default: workspace root) +- **maxDepth** (optional): `number` - Maximum directory depth (default: 10) +- **includeHidden** (optional): `boolean` - Include hidden files and directories (default: false) + +## Result + +Returns `CodeTreeResult` with: + +Returns CommandResult with: +- **root**: `object` - TreeNode with name, path, isDirectory, sizeBytes, and children array +- **totalFiles**: `number` - Total number of files in tree +- **totalDirectories**: `number` - Total number of directories in tree + +## Examples + +### Show full workspace tree + +```bash +./jtag code/tree +``` + +### Show src directory, 3 levels deep + +```bash +./jtag code/tree --path="src" --maxDepth=3 +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/tree +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/tree' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/tree +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/tree' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Tree/test/unit/CodeTreeCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Tree/test/integration/CodeTreeIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeTreeTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeTreeBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeTreeServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeTreeCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeTreeIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/tree/browser/CodeTreeBrowserCommand.ts b/src/debug/jtag/commands/code/tree/browser/CodeTreeBrowserCommand.ts new file mode 100644 index 000000000..96286cc60 --- /dev/null +++ b/src/debug/jtag/commands/code/tree/browser/CodeTreeBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Tree Command - Browser Implementation + * + * Generate a directory tree for the workspace or a subdirectory. Shows file/directory structure with sizes. Skips common ignored directories (node_modules, .git, etc). + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeTreeParams, CodeTreeResult } from '../shared/CodeTreeTypes'; + +export class CodeTreeBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/tree', context, subpath, commander); + } + + async execute(params: CodeTreeParams): Promise { + console.log('🌐 BROWSER: Delegating Code Tree to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/tree/package.json b/src/debug/jtag/commands/code/tree/package.json new file mode 100644 index 000000000..79489d593 --- /dev/null +++ b/src/debug/jtag/commands/code/tree/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/tree", + "version": "1.0.0", + "description": "Generate a directory tree for the workspace or a subdirectory. Shows file/directory structure with sizes. Skips common ignored directories (node_modules, .git, etc).", + "main": "server/CodeTreeServerCommand.ts", + "types": "shared/CodeTreeTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeTreeIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/tree" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/tree/server/CodeTreeServerCommand.ts b/src/debug/jtag/commands/code/tree/server/CodeTreeServerCommand.ts new file mode 100644 index 000000000..3175169c6 --- /dev/null +++ b/src/debug/jtag/commands/code/tree/server/CodeTreeServerCommand.ts @@ -0,0 +1,40 @@ +/** + * Code Tree Command - Server Implementation + * + * Generate a directory tree for the workspace or a subdirectory. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeTreeParams, CodeTreeResult } from '../shared/CodeTreeTypes'; +import { createCodeTreeResultFromParams } from '../shared/CodeTreeTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeTreeServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/tree', context, subpath, commander); + } + + async execute(params: CodeTreeParams): Promise { + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceTree( + personaId, + params.path, + params.maxDepth, + params.includeHidden + ); + + return createCodeTreeResultFromParams(params, { + success: result.success, + root: result.root ?? null, + totalFiles: result.total_files, + totalDirectories: result.total_directories, + }); + } +} diff --git a/src/debug/jtag/commands/code/tree/shared/CodeTreeTypes.ts b/src/debug/jtag/commands/code/tree/shared/CodeTreeTypes.ts new file mode 100644 index 000000000..989a6c06f --- /dev/null +++ b/src/debug/jtag/commands/code/tree/shared/CodeTreeTypes.ts @@ -0,0 +1,106 @@ +/** + * Code Tree Command - Shared Types + * + * Generate a directory tree for the workspace or a subdirectory. Shows file/directory structure with sizes. Skips common ignored directories (node_modules, .git, etc). + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { TreeNode } from '@shared/generated/code/TreeNode'; + +/** + * Code Tree Command Parameters + */ +export interface CodeTreeParams extends CommandParams { + // Subdirectory to tree (default: workspace root) + path?: string; + // Maximum directory depth (default: 10) + maxDepth?: number; + // Include hidden files and directories (default: false) + includeHidden?: boolean; +} + +/** + * Factory function for creating CodeTreeParams + */ +export const createCodeTreeParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Subdirectory to tree (default: workspace root) + path?: string; + // Maximum directory depth (default: 10) + maxDepth?: number; + // Include hidden files and directories (default: false) + includeHidden?: boolean; + } +): CodeTreeParams => createPayload(context, sessionId, { + path: data.path ?? '', + maxDepth: data.maxDepth ?? 0, + includeHidden: data.includeHidden ?? false, + ...data +}); + +/** + * Code Tree Command Result + */ +export interface CodeTreeResult extends CommandResult { + success: boolean; + // Directory tree from Rust (generated type via ts-rs) + root: TreeNode | null; + // Total number of files in tree + totalFiles: number; + // Total number of directories in tree + totalDirectories: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeTreeResult with defaults + */ +export const createCodeTreeResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Directory tree from Rust (generated type via ts-rs) + root?: TreeNode; + // Total number of files in tree + totalFiles?: number; + // Total number of directories in tree + totalDirectories?: number; + error?: JTAGError; + } +): CodeTreeResult => createPayload(context, sessionId, { + root: data.root ?? null, + totalFiles: data.totalFiles ?? 0, + totalDirectories: data.totalDirectories ?? 0, + ...data +}); + +/** + * Smart Code Tree-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeTreeResultFromParams = ( + params: CodeTreeParams, + differences: Omit +): CodeTreeResult => transformPayload(params, differences); + +/** + * Code Tree β€” Type-safe command executor + * + * Usage: + * import { CodeTree } from '...shared/CodeTreeTypes'; + * const result = await CodeTree.execute({ ... }); + */ +export const CodeTree = { + execute(params: CommandInput): Promise { + return Commands.execute('code/tree', params as Partial); + }, + commandName: 'code/tree' as const, +} as const; diff --git a/src/debug/jtag/commands/code/tree/test/integration/CodeTreeIntegration.test.ts b/src/debug/jtag/commands/code/tree/test/integration/CodeTreeIntegration.test.ts new file mode 100644 index 000000000..42e22636a --- /dev/null +++ b/src/debug/jtag/commands/code/tree/test/integration/CodeTreeIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeTree Command Integration Tests + * + * Tests Code Tree command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Tree/test/integration/CodeTreeIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeTree Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Tree command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Tree command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Tree']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Tree returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Tree succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Tree']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Tree']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Tree']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Tree']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Tree']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeTreeIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeTree Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeTree INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeTree integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeTreeIntegrationTests(); +} else { + module.exports = { runAllCodeTreeIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/tree/test/unit/CodeTreeCommand.test.ts b/src/debug/jtag/commands/code/tree/test/unit/CodeTreeCommand.test.ts new file mode 100644 index 000000000..32e0c6cf1 --- /dev/null +++ b/src/debug/jtag/commands/code/tree/test/unit/CodeTreeCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeTree Command Unit Tests + * + * Tests Code Tree command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Tree/test/unit/CodeTreeCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeTreeParams, CodeTreeResult } from '../../shared/CodeTreeTypes'; + +console.log('πŸ§ͺ CodeTree Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Tree logic for testing + */ +async function mockCodeTreeCommand(params: CodeTreeParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Tree' or see the Code Tree README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeTreeResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeTreeCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeTree command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Tree command + const validParams: CodeTreeParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeTreeExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Tree command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeTreeParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeTreeCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeTreeRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeTreeParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeTreeParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeTreeCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeTreeOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeTreeParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeTreeCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeTreeParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeTreeCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeTreePerformance(): Promise { + console.log('\n⚑ Test 5: CodeTree performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeTreeCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeTreeParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeTree completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeTreeResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeTree result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeTreeCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeTreeParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeTreeUnitTests(): Promise { + console.log('πŸš€ Starting CodeTree Command Unit Tests\n'); + + try { + testCodeTreeCommandStructure(); + await testMockCodeTreeExecution(); + await testCodeTreeRequiredParams(); + await testCodeTreeOptionalParams(); + await testCodeTreePerformance(); + await testCodeTreeResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeTree UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeTree unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeTreeUnitTests(); +} else { + module.exports = { runAllCodeTreeUnitTests }; +} diff --git a/src/debug/jtag/commands/code/undo/.npmignore b/src/debug/jtag/commands/code/undo/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/undo/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/undo/README.md b/src/debug/jtag/commands/code/undo/README.md new file mode 100644 index 000000000..373362c18 --- /dev/null +++ b/src/debug/jtag/commands/code/undo/README.md @@ -0,0 +1,163 @@ +# Code Undo Command + +Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/undo [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/undo', { + // your parameters here +}); +``` + +## Parameters + +- **changeId** (optional): `string` - UUID of a specific change to undo +- **count** (optional): `number` - Number of most recent changes to undo (default: 1) + +## Result + +Returns `CodeUndoResult` with: + +Returns CommandResult with: +- **changesUndone**: `object[]` - Array of undo results with changeId, filePath, and bytesWritten for each undone change + +## Examples + +### Undo last change + +```bash +./jtag code/undo +``` + +### Undo last 3 changes + +```bash +./jtag code/undo --count=3 +``` + +### Undo specific change + +```bash +./jtag code/undo --changeId="abc-123" +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/undo +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/undo' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/undo +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/undo' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Undo/test/unit/CodeUndoCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Undo/test/integration/CodeUndoIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeUndoTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeUndoBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeUndoServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeUndoCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeUndoIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/undo/browser/CodeUndoBrowserCommand.ts b/src/debug/jtag/commands/code/undo/browser/CodeUndoBrowserCommand.ts new file mode 100644 index 000000000..9201871ff --- /dev/null +++ b/src/debug/jtag/commands/code/undo/browser/CodeUndoBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Undo Command - Browser Implementation + * + * Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeUndoParams, CodeUndoResult } from '../shared/CodeUndoTypes'; + +export class CodeUndoBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/undo', context, subpath, commander); + } + + async execute(params: CodeUndoParams): Promise { + console.log('🌐 BROWSER: Delegating Code Undo to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/undo/package.json b/src/debug/jtag/commands/code/undo/package.json new file mode 100644 index 000000000..4d33ed983 --- /dev/null +++ b/src/debug/jtag/commands/code/undo/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/undo", + "version": "1.0.0", + "description": "Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state.", + "main": "server/CodeUndoServerCommand.ts", + "types": "shared/CodeUndoTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeUndoIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/undo" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/undo/server/CodeUndoServerCommand.ts b/src/debug/jtag/commands/code/undo/server/CodeUndoServerCommand.ts new file mode 100644 index 000000000..afdfa978f --- /dev/null +++ b/src/debug/jtag/commands/code/undo/server/CodeUndoServerCommand.ts @@ -0,0 +1,43 @@ +/** + * Code Undo Command - Server Implementation + * + * Undo a specific change or the last N changes. + * Applies reverse diffs from the change graph to restore previous state. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeUndoParams, CodeUndoResult } from '../shared/CodeUndoTypes'; +import { createCodeUndoResultFromParams } from '../shared/CodeUndoTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeUndoServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/undo', context, subpath, commander); + } + + async execute(params: CodeUndoParams): Promise { + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceUndo( + personaId, + params.changeId, + params.count + ); + + return createCodeUndoResultFromParams(params, { + success: result.success, + changesUndone: result.changes_undone.map(c => ({ + success: c.success, + change_id: c.change_id, + file_path: c.file_path, + bytes_written: c.bytes_written, + })), + }); + } +} diff --git a/src/debug/jtag/commands/code/undo/shared/CodeUndoTypes.ts b/src/debug/jtag/commands/code/undo/shared/CodeUndoTypes.ts new file mode 100644 index 000000000..734602185 --- /dev/null +++ b/src/debug/jtag/commands/code/undo/shared/CodeUndoTypes.ts @@ -0,0 +1,91 @@ +/** + * Code Undo Command - Shared Types + * + * Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { WriteResult } from '@shared/generated/code/WriteResult'; + +/** + * Code Undo Command Parameters + */ +export interface CodeUndoParams extends CommandParams { + // UUID of a specific change to undo + changeId?: string; + // Number of most recent changes to undo (default: 1) + count?: number; +} + +/** + * Factory function for creating CodeUndoParams + */ +export const createCodeUndoParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // UUID of a specific change to undo + changeId?: string; + // Number of most recent changes to undo (default: 1) + count?: number; + } +): CodeUndoParams => createPayload(context, sessionId, { + changeId: data.changeId ?? '', + count: data.count ?? 0, + ...data +}); + +/** + * Code Undo Command Result + */ +export interface CodeUndoResult extends CommandResult { + success: boolean; + // Undo results from Rust (generated type via ts-rs) + changesUndone: WriteResult[]; + error?: JTAGError; +} + +/** + * Factory function for creating CodeUndoResult with defaults + */ +export const createCodeUndoResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Undo results from Rust (generated type via ts-rs) + changesUndone?: WriteResult[]; + error?: JTAGError; + } +): CodeUndoResult => createPayload(context, sessionId, { + changesUndone: data.changesUndone ?? [], + ...data +}); + +/** + * Smart Code Undo-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeUndoResultFromParams = ( + params: CodeUndoParams, + differences: Omit +): CodeUndoResult => transformPayload(params, differences); + +/** + * Code Undo β€” Type-safe command executor + * + * Usage: + * import { CodeUndo } from '...shared/CodeUndoTypes'; + * const result = await CodeUndo.execute({ ... }); + */ +export const CodeUndo = { + execute(params: CommandInput): Promise { + return Commands.execute('code/undo', params as Partial); + }, + commandName: 'code/undo' as const, +} as const; diff --git a/src/debug/jtag/commands/code/undo/test/integration/CodeUndoIntegration.test.ts b/src/debug/jtag/commands/code/undo/test/integration/CodeUndoIntegration.test.ts new file mode 100644 index 000000000..7a6701fa6 --- /dev/null +++ b/src/debug/jtag/commands/code/undo/test/integration/CodeUndoIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeUndo Command Integration Tests + * + * Tests Code Undo command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Undo/test/integration/CodeUndoIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeUndo Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Undo command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Undo command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Undo']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Undo returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Undo succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Undo']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Undo']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Undo']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Undo']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Undo']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeUndoIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeUndo Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeUndo INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeUndo integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeUndoIntegrationTests(); +} else { + module.exports = { runAllCodeUndoIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/undo/test/unit/CodeUndoCommand.test.ts b/src/debug/jtag/commands/code/undo/test/unit/CodeUndoCommand.test.ts new file mode 100644 index 000000000..dd979e2d4 --- /dev/null +++ b/src/debug/jtag/commands/code/undo/test/unit/CodeUndoCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeUndo Command Unit Tests + * + * Tests Code Undo command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Undo/test/unit/CodeUndoCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeUndoParams, CodeUndoResult } from '../../shared/CodeUndoTypes'; + +console.log('πŸ§ͺ CodeUndo Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Undo logic for testing + */ +async function mockCodeUndoCommand(params: CodeUndoParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Undo' or see the Code Undo README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeUndoResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeUndoCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeUndo command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Undo command + const validParams: CodeUndoParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeUndoExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Undo command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeUndoParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeUndoCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeUndoRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeUndoParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeUndoParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeUndoCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeUndoOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeUndoParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeUndoCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeUndoParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeUndoCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeUndoPerformance(): Promise { + console.log('\n⚑ Test 5: CodeUndo performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeUndoCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeUndoParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeUndo completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeUndoResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeUndo result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeUndoCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeUndoParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeUndoUnitTests(): Promise { + console.log('πŸš€ Starting CodeUndo Command Unit Tests\n'); + + try { + testCodeUndoCommandStructure(); + await testMockCodeUndoExecution(); + await testCodeUndoRequiredParams(); + await testCodeUndoOptionalParams(); + await testCodeUndoPerformance(); + await testCodeUndoResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeUndo UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeUndo unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeUndoUnitTests(); +} else { + module.exports = { runAllCodeUndoUnitTests }; +} diff --git a/src/debug/jtag/commands/code/write/.npmignore b/src/debug/jtag/commands/code/write/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/write/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/write/README.md b/src/debug/jtag/commands/code/write/README.md new file mode 100644 index 000000000..08488cb0e --- /dev/null +++ b/src/debug/jtag/commands/code/write/README.md @@ -0,0 +1,154 @@ +# Code Write Command + +Write or create a file in the persona's workspace. Creates a ChangeNode in the change graph for undo support. File extension must be in the allowlist. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/write --filePath= --content= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/write', { + // your parameters here +}); +``` + +## Parameters + +- **filePath** (required): `string` - Relative path to file within workspace +- **content** (required): `string` - File content to write +- **description** (optional): `string` - Description of what this change does + +## Result + +Returns `CodeWriteResult` with: + +Returns CommandResult with: +- **changeId**: `string` - UUID of the ChangeNode created (for undo) +- **filePath**: `string` - Resolved file path +- **bytesWritten**: `number` - Number of bytes written + +## Examples + +### Create a new file + +```bash +./jtag code/write --filePath="src/utils.ts" --content="export function greet() { return 'hello'; }" --description="Add greet utility" +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/write +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/write' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/write +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/write' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Write/test/unit/CodeWriteCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Write/test/integration/CodeWriteIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeWriteTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeWriteBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeWriteServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeWriteCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeWriteIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/write/browser/CodeWriteBrowserCommand.ts b/src/debug/jtag/commands/code/write/browser/CodeWriteBrowserCommand.ts new file mode 100644 index 000000000..3f69070dc --- /dev/null +++ b/src/debug/jtag/commands/code/write/browser/CodeWriteBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Write Command - Browser Implementation + * + * Write or create a file in the persona's workspace. Creates a ChangeNode in the change graph for undo support. File extension must be in the allowlist. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeWriteParams, CodeWriteResult } from '../shared/CodeWriteTypes'; + +export class CodeWriteBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/write', context, subpath, commander); + } + + async execute(params: CodeWriteParams): Promise { + console.log('🌐 BROWSER: Delegating Code Write to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/write/package.json b/src/debug/jtag/commands/code/write/package.json new file mode 100644 index 000000000..ffcb44058 --- /dev/null +++ b/src/debug/jtag/commands/code/write/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/write", + "version": "1.0.0", + "description": "Write or create a file in the persona's workspace. Creates a ChangeNode in the change graph for undo support. File extension must be in the allowlist.", + "main": "server/CodeWriteServerCommand.ts", + "types": "shared/CodeWriteTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeWriteIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/write" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/write/server/CodeWriteServerCommand.ts b/src/debug/jtag/commands/code/write/server/CodeWriteServerCommand.ts new file mode 100644 index 000000000..9513db52e --- /dev/null +++ b/src/debug/jtag/commands/code/write/server/CodeWriteServerCommand.ts @@ -0,0 +1,54 @@ +/** + * Code Write Command - Server Implementation + * + * Writes or creates a file in the persona's workspace via Rust IPC. + * Creates a ChangeNode in the change graph for undo support. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeWriteParams, CodeWriteResult } from '../shared/CodeWriteTypes'; +import { createCodeWriteResultFromParams } from '../shared/CodeWriteTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeWriteServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/write', context, subpath, commander); + } + + async execute(params: CodeWriteParams): Promise { + if (!params.filePath || params.filePath.trim() === '') { + throw new ValidationError( + 'filePath', + `Missing required parameter 'filePath'. See the code/write README for usage.` + ); + } + if (params.content === undefined || params.content === null) { + throw new ValidationError( + 'content', + `Missing required parameter 'content'. See the code/write README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError('userId', 'Workspace operations require a userId (auto-injected for persona tool calls).'); + } + const personaId = params.userId; + + const result = await CodeDaemon.workspaceWrite( + personaId, + params.filePath, + params.content, + params.description + ); + + return createCodeWriteResultFromParams(params, { + success: result.success, + changeId: result.change_id || '', + filePath: result.file_path, + bytesWritten: result.bytes_written, + }); + } +} diff --git a/src/debug/jtag/commands/code/write/shared/CodeWriteTypes.ts b/src/debug/jtag/commands/code/write/shared/CodeWriteTypes.ts new file mode 100644 index 000000000..d45696d81 --- /dev/null +++ b/src/debug/jtag/commands/code/write/shared/CodeWriteTypes.ts @@ -0,0 +1,103 @@ +/** + * Code Write Command - Shared Types + * + * Write or create a file in the persona's workspace. Creates a ChangeNode in the change graph for undo support. File extension must be in the allowlist. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Code Write Command Parameters + */ +export interface CodeWriteParams extends CommandParams { + // Relative path to file within workspace + filePath: string; + // File content to write + content: string; + // Description of what this change does + description?: string; +} + +/** + * Factory function for creating CodeWriteParams + */ +export const createCodeWriteParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Relative path to file within workspace + filePath: string; + // File content to write + content: string; + // Description of what this change does + description?: string; + } +): CodeWriteParams => createPayload(context, sessionId, { + description: data.description ?? '', + ...data +}); + +/** + * Code Write Command Result + */ +export interface CodeWriteResult extends CommandResult { + success: boolean; + // UUID of the ChangeNode created (for undo) + changeId: string; + // Resolved file path + filePath: string; + // Number of bytes written + bytesWritten: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeWriteResult with defaults + */ +export const createCodeWriteResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // UUID of the ChangeNode created (for undo) + changeId?: string; + // Resolved file path + filePath?: string; + // Number of bytes written + bytesWritten?: number; + error?: JTAGError; + } +): CodeWriteResult => createPayload(context, sessionId, { + changeId: data.changeId ?? '', + filePath: data.filePath ?? '', + bytesWritten: data.bytesWritten ?? 0, + ...data +}); + +/** + * Smart Code Write-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeWriteResultFromParams = ( + params: CodeWriteParams, + differences: Omit +): CodeWriteResult => transformPayload(params, differences); + +/** + * Code Write β€” Type-safe command executor + * + * Usage: + * import { CodeWrite } from '...shared/CodeWriteTypes'; + * const result = await CodeWrite.execute({ ... }); + */ +export const CodeWrite = { + execute(params: CommandInput): Promise { + return Commands.execute('code/write', params as Partial); + }, + commandName: 'code/write' as const, +} as const; diff --git a/src/debug/jtag/commands/code/write/test/integration/CodeWriteIntegration.test.ts b/src/debug/jtag/commands/code/write/test/integration/CodeWriteIntegration.test.ts new file mode 100644 index 000000000..399627d7a --- /dev/null +++ b/src/debug/jtag/commands/code/write/test/integration/CodeWriteIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeWrite Command Integration Tests + * + * Tests Code Write command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Write/test/integration/CodeWriteIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeWrite Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Write command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Write command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Write']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Write returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Write succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Write']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Write']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Write']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Write']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Write']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeWriteIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeWrite Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeWrite INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeWrite integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeWriteIntegrationTests(); +} else { + module.exports = { runAllCodeWriteIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/write/test/unit/CodeWriteCommand.test.ts b/src/debug/jtag/commands/code/write/test/unit/CodeWriteCommand.test.ts new file mode 100644 index 000000000..fc8483441 --- /dev/null +++ b/src/debug/jtag/commands/code/write/test/unit/CodeWriteCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeWrite Command Unit Tests + * + * Tests Code Write command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Write/test/unit/CodeWriteCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeWriteParams, CodeWriteResult } from '../../shared/CodeWriteTypes'; + +console.log('πŸ§ͺ CodeWrite Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Write logic for testing + */ +async function mockCodeWriteCommand(params: CodeWriteParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Write' or see the Code Write README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeWriteResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeWriteCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeWrite command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Write command + const validParams: CodeWriteParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeWriteExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Write command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeWriteParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeWriteCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeWriteRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeWriteParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeWriteParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeWriteCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeWriteOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeWriteParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeWriteCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeWriteParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeWriteCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeWritePerformance(): Promise { + console.log('\n⚑ Test 5: CodeWrite performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeWriteCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeWriteParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeWrite completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeWriteResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeWrite result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeWriteCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeWriteParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeWriteUnitTests(): Promise { + console.log('πŸš€ Starting CodeWrite Command Unit Tests\n'); + + try { + testCodeWriteCommandStructure(); + await testMockCodeWriteExecution(); + await testCodeWriteRequiredParams(); + await testCodeWriteOptionalParams(); + await testCodeWritePerformance(); + await testCodeWriteResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeWrite UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeWrite unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeWriteUnitTests(); +} else { + module.exports = { runAllCodeWriteUnitTests }; +} diff --git a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts index 0f831e161..d6520c98b 100644 --- a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts +++ b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts @@ -16,12 +16,21 @@ import type { GitLogResult, CodeFileReadEvent, CodeSearchEvent, - CodeGitLogEvent + CodeGitLogEvent, + WorkspaceEditMode, + WorkspaceWriteResult, + WorkspaceReadResult, + WorkspaceSearchResult, + WorkspaceTreeResult, + WorkspaceUndoResult, + WorkspaceHistoryResult, + WorkspaceGitStatusInfo, } from '../shared/CodeDaemonTypes'; import { Events } from '../../../system/core/shared/Events'; import { PathValidator } from './modules/PathValidator'; import { FileReader } from './modules/FileReader'; -import { Logger, type ComponentLogger } from '../../../system/core/logging/Logger'; +import { Logger } from '../../../system/core/logging/Logger'; +import { RustCoreIPCClient } from '../../../workers/continuum-core/bindings/RustCoreIPC'; import * as path from 'path'; /** @@ -161,5 +170,65 @@ export async function initializeCodeDaemon(jtagContext: JTAGContext): Promise { + await rustClient.codeCreateWorkspace(personaId, workspaceRoot, readRoots); + }; + + CodeDaemon.workspaceRead = async (personaId: string, filePath: string, startLine?: number, endLine?: number) => { + return await rustClient.codeRead(personaId, filePath, startLine, endLine); + }; + + CodeDaemon.workspaceWrite = async (personaId: string, filePath: string, content: string, description?: string) => { + return await rustClient.codeWrite(personaId, filePath, content, description); + }; + + CodeDaemon.workspaceEdit = async (personaId: string, filePath: string, editMode: WorkspaceEditMode, description?: string) => { + return await rustClient.codeEdit(personaId, filePath, editMode, description); + }; + + CodeDaemon.workspaceDelete = async (personaId: string, filePath: string, description?: string) => { + return await rustClient.codeDelete(personaId, filePath, description); + }; + + CodeDaemon.workspaceDiff = async (personaId: string, filePath: string, editMode: WorkspaceEditMode) => { + return await rustClient.codeDiff(personaId, filePath, editMode); + }; + + CodeDaemon.workspaceUndo = async (personaId: string, changeId?: string, count?: number) => { + return await rustClient.codeUndo(personaId, changeId, count); + }; + + CodeDaemon.workspaceHistory = async (personaId: string, filePath?: string, limit?: number) => { + return await rustClient.codeHistory(personaId, filePath, limit); + }; + + CodeDaemon.workspaceSearch = async (personaId: string, pattern: string, fileGlob?: string, maxResults?: number) => { + return await rustClient.codeSearch(personaId, pattern, fileGlob, maxResults); + }; + + CodeDaemon.workspaceTree = async (personaId: string, treePath?: string, maxDepth?: number, includeHidden?: boolean) => { + return await rustClient.codeTree(personaId, treePath, maxDepth, includeHidden); + }; + + CodeDaemon.workspaceGitStatus = async (personaId: string) => { + return await rustClient.codeGitStatus(personaId); + }; + + CodeDaemon.workspaceGitDiff = async (personaId: string, staged?: boolean) => { + return await rustClient.codeGitDiff(personaId, staged); + }; + log.info(`Initialized successfully (repository root: ${repositoryRoot})`); } diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts index a537095af..d1781f2b4 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts @@ -10,7 +10,15 @@ import type { CodeSearchOptions, CodeSearchResult, GitLogOptions, - GitLogResult + GitLogResult, + WorkspaceEditMode, + WorkspaceWriteResult, + WorkspaceReadResult, + WorkspaceSearchResult, + WorkspaceTreeResult, + WorkspaceUndoResult, + WorkspaceHistoryResult, + WorkspaceGitStatusInfo, } from './CodeDaemonTypes'; /** @@ -68,4 +76,93 @@ export class CodeDaemon { static isInitialized(): boolean { return false; // Overridden by server implementation } + + // ======================================================================== + // Workspace-Scoped Operations (Rust IPC backed, per-persona isolation) + // ======================================================================== + + /** + * Initialize a per-persona workspace with file engine and change graph. + * Must be called before any other workspace operations for this persona. + */ + static async createWorkspace(personaId: string, workspaceRoot: string, readRoots?: string[]): Promise { + throw new Error('CodeDaemon.createWorkspace() must be implemented by server'); + } + + /** + * Read a file from the persona's workspace. + */ + static async workspaceRead(personaId: string, filePath: string, startLine?: number, endLine?: number): Promise { + throw new Error('CodeDaemon.workspaceRead() must be implemented by server'); + } + + /** + * Write or create a file in the persona's workspace. + */ + static async workspaceWrite(personaId: string, filePath: string, content: string, description?: string): Promise { + throw new Error('CodeDaemon.workspaceWrite() must be implemented by server'); + } + + /** + * Edit a file using one of four edit modes. + */ + static async workspaceEdit(personaId: string, filePath: string, editMode: WorkspaceEditMode, description?: string): Promise { + throw new Error('CodeDaemon.workspaceEdit() must be implemented by server'); + } + + /** + * Delete a file from the persona's workspace. + */ + static async workspaceDelete(personaId: string, filePath: string, description?: string): Promise { + throw new Error('CodeDaemon.workspaceDelete() must be implemented by server'); + } + + /** + * Preview an edit as a unified diff without applying it. + */ + static async workspaceDiff(personaId: string, filePath: string, editMode: WorkspaceEditMode): Promise<{ success: boolean; unified: string }> { + throw new Error('CodeDaemon.workspaceDiff() must be implemented by server'); + } + + /** + * Undo a specific change or the last N changes. + */ + static async workspaceUndo(personaId: string, changeId?: string, count?: number): Promise { + throw new Error('CodeDaemon.workspaceUndo() must be implemented by server'); + } + + /** + * Get change history for a file or entire workspace. + */ + static async workspaceHistory(personaId: string, filePath?: string, limit?: number): Promise { + throw new Error('CodeDaemon.workspaceHistory() must be implemented by server'); + } + + /** + * Search for a regex pattern across workspace files. + */ + static async workspaceSearch(personaId: string, pattern: string, fileGlob?: string, maxResults?: number): Promise { + throw new Error('CodeDaemon.workspaceSearch() must be implemented by server'); + } + + /** + * Generate a directory tree for the workspace. + */ + static async workspaceTree(personaId: string, path?: string, maxDepth?: number, includeHidden?: boolean): Promise { + throw new Error('CodeDaemon.workspaceTree() must be implemented by server'); + } + + /** + * Get git status for the workspace. + */ + static async workspaceGitStatus(personaId: string): Promise { + throw new Error('CodeDaemon.workspaceGitStatus() must be implemented by server'); + } + + /** + * Get git diff for the workspace. + */ + static async workspaceGitDiff(personaId: string, staged?: boolean): Promise<{ success: boolean; diff: string }> { + throw new Error('CodeDaemon.workspaceGitDiff() must be implemented by server'); + } } diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts index a228be2a4..d5aae51db 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts @@ -220,3 +220,20 @@ export interface CodeGitLogEvent { commitCount: number; timestamp: number; } + +// ============================================================================ +// Workspace-Scoped Types β€” re-exported from ts-rs generated (Rust is source of truth) +// Aliased with Workspace* prefix for domain clarity in CodeDaemon API +// ============================================================================ + +export type { EditMode as WorkspaceEditMode } from '../../../shared/generated/code/EditMode'; +export type { WriteResult as WorkspaceWriteResult } from '../../../shared/generated/code/WriteResult'; +export type { ReadResult as WorkspaceReadResult } from '../../../shared/generated/code/ReadResult'; +export type { SearchResult as WorkspaceSearchResult } from '../../../shared/generated/code/SearchResult'; +export type { SearchMatch as WorkspaceSearchMatch } from '../../../shared/generated/code/SearchMatch'; +export type { TreeNode as WorkspaceTreeNode } from '../../../shared/generated/code/TreeNode'; +export type { TreeResult as WorkspaceTreeResult } from '../../../shared/generated/code/TreeResult'; +export type { UndoResult as WorkspaceUndoResult } from '../../../shared/generated/code/UndoResult'; +export type { ChangeNode as WorkspaceChangeNode } from '../../../shared/generated/code/ChangeNode'; +export type { HistoryResult as WorkspaceHistoryResult } from '../../../shared/generated/code/HistoryResult'; +export type { GitStatusInfo as WorkspaceGitStatusInfo } from '../../../shared/generated/code/GitStatusInfo'; diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index cc67bf607..9c26a7678 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-01T20:13:44.015Z", + "generated": "2026-02-01T21:12:59.323Z", "version": "1.0.0", "commands": [ { diff --git a/src/debug/jtag/shared/generated/code/ChangeNode.ts b/src/debug/jtag/shared/generated/code/ChangeNode.ts new file mode 100644 index 000000000..bd89c9e7b --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ChangeNode.ts @@ -0,0 +1,44 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { FileDiff } from "./FileDiff"; +import type { FileOperation } from "./FileOperation"; + +/** + * Every file operation creates a ChangeNode in the DAG. + */ +export type ChangeNode = { id: string, +/** + * Parent node IDs. Empty for root operations. Multiple for merges. + */ +parent_ids: Array, +/** + * Who performed this operation (persona UUID string). + */ +author_id: string, +/** + * When the operation occurred (unix millis). + */ +timestamp: number, +/** + * The file affected (relative to workspace root). + */ +file_path: string, +/** + * The operation type. + */ +operation: FileOperation, +/** + * Forward diff (apply to go forward in time). + */ +forward_diff: FileDiff, +/** + * Reverse diff (apply to go backward in time β€” undo). + */ +reverse_diff: FileDiff, +/** + * Optional description from the AI about what this change does. + */ +description?: string, +/** + * Workspace ID this change belongs to. + */ +workspace_id: string, }; diff --git a/src/debug/jtag/shared/generated/code/DiffHunk.ts b/src/debug/jtag/shared/generated/code/DiffHunk.ts new file mode 100644 index 000000000..d14968fed --- /dev/null +++ b/src/debug/jtag/shared/generated/code/DiffHunk.ts @@ -0,0 +1,10 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * A single hunk in a unified diff. + */ +export type DiffHunk = { old_start: number, old_count: number, new_start: number, new_count: number, +/** + * The hunk content (with +/- prefixes on each line). + */ +content: string, }; diff --git a/src/debug/jtag/shared/generated/code/EditMode.ts b/src/debug/jtag/shared/generated/code/EditMode.ts new file mode 100644 index 000000000..5897d1236 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/EditMode.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * How to edit a file (four modes). + */ +export type EditMode = { "type": "line_range", start_line: number, end_line: number, new_content: string, } | { "type": "search_replace", search: string, replace: string, all: boolean, } | { "type": "insert_at", line: number, content: string, } | { "type": "append", content: string, }; diff --git a/src/debug/jtag/shared/generated/code/FileDiff.ts b/src/debug/jtag/shared/generated/code/FileDiff.ts new file mode 100644 index 000000000..1355db62c --- /dev/null +++ b/src/debug/jtag/shared/generated/code/FileDiff.ts @@ -0,0 +1,15 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { DiffHunk } from "./DiffHunk"; + +/** + * A file diff consisting of hunks. + */ +export type FileDiff = { +/** + * Unified diff text (compatible with standard tooling). + */ +unified: string, +/** + * Structured hunks for programmatic application. + */ +hunks: Array, }; diff --git a/src/debug/jtag/shared/generated/code/FileOperation.ts b/src/debug/jtag/shared/generated/code/FileOperation.ts new file mode 100644 index 000000000..ade4b896c --- /dev/null +++ b/src/debug/jtag/shared/generated/code/FileOperation.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * File operation types. + */ +export type FileOperation = "create" | "write" | "edit" | "delete" | { "rename": { from: string, to: string, } } | { "undo": { reverted_id: string, } }; diff --git a/src/debug/jtag/shared/generated/code/GitStatusInfo.ts b/src/debug/jtag/shared/generated/code/GitStatusInfo.ts new file mode 100644 index 000000000..361bd9a85 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/GitStatusInfo.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Git status information. + */ +export type GitStatusInfo = { success: boolean, branch?: string, modified: Array, added: Array, deleted: Array, untracked: Array, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/HistoryResult.ts b/src/debug/jtag/shared/generated/code/HistoryResult.ts new file mode 100644 index 000000000..35c609807 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/HistoryResult.ts @@ -0,0 +1,7 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ChangeNode } from "./ChangeNode"; + +/** + * History query result. + */ +export type HistoryResult = { success: boolean, nodes: Array, total_count: number, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/ReadResult.ts b/src/debug/jtag/shared/generated/code/ReadResult.ts new file mode 100644 index 000000000..aaec959ca --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ReadResult.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Result of a file read operation. + */ +export type ReadResult = { success: boolean, content?: string, file_path: string, total_lines: number, lines_returned: number, start_line: number, end_line: number, size_bytes: number, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/SearchMatch.ts b/src/debug/jtag/shared/generated/code/SearchMatch.ts new file mode 100644 index 000000000..787fa78e7 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/SearchMatch.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * A single search match. + */ +export type SearchMatch = { file_path: string, line_number: number, line_content: string, match_start: number, match_end: number, }; diff --git a/src/debug/jtag/shared/generated/code/SearchResult.ts b/src/debug/jtag/shared/generated/code/SearchResult.ts new file mode 100644 index 000000000..cd63567d9 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/SearchResult.ts @@ -0,0 +1,7 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { SearchMatch } from "./SearchMatch"; + +/** + * Result of a code search operation. + */ +export type SearchResult = { success: boolean, matches: Array, total_matches: number, files_searched: number, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/TreeNode.ts b/src/debug/jtag/shared/generated/code/TreeNode.ts new file mode 100644 index 000000000..b79d6a206 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/TreeNode.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * A node in a directory tree. + */ +export type TreeNode = { name: string, path: string, is_directory: boolean, size_bytes?: number, children: Array, }; diff --git a/src/debug/jtag/shared/generated/code/TreeResult.ts b/src/debug/jtag/shared/generated/code/TreeResult.ts new file mode 100644 index 000000000..28579a140 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/TreeResult.ts @@ -0,0 +1,7 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { TreeNode } from "./TreeNode"; + +/** + * Result of a tree operation. + */ +export type TreeResult = { success: boolean, root?: TreeNode, total_files: number, total_directories: number, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/UndoResult.ts b/src/debug/jtag/shared/generated/code/UndoResult.ts new file mode 100644 index 000000000..ceef6a42a --- /dev/null +++ b/src/debug/jtag/shared/generated/code/UndoResult.ts @@ -0,0 +1,7 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { WriteResult } from "./WriteResult"; + +/** + * Result of an undo operation. + */ +export type UndoResult = { success: boolean, changes_undone: Array, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/WriteResult.ts b/src/debug/jtag/shared/generated/code/WriteResult.ts new file mode 100644 index 000000000..ce9e73157 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/WriteResult.ts @@ -0,0 +1,10 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Result of a file write/edit/delete operation. + */ +export type WriteResult = { success: boolean, +/** + * UUID of the ChangeNode created. + */ +change_id?: string, file_path: string, bytes_written: number, error?: string, }; diff --git a/src/debug/jtag/shared/generated/code/index.ts b/src/debug/jtag/shared/generated/code/index.ts new file mode 100644 index 000000000..8e6396c5e --- /dev/null +++ b/src/debug/jtag/shared/generated/code/index.ts @@ -0,0 +1,28 @@ +// Code Module Types - Generated from Rust (single source of truth) +// Re-run: cargo test --package continuum-core --lib export_bindings + +// Core change graph types +export type { ChangeNode } from './ChangeNode'; +export type { FileOperation } from './FileOperation'; +export type { FileDiff } from './FileDiff'; +export type { DiffHunk } from './DiffHunk'; + +// Edit modes (discriminated union) +export type { EditMode } from './EditMode'; + +// Operation results +export type { WriteResult } from './WriteResult'; +export type { ReadResult } from './ReadResult'; +export type { UndoResult } from './UndoResult'; +export type { HistoryResult } from './HistoryResult'; + +// Search +export type { SearchMatch } from './SearchMatch'; +export type { SearchResult } from './SearchResult'; + +// Tree +export type { TreeNode } from './TreeNode'; +export type { TreeResult } from './TreeResult'; + +// Git +export type { GitStatusInfo } from './GitStatusInfo'; diff --git a/src/debug/jtag/shared/generated/index.ts b/src/debug/jtag/shared/generated/index.ts index a00ceeec3..2241c540f 100644 --- a/src/debug/jtag/shared/generated/index.ts +++ b/src/debug/jtag/shared/generated/index.ts @@ -13,3 +13,6 @@ export * from './ipc'; // Voice call types (already generated) export type { CallMessage } from './CallMessage'; + +// Code module types (file operations, change graph, search, tree) +export * from './code'; diff --git a/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts b/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts index e65831bce..46ad6feff 100644 --- a/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts +++ b/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts @@ -43,7 +43,8 @@ import { WidgetContextSource, PersonaIdentitySource, GlobalAwarenessSource, - SocialMediaRAGSource + SocialMediaRAGSource, + CodeToolSource } from '../sources'; /** @@ -77,9 +78,10 @@ export class ChatRAGBuilder extends RAGBuilder { new ConversationHistorySource(), // Priority 80: Chat messages (uses queryWithJoin!) new WidgetContextSource(), // Priority 75: UI state from Positron new SemanticMemorySource(), // Priority 60: Long-term memories - new SocialMediaRAGSource() // Priority 55: Social media HUD (engagement duty) + new SocialMediaRAGSource(), // Priority 55: Social media HUD (engagement duty) + new CodeToolSource() // Priority 50: Coding workflow guidance ]); - this.log('πŸ”§ ChatRAGBuilder: Initialized RAGComposer with 6 sources'); + this.log('πŸ”§ ChatRAGBuilder: Initialized RAGComposer with 7 sources'); } return this.composer; } @@ -95,6 +97,7 @@ export class ChatRAGBuilder extends RAGBuilder { widgetContext: string | null; globalAwareness: string | null; socialAwareness: string | null; + codeToolGuidance: string | null; } { let identity: PersonaIdentity | null = null; let conversationHistory: LLMMessage[] = []; @@ -102,6 +105,7 @@ export class ChatRAGBuilder extends RAGBuilder { let widgetContext: string | null = null; let globalAwareness: string | null = null; let socialAwareness: string | null = null; + let codeToolGuidance: string | null = null; for (const section of result.sections) { if (section.identity) { @@ -125,9 +129,13 @@ export class ChatRAGBuilder extends RAGBuilder { // Social media HUD β€” engagement awareness and duty socialAwareness = section.systemPromptSection; } + if (section.systemPromptSection && section.sourceName === 'code-tools') { + // Coding workflow guidance β€” code/* tool awareness + codeToolGuidance = section.systemPromptSection; + } } - return { identity, conversationHistory, memories, widgetContext, globalAwareness, socialAwareness }; + return { identity, conversationHistory, memories, widgetContext, globalAwareness, socialAwareness, codeToolGuidance }; } /** @@ -159,6 +167,7 @@ export class ChatRAGBuilder extends RAGBuilder { let widgetContext: string | null; let globalAwareness: string | null; let socialAwareness: string | null; + let codeToolGuidance: string | null; if (this.useModularSources) { // NEW PATH: Use RAGComposer for modular, parallelized source loading @@ -203,6 +212,7 @@ export class ChatRAGBuilder extends RAGBuilder { widgetContext = extracted.widgetContext; globalAwareness = extracted.globalAwareness; socialAwareness = extracted.socialAwareness; + codeToolGuidance = extracted.codeToolGuidance; // Still load these via legacy methods (not yet extracted to sources) const [extractedArtifacts, extractedRecipeStrategy, extractedLearningConfig] = await Promise.all([ @@ -267,6 +277,7 @@ export class ChatRAGBuilder extends RAGBuilder { widgetContext = loadedWidgetContext; globalAwareness = null; // Legacy path doesn't use GlobalAwarenessSource socialAwareness = null; // Legacy path doesn't use SocialMediaRAGSource + codeToolGuidance = null; // Legacy path doesn't use CodeToolSource } // 2.3.5 Preprocess artifacts for non-vision models ("So the blind can see") @@ -298,6 +309,13 @@ export class ChatRAGBuilder extends RAGBuilder { this.log('πŸ“± ChatRAGBuilder: Injected social media HUD into system prompt'); } + // 2.4.7. Inject code tool workflow guidance (coding capabilities) + if (codeToolGuidance) { + finalIdentity.systemPrompt = finalIdentity.systemPrompt + + `\n\n${codeToolGuidance}`; + this.log('πŸ’» ChatRAGBuilder: Injected code tool guidance into system prompt'); + } + // NOTE: Canvas context is now handled via the "inbox content" pattern // When strokes are added, they emit system messages to the canvas room // AIs see these in their conversation history naturally, no system prompt injection needed diff --git a/src/debug/jtag/system/rag/sources/CodeToolSource.ts b/src/debug/jtag/system/rag/sources/CodeToolSource.ts new file mode 100644 index 000000000..3fb50faf2 --- /dev/null +++ b/src/debug/jtag/system/rag/sources/CodeToolSource.ts @@ -0,0 +1,209 @@ +/** + * CodeToolSource - Injects coding workflow awareness into persona RAG context + * + * Gives personas strategic awareness of the code/* command suite: + * - When and how to use code tools (workflow patterns) + * - Best practices (read before edit, preview with diff, undo on failure) + * - Available code/* commands grouped by purpose + * + * Does NOT duplicate tool listings β€” ToolRegistry already provides a compact + * list of all tools. This source provides the "how to code effectively" layer. + * + * Priority 50 - Medium. Valuable context for coding tasks, but not critical + * for conversational interactions. Token cost is low (~200 tokens). + */ + +import type { RAGSource, RAGSourceContext, RAGSection } from '../shared/RAGSource'; +import { PersonaToolRegistry } from '../../user/server/modules/PersonaToolRegistry'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('CodeToolSource', 'rag'); + +/** + * Code tool categories for workflow documentation. + * Each group maps to a workflow step that only appears if the persona has + * at least one of the group's commands. + */ +interface CodeToolGroup { + readonly label: string; + readonly commands: string[]; + readonly hint: string; + readonly workflowStep: string; +} + +/** + * Static code tool groups β€” the workflow map for personas. + * workflowStep is the numbered instruction shown in the workflow. + */ +const CODE_TOOL_GROUPS: readonly CodeToolGroup[] = [ + { + label: 'Discovery', + commands: ['code/tree', 'code/search'], + hint: 'Understand the codebase structure before making changes.', + workflowStep: '**Discover** β€” Use code/tree and code/search to understand structure', + }, + { + label: 'Reading', + commands: ['code/read'], + hint: 'Read file contents and line ranges. Always read before editing.', + workflowStep: '**Read** β€” Always read files before editing (code/read)', + }, + { + label: 'Writing', + commands: ['code/write', 'code/edit'], + hint: 'Create files or edit with search-replace, line-range, insert, or append.', + workflowStep: '**Edit** β€” Apply changes with code/write or code/edit', + }, + { + label: 'Review', + commands: ['code/diff'], + hint: 'Preview edits as unified diff before applying. Use this to verify correctness.', + workflowStep: '**Preview** β€” Use code/diff to see your changes before applying', + }, + { + label: 'History', + commands: ['code/undo', 'code/history'], + hint: 'Undo changes or view the change graph. Every edit is tracked.', + workflowStep: '**Undo** β€” If something breaks, code/undo reverts any change', + }, +] as const; + +export class CodeToolSource implements RAGSource { + readonly name = 'code-tools'; + readonly priority = 50; // Medium β€” below conversation/widget, above learning config + readonly defaultBudgetPercent = 5; + + private static _cachedPrompt: string | null = null; + private static _cacheGeneratedAt = 0; + private static readonly CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes + + isApplicable(context: RAGSourceContext): boolean { + // Only include if persona has at least one code/* permission + const registry = PersonaToolRegistry.sharedInstance(); + const tools = registry.listToolsForPersona(context.personaId); + return tools.some(t => t.name.startsWith('code/')); + } + + async load(context: RAGSourceContext, allocatedBudget: number): Promise { + const startTime = performance.now(); + + try { + const prompt = this.getOrBuildPrompt(context); + + // Respect budget β€” if prompt exceeds allocation, return a minimal version + const tokenCount = this.estimateTokens(prompt); + const budgetTokens = Math.floor(allocatedBudget); + + const finalPrompt = tokenCount > budgetTokens + ? this.buildMinimalPrompt() + : prompt; + + const finalTokens = this.estimateTokens(finalPrompt); + + log.debug(`Loaded code tool guidance (${finalTokens} tokens) for persona ${context.personaId.slice(0, 8)}`); + + return { + sourceName: this.name, + tokenCount: finalTokens, + loadTimeMs: performance.now() - startTime, + systemPromptSection: finalPrompt, + metadata: { + codeToolCount: this.countCodeTools(context), + budgetRespected: finalTokens <= budgetTokens, + }, + }; + } catch (error: unknown) { + const message = error instanceof Error ? error.message : String(error); + log.error(`Failed to load code tool context: ${message}`); + return this.emptySection(startTime, message); + } + } + + /** + * Build or retrieve cached prompt + */ + private getOrBuildPrompt(context: RAGSourceContext): string { + const now = Date.now(); + if ( + CodeToolSource._cachedPrompt && + (now - CodeToolSource._cacheGeneratedAt) < CodeToolSource.CACHE_TTL_MS + ) { + return CodeToolSource._cachedPrompt; + } + + const prompt = this.buildFullPrompt(context); + CodeToolSource._cachedPrompt = prompt; + CodeToolSource._cacheGeneratedAt = now; + return prompt; + } + + /** + * Full coding workflow prompt β€” injected into system prompt. + * Only includes workflow steps for tool groups the persona has access to. + */ + private buildFullPrompt(context: RAGSourceContext): string { + const registry = PersonaToolRegistry.sharedInstance(); + const tools = registry.listToolsForPersona(context.personaId); + const codeTools = tools.filter(t => t.name.startsWith('code/')); + + // Filter to groups where persona has at least one command + const availableGroups: { group: CodeToolGroup; available: string[] }[] = []; + for (const group of CODE_TOOL_GROUPS) { + const available = group.commands.filter(cmd => + codeTools.some(t => t.name === cmd) + ); + if (available.length > 0) { + availableGroups.push({ group, available }); + } + } + + // Build numbered workflow steps (only for groups persona has) + const workflowSteps = availableGroups + .map((entry, i) => `${i + 1}. ${entry.group.workflowStep}`) + .join('\n'); + + // Build grouped tool listing + const groupLines = availableGroups + .map(entry => `${entry.group.label}: ${entry.available.join(', ')} β€” ${entry.group.hint}`) + .join('\n'); + + const hasWriteTools = codeTools.some(t => t.name === 'code/write' || t.name === 'code/edit'); + + return `## Coding Capabilities + +You have access to workspace code tools. Follow this workflow for coding tasks: + +${workflowSteps} + +${groupLines} +${hasWriteTools ? '\nEvery write/edit is tracked in a change graph with full undo support.\nNever edit blind β€” always read first, diff to preview, then apply.' : ''}`.trim(); + } + + /** + * Minimal prompt when budget is tight β€” just list available tool names + */ + private buildMinimalPrompt(): string { + // List all known code commands from the groups (static β€” no registry call needed) + const allCommands = CODE_TOOL_GROUPS.flatMap(g => g.commands); + return `Code tools available: ${allCommands.join(', ')}. Read before editing. Use code/diff to preview.`; + } + + private countCodeTools(context: RAGSourceContext): number { + const registry = PersonaToolRegistry.sharedInstance(); + const tools = registry.listToolsForPersona(context.personaId); + return tools.filter(t => t.name.startsWith('code/')).length; + } + + private emptySection(startTime: number, error?: string): RAGSection { + return { + sourceName: this.name, + tokenCount: 0, + loadTimeMs: performance.now() - startTime, + metadata: error ? { error } : { hasCodeTools: false }, + }; + } + + private estimateTokens(text: string): number { + return Math.ceil(text.length / 4); + } +} diff --git a/src/debug/jtag/system/rag/sources/index.ts b/src/debug/jtag/system/rag/sources/index.ts index 6919c3744..2506f1d46 100644 --- a/src/debug/jtag/system/rag/sources/index.ts +++ b/src/debug/jtag/system/rag/sources/index.ts @@ -28,6 +28,7 @@ export { PersonaIdentitySource } from './PersonaIdentitySource'; export { GlobalAwarenessSource, registerConsciousness, unregisterConsciousness, getConsciousness } from './GlobalAwarenessSource'; export { VoiceConversationSource, registerVoiceOrchestrator, unregisterVoiceOrchestrator } from './VoiceConversationSource'; export { SocialMediaRAGSource } from './SocialMediaRAGSource'; +export { CodeToolSource } from './CodeToolSource'; // Re-export types for convenience export type { RAGSource, RAGSourceContext, RAGSection } from '../shared/RAGSource'; diff --git a/src/debug/jtag/system/user/server/PersonaUser.ts b/src/debug/jtag/system/user/server/PersonaUser.ts index c1086b17f..dbb0ed6fc 100644 --- a/src/debug/jtag/system/user/server/PersonaUser.ts +++ b/src/debug/jtag/system/user/server/PersonaUser.ts @@ -48,6 +48,7 @@ import { AIDecisionService, type AIDecisionContext } from '../../ai/server/AIDec import { getModelConfigForProvider } from './config/PersonaModelConfigs'; import { CoordinationDecisionLogger, type LogDecisionParams } from '../../coordination/server/CoordinationDecisionLogger'; import type { RAGContext } from '../../data/entities/CoordinationDecisionEntity'; +import type { RAGContext as PipelineRAGContext } from '../../rag/shared/RAGTypes'; import { PersonaWorkerThread } from '../../../shared/workers/PersonaWorkerThread'; import { AI_DECISION_EVENTS, @@ -1340,7 +1341,8 @@ export class PersonaUser extends AIUser { */ public async respondToMessage( originalMessage: ProcessableMessage, - decisionContext?: Omit + decisionContext?: Omit, + preBuiltRagContext?: PipelineRAGContext ): Promise { // Check dormancy state before responding const shouldRespond = this.responseGenerator.shouldRespondToMessage( @@ -1353,7 +1355,7 @@ export class PersonaUser extends AIUser { return; } - const result = await this.responseGenerator.generateAndPostResponse(originalMessage, decisionContext); + const result = await this.responseGenerator.generateAndPostResponse(originalMessage, decisionContext, preBuiltRagContext); // Mark tool results as processed to prevent infinite loops if (result.success && result.storedToolResultIds.length > 0) { diff --git a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts index d0017c87a..af527cb3c 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts @@ -27,6 +27,7 @@ import type { Task } from './cognition/reasoning/types'; import { ChatRAGBuilder } from '../../../rag/builders/ChatRAGBuilder'; import { CoordinationDecisionLogger, type LogDecisionParams } from '../../../coordination/server/CoordinationDecisionLogger'; import type { RAGContext } from '../../../data/entities/CoordinationDecisionEntity'; +import type { RAGContext as PipelineRAGContext, RAGArtifact } from '../../../rag/shared/RAGTypes'; import type { AIDecisionContext } from '../../../ai/server/AIDecisionService'; import { AIDecisionService } from '../../../ai/server/AIDecisionService'; import { contentPreview, truncate } from '../../../../shared/utils/StringUtils'; @@ -55,6 +56,33 @@ import { // Import PersonaUser directly - circular dependency is fine for type-only imports import type { PersonaUser } from '../PersonaUser'; +/** + * Discriminated union for gating result. + * When shouldRespond=true: full RAG context is guaranteed (built once, reused by generator). + * When shouldRespond=false: no RAG context (skipped for performance). + */ +interface GatingResultBase { + confidence: number; + reason: string; + model: string; +} + +export interface GatingRespondResult extends GatingResultBase { + shouldRespond: true; + filteredRagContext: PipelineRAGContext; + ragContextSummary: { + totalMessages: number; + filteredMessages: number; + timeWindowMinutes: number; + }; +} + +export interface GatingSilentResult extends GatingResultBase { + shouldRespond: false; +} + +export type GatingResult = GatingRespondResult | GatingSilentResult; + /** * PersonaMessageEvaluator - Message evaluation and response decision engine * @@ -462,11 +490,13 @@ export class PersonaMessageEvaluator { this.log(`\n${'='.repeat(80)}`); this.log(`🧠 ${this.personaUser.displayName}: GATING DECISION for message "${safeMessageText.slice(0, 60)}..."`); this.log(`${'='.repeat(80)}`); - this.log(`πŸ“Š Context: ${gatingResult.ragContextSummary?.filteredMessages ?? 0} messages in ${gatingResult.ragContextSummary?.timeWindowMinutes ?? 0}min window`); - this.log(`πŸ’¬ Conversation history seen by AI:`); - gatingResult.conversationHistory?.slice(-5).forEach((msg, i) => { - this.log(` ${i + 1}. [${msg.name}] ${truncate(msg.content, 80)}...`); - }); + if (gatingResult.shouldRespond) { + this.log(`πŸ“Š Context: ${gatingResult.ragContextSummary.filteredMessages} messages in ${gatingResult.ragContextSummary.timeWindowMinutes}min window`); + this.log(`πŸ’¬ Conversation history (last 5):`); + gatingResult.filteredRagContext.conversationHistory.slice(-5).forEach((msg, i) => { + this.log(` ${i + 1}. [${msg.name ?? msg.role}] ${truncate(msg.content, 80)}...`); + }); + } this.log(`\n🎯 Decision: ${gatingResult.shouldRespond ? 'RESPOND' : 'SILENT'}`); this.log(` Confidence: ${(gatingResult.confidence * 100).toFixed(0)}%`); this.log(` Reason: ${gatingResult.reason}`); @@ -474,43 +504,13 @@ export class PersonaMessageEvaluator { this.log(`${'='.repeat(80)}\n`); if (!gatingResult.shouldRespond) { - // PHASE 5C: Log coordination decision to database (fire-and-forget) - if (gatingResult.filteredRagContext) { - const decisionStartTime = Date.now(); - const ragContext = this.buildCoordinationRAGContext(gatingResult.filteredRagContext); - - // Fire-and-forget: Don't await, don't slow down critical path - CoordinationDecisionLogger.logDecision({ - actorId: this.personaUser.id, - actorName: this.personaUser.displayName, - actorType: 'ai-persona', - triggerEventId: messageEntity.id, - ragContext, - visualContext: undefined, - action: 'SILENT', - confidence: gatingResult.confidence, - reasoning: gatingResult.reason, - responseContent: undefined, - modelUsed: gatingResult.model, - modelProvider: this.personaUser.modelConfig.provider ?? 'candle', - tokensUsed: undefined, - responseTime: Date.now() - decisionStartTime, - sessionId: DataDaemon.jtagContext!.uuid, - contextId: messageEntity.roomId, - tags: [senderIsHuman ? 'human-sender' : 'ai-sender', 'gating-silent'] - }).catch(error => { - this.log(`❌ ${this.personaUser.displayName}: Failed to log SILENT decision:`, error); - }); - } - + // SILENT: No RAG context available (skipped for performance) this.personaUser.logAIDecision('SILENT', gatingResult.reason, { message: safeMessageText, sender: messageEntity.senderName, roomId: messageEntity.roomId, confidence: gatingResult.confidence, - model: gatingResult.model, - ragContextSummary: gatingResult.ragContextSummary, - conversationHistory: gatingResult.conversationHistory + model: gatingResult.model }); // Emit DECIDED_SILENT event @@ -525,9 +525,9 @@ export class PersonaMessageEvaluator { messageId: messageEntity.id, isHumanMessage: senderIsHuman, timestamp: Date.now(), - confidence: gatingResult.confidence ?? 0.5, + confidence: gatingResult.confidence, reason: gatingResult.reason, - gatingModel: gatingResult.model ?? 'unknown' + gatingModel: gatingResult.model }, { scope: EVENT_SCOPES.ROOM, @@ -543,7 +543,9 @@ export class PersonaMessageEvaluator { // PHASE 5C: Prepare decision context for logging AFTER response generation // (We need the actual response content before we can log the complete decision) - const decisionContext = gatingResult.filteredRagContext ? { + // After SILENT early-return above, TypeScript narrows gatingResult to GatingRespondResult. + // filteredRagContext, ragContextSummary, confidence, reason, model are all guaranteed. + const decisionContext = { actorId: this.personaUser.id, actorName: this.personaUser.displayName, actorType: 'ai-persona' as const, @@ -562,7 +564,7 @@ export class PersonaMessageEvaluator { isMentioned ? 'mentioned' : 'not-mentioned', 'gating-respond' ] - } : undefined; + }; this.personaUser.logAIDecision('RESPOND', gatingResult.reason, { message: safeMessageText, @@ -573,7 +575,6 @@ export class PersonaMessageEvaluator { confidence: gatingResult.confidence, model: gatingResult.model, ragContextSummary: gatingResult.ragContextSummary, - conversationHistory: gatingResult.conversationHistory }); // Emit DECIDED_RESPOND event @@ -588,9 +589,9 @@ export class PersonaMessageEvaluator { messageId: messageEntity.id, isHumanMessage: senderIsHuman, timestamp: Date.now(), - confidence: gatingResult.confidence ?? 0.5, + confidence: gatingResult.confidence, reason: gatingResult.reason, - gatingModel: gatingResult.model ?? 'unknown' + gatingModel: gatingResult.model }, { scope: EVENT_SCOPES.ROOM, @@ -709,7 +710,7 @@ export class PersonaMessageEvaluator { // πŸ”§ PHASE: Generate and post response this.log(`πŸ”§ TRACE-POINT-B: Before respondToMessage call (timestamp=${Date.now()})`); this.log(`πŸ”§ ${this.personaUser.displayName}: [PHASE 3/3] Calling respondToMessage...`); - await this.personaUser.respondToMessage(messageEntity, decisionContext); + await this.personaUser.respondToMessage(messageEntity, decisionContext, gatingResult.filteredRagContext); this.log(`πŸ”§ TRACE-POINT-C: After respondToMessage returned (timestamp=${Date.now()})`); this.log(`βœ… ${this.personaUser.displayName}: [PHASE 3/3] Response posted successfully`); @@ -744,32 +745,53 @@ export class PersonaMessageEvaluator { * Build CoordinationDecision RAGContext from ChatRAGBuilder output * Converts domain-specific RAG format to universal decision logging format */ - private buildCoordinationRAGContext(filteredRagContext: any): RAGContext { - const systemPrompt = filteredRagContext.identity?.systemPrompt ?? - `You are ${this.personaUser.displayName}. ${this.personaUser.entity?.bio ?? ''}`; - + private buildCoordinationRAGContext(filteredRagContext: PipelineRAGContext): RAGContext { return { identity: { - systemPrompt, + systemPrompt: filteredRagContext.identity.systemPrompt, bio: this.personaUser.entity?.bio ?? '', role: this.personaUser.displayName }, - conversationHistory: (filteredRagContext.conversationHistory ?? []).map((msg: any) => ({ + conversationHistory: filteredRagContext.conversationHistory.map(msg => ({ role: msg.role, content: msg.content, timestamp: msg.timestamp ?? Date.now() })), - artifacts: filteredRagContext.artifacts ?? [], - privateMemories: filteredRagContext.privateMemories ?? [], + artifacts: (filteredRagContext.artifacts ?? []).map(a => ({ + type: this.mapArtifactType(a.type), + name: a.url ?? a.type, + content: a.content ?? a.base64 ?? '', + mimeType: undefined, + })), + privateMemories: (filteredRagContext.privateMemories ?? []).map(m => ({ + type: m.type, + content: m.content, + relevance: m.relevanceScore, + })), metadata: { timestamp: Date.now(), - tokenCount: filteredRagContext.metadata?.messageCount ?? - filteredRagContext.conversationHistory?.length ?? 0, + tokenCount: filteredRagContext.metadata.messageCount, contextWindow: 4096 } }; } + /** Map pipeline artifact types to coordination logging's narrower type union. */ + private mapArtifactType(pipelineType: RAGArtifact['type']): 'image' | 'file' | 'code' { + switch (pipelineType) { + case 'image': + case 'screenshot': + case 'video': + case 'audio': + return 'image'; + case 'data': + case 'benchmark': + return 'code'; + case 'file': + return 'file'; + } + } + /** * Check if this persona is mentioned in a message * Supports @username mentions and channel directives @@ -1072,23 +1094,7 @@ export class PersonaMessageEvaluator { senderIsHuman: boolean, isMentioned: boolean, preComputedDecision?: FastPathDecision - ): Promise<{ - shouldRespond: boolean; - confidence: number; - reason: string; - model?: string; - ragContextSummary?: { - totalMessages: number; - filteredMessages: number; - timeWindowMinutes?: number; - }; - conversationHistory?: Array<{ - name: string; - content: string; - timestamp?: number; - }>; - filteredRagContext?: any; - }> { + ): Promise { const startTime = Date.now(); try { @@ -1136,19 +1142,35 @@ export class PersonaMessageEvaluator { this.log(`πŸ¦€ ${this.personaUser.displayName}: Rust decision (separate IPC, ${ipcMs.toFixed(1)}ms): ${rustDecision.should_respond ? 'RESPOND' : 'SILENT'} (${rustDecision.decision_time_ms.toFixed(2)}ms, fast_path=${rustDecision.fast_path_used})`); } - // Build RAG context for decision logging - // IMPORTANT: Exclude processed tool results to prevent infinite loops + // OPTIMIZATION: Only build RAG context if we're going to respond. + // Rust fast-path already decided should_respond β€” for SILENT decisions, + // skip the 40-240ms RAG build entirely. + if (!rustDecision.should_respond) { + const totalMs = Date.now() - startTime; + this.log(`[TIMING] ${this.personaUser.displayName}: evaluateShouldRespond total=${totalMs}ms (rag=SKIPPED/silent, preComputed=${!!preComputedDecision})`); + + return { + shouldRespond: false as const, + confidence: rustDecision.confidence, + reason: rustDecision.reason, + model: rustDecision.fast_path_used ? 'RustFastPath' : 'RustCognition', + }; + } + + // RESPOND path: Build FULL RAG context (with memories + artifacts). + // This context will be passed through to PersonaResponseGenerator, + // eliminating the redundant second RAG build that previously happened there. const ragStart = performance.now(); const ragBuilder = new ChatRAGBuilder(this.log.bind(this)); const ragContext = await ragBuilder.buildContext( message.roomId, this.personaUser.id, { - modelId: this.personaUser.modelConfig.model, // Use persona's model - maxMemories: 0, - includeArtifacts: false, - includeMemories: false, - excludeMessageIds: this.personaUser.taskTracker.getProcessedToolResults(), // Filter out processed tool results + modelId: this.personaUser.modelConfig.model, + maxMemories: 5, // Full context: include memories for LLM prompt + includeArtifacts: true, // Full context: include vision artifacts + includeMemories: true, // Full context: include Hippocampus LTM + excludeMessageIds: this.personaUser.taskTracker.getProcessedToolResults(), currentMessage: { role: 'user', content: message.content.text, @@ -1160,10 +1182,10 @@ export class PersonaMessageEvaluator { const ragMs = performance.now() - ragStart; const totalMs = Date.now() - startTime; - this.log(`[TIMING] ${this.personaUser.displayName}: evaluateShouldRespond total=${totalMs}ms (rag=${ragMs.toFixed(1)}ms, preComputed=${!!preComputedDecision})`); + this.log(`[TIMING] ${this.personaUser.displayName}: evaluateShouldRespond total=${totalMs}ms (rag=${ragMs.toFixed(1)}ms/full, preComputed=${!!preComputedDecision})`); return { - shouldRespond: rustDecision.should_respond, + shouldRespond: true as const, confidence: rustDecision.confidence, reason: rustDecision.reason, model: rustDecision.fast_path_used ? 'RustFastPath' : 'RustCognition', @@ -1171,7 +1193,7 @@ export class PersonaMessageEvaluator { ragContextSummary: { totalMessages: ragContext.conversationHistory.length, filteredMessages: ragContext.conversationHistory.length, - timeWindowMinutes: 30 // Default context window + timeWindowMinutes: 30 } }; @@ -1206,10 +1228,11 @@ export class PersonaMessageEvaluator { } ); + // Error in evaluation = SILENT. No fallback guessing. return { - shouldRespond: isMentioned, - confidence: isMentioned ? (0.92 + Math.random() * 0.06) : 0.5, // 0.92-0.98 realistic range - reason: 'Error in evaluation', + shouldRespond: false as const, + confidence: 0, + reason: `Error in evaluation: ${error instanceof Error ? error.message : String(error)}`, model: 'error' }; } diff --git a/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts b/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts index 9d41ecc9a..45a2ab542 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts @@ -54,6 +54,7 @@ import type { AiDetectSemanticLoopParams, AiDetectSemanticLoopResult } from '../ import { SystemPaths } from '../../../core/config/SystemPaths'; import { GarbageDetector } from '../../../ai/server/GarbageDetector'; import type { InboxMessage, ProcessableMessage } from './QueueItemTypes'; +import type { RAGContext } from '../../../rag/shared/RAGTypes'; import { AiDetectSemanticLoop } from '../../../../commands/ai/detect-semantic-loop/shared/AiDetectSemanticLoopTypes'; import { DataCreate } from '../../../../commands/data/create/shared/DataCreateTypes'; @@ -508,7 +509,8 @@ export class PersonaResponseGenerator { */ async generateAndPostResponse( originalMessage: ProcessableMessage, - decisionContext?: Omit + decisionContext?: Omit, + preBuiltRagContext?: RAGContext ): Promise { this.log(`πŸ”§ TRACE-POINT-D: Entered respondToMessage (timestamp=${Date.now()})`); // Voice modality is a typed field β€” no cast needed @@ -516,32 +518,37 @@ export class PersonaResponseGenerator { const generateStartTime = Date.now(); // Track total response time for decision logging const allStoredResultIds: UUID[] = []; // Collect all tool result message IDs for task tracking try { - // πŸ”§ SUB-PHASE 3.1: Build RAG context - // Bug #5 fix: Pass modelId to ChatRAGBuilder for dynamic message count calculation - this.log(`πŸ”§ ${this.personaName}: [PHASE 3.1] Building RAG context with model=${this.modelConfig.model}...`); - const ragBuilder = new ChatRAGBuilder(this.log.bind(this)); - // Voice mode detection - pass voiceSessionId to RAG for faster response (skips semantic search) - const voiceSessionId = originalMessage.voiceSessionId; - const fullRAGContext = await ragBuilder.buildContext( - originalMessage.roomId, - this.personaId, - { - modelId: this.modelConfig.model, // Bug #5 fix: Dynamic budget calculation - maxMemories: 5, // Limit to 5 recent important memories (token budget management) - includeArtifacts: true, // Enable vision support for multimodal-capable models - includeMemories: true, // Enable Hippocampus LTM retrieval - // Voice mode: Pass session ID so RAG sources can optimize for speed - voiceSessionId, - // βœ… FIX: Include current message even if not yet persisted to database - currentMessage: { - role: 'user', - content: originalMessage.content.text, - name: originalMessage.senderName, - timestamp: this.timestampToNumber(originalMessage.timestamp) + // πŸ”§ SUB-PHASE 3.1: Build RAG context (or use pre-built from evaluator) + let fullRAGContext: RAGContext; + + if (preBuiltRagContext) { + // OPTIMIZATION: Evaluator already built full RAG context β€” reuse it, skip redundant build + fullRAGContext = preBuiltRagContext; + this.log(`⚑ ${this.personaName}: [PHASE 3.1] Using pre-built RAG context (${fullRAGContext.conversationHistory.length} messages, saved ~100ms rebuild)`); + } else { + // Fallback: Build RAG context from scratch (for code paths that don't go through evaluator) + this.log(`πŸ”§ ${this.personaName}: [PHASE 3.1] Building RAG context with model=${this.modelConfig.model}...`); + const ragBuilder = new ChatRAGBuilder(this.log.bind(this)); + const voiceSessionId = originalMessage.voiceSessionId; + fullRAGContext = await ragBuilder.buildContext( + originalMessage.roomId, + this.personaId, + { + modelId: this.modelConfig.model, + maxMemories: 5, + includeArtifacts: true, + includeMemories: true, + voiceSessionId, + currentMessage: { + role: 'user', + content: originalMessage.content.text, + name: originalMessage.senderName, + timestamp: this.timestampToNumber(originalMessage.timestamp) + } } - } - ); - this.log(`βœ… ${this.personaName}: [PHASE 3.1] RAG context built (${fullRAGContext.conversationHistory.length} messages)`); + ); + this.log(`βœ… ${this.personaName}: [PHASE 3.1] RAG context built (${fullRAGContext.conversationHistory.length} messages)`); + } // πŸ”§ SUB-PHASE 3.2: Build message history for LLM this.log(`πŸ”§ ${this.personaName}: [PHASE 3.2] Building LLM message array...`); diff --git a/src/debug/jtag/system/user/server/modules/PersonaToolExecutor.ts b/src/debug/jtag/system/user/server/modules/PersonaToolExecutor.ts index 2ad4363bb..c682e43bf 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaToolExecutor.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaToolExecutor.ts @@ -18,14 +18,9 @@ import { ToolRegistry } from '../../../tools/server/ToolRegistry'; import type { MediaItem } from '../../../data/entities/ChatMessageEntity'; import { ChatMessageEntity } from '../../../data/entities/ChatMessageEntity'; import type { PersonaMediaConfig } from './PersonaMediaConfig'; -import { Commands } from '../../../core/shared/Commands'; -import type { DataCreateParams, DataCreateResult } from '../../../../commands/data/create/shared/DataCreateTypes'; import { getToolFormatAdapters, type ToolFormatAdapter } from './ToolFormatAdapter'; -import { Logger, FileMode } from '../../../core/logging/Logger'; -import { SystemPaths } from '../../../core/config/SystemPaths'; +import { Logger } from '../../../core/logging/Logger'; import { RoomResolver } from '../../../core/server/RoomResolver'; -import * as fs from 'fs'; -import * as path from 'path'; import { DataCreate } from '../../../../commands/data/create/shared/DataCreateTypes'; /** @@ -75,7 +70,6 @@ export interface PersonaUserForToolExecutor { } export class PersonaToolExecutor { - private static readonly COGNITION_LOG_PATH = path.join(process.cwd(), '.continuum/jtag/logs/system/cognition.log'); /** * LOOP DETECTION: Track recent tool calls per persona to detect infinite loops @@ -105,16 +99,6 @@ export class PersonaToolExecutor { ); } - /** - * Log to dedicated cognition file (separate from main logs) - * @deprecated Use Logger instead for categorized logging - */ - private static logToCognitionFile(message: string): void { - const timestamp = new Date().toISOString(); - const logLine = `[${timestamp}] ${message}\n`; - fs.appendFileSync(PersonaToolExecutor.COGNITION_LOG_PATH, logLine, 'utf8'); - } - /** * LOOP DETECTION: Create a hash of a tool call for comparison */ @@ -149,7 +133,6 @@ export class PersonaToolExecutor { // Block if threshold exceeded if (duplicateCount >= PersonaToolExecutor.LOOP_DETECTION_THRESHOLD) { this.log.warn(`πŸ” LOOP DETECTED: ${toolCall.toolName} called ${duplicateCount + 1}x in ${PersonaToolExecutor.LOOP_DETECTION_WINDOW_MS / 1000}s - BLOCKING`); - PersonaToolExecutor.logToCognitionFile(`πŸ” ${this.persona.displayName}: [LOOP BLOCKED] ${toolCall.toolName} (${duplicateCount + 1}x identical)`); return true; } @@ -199,7 +182,6 @@ export class PersonaToolExecutor { } this.log.info(`Executing ${toolCalls.length} tool(s): ${toolCalls.map(t => t.toolName).join(', ')}`); - PersonaToolExecutor.logToCognitionFile(`πŸ”§ ${this.persona.displayName}: [TOOL] Executing ${toolCalls.length} tool(s): ${toolCalls.map(t => t.toolName).join(', ')}`); // Filter out looping tool calls before execution const filteredToolCalls = toolCalls.filter(toolCall => { @@ -225,21 +207,20 @@ export class PersonaToolExecutor { // This handles wall/*, chat/*, and any other room-scoped commands const resolvedParams = await this.resolveRoomParameters(toolCall.parameters, context.contextId); - // Inject callerId, personaId, and contextId so tools can identify the persona and context - // This is how ai/sleep knows which persona to put to sleep when no explicit personaId is provided - // And ai/should-respond-fast needs personaId + contextId to check room activity + // Inject userId (standard CommandParams field) and contextId + // userId is the persona's UUID β€” the canonical identity field on CommandParams + // personaId kept for backward compat with ai/sleep, ai/should-respond-fast const paramsWithCaller = { ...resolvedParams, - callerId: context.personaId, // Always inject the calling persona's userId - personaId: context.personaId, // Also as personaId for tools that expect it - contextId: context.contextId // Always inject the room/context ID + userId: context.personaId, // Standard CommandParams.userId β€” THE identity field + personaId: context.personaId, // Backward compat (ai/sleep, ai/should-respond-fast) + contextId: context.contextId // Room/context scope }; // Log tool call with clean params formatting (not array-wrapped) const paramsJson = JSON.stringify(paramsWithCaller, null, 2); this.log.info(`β”Œβ”€ CALL: ${toolCall.toolName}`); this.log.info(`β”‚ params: ${paramsJson.replace(/\n/g, '\nβ”‚ ')}`); - PersonaToolExecutor.logToCognitionFile(`πŸ”§ ${this.persona.displayName}: [TOOL CALL] ${toolCall.toolName} | params: ${JSON.stringify(paramsWithCaller)}`); // Use ToolRegistry for ALL commands - no special cases // NO try-catch - let exceptions bubble to PersonaResponseGenerator @@ -290,7 +271,6 @@ export class PersonaToolExecutor { this.log.error(`└─ RESULT: βœ— ${duration}ms`); this.log.error(` error: ${result.error || 'unknown error'}`); } - PersonaToolExecutor.logToCognitionFile(`${result.success ? 'βœ…' : '❌'} ${this.persona.displayName}: [TOOL RESULT] ${toolCall.toolName} ${result.success ? 'success' : 'failed'} (${duration}ms, ${result.content?.length || 0} chars, media: ${result.media?.length || 0})`); // Phase 3B: Store tool result in working memory and get UUID // Fire-and-forget pattern: storage is non-critical, don't block on it diff --git a/src/debug/jtag/tests/unit/rag/CodeToolSource.test.ts b/src/debug/jtag/tests/unit/rag/CodeToolSource.test.ts new file mode 100644 index 000000000..650f162c1 --- /dev/null +++ b/src/debug/jtag/tests/unit/rag/CodeToolSource.test.ts @@ -0,0 +1,307 @@ +/** + * CodeToolSource Unit Tests + * + * Tests the CodeToolSource RAGSource in isolation by mocking PersonaToolRegistry. + * Validates: + * - isApplicable() based on persona tool permissions + * - load() generates correct coding workflow prompt + * - Budget-aware: falls back to minimal prompt when budget is tight + * - Caching: repeated calls use cached prompt + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { CodeToolSource } from '../../../system/rag/sources/CodeToolSource'; +import type { RAGSourceContext } from '../../../system/rag/shared/RAGSource'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// Mock PersonaToolRegistry +const mockToolsForPersona = vi.fn(); + +vi.mock('../../../system/user/server/modules/PersonaToolRegistry', () => ({ + PersonaToolRegistry: { + sharedInstance: () => ({ + listToolsForPersona: mockToolsForPersona, + }), + }, +})); + +// Mock Logger (avoid real logging in tests) +vi.mock('../../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + error: () => {}, + warn: () => {}, + info: () => {}, + }), + }, +})); + +/** + * Helper to create a fake tool definition + */ +function fakeTool(name: string, description = `${name} command`) { + return { + name, + description, + category: name.startsWith('code/') ? 'code' as const : 'system' as const, + permissions: ['code:search'], + parameters: { type: 'object' as const, properties: {}, required: [] }, + examples: [], + }; +} + +/** + * Helper to build a RAGSourceContext + */ +function makeContext(overrides?: Partial): RAGSourceContext { + return { + personaId: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, + roomId: '11111111-2222-3333-4444-555555555555' as UUID, + options: {}, + totalBudget: 2000, + ...overrides, + }; +} + +describe('CodeToolSource', () => { + let source: CodeToolSource; + + beforeEach(() => { + source = new CodeToolSource(); + mockToolsForPersona.mockReset(); + // Clear the static cache between tests + (CodeToolSource as any)._cachedPrompt = null; + (CodeToolSource as any)._cacheGeneratedAt = 0; + }); + + describe('interface properties', () => { + it('has correct name', () => { + expect(source.name).toBe('code-tools'); + }); + + it('has medium priority (50)', () => { + expect(source.priority).toBe(50); + }); + + it('has 5% default budget', () => { + expect(source.defaultBudgetPercent).toBe(5); + }); + }); + + describe('isApplicable', () => { + it('returns true when persona has code/* tools', () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('collaboration/chat/send'), + ]); + + expect(source.isApplicable(makeContext())).toBe(true); + }); + + it('returns false when persona has no code/* tools', () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('collaboration/chat/send'), + fakeTool('data/list'), + fakeTool('screenshot'), + ]); + + expect(source.isApplicable(makeContext())).toBe(false); + }); + + it('returns false when persona has zero tools', () => { + mockToolsForPersona.mockReturnValue([]); + + expect(source.isApplicable(makeContext())).toBe(false); + }); + }); + + describe('load', () => { + it('returns coding workflow guidance when persona has code tools', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('code/edit'), + fakeTool('code/tree'), + fakeTool('code/search'), + fakeTool('code/diff'), + fakeTool('code/undo'), + fakeTool('code/history'), + ]); + + const section = await source.load(makeContext(), 500); + + expect(section.sourceName).toBe('code-tools'); + expect(section.tokenCount).toBeGreaterThan(0); + expect(section.loadTimeMs).toBeGreaterThanOrEqual(0); + expect(section.systemPromptSection).toBeDefined(); + }); + + it('includes workflow steps matching available tool groups', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('code/edit'), + fakeTool('code/tree'), + fakeTool('code/search'), + fakeTool('code/diff'), + fakeTool('code/undo'), + fakeTool('code/history'), + ]); + + const section = await source.load(makeContext(), 500); + const prompt = section.systemPromptSection!; + + // Each tool group has a corresponding workflow step + expect(prompt).toContain('**Discover**'); + expect(prompt).toContain('**Read**'); + expect(prompt).toContain('**Preview**'); + expect(prompt).toContain('**Edit**'); + expect(prompt).toContain('**Undo**'); + // Numbered steps + expect(prompt).toMatch(/1\. \*\*Discover\*\*/); + expect(prompt).toMatch(/2\. \*\*Read\*\*/); + }); + + it('includes code/* command names in grouped sections', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/tree'), + fakeTool('code/search'), + fakeTool('code/edit'), + fakeTool('code/diff'), + fakeTool('code/undo'), + fakeTool('code/history'), + ]); + + const section = await source.load(makeContext(), 500); + const prompt = section.systemPromptSection!; + + // Check grouped tool names + expect(prompt).toContain('code/tree'); + expect(prompt).toContain('code/search'); + expect(prompt).toContain('code/read'); + expect(prompt).toContain('code/edit'); + expect(prompt).toContain('code/diff'); + expect(prompt).toContain('code/undo'); + expect(prompt).toContain('code/history'); + }); + + it('only includes tools the persona has access to', async () => { + // Persona only has read and search β€” no write/edit/diff/undo/history + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/search'), + fakeTool('code/tree'), + ]); + + const section = await source.load(makeContext(), 500); + const prompt = section.systemPromptSection!; + + // Available tools appear in grouped sections + expect(prompt).toContain('code/read'); + expect(prompt).toContain('code/search'); + expect(prompt).toContain('code/tree'); + + // Unavailable tool groups should not appear β€” neither in groups nor workflow steps + expect(prompt).not.toContain('code/write'); + expect(prompt).not.toContain('code/edit'); + expect(prompt).not.toContain('code/diff'); + expect(prompt).not.toContain('code/undo'); + expect(prompt).not.toContain('code/history'); + + // Change graph note should not appear for read-only personas + expect(prompt).not.toContain('change graph'); + }); + + it('includes metadata with code tool count', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('code/edit'), + fakeTool('collaboration/chat/send'), // not a code tool + ]); + + const section = await source.load(makeContext(), 500); + + expect(section.metadata).toBeDefined(); + expect(section.metadata!.codeToolCount).toBe(3); + }); + + it('returns minimal prompt when budget is very tight', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('code/edit'), + fakeTool('code/tree'), + fakeTool('code/search'), + fakeTool('code/diff'), + fakeTool('code/undo'), + fakeTool('code/history'), + ]); + + // Allocate almost zero budget β€” forces minimal prompt + const section = await source.load(makeContext(), 10); + const prompt = section.systemPromptSection!; + + // Minimal prompt should be a compact one-liner + expect(prompt.length).toBeLessThan(200); + expect(prompt).toContain('Code tools available'); + expect(prompt).toContain('Read before editing'); + }); + + it('returns empty section on error', async () => { + mockToolsForPersona.mockImplementation(() => { + throw new Error('Registry unavailable'); + }); + + const section = await source.load(makeContext(), 500); + + expect(section.sourceName).toBe('code-tools'); + expect(section.tokenCount).toBe(0); + expect(section.metadata).toHaveProperty('error'); + }); + }); + + describe('caching', () => { + it('caches the prompt on first load', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + ]); + + const section1 = await source.load(makeContext(), 500); + const section2 = await source.load(makeContext(), 500); + + // Both should have identical content + expect(section1.systemPromptSection).toBe(section2.systemPromptSection); + // Second load should be faster (cache hit) + // Not strictly testing timing, but verifying the cache path works + }); + + it('invalidates cache after TTL', async () => { + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + ]); + + await source.load(makeContext(), 500); + + // Force cache expiry + (CodeToolSource as any)._cacheGeneratedAt = Date.now() - 11 * 60 * 1000; // 11 min ago + + // Now add more tools + mockToolsForPersona.mockReturnValue([ + fakeTool('code/read'), + fakeTool('code/write'), + fakeTool('code/edit'), + ]); + + const section = await source.load(makeContext(), 500); + + // Should reflect the new tools + expect(section.systemPromptSection).toContain('code/write'); + expect(section.systemPromptSection).toContain('code/edit'); + }); + }); +}); diff --git a/src/debug/jtag/workers/continuum-core/Cargo.toml b/src/debug/jtag/workers/continuum-core/Cargo.toml index 849a93390..509bab1cf 100644 --- a/src/debug/jtag/workers/continuum-core/Cargo.toml +++ b/src/debug/jtag/workers/continuum-core/Cargo.toml @@ -53,5 +53,11 @@ ts-rs.workspace = true # TypeScript type generation # Memory/Hippocampus β€” pure compute engine (data from TS ORM via IPC) fastembed.workspace = true # Inline ONNX embedding (~5ms per embed, no IPC hop) +# Code module β€” file operations, change tracking, code intelligence +similar = "2.6" # Unified diff computation +ignore = "0.4" # .gitignore-aware file walking (from ripgrep) +regex = "1" # Regex search for code search + [dev-dependencies] tokio-test = "0.4" +tempfile = "3" # Temp directories for code module tests diff --git a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts index 1e3220bf8..135a1df13 100644 --- a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts +++ b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts @@ -26,6 +26,18 @@ import type { ChannelRegistryStatus, ChannelEnqueueRequest, ServiceCycleResult, + // Code module types + EditMode, + ReadResult, + WriteResult, + SearchMatch, + SearchResult, + TreeNode, + TreeResult, + UndoResult, + ChangeNode, + HistoryResult, + GitStatusInfo, } from '../../../shared/generated'; // Memory subsystem types (Hippocampus in Rust β€” corpus-based, no SQL) @@ -57,6 +69,11 @@ export interface UtteranceEvent { timestamp: number; } +// ============================================================================ +// Code Module Types β€” imported from ts-rs generated (Rust is source of truth) +// All code types imported at top level from shared/generated +// ============================================================================ + interface Response { success: boolean; result?: any; @@ -723,6 +740,284 @@ export class RustCoreIPCClient extends EventEmitter { return response.result as ConsciousnessContextResponse; } + // ======================================================================== + // Code Module Methods (file operations, change tracking, code intelligence) + // ======================================================================== + + /** + * Initialize a per-persona workspace with file engine and change graph. + * Must be called before any other code/* operations for this persona. + * + * @param personaId - The persona's UUID + * @param workspaceRoot - Absolute path to the persona's workspace directory + * @param readRoots - Optional read-only root directories (e.g., main codebase for discovery) + */ + async codeCreateWorkspace( + personaId: string, + workspaceRoot: string, + readRoots?: string[] + ): Promise { + const response = await this.request({ + command: 'code/create-workspace', + persona_id: personaId, + workspace_root: workspaceRoot, + read_roots: readRoots ?? [], + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to create workspace'); + } + } + + /** + * Read a file or line range from the persona's workspace. + */ + async codeRead( + personaId: string, + filePath: string, + startLine?: number, + endLine?: number + ): Promise { + const response = await this.request({ + command: 'code/read', + persona_id: personaId, + file_path: filePath, + start_line: startLine ?? null, + end_line: endLine ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to read file'); + } + + return response.result as ReadResult; + } + + /** + * Write or create a file in the persona's workspace. + * Creates a ChangeNode in the change graph for undo support. + */ + async codeWrite( + personaId: string, + filePath: string, + content: string, + description?: string + ): Promise { + const response = await this.request({ + command: 'code/write', + persona_id: personaId, + file_path: filePath, + content, + description: description ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to write file'); + } + + return response.result as WriteResult; + } + + /** + * Edit a file using one of four edit modes: + * - line_range: Replace content between line numbers + * - search_replace: Find and replace text + * - insert_at: Insert content at a specific line + * - append: Add content to end of file + */ + async codeEdit( + personaId: string, + filePath: string, + editMode: EditMode, + description?: string + ): Promise { + const response = await this.request({ + command: 'code/edit', + persona_id: personaId, + file_path: filePath, + edit_mode: editMode, + description: description ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to edit file'); + } + + return response.result as WriteResult; + } + + /** + * Delete a file from the persona's workspace. + * Full content is preserved in the change graph for undo. + */ + async codeDelete( + personaId: string, + filePath: string, + description?: string + ): Promise { + const response = await this.request({ + command: 'code/delete', + persona_id: personaId, + file_path: filePath, + description: description ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to delete file'); + } + + return response.result as WriteResult; + } + + /** + * Preview an edit as a unified diff without applying it. + */ + async codeDiff( + personaId: string, + filePath: string, + editMode: EditMode + ): Promise<{ success: boolean; unified: string }> { + const response = await this.request({ + command: 'code/diff', + persona_id: personaId, + file_path: filePath, + edit_mode: editMode, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to compute diff'); + } + + return response.result as { success: boolean; unified: string }; + } + + /** + * Undo a specific change or the last N changes. + * Pass changeId to undo a specific operation, or count to undo last N. + */ + async codeUndo( + personaId: string, + changeId?: string, + count?: number + ): Promise { + const response = await this.request({ + command: 'code/undo', + persona_id: personaId, + change_id: changeId ?? null, + count: count ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to undo'); + } + + return response.result as UndoResult; + } + + /** + * Get change history for a file or entire workspace. + */ + async codeHistory( + personaId: string, + filePath?: string, + limit?: number + ): Promise { + const response = await this.request({ + command: 'code/history', + persona_id: personaId, + file_path: filePath ?? null, + limit: limit ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to get history'); + } + + return response.result as HistoryResult; + } + + /** + * Search for a regex pattern across workspace files. + * Respects .gitignore, supports glob filtering. + */ + async codeSearch( + personaId: string, + pattern: string, + fileGlob?: string, + maxResults?: number + ): Promise { + const response = await this.request({ + command: 'code/search', + persona_id: personaId, + pattern, + file_glob: fileGlob ?? null, + max_results: maxResults ?? null, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to search'); + } + + return response.result as SearchResult; + } + + /** + * Generate a directory tree for the workspace. + */ + async codeTree( + personaId: string, + path?: string, + maxDepth?: number, + includeHidden?: boolean + ): Promise { + const response = await this.request({ + command: 'code/tree', + persona_id: personaId, + path: path ?? null, + max_depth: maxDepth ?? null, + include_hidden: includeHidden ?? false, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to generate tree'); + } + + return response.result as TreeResult; + } + + /** + * Get git status for the workspace. + */ + async codeGitStatus(personaId: string): Promise { + const response = await this.request({ + command: 'code/git-status', + persona_id: personaId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to get git status'); + } + + return response.result as GitStatusInfo; + } + + /** + * Get git diff for the workspace. + */ + async codeGitDiff(personaId: string, staged?: boolean): Promise<{ success: boolean; diff: string }> { + const response = await this.request({ + command: 'code/git-diff', + persona_id: personaId, + staged: staged ?? false, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to get git diff'); + } + + return response.result as { success: boolean; diff: string }; + } + /** * Disconnect from server */ diff --git a/src/debug/jtag/workers/continuum-core/src/code/change_graph.rs b/src/debug/jtag/workers/continuum-core/src/code/change_graph.rs new file mode 100644 index 000000000..c164de5d8 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/change_graph.rs @@ -0,0 +1,427 @@ +//! Change Graph β€” DAG of file operations with undo/redo capability. +//! +//! Every file operation (create, write, edit, delete) creates a ChangeNode +//! in the graph. Each node stores forward and reverse diffs, enabling +//! point-in-time undo of any operation. +//! +//! Uses DashMap for lock-free concurrent access β€” multiple personas can +//! operate on different workspaces simultaneously with zero contention. + +use dashmap::DashMap; +use parking_lot::RwLock; +use uuid::Uuid; + +use super::types::{ChangeNode, FileOperation, FileDiff}; + +/// Per-workspace DAG of change operations. +/// +/// Thread-safe: DashMap for node storage, RwLock for ordered indices. +/// Each workspace gets its own ChangeGraph instance. +pub struct ChangeGraph { + workspace_id: String, + /// Primary storage: node ID β†’ ChangeNode + nodes: DashMap, + /// File index: file_path β†’ ordered list of node IDs (append-only) + file_index: DashMap>, + /// Chronological order of all node IDs (most recent last) + chronological: RwLock>, +} + +impl ChangeGraph { + /// Create a new empty change graph for a workspace. + pub fn new(workspace_id: &str) -> Self { + Self { + workspace_id: workspace_id.to_string(), + nodes: DashMap::new(), + file_index: DashMap::new(), + chronological: RwLock::new(Vec::new()), + } + } + + /// Record a new change node in the graph. + /// + /// Adds the node to primary storage, file index, and chronological order. + /// The caller is responsible for constructing the ChangeNode with correct + /// parent_ids, diffs, etc. + pub fn record(&self, node: ChangeNode) { + let id = node.id; + let file_path = node.file_path.clone(); + + // Insert into primary storage + self.nodes.insert(id, node); + + // Update file index + self.file_index + .entry(file_path) + .or_default() + .push(id); + + // Append to chronological order + self.chronological.write().push(id); + } + + /// Get a specific change node by ID. + pub fn get(&self, id: &Uuid) -> Option { + self.nodes.get(id).map(|r| r.clone()) + } + + /// Get the reverse diff for a specific change (for undo). + /// + /// Returns `(reverse_diff, file_path)` so the caller can apply the + /// reverse diff to restore the file. Returns None if node not found. + pub fn reverse_diff_for(&self, id: &Uuid) -> Option<(FileDiff, String)> { + self.nodes + .get(id) + .map(|node| (node.reverse_diff.clone(), node.file_path.clone())) + } + + /// Record an undo operation. + /// + /// Creates a new ChangeNode that reverses the target node. + /// The reverse node's forward_diff is the target's reverse_diff (and vice versa). + pub fn record_undo(&self, target_id: Uuid, author_id: &str) -> Option { + let target = self.nodes.get(&target_id)?; + + let undo_node = ChangeNode { + id: Uuid::new_v4(), + parent_ids: vec![target_id], + author_id: author_id.to_string(), + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64, + file_path: target.file_path.clone(), + operation: FileOperation::Undo { + reverted_id: target_id, + }, + // Swap forward/reverse: undo's forward is the original's reverse + forward_diff: target.reverse_diff.clone(), + reverse_diff: target.forward_diff.clone(), + description: Some(format!("Undo: {}", target.description.as_deref().unwrap_or("previous change"))), + workspace_id: self.workspace_id.clone(), + }; + + drop(target); // Release DashMap ref before mutating + let result = undo_node.clone(); + self.record(undo_node); + Some(result) + } + + /// Get the last N change node IDs in reverse chronological order (for undo_last). + /// + /// Skips nodes that are already undo operations to avoid undo-of-undo chains. + pub fn last_n_undoable(&self, count: usize) -> Vec { + let chrono = self.chronological.read(); + chrono + .iter() + .rev() + .filter(|id| { + self.nodes + .get(id) + .map(|n| !matches!(n.operation, FileOperation::Undo { .. })) + .unwrap_or(false) + }) + .take(count) + .copied() + .collect() + } + + /// Get change history for a specific file, most recent first. + pub fn file_history(&self, file_path: &str, limit: usize) -> Vec { + let ids = match self.file_index.get(file_path) { + Some(ids) => ids.clone(), + None => return Vec::new(), + }; + + ids.iter() + .rev() + .take(limit) + .filter_map(|id| self.nodes.get(id).map(|r| r.clone())) + .collect() + } + + /// Get the most recent change node for a file. + pub fn latest_for_file(&self, file_path: &str) -> Option { + let ids = self.file_index.get(file_path)?; + let last_id = ids.last()?; + self.nodes.get(last_id).map(|r| r.clone()) + } + + /// Get all change history for the workspace, most recent first. + pub fn workspace_history(&self, limit: usize) -> Vec { + let chrono = self.chronological.read(); + chrono + .iter() + .rev() + .take(limit) + .filter_map(|id| self.nodes.get(id).map(|r| r.clone())) + .collect() + } + + /// Walk the DAG backwards from a node, collecting all ancestors. + /// + /// Uses BFS to handle the DAG structure (nodes can have multiple parents). + /// Returns ancestors in breadth-first order (immediate parents first). + pub fn ancestors(&self, node_id: &Uuid) -> Vec { + let mut result = Vec::new(); + let mut visited = std::collections::HashSet::new(); + let mut queue = std::collections::VecDeque::new(); + + // Seed with the starting node's parents + if let Some(node) = self.nodes.get(node_id) { + for parent_id in &node.parent_ids { + if visited.insert(*parent_id) { + queue.push_back(*parent_id); + } + } + } + + while let Some(current_id) = queue.pop_front() { + if let Some(node) = self.nodes.get(¤t_id) { + for parent_id in &node.parent_ids { + if visited.insert(*parent_id) { + queue.push_back(*parent_id); + } + } + result.push(node.clone()); + } + } + + result + } + + /// Total number of change nodes in the graph. + pub fn len(&self) -> usize { + self.nodes.len() + } + + /// Whether the graph is empty. + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + /// List all files that have been modified in this workspace. + pub fn modified_files(&self) -> Vec { + self.file_index + .iter() + .map(|entry| entry.key().clone()) + .collect() + } + + /// Get the workspace ID this graph belongs to. + pub fn workspace_id(&self) -> &str { + &self.workspace_id + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::code::types::{FileDiff, DiffHunk}; + + fn make_diff(content: &str) -> FileDiff { + FileDiff { + unified: content.to_string(), + hunks: vec![DiffHunk { + old_start: 1, + old_count: 1, + new_start: 1, + new_count: 1, + content: content.to_string(), + }], + } + } + + fn make_node(file_path: &str, description: &str) -> ChangeNode { + ChangeNode { + id: Uuid::new_v4(), + parent_ids: Vec::new(), + author_id: "test-persona".to_string(), + timestamp: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64, + file_path: file_path.to_string(), + operation: FileOperation::Write, + forward_diff: make_diff("+new content"), + reverse_diff: make_diff("-new content"), + description: Some(description.to_string()), + workspace_id: "test-workspace".to_string(), + } + } + + #[test] + fn test_record_and_get() { + let graph = ChangeGraph::new("test-workspace"); + let node = make_node("src/main.ts", "Initial write"); + let id = node.id; + + graph.record(node); + + let retrieved = graph.get(&id).unwrap(); + assert_eq!(retrieved.id, id); + assert_eq!(retrieved.file_path, "src/main.ts"); + assert_eq!(graph.len(), 1); + } + + #[test] + fn test_file_history() { + let graph = ChangeGraph::new("test-workspace"); + + let node1 = make_node("src/main.ts", "First edit"); + let node2 = make_node("src/main.ts", "Second edit"); + let node3 = make_node("src/other.ts", "Other file edit"); + + let id1 = node1.id; + let id2 = node2.id; + + graph.record(node1); + graph.record(node2); + graph.record(node3); + + let history = graph.file_history("src/main.ts", 10); + assert_eq!(history.len(), 2); + // Most recent first + assert_eq!(history[0].id, id2); + assert_eq!(history[1].id, id1); + } + + #[test] + fn test_workspace_history() { + let graph = ChangeGraph::new("test-workspace"); + + let node1 = make_node("src/a.ts", "Edit a"); + let node2 = make_node("src/b.ts", "Edit b"); + let node3 = make_node("src/c.ts", "Edit c"); + + let id1 = node1.id; + let id3 = node3.id; + + graph.record(node1); + graph.record(node2); + graph.record(node3); + + let history = graph.workspace_history(2); + assert_eq!(history.len(), 2); + assert_eq!(history[0].id, id3); // Most recent + assert_eq!(history[1].description.as_deref(), Some("Edit b")); + + let all = graph.workspace_history(100); + assert_eq!(all.len(), 3); + assert_eq!(all[2].id, id1); // Oldest + } + + #[test] + fn test_undo_creates_reverse_node() { + let graph = ChangeGraph::new("test-workspace"); + + let original = make_node("src/main.ts", "Original write"); + let original_id = original.id; + let original_forward = original.forward_diff.unified.clone(); + let original_reverse = original.reverse_diff.unified.clone(); + + graph.record(original); + + let undo_node = graph.record_undo(original_id, "undo-persona").unwrap(); + + // Undo node's forward diff should be original's reverse diff + assert_eq!(undo_node.forward_diff.unified, original_reverse); + // Undo node's reverse diff should be original's forward diff + assert_eq!(undo_node.reverse_diff.unified, original_forward); + assert!(matches!(undo_node.operation, FileOperation::Undo { reverted_id } if reverted_id == original_id)); + assert_eq!(graph.len(), 2); + } + + #[test] + fn test_last_n_undoable_skips_undo_nodes() { + let graph = ChangeGraph::new("test-workspace"); + + let node1 = make_node("src/a.ts", "Edit 1"); + let node2 = make_node("src/b.ts", "Edit 2"); + let id1 = node1.id; + let id2 = node2.id; + + graph.record(node1); + graph.record(node2); + + // Create an undo (which adds a 3rd node) + graph.record_undo(id1, "persona"); + + // last_n_undoable should skip the undo node + let undoable = graph.last_n_undoable(5); + assert_eq!(undoable.len(), 2); + assert_eq!(undoable[0], id2); // Most recent non-undo + assert_eq!(undoable[1], id1); // Older non-undo + } + + #[test] + fn test_ancestors_walks_dag() { + let graph = ChangeGraph::new("test-workspace"); + + // Create a chain: root β†’ child β†’ grandchild + let root = make_node("src/main.ts", "Root"); + let root_id = root.id; + graph.record(root); + + let mut child = make_node("src/main.ts", "Child"); + child.parent_ids = vec![root_id]; + let child_id = child.id; + graph.record(child); + + let mut grandchild = make_node("src/main.ts", "Grandchild"); + grandchild.parent_ids = vec![child_id]; + let grandchild_id = grandchild.id; + graph.record(grandchild); + + let ancestors = graph.ancestors(&grandchild_id); + assert_eq!(ancestors.len(), 2); + // BFS: child first, then root + assert_eq!(ancestors[0].id, child_id); + assert_eq!(ancestors[1].id, root_id); + } + + #[test] + fn test_latest_for_file() { + let graph = ChangeGraph::new("test-workspace"); + + let node1 = make_node("src/main.ts", "First"); + let node2 = make_node("src/main.ts", "Second"); + let id2 = node2.id; + + graph.record(node1); + graph.record(node2); + + let latest = graph.latest_for_file("src/main.ts").unwrap(); + assert_eq!(latest.id, id2); + } + + #[test] + fn test_modified_files() { + let graph = ChangeGraph::new("test-workspace"); + + graph.record(make_node("src/a.ts", "Edit a")); + graph.record(make_node("src/b.ts", "Edit b")); + graph.record(make_node("src/a.ts", "Edit a again")); + + let mut files = graph.modified_files(); + files.sort(); + assert_eq!(files, vec!["src/a.ts", "src/b.ts"]); + } + + #[test] + fn test_empty_graph() { + let graph = ChangeGraph::new("test-workspace"); + assert!(graph.is_empty()); + assert_eq!(graph.len(), 0); + assert!(graph.file_history("any.ts", 10).is_empty()); + assert!(graph.workspace_history(10).is_empty()); + assert!(graph.latest_for_file("any.ts").is_none()); + assert!(graph.last_n_undoable(5).is_empty()); + } + + #[test] + fn test_undo_nonexistent_returns_none() { + let graph = ChangeGraph::new("test-workspace"); + assert!(graph.record_undo(Uuid::new_v4(), "persona").is_none()); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/diff_engine.rs b/src/debug/jtag/workers/continuum-core/src/code/diff_engine.rs new file mode 100644 index 000000000..189cc7515 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/diff_engine.rs @@ -0,0 +1,175 @@ +//! Diff engine β€” unified diff computation using the `similar` crate. +//! +//! Generates forward and reverse diffs for file operations, +//! enabling the change graph's undo/redo capability. + +use similar::{ChangeTag, DiffTag, TextDiff}; + +use super::types::{DiffHunk, FileDiff}; + +/// Compute a FileDiff between two strings. +/// Returns both the unified text representation and structured hunks. +pub fn compute_diff(old: &str, new: &str, file_path: &str) -> FileDiff { + let diff = TextDiff::from_lines(old, new); + + // Generate unified diff text + let unified = diff + .unified_diff() + .context_radius(3) + .header(&format!("a/{}", file_path), &format!("b/{}", file_path)) + .to_string(); + + // Build structured hunks + let hunks = build_hunks(&diff); + + FileDiff { unified, hunks } +} + +/// Compute forward (oldβ†’new) and reverse (newβ†’old) diffs simultaneously. +pub fn compute_bidirectional_diff( + old: &str, + new: &str, + file_path: &str, +) -> (FileDiff, FileDiff) { + let forward = compute_diff(old, new, file_path); + let reverse = compute_diff(new, old, file_path); + (forward, reverse) +} + +/// Build structured DiffHunks from a TextDiff. +fn build_hunks(diff: &TextDiff<'_, '_, '_, str>) -> Vec { + let mut hunks = Vec::new(); + + for group in diff.grouped_ops(3) { + let mut old_start = 0u32; + let mut new_start = 0u32; + let mut old_count = 0u32; + let mut new_count = 0u32; + let mut content = String::new(); + + for op in &group { + if old_start == 0 && new_start == 0 { + old_start = op.old_range().start as u32 + 1; + new_start = op.new_range().start as u32 + 1; + } + + match op.tag() { + DiffTag::Equal => { + let count = op.old_range().len() as u32; + old_count += count; + new_count += count; + for value in diff.iter_changes(op) { + content.push(' '); + content.push_str(value.as_str().unwrap_or("")); + } + } + DiffTag::Delete => { + let count = op.old_range().len() as u32; + old_count += count; + for value in diff.iter_changes(op) { + content.push('-'); + content.push_str(value.as_str().unwrap_or("")); + } + } + DiffTag::Insert => { + let count = op.new_range().len() as u32; + new_count += count; + for value in diff.iter_changes(op) { + content.push('+'); + content.push_str(value.as_str().unwrap_or("")); + } + } + DiffTag::Replace => { + // Replace = Delete + Insert combined + old_count += op.old_range().len() as u32; + new_count += op.new_range().len() as u32; + for value in diff.iter_changes(op) { + match value.tag() { + ChangeTag::Delete => { + content.push('-'); + content.push_str(value.as_str().unwrap_or("")); + } + ChangeTag::Insert => { + content.push('+'); + content.push_str(value.as_str().unwrap_or("")); + } + ChangeTag::Equal => { + content.push(' '); + content.push_str(value.as_str().unwrap_or("")); + } + } + } + } + } + } + + if !content.is_empty() { + hunks.push(DiffHunk { + old_start, + old_count, + new_start, + new_count, + content, + }); + } + } + + hunks +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compute_diff_no_changes() { + let text = "line 1\nline 2\nline 3\n"; + let diff = compute_diff(text, text, "test.ts"); + assert!(diff.unified.is_empty() || !diff.unified.contains('-')); + assert!(diff.hunks.is_empty()); + } + + #[test] + fn test_compute_diff_simple_edit() { + let old = "line 1\nline 2\nline 3\n"; + let new = "line 1\nline 2 modified\nline 3\n"; + let diff = compute_diff(old, new, "test.ts"); + assert!(diff.unified.contains("-line 2")); + assert!(diff.unified.contains("+line 2 modified")); + assert!(!diff.hunks.is_empty()); + } + + #[test] + fn test_compute_diff_add_lines() { + let old = "line 1\nline 3\n"; + let new = "line 1\nline 2\nline 3\n"; + let diff = compute_diff(old, new, "test.ts"); + assert!(diff.unified.contains("+line 2")); + } + + #[test] + fn test_compute_diff_delete_lines() { + let old = "line 1\nline 2\nline 3\n"; + let new = "line 1\nline 3\n"; + let diff = compute_diff(old, new, "test.ts"); + assert!(diff.unified.contains("-line 2")); + } + + #[test] + fn test_bidirectional_diff() { + let old = "hello\nworld\n"; + let new = "hello\nrust\n"; + let (forward, reverse) = compute_bidirectional_diff(old, new, "test.ts"); + assert!(forward.unified.contains("-world")); + assert!(forward.unified.contains("+rust")); + assert!(reverse.unified.contains("-rust")); + assert!(reverse.unified.contains("+world")); + } + + #[test] + fn test_create_file_diff() { + let (forward, reverse) = compute_bidirectional_diff("", "new content\n", "test.ts"); + assert!(forward.unified.contains("+new content")); + assert!(reverse.unified.contains("-new content")); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/file_engine.rs b/src/debug/jtag/workers/continuum-core/src/code/file_engine.rs new file mode 100644 index 000000000..5fe51c618 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/file_engine.rs @@ -0,0 +1,905 @@ +//! File Engine β€” per-persona file operations with change tracking. +//! +//! Every write/edit/delete operation creates a ChangeNode in the change graph, +//! enabling undo at any point. Reads are side-effect-free. +//! +//! Thread safety: Each persona gets their own FileEngine instance. +//! The underlying ChangeGraph and PathSecurity handle concurrency. + +use std::fs; +use std::path::PathBuf; + +use uuid::Uuid; + +use super::change_graph::ChangeGraph; +use super::diff_engine::compute_bidirectional_diff; +use super::path_security::{PathSecurity, PathSecurityError}; +use super::types::*; + +/// Per-persona file engine with workspace scoping and change tracking. +pub struct FileEngine { + persona_id: String, + security: PathSecurity, + graph: ChangeGraph, +} + +/// Errors from file engine operations. +#[derive(Debug)] +pub enum FileEngineError { + Security(PathSecurityError), + Io(std::io::Error), + NotFound(String), + EditFailed(String), +} + +impl std::fmt::Display for FileEngineError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Security(e) => write!(f, "Security: {}", e), + Self::Io(e) => write!(f, "I/O: {}", e), + Self::NotFound(path) => write!(f, "File not found: {}", path), + Self::EditFailed(msg) => write!(f, "Edit failed: {}", msg), + } + } +} + +impl std::error::Error for FileEngineError {} + +impl From for FileEngineError { + fn from(e: PathSecurityError) -> Self { + Self::Security(e) + } +} + +impl From for FileEngineError { + fn from(e: std::io::Error) -> Self { + Self::Io(e) + } +} + +impl FileEngine { + /// Create a new FileEngine for a persona. + pub fn new(persona_id: &str, security: PathSecurity) -> Self { + let workspace_id = format!("workspace-{}", persona_id); + Self { + persona_id: persona_id.to_string(), + security, + graph: ChangeGraph::new(&workspace_id), + } + } + + /// Read a file, optionally a range of lines (1-indexed, inclusive). + pub fn read( + &self, + relative_path: &str, + start_line: Option, + end_line: Option, + ) -> Result { + let abs_path = self.security.validate_read(relative_path)?; + + if !abs_path.exists() { + return Err(FileEngineError::NotFound(relative_path.to_string())); + } + + let content = fs::read_to_string(&abs_path)?; + let total_lines = content.lines().count() as u32; + let size_bytes = content.len() as u64; + + let start = start_line.unwrap_or(1).max(1); + let end = end_line.unwrap_or(total_lines).min(total_lines); + + let selected: String = content + .lines() + .enumerate() + .filter(|(i, _)| { + let line_num = *i as u32 + 1; + line_num >= start && line_num <= end + }) + .map(|(_, line)| line) + .collect::>() + .join("\n"); + + let lines_returned = if end >= start { end - start + 1 } else { 0 }; + + Ok(ReadResult { + success: true, + content: Some(if selected.is_empty() && total_lines > 0 { + // If the file has content but selection is empty, return empty + String::new() + } else { + selected + }), + file_path: relative_path.to_string(), + total_lines, + lines_returned, + start_line: start, + end_line: end, + size_bytes, + error: None, + }) + } + + /// Write (create or overwrite) a file. Records a ChangeNode. + pub fn write( + &self, + relative_path: &str, + content: &str, + description: Option<&str>, + ) -> Result { + let abs_path = self.security.validate_write(relative_path)?; + self.security.validate_size(relative_path, content.len() as u64)?; + + // Read old content (empty string for new files) + let old_content = if abs_path.exists() { + fs::read_to_string(&abs_path).unwrap_or_default() + } else { + String::new() + }; + + let operation = if abs_path.exists() { + FileOperation::Write + } else { + FileOperation::Create + }; + + // Compute diffs + let (forward_diff, reverse_diff) = + compute_bidirectional_diff(&old_content, content, relative_path); + + // Create parent directories if needed + if let Some(parent) = abs_path.parent() { + if !parent.exists() { + fs::create_dir_all(parent)?; + } + } + + // Write the file + fs::write(&abs_path, content)?; + + // Record in change graph + let node = ChangeNode { + id: Uuid::new_v4(), + parent_ids: self.latest_parent(relative_path), + author_id: self.persona_id.clone(), + timestamp: now_millis(), + file_path: relative_path.to_string(), + operation, + forward_diff, + reverse_diff, + description: description.map(String::from), + workspace_id: self.graph.workspace_id().to_string(), + }; + + let change_id = node.id.to_string(); + self.graph.record(node); + + Ok(WriteResult { + success: true, + change_id: Some(change_id), + file_path: relative_path.to_string(), + bytes_written: content.len() as u64, + error: None, + }) + } + + /// Edit a file using an EditMode. Records a ChangeNode. + pub fn edit( + &self, + relative_path: &str, + edit_mode: &EditMode, + description: Option<&str>, + ) -> Result { + let abs_path = self.security.validate_write(relative_path)?; + + if !abs_path.exists() { + return Err(FileEngineError::NotFound(relative_path.to_string())); + } + + let old_content = fs::read_to_string(&abs_path)?; + let new_content = apply_edit(&old_content, edit_mode)?; + + self.security.validate_size(relative_path, new_content.len() as u64)?; + + // Compute diffs + let (forward_diff, reverse_diff) = + compute_bidirectional_diff(&old_content, &new_content, relative_path); + + // Write the modified file + fs::write(&abs_path, &new_content)?; + + // Record in change graph + let node = ChangeNode { + id: Uuid::new_v4(), + parent_ids: self.latest_parent(relative_path), + author_id: self.persona_id.clone(), + timestamp: now_millis(), + file_path: relative_path.to_string(), + operation: FileOperation::Edit, + forward_diff, + reverse_diff, + description: description.map(String::from), + workspace_id: self.graph.workspace_id().to_string(), + }; + + let change_id = node.id.to_string(); + let bytes_written = new_content.len() as u64; + self.graph.record(node); + + Ok(WriteResult { + success: true, + change_id: Some(change_id), + file_path: relative_path.to_string(), + bytes_written, + error: None, + }) + } + + /// Delete a file. Records a ChangeNode with the full content as reverse diff. + pub fn delete( + &self, + relative_path: &str, + description: Option<&str>, + ) -> Result { + let abs_path = self.security.validate_write(relative_path)?; + + if !abs_path.exists() { + return Err(FileEngineError::NotFound(relative_path.to_string())); + } + + let old_content = fs::read_to_string(&abs_path)?; + + // Compute diffs (new content is empty for delete) + let (forward_diff, reverse_diff) = + compute_bidirectional_diff(&old_content, "", relative_path); + + // Delete the file + fs::remove_file(&abs_path)?; + + // Record in change graph + let node = ChangeNode { + id: Uuid::new_v4(), + parent_ids: self.latest_parent(relative_path), + author_id: self.persona_id.clone(), + timestamp: now_millis(), + file_path: relative_path.to_string(), + operation: FileOperation::Delete, + forward_diff, + reverse_diff, + description: description.map(String::from), + workspace_id: self.graph.workspace_id().to_string(), + }; + + let change_id = node.id.to_string(); + self.graph.record(node); + + Ok(WriteResult { + success: true, + change_id: Some(change_id), + file_path: relative_path.to_string(), + bytes_written: 0, + error: None, + }) + } + + /// Preview what an edit would produce (unified diff) without applying it. + pub fn preview_diff( + &self, + relative_path: &str, + edit_mode: &EditMode, + ) -> Result { + let abs_path = self.security.validate_read(relative_path)?; + + if !abs_path.exists() { + return Err(FileEngineError::NotFound(relative_path.to_string())); + } + + let old_content = fs::read_to_string(&abs_path)?; + let new_content = apply_edit(&old_content, edit_mode)?; + + let (forward_diff, _) = + compute_bidirectional_diff(&old_content, &new_content, relative_path); + + Ok(forward_diff) + } + + /// Undo a specific change by applying its reverse diff. + pub fn undo(&self, change_id: &Uuid) -> Result { + let (reverse_diff, file_path) = self + .graph + .reverse_diff_for(change_id) + .ok_or_else(|| FileEngineError::EditFailed(format!("Change {} not found", change_id)))?; + + // Read current file content + let abs_path = self.security.validate_write(&file_path)?; + let current_content = if abs_path.exists() { + fs::read_to_string(&abs_path)? + } else { + String::new() + }; + + // The reverse diff's unified text tells us what to apply. + // For a proper undo, we use the stored old content from the original node. + let original_node = self + .graph + .get(change_id) + .ok_or_else(|| FileEngineError::EditFailed(format!("Change {} not found", change_id)))?; + + // Reconstruct: the original node's reverse_diff goes oldβ†’new when applied backward. + // We apply the reverse_diff to the current content. Since we stored the complete + // forward and reverse diffs, we can reconstruct by computing what the content + // should be by using the reverse operation's forward diff. + // + // For simple cases (createβ†’undo = delete, writeβ†’undo = restore old): + // The undo node created by ChangeGraph has the correct forward_diff. + let undo_node = self + .graph + .record_undo(*change_id, &self.persona_id) + .ok_or_else(|| FileEngineError::EditFailed(format!("Change {} not found for undo", change_id)))?; + + // For the undo, we need to apply the reverse diff to the file. + // The simplest correct approach: re-read the original diff to determine + // what the file should look like after undo. + // + // Since the reverse diff might not apply cleanly if other changes happened, + // we do a best-effort: if the change was the latest for this file, apply the + // reverse content directly; otherwise, warn about conflicts. + let latest = self.graph.latest_for_file(&file_path); + let is_latest = latest + .as_ref() + .map(|n| n.id == undo_node.id) + .unwrap_or(false); + + // Apply the reverse diff content β€” use the unified diff text + // For now, use a simple heuristic: if we can identify the old content, + // reconstruct it from the diff hunks. + let _restored_content = if !reverse_diff.unified.is_empty() { + // The reverse diff exists, attempt to apply + apply_reverse_simple(¤t_content, &reverse_diff) + .unwrap_or_else(|| current_content.clone()) + } else { + current_content.clone() + }; + + // Write the restored content + if original_node.operation == FileOperation::Create { + // Undoing a create = delete the file + if abs_path.exists() { + fs::remove_file(&abs_path)?; + } + } else if matches!(original_node.operation, FileOperation::Delete) { + // Undoing a delete = recreate the file with reverse diff content + // The reverse_diff for a delete contains the original content + let content = extract_added_content(&reverse_diff); + if let Some(parent) = abs_path.parent() { + if !parent.exists() { + fs::create_dir_all(parent)?; + } + } + fs::write(&abs_path, content)?; + } else { + // Undoing a write/edit = apply reverse diff + let restored = apply_reverse_simple(¤t_content, &reverse_diff) + .unwrap_or_else(|| current_content.clone()); + fs::write(&abs_path, &restored)?; + } + + Ok(WriteResult { + success: true, + change_id: Some(undo_node.id.to_string()), + file_path, + bytes_written: 0, + error: if !is_latest { + Some("Warning: undone change was not the latest; result may have conflicts".to_string()) + } else { + None + }, + }) + } + + /// Undo the last N non-undo operations. + pub fn undo_last(&self, count: usize) -> Result { + let ids = self.graph.last_n_undoable(count); + let mut changes_undone = Vec::new(); + + for id in ids { + match self.undo(&id) { + Ok(result) => changes_undone.push(result), + Err(e) => { + return Ok(UndoResult { + success: false, + changes_undone, + error: Some(format!("Failed to undo {}: {}", id, e)), + }); + } + } + } + + Ok(UndoResult { + success: true, + changes_undone, + error: None, + }) + } + + /// Get change history for a specific file. + pub fn file_history(&self, file_path: &str, limit: usize) -> HistoryResult { + let nodes = self.graph.file_history(file_path, limit); + let total_count = nodes.len() as u32; + HistoryResult { + success: true, + nodes, + total_count, + error: None, + } + } + + /// Get all change history for the workspace. + pub fn workspace_history(&self, limit: usize) -> HistoryResult { + let nodes = self.graph.workspace_history(limit); + let total_count = nodes.len() as u32; + HistoryResult { + success: true, + nodes, + total_count, + error: None, + } + } + + /// Get the underlying PathSecurity (for search/tree operations that need it). + pub fn security(&self) -> &PathSecurity { + &self.security + } + + /// Get the workspace root path. + pub fn workspace_root(&self) -> PathBuf { + self.security.workspace_root().to_path_buf() + } + + /// Get the latest parent ID for a file (for DAG edges). + fn latest_parent(&self, file_path: &str) -> Vec { + self.graph + .latest_for_file(file_path) + .map(|n| vec![n.id]) + .unwrap_or_default() + } +} + +/// Apply an EditMode to file content, producing the new content. +fn apply_edit(content: &str, edit_mode: &EditMode) -> Result { + match edit_mode { + EditMode::LineRange { + start_line, + end_line, + new_content, + } => { + let lines: Vec<&str> = content.lines().collect(); + let total = lines.len() as u32; + + if *start_line == 0 || *start_line > total + 1 { + return Err(FileEngineError::EditFailed(format!( + "start_line {} out of range (1-{})", + start_line, + total + 1 + ))); + } + if *end_line < *start_line || *end_line > total { + return Err(FileEngineError::EditFailed(format!( + "end_line {} out of range ({}-{})", + end_line, start_line, total + ))); + } + + let start_idx = (*start_line - 1) as usize; + let end_idx = *end_line as usize; + + let mut result = String::new(); + + // Lines before the range + for line in &lines[..start_idx] { + result.push_str(line); + result.push('\n'); + } + + // Insert new content + if !new_content.is_empty() { + result.push_str(new_content); + if !new_content.ends_with('\n') { + result.push('\n'); + } + } + + // Lines after the range + for line in &lines[end_idx..] { + result.push_str(line); + result.push('\n'); + } + + // Preserve trailing newline behavior + if !content.ends_with('\n') && result.ends_with('\n') { + result.pop(); + } + + Ok(result) + } + + EditMode::SearchReplace { + search, + replace, + all, + } => { + if !content.contains(search.as_str()) { + return Err(FileEngineError::EditFailed(format!( + "Search text not found: '{}'", + if search.len() > 50 { + format!("{}...", &search[..50]) + } else { + search.clone() + } + ))); + } + + let result = if *all { + content.replace(search.as_str(), replace.as_str()) + } else { + content.replacen(search.as_str(), replace.as_str(), 1) + }; + + Ok(result) + } + + EditMode::InsertAt { line, content: new_content } => { + let lines: Vec<&str> = content.lines().collect(); + let total = lines.len() as u32; + + if *line == 0 || *line > total + 1 { + return Err(FileEngineError::EditFailed(format!( + "Insert line {} out of range (1-{})", + line, + total + 1 + ))); + } + + let insert_idx = (*line - 1) as usize; + let mut result = String::new(); + + for line_str in &lines[..insert_idx] { + result.push_str(line_str); + result.push('\n'); + } + + result.push_str(new_content); + if !new_content.ends_with('\n') { + result.push('\n'); + } + + for line_str in &lines[insert_idx..] { + result.push_str(line_str); + result.push('\n'); + } + + if !content.ends_with('\n') && result.ends_with('\n') { + result.pop(); + } + + Ok(result) + } + + EditMode::Append { content: new_content } => { + let mut result = content.to_string(); + if !result.ends_with('\n') && !result.is_empty() { + result.push('\n'); + } + result.push_str(new_content); + Ok(result) + } + } +} + +/// Simple reverse diff application. +/// +/// Extracts removed lines from the diff and added lines from the original, +/// reconstructing the previous content. This handles the common case where +/// the undo target was the most recent change. +fn apply_reverse_simple(current: &str, reverse_diff: &FileDiff) -> Option { + if reverse_diff.hunks.is_empty() { + return None; + } + + // Simple approach: use the unified diff lines. + // Lines starting with '-' in the reverse diff are what to remove from current. + // Lines starting with '+' in the reverse diff are what to add. + // Lines starting with ' ' are context (unchanged). + let mut result_lines: Vec = Vec::new(); + let current_lines: Vec<&str> = current.lines().collect(); + let mut current_idx = 0; + + for hunk in &reverse_diff.hunks { + let hunk_start = (hunk.old_start as usize).saturating_sub(1); + + // Copy lines before this hunk + while current_idx < hunk_start && current_idx < current_lines.len() { + result_lines.push(current_lines[current_idx].to_string()); + current_idx += 1; + } + + // Process hunk content + for line in hunk.content.lines() { + if let Some(stripped) = line.strip_prefix('+') { + // Add this line (it's being added by the reverse) + result_lines.push(stripped.to_string()); + } else if let Some(_stripped) = line.strip_prefix('-') { + // Skip this line (it's being removed by the reverse) + current_idx += 1; + } else if let Some(stripped) = line.strip_prefix(' ') { + // Context line + result_lines.push(stripped.to_string()); + current_idx += 1; + } + } + } + + // Copy remaining lines + while current_idx < current_lines.len() { + result_lines.push(current_lines[current_idx].to_string()); + current_idx += 1; + } + + let mut result = result_lines.join("\n"); + if current.ends_with('\n') && !result.ends_with('\n') { + result.push('\n'); + } + + Some(result) +} + +/// Extract added content from a diff (lines starting with '+'). +/// Used for reconstructing files on undo of delete. +fn extract_added_content(diff: &FileDiff) -> String { + let mut lines = Vec::new(); + for hunk in &diff.hunks { + for line in hunk.content.lines() { + if let Some(stripped) = line.strip_prefix('+') { + lines.push(stripped); + } + } + } + let mut result = lines.join("\n"); + if !result.is_empty() && !result.ends_with('\n') { + result.push('\n'); + } + result +} + +/// Get current time in milliseconds since epoch. +fn now_millis() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_engine() -> (tempfile::TempDir, FileEngine) { + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write( + dir.path().join("src/main.ts"), + "line 1\nline 2\nline 3\n", + ) + .unwrap(); + + let security = PathSecurity::new(dir.path()).unwrap(); + let engine = FileEngine::new("test-persona", security); + (dir, engine) + } + + #[test] + fn test_read_full_file() { + let (_dir, engine) = setup_engine(); + let result = engine.read("src/main.ts", None, None).unwrap(); + assert!(result.success); + assert_eq!(result.total_lines, 3); + assert!(result.content.unwrap().contains("line 1")); + } + + #[test] + fn test_read_line_range() { + let (_dir, engine) = setup_engine(); + let result = engine.read("src/main.ts", Some(2), Some(2)).unwrap(); + assert!(result.success); + assert_eq!(result.lines_returned, 1); + assert_eq!(result.content.unwrap(), "line 2"); + } + + #[test] + fn test_read_nonexistent() { + let (_dir, engine) = setup_engine(); + let result = engine.read("src/nonexistent.ts", None, None); + assert!(result.is_err()); + } + + #[test] + fn test_write_new_file() { + let (_dir, engine) = setup_engine(); + let result = engine + .write("src/new.ts", "export const x = 1;\n", Some("Create new file")) + .unwrap(); + assert!(result.success); + assert!(result.change_id.is_some()); + assert_eq!(result.bytes_written, 20); + + // Verify content + let read = engine.read("src/new.ts", None, None).unwrap(); + assert!(read.content.unwrap().contains("export const x = 1;")); + } + + #[test] + fn test_write_overwrite_creates_diff() { + let (_dir, engine) = setup_engine(); + + // Overwrite existing file + let result = engine + .write("src/main.ts", "modified\n", Some("Overwrite")) + .unwrap(); + assert!(result.success); + + // Check history + let history = engine.file_history("src/main.ts", 10); + assert_eq!(history.nodes.len(), 1); + assert!(history.nodes[0].forward_diff.unified.contains("-line 1")); + assert!(history.nodes[0].forward_diff.unified.contains("+modified")); + } + + #[test] + fn test_edit_search_replace() { + let (_dir, engine) = setup_engine(); + + let result = engine + .edit( + "src/main.ts", + &EditMode::SearchReplace { + search: "line 2".to_string(), + replace: "line 2 modified".to_string(), + all: false, + }, + Some("Modify line 2"), + ) + .unwrap(); + assert!(result.success); + + let read = engine.read("src/main.ts", None, None).unwrap(); + assert!(read.content.unwrap().contains("line 2 modified")); + } + + #[test] + fn test_edit_line_range() { + let (_dir, engine) = setup_engine(); + + let result = engine + .edit( + "src/main.ts", + &EditMode::LineRange { + start_line: 2, + end_line: 2, + new_content: "replaced line".to_string(), + }, + Some("Replace line 2"), + ) + .unwrap(); + assert!(result.success); + + let read = engine.read("src/main.ts", None, None).unwrap(); + let content = read.content.unwrap(); + assert!(content.contains("line 1")); + assert!(content.contains("replaced line")); + assert!(content.contains("line 3")); + assert!(!content.contains("line 2\n")); + } + + #[test] + fn test_edit_insert_at() { + let (_dir, engine) = setup_engine(); + + let result = engine + .edit( + "src/main.ts", + &EditMode::InsertAt { + line: 2, + content: "inserted line".to_string(), + }, + Some("Insert before line 2"), + ) + .unwrap(); + assert!(result.success); + + let read = engine.read("src/main.ts", None, None).unwrap(); + let content = read.content.unwrap(); + assert!(content.contains("line 1\ninserted line\nline 2")); + } + + #[test] + fn test_edit_append() { + let (_dir, engine) = setup_engine(); + + let result = engine + .edit( + "src/main.ts", + &EditMode::Append { + content: "line 4".to_string(), + }, + Some("Append line 4"), + ) + .unwrap(); + assert!(result.success); + + let read = engine.read("src/main.ts", None, None).unwrap(); + assert!(read.content.unwrap().contains("line 4")); + } + + #[test] + fn test_delete_file() { + let (_dir, engine) = setup_engine(); + + let result = engine.delete("src/main.ts", Some("Remove main")).unwrap(); + assert!(result.success); + + let read = engine.read("src/main.ts", None, None); + assert!(read.is_err()); // File should not exist + } + + #[test] + fn test_write_blocked_extension() { + let (_dir, engine) = setup_engine(); + let result = engine.write("src/malware.exe", "bad", None); + assert!(result.is_err()); + } + + #[test] + fn test_preview_diff() { + let (_dir, engine) = setup_engine(); + let diff = engine + .preview_diff( + "src/main.ts", + &EditMode::SearchReplace { + search: "line 2".to_string(), + replace: "LINE TWO".to_string(), + all: false, + }, + ) + .unwrap(); + assert!(diff.unified.contains("-line 2")); + assert!(diff.unified.contains("+LINE TWO")); + } + + #[test] + fn test_workspace_history() { + let (_dir, engine) = setup_engine(); + + engine.write("src/a.ts", "a", Some("Write a")).unwrap(); + engine.write("src/b.ts", "b", Some("Write b")).unwrap(); + + let history = engine.workspace_history(10); + assert_eq!(history.nodes.len(), 2); + assert_eq!(history.nodes[0].description.as_deref(), Some("Write b")); + assert_eq!(history.nodes[1].description.as_deref(), Some("Write a")); + } + + #[test] + fn test_edit_search_not_found() { + let (_dir, engine) = setup_engine(); + let result = engine.edit( + "src/main.ts", + &EditMode::SearchReplace { + search: "nonexistent text".to_string(), + replace: "replacement".to_string(), + all: false, + }, + None, + ); + assert!(result.is_err()); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs b/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs new file mode 100644 index 000000000..ce07d0ed9 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs @@ -0,0 +1,204 @@ +//! Git Bridge β€” git status, diff, and branch operations. +//! +//! Shells out to `git` CLI for operations. This keeps the implementation +//! simple and avoids pulling in libgit2 as a dependency. + +use std::path::Path; +use std::process::Command; + +use super::types::GitStatusInfo; + +/// Get git status for a workspace directory. +/// +/// Returns branch name, modified/added/deleted/untracked files. +pub fn git_status(workspace_root: &Path) -> GitStatusInfo { + // Get current branch + let branch = run_git(workspace_root, &["rev-parse", "--abbrev-ref", "HEAD"]) + .ok() + .map(|s| s.trim().to_string()); + + // Get porcelain status (machine-readable) + let status_output = match run_git(workspace_root, &["status", "--porcelain=v1"]) { + Ok(output) => output, + Err(e) => { + return GitStatusInfo { + success: false, + branch, + modified: Vec::new(), + added: Vec::new(), + deleted: Vec::new(), + untracked: Vec::new(), + error: Some(format!("git status failed: {}", e)), + }; + } + }; + + let mut modified = Vec::new(); + let mut added = Vec::new(); + let mut deleted = Vec::new(); + let mut untracked = Vec::new(); + + for line in status_output.lines() { + if line.len() < 3 { + continue; + } + + let status_code = &line[..2]; + let file_path = line[3..].trim().to_string(); + + // Parse porcelain v1 status codes + // First char = index status, second char = work tree status + match status_code { + "??" => untracked.push(file_path), + s if s.starts_with('A') || s.ends_with('A') => added.push(file_path), + s if s.starts_with('D') || s.ends_with('D') => deleted.push(file_path), + s if s.starts_with('M') || s.ends_with('M') + || s.starts_with('R') || s.ends_with('R') + || s.starts_with('C') || s.ends_with('C') => modified.push(file_path), + _ => { + // Catch-all: treat as modified + if !file_path.is_empty() { + modified.push(file_path); + } + } + } + } + + GitStatusInfo { + success: true, + branch, + modified, + added, + deleted, + untracked, + error: None, + } +} + +/// Get git diff output for the workspace. +/// +/// If `staged` is true, shows staged changes (--cached). +/// Otherwise shows unstaged working tree changes. +pub fn git_diff(workspace_root: &Path, staged: bool) -> Result { + let mut args = vec!["diff"]; + if staged { + args.push("--cached"); + } + run_git(workspace_root, &args) +} + +/// Get git diff against a specific ref (branch, commit, etc.). +pub fn git_diff_ref(workspace_root: &Path, reference: &str) -> Result { + run_git(workspace_root, &["diff", reference]) +} + +/// Get git log (last N commits, one-line format). +pub fn git_log(workspace_root: &Path, count: u32) -> Result { + run_git( + workspace_root, + &["log", &format!("-{}", count), "--oneline", "--no-decorate"], + ) +} + +/// Run a git command in the workspace directory. +fn run_git(workspace_root: &Path, args: &[&str]) -> Result { + let output = Command::new("git") + .args(args) + .current_dir(workspace_root) + .output() + .map_err(|e| format!("Failed to run git: {}", e))?; + + if output.status.success() { + String::from_utf8(output.stdout) + .map_err(|e| format!("Invalid UTF-8 in git output: {}", e)) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + Err(format!("git {} failed: {}", args.join(" "), stderr.trim())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_git_repo() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + + // Initialize a git repo + run_git(dir.path(), &["init"]).expect("git init should work"); + run_git( + dir.path(), + &["config", "user.email", "test@test.com"], + ) + .expect("git config email"); + run_git( + dir.path(), + &["config", "user.name", "Test"], + ) + .expect("git config name"); + + // Create an initial commit + fs::write(dir.path().join("initial.txt"), "hello\n").unwrap(); + run_git(dir.path(), &["add", "."]).expect("git add"); + run_git(dir.path(), &["commit", "-m", "Initial"]).expect("git commit"); + + dir + } + + #[test] + fn test_git_status_clean() { + let dir = setup_git_repo(); + let status = git_status(dir.path()); + assert!(status.success); + assert!(status.branch.is_some()); + assert!(status.modified.is_empty()); + assert!(status.untracked.is_empty()); + } + + #[test] + fn test_git_status_modified() { + let dir = setup_git_repo(); + fs::write(dir.path().join("initial.txt"), "modified\n").unwrap(); + + let status = git_status(dir.path()); + assert!(status.success); + assert!(status.modified.contains(&"initial.txt".to_string())); + } + + #[test] + fn test_git_status_untracked() { + let dir = setup_git_repo(); + fs::write(dir.path().join("new_file.txt"), "new\n").unwrap(); + + let status = git_status(dir.path()); + assert!(status.success); + assert!(status.untracked.contains(&"new_file.txt".to_string())); + } + + #[test] + fn test_git_diff() { + let dir = setup_git_repo(); + fs::write(dir.path().join("initial.txt"), "modified\n").unwrap(); + + let diff = git_diff(dir.path(), false).unwrap(); + assert!(diff.contains("-hello")); + assert!(diff.contains("+modified")); + } + + #[test] + fn test_git_log() { + let dir = setup_git_repo(); + let log = git_log(dir.path(), 5).unwrap(); + assert!(log.contains("Initial")); + } + + #[test] + fn test_git_status_not_a_repo() { + let dir = tempfile::tempdir().unwrap(); + let status = git_status(dir.path()); + // Should still return a result (possibly with error) + // git status in non-repo returns error + assert!(!status.success || status.branch.is_none()); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/mod.rs b/src/debug/jtag/workers/continuum-core/src/code/mod.rs new file mode 100644 index 000000000..c8184aa96 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/mod.rs @@ -0,0 +1,27 @@ +//! Code module β€” file operations, change tracking, and code intelligence. +//! +//! Provides the Rust foundation for the coding agent system: +//! - `types` β€” Shared wire types for IPC (ChangeNode, FileDiff, EditMode, etc.) +//! - `diff_engine` β€” Unified diff computation using the `similar` crate +//! - `change_graph` β€” Per-workspace DAG of file operations with undo/redo +//! - `path_security` β€” Workspace-scoped path validation and traversal guard +//! - `file_engine` β€” Per-persona file operations (read/write/edit/delete) +//! - `search` β€” Regex + glob code search with .gitignore awareness +//! - `tree` β€” Directory tree generation +//! - `git_bridge` β€” Git status, diff, and branch operations + +pub mod types; +pub mod diff_engine; +pub mod change_graph; +pub mod path_security; +pub mod file_engine; +pub mod search; +pub mod tree; +pub mod git_bridge; + +// Re-export key types for convenience +pub use types::*; +pub use change_graph::ChangeGraph; +pub use diff_engine::{compute_diff, compute_bidirectional_diff}; +pub use path_security::PathSecurity; +pub use file_engine::FileEngine; diff --git a/src/debug/jtag/workers/continuum-core/src/code/path_security.rs b/src/debug/jtag/workers/continuum-core/src/code/path_security.rs new file mode 100644 index 000000000..01f7f7bf1 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/path_security.rs @@ -0,0 +1,420 @@ +//! Path Security β€” workspace-scoped path validation and traversal guard. +//! +//! Each persona gets a workspace root directory they cannot escape. +//! All file paths are canonicalized and validated before any I/O operation. +//! +//! Security guarantees: +//! - No directory traversal (../ sequences resolved and blocked) +//! - Extension allowlist enforced on write operations +//! - File size limits enforced on writes +//! - Symlinks resolved before validation (no symlink-based escapes) + +use std::path::{Path, PathBuf}; + +use super::types::{ALLOWED_EXTENSIONS, MAX_WRITE_SIZE}; + +/// Workspace-scoped path security validator. +/// +/// Validates that all file operations stay within the workspace boundary. +/// Created per-persona with their assigned workspace root. +#[derive(Debug, Clone)] +pub struct PathSecurity { + /// The absolute, canonicalized workspace root. + workspace_root: PathBuf, + /// Optional read-only roots (e.g., the main codebase for discovery). + read_roots: Vec, +} + +/// Errors that can occur during path validation. +#[derive(Debug, Clone, PartialEq)] +pub enum PathSecurityError { + /// Path escapes the workspace boundary. + TraversalBlocked { path: String, workspace: String }, + /// File extension not in allowlist. + ExtensionBlocked { path: String, extension: String }, + /// File exceeds maximum write size. + FileTooLarge { path: String, size: u64, max: u64 }, + /// Path is not valid UTF-8. + InvalidPath { path: String }, + /// Workspace root does not exist or is not a directory. + InvalidWorkspace { path: String }, +} + +impl std::fmt::Display for PathSecurityError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::TraversalBlocked { path, workspace } => { + write!(f, "Path '{}' escapes workspace '{}'", path, workspace) + } + Self::ExtensionBlocked { path, extension } => { + write!(f, "Extension '.{}' not allowed for '{}'", extension, path) + } + Self::FileTooLarge { path, size, max } => { + write!(f, "File '{}' is {} bytes (max: {})", path, size, max) + } + Self::InvalidPath { path } => { + write!(f, "Invalid path: '{}'", path) + } + Self::InvalidWorkspace { path } => { + write!(f, "Invalid workspace root: '{}'", path) + } + } + } +} + +impl std::error::Error for PathSecurityError {} + +impl PathSecurity { + /// Create a new PathSecurity validator for a workspace. + /// + /// The workspace_root must exist and be a directory. + /// It will be canonicalized (resolving symlinks). + pub fn new(workspace_root: &Path) -> Result { + let canonical = workspace_root.canonicalize().map_err(|_| { + PathSecurityError::InvalidWorkspace { + path: workspace_root.display().to_string(), + } + })?; + + if !canonical.is_dir() { + return Err(PathSecurityError::InvalidWorkspace { + path: canonical.display().to_string(), + }); + } + + Ok(Self { + workspace_root: canonical, + read_roots: Vec::new(), + }) + } + + /// Add a read-only root (e.g., the main codebase for code discovery). + /// + /// Paths within read_roots can be read but not written. + pub fn add_read_root(&mut self, root: &Path) -> Result<(), PathSecurityError> { + let canonical = root.canonicalize().map_err(|_| { + PathSecurityError::InvalidWorkspace { + path: root.display().to_string(), + } + })?; + self.read_roots.push(canonical); + Ok(()) + } + + /// Validate and resolve a path for read operations. + /// + /// The path must be within the workspace root OR any read-only root. + /// Returns the absolute, canonicalized path. + pub fn validate_read(&self, relative_path: &str) -> Result { + // Try workspace root first + if let Ok(path) = self.resolve_within(&self.workspace_root, relative_path) { + return Ok(path); + } + + // Try read-only roots + for root in &self.read_roots { + if let Ok(path) = self.resolve_within(root, relative_path) { + return Ok(path); + } + } + + Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }) + } + + /// Validate and resolve a path for write operations. + /// + /// The path must be within the workspace root (not read-only roots). + /// Also validates the file extension against the allowlist. + /// Returns the absolute path (parent dir must exist). + pub fn validate_write(&self, relative_path: &str) -> Result { + let resolved = self.resolve_for_write(relative_path)?; + self.check_extension(relative_path)?; + Ok(resolved) + } + + /// Validate file size for a write operation. + pub fn validate_size(&self, path: &str, size: u64) -> Result<(), PathSecurityError> { + if size > MAX_WRITE_SIZE { + return Err(PathSecurityError::FileTooLarge { + path: path.to_string(), + size, + max: MAX_WRITE_SIZE, + }); + } + Ok(()) + } + + /// Resolve a relative path within a root, ensuring it doesn't escape. + /// + /// For existing files, uses canonicalize() to resolve symlinks. + /// For new files, manually resolves the path and checks the prefix. + fn resolve_within(&self, root: &Path, relative_path: &str) -> Result { + let joined = root.join(relative_path); + + // For existing paths, canonicalize resolves symlinks + if joined.exists() { + let canonical = joined.canonicalize().map_err(|_| { + PathSecurityError::InvalidPath { + path: relative_path.to_string(), + } + })?; + + if canonical.starts_with(root) { + return Ok(canonical); + } + + return Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: root.display().to_string(), + }); + } + + // For non-existing paths, resolve parent and check + Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: root.display().to_string(), + }) + } + + /// Resolve a relative path for write operations (file may not exist yet). + /// + /// The parent directory must exist and be within the workspace root. + fn resolve_for_write(&self, relative_path: &str) -> Result { + // Check for obvious traversal attempts before any I/O + let normalized = self.normalize_path(relative_path); + if normalized.starts_with("..") || normalized.contains("/../") { + return Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }); + } + + let joined = self.workspace_root.join(&normalized); + + // If the file exists, canonicalize it + if joined.exists() { + let canonical = joined.canonicalize().map_err(|_| { + PathSecurityError::InvalidPath { + path: relative_path.to_string(), + } + })?; + + if !canonical.starts_with(&self.workspace_root) { + return Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }); + } + + return Ok(canonical); + } + + // For new files: canonicalize the parent, then append filename + if let Some(parent) = joined.parent() { + if parent.exists() { + let canonical_parent = parent.canonicalize().map_err(|_| { + PathSecurityError::InvalidPath { + path: relative_path.to_string(), + } + })?; + + if !canonical_parent.starts_with(&self.workspace_root) { + return Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }); + } + + if let Some(filename) = joined.file_name() { + return Ok(canonical_parent.join(filename)); + } + } + } + + Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }) + } + + /// Check that a file's extension is in the allowlist. + fn check_extension(&self, path: &str) -> Result<(), PathSecurityError> { + let path = Path::new(path); + let extension = path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + if extension.is_empty() || !ALLOWED_EXTENSIONS.contains(&extension) { + return Err(PathSecurityError::ExtensionBlocked { + path: path.display().to_string(), + extension: extension.to_string(), + }); + } + + Ok(()) + } + + /// Normalize a path by collapsing `.` and `..` components without I/O. + /// + /// This is a pre-check before any filesystem operations. + fn normalize_path(&self, path: &str) -> String { + let mut components = Vec::new(); + + for part in path.split('/') { + match part { + "" | "." => continue, + ".." => { + components.pop(); + } + other => components.push(other), + } + } + + components.join("/") + } + + /// Get the workspace root path. + pub fn workspace_root(&self) -> &Path { + &self.workspace_root + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_workspace() -> (tempfile::TempDir, PathSecurity) { + let dir = tempfile::tempdir().unwrap(); + // Create some subdirectories and files + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write(dir.path().join("src/main.ts"), "console.log('hello');").unwrap(); + fs::write(dir.path().join("readme.md"), "# Hello").unwrap(); + + let security = PathSecurity::new(dir.path()).unwrap(); + (dir, security) + } + + #[test] + fn test_valid_read() { + let (_dir, security) = setup_workspace(); + let result = security.validate_read("src/main.ts"); + assert!(result.is_ok()); + } + + #[test] + fn test_traversal_blocked() { + let (_dir, security) = setup_workspace(); + let result = security.validate_read("../../etc/passwd"); + assert!(matches!(result, Err(PathSecurityError::TraversalBlocked { .. }))); + } + + #[test] + fn test_dot_dot_traversal() { + let (_dir, security) = setup_workspace(); + let result = security.validate_write("src/../../etc/passwd.ts"); + assert!(result.is_err()); + } + + #[test] + fn test_valid_write_existing() { + let (_dir, security) = setup_workspace(); + let result = security.validate_write("src/main.ts"); + assert!(result.is_ok()); + } + + #[test] + fn test_valid_write_new_file() { + let (_dir, security) = setup_workspace(); + // New file in existing directory + let result = security.validate_write("src/new_file.ts"); + assert!(result.is_ok()); + } + + #[test] + fn test_extension_blocked() { + let (_dir, security) = setup_workspace(); + let result = security.validate_write("src/malware.exe"); + assert!(matches!(result, Err(PathSecurityError::ExtensionBlocked { .. }))); + } + + #[test] + fn test_allowed_extensions() { + let (_dir, security) = setup_workspace(); + // All these should pass extension check + for ext in &["ts", "tsx", "js", "jsx", "json", "md", "css", "html", "rs", "toml", "yaml", "yml", "txt", "sh", "py"] { + let path = format!("src/test.{}", ext); + let result = security.check_extension(&path); + assert!(result.is_ok(), "Extension '{}' should be allowed", ext); + } + } + + #[test] + fn test_file_too_large() { + let (_dir, security) = setup_workspace(); + let result = security.validate_size("test.ts", MAX_WRITE_SIZE + 1); + assert!(matches!(result, Err(PathSecurityError::FileTooLarge { .. }))); + } + + #[test] + fn test_file_within_limit() { + let (_dir, security) = setup_workspace(); + let result = security.validate_size("test.ts", MAX_WRITE_SIZE); + assert!(result.is_ok()); + } + + #[test] + fn test_read_root() { + let dir = tempfile::tempdir().unwrap(); + let read_dir = tempfile::tempdir().unwrap(); + fs::write(read_dir.path().join("lib.ts"), "export {};").unwrap(); + + let mut security = PathSecurity::new(dir.path()).unwrap(); + security.add_read_root(read_dir.path()).unwrap(); + + // Can read from read root + let result = security.validate_read("lib.ts"); + assert!(result.is_ok()); + } + + #[test] + fn test_cannot_write_to_read_root() { + let dir = tempfile::tempdir().unwrap(); + let read_dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::create_dir_all(read_dir.path().join("libs")).unwrap(); + fs::write(read_dir.path().join("libs/external.ts"), "export {};").unwrap(); + + let mut security = PathSecurity::new(dir.path()).unwrap(); + security.add_read_root(read_dir.path()).unwrap(); + + // Can read from read root via relative path + let read_result = security.validate_read("libs/external.ts"); + assert!(read_result.is_ok()); + + // Cannot write to a path that only exists under read root. + // "libs/" doesn't exist in the workspace, so the parent + // directory check fails and write validation rejects it. + let write_result = security.validate_write("libs/external.ts"); + assert!(write_result.is_err(), "Should not be able to write to path only in read root"); + } + + #[test] + fn test_normalize_path() { + let (_dir, security) = setup_workspace(); + assert_eq!(security.normalize_path("src/../src/main.ts"), "src/main.ts"); + assert_eq!(security.normalize_path("./src/main.ts"), "src/main.ts"); + assert_eq!(security.normalize_path("src/./main.ts"), "src/main.ts"); + assert_eq!(security.normalize_path("a/b/c/../../d"), "a/d"); + } + + #[test] + fn test_invalid_workspace() { + let result = PathSecurity::new(Path::new("/nonexistent/path/that/does/not/exist")); + assert!(matches!(result, Err(PathSecurityError::InvalidWorkspace { .. }))); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/search.rs b/src/debug/jtag/workers/continuum-core/src/code/search.rs new file mode 100644 index 000000000..cefb894bf --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/search.rs @@ -0,0 +1,221 @@ +//! Code Search β€” regex + glob file search with .gitignore awareness. +//! +//! Uses the `ignore` crate (from ripgrep) for .gitignore-aware file walking, +//! and standard regex for content matching. + +use std::path::Path; + +use super::types::{SearchMatch, SearchResult}; + +/// Search for a regex pattern across files in a directory. +/// +/// Respects .gitignore rules and supports file glob filtering. +/// Returns matches up to `max_results`. +pub fn search_files( + root: &Path, + pattern: &str, + file_glob: Option<&str>, + max_results: u32, +) -> SearchResult { + let regex = match regex::Regex::new(pattern) { + Ok(r) => r, + Err(e) => { + return SearchResult { + success: false, + matches: Vec::new(), + total_matches: 0, + files_searched: 0, + error: Some(format!("Invalid regex: {}", e)), + }; + } + }; + + // Build the file walker with .gitignore awareness + let mut builder = ignore::WalkBuilder::new(root); + builder + .hidden(true) // Skip hidden files + .git_ignore(true) // Respect .gitignore + .git_global(true) // Respect global gitignore + .git_exclude(true); // Respect .git/info/exclude + + // Apply file glob filter if provided + if let Some(glob) = file_glob { + // The ignore crate uses overrides for glob filtering + let mut overrides = ignore::overrides::OverrideBuilder::new(root); + if let Err(e) = overrides.add(glob) { + return SearchResult { + success: false, + matches: Vec::new(), + total_matches: 0, + files_searched: 0, + error: Some(format!("Invalid glob pattern '{}': {}", glob, e)), + }; + } + match overrides.build() { + Ok(ov) => { builder.overrides(ov); } + Err(e) => { + return SearchResult { + success: false, + matches: Vec::new(), + total_matches: 0, + files_searched: 0, + error: Some(format!("Invalid glob pattern: {}", e)), + }; + } + } + } + + let mut matches = Vec::new(); + let mut files_searched = 0u32; + let mut total_matches = 0u32; + let max = max_results as usize; + + for entry in builder.build().flatten() { + let path = entry.path(); + + // Skip directories + if path.is_dir() { + continue; + } + + // Skip binary files (simple heuristic: try reading as UTF-8) + let content = match std::fs::read_to_string(path) { + Ok(c) => c, + Err(_) => continue, // Skip files we can't read as text + }; + + files_searched += 1; + + // Search for matches in each line + for (line_idx, line) in content.lines().enumerate() { + for mat in regex.find_iter(line) { + total_matches += 1; + + if matches.len() < max { + let relative_path = path + .strip_prefix(root) + .unwrap_or(path) + .display() + .to_string(); + + matches.push(SearchMatch { + file_path: relative_path, + line_number: (line_idx + 1) as u32, + line_content: line.to_string(), + match_start: mat.start() as u32, + match_end: mat.end() as u32, + }); + } + } + } + + // Early exit if we have enough results + if matches.len() >= max { + break; + } + } + + SearchResult { + success: true, + matches, + total_matches, + files_searched, + error: None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_search_dir() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write( + dir.path().join("src/main.ts"), + "function hello() {\n console.log('world');\n}\n", + ) + .unwrap(); + fs::write( + dir.path().join("src/utils.ts"), + "export function greet(name: string) {\n return `Hello ${name}`;\n}\n", + ) + .unwrap(); + fs::write( + dir.path().join("src/style.css"), + "body { color: red; }\n", + ) + .unwrap(); + fs::write(dir.path().join("readme.md"), "# Hello World\n").unwrap(); + dir + } + + #[test] + fn test_search_basic() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), "function", None, 100); + assert!(result.success); + assert_eq!(result.total_matches, 2); // hello() and greet() + assert!(result.files_searched >= 2); + } + + #[test] + fn test_search_with_glob() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), "function", Some("*.ts"), 100); + assert!(result.success); + assert_eq!(result.total_matches, 2); + // All matches should be .ts files + for m in &result.matches { + assert!(m.file_path.ends_with(".ts")); + } + } + + #[test] + fn test_search_max_results() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), ".", None, 3); + assert!(result.success); + assert!(result.matches.len() <= 3); + } + + #[test] + fn test_search_no_matches() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), "zzz_nonexistent_zzz", None, 100); + assert!(result.success); + assert_eq!(result.total_matches, 0); + assert!(result.matches.is_empty()); + } + + #[test] + fn test_search_regex() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), r"function\s+\w+", None, 100); + assert!(result.success); + assert_eq!(result.total_matches, 2); + } + + #[test] + fn test_search_invalid_regex() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), "[invalid", None, 100); + assert!(!result.success); + assert!(result.error.is_some()); + } + + #[test] + fn test_search_match_positions() { + let dir = setup_search_dir(); + let result = search_files(dir.path(), "hello", None, 100); + assert!(result.success); + for m in &result.matches { + assert!(m.match_start < m.match_end); + assert_eq!( + &m.line_content[m.match_start as usize..m.match_end as usize], + "hello" + ); + } + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/tree.rs b/src/debug/jtag/workers/continuum-core/src/code/tree.rs new file mode 100644 index 000000000..6b4ec8e7b --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/tree.rs @@ -0,0 +1,305 @@ +//! Directory Tree β€” recursive directory structure generation. +//! +//! Generates a tree representation of a directory, respecting .gitignore +//! rules and supporting depth limits. + +use std::fs; +use std::path::Path; + +use super::types::{TreeNode, TreeResult}; + +/// Generate a directory tree starting from `root`. +/// +/// Respects .gitignore, skips hidden files by default, +/// and limits depth to prevent runaway recursion. +pub fn generate_tree( + root: &Path, + max_depth: u32, + include_hidden: bool, +) -> TreeResult { + if !root.exists() || !root.is_dir() { + return TreeResult { + success: false, + root: None, + total_files: 0, + total_directories: 0, + error: Some(format!("Not a directory: {}", root.display())), + }; + } + + let mut total_files = 0u32; + let mut total_directories = 0u32; + + let tree = build_tree_node( + root, + root, + 0, + max_depth, + include_hidden, + &mut total_files, + &mut total_directories, + ); + + TreeResult { + success: true, + root: tree, + total_files, + total_directories, + error: None, + } +} + +/// Recursively build a TreeNode for a directory entry. +fn build_tree_node( + entry_path: &Path, + root: &Path, + current_depth: u32, + max_depth: u32, + include_hidden: bool, + total_files: &mut u32, + total_directories: &mut u32, +) -> Option { + let name = entry_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_else(|| entry_path.display().to_string()); + + // Skip hidden files/directories unless requested + if !include_hidden && name.starts_with('.') && entry_path != root { + return None; + } + + let relative_path = entry_path + .strip_prefix(root) + .unwrap_or(entry_path) + .display() + .to_string(); + + if entry_path.is_dir() { + *total_directories += 1; + + let mut children = Vec::new(); + + if current_depth < max_depth { + // Read directory entries + if let Ok(entries) = fs::read_dir(entry_path) { + let mut entries: Vec<_> = entries.filter_map(|e| e.ok()).collect(); + // Sort entries: directories first, then alphabetically + entries.sort_by(|a, b| { + let a_is_dir = a.path().is_dir(); + let b_is_dir = b.path().is_dir(); + match (a_is_dir, b_is_dir) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.file_name().cmp(&b.file_name()), + } + }); + + for entry in entries { + let entry_name = entry.file_name().to_string_lossy().to_string(); + + // Skip common ignored directories + if entry.path().is_dir() && is_ignored_dir(&entry_name) { + continue; + } + + if let Some(child) = build_tree_node( + &entry.path(), + root, + current_depth + 1, + max_depth, + include_hidden, + total_files, + total_directories, + ) { + children.push(child); + } + } + } + } + + Some(TreeNode { + name, + path: if relative_path.is_empty() { + ".".to_string() + } else { + relative_path + }, + is_directory: true, + size_bytes: None, + children, + }) + } else { + *total_files += 1; + + let size_bytes = fs::metadata(entry_path).map(|m| m.len()).ok(); + + Some(TreeNode { + name, + path: relative_path, + is_directory: false, + size_bytes, + children: Vec::new(), + }) + } +} + +/// Common directories to skip in tree generation. +fn is_ignored_dir(name: &str) -> bool { + matches!( + name, + "node_modules" + | ".git" + | "target" + | "dist" + | "build" + | ".next" + | ".nuxt" + | ".cache" + | "__pycache__" + | ".tsbuildinfo" + | "coverage" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_tree_dir() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src/components")).unwrap(); + fs::create_dir_all(dir.path().join("src/utils")).unwrap(); + fs::create_dir_all(dir.path().join("tests")).unwrap(); + fs::write(dir.path().join("src/main.ts"), "main").unwrap(); + fs::write(dir.path().join("src/components/App.tsx"), "app").unwrap(); + fs::write(dir.path().join("src/utils/helpers.ts"), "helpers").unwrap(); + fs::write(dir.path().join("tests/main.test.ts"), "test").unwrap(); + fs::write(dir.path().join("package.json"), "{}").unwrap(); + dir + } + + #[test] + fn test_generate_tree() { + let dir = setup_tree_dir(); + let result = generate_tree(dir.path(), 10, false); + assert!(result.success); + assert!(result.root.is_some()); + assert!(result.total_files > 0); + assert!(result.total_directories > 0); + } + + #[test] + fn test_tree_depth_limit() { + let dir = setup_tree_dir(); + + // Depth 0: only root, no children explored + let shallow = generate_tree(dir.path(), 0, false); + assert!(shallow.success); + let root = shallow.root.unwrap(); + assert!(root.children.is_empty()); + + // Depth 1: root's immediate children + let one_deep = generate_tree(dir.path(), 1, false); + assert!(one_deep.success); + let root = one_deep.root.unwrap(); + assert!(!root.children.is_empty()); + // Subdirectories at depth 1 should have no children + for child in &root.children { + if child.is_directory { + assert!(child.children.is_empty()); + } + } + } + + #[test] + fn test_tree_sorted() { + let dir = setup_tree_dir(); + let result = generate_tree(dir.path(), 10, false); + let root = result.root.unwrap(); + + // Directories should come before files + let mut saw_file = false; + for child in &root.children { + if child.is_directory { + assert!(!saw_file, "Directory after file β€” sorting broken"); + } else { + saw_file = true; + } + } + } + + #[test] + fn test_tree_skips_node_modules() { + let dir = setup_tree_dir(); + fs::create_dir_all(dir.path().join("node_modules/foo")).unwrap(); + fs::write(dir.path().join("node_modules/foo/index.js"), "x").unwrap(); + + let result = generate_tree(dir.path(), 10, false); + let root = result.root.unwrap(); + + // node_modules should not appear + for child in &root.children { + assert_ne!(child.name, "node_modules"); + } + } + + #[test] + fn test_tree_skips_hidden() { + let dir = setup_tree_dir(); + fs::create_dir_all(dir.path().join(".hidden")).unwrap(); + fs::write(dir.path().join(".hidden/secret"), "s").unwrap(); + + let result = generate_tree(dir.path(), 10, false); + let root = result.root.unwrap(); + + for child in &root.children { + assert!(!child.name.starts_with('.')); + } + } + + #[test] + fn test_tree_includes_hidden() { + let dir = setup_tree_dir(); + fs::create_dir_all(dir.path().join(".config")).unwrap(); + fs::write(dir.path().join(".config/settings.json"), "{}").unwrap(); + + let result = generate_tree(dir.path(), 10, true); + let root = result.root.unwrap(); + + let has_hidden = root.children.iter().any(|c| c.name == ".config"); + assert!(has_hidden, "Hidden directory should be included"); + } + + #[test] + fn test_tree_nonexistent() { + let result = generate_tree(Path::new("/nonexistent/path"), 10, false); + assert!(!result.success); + assert!(result.error.is_some()); + } + + #[test] + fn test_tree_file_sizes() { + let dir = setup_tree_dir(); + let result = generate_tree(dir.path(), 10, false); + let root = result.root.unwrap(); + + // Find a file and check it has size + fn find_file(node: &TreeNode) -> Option<&TreeNode> { + if !node.is_directory { + return Some(node); + } + for child in &node.children { + if let Some(f) = find_file(child) { + return Some(f); + } + } + None + } + + let file = find_file(&root).expect("Should have at least one file"); + assert!(file.size_bytes.is_some()); + assert!(file.size_bytes.unwrap() > 0); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/types.rs b/src/debug/jtag/workers/continuum-core/src/code/types.rs new file mode 100644 index 000000000..6d3ef3625 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/types.rs @@ -0,0 +1,239 @@ +//! Shared types for the code module. +//! +//! **Single source of truth** β€” TypeScript types are generated via `ts-rs`. +//! These are the wire types for IPC communication between TS and Rust. +//! +//! Re-generate TypeScript bindings: +//! cargo test --package continuum-core export_bindings +//! +//! Output: shared/generated/code/*.ts + +use serde::{Deserialize, Serialize}; +use ts_rs::TS; +use uuid::Uuid; + +/// Every file operation creates a ChangeNode in the DAG. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ChangeNode.ts")] +pub struct ChangeNode { + #[ts(type = "string")] + pub id: Uuid, + /// Parent node IDs. Empty for root operations. Multiple for merges. + #[ts(type = "Array")] + pub parent_ids: Vec, + /// Who performed this operation (persona UUID string). + pub author_id: String, + /// When the operation occurred (unix millis). + #[ts(type = "number")] + pub timestamp: u64, + /// The file affected (relative to workspace root). + pub file_path: String, + /// The operation type. + pub operation: FileOperation, + /// Forward diff (apply to go forward in time). + pub forward_diff: FileDiff, + /// Reverse diff (apply to go backward in time β€” undo). + pub reverse_diff: FileDiff, + /// Optional description from the AI about what this change does. + #[ts(optional)] + pub description: Option, + /// Workspace ID this change belongs to. + pub workspace_id: String, +} + +/// File operation types. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export, export_to = "../../../shared/generated/code/FileOperation.ts")] +pub enum FileOperation { + Create, + Write, + Edit, + Delete, + Rename { + from: String, + to: String, + }, + /// An undo operation that reversed a previous change. + Undo { + #[ts(type = "string")] + reverted_id: Uuid, + }, +} + +/// A file diff consisting of hunks. +#[derive(Debug, Clone, Serialize, Deserialize, Default, TS)] +#[ts(export, export_to = "../../../shared/generated/code/FileDiff.ts")] +pub struct FileDiff { + /// Unified diff text (compatible with standard tooling). + pub unified: String, + /// Structured hunks for programmatic application. + pub hunks: Vec, +} + +/// A single hunk in a unified diff. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/DiffHunk.ts")] +pub struct DiffHunk { + pub old_start: u32, + pub old_count: u32, + pub new_start: u32, + pub new_count: u32, + /// The hunk content (with +/- prefixes on each line). + pub content: String, +} + +/// How to edit a file (four modes). +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[serde(tag = "type", rename_all = "snake_case")] +#[ts(export, export_to = "../../../shared/generated/code/EditMode.ts")] +pub enum EditMode { + /// Replace content between line numbers (1-indexed, inclusive). + LineRange { + start_line: u32, + end_line: u32, + new_content: String, + }, + /// Find text and replace it. + SearchReplace { + search: String, + replace: String, + #[serde(default)] + all: bool, + }, + /// Insert content at a specific line (pushes existing lines down). + InsertAt { + line: u32, + content: String, + }, + /// Append content to end of file. + Append { + content: String, + }, +} + +/// Result of a file write/edit/delete operation. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/WriteResult.ts")] +pub struct WriteResult { + pub success: bool, + /// UUID of the ChangeNode created. + #[ts(optional)] + pub change_id: Option, + pub file_path: String, + #[ts(type = "number")] + pub bytes_written: u64, + #[ts(optional)] + pub error: Option, +} + +/// Result of a file read operation. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ReadResult.ts")] +pub struct ReadResult { + pub success: bool, + #[ts(optional)] + pub content: Option, + pub file_path: String, + pub total_lines: u32, + pub lines_returned: u32, + pub start_line: u32, + pub end_line: u32, + #[ts(type = "number")] + pub size_bytes: u64, + #[ts(optional)] + pub error: Option, +} + +/// A single search match. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/SearchMatch.ts")] +pub struct SearchMatch { + pub file_path: String, + pub line_number: u32, + pub line_content: String, + pub match_start: u32, + pub match_end: u32, +} + +/// Result of a code search operation. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/SearchResult.ts")] +pub struct SearchResult { + pub success: bool, + pub matches: Vec, + pub total_matches: u32, + pub files_searched: u32, + #[ts(optional)] + pub error: Option, +} + +/// A node in a directory tree. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/TreeNode.ts")] +pub struct TreeNode { + pub name: String, + pub path: String, + pub is_directory: bool, + #[ts(optional, type = "number")] + pub size_bytes: Option, + pub children: Vec, +} + +/// Result of a tree operation. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/TreeResult.ts")] +pub struct TreeResult { + pub success: bool, + #[ts(optional)] + pub root: Option, + pub total_files: u32, + pub total_directories: u32, + #[ts(optional)] + pub error: Option, +} + +/// Result of an undo operation. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/UndoResult.ts")] +pub struct UndoResult { + pub success: bool, + pub changes_undone: Vec, + #[ts(optional)] + pub error: Option, +} + +/// History query result. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/HistoryResult.ts")] +pub struct HistoryResult { + pub success: bool, + pub nodes: Vec, + pub total_count: u32, + #[ts(optional)] + pub error: Option, +} + +/// Git status information. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/GitStatusInfo.ts")] +pub struct GitStatusInfo { + pub success: bool, + #[ts(optional)] + pub branch: Option, + pub modified: Vec, + pub added: Vec, + pub deleted: Vec, + pub untracked: Vec, + #[ts(optional)] + pub error: Option, +} + +/// Allowed file extensions for write operations. +pub const ALLOWED_EXTENSIONS: &[&str] = &[ + "ts", "tsx", "js", "jsx", "json", "md", "css", "html", + "rs", "toml", "yaml", "yml", "txt", "sh", "py", +]; + +/// Maximum file size for write operations (1MB). +pub const MAX_WRITE_SIZE: u64 = 1_048_576; diff --git a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs index 6c03a187d..4aa067dee 100644 --- a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs +++ b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs @@ -12,6 +12,7 @@ use crate::voice::{UtteranceEvent, VoiceParticipant}; use crate::persona::{PersonaInbox, PersonaCognitionEngine, InboxMessage, SenderType, Modality, ChannelRegistry, ChannelEnqueueRequest, ActivityDomain, PersonaState}; use crate::rag::RagEngine; use crate::logging::TimingGuard; +use crate::code::{self, FileEngine, PathSecurity}; use ts_rs::TS; use crate::{log_debug, log_info, log_error}; use serde::{Deserialize, Serialize}; @@ -284,6 +285,111 @@ enum Request { event: crate::memory::CorpusTimelineEvent, }, + // ======================================================================== + // Code Module Commands + // ======================================================================== + + /// Create a per-persona file engine (workspace). + #[serde(rename = "code/create-workspace")] + CodeCreateWorkspace { + persona_id: String, + workspace_root: String, + #[serde(default)] + read_roots: Vec, + }, + + /// Read a file (or line range). + #[serde(rename = "code/read")] + CodeRead { + persona_id: String, + file_path: String, + start_line: Option, + end_line: Option, + }, + + /// Write/create a file. + #[serde(rename = "code/write")] + CodeWrite { + persona_id: String, + file_path: String, + content: String, + description: Option, + }, + + /// Edit a file using an EditMode. + #[serde(rename = "code/edit")] + CodeEdit { + persona_id: String, + file_path: String, + edit_mode: code::EditMode, + description: Option, + }, + + /// Delete a file. + #[serde(rename = "code/delete")] + CodeDelete { + persona_id: String, + file_path: String, + description: Option, + }, + + /// Preview an edit as a unified diff (read-only). + #[serde(rename = "code/diff")] + CodeDiff { + persona_id: String, + file_path: String, + edit_mode: code::EditMode, + }, + + /// Undo a specific change or the last N changes. + #[serde(rename = "code/undo")] + CodeUndo { + persona_id: String, + change_id: Option, + count: Option, + }, + + /// Get change history for a file or workspace. + #[serde(rename = "code/history")] + CodeHistory { + persona_id: String, + file_path: Option, + limit: Option, + }, + + /// Search files with regex + optional glob filter. + #[serde(rename = "code/search")] + CodeSearch { + persona_id: String, + pattern: String, + file_glob: Option, + max_results: Option, + }, + + /// Generate a directory tree. + #[serde(rename = "code/tree")] + CodeTree { + persona_id: String, + path: Option, + max_depth: Option, + #[serde(default)] + include_hidden: bool, + }, + + /// Get git status for the workspace. + #[serde(rename = "code/git-status")] + CodeGitStatus { + persona_id: String, + }, + + /// Get git diff (staged or unstaged). + #[serde(rename = "code/git-diff")] + CodeGitDiff { + persona_id: String, + #[serde(default)] + staged: bool, + }, + #[serde(rename = "health-check")] HealthCheck, @@ -353,6 +459,8 @@ struct ServerState { /// Per-persona memory manager β€” pure compute on in-memory MemoryCorpus. /// Data comes from the TS ORM via IPC. Zero SQL access. memory_manager: Arc, + /// Per-persona file engines β€” workspace-scoped file operations with change tracking. + file_engines: Arc>, } impl ServerState { @@ -371,6 +479,7 @@ impl ServerState { audio_pool: Arc::new(crate::voice::audio_buffer::AudioBufferPool::new()), rt_handle, memory_manager, + file_engines: Arc::new(DashMap::new()), } } @@ -1168,6 +1277,279 @@ impl ServerState { }) } + // ================================================================ + // Code Module Handlers + // ================================================================ + + Request::CodeCreateWorkspace { persona_id, workspace_root, read_roots } => { + let _timer = TimingGuard::new("ipc", "code_create_workspace"); + + let root = std::path::Path::new(&workspace_root); + let security = match PathSecurity::new(root) { + Ok(mut s) => { + for rr in &read_roots { + if let Err(e) = s.add_read_root(std::path::Path::new(rr)) { + return HandleResult::Json(Response::error( + format!("Invalid read root '{}': {}", rr, e) + )); + } + } + s + } + Err(e) => { + return HandleResult::Json(Response::error(format!("Invalid workspace: {}", e))); + } + }; + + let engine = FileEngine::new(&persona_id, security); + self.file_engines.insert(persona_id.clone(), engine); + + log_info!("ipc", "code", "Created workspace for {} at {}", persona_id, workspace_root); + HandleResult::Json(Response::success(serde_json::json!({ "created": true }))) + } + + Request::CodeRead { persona_id, file_path, start_line, end_line } => { + let _timer = TimingGuard::new("ipc", "code_read"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match engine.read(&file_path, start_line, end_line) { + Ok(result) => HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + + Request::CodeWrite { persona_id, file_path, content, description } => { + let _timer = TimingGuard::new("ipc", "code_write"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match engine.write(&file_path, &content, description.as_deref()) { + Ok(result) => { + log_info!("ipc", "code", "Write {} ({} bytes) by {}", + file_path, result.bytes_written, persona_id); + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + + Request::CodeEdit { persona_id, file_path, edit_mode, description } => { + let _timer = TimingGuard::new("ipc", "code_edit"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match engine.edit(&file_path, &edit_mode, description.as_deref()) { + Ok(result) => { + log_info!("ipc", "code", "Edit {} by {}", file_path, persona_id); + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + + Request::CodeDelete { persona_id, file_path, description } => { + let _timer = TimingGuard::new("ipc", "code_delete"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match engine.delete(&file_path, description.as_deref()) { + Ok(result) => { + log_info!("ipc", "code", "Delete {} by {}", file_path, persona_id); + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + + Request::CodeDiff { persona_id, file_path, edit_mode } => { + let _timer = TimingGuard::new("ipc", "code_diff"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match engine.preview_diff(&file_path, &edit_mode) { + Ok(diff) => HandleResult::Json(Response::success( + serde_json::to_value(&diff).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + + Request::CodeUndo { persona_id, change_id, count } => { + let _timer = TimingGuard::new("ipc", "code_undo"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + if let Some(id_str) = change_id { + // Undo specific change + let change_uuid = match Uuid::parse_str(&id_str) { + Ok(u) => u, + Err(e) => return HandleResult::Json(Response::error( + format!("Invalid change_id: {}", e) + )), + }; + match engine.undo(&change_uuid) { + Ok(result) => { + log_info!("ipc", "code", "Undo {} by {}", id_str, persona_id); + HandleResult::Json(Response::success(serde_json::json!({ + "success": true, + "changes_undone": [serde_json::to_value(&result).unwrap_or_default()], + "error": null + }))) + } + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } else { + // Undo last N + let n = count.unwrap_or(1); + match engine.undo_last(n) { + Ok(result) => HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(format!("{}", e))), + } + } + } + + Request::CodeHistory { persona_id, file_path, limit } => { + let _timer = TimingGuard::new("ipc", "code_history"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + let lim = limit.unwrap_or(50); + let result = if let Some(fp) = file_path { + engine.file_history(&fp, lim) + } else { + engine.workspace_history(lim) + }; + + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + + Request::CodeSearch { persona_id, pattern, file_glob, max_results } => { + let _timer = TimingGuard::new("ipc", "code_search"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + let max = max_results.unwrap_or(100); + let result = code::search::search_files( + &engine.workspace_root(), + &pattern, + file_glob.as_deref(), + max, + ); + + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + + Request::CodeTree { persona_id, path, max_depth, include_hidden } => { + let _timer = TimingGuard::new("ipc", "code_tree"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + let root = match &path { + Some(p) => engine.workspace_root().join(p), + None => engine.workspace_root(), + }; + let depth = max_depth.unwrap_or(5); + let result = code::tree::generate_tree(&root, depth, include_hidden); + + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + + Request::CodeGitStatus { persona_id } => { + let _timer = TimingGuard::new("ipc", "code_git_status"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + let result = code::git_bridge::git_status(&engine.workspace_root()); + HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )) + } + + Request::CodeGitDiff { persona_id, staged } => { + let _timer = TimingGuard::new("ipc", "code_git_diff"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match code::git_bridge::git_diff(&engine.workspace_root(), staged) { + Ok(diff) => HandleResult::Json(Response::success(serde_json::json!({ + "diff": diff + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + Request::HealthCheck => { HandleResult::Json(Response::success(serde_json::json!({ "healthy": true }))) } diff --git a/src/debug/jtag/workers/continuum-core/src/lib.rs b/src/debug/jtag/workers/continuum-core/src/lib.rs index 9bcf967f7..828ab5208 100644 --- a/src/debug/jtag/workers/continuum-core/src/lib.rs +++ b/src/debug/jtag/workers/continuum-core/src/lib.rs @@ -20,6 +20,7 @@ pub mod ffi; pub mod utils; pub mod rag; pub mod memory; +pub mod code; pub use audio_constants::*; From d03aae65e775ef08e4c2366d4204c664329471d3 Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 17:29:26 -0600 Subject: [PATCH 02/14] Remove legacy development/code/* commands, clean up CodeDaemon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete old pre-Rust development/code/read and development/code/pattern-search commands that caused TS2300 duplicate identifier collision with new code/* commands. Remove legacy CodeDaemon methods (readFile, searchCode, getGitLog, clearCache, getCacheStats, getRepositoryRoot), their types, and the PathValidator/FileReader modules β€” all superseded by Rust IPC workspace ops. - Delete commands/development/code/ (7 files) - Delete daemons/code-daemon/server/modules/ (PathValidator, FileReader) - Clean CodeDaemonTypes.ts: remove 222 lines of legacy types - Clean CodeDaemon.ts: remove 7 legacy static methods - Clean CodeDaemonServer.ts: remove old CodeDaemonImpl class - Fix cli.ts: replace CODE_COMMANDS import with string literals - Fix PersonaToolDefinitions.ts: update essentialTools to code/* - Regenerate server/generated.ts and command constants --- src/debug/jtag/browser/generated.ts | 50 ++- src/debug/jtag/cli.ts | 5 +- .../server/CodeFindServerCommand.ts | 296 ----------------- .../pattern-search/shared/CodeFindCommand.ts | 46 --- .../pattern-search/shared/CodeFindTypes.ts | 125 ------- .../code/read/server/CodeReadServerCommand.ts | 198 ----------- .../code/read/shared/CodeReadCommand.ts | 44 --- .../code/read/shared/CodeReadTypes.ts | 87 ----- .../code/shared/CodeCommandConstants.ts | 20 -- .../code-daemon/server/CodeDaemonServer.ts | 161 +-------- .../code-daemon/server/modules/FileReader.ts | 210 ------------ .../server/modules/PathValidator.ts | 115 ------- .../daemons/code-daemon/shared/CodeDaemon.ts | 61 +--- .../code-daemon/shared/CodeDaemonTypes.ts | 225 +------------ src/debug/jtag/generated-command-schemas.json | 312 +++++++++++++----- src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/server/generated.ts | 62 +++- .../shared/generated-command-constants.ts | 10 +- src/debug/jtag/shared/version.ts | 2 +- .../server/modules/PersonaToolDefinitions.ts | 2 +- 21 files changed, 359 insertions(+), 1678 deletions(-) delete mode 100644 src/debug/jtag/commands/development/code/pattern-search/server/CodeFindServerCommand.ts delete mode 100644 src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindCommand.ts delete mode 100644 src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindTypes.ts delete mode 100644 src/debug/jtag/commands/development/code/read/server/CodeReadServerCommand.ts delete mode 100644 src/debug/jtag/commands/development/code/read/shared/CodeReadCommand.ts delete mode 100644 src/debug/jtag/commands/development/code/read/shared/CodeReadTypes.ts delete mode 100644 src/debug/jtag/commands/development/code/shared/CodeCommandConstants.ts delete mode 100644 src/debug/jtag/daemons/code-daemon/server/modules/FileReader.ts delete mode 100644 src/debug/jtag/daemons/code-daemon/server/modules/PathValidator.ts diff --git a/src/debug/jtag/browser/generated.ts b/src/debug/jtag/browser/generated.ts index d65766765..bcca8e98e 100644 --- a/src/debug/jtag/browser/generated.ts +++ b/src/debug/jtag/browser/generated.ts @@ -1,7 +1,7 @@ /** * Browser Structure Registry - Auto-generated * - * Contains 11 daemons and 166 commands and 2 adapters and 27 widgets. + * Contains 11 daemons and 174 commands and 2 adapters and 27 widgets. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -43,6 +43,14 @@ import { AIValidateResponseBrowserCommand } from './../commands/ai/validate-resp import { CanvasStrokeAddBrowserCommand } from './../commands/canvas/stroke/add/browser/CanvasStrokeAddBrowserCommand'; import { CanvasStrokeListBrowserCommand } from './../commands/canvas/stroke/list/browser/CanvasStrokeListBrowserCommand'; import { CanvasVisionBrowserCommand } from './../commands/canvas/vision/browser/CanvasVisionBrowserCommand'; +import { CodeDiffBrowserCommand } from './../commands/code/diff/browser/CodeDiffBrowserCommand'; +import { CodeEditBrowserCommand } from './../commands/code/edit/browser/CodeEditBrowserCommand'; +import { CodeHistoryBrowserCommand } from './../commands/code/history/browser/CodeHistoryBrowserCommand'; +import { CodeReadBrowserCommand } from './../commands/code/read/browser/CodeReadBrowserCommand'; +import { CodeSearchBrowserCommand } from './../commands/code/search/browser/CodeSearchBrowserCommand'; +import { CodeTreeBrowserCommand } from './../commands/code/tree/browser/CodeTreeBrowserCommand'; +import { CodeUndoBrowserCommand } from './../commands/code/undo/browser/CodeUndoBrowserCommand'; +import { CodeWriteBrowserCommand } from './../commands/code/write/browser/CodeWriteBrowserCommand'; import { ActivityUserPresentCommand } from './../commands/collaboration/activity/user-present/browser/ActivityUserPresentCommand'; import { ChatAnalyzeBrowserCommand } from './../commands/collaboration/chat/analyze/browser/ChatAnalyzeBrowserCommand'; import { ChatExportBrowserCommand } from './../commands/collaboration/chat/export/browser/ChatExportBrowserCommand'; @@ -407,6 +415,46 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionBrowserCommand', commandClass: CanvasVisionBrowserCommand }, +{ + name: 'code/diff', + className: 'CodeDiffBrowserCommand', + commandClass: CodeDiffBrowserCommand + }, +{ + name: 'code/edit', + className: 'CodeEditBrowserCommand', + commandClass: CodeEditBrowserCommand + }, +{ + name: 'code/history', + className: 'CodeHistoryBrowserCommand', + commandClass: CodeHistoryBrowserCommand + }, +{ + name: 'code/read', + className: 'CodeReadBrowserCommand', + commandClass: CodeReadBrowserCommand + }, +{ + name: 'code/search', + className: 'CodeSearchBrowserCommand', + commandClass: CodeSearchBrowserCommand + }, +{ + name: 'code/tree', + className: 'CodeTreeBrowserCommand', + commandClass: CodeTreeBrowserCommand + }, +{ + name: 'code/undo', + className: 'CodeUndoBrowserCommand', + commandClass: CodeUndoBrowserCommand + }, +{ + name: 'code/write', + className: 'CodeWriteBrowserCommand', + commandClass: CodeWriteBrowserCommand + }, { name: 'collaboration/activity/user-present', className: 'ActivityUserPresentCommand', diff --git a/src/debug/jtag/cli.ts b/src/debug/jtag/cli.ts index c88e240a7..37d813654 100644 --- a/src/debug/jtag/cli.ts +++ b/src/debug/jtag/cli.ts @@ -15,7 +15,6 @@ import { COMMANDS } from './shared/generated-command-constants'; import { DATA_COMMANDS } from './commands/data/shared/DataCommandConstants'; import { FILE_COMMANDS } from './commands/file/shared/FileCommandConstants'; import { USER_COMMANDS } from './commands/shared/SystemCommandConstants'; -import { CODE_COMMANDS } from './commands/development/code/shared/CodeCommandConstants'; import * as fs from 'fs'; import * as path from 'path'; @@ -226,8 +225,8 @@ async function main() { // Map of commands to their primary parameter name const singleParamCommands: Record = { 'help': 'commandName', - [CODE_COMMANDS.READ]: 'path', - [CODE_COMMANDS.FIND]: 'pattern', + 'code/read': 'path', + 'code/search': 'pattern', [FILE_COMMANDS.LOAD]: 'path', [FILE_COMMANDS.SAVE]: 'path', [DATA_COMMANDS.READ]: 'id', diff --git a/src/debug/jtag/commands/development/code/pattern-search/server/CodeFindServerCommand.ts b/src/debug/jtag/commands/development/code/pattern-search/server/CodeFindServerCommand.ts deleted file mode 100644 index d080c6c46..000000000 --- a/src/debug/jtag/commands/development/code/pattern-search/server/CodeFindServerCommand.ts +++ /dev/null @@ -1,296 +0,0 @@ -/** - * code/pattern-search server command - Find files by name pattern - */ - -import * as fs from 'fs'; -import * as path from 'path'; -import { promisify } from 'util'; -import { minimatch } from 'minimatch'; - -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; -import type { CodeFindParams, CodeFindResult, FileMatch } from '../shared/CodeFindTypes'; -import { createCodeFindResultFromParams } from '../shared/CodeFindTypes'; -import { CodeFindCommand } from '../shared/CodeFindCommand'; - -const stat = promisify(fs.stat); -const readdir = promisify(fs.readdir); - -export class CodeFindServerCommand extends CodeFindCommand { - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('code/pattern-search', context, subpath, commander); - } - - /** - * Execute code/pattern-search command - * - * Searches for files by name pattern using minimatch (supports wildcards) - */ - protected async executeCommand(params: CodeFindParams): Promise { - // Validate params - if (!params.pattern || params.pattern.trim() === '') { - return createCodeFindResultFromParams(params, { - success: false, - error: 'Missing required parameter: pattern' - }); - } - - console.log(`πŸ” CODE FIND SERVER: Searching for pattern "${params.pattern}"`); - - // STEP 2: Query analysis - detect conceptual/semantic searches - // NOTE: We now WARN but still run the search. AIs reported that blocking was confusing. - const queryAnalysis = this.analyzeQuery(params.pattern); - let conceptualWarning = ''; - if (queryAnalysis.isConceptual) { - console.log(`⚠️ CODE FIND SERVER: Pattern "${params.pattern}" appears conceptual (${queryAnalysis.reasons.length} reasons)`); - conceptualWarning = [ - '', - '--- HINT ---', - `Your pattern "${params.pattern}" may be a semantic/conceptual search.`, - 'This tool matches FILENAME PATTERNS (like *.ts, Auth*.ts), not code concepts.', - '', - 'For semantic code search, try: ai/context/search or ai/rag/query-open', - 'For file content search, try: development/code/grep', - '--- END HINT ---', - '' - ].join('\n'); - } - - try { - const repositoryRoot = CodeDaemon.getRepositoryRoot(); - const baseDir = params.baseDir ?? '.'; - const searchPath = path.join(repositoryRoot, baseDir); - - // Validate base directory exists - try { - const searchStat = await stat(searchPath); - if (!searchStat.isDirectory()) { - return createCodeFindResultFromParams(params, { - success: false, - error: `Base directory is not a directory: ${baseDir}` - }); - } - } catch { - return createCodeFindResultFromParams(params, { - success: false, - error: `Base directory not found: ${baseDir}` - }); - } - - const maxResults = params.maxResults ?? 50; - const caseInsensitive = params.caseInsensitive !== false; // Default true - const includeHidden = params.includeHidden === true; // Default false - const excludeDirs = params.excludeDirs ?? ['node_modules', 'dist', '.continuum', '.git', 'examples/dist', 'coverage']; - - // Prepare pattern for minimatch - const pattern = caseInsensitive ? params.pattern.toLowerCase() : params.pattern; - - // Find matching files - const matches: FileMatch[] = []; - let totalMatches = 0; - - await this.searchDirectory( - searchPath, - repositoryRoot, - pattern, - caseInsensitive, - includeHidden, - excludeDirs, - matches, - maxResults, - () => totalMatches++ - ); - - console.log(`βœ… CODE FIND SERVER: Found ${totalMatches} matches for "${params.pattern}" (returning ${matches.length})`); - - // If no matches found, provide helpful guidance - if (totalMatches === 0) { - const suggestions = [ - `No files found matching pattern "${params.pattern}".`, - '', - 'Tips for better results:', - 'β€’ Use simpler patterns: "*.ts" instead of "typescript files"', - 'β€’ Try wildcards: "**/*.test.ts" for test files', - 'β€’ Use exact filenames: "package.json"', - 'β€’ Check your baseDir parameter (currently searching: ' + (baseDir ?? '.') + ')', - '', - 'Note: This tool matches filename patterns, not file contents.', - 'To search code contents, use development/code/grep' - ]; - - return createCodeFindResultFromParams(params, { - success: true, - pattern: params.pattern, - matches: [], - totalMatches: 0, - baseDir, - message: conceptualWarning + suggestions.join('\n') - }); - } - - return createCodeFindResultFromParams(params, { - success: true, - pattern: params.pattern, - matches, - totalMatches, - baseDir, - message: conceptualWarning || undefined - }); - } catch (error) { - console.error(`❌ CODE FIND SERVER: Exception searching for ${params.pattern}:`, error); - - return createCodeFindResultFromParams(params, { - success: false, - error: error instanceof Error ? error.message : 'Unknown error' - }); - } - } - - /** - * Recursively search directory for matching files - */ - private async searchDirectory( - dirPath: string, - repoRoot: string, - pattern: string, - caseInsensitive: boolean, - includeHidden: boolean, - excludeDirs: string[], - matches: FileMatch[], - maxResults: number, - onMatch: () => void - ): Promise { - // Stop if we've reached max results - if (matches.length >= maxResults) return; - - try { - const entries = await readdir(dirPath, { withFileTypes: true }); - - for (const entry of entries) { - // Stop if we've reached max results - if (matches.length >= maxResults) break; - - // Skip hidden files/directories if not requested - if (!includeHidden && entry.name.startsWith('.')) continue; - - // Skip excluded directories (configurable, defaults to massive dirs that cause timeouts) - if (excludeDirs.includes(entry.name)) { - continue; - } - - const fullPath = path.join(dirPath, entry.name); - const relativePath = path.relative(repoRoot, fullPath); - - // Get file stats - let fileStat; - let fileType: 'file' | 'directory' | 'symlink' = 'file'; - try { - fileStat = await stat(fullPath); - if (fileStat.isDirectory()) fileType = 'directory'; - else if (fileStat.isSymbolicLink()) fileType = 'symlink'; - } catch { - // Skip files we can't stat - continue; - } - - // Check if filename matches pattern - const filename = caseInsensitive ? entry.name.toLowerCase() : entry.name; - if (minimatch(filename, pattern)) { - onMatch(); - - if (matches.length < maxResults) { - matches.push({ - path: relativePath, - size: fileStat.size, - modified: fileStat.mtime.toISOString(), - type: fileType - }); - } - } - - // Recursively search subdirectories - if (entry.isDirectory()) { - await this.searchDirectory( - fullPath, - repoRoot, - pattern, - caseInsensitive, - includeHidden, - excludeDirs, - matches, - maxResults, - onMatch - ); - } - } - } catch (error) { - // Silently skip directories we can't read (permissions, etc.) - console.warn(`⚠️ CODE FIND SERVER: Cannot read directory ${dirPath}:`, error); - } - } - - /** - * Analyze query to detect if it's conceptual/semantic vs literal pattern matching - * Based on AI team testing feedback and detection patterns - */ - private analyzeQuery(pattern: string): { isConceptual: boolean; reasons: string[] } { - const reasons: string[] = []; - - // Detect multi-word conceptual phrases - const words = pattern.trim().split(/\s+/); - if (words.length >= 2 && !pattern.includes('*') && !pattern.includes('?')) { - // Check if it looks like a semantic query vs a filename pattern - const hasCodeIndicators = /[A-Z][a-z]+|[a-z]+[A-Z]|[._-]|\.ts$|\.js$|\.py$/.test(pattern); - if (!hasCodeIndicators) { - reasons.push(`Multi-word phrase without file indicators: "${pattern}"`); - } - } - - // Detect question structures - if (/^(how|what|where|why|when|who|which)\b/i.test(pattern)) { - reasons.push(`Question word detected: ${pattern.split(/\s+/)[0].toLowerCase()}`); - } - - // Detect abstract/conceptual terms (common semantic search patterns) - const conceptualTerms = [ - 'flow', 'logic', 'process', 'pattern', 'approach', 'mechanism', - 'system', 'strategy', 'implementation', 'algorithm', 'architecture', - 'structure', 'design', 'method', 'technique', 'concept', 'principle', - 'handling', 'management', 'processing', 'validation', 'authentication' - ]; - - const lowerPattern = pattern.toLowerCase(); - const matchedTerms = conceptualTerms.filter(term => - lowerPattern.includes(term) && !pattern.includes('*') - ); - - if (matchedTerms.length > 0) { - reasons.push(`Conceptual terms found: ${matchedTerms.join(', ')}`); - } - - // Detect descriptive phrases (adjective + noun patterns) - if (words.length >= 2 && !/[*?[\]]/.test(pattern)) { - const descriptivePatterns = /\b(user|error|data|file|auth|api|request|response|message|event|state|config|service|component|module|handler|manager|controller|model|view)\s+(handling|management|processing|validation|creation|deletion|update|retrieval|storage|flow|pattern|logic)\b/i; - if (descriptivePatterns.test(pattern)) { - reasons.push('Descriptive phrase detected (noun + verb pattern)'); - } - } - - // If pattern has wildcards or file extensions, it's likely literal - if (/[*?[\]]|\.(?:ts|js|py|java|go|rs|cpp|h)$/.test(pattern)) { - return { isConceptual: false, reasons: [] }; - } - - // If pattern is PascalCase or camelCase, it's likely a filename - if (/^[A-Z][a-z]+[A-Z]|^[a-z]+[A-Z]/.test(pattern)) { - return { isConceptual: false, reasons: [] }; - } - - // Decision: conceptual if we have 2+ reasons - return { - isConceptual: reasons.length >= 2, - reasons - }; - } -} diff --git a/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindCommand.ts b/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindCommand.ts deleted file mode 100644 index 117a7d369..000000000 --- a/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindCommand.ts +++ /dev/null @@ -1,46 +0,0 @@ -/** - * code/find shared command - Find files by name pattern - */ - -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import { CommandBase } from '@daemons/command-daemon/shared/CommandBase'; -import type { CodeFindParams, CodeFindResult } from './CodeFindTypes'; - -/** - * Shared base for code/find command - */ -export abstract class CodeFindCommand extends CommandBase { - constructor( - name: string, - context: JTAGContext, - subpath: string, - commander: ICommandDaemon - ) { - super(name, context, subpath, commander); - } - - /** - * Execute with environment routing - */ - async execute(params: CodeFindParams): Promise { - // Ensure backend is set - const effectiveParams = { - ...params, - backend: params.backend ?? 'server' - } as CodeFindParams; - - // If we're not in the requested environment, delegate - if (this.context.environment !== effectiveParams.backend) { - return await this.remoteExecute(effectiveParams); - } - - // We're in the correct environment, execute locally - return await this.executeCommand(effectiveParams); - } - - /** - * Subclasses implement this for their specific environment - */ - protected abstract executeCommand(params: CodeFindParams): Promise; -} diff --git a/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindTypes.ts b/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindTypes.ts deleted file mode 100644 index cc58cb9d0..000000000 --- a/src/debug/jtag/commands/development/code/pattern-search/shared/CodeFindTypes.ts +++ /dev/null @@ -1,125 +0,0 @@ -/** - * code/find command types - Find files by name pattern - */ - -import type { JTAGContext, JTAGEnvironment } from '@system/core/types/JTAGTypes'; -import { transformPayload } from '@system/core/types/JTAGTypes'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -/** - * Base params for code commands - */ -export interface BaseCodeParams { - readonly context: JTAGContext; - readonly sessionId: UUID; - readonly backend: JTAGEnvironment; -} - -/** - * Parameters for code/find command - */ -export interface CodeFindParams extends BaseCodeParams { - /** Filename pattern to search for (supports wildcards: *, ?, []) */ - readonly pattern: string; - - /** Base directory to search (relative to repository root, default: entire repo) */ - readonly baseDir?: string; - - /** Case-insensitive search */ - readonly caseInsensitive?: boolean; - - /** Maximum results to return (default: 50) */ - readonly maxResults?: number; - - /** Include hidden files/directories (default: false) */ - readonly includeHidden?: boolean; - - /** Directories to exclude from search (default: ['node_modules', 'dist', '.continuum', '.git', 'examples/dist', 'coverage']) */ - readonly excludeDirs?: string[]; -} - -/** - * Single file match - */ -export interface FileMatch { - /** Relative path from repository root */ - path: string; - - /** File size in bytes */ - size: number; - - /** Last modified timestamp */ - modified: string; - - /** File type (file, directory, symlink) */ - type: 'file' | 'directory' | 'symlink'; -} - -/** - * Result of code/find command - */ -export interface CodeFindResult { - readonly context: JTAGContext; - readonly sessionId: UUID; - readonly backend: JTAGEnvironment; - readonly timestamp: string; - - /** Operation success */ - success: boolean; - - /** Search pattern used */ - pattern: string; - - /** Files found */ - matches: FileMatch[]; - - /** Total matches found (may be > matches.length if limited by maxResults) */ - totalMatches: number; - - /** Base directory searched */ - baseDir: string; - - /** Error message (if !success) */ - error?: string; - - /** Optional message with guidance or additional context */ - message?: string; -} - -/** - * Create code/find params - */ -export const createCodeFindParams = ( - context: JTAGContext, - sessionId: UUID, - data: Omit & { backend?: JTAGEnvironment } -): CodeFindParams => { - return { - context, - sessionId, - backend: data.backend || 'server', - pattern: data.pattern, - baseDir: data.baseDir, - caseInsensitive: data.caseInsensitive, - maxResults: data.maxResults, - includeHidden: data.includeHidden, - excludeDirs: data.excludeDirs - }; -}; - -/** - * Factory function to create result - */ -export const createCodeFindResultFromParams = ( - params: CodeFindParams, - differences: Omit, 'context' | 'sessionId' | 'backend'> -): CodeFindResult => transformPayload(params, { - backend: params.backend, - success: false, - pattern: params.pattern, - matches: [], - totalMatches: 0, - baseDir: params.baseDir || '.', - timestamp: new Date().toISOString(), - ...differences -}); diff --git a/src/debug/jtag/commands/development/code/read/server/CodeReadServerCommand.ts b/src/debug/jtag/commands/development/code/read/server/CodeReadServerCommand.ts deleted file mode 100644 index 78def3e24..000000000 --- a/src/debug/jtag/commands/development/code/read/server/CodeReadServerCommand.ts +++ /dev/null @@ -1,198 +0,0 @@ -/** - * code/read server command - Read source code files - */ - -import * as fs from 'fs'; -import * as path from 'path'; -import { promisify } from 'util'; -import { minimatch } from 'minimatch'; - -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; -import type { CodeReadParams, CodeReadResult } from '../shared/CodeReadTypes'; -import { createCodeReadResultFromParams } from '../shared/CodeReadTypes'; -import { CodeReadCommand } from '../shared/CodeReadCommand'; - -const stat = promisify(fs.stat); -const readdir = promisify(fs.readdir); - -export class CodeReadServerCommand extends CodeReadCommand { - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('code-read', context, subpath, commander); - } - - /** - * Execute code/read command - * - * Delegates to CodeDaemon.readFile() static method - * If exact path fails, tries fuzzy matching to find similar files - */ - protected async executeCommand(params: CodeReadParams): Promise { - // Validate params - if (!params.path) { - return createCodeReadResultFromParams(params, { - success: false, - error: 'Missing required parameter: path' - }); - } - - console.log(`πŸ“‚ CODE SERVER: Reading file ${params.path} via CodeDaemon`); - - try { - // Try exact path first - const result = await CodeDaemon.readFile(params.path, { - startLine: params.startLine, - endLine: params.endLine, - includeMetadata: params.includeMetadata, - forceRefresh: params.forceRefresh - }); - - if (result.success) { - console.log(`βœ… CODE SERVER: Read ${params.path} (${result.metadata.linesReturned} lines)`); - return createCodeReadResultFromParams(params, result); - } - - // If exact path failed, try fuzzy matching - console.log(`πŸ” CODE SERVER: Exact path failed, trying fuzzy match for ${params.path}`); - const matches = await this.findSimilarFiles(params.path); - - if (matches.length === 0) { - console.log(`❌ CODE SERVER: No similar files found for ${params.path}`); - return createCodeReadResultFromParams(params, { - success: false, - error: `File not found: ${params.path}. No similar files found.` - }); - } - - if (matches.length === 1) { - // Exactly one match - read it automatically - console.log(`βœ… CODE SERVER: Found exact fuzzy match: ${matches[0]}`); - const fuzzyResult = await CodeDaemon.readFile(matches[0], { - startLine: params.startLine, - endLine: params.endLine, - includeMetadata: params.includeMetadata, - forceRefresh: params.forceRefresh - }); - - if (fuzzyResult.success) { - console.log(`βœ… CODE SERVER: Read fuzzy match ${matches[0]} (${fuzzyResult.metadata.linesReturned} lines)`); - } - - return createCodeReadResultFromParams(params, fuzzyResult); - } - - // Multiple matches - return suggestions - console.log(`❓ CODE SERVER: Found ${matches.length} similar files for ${params.path}`); - const suggestionsList = matches.slice(0, 10).map((m, i) => `${i + 1}. ${m}`).join('\n'); - return createCodeReadResultFromParams(params, { - success: false, - error: `File not found: ${params.path}.\n\nDid you mean one of these?\n${suggestionsList}\n\nPlease try again with the full path.` - }); - } catch (error) { - console.error(`❌ CODE SERVER: Exception reading ${params.path}:`, error); - - return createCodeReadResultFromParams(params, { - success: false, - error: error instanceof Error ? error.message : 'Unknown error' - }); - } - } - - /** - * Find files with similar names using fuzzy matching - * Searches for files that contain the given filename pattern - */ - private async findSimilarFiles(partialPath: string): Promise { - try { - const repositoryRoot = CodeDaemon.getRepositoryRoot(); - - // Extract the filename from the partial path - const basename = path.basename(partialPath); - const dirname = path.dirname(partialPath); - - // Create a case-insensitive glob pattern - const pattern = `*${basename}*`; - - const matches: string[] = []; - const startTime = Date.now(); - const TIMEOUT_MS = 5000; // 5 second timeout - const MAX_DEPTH = 10; // Maximum directory depth - - // If a directory was specified, search only in that directory - if (dirname && dirname !== '.' && dirname !== '/') { - const searchPath = path.join(repositoryRoot, dirname); - try { - await stat(searchPath); - await this.searchDirectoryForPattern(searchPath, repositoryRoot, pattern, matches, 50, 0, MAX_DEPTH, startTime, TIMEOUT_MS); - } catch { - // Directory doesn't exist, fall through to repo-wide search - } - } - - // If no matches in specified directory (or no directory specified), search entire repo - if (matches.length === 0) { - await this.searchDirectoryForPattern(repositoryRoot, repositoryRoot, pattern, matches, 50, 0, MAX_DEPTH, startTime, TIMEOUT_MS); - } - - return matches; - } catch (error) { - console.warn(`⚠️ CODE SERVER: Error in fuzzy file search:`, error); - return []; - } - } - - /** - * Recursively search directory for files matching pattern - * @param depth Current depth in directory tree - * @param maxDepth Maximum depth to search (prevents deep recursion) - * @param startTime Start time of search (for timeout check) - * @param timeoutMs Maximum time to search in milliseconds - */ - private async searchDirectoryForPattern( - dirPath: string, - repoRoot: string, - pattern: string, - matches: string[], - maxResults: number, - depth: number = 0, - maxDepth: number = 10, - startTime: number = Date.now(), - timeoutMs: number = 5000 - ): Promise { - // Performance limits - if (matches.length >= maxResults) return; - if (depth > maxDepth) return; - if (Date.now() - startTime > timeoutMs) { - console.warn(`⚠️ CODE SERVER: Fuzzy search timeout after ${timeoutMs}ms at depth ${depth}`); - return; - } - - try { - const entries = await readdir(dirPath, { withFileTypes: true }); - - for (const entry of entries) { - if (matches.length >= maxResults) break; - if (Date.now() - startTime > timeoutMs) break; - - // Skip hidden files/directories and node_modules - if (entry.name.startsWith('.') || entry.name === 'node_modules') continue; - - const fullPath = path.join(dirPath, entry.name); - const relativePath = path.relative(repoRoot, fullPath); - - // Check if filename matches pattern (case-insensitive) - if (entry.isFile() && minimatch(entry.name.toLowerCase(), pattern.toLowerCase())) { - matches.push(relativePath); - } - - // Recursively search subdirectories (with updated depth) - if (entry.isDirectory() && matches.length < maxResults) { - await this.searchDirectoryForPattern(fullPath, repoRoot, pattern, matches, maxResults, depth + 1, maxDepth, startTime, timeoutMs); - } - } - } catch { - // Silently skip directories we can't read - } - } -} diff --git a/src/debug/jtag/commands/development/code/read/shared/CodeReadCommand.ts b/src/debug/jtag/commands/development/code/read/shared/CodeReadCommand.ts deleted file mode 100644 index 1a8fcf11e..000000000 --- a/src/debug/jtag/commands/development/code/read/shared/CodeReadCommand.ts +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Code Read Command - Shared Base Class - * - * Base class for code read operations with environment routing - */ - -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import { CommandBase } from '@daemons/command-daemon/shared/CommandBase'; -import type { CodeReadParams, CodeReadResult } from './CodeReadTypes'; - -/** - * Base class for code read commands - * Provides environment routing via CommandBase - */ -export abstract class CodeReadCommand extends CommandBase { - constructor(commandName: string, context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super(commandName, context, subpath, commander); - } - - /** - * Execute with environment routing - */ - async execute(params: CodeReadParams): Promise { - // Ensure backend is set - const effectiveParams = { - ...params, - backend: params.backend ?? 'server' - } as CodeReadParams; - - // If we're not in the requested environment, delegate - if (this.context.environment !== effectiveParams.backend) { - return await this.remoteExecute(effectiveParams); - } - - // We're in the correct environment, execute locally - return await this.executeCommand(effectiveParams); - } - - /** - * Subclasses implement this for their specific environment - */ - protected abstract executeCommand(params: CodeReadParams): Promise; -} diff --git a/src/debug/jtag/commands/development/code/read/shared/CodeReadTypes.ts b/src/debug/jtag/commands/development/code/read/shared/CodeReadTypes.ts deleted file mode 100644 index 36a1134f3..000000000 --- a/src/debug/jtag/commands/development/code/read/shared/CodeReadTypes.ts +++ /dev/null @@ -1,87 +0,0 @@ -/** - * code/read command types - */ - -import type { JTAGContext, JTAGEnvironment } from '@system/core/types/JTAGTypes'; -import { transformPayload } from '@system/core/types/JTAGTypes'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; -import type { CodeReadResult as CodeDaemonReadResult, CodeReadOptions } from '@daemons/code-daemon/shared/CodeDaemonTypes'; - -/** - * Base params for code commands - */ -export interface BaseCodeParams { - readonly context: JTAGContext; - readonly sessionId: UUID; - readonly backend: JTAGEnvironment; -} - -/** - * Parameters for code/read command - */ -export interface CodeReadParams extends BaseCodeParams { - /** File path relative to jtag root, e.g. "commands/wall/write.ts" or "system/core/shared/Events.ts" (NOT absolute paths, NOT starting with "src/") */ - readonly path: string; - - /** Start line (1-indexed, optional) */ - readonly startLine?: number; - - /** End line (1-indexed, optional) */ - readonly endLine?: number; - - /** Include file metadata */ - readonly includeMetadata?: boolean; - - /** Force bypass cache */ - readonly forceRefresh?: boolean; -} - -/** - * Result of code/read command - */ -export interface CodeReadResult extends CodeDaemonReadResult { - readonly context: JTAGContext; - readonly sessionId: UUID; - readonly backend: JTAGEnvironment; - readonly timestamp: string; -} - -/** - * Create code/read params - */ -export const createCodeReadParams = ( - context: JTAGContext, - sessionId: UUID, - data: Omit & { backend?: JTAGEnvironment } -): CodeReadParams => { - return { - context, - sessionId, - backend: data.backend || 'server', - path: data.path, - startLine: data.startLine, - endLine: data.endLine, - includeMetadata: data.includeMetadata, - forceRefresh: data.forceRefresh - }; -}; - -/** - * Factory function to create result - */ -export const createCodeReadResultFromParams = ( - params: CodeReadParams, - differences: Omit, 'context' | 'sessionId' | 'backend'> -): CodeReadResult => transformPayload(params, { - backend: params.backend, // Explicitly copy backend from params - success: false, - metadata: { - path: params.path, - size: 0, - lines: 0, - linesReturned: 0, - modified: '' - }, - timestamp: new Date().toISOString(), - ...differences -}); diff --git a/src/debug/jtag/commands/development/code/shared/CodeCommandConstants.ts b/src/debug/jtag/commands/development/code/shared/CodeCommandConstants.ts deleted file mode 100644 index 17911acb1..000000000 --- a/src/debug/jtag/commands/development/code/shared/CodeCommandConstants.ts +++ /dev/null @@ -1,20 +0,0 @@ -/** - * Code Command Constants - * - * All development/code/* command names defined here. - * Usage: - * await Commands.execute(CODE_COMMANDS.READ, params); - */ - -export const CODE_COMMANDS = { - /** Read source code from a file */ - READ: 'code/read', - - /** Search for patterns in code */ - FIND: 'code/find', -} as const; - -/** - * Type-safe code command names - */ -export type CodeCommand = typeof CODE_COMMANDS[keyof typeof CODE_COMMANDS]; diff --git a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts index d6520c98b..5ebd52a14 100644 --- a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts +++ b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts @@ -1,175 +1,26 @@ /** - * CodeDaemon Server - JTAG Integration + * CodeDaemon Server - Workspace Operations via Rust IPC * - * Server-side implementation that replaces CodeDaemon static methods + * Server-side implementation that replaces CodeDaemon static methods. + * All file operations go through continuum-core Rust backend. */ import type { JTAGContext } from '../../../system/core/types/JTAGTypes'; import { CodeDaemon } from '../shared/CodeDaemon'; import type { - CodeDaemonConfig, - CodeReadOptions, - CodeReadResult, - CodeSearchOptions, - CodeSearchResult, - GitLogOptions, - GitLogResult, - CodeFileReadEvent, - CodeSearchEvent, - CodeGitLogEvent, WorkspaceEditMode, - WorkspaceWriteResult, - WorkspaceReadResult, - WorkspaceSearchResult, - WorkspaceTreeResult, - WorkspaceUndoResult, - WorkspaceHistoryResult, - WorkspaceGitStatusInfo, } from '../shared/CodeDaemonTypes'; -import { Events } from '../../../system/core/shared/Events'; -import { PathValidator } from './modules/PathValidator'; -import { FileReader } from './modules/FileReader'; import { Logger } from '../../../system/core/logging/Logger'; import { RustCoreIPCClient } from '../../../workers/continuum-core/bindings/RustCoreIPC'; -import * as path from 'path'; /** - * Server-side implementation of CodeDaemon - */ -class CodeDaemonImpl { - private pathValidator: PathValidator; - private fileReader: FileReader; - private config: CodeDaemonConfig; - private jtagContext: JTAGContext; - private isInitialized: boolean = false; - - constructor(jtagContext: JTAGContext, config: CodeDaemonConfig) { - this.jtagContext = jtagContext; - this.config = config; - this.pathValidator = new PathValidator(config.repositoryRoot); - this.fileReader = new FileReader( - this.pathValidator, - config.maxFileSize, - config.enableCache, - config.cacheTTL - ); - this.isInitialized = true; - } - - async readFile(filePath: string, options?: CodeReadOptions): Promise { - const result = await this.fileReader.read(filePath, options); - - // Emit event - if (result.success) { - await Events.emit(this.jtagContext, 'code:file:read', { - path: filePath, - size: result.metadata.size, - cached: result.cached || false, - timestamp: Date.now() - }); - } - - return result; - } - - async searchCode(pattern: string, options?: CodeSearchOptions): Promise { - // TODO: Implement search - return { - success: false, - pattern, - matches: [], - totalMatches: 0, - filesSearched: 0, - error: 'Search not yet implemented' - }; - } - - async getGitLog(options?: GitLogOptions): Promise { - // TODO: Implement git log - return { - success: false, - commits: [], - error: 'Git log not yet implemented' - }; - } - - clearCache(): void { - this.fileReader.clearCache(); - } - - getCacheStats(): { entries: number; size: number } { - return this.fileReader.getCacheStats(); - } - - getRepositoryRoot(): string { - return this.config.repositoryRoot; - } - - getIsInitialized(): boolean { - return this.isInitialized; - } -} - -// Singleton instance -let codeDaemonInstance: CodeDaemonImpl | undefined; - -/** - * Initialize CodeDaemon for server usage + * Initialize CodeDaemon for server usage. + * Connects to continuum-core Rust backend for all workspace operations. */ export async function initializeCodeDaemon(jtagContext: JTAGContext): Promise { const log = Logger.create('CodeDaemonServer', 'daemons/CodeDaemonServer'); log.info('Initializing CodeDaemon...'); - // Determine repository root (go up from daemons/code-daemon/server to jtag root) - const repositoryRoot = path.resolve(__dirname, '../../..'); - - const config: CodeDaemonConfig = { - repositoryRoot, - maxFileSize: 10 * 1024 * 1024, // 10MB - enableCache: true, - cacheTTL: 60000, // 1 minute - rateLimit: 100, // 100 ops/minute - enableAudit: true - }; - - // Create implementation instance - codeDaemonInstance = new CodeDaemonImpl(jtagContext, config); - - // Replace static methods on CodeDaemon class - CodeDaemon.readFile = async (filePath: string, options?: CodeReadOptions) => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - return await codeDaemonInstance.readFile(filePath, options); - }; - - CodeDaemon.searchCode = async (pattern: string, options?: CodeSearchOptions) => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - return await codeDaemonInstance.searchCode(pattern, options); - }; - - CodeDaemon.getGitLog = async (options?: GitLogOptions) => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - return await codeDaemonInstance.getGitLog(options); - }; - - CodeDaemon.clearCache = () => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - codeDaemonInstance.clearCache(); - }; - - CodeDaemon.getCacheStats = () => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - return codeDaemonInstance.getCacheStats(); - }; - - CodeDaemon.getRepositoryRoot = () => { - if (!codeDaemonInstance) throw new Error('CodeDaemon not initialized'); - return codeDaemonInstance.getRepositoryRoot(); - }; - - CodeDaemon.isInitialized = () => { - return codeDaemonInstance?.getIsInitialized() || false; - }; - // ======================================================================== // Workspace-Scoped Operations (Rust IPC backed) // ======================================================================== @@ -230,5 +81,5 @@ export async function initializeCodeDaemon(jtagContext: JTAGContext): Promise = new Map(); - private maxFileSize: number; - private enableCache: boolean; - private cacheTTL: number; - - constructor( - pathValidator: PathValidator, - maxFileSize: number = 10 * 1024 * 1024, // 10MB default - enableCache: boolean = true, - cacheTTL: number = 60000 // 1 minute default - ) { - this.pathValidator = pathValidator; - this.maxFileSize = maxFileSize; - this.enableCache = enableCache; - this.cacheTTL = cacheTTL; - } - - /** - * Read a file with optional line range - */ - async read(filePath: string, options: CodeReadOptions = {}): Promise { - // Validate path - const validation = this.pathValidator.validate(filePath); - if (!validation.valid || !validation.absolutePath) { - return { - success: false, - metadata: { - path: filePath, - size: 0, - lines: 0, - linesReturned: 0, - modified: '' - }, - error: validation.error - }; - } - - const absolutePath = validation.absolutePath; - - try { - // Check cache if enabled and not force refresh - if (this.enableCache && !options.forceRefresh) { - const cached = this.getCachedFile(absolutePath); - if (cached) { - return this.extractLines(cached.content, cached.metadata, options, true); - } - } - - // Check file size - const stats = fs.statSync(absolutePath); - if (stats.size > this.maxFileSize) { - return { - success: false, - metadata: { - path: absolutePath, - size: stats.size, - lines: 0, - linesReturned: 0, - modified: stats.mtime.toISOString() - }, - error: `File too large: ${stats.size} bytes (max: ${this.maxFileSize})` - }; - } - - // Read file - const content = fs.readFileSync(absolutePath, 'utf-8'); - const lines = content.split('\n'); - - const metadata: CodeReadResult['metadata'] = { - path: absolutePath, - size: stats.size, - lines: lines.length, - linesReturned: lines.length, - modified: stats.mtime.toISOString() - }; - - // Cache if enabled - if (this.enableCache) { - this.cacheFile(absolutePath, content, metadata); - } - - return this.extractLines(content, metadata, options, false); - } catch (error) { - return { - success: false, - metadata: { - path: absolutePath, - size: 0, - lines: 0, - linesReturned: 0, - modified: '' - }, - error: `Failed to read file: ${error instanceof Error ? error.message : String(error)}` - }; - } - } - - /** - * Extract specific line range from content - */ - private extractLines( - content: string, - metadata: CodeReadResult['metadata'], - options: CodeReadOptions, - cached: boolean - ): CodeReadResult { - const lines = content.split('\n'); - - // If no line range specified, return full content - if (options.startLine === undefined && options.endLine === undefined) { - return { - success: true, - content, - metadata, - cached - }; - } - - // Extract line range (1-indexed) - const startLine = Math.max(1, options.startLine || 1); - const endLine = Math.min(lines.length, options.endLine || lines.length); - - if (startLine > endLine) { - return { - success: false, - metadata, - error: `Invalid line range: ${startLine}-${endLine}` - }; - } - - const selectedLines = lines.slice(startLine - 1, endLine); - const extractedContent = selectedLines.join('\n'); - - return { - success: true, - content: extractedContent, - metadata: { - ...metadata, - linesReturned: selectedLines.length - }, - cached - }; - } - - /** - * Get cached file if valid - */ - private getCachedFile(absolutePath: string): CacheEntry | null { - const cached = this.cache.get(absolutePath); - if (!cached) return null; - - // Check if cache expired - const now = Date.now(); - if (now - cached.timestamp > this.cacheTTL) { - this.cache.delete(absolutePath); - return null; - } - - return cached; - } - - /** - * Cache file content - */ - private cacheFile(absolutePath: string, content: string, metadata: CodeReadResult['metadata']): void { - this.cache.set(absolutePath, { - content, - metadata, - timestamp: Date.now() - }); - } - - /** - * Clear cache - */ - clearCache(): void { - this.cache.clear(); - } - - /** - * Get cache stats - */ - getCacheStats(): { entries: number; size: number } { - let totalSize = 0; - for (const entry of this.cache.values()) { - totalSize += entry.content.length; - } - return { - entries: this.cache.size, - size: totalSize - }; - } -} diff --git a/src/debug/jtag/daemons/code-daemon/server/modules/PathValidator.ts b/src/debug/jtag/daemons/code-daemon/server/modules/PathValidator.ts deleted file mode 100644 index 1d6fb49c7..000000000 --- a/src/debug/jtag/daemons/code-daemon/server/modules/PathValidator.ts +++ /dev/null @@ -1,115 +0,0 @@ -/** - * PathValidator - Security validation for file system operations - * - * Prevents directory traversal, validates paths within repository - */ - -import * as path from 'path'; -import * as fs from 'fs'; - -export interface PathValidationResult { - valid: boolean; - absolutePath?: string; - error?: string; -} - -export class PathValidator { - private repositoryRoot: string; - - constructor(repositoryRoot: string) { - this.repositoryRoot = path.resolve(repositoryRoot); - } - - /** - * Validate a file path is safe and within repository - */ - validate(filePath: string): PathValidationResult { - try { - // Resolve to absolute path - const absolutePath = path.resolve(this.repositoryRoot, filePath); - - // Check if path is within repository (prevent directory traversal) - if (!absolutePath.startsWith(this.repositoryRoot)) { - return { - valid: false, - error: `Path outside repository: ${filePath}` - }; - } - - // Check if path exists - if (!fs.existsSync(absolutePath)) { - return { - valid: false, - error: `Path does not exist: ${filePath}` - }; - } - - // Check if it's a file (not directory) - const stats = fs.statSync(absolutePath); - if (!stats.isFile()) { - return { - valid: false, - error: `Path is not a file: ${filePath}` - }; - } - - return { - valid: true, - absolutePath - }; - } catch (error) { - return { - valid: false, - error: `Path validation failed: ${error instanceof Error ? error.message : String(error)}` - }; - } - } - - /** - * Validate a directory path - */ - validateDirectory(dirPath: string): PathValidationResult { - try { - const absolutePath = path.resolve(this.repositoryRoot, dirPath); - - if (!absolutePath.startsWith(this.repositoryRoot)) { - return { - valid: false, - error: `Path outside repository: ${dirPath}` - }; - } - - if (!fs.existsSync(absolutePath)) { - return { - valid: false, - error: `Directory does not exist: ${dirPath}` - }; - } - - const stats = fs.statSync(absolutePath); - if (!stats.isDirectory()) { - return { - valid: false, - error: `Path is not a directory: ${dirPath}` - }; - } - - return { - valid: true, - absolutePath - }; - } catch (error) { - return { - valid: false, - error: `Directory validation failed: ${error instanceof Error ? error.message : String(error)}` - }; - } - } - - /** - * Get repository root - */ - getRepositoryRoot(): string { - return this.repositoryRoot; - } -} diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts index d1781f2b4..b9f7da737 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts @@ -1,16 +1,11 @@ /** - * CodeDaemon - Static interface for code operations + * CodeDaemon - Static interface for workspace-scoped code operations * * Environment-agnostic interface. All implementation is in server/. + * All operations go through Rust IPC backend with per-persona isolation. */ import type { - CodeReadOptions, - CodeReadResult, - CodeSearchOptions, - CodeSearchResult, - GitLogOptions, - GitLogResult, WorkspaceEditMode, WorkspaceWriteResult, WorkspaceReadResult, @@ -22,60 +17,12 @@ import type { } from './CodeDaemonTypes'; /** - * CodeDaemon - Static API for code operations + * CodeDaemon - Static API for workspace-scoped code operations * * All methods throw error if not initialized or called from wrong environment. - * Implementation is in server/CodeDaemonImpl.ts + * Implementation is in server/CodeDaemonServer.ts */ export class CodeDaemon { - /** - * Read a file (STATIC METHOD - public API) - */ - static async readFile(path: string, options?: CodeReadOptions): Promise { - throw new Error('CodeDaemon.readFile() must be implemented by server'); - } - - /** - * Search code (STATIC METHOD - public API) - */ - static async searchCode(pattern: string, options?: CodeSearchOptions): Promise { - throw new Error('CodeDaemon.searchCode() must be implemented by server'); - } - - /** - * Get git log (STATIC METHOD - public API) - */ - static async getGitLog(options?: GitLogOptions): Promise { - throw new Error('CodeDaemon.getGitLog() must be implemented by server'); - } - - /** - * Clear file cache (STATIC METHOD) - */ - static clearCache(): void { - throw new Error('CodeDaemon.clearCache() must be implemented by server'); - } - - /** - * Get cache stats (STATIC METHOD) - */ - static getCacheStats(): { entries: number; size: number } { - throw new Error('CodeDaemon.getCacheStats() must be implemented by server'); - } - - /** - * Get repository root (STATIC METHOD) - */ - static getRepositoryRoot(): string { - throw new Error('CodeDaemon.getRepositoryRoot() must be implemented by server'); - } - - /** - * Check if initialized (STATIC METHOD) - */ - static isInitialized(): boolean { - return false; // Overridden by server implementation - } // ======================================================================== // Workspace-Scoped Operations (Rust IPC backed, per-persona isolation) diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts index d5aae51db..460254003 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts @@ -1,231 +1,10 @@ /** * CodeDaemon Types - Shared type definitions * - * Following DataDaemon pattern with static methods and auto-context injection + * Workspace-scoped types re-exported from ts-rs generated (Rust is source of truth). + * Aliased with Workspace* prefix for domain clarity in CodeDaemon API. */ -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; - -/** - * Configuration for CodeDaemon initialization - */ -export interface CodeDaemonConfig { - /** Root directory of repository */ - repositoryRoot: string; - - /** Maximum file size to read (bytes) */ - maxFileSize?: number; - - /** Enable file content caching */ - enableCache?: boolean; - - /** Cache TTL in milliseconds */ - cacheTTL?: number; - - /** Rate limiting - max operations per minute */ - rateLimit?: number; - - /** Enable audit logging */ - enableAudit?: boolean; -} - -/** - * Context automatically injected into all CodeDaemon operations - */ -export interface CodeOperationContext { - /** Session ID of requesting user */ - sessionId: UUID; - - /** Timestamp of operation */ - timestamp: string; - - /** Source of operation (command name, daemon, etc) */ - source: string; - - /** Repository root for path validation */ - repositoryRoot: string; -} - -/** - * Options for reading files - */ -export interface CodeReadOptions { - /** Start line (1-indexed) */ - startLine?: number; - - /** End line (1-indexed) */ - endLine?: number; - - /** Include file metadata */ - includeMetadata?: boolean; - - /** Force bypass cache */ - forceRefresh?: boolean; -} - -/** - * Result of file read operation - */ -export interface CodeReadResult { - /** Operation success */ - success: boolean; - - /** File content (if success) */ - content?: string; - - /** File metadata */ - metadata: { - /** Absolute file path */ - path: string; - - /** File size in bytes */ - size: number; - - /** Total line count */ - lines: number; - - /** Lines returned (may differ if range specified) */ - linesReturned: number; - - /** Last modified timestamp */ - modified: string; - }; - - /** Was result served from cache */ - cached?: boolean; - - /** Error message (if !success) */ - error?: string; -} - -/** - * Options for searching code - */ -export interface CodeSearchOptions { - /** File pattern (glob) to search */ - filePattern?: string; - - /** Case-insensitive search */ - caseInsensitive?: boolean; - - /** Maximum results to return */ - maxResults?: number; - - /** Include context lines around match */ - contextLines?: number; -} - -/** - * Single search match - */ -export interface CodeSearchMatch { - /** File containing match */ - file: string; - - /** Line number (1-indexed) */ - line: number; - - /** Matched content */ - content: string; - - /** Context before match */ - contextBefore?: string[]; - - /** Context after match */ - contextAfter?: string[]; -} - -/** - * Result of code search operation - */ -export interface CodeSearchResult { - /** Operation success */ - success: boolean; - - /** Search pattern used */ - pattern: string; - - /** Matches found */ - matches: CodeSearchMatch[]; - - /** Total matches found */ - totalMatches: number; - - /** Total files searched */ - filesSearched: number; - - /** Error message (if !success) */ - error?: string; -} - -/** - * Git operations types - */ -export interface GitLogOptions { - /** Maximum commits to return */ - maxCount?: number; - - /** Only commits affecting this file */ - file?: string; - - /** Include patch diff */ - includeDiff?: boolean; -} - -export interface GitCommit { - /** Commit hash */ - hash: string; - - /** Author name */ - author: string; - - /** Author email */ - email: string; - - /** Commit timestamp */ - date: string; - - /** Commit message */ - message: string; - - /** Diff (if requested) */ - diff?: string; -} - -export interface GitLogResult { - success: boolean; - commits: GitCommit[]; - error?: string; -} - -/** - * Event payloads emitted by CodeDaemon - */ -export interface CodeFileReadEvent { - path: string; - size: number; - cached: boolean; - timestamp: number; -} - -export interface CodeSearchEvent { - pattern: string; - matchCount: number; - filesSearched: number; - timestamp: number; -} - -export interface CodeGitLogEvent { - file?: string; - commitCount: number; - timestamp: number; -} - -// ============================================================================ -// Workspace-Scoped Types β€” re-exported from ts-rs generated (Rust is source of truth) -// Aliased with Workspace* prefix for domain clarity in CodeDaemon API -// ============================================================================ - export type { EditMode as WorkspaceEditMode } from '../../../shared/generated/code/EditMode'; export type { WriteResult as WorkspaceWriteResult } from '../../../shared/generated/code/WriteResult'; export type { ReadResult as WorkspaceReadResult } from '../../../shared/generated/code/ReadResult'; diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index 9c26a7678..b09735376 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-01T21:12:59.323Z", + "generated": "2026-02-01T23:20:49.437Z", "version": "1.0.0", "commands": [ { @@ -3743,83 +3743,6 @@ } } }, - { - "name": "development/code/read", - "description": "code/read command types", - "params": { - "backend": { - "type": "string", - "required": true, - "description": "backend parameter" - }, - "path": { - "type": "string", - "required": true, - "description": "path parameter" - }, - "startLine": { - "type": "number", - "required": false, - "description": "startLine parameter" - }, - "endLine": { - "type": "number", - "required": false, - "description": "endLine parameter" - }, - "includeMetadata": { - "type": "boolean", - "required": false, - "description": "includeMetadata parameter" - }, - "forceRefresh": { - "type": "boolean", - "required": false, - "description": "forceRefresh parameter" - } - } - }, - { - "name": "development/code/pattern-search", - "description": "code/find command types - Find files by name pattern", - "params": { - "backend": { - "type": "string", - "required": true, - "description": "backend parameter" - }, - "pattern": { - "type": "string", - "required": true, - "description": "pattern parameter" - }, - "baseDir": { - "type": "string", - "required": false, - "description": "baseDir parameter" - }, - "caseInsensitive": { - "type": "boolean", - "required": false, - "description": "caseInsensitive parameter" - }, - "maxResults": { - "type": "number", - "required": false, - "description": "maxResults parameter" - }, - "includeHidden": { - "type": "boolean", - "required": false, - "description": "includeHidden parameter" - }, - "excludeDirs": { - "type": "array", - "required": false, - "description": "excludeDirs parameter" - } - } - }, { "name": "development/build", "description": "Development Build Command - Shared Types\n *\n * Zero-friction TypeScript build check. Returns success or structured errors.", @@ -5175,6 +5098,239 @@ } } }, + { + "name": "code/write", + "description": "Code Write Command - Shared Types\n *\n * Write or create a file in the persona's workspace. Creates a ChangeNode in the change graph for undo support. File extension must be in the allowlist.", + "params": { + "filePath": { + "type": "string", + "required": true, + "description": "filePath parameter" + }, + "content": { + "type": "string", + "required": true, + "description": "content parameter" + }, + "description": { + "type": "string", + "required": false, + "description": "description parameter" + } + } + }, + { + "name": "code/undo", + "description": "Code Undo Command - Shared Types\n *\n * Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state.", + "params": { + "changeId": { + "type": "string", + "required": false, + "description": "changeId parameter" + }, + "count": { + "type": "number", + "required": false, + "description": "count parameter" + } + } + }, + { + "name": "code/tree", + "description": "Code Tree Command - Shared Types\n *\n * Generate a directory tree for the workspace or a subdirectory. Shows file/directory structure with sizes. Skips common ignored directories (node_modules, .git, etc).", + "params": { + "path": { + "type": "string", + "required": false, + "description": "path parameter" + }, + "maxDepth": { + "type": "number", + "required": false, + "description": "maxDepth parameter" + }, + "includeHidden": { + "type": "boolean", + "required": false, + "description": "includeHidden parameter" + } + } + }, + { + "name": "code/search", + "description": "Code Search Command - Shared Types\n *\n * Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context.", + "params": { + "pattern": { + "type": "string", + "required": true, + "description": "pattern parameter" + }, + "fileGlob": { + "type": "string", + "required": false, + "description": "fileGlob parameter" + }, + "maxResults": { + "type": "number", + "required": false, + "description": "maxResults parameter" + } + } + }, + { + "name": "code/read", + "description": "Code Read Command - Shared Types\n *\n * Read a file or line range from the persona's workspace. Returns content with line numbers and metadata. Supports partial reads via start/end line parameters.", + "params": { + "filePath": { + "type": "string", + "required": true, + "description": "filePath parameter" + }, + "startLine": { + "type": "number", + "required": false, + "description": "startLine parameter" + }, + "endLine": { + "type": "number", + "required": false, + "description": "endLine parameter" + } + } + }, + { + "name": "code/history", + "description": "Code History Command - Shared Types\n *\n * Get change history for a specific file or the entire workspace. Returns change graph nodes with diffs, timestamps, and descriptions.", + "params": { + "filePath": { + "type": "string", + "required": false, + "description": "filePath parameter" + }, + "limit": { + "type": "number", + "required": false, + "description": "limit parameter" + } + } + }, + { + "name": "code/edit", + "description": "Code Edit Command - Shared Types\n *\n * Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications.", + "params": { + "filePath": { + "type": "string", + "required": true, + "description": "filePath parameter" + }, + "editType": { + "type": "string", + "required": true, + "description": "editType parameter" + }, + "search": { + "type": "string", + "required": false, + "description": "search parameter" + }, + "replace": { + "type": "string", + "required": false, + "description": "replace parameter" + }, + "replaceAll": { + "type": "boolean", + "required": false, + "description": "replaceAll parameter" + }, + "startLine": { + "type": "number", + "required": false, + "description": "startLine parameter" + }, + "endLine": { + "type": "number", + "required": false, + "description": "endLine parameter" + }, + "newContent": { + "type": "string", + "required": false, + "description": "newContent parameter" + }, + "line": { + "type": "number", + "required": false, + "description": "line parameter" + }, + "content": { + "type": "string", + "required": false, + "description": "content parameter" + }, + "description": { + "type": "string", + "required": false, + "description": "description parameter" + } + } + }, + { + "name": "code/diff", + "description": "Code Diff Command - Shared Types\n *\n * Preview an edit as a unified diff without applying it. Useful for reviewing changes before committing them. Uses the same edit modes as code/edit.", + "params": { + "filePath": { + "type": "string", + "required": true, + "description": "filePath parameter" + }, + "editType": { + "type": "string", + "required": true, + "description": "editType parameter" + }, + "search": { + "type": "string", + "required": false, + "description": "search parameter" + }, + "replace": { + "type": "string", + "required": false, + "description": "replace parameter" + }, + "replaceAll": { + "type": "boolean", + "required": false, + "description": "replaceAll parameter" + }, + "startLine": { + "type": "number", + "required": false, + "description": "startLine parameter" + }, + "endLine": { + "type": "number", + "required": false, + "description": "endLine parameter" + }, + "newContent": { + "type": "string", + "required": false, + "description": "newContent parameter" + }, + "line": { + "type": "number", + "required": false, + "description": "line parameter" + }, + "content": { + "type": "string", + "required": false, + "description": "content parameter" + } + } + }, { "name": "canvas/vision", "description": "Canvas Vision Command Types\n *\n * Enables AIs to \"see\" and interact with the drawing canvas:\n * - describe: Vision AI describes what's on the canvas\n * - transform: Use image generation to transform the sketch\n * - analyze: Structured analysis of the drawing", diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index af78b60d5..6e53e1b08 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7512", + "version": "1.0.7515", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7512", + "version": "1.0.7515", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index 742980ebb..cae63925c 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7512", + "version": "1.0.7515", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/server/generated.ts b/src/debug/jtag/server/generated.ts index 152ea81f6..8d24e08f5 100644 --- a/src/debug/jtag/server/generated.ts +++ b/src/debug/jtag/server/generated.ts @@ -1,7 +1,7 @@ /** * Server Structure Registry - Auto-generated * - * Contains 18 daemons and 198 commands and 3 adapters. + * Contains 18 daemons and 204 commands and 3 adapters. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -60,6 +60,14 @@ import { AIValidateResponseServerCommand } from './../commands/ai/validate-respo import { CanvasStrokeAddServerCommand } from './../commands/canvas/stroke/add/server/CanvasStrokeAddServerCommand'; import { CanvasStrokeListServerCommand } from './../commands/canvas/stroke/list/server/CanvasStrokeListServerCommand'; import { CanvasVisionServerCommand } from './../commands/canvas/vision/server/CanvasVisionServerCommand'; +import { CodeDiffServerCommand } from './../commands/code/diff/server/CodeDiffServerCommand'; +import { CodeEditServerCommand } from './../commands/code/edit/server/CodeEditServerCommand'; +import { CodeHistoryServerCommand } from './../commands/code/history/server/CodeHistoryServerCommand'; +import { CodeReadServerCommand } from './../commands/code/read/server/CodeReadServerCommand'; +import { CodeSearchServerCommand } from './../commands/code/search/server/CodeSearchServerCommand'; +import { CodeTreeServerCommand } from './../commands/code/tree/server/CodeTreeServerCommand'; +import { CodeUndoServerCommand } from './../commands/code/undo/server/CodeUndoServerCommand'; +import { CodeWriteServerCommand } from './../commands/code/write/server/CodeWriteServerCommand'; import { ActivityCreateServerCommand } from './../commands/collaboration/activity/create/server/ActivityCreateServerCommand'; import { ActivityGetServerCommand } from './../commands/collaboration/activity/get/server/ActivityGetServerCommand'; import { ActivityJoinServerCommand } from './../commands/collaboration/activity/join/server/ActivityJoinServerCommand'; @@ -107,8 +115,6 @@ import { DataUpdateServerCommand } from './../commands/data/update/server/DataUp import { VectorSearchServerCommand } from './../commands/data/vector-search/server/VectorSearchServerCommand'; import { BenchmarkVectorsServerCommand } from './../commands/development/benchmark-vectors/server/BenchmarkVectorsServerCommand'; import { DevelopmentBuildServerCommand } from './../commands/development/build/server/DevelopmentBuildServerCommand'; -import { CodeFindServerCommand } from './../commands/development/code/pattern-search/server/CodeFindServerCommand'; -import { CodeReadServerCommand } from './../commands/development/code/read/server/CodeReadServerCommand'; import { CompileTypescriptServerCommand } from './../commands/development/compile-typescript/server/CompileTypescriptServerCommand'; import { ArtifactsCheckServerCommand } from './../commands/development/debug/artifacts-check/server/ArtifactsCheckServerCommand'; import { ChatSendDebugServerCommand } from './../commands/development/debug/chat-send/server/ChatSendDebugServerCommand'; @@ -502,6 +508,46 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionServerCommand', commandClass: CanvasVisionServerCommand }, +{ + name: 'code/diff', + className: 'CodeDiffServerCommand', + commandClass: CodeDiffServerCommand + }, +{ + name: 'code/edit', + className: 'CodeEditServerCommand', + commandClass: CodeEditServerCommand + }, +{ + name: 'code/history', + className: 'CodeHistoryServerCommand', + commandClass: CodeHistoryServerCommand + }, +{ + name: 'code/read', + className: 'CodeReadServerCommand', + commandClass: CodeReadServerCommand + }, +{ + name: 'code/search', + className: 'CodeSearchServerCommand', + commandClass: CodeSearchServerCommand + }, +{ + name: 'code/tree', + className: 'CodeTreeServerCommand', + commandClass: CodeTreeServerCommand + }, +{ + name: 'code/undo', + className: 'CodeUndoServerCommand', + commandClass: CodeUndoServerCommand + }, +{ + name: 'code/write', + className: 'CodeWriteServerCommand', + commandClass: CodeWriteServerCommand + }, { name: 'collaboration/activity/create', className: 'ActivityCreateServerCommand', @@ -737,16 +783,6 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'DevelopmentBuildServerCommand', commandClass: DevelopmentBuildServerCommand }, -{ - name: 'development/code/pattern-search', - className: 'CodeFindServerCommand', - commandClass: CodeFindServerCommand - }, -{ - name: 'development/code/read', - className: 'CodeReadServerCommand', - commandClass: CodeReadServerCommand - }, { name: 'development/compile-typescript', className: 'CompileTypescriptServerCommand', diff --git a/src/debug/jtag/shared/generated-command-constants.ts b/src/debug/jtag/shared/generated-command-constants.ts index 461e8f0c3..1d0f509c7 100644 --- a/src/debug/jtag/shared/generated-command-constants.ts +++ b/src/debug/jtag/shared/generated-command-constants.ts @@ -59,6 +59,14 @@ export const COMMANDS = { CANVAS_STROKE_ADD: 'canvas/stroke/add', CANVAS_STROKE_LIST: 'canvas/stroke/list', CANVAS_VISION: 'canvas/vision', + CODE_DIFF: 'code/diff', + CODE_EDIT: 'code/edit', + CODE_HISTORY: 'code/history', + CODE_READ: 'code/read', + CODE_SEARCH: 'code/search', + CODE_TREE: 'code/tree', + CODE_UNDO: 'code/undo', + CODE_WRITE: 'code/write', COLLABORATION_ACTIVITY_CREATE: 'collaboration/activity/create', COLLABORATION_ACTIVITY_GET: 'collaboration/activity/get', COLLABORATION_ACTIVITY_JOIN: 'collaboration/activity/join', @@ -106,8 +114,6 @@ export const COMMANDS = { DATA_VECTOR_SEARCH: 'data/vector-search', DEVELOPMENT_BENCHMARK_VECTORS: 'development/benchmark-vectors', DEVELOPMENT_BUILD: 'development/build', - DEVELOPMENT_CODE_PATTERN_SEARCH: 'development/code/pattern-search', - DEVELOPMENT_CODE_READ: 'development/code/read', DEVELOPMENT_COMPILE_TYPESCRIPT: 'development/compile-typescript', DEVELOPMENT_DEBUG_ARTIFACTS_CHECK: 'development/debug/artifacts-check', DEVELOPMENT_DEBUG_CHAT_SEND: 'development/debug/chat-send', diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index 5aa0e4490..f4b65cf4f 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7512'; +export const VERSION = '1.0.7515'; export const PACKAGE_NAME = '@continuum/jtag'; diff --git a/src/debug/jtag/system/user/server/modules/PersonaToolDefinitions.ts b/src/debug/jtag/system/user/server/modules/PersonaToolDefinitions.ts index 4072ef8aa..cec83acc4 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaToolDefinitions.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaToolDefinitions.ts @@ -463,7 +463,7 @@ You have ${tools.length} tools available. Here they ALL are, organized by catego // Show essential tools with full details const essentialTools = tools.filter(t => ['screenshot', 'help', 'collaboration/chat/send', 'collaboration/wall/write', - 'development/code/read', 'development/code/pattern-search'].includes(t.name) + 'code/read', 'code/search'].includes(t.name) ); output += `=== FREQUENTLY USED TOOLS (with parameters) ===\n`; From bd47b0d5a654b6171697f96943f0879c92b59576 Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 17:47:28 -0600 Subject: [PATCH 03/14] =?UTF-8?q?Phase=203:=20Single-agent=20coding=20?= =?UTF-8?q?=E2=80=94=20model=20selector,=20plan=20formulator,=20orchestrat?= =?UTF-8?q?or?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodingModelSelector routes coding tasks to frontier models with provider fallback. PlanFormulator decomposes tasks into executable step DAGs via LLM. CodeAgentOrchestrator executes plans with budget enforcement, retry logic, and dependency-ordered step execution. 51 unit tests. --- .../code/server/CodeAgentOrchestrator.ts | 404 ++++++++++++++++++ .../system/code/server/CodingModelSelector.ts | 174 ++++++++ .../jtag/system/code/server/PlanFormulator.ts | 295 +++++++++++++ .../jtag/system/code/shared/CodingTypes.ts | 224 ++++++++++ .../unit/code/CodeAgentOrchestrator.test.ts | 303 +++++++++++++ .../unit/code/CodingModelSelector.test.ts | 168 ++++++++ .../tests/unit/code/PlanFormulator.test.ts | 301 +++++++++++++ 7 files changed, 1869 insertions(+) create mode 100644 src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts create mode 100644 src/debug/jtag/system/code/server/CodingModelSelector.ts create mode 100644 src/debug/jtag/system/code/server/PlanFormulator.ts create mode 100644 src/debug/jtag/system/code/shared/CodingTypes.ts create mode 100644 src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts create mode 100644 src/debug/jtag/tests/unit/code/CodingModelSelector.test.ts create mode 100644 src/debug/jtag/tests/unit/code/PlanFormulator.test.ts diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts new file mode 100644 index 000000000..4c398ab3c --- /dev/null +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -0,0 +1,404 @@ +/** + * CodeAgentOrchestrator - Executes CodingPlans step-by-step + * + * Takes a CodingPlan (DAG of steps) and executes each step via Commands.execute(), + * respecting dependency ordering. Independent steps could execute in parallel. + * + * Execution lifecycle: + * 1. Discover β€” code/tree + code/search to understand codebase + * 2. Read β€” code/read to gather context + * 3. Plan β€” PlanFormulator decomposes task (already done before orchestrator runs) + * 4. Execute β€” Run each step via code/* commands + * 5. Verify β€” After each write/edit, read back to confirm + * 6. Fix β€” If verification fails, retry (max 3 attempts per step) + * 7. Report β€” Summarize changes via code/history + * + * Budget enforcement: + * - Max duration (default 120s) + * - Max tool calls (default 15) + * - Stops gracefully when budget exceeded + */ + +import type { + CodingTask, + CodingPlan, + CodingStep, + CodingResult, + CodingResultStatus, + StepResult, + StepStatus, +} from '../shared/CodingTypes'; +import { PlanFormulator } from './PlanFormulator'; +import { CodingModelSelector } from './CodingModelSelector'; +import { Commands } from '../../core/shared/Commands'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('CodeAgentOrchestrator', 'code'); + +/** Maximum retries per failed step */ +const MAX_RETRIES_PER_STEP = 3; + +/** Default budget limits */ +const DEFAULT_MAX_DURATION_MS = 120_000; +const DEFAULT_MAX_TOOL_CALLS = 15; + +/** + * Runtime budget tracker for execution limits. + */ +class ExecutionBudget { + private readonly startTime: number; + private readonly maxDurationMs: number; + private readonly maxToolCalls: number; + private _toolCallsUsed = 0; + + constructor(maxDurationMs: number, maxToolCalls: number) { + this.startTime = performance.now(); + this.maxDurationMs = maxDurationMs; + this.maxToolCalls = maxToolCalls; + } + + recordToolCall(): void { + this._toolCallsUsed++; + } + + get toolCallsUsed(): number { + return this._toolCallsUsed; + } + + get elapsedMs(): number { + return performance.now() - this.startTime; + } + + get exceeded(): boolean { + return this.elapsedMs >= this.maxDurationMs || this._toolCallsUsed >= this.maxToolCalls; + } + + get remainingToolCalls(): number { + return Math.max(0, this.maxToolCalls - this._toolCallsUsed); + } + + get reason(): string { + if (this.elapsedMs >= this.maxDurationMs) return 'time_exceeded'; + if (this._toolCallsUsed >= this.maxToolCalls) return 'tool_calls_exceeded'; + return 'ok'; + } +} + +export class CodeAgentOrchestrator { + private readonly modelSelector: CodingModelSelector; + private readonly planFormulator: PlanFormulator; + + constructor(modelSelector?: CodingModelSelector) { + this.modelSelector = modelSelector ?? new CodingModelSelector(); + this.planFormulator = new PlanFormulator(this.modelSelector); + } + + /** + * Execute a coding task end-to-end: + * 1. Optionally discover codebase context + * 2. Formulate a plan via LLM + * 3. Execute each step + * 4. Return results + */ + async execute(task: CodingTask): Promise { + const budget = new ExecutionBudget( + task.maxDurationMs ?? DEFAULT_MAX_DURATION_MS, + task.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS, + ); + + log.info(`Starting task: ${task.description.slice(0, 80)}... (budget: ${budget.remainingToolCalls} calls)`); + + const filesModified: string[] = []; + const filesCreated: string[] = []; + const changeIds: string[] = []; + const errors: string[] = []; + const stepResults: StepResult[] = []; + + try { + // Phase 1: Discovery (optional β€” gather codebase context for planning) + let codebaseContext: string | undefined; + if (!budget.exceeded) { + codebaseContext = await this.discoverContext(task, budget); + } + + // Phase 2: Plan formulation + if (budget.exceeded) { + return this.buildResult(task, 'budget_exceeded', 'Budget exceeded before planning', stepResults, filesModified, filesCreated, changeIds, errors, budget); + } + + const plan = await this.planFormulator.formulate(task, codebaseContext); + log.info(`Plan: "${plan.summary}" β€” ${plan.steps.length} steps`); + + // Phase 3: Execute plan steps in dependency order + const completedSteps = new Set(); + + for (const step of plan.steps) { + if (budget.exceeded) { + log.warn(`Budget exceeded at step ${step.stepNumber}, stopping`); + stepResults.push({ + stepNumber: step.stepNumber, + status: 'skipped', + durationMs: 0, + toolCall: step.toolCall, + error: `Budget exceeded (${budget.reason})`, + }); + continue; + } + + // Check dependencies are met + const depsOk = step.dependsOn.every(dep => completedSteps.has(dep)); + if (!depsOk) { + const missingDeps = step.dependsOn.filter(d => !completedSteps.has(d)); + log.warn(`Step ${step.stepNumber} skipped β€” dependencies not met: ${missingDeps.join(', ')}`); + stepResults.push({ + stepNumber: step.stepNumber, + status: 'skipped', + durationMs: 0, + toolCall: step.toolCall, + error: `Dependencies not met: steps ${missingDeps.join(', ')}`, + }); + continue; + } + + // Execute step with retry + const result = await this.executeStepWithRetry(step, task, budget); + stepResults.push(result); + + if (result.status === 'completed') { + completedSteps.add(step.stepNumber); + + // Track file changes + this.trackChanges(step, result, filesModified, filesCreated, changeIds); + } else { + errors.push(`Step ${step.stepNumber} (${step.action}): ${result.error ?? 'unknown error'}`); + } + } + + // Determine overall status + const allCompleted = stepResults.every(r => r.status === 'completed'); + const anyCompleted = stepResults.some(r => r.status === 'completed'); + const status: CodingResultStatus = allCompleted + ? 'completed' + : anyCompleted + ? 'partial' + : budget.exceeded + ? 'budget_exceeded' + : 'failed'; + + const summary = allCompleted + ? `Completed: ${plan.summary}` + : `${status}: ${stepResults.filter(r => r.status === 'completed').length}/${plan.steps.length} steps completed`; + + return this.buildResult(task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget); + + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + log.error(`Task failed: ${message}`); + errors.push(message); + return this.buildResult(task, 'failed', `Failed: ${message}`, stepResults, filesModified, filesCreated, changeIds, errors, budget); + } + } + + /** + * Discover codebase context for planning. + * Runs code/tree on the workspace root (or relevant paths). + */ + private async discoverContext(task: CodingTask, budget: ExecutionBudget): Promise { + try { + // Get workspace tree + const treeResult = await Commands.execute('code/tree', { + userId: task.personaId, + path: '', + maxDepth: 3, + }); + budget.recordToolCall(); + + if (!treeResult?.success) { + return undefined; + } + + let context = `## Workspace Tree\n${JSON.stringify(treeResult.root, null, 2).slice(0, 2000)}`; + + // If relevant files are specified, read their contents + if (task.relevantFiles && task.relevantFiles.length > 0 && !budget.exceeded) { + for (const file of task.relevantFiles.slice(0, 3)) { // Max 3 files for context + if (budget.exceeded) break; + + const readResult = await Commands.execute('code/read', { + userId: task.personaId, + filePath: file, + }); + budget.recordToolCall(); + + if (readResult?.success && readResult.content) { + // Truncate large files + const content = readResult.content.length > 3000 + ? readResult.content.slice(0, 3000) + '\n... (truncated)' + : readResult.content; + context += `\n\n## ${file}\n\`\`\`\n${content}\n\`\`\``; + } + } + } + + return context; + } catch (error) { + log.warn(`Discovery failed: ${error instanceof Error ? error.message : String(error)}`); + return undefined; + } + } + + /** + * Execute a single step with retry logic. + */ + private async executeStepWithRetry( + step: CodingStep, + task: CodingTask, + budget: ExecutionBudget, + ): Promise { + let lastError: string | undefined; + + for (let attempt = 0; attempt < MAX_RETRIES_PER_STEP; attempt++) { + if (budget.exceeded) { + return { + stepNumber: step.stepNumber, + status: 'failed', + durationMs: 0, + toolCall: step.toolCall, + error: `Budget exceeded before retry ${attempt + 1}`, + }; + } + + const result = await this.executeStep(step, task, budget); + + if (result.status === 'completed') { + return result; + } + + lastError = result.error; + if (attempt < MAX_RETRIES_PER_STEP - 1) { + log.warn(`Step ${step.stepNumber} failed (attempt ${attempt + 1}/${MAX_RETRIES_PER_STEP}): ${lastError}`); + } + } + + return { + stepNumber: step.stepNumber, + status: 'failed', + durationMs: 0, + toolCall: step.toolCall, + error: `Failed after ${MAX_RETRIES_PER_STEP} attempts: ${lastError}`, + }; + } + + /** + * Execute a single step via Commands.execute(). + */ + private async executeStep( + step: CodingStep, + task: CodingTask, + budget: ExecutionBudget, + ): Promise { + const startTime = performance.now(); + + try { + log.debug(`Step ${step.stepNumber}: ${step.action} β€” ${step.description}`); + + // Inject personaId (userId) into params for workspace scoping + const params = { + ...step.toolParams, + userId: task.personaId, + }; + + const result = await Commands.execute(step.toolCall, params); + budget.recordToolCall(); + + const durationMs = performance.now() - startTime; + const success = result?.success === true; + + if (!success) { + const error = result?.error?.message ?? result?.error ?? 'Command returned success=false'; + return { + stepNumber: step.stepNumber, + status: 'failed', + output: result, + error: typeof error === 'string' ? error : JSON.stringify(error), + durationMs, + toolCall: step.toolCall, + }; + } + + return { + stepNumber: step.stepNumber, + status: 'completed', + output: result, + durationMs, + toolCall: step.toolCall, + }; + } catch (error) { + const durationMs = performance.now() - startTime; + const message = error instanceof Error ? error.message : String(error); + return { + stepNumber: step.stepNumber, + status: 'failed', + error: message, + durationMs, + toolCall: step.toolCall, + }; + } + } + + /** + * Track file modifications and change IDs from step results. + */ + private trackChanges( + step: CodingStep, + result: StepResult, + filesModified: string[], + filesCreated: string[], + changeIds: string[], + ): void { + const output = result.output as Record | undefined; + + if (step.action === 'write' || step.action === 'edit') { + for (const file of step.targetFiles) { + if (step.action === 'write' && !filesModified.includes(file)) { + filesCreated.push(file); + } else if (!filesModified.includes(file)) { + filesModified.push(file); + } + } + + // Extract changeId from write/edit results + if (output?.changeId && typeof output.changeId === 'string') { + changeIds.push(output.changeId); + } + } + } + + /** + * Build the final CodingResult. + */ + private buildResult( + task: CodingTask, + status: CodingResultStatus, + summary: string, + stepResults: StepResult[], + filesModified: string[], + filesCreated: string[], + changeIds: string[], + errors: string[], + budget: ExecutionBudget, + ): CodingResult { + return { + taskId: task.id, + status, + summary, + stepResults, + filesModified, + filesCreated, + totalToolCalls: budget.toolCallsUsed, + totalDurationMs: budget.elapsedMs, + changeIds, + errors, + }; + } +} diff --git a/src/debug/jtag/system/code/server/CodingModelSelector.ts b/src/debug/jtag/system/code/server/CodingModelSelector.ts new file mode 100644 index 000000000..8b224917b --- /dev/null +++ b/src/debug/jtag/system/code/server/CodingModelSelector.ts @@ -0,0 +1,174 @@ +/** + * CodingModelSelector - Routes coding tasks to appropriate frontier models + * + * Coding requires frontier models (Claude, GPT, DeepSeek) β€” not local Ollama. + * This selector maps task types to model tiers: + * + * | Task Type | Model Tier | Why | + * |-------------|----------------|----------------------------------------| + * | Planning | Best reasoning | Architecture decisions need deep thought| + * | Generation | Best coding | Writing code needs strong coding models | + * | Editing | Best coding | Modifying code needs precision | + * | Review | Any frontier | Analysis is broadly capable | + * | Quick fix | Fast + cheap | Typos and simple fixes | + * | Discovery | Fast + cheap | Codebase exploration is simple | + * + * Provider fallback: anthropic β†’ openai β†’ deepseek β†’ groq + */ + +import type { CodingTaskType, CodingModelTier } from '../shared/CodingTypes'; +import { MODEL_IDS } from '../../shared/Constants'; +import { SOTA_PROVIDERS } from '../../user/server/config/PersonaModelConfigs'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('CodingModelSelector', 'code'); + +/** + * Default model tiers for each task type. + * Ordered by preference β€” first available provider wins. + */ +const DEFAULT_TIERS: Record = { + planning: { + taskType: 'planning', + provider: 'anthropic', + model: MODEL_IDS.ANTHROPIC.SONNET_4_5, + temperature: 0.3, + maxTokens: 4000, + description: 'Planning/architecture β€” best reasoning model', + }, + generation: { + taskType: 'generation', + provider: 'anthropic', + model: MODEL_IDS.ANTHROPIC.SONNET_4_5, + temperature: 0.4, + maxTokens: 4000, + description: 'Code generation β€” strong coding model', + }, + editing: { + taskType: 'editing', + provider: 'anthropic', + model: MODEL_IDS.ANTHROPIC.SONNET_4_5, + temperature: 0.2, + maxTokens: 4000, + description: 'Code editing β€” precise, low temperature', + }, + review: { + taskType: 'review', + provider: 'deepseek', + model: MODEL_IDS.DEEPSEEK.CHAT, + temperature: 0.3, + maxTokens: 3000, + description: 'Code review β€” any frontier model works', + }, + 'quick-fix': { + taskType: 'quick-fix', + provider: 'groq', + model: MODEL_IDS.GROQ.LLAMA_3_1_70B, + temperature: 0.2, + maxTokens: 2000, + description: 'Quick fixes β€” fast and cheap', + }, + discovery: { + taskType: 'discovery', + provider: 'groq', + model: MODEL_IDS.GROQ.LLAMA_3_1_8B, + temperature: 0.1, + maxTokens: 1000, + description: 'Discovery β€” codebase exploration, fast', + }, +}; + +/** + * Provider fallback order when preferred provider is unavailable. + * Prioritizes SOTA providers with strong coding capabilities. + */ +const PROVIDER_FALLBACK_ORDER: readonly string[] = [ + 'anthropic', + 'openai', + 'deepseek', + 'xai', + 'google', + 'groq', + 'together', + 'fireworks', +] as const; + +/** + * Fallback models per provider (when the preferred model isn't available). + */ +const FALLBACK_MODELS: Record = { + 'anthropic': MODEL_IDS.ANTHROPIC.SONNET_4_5, + 'openai': MODEL_IDS.OPENAI.GPT_4, + 'deepseek': MODEL_IDS.DEEPSEEK.CHAT, + 'groq': MODEL_IDS.GROQ.LLAMA_3_1_70B, + 'xai': MODEL_IDS.XAI.GROK_4, + 'google': 'gemini-2.0-flash', + 'together': MODEL_IDS.TOGETHER.LLAMA_3_1_70B, + 'fireworks': MODEL_IDS.FIREWORKS.LLAMA_3_1_70B, +}; + +export class CodingModelSelector { + private _availableProviders: Set; + + /** + * @param availableProviders - Set of provider names that are currently registered and healthy. + * Pass SOTA_PROVIDERS for production, or a subset for testing. + */ + constructor(availableProviders?: Set) { + this._availableProviders = availableProviders ?? new Set(SOTA_PROVIDERS); + } + + /** + * Update the set of available providers (e.g., after health check). + */ + set availableProviders(providers: Set) { + this._availableProviders = providers; + } + + /** + * Select the best model tier for a given task type. + * Falls through provider fallback order if preferred provider is unavailable. + */ + select(taskType: CodingTaskType): CodingModelTier { + const defaultTier = DEFAULT_TIERS[taskType]; + + // Try the default provider first + if (this._availableProviders.has(defaultTier.provider)) { + log.debug(`Selected ${defaultTier.provider}/${defaultTier.model} for ${taskType}`); + return defaultTier; + } + + // Fallback through provider order + for (const provider of PROVIDER_FALLBACK_ORDER) { + if (this._availableProviders.has(provider)) { + const model = FALLBACK_MODELS[provider]; + const fallbackTier: CodingModelTier = { + ...defaultTier, + provider, + model, + description: `${defaultTier.description} (fallback: ${provider})`, + }; + log.debug(`Fallback: ${provider}/${model} for ${taskType} (preferred ${defaultTier.provider} unavailable)`); + return fallbackTier; + } + } + + // Last resort β€” return default tier anyway, let AIProviderDaemon handle the error + log.warn(`No SOTA provider available for ${taskType}, using default tier (may fail)`); + return defaultTier; + } + + /** + * Check if any frontier model is available for coding tasks. + */ + get hasFrontierModel(): boolean { + return PROVIDER_FALLBACK_ORDER.some(p => this._availableProviders.has(p)); + } + + /** + * Get all configured tiers (for debugging/reporting). + */ + get allTiers(): readonly CodingModelTier[] { + return Object.values(DEFAULT_TIERS); + } +} diff --git a/src/debug/jtag/system/code/server/PlanFormulator.ts b/src/debug/jtag/system/code/server/PlanFormulator.ts new file mode 100644 index 000000000..95d0dd46d --- /dev/null +++ b/src/debug/jtag/system/code/server/PlanFormulator.ts @@ -0,0 +1,295 @@ +/** + * PlanFormulator - LLM-powered task decomposition for coding tasks + * + * Takes a CodingTask + codebase context and produces a CodingPlan (DAG of steps). + * Uses a reasoning-class model (via CodingModelSelector) to decompose the task + * into concrete code/* command invocations. + * + * The LLM receives: + * - Task description + * - Available code/* tools with parameter schemas + * - Codebase context (tree, relevant file contents) + * - Constraints (max tool calls, max duration) + * + * The LLM returns a JSON CodingPlan that the CodeAgentOrchestrator executes. + */ + +import type { CodingTask, CodingPlan, CodingStep, CodingAction } from '../shared/CodingTypes'; +import { CodingModelSelector } from './CodingModelSelector'; +import { AIProviderDaemon } from '../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; +import type { TextGenerationRequest, ChatMessage } from '../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('PlanFormulator', 'code'); + +/** + * Available code/* tools for the LLM to plan with. + * Each entry describes what the tool does and its parameters. + */ +const CODE_TOOL_SCHEMAS: readonly { name: string; description: string; params: string }[] = [ + { + name: 'code/tree', + description: 'List directory tree structure. Shows files and directories with sizes.', + params: 'path?: string, maxDepth?: number, includeHidden?: boolean', + }, + { + name: 'code/search', + description: 'Search for a regex pattern across workspace files.', + params: 'pattern: string, fileGlob?: string, maxResults?: number', + }, + { + name: 'code/read', + description: 'Read file contents. Can specify line range.', + params: 'filePath: string, startLine?: number, endLine?: number', + }, + { + name: 'code/write', + description: 'Create or overwrite a file. Records a ChangeNode for undo.', + params: 'filePath: string, content: string, description?: string', + }, + { + name: 'code/edit', + description: 'Edit a file using search-replace, line-range, insert-at, or append. Records a ChangeNode.', + params: 'filePath: string, editMode: { type: "search_replace", search: string, replace: string, replaceAll?: boolean } | { type: "line_range", startLine: number, endLine: number, newContent: string } | { type: "insert_at", line: number, content: string } | { type: "append", content: string }, description?: string', + }, + { + name: 'code/diff', + description: 'Preview an edit as unified diff without applying it.', + params: 'filePath: string, editMode: (same as code/edit)', + }, + { + name: 'code/undo', + description: 'Undo a specific change or the last N changes.', + params: 'changeId?: string, count?: number', + }, + { + name: 'code/history', + description: 'View change history for a file or workspace.', + params: 'filePath?: string, limit?: number', + }, +] as const; + +/** Valid actions the LLM can use in plan steps */ +const VALID_ACTIONS: ReadonlySet = new Set([ + 'discover', 'search', 'read', 'write', 'edit', 'diff', 'undo', 'verify', 'report', +]); + +/** Map from action to the expected code/* command */ +const ACTION_TO_COMMAND: Record = { + discover: 'code/tree', + search: 'code/search', + read: 'code/read', + write: 'code/write', + edit: 'code/edit', + diff: 'code/diff', + undo: 'code/undo', + verify: 'code/read', // Verify by reading back + report: 'code/history', +}; + +export class PlanFormulator { + private readonly modelSelector: CodingModelSelector; + + constructor(modelSelector: CodingModelSelector) { + this.modelSelector = modelSelector; + } + + /** + * Generate a CodingPlan for a task. + * + * @param task - The coding task to plan + * @param codebaseContext - Optional pre-fetched context (tree output, file contents) + * @returns A validated CodingPlan ready for execution + */ + async formulate(task: CodingTask, codebaseContext?: string): Promise { + const startTime = performance.now(); + log.info(`Formulating plan for task: ${task.description.slice(0, 80)}...`); + + const tier = this.modelSelector.select('planning'); + const messages = this.buildPlanningPrompt(task, codebaseContext); + + const request: TextGenerationRequest = { + messages, + model: tier.model, + temperature: tier.temperature, + maxTokens: tier.maxTokens, + preferredProvider: tier.provider, + purpose: 'coding-plan', + userId: task.personaId, + }; + + const response = await AIProviderDaemon.generateText(request); + + if (!response.text) { + throw new Error('PlanFormulator: LLM returned empty response'); + } + + const plan = this.parsePlanResponse(response.text, task, tier.provider, tier.model); + const durationMs = performance.now() - startTime; + + log.info(`Plan generated: ${plan.steps.length} steps, ${plan.estimatedToolCalls} tool calls (${Math.round(durationMs)}ms)`); + return plan; + } + + /** + * Build the prompt messages for plan generation. + */ + private buildPlanningPrompt(task: CodingTask, codebaseContext?: string): ChatMessage[] { + const toolDocs = CODE_TOOL_SCHEMAS + .map(t => `- **${t.name}**: ${t.description}\n Params: ${t.params}`) + .join('\n'); + + const maxToolCalls = task.maxToolCalls ?? 15; + const maxDurationSec = Math.round((task.maxDurationMs ?? 120000) / 1000); + + const systemPrompt = `You are a coding agent planner. Your job is to decompose a coding task into a concrete plan of steps. + +## Available Tools +${toolDocs} + +## Constraints +- Maximum ${maxToolCalls} tool calls total +- Maximum ${maxDurationSec} seconds execution time +- Always read files before editing them +- Always verify changes after editing (read back or diff) +- Prefer code/edit over code/write for existing files +- Use code/tree and code/search for discovery before making changes + +## Output Format +Respond with ONLY a JSON object (no markdown, no explanation): +{ + "summary": "Brief description of the approach", + "steps": [ + { + "stepNumber": 1, + "action": "discover|search|read|write|edit|diff|undo|verify|report", + "description": "What this step does", + "targetFiles": ["path/to/file.ts"], + "toolCall": "code/tree", + "toolParams": { "path": "src/" }, + "dependsOn": [], + "verification": "How to verify success" + } + ] +} + +## Rules +1. Steps are numbered starting from 1 +2. dependsOn lists step numbers that must complete first (DAG) +3. Independent steps CAN have the same dependsOn (parallel execution) +4. Every write/edit MUST have a preceding read of the same file +5. action must be one of: discover, search, read, write, edit, diff, undo, verify, report +6. toolCall must match a code/* command from the tools list +7. toolParams must match the command's parameter schema +8. Keep plans minimal β€” don't add unnecessary steps`; + + const messages: ChatMessage[] = [ + { role: 'system', content: systemPrompt }, + ]; + + if (codebaseContext) { + messages.push({ + role: 'user', + content: `## Codebase Context\n${codebaseContext}`, + }); + } + + if (task.relevantFiles && task.relevantFiles.length > 0) { + messages.push({ + role: 'user', + content: `## Relevant Files (hints)\n${task.relevantFiles.join('\n')}`, + }); + } + + messages.push({ + role: 'user', + content: `## Task\n${task.description}\n\nGenerate the execution plan as JSON.`, + }); + + return messages; + } + + /** + * Parse and validate the LLM's plan response. + */ + private parsePlanResponse( + responseText: string, + task: CodingTask, + provider: string, + model: string, + ): CodingPlan { + // Extract JSON from response (LLM may wrap in markdown code blocks) + const jsonMatch = responseText.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + throw new Error('PlanFormulator: No JSON object found in LLM response'); + } + + let raw: unknown; + try { + raw = JSON.parse(jsonMatch[0]); + } catch (e) { + throw new Error(`PlanFormulator: Invalid JSON in LLM response: ${(e as Error).message}`); + } + + const parsed = raw as { summary?: string; steps?: unknown[] }; + + if (!parsed.summary || typeof parsed.summary !== 'string') { + throw new Error('PlanFormulator: Plan missing "summary" field'); + } + + if (!Array.isArray(parsed.steps) || parsed.steps.length === 0) { + throw new Error('PlanFormulator: Plan has no steps'); + } + + const maxToolCalls = task.maxToolCalls ?? 15; + if (parsed.steps.length > maxToolCalls) { + throw new Error(`PlanFormulator: Plan has ${parsed.steps.length} steps, exceeds max ${maxToolCalls}`); + } + + // Validate each step + const steps: CodingStep[] = parsed.steps.map((rawStep, index) => { + const step = rawStep as Record; + const stepNum = (step.stepNumber as number) ?? (index + 1); + + // Validate action + const action = step.action as string; + if (!VALID_ACTIONS.has(action)) { + throw new Error(`PlanFormulator: Step ${stepNum} has invalid action "${action}"`); + } + + // Validate toolCall + const toolCall = (step.toolCall as string) ?? ACTION_TO_COMMAND[action as CodingAction]; + if (!toolCall.startsWith('code/')) { + throw new Error(`PlanFormulator: Step ${stepNum} toolCall "${toolCall}" is not a code/* command`); + } + + // Validate dependsOn references + const dependsOn = (step.dependsOn as number[]) ?? []; + for (const dep of dependsOn) { + if (dep < 1 || dep >= stepNum) { + throw new Error(`PlanFormulator: Step ${stepNum} depends on invalid step ${dep}`); + } + } + + return { + stepNumber: stepNum, + action: action as CodingAction, + description: (step.description as string) ?? `Step ${stepNum}`, + targetFiles: (step.targetFiles as string[]) ?? [], + toolCall, + toolParams: (step.toolParams as Record) ?? {}, + dependsOn, + verification: (step.verification as string) ?? '', + }; + }); + + return { + taskId: task.id, + steps, + summary: parsed.summary, + estimatedToolCalls: steps.length, + generatedBy: { provider, model }, + generatedAt: Date.now(), + }; + } +} diff --git a/src/debug/jtag/system/code/shared/CodingTypes.ts b/src/debug/jtag/system/code/shared/CodingTypes.ts new file mode 100644 index 000000000..fa3775e45 --- /dev/null +++ b/src/debug/jtag/system/code/shared/CodingTypes.ts @@ -0,0 +1,224 @@ +/** + * Coding Agent Types - Shared type definitions for the coding agent system + * + * Defines the data structures for: + * - CodingTask: What the agent needs to accomplish + * - CodingPlan: DAG of steps to accomplish the task + * - CodingStep: Individual operation in the plan + * - CodingResult: Outcome of executing a plan + * - CodingModelTier: Model selection by task complexity + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; + +// ============================================================================ +// Model Selection +// ============================================================================ + +/** + * Task types that determine which model tier to use. + * Higher-capability models for planning, cheaper models for quick fixes. + */ +export type CodingTaskType = + | 'planning' // Architecture, task decomposition β€” needs best reasoning + | 'generation' // Writing new code β€” needs strong coding ability + | 'editing' // Modifying existing code β€” needs strong coding ability + | 'review' // Code review, analysis β€” any frontier model + | 'quick-fix' // Small fixes, typos β€” fast and cheap + | 'discovery'; // Exploring codebase structure β€” fast and cheap + +/** + * Model tier configuration for a specific task type. + * CodingModelSelector maps CodingTaskType β†’ CodingModelTier. + */ +export interface CodingModelTier { + readonly taskType: CodingTaskType; + readonly provider: string; + readonly model: string; + readonly temperature: number; + readonly maxTokens: number; + readonly description: string; +} + +// ============================================================================ +// Coding Task +// ============================================================================ + +/** + * A coding task is the input to the coding agent system. + * It describes what needs to be done, who's doing it, and constraints. + */ +export interface CodingTask { + /** Unique task ID */ + readonly id: UUID; + + /** Persona executing this task */ + readonly personaId: UUID; + + /** Human-readable task description */ + readonly description: string; + + /** Task type for model selection */ + readonly taskType: CodingTaskType; + + /** Room/context this task originated from */ + readonly contextId?: UUID; + + /** Files already known to be relevant (hints for discovery) */ + readonly relevantFiles?: string[]; + + /** Maximum execution time in milliseconds (default: 120000) */ + readonly maxDurationMs?: number; + + /** Maximum number of tool calls allowed (default: 15) */ + readonly maxToolCalls?: number; + + /** When the task was created */ + readonly createdAt: number; +} + +// ============================================================================ +// Coding Plan (DAG of Steps) +// ============================================================================ + +/** + * Actions a coding step can perform. + * Each maps to a code/* command or meta-operation. + */ +export type CodingAction = + | 'discover' // code/tree β€” explore structure + | 'search' // code/search β€” find patterns + | 'read' // code/read β€” read file contents + | 'write' // code/write β€” create/overwrite file + | 'edit' // code/edit β€” partial edit + | 'diff' // code/diff β€” preview changes + | 'undo' // code/undo β€” revert changes + | 'verify' // Meta: check results (build, test, read-back) + | 'report'; // Meta: summarize what was done + +/** + * A single step in a CodingPlan. + * Steps form a DAG via dependsOn β€” independent steps can execute in parallel. + */ +export interface CodingStep { + /** Step number (1-indexed, unique within plan) */ + readonly stepNumber: number; + + /** What this step does */ + readonly action: CodingAction; + + /** Human-readable description of what this step accomplishes */ + readonly description: string; + + /** Files this step will operate on */ + readonly targetFiles: string[]; + + /** Which code/* command to execute (e.g., 'code/read', 'code/edit') */ + readonly toolCall: string; + + /** Parameters for the tool call */ + readonly toolParams: Record; + + /** Steps that must complete before this one (DAG edges) */ + readonly dependsOn: number[]; + + /** How to verify this step succeeded */ + readonly verification: string; +} + +/** + * A coding plan is a DAG of CodingSteps produced by the PlanFormulator. + * The orchestrator executes steps respecting dependency ordering. + */ +export interface CodingPlan { + /** The task this plan addresses */ + readonly taskId: UUID; + + /** Ordered steps (topologically sorted) */ + readonly steps: CodingStep[]; + + /** High-level summary of the approach */ + readonly summary: string; + + /** Estimated total tool calls */ + readonly estimatedToolCalls: number; + + /** Which model generated this plan */ + readonly generatedBy: { + readonly provider: string; + readonly model: string; + }; + + /** When the plan was generated */ + readonly generatedAt: number; +} + +// ============================================================================ +// Step Execution Result +// ============================================================================ + +export type StepStatus = 'pending' | 'running' | 'completed' | 'failed' | 'skipped'; + +/** + * Result of executing a single CodingStep. + */ +export interface StepResult { + /** Which step */ + readonly stepNumber: number; + + /** Execution status */ + readonly status: StepStatus; + + /** Command output (if any) */ + readonly output?: unknown; + + /** Error message (if failed) */ + readonly error?: string; + + /** Execution time in milliseconds */ + readonly durationMs: number; + + /** Tool call used */ + readonly toolCall: string; +} + +// ============================================================================ +// Coding Result (Final Output) +// ============================================================================ + +export type CodingResultStatus = 'completed' | 'partial' | 'failed' | 'budget_exceeded'; + +/** + * Final result of executing a coding task. + */ +export interface CodingResult { + /** The task that was executed */ + readonly taskId: UUID; + + /** Overall status */ + readonly status: CodingResultStatus; + + /** Summary of what was accomplished */ + readonly summary: string; + + /** Results for each step */ + readonly stepResults: StepResult[]; + + /** Files that were modified */ + readonly filesModified: string[]; + + /** Files that were created */ + readonly filesCreated: string[]; + + /** Total tool calls used */ + readonly totalToolCalls: number; + + /** Total execution time in milliseconds */ + readonly totalDurationMs: number; + + /** Change IDs from code/write and code/edit for potential undo */ + readonly changeIds: string[]; + + /** Errors encountered */ + readonly errors: string[]; +} diff --git a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts new file mode 100644 index 000000000..85256a972 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts @@ -0,0 +1,303 @@ +/** + * CodeAgentOrchestrator Unit Tests + * + * Tests the execution engine by mocking PlanFormulator and Commands.execute. + * Validates: + * - Step execution in dependency order + * - Budget enforcement (time and tool calls) + * - Retry logic on step failure + * - Result aggregation (filesModified, changeIds, errors) + * - Graceful degradation on partial completion + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { CodeAgentOrchestrator } from '../../../system/code/server/CodeAgentOrchestrator'; +import type { CodingTask } from '../../../system/code/shared/CodingTypes'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// Mock AIProviderDaemon (used by PlanFormulator) +const mockGenerateText = vi.fn(); +vi.mock('../../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ + AIProviderDaemon: { + generateText: (...args: unknown[]) => mockGenerateText(...args), + }, +})); + +// Mock Commands.execute (used by orchestrator for code/* calls) +const mockExecute = vi.fn(); +vi.mock('../../../system/core/shared/Commands', () => ({ + Commands: { + execute: (...args: unknown[]) => mockExecute(...args), + }, +})); + +// Mock Logger +vi.mock('../../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +function makeTask(overrides?: Partial): CodingTask { + return { + id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, + personaId: '11111111-2222-3333-4444-555555555555' as UUID, + description: 'Add a greet function to utils.ts', + taskType: 'generation', + maxToolCalls: 20, + maxDurationMs: 120000, + createdAt: Date.now(), + ...overrides, + }; +} + +/** Mock PlanFormulator returning a simple 3-step plan */ +function mockSimplePlan() { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Read, edit, verify', + steps: [ + { + stepNumber: 1, + action: 'read', + description: 'Read utils.ts', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'File read', + }, + { + stepNumber: 2, + action: 'edit', + description: 'Add greet function', + targetFiles: ['utils.ts'], + toolCall: 'code/edit', + toolParams: { filePath: 'utils.ts', editMode: { type: 'append', content: 'function greet() {}' } }, + dependsOn: [1], + verification: 'Edit applied', + }, + { + stepNumber: 3, + action: 'verify', + description: 'Verify changes', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [2], + verification: 'greet function present', + }, + ], + }), + }); +} + +describe('CodeAgentOrchestrator', () => { + let orchestrator: CodeAgentOrchestrator; + + beforeEach(() => { + mockGenerateText.mockReset(); + mockExecute.mockReset(); + orchestrator = new CodeAgentOrchestrator(); + }); + + describe('execute - happy path', () => { + it('executes all plan steps and returns completed', async () => { + mockSimplePlan(); + + // Discovery (code/tree) + 3 plan steps + mockExecute + .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) + .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read + .mockResolvedValueOnce({ success: true, changeId: 'c1' }) // step 2: code/edit + .mockResolvedValueOnce({ success: true, content: 'new' }); // step 3: code/read (verify) + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('completed'); + expect(result.stepResults).toHaveLength(3); + expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); + expect(result.totalToolCalls).toBeGreaterThanOrEqual(4); // 1 discovery + 3 steps + }); + + it('tracks modified files from edit steps', async () => { + mockSimplePlan(); + mockExecute + .mockResolvedValueOnce({ success: true, root: {} }) + .mockResolvedValueOnce({ success: true, content: 'old' }) + .mockResolvedValueOnce({ success: true, changeId: 'change-123' }) + .mockResolvedValueOnce({ success: true, content: 'new' }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.filesModified).toContain('utils.ts'); + expect(result.changeIds).toContain('change-123'); + }); + + it('includes execution timing', async () => { + mockSimplePlan(); + mockExecute.mockResolvedValue({ success: true }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.totalDurationMs).toBeGreaterThan(0); + for (const step of result.stepResults) { + expect(step.durationMs).toBeGreaterThanOrEqual(0); + } + }); + }); + + describe('budget enforcement', () => { + it('stops when max tool calls exceeded', async () => { + mockSimplePlan(); + + // Task with only 2 tool calls allowed (discovery uses 1, only 1 left for plan) + mockExecute.mockResolvedValue({ success: true }); + + const result = await orchestrator.execute(makeTask({ maxToolCalls: 3 })); + + // Should have stopped partway through + expect(result.totalToolCalls).toBeLessThanOrEqual(3); + const skipped = result.stepResults.filter(r => r.status === 'skipped'); + expect(skipped.length).toBeGreaterThan(0); + }); + + it('reports partial or budget_exceeded when budget runs out mid-execution', async () => { + // Plan with 5 steps (within maxToolCalls for formulation) + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Five reads', + steps: Array.from({ length: 5 }, (_, i) => ({ + stepNumber: i + 1, + action: 'read', + targetFiles: [`file${i}.ts`], + toolCall: 'code/read', + toolParams: { filePath: `file${i}.ts` }, + dependsOn: i > 0 ? [i] : [], + verification: 'ok', + })), + }), + }); + + mockExecute.mockResolvedValue({ success: true }); + + // 5 tool calls total: 1 for discovery leaves 4 for 5 plan steps = can't finish all + const result = await orchestrator.execute(makeTask({ maxToolCalls: 5 })); + + // Some steps completed, some skipped due to budget + expect(['partial', 'budget_exceeded']).toContain(result.status); + const skipped = result.stepResults.filter(r => r.status === 'skipped'); + expect(skipped.length).toBeGreaterThan(0); + }); + }); + + describe('step failure and retry', () => { + it('retries failed steps up to 3 times', async () => { + mockSimplePlan(); + + let callCount = 0; + mockExecute.mockImplementation(async (cmd: string) => { + callCount++; + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') { + // Fail first 2 times, succeed on 3rd + if (callCount <= 4) return { success: false, error: 'Conflict' }; + return { success: true, changeId: 'c1' }; + } + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + // Step 2 (edit) should have retried and eventually succeeded + const editStep = result.stepResults.find(r => r.toolCall === 'code/edit'); + expect(editStep?.status).toBe('completed'); + }); + + it('marks step as failed after max retries', async () => { + mockSimplePlan(); + + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') return { success: false, error: 'Always fails' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + const editStep = result.stepResults.find(r => r.toolCall === 'code/edit'); + expect(editStep?.status).toBe('failed'); + expect(editStep?.error).toContain('Always fails'); + }); + + it('skips dependent steps when dependency fails', async () => { + mockSimplePlan(); + + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') return { success: false, error: 'Edit failed' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + // Step 3 (verify) depends on step 2 (edit) which failed + const verifyStep = result.stepResults.find(r => r.stepNumber === 3); + expect(verifyStep?.status).toBe('skipped'); + expect(verifyStep?.error).toContain('Dependencies not met'); + }); + + it('returns partial status when some steps succeed', async () => { + mockSimplePlan(); + + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') return { success: false, error: 'Failed' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('partial'); + expect(result.errors.length).toBeGreaterThan(0); + }); + }); + + describe('error handling', () => { + it('handles plan formulation failure gracefully', async () => { + mockGenerateText.mockRejectedValue(new Error('LLM unavailable')); + mockExecute.mockResolvedValue({ success: true }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('failed'); + expect(result.errors).toContain('LLM unavailable'); + }); + + it('handles command execution exception', async () => { + mockSimplePlan(); + + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') throw new Error('Connection lost'); + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + // Step 1 (read) should fail with exception + const readStep = result.stepResults.find(r => r.stepNumber === 1); + expect(readStep?.status).toBe('failed'); + expect(readStep?.error).toContain('Connection lost'); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/CodingModelSelector.test.ts b/src/debug/jtag/tests/unit/code/CodingModelSelector.test.ts new file mode 100644 index 000000000..61edbbb38 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodingModelSelector.test.ts @@ -0,0 +1,168 @@ +/** + * CodingModelSelector Unit Tests + * + * Tests model routing for different coding task types. + * Validates: + * - Default tier selection for each task type + * - Provider fallback when preferred provider unavailable + * - Edge cases: no providers, single provider + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { CodingModelSelector } from '../../../system/code/server/CodingModelSelector'; +import type { CodingTaskType } from '../../../system/code/shared/CodingTypes'; + +describe('CodingModelSelector', () => { + let selector: CodingModelSelector; + + beforeEach(() => { + // Full set of SOTA providers + selector = new CodingModelSelector(new Set([ + 'anthropic', 'openai', 'deepseek', 'groq', 'xai', 'google', 'together', 'fireworks', + ])); + }); + + describe('default tier selection', () => { + it('selects anthropic for planning tasks', () => { + const tier = selector.select('planning'); + expect(tier.provider).toBe('anthropic'); + expect(tier.taskType).toBe('planning'); + expect(tier.temperature).toBeLessThanOrEqual(0.5); + }); + + it('selects anthropic for generation tasks', () => { + const tier = selector.select('generation'); + expect(tier.provider).toBe('anthropic'); + expect(tier.taskType).toBe('generation'); + }); + + it('selects anthropic for editing tasks with low temperature', () => { + const tier = selector.select('editing'); + expect(tier.provider).toBe('anthropic'); + expect(tier.temperature).toBeLessThanOrEqual(0.3); + }); + + it('selects deepseek for review tasks', () => { + const tier = selector.select('review'); + expect(tier.provider).toBe('deepseek'); + expect(tier.taskType).toBe('review'); + }); + + it('selects groq for quick-fix tasks', () => { + const tier = selector.select('quick-fix'); + expect(tier.provider).toBe('groq'); + expect(tier.taskType).toBe('quick-fix'); + }); + + it('selects groq for discovery tasks', () => { + const tier = selector.select('discovery'); + expect(tier.provider).toBe('groq'); + expect(tier.taskType).toBe('discovery'); + }); + }); + + describe('all task types return valid tiers', () => { + const taskTypes: CodingTaskType[] = [ + 'planning', 'generation', 'editing', 'review', 'quick-fix', 'discovery', + ]; + + for (const taskType of taskTypes) { + it(`returns valid tier for "${taskType}"`, () => { + const tier = selector.select(taskType); + expect(tier.taskType).toBe(taskType); + expect(tier.provider).toBeTruthy(); + expect(tier.model).toBeTruthy(); + expect(tier.temperature).toBeGreaterThanOrEqual(0); + expect(tier.temperature).toBeLessThanOrEqual(1); + expect(tier.maxTokens).toBeGreaterThan(0); + expect(tier.description).toBeTruthy(); + }); + } + }); + + describe('provider fallback', () => { + it('falls back when preferred provider is unavailable', () => { + // Only openai available β€” planning defaults to anthropic, should fallback + const limited = new CodingModelSelector(new Set(['openai'])); + const tier = limited.select('planning'); + expect(tier.provider).toBe('openai'); + expect(tier.taskType).toBe('planning'); + }); + + it('falls through fallback order correctly', () => { + // Only groq available + const groqOnly = new CodingModelSelector(new Set(['groq'])); + const tier = groqOnly.select('planning'); + expect(tier.provider).toBe('groq'); + }); + + it('preserves temperature and maxTokens from default tier on fallback', () => { + const limited = new CodingModelSelector(new Set(['deepseek'])); + const tier = limited.select('editing'); + // Should keep editing's low temperature even on fallback + expect(tier.temperature).toBeLessThanOrEqual(0.3); + expect(tier.provider).toBe('deepseek'); + }); + + it('marks fallback in description', () => { + const limited = new CodingModelSelector(new Set(['openai'])); + const tier = limited.select('review'); + // review defaults to deepseek, should fallback to openai + expect(tier.description).toContain('fallback'); + }); + + it('returns default tier when no providers available', () => { + const empty = new CodingModelSelector(new Set()); + const tier = empty.select('planning'); + // Returns default (may fail at runtime), but returns a tier + expect(tier.taskType).toBe('planning'); + expect(tier.provider).toBeTruthy(); + }); + }); + + describe('hasFrontierModel', () => { + it('returns true when frontier providers available', () => { + expect(selector.hasFrontierModel).toBe(true); + }); + + it('returns false when no frontier providers available', () => { + const empty = new CodingModelSelector(new Set()); + expect(empty.hasFrontierModel).toBe(false); + }); + + it('returns true with even a single frontier provider', () => { + const single = new CodingModelSelector(new Set(['groq'])); + expect(single.hasFrontierModel).toBe(true); + }); + + it('returns false with only non-frontier providers', () => { + const local = new CodingModelSelector(new Set(['ollama', 'candle'])); + expect(local.hasFrontierModel).toBe(false); + }); + }); + + describe('available providers update', () => { + it('reflects updated providers in selection', () => { + const limited = new CodingModelSelector(new Set(['groq'])); + expect(limited.select('planning').provider).toBe('groq'); + + // Add anthropic + limited.availableProviders = new Set(['groq', 'anthropic']); + expect(limited.select('planning').provider).toBe('anthropic'); + }); + }); + + describe('allTiers', () => { + it('returns all configured tiers', () => { + const tiers = selector.allTiers; + expect(tiers.length).toBe(6); // 6 task types + const taskTypes = tiers.map(t => t.taskType); + expect(taskTypes).toContain('planning'); + expect(taskTypes).toContain('generation'); + expect(taskTypes).toContain('editing'); + expect(taskTypes).toContain('review'); + expect(taskTypes).toContain('quick-fix'); + expect(taskTypes).toContain('discovery'); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts b/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts new file mode 100644 index 000000000..d71792ba0 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts @@ -0,0 +1,301 @@ +/** + * PlanFormulator Unit Tests + * + * Tests LLM plan generation by mocking AIProviderDaemon. + * Validates: + * - Prompt construction (system prompt, tool schemas, constraints) + * - JSON plan parsing from LLM responses + * - Plan validation (actions, dependencies, step numbers) + * - Error handling for invalid LLM output + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { PlanFormulator } from '../../../system/code/server/PlanFormulator'; +import { CodingModelSelector } from '../../../system/code/server/CodingModelSelector'; +import type { CodingTask } from '../../../system/code/shared/CodingTypes'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// Mock AIProviderDaemon +const mockGenerateText = vi.fn(); +vi.mock('../../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ + AIProviderDaemon: { + generateText: (...args: unknown[]) => mockGenerateText(...args), + }, +})); + +// Mock Logger +vi.mock('../../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +function makeTask(overrides?: Partial): CodingTask { + return { + id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, + personaId: '11111111-2222-3333-4444-555555555555' as UUID, + description: 'Add a greet function to utils.ts', + taskType: 'generation', + maxToolCalls: 15, + maxDurationMs: 120000, + createdAt: Date.now(), + ...overrides, + }; +} + +/** Helper: mock LLM returning a valid plan JSON */ +function mockValidPlan() { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Read utils.ts, add greet function, verify', + steps: [ + { + stepNumber: 1, + action: 'read', + description: 'Read current utils.ts contents', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'File contents returned', + }, + { + stepNumber: 2, + action: 'edit', + description: 'Add greet function to utils.ts', + targetFiles: ['utils.ts'], + toolCall: 'code/edit', + toolParams: { + filePath: 'utils.ts', + editMode: { type: 'append', content: '\nexport function greet(name: string): string {\n return `Hello, ${name}!`;\n}\n' }, + description: 'Add greet function', + }, + dependsOn: [1], + verification: 'Edit applied successfully', + }, + { + stepNumber: 3, + action: 'verify', + description: 'Read back to verify greet function added', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [2], + verification: 'greet function present in file', + }, + ], + }), + usage: { inputTokens: 500, outputTokens: 200 }, + }); +} + +describe('PlanFormulator', () => { + let formulator: PlanFormulator; + + beforeEach(() => { + mockGenerateText.mockReset(); + const selector = new CodingModelSelector(new Set(['anthropic', 'deepseek', 'groq'])); + formulator = new PlanFormulator(selector); + }); + + describe('formulate', () => { + it('generates a valid plan from LLM response', async () => { + mockValidPlan(); + + const plan = await formulator.formulate(makeTask()); + + expect(plan.taskId).toBe('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'); + expect(plan.summary).toBe('Read utils.ts, add greet function, verify'); + expect(plan.steps).toHaveLength(3); + expect(plan.estimatedToolCalls).toBe(3); + expect(plan.generatedBy.provider).toBe('anthropic'); + expect(plan.generatedAt).toBeGreaterThan(0); + }); + + it('preserves step structure from LLM', async () => { + mockValidPlan(); + + const plan = await formulator.formulate(makeTask()); + const step1 = plan.steps[0]; + + expect(step1.stepNumber).toBe(1); + expect(step1.action).toBe('read'); + expect(step1.toolCall).toBe('code/read'); + expect(step1.targetFiles).toEqual(['utils.ts']); + expect(step1.dependsOn).toEqual([]); + }); + + it('validates dependency ordering', async () => { + mockValidPlan(); + + const plan = await formulator.formulate(makeTask()); + + expect(plan.steps[1].dependsOn).toEqual([1]); // edit depends on read + expect(plan.steps[2].dependsOn).toEqual([2]); // verify depends on edit + }); + + it('passes task description to LLM', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask({ description: 'Refactor auth module' })); + + expect(mockGenerateText).toHaveBeenCalledTimes(1); + const request = mockGenerateText.mock.calls[0][0]; + const userMessage = request.messages.find((m: any) => m.role === 'user' && m.content.includes('Refactor auth module')); + expect(userMessage).toBeDefined(); + }); + + it('includes tool schemas in system prompt', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask()); + + const request = mockGenerateText.mock.calls[0][0]; + const systemMsg = request.messages.find((m: any) => m.role === 'system'); + expect(systemMsg.content).toContain('code/tree'); + expect(systemMsg.content).toContain('code/read'); + expect(systemMsg.content).toContain('code/write'); + expect(systemMsg.content).toContain('code/edit'); + expect(systemMsg.content).toContain('code/search'); + }); + + it('includes constraints in system prompt', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask({ maxToolCalls: 10, maxDurationMs: 60000 })); + + const request = mockGenerateText.mock.calls[0][0]; + const systemMsg = request.messages.find((m: any) => m.role === 'system'); + expect(systemMsg.content).toContain('10'); // max tool calls + expect(systemMsg.content).toContain('60'); // 60 seconds + }); + + it('includes codebase context when provided', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask(), '## Workspace Tree\nsrc/\n utils.ts (200 bytes)'); + + const request = mockGenerateText.mock.calls[0][0]; + const contextMsg = request.messages.find((m: any) => m.content?.includes('Workspace Tree')); + expect(contextMsg).toBeDefined(); + }); + + it('includes relevant files when specified', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask({ relevantFiles: ['src/utils.ts', 'src/auth.ts'] })); + + const request = mockGenerateText.mock.calls[0][0]; + const filesMsg = request.messages.find((m: any) => m.content?.includes('src/utils.ts')); + expect(filesMsg).toBeDefined(); + }); + }); + + describe('error handling', () => { + it('throws on empty LLM response', async () => { + mockGenerateText.mockResolvedValue({ text: '' }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('empty response'); + }); + + it('throws on non-JSON response', async () => { + mockGenerateText.mockResolvedValue({ text: 'I think we should...' }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('No JSON object'); + }); + + it('throws on missing summary', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ steps: [{ stepNumber: 1, action: 'read' }] }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('missing "summary"'); + }); + + it('throws on empty steps array', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ summary: 'Do stuff', steps: [] }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('no steps'); + }); + + it('throws on too many steps', async () => { + const manySteps = Array.from({ length: 20 }, (_, i) => ({ + stepNumber: i + 1, + action: 'read', + toolCall: 'code/read', + toolParams: {}, + dependsOn: [], + })); + + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ summary: 'Too many', steps: manySteps }), + }); + + await expect(formulator.formulate(makeTask({ maxToolCalls: 15 }))).rejects.toThrow('exceeds max'); + }); + + it('throws on invalid action', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Bad action', + steps: [{ stepNumber: 1, action: 'hack', toolCall: 'code/read', dependsOn: [] }], + }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid action'); + }); + + it('throws on invalid toolCall', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Bad tool', + steps: [{ stepNumber: 1, action: 'read', toolCall: 'rm -rf', dependsOn: [] }], + }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('not a code/* command'); + }); + + it('throws on forward dependency reference', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Bad deps', + steps: [ + { stepNumber: 1, action: 'read', toolCall: 'code/read', dependsOn: [2] }, + { stepNumber: 2, action: 'read', toolCall: 'code/read', dependsOn: [] }, + ], + }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid step'); + }); + + it('extracts JSON from markdown code blocks', async () => { + const planJson = JSON.stringify({ + summary: 'Wrapped in markdown', + steps: [{ + stepNumber: 1, + action: 'read', + toolCall: 'code/read', + toolParams: { filePath: 'test.ts' }, + dependsOn: [], + }], + }); + + mockGenerateText.mockResolvedValue({ + text: `Here's the plan:\n\`\`\`json\n${planJson}\n\`\`\``, + }); + + const plan = await formulator.formulate(makeTask()); + expect(plan.summary).toBe('Wrapped in markdown'); + expect(plan.steps).toHaveLength(1); + }); + }); +}); From b45375333d527ac5631e7bfc5a4cbeb324c5840b Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 18:00:03 -0600 Subject: [PATCH 04/14] Phase 4 foundation: CodingPlanEntity with hierarchical persistence CodingPlanEntity is a first-class persistent entity for coding plans. Supports hierarchical delegation (parentPlanId), team assignment (assignees + leadId), governance integration (proposalId), and real-time execution tracking. CodeAgentOrchestrator now persists plans via DataDaemon with best-effort semantics (works without DB in unit tests). 80 unit tests passing. --- .../data-daemon/server/EntityRegistry.ts | 3 + .../code/server/CodeAgentOrchestrator.ts | 159 ++++++++- .../system/data/entities/CodingPlanEntity.ts | 324 ++++++++++++++++++ src/debug/jtag/system/shared/Constants.ts | 3 + .../tests/unit/code/CodingPlanEntity.test.ts | 317 +++++++++++++++++ 5 files changed, 802 insertions(+), 4 deletions(-) create mode 100644 src/debug/jtag/system/data/entities/CodingPlanEntity.ts create mode 100644 src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts diff --git a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts index 4e1e0b75b..29a69db43 100644 --- a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts +++ b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts @@ -82,6 +82,7 @@ import { FeedbackEntity } from '../../../system/data/entities/FeedbackEntity'; import { CallEntity } from '../../../system/data/entities/CallEntity'; import { SocialCredentialEntity } from '../../../system/social/shared/SocialCredentialEntity'; import { HandleEntity } from '../../../system/data/entities/HandleEntity'; +import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; /** * Initialize entity registration for the storage adapter @@ -137,6 +138,7 @@ export function initializeEntityRegistry(): void { new CallEntity(); new SocialCredentialEntity(); new HandleEntity(); + new CodingPlanEntity(); registerEntity(UserEntity.collection, UserEntity); registerEntity(RoomEntity.collection, RoomEntity); @@ -184,6 +186,7 @@ export function initializeEntityRegistry(): void { registerEntity(CallEntity.collection, CallEntity); registerEntity(SocialCredentialEntity.collection, SocialCredentialEntity); registerEntity(HandleEntity.collection, HandleEntity); + registerEntity(CodingPlanEntity.collection, CodingPlanEntity); log.info('All entities registered'); } \ No newline at end of file diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts index 4c398ab3c..cb18f23f9 100644 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -13,6 +13,11 @@ * 6. Fix β€” If verification fails, retry (max 3 attempts per step) * 7. Report β€” Summarize changes via code/history * + * Persistence: + * - Plans are persisted as CodingPlanEntity via DataDaemon + * - Status updated in real-time during execution + * - Persistence is best-effort (orchestrator works without DataDaemon) + * * Budget enforcement: * - Max duration (default 120s) * - Max tool calls (default 15) @@ -32,6 +37,10 @@ import { PlanFormulator } from './PlanFormulator'; import { CodingModelSelector } from './CodingModelSelector'; import { Commands } from '../../core/shared/Commands'; import { Logger } from '../../core/logging/Logger'; +import { CodingPlanEntity } from '../../data/entities/CodingPlanEntity'; +import type { CodingStepSnapshot, CodingPlanStatus } from '../../data/entities/CodingPlanEntity'; +import { COLLECTIONS } from '../../shared/Constants'; +import type { UUID } from '../../core/types/CrossPlatformUUID'; const log = Logger.create('CodeAgentOrchestrator', 'code'); @@ -97,8 +106,9 @@ export class CodeAgentOrchestrator { * Execute a coding task end-to-end: * 1. Optionally discover codebase context * 2. Formulate a plan via LLM - * 3. Execute each step - * 4. Return results + * 3. Persist the plan as a CodingPlanEntity + * 4. Execute each step (updating entity in real-time) + * 5. Return results */ async execute(task: CodingTask): Promise { const budget = new ExecutionBudget( @@ -113,6 +123,7 @@ export class CodeAgentOrchestrator { const changeIds: string[] = []; const errors: string[] = []; const stepResults: StepResult[] = []; + let planEntity: CodingPlanEntity | undefined; try { // Phase 1: Discovery (optional β€” gather codebase context for planning) @@ -129,6 +140,9 @@ export class CodeAgentOrchestrator { const plan = await this.planFormulator.formulate(task, codebaseContext); log.info(`Plan: "${plan.summary}" β€” ${plan.steps.length} steps`); + // Phase 2b: Persist plan as entity (best-effort β€” works without DataDaemon) + planEntity = await this.persistPlan(task, plan); + // Phase 3: Execute plan steps in dependency order const completedSteps = new Set(); @@ -172,6 +186,9 @@ export class CodeAgentOrchestrator { } else { errors.push(`Step ${step.stepNumber} (${step.action}): ${result.error ?? 'unknown error'}`); } + + // Update persisted plan step status + await this.updatePlanStep(planEntity, step.stepNumber, result); } // Determine overall status @@ -189,13 +206,20 @@ export class CodeAgentOrchestrator { ? `Completed: ${plan.summary}` : `${status}: ${stepResults.filter(r => r.status === 'completed').length}/${plan.steps.length} steps completed`; - return this.buildResult(task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget); + const codingResult = this.buildResult(task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget); + + // Finalize persisted plan + await this.finalizePlan(planEntity, codingResult); + + return codingResult; } catch (error) { const message = error instanceof Error ? error.message : String(error); log.error(`Task failed: ${message}`); errors.push(message); - return this.buildResult(task, 'failed', `Failed: ${message}`, stepResults, filesModified, filesCreated, changeIds, errors, budget); + const codingResult = this.buildResult(task, 'failed', `Failed: ${message}`, stepResults, filesModified, filesCreated, changeIds, errors, budget); + await this.finalizePlan(planEntity, codingResult); + return codingResult; } } @@ -401,4 +425,131 @@ export class CodeAgentOrchestrator { errors, }; } + + // ──────────────────────────────────────────────────────────── + // Plan Persistence (best-effort via DataDaemon) + // ──────────────────────────────────────────────────────────── + + /** + * Persist a newly formulated plan as a CodingPlanEntity. + * Returns the entity if persistence succeeded, undefined otherwise. + */ + private async persistPlan(task: CodingTask, plan: CodingPlan): Promise { + try { + const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); + + const entity = new CodingPlanEntity(); + entity.taskId = task.id; + entity.createdById = task.personaId; + entity.leadId = task.personaId; + entity.summary = plan.summary; + entity.taskDescription = task.description; + entity.estimatedToolCalls = plan.estimatedToolCalls; + entity.assignees = [task.personaId]; + entity.generatedBy = { + provider: plan.generatedBy.provider, + model: plan.generatedBy.model, + temperature: 0, + durationMs: 0, + }; + entity.status = 'executing'; + entity.executionStartedAt = Date.now(); + + // Convert plan steps to snapshots + entity.steps = plan.steps.map(step => ({ + stepNumber: step.stepNumber, + action: step.action, + description: step.description, + targetFiles: step.targetFiles, + toolCall: step.toolCall, + toolParams: step.toolParams, + dependsOn: step.dependsOn, + verification: step.verification, + status: 'pending' as const, + })); + + const stored = await DataDaemon.store(COLLECTIONS.CODING_PLANS, entity); + log.info(`Plan persisted: ${stored.id}`); + return stored; + } catch { + log.debug('Plan persistence skipped (DataDaemon not available)'); + return undefined; + } + } + + /** + * Update a step's status in the persisted plan entity. + */ + private async updatePlanStep( + planEntity: CodingPlanEntity | undefined, + stepNumber: number, + result: StepResult, + ): Promise { + if (!planEntity) return; + + try { + const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); + + const stepIndex = planEntity.steps.findIndex(s => s.stepNumber === stepNumber); + if (stepIndex === -1) return; + + // Update step snapshot in-place + const snapshot = planEntity.steps[stepIndex]; + snapshot.status = result.status === 'completed' ? 'completed' + : result.status === 'skipped' ? 'skipped' + : 'failed'; + snapshot.completedAt = Date.now(); + snapshot.durationMs = result.durationMs; + snapshot.output = result.output; + snapshot.error = result.error; + + await DataDaemon.update( + COLLECTIONS.CODING_PLANS, + planEntity.id as UUID, + { steps: planEntity.steps } as Partial, + ); + } catch { + // Best-effort β€” don't interrupt execution for persistence failures + } + } + + /** + * Finalize the persisted plan with execution results. + */ + private async finalizePlan( + planEntity: CodingPlanEntity | undefined, + result: CodingResult, + ): Promise { + if (!planEntity) return; + + try { + const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); + + const statusMap: Record = { + completed: 'completed', + partial: 'partial', + failed: 'failed', + budget_exceeded: 'partial', + }; + + await DataDaemon.update( + COLLECTIONS.CODING_PLANS, + planEntity.id as UUID, + { + status: statusMap[result.status] ?? 'failed', + executionCompletedAt: Date.now(), + filesModified: result.filesModified, + filesCreated: result.filesCreated, + changeIds: result.changeIds, + errors: result.errors, + totalToolCalls: result.totalToolCalls, + totalDurationMs: result.totalDurationMs, + } as Partial, + ); + + log.info(`Plan finalized: ${planEntity.id} β†’ ${result.status}`); + } catch { + // Best-effort + } + } } diff --git a/src/debug/jtag/system/data/entities/CodingPlanEntity.ts b/src/debug/jtag/system/data/entities/CodingPlanEntity.ts new file mode 100644 index 000000000..cbf62ad6c --- /dev/null +++ b/src/debug/jtag/system/data/entities/CodingPlanEntity.ts @@ -0,0 +1,324 @@ +/** + * CodingPlanEntity - Persistent coding plan with hierarchical team coordination + * + * First-class entity for the coding agent system. Tracks the full lifecycle: + * - Draft: PlanFormulator generates initial plan + * - Proposed: Plan submitted for team review + * - Approved: Team accepted the plan (or auto-approved for single-agent) + * - Executing: CodeAgentOrchestrator running steps + * - Completed/Failed: Final outcome with file changes and errors + * + * Hierarchical: A lead creates a top-level plan, then delegates sub-plans + * to team members via parentPlanId. Each sub-plan is scoped to a file cluster. + * + * Team-visible: All assigned AIs can view and propose modifications. + * Governance: Plans can be proposed for review via DecisionProposal integration. + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { + TextField, + NumberField, + JsonField, + EnumField, + CompositeIndex, +} from '../decorators/FieldDecorators'; +import { BaseEntity } from './BaseEntity'; +import { COLLECTIONS } from '../../shared/Constants'; +import type { CodingAction } from '../../code/shared/CodingTypes'; + +// ──────────────────────────────────────────────────────────── +// Plan status lifecycle +// ──────────────────────────────────────────────────────────── + +export type CodingPlanStatus = + | 'draft' // Generated by PlanFormulator, not yet reviewed + | 'proposed' // Submitted for team review (DecisionProposal) + | 'approved' // Team accepted (or auto-approved for solo tasks) + | 'executing' // CodeAgentOrchestrator actively running steps + | 'completed' // All steps succeeded + | 'partial' // Some steps completed, budget or dependencies prevented full completion + | 'failed' // Execution failed (plan formulation error, all steps failed, etc.) + | 'cancelled'; // Manually cancelled before or during execution + +// ──────────────────────────────────────────────────────────── +// Step snapshot (persisted version of CodingStep + execution result) +// ──────────────────────────────────────────────────────────── + +export interface CodingStepSnapshot { + stepNumber: number; + action: CodingAction; + description: string; + targetFiles: string[]; + toolCall: string; + toolParams: Record; + dependsOn: number[]; + verification: string; + + // Execution state (populated during/after execution) + status: 'pending' | 'executing' | 'completed' | 'failed' | 'skipped'; + assigneeId?: string; // Which AI is executing this step (for delegation) + startedAt?: number; + completedAt?: number; + durationMs?: number; + output?: unknown; + error?: string; + retryCount?: number; +} + +// ──────────────────────────────────────────────────────────── +// Plan generation metadata +// ──────────────────────────────────────────────────────────── + +export interface PlanGenerationInfo { + provider: string; // e.g. 'anthropic' + model: string; // e.g. 'claude-sonnet-4-5-20250929' + temperature: number; + durationMs: number; // How long plan generation took + inputTokens?: number; + outputTokens?: number; +} + +// ──────────────────────────────────────────────────────────── +// Entity +// ──────────────────────────────────────────────────────────── + +@CompositeIndex({ + name: 'idx_coding_plans_persona_status', + fields: ['createdById', 'status'], + direction: 'DESC', +}) +@CompositeIndex({ + name: 'idx_coding_plans_task', + fields: ['taskId'], + direction: 'DESC', +}) +@CompositeIndex({ + name: 'idx_coding_plans_parent', + fields: ['parentPlanId'], + direction: 'DESC', +}) +export class CodingPlanEntity extends BaseEntity { + static readonly collection = COLLECTIONS.CODING_PLANS; + + // ── Identity ────────────────────────────────────────────── + + /** The coding task this plan addresses */ + @TextField({ index: true }) + taskId!: UUID; + + /** Parent plan ID (null for top-level plans, set for delegated sub-plans) */ + @TextField({ nullable: true, index: true }) + parentPlanId?: UUID; + + /** AI that created/formulated this plan */ + @TextField({ index: true }) + createdById!: UUID; + + /** Lead AI coordinating this plan (may differ from creator for delegated sub-plans) */ + @TextField({ index: true }) + leadId!: UUID; + + // ── Plan content ────────────────────────────────────────── + + /** Brief summary of the plan's approach */ + @TextField() + summary!: string; + + /** Original task description that prompted this plan */ + @TextField() + taskDescription!: string; + + /** Step DAG β€” the concrete execution plan */ + @JsonField() + steps!: CodingStepSnapshot[]; + + /** Estimated total tool calls for execution */ + @NumberField() + estimatedToolCalls!: number; + + // ── Team ────────────────────────────────────────────────── + + /** AI persona IDs assigned to work on this plan */ + @JsonField() + assignees!: UUID[]; + + // ── Model info ──────────────────────────────────────────── + + /** How the plan was generated */ + @JsonField() + generatedBy!: PlanGenerationInfo; + + // ── Status & lifecycle ──────────────────────────────────── + + @EnumField({ index: true }) + status!: CodingPlanStatus; + + /** When execution started (null if not yet executing) */ + @NumberField({ nullable: true }) + executionStartedAt?: number; + + /** When execution completed/failed (null if still running) */ + @NumberField({ nullable: true }) + executionCompletedAt?: number; + + // ── Execution results ───────────────────────────────────── + + /** Files modified during execution */ + @JsonField() + filesModified!: string[]; + + /** Files created during execution */ + @JsonField() + filesCreated!: string[]; + + /** Change IDs from code/write and code/edit operations (for undo) */ + @JsonField() + changeIds!: string[]; + + /** Errors encountered during execution */ + @JsonField() + errors!: string[]; + + /** Total tool calls consumed */ + @NumberField() + totalToolCalls!: number; + + /** Total execution duration in milliseconds */ + @NumberField() + totalDurationMs!: number; + + // ── Governance ──────────────────────────────────────────── + + /** DecisionProposal ID if plan was proposed for team review */ + @TextField({ nullable: true }) + proposalId?: UUID; + + // ── Index signature ─────────────────────────────────────── + + [key: string]: unknown; + + // ── Constructor ─────────────────────────────────────────── + + constructor() { + super(); + + this.taskId = '' as UUID; + this.createdById = '' as UUID; + this.leadId = '' as UUID; + this.summary = ''; + this.taskDescription = ''; + this.steps = []; + this.estimatedToolCalls = 0; + this.assignees = []; + this.generatedBy = { provider: '', model: '', temperature: 0, durationMs: 0 }; + this.status = 'draft'; + this.filesModified = []; + this.filesCreated = []; + this.changeIds = []; + this.errors = []; + this.totalToolCalls = 0; + this.totalDurationMs = 0; + } + + // ── BaseEntity implementation ───────────────────────────── + + get collection(): string { + return CodingPlanEntity.collection; + } + + static override getPaginationConfig(): { + defaultSortField: string; + defaultSortDirection: 'asc' | 'desc'; + defaultPageSize: number; + cursorField: string; + } { + return { + defaultSortField: 'createdAt', + defaultSortDirection: 'desc', + defaultPageSize: 20, + cursorField: 'createdAt', + }; + } + + validate(): { success: boolean; error?: string } { + if (!this.taskId?.trim()) { + return { success: false, error: 'CodingPlan taskId is required' }; + } + if (!this.createdById?.trim()) { + return { success: false, error: 'CodingPlan createdById is required' }; + } + if (!this.leadId?.trim()) { + return { success: false, error: 'CodingPlan leadId is required' }; + } + if (!this.summary?.trim()) { + return { success: false, error: 'CodingPlan summary is required' }; + } + if (!this.taskDescription?.trim()) { + return { success: false, error: 'CodingPlan taskDescription is required' }; + } + if (!Array.isArray(this.steps)) { + return { success: false, error: 'CodingPlan steps must be an array' }; + } + if (this.steps.length === 0) { + return { success: false, error: 'CodingPlan must have at least one step' }; + } + if (!Array.isArray(this.assignees)) { + return { success: false, error: 'CodingPlan assignees must be an array' }; + } + if (this.assignees.length === 0) { + return { success: false, error: 'CodingPlan must have at least one assignee' }; + } + + const validStatuses: CodingPlanStatus[] = [ + 'draft', 'proposed', 'approved', 'executing', + 'completed', 'partial', 'failed', 'cancelled', + ]; + if (!validStatuses.includes(this.status)) { + return { success: false, error: `CodingPlan status must be one of: ${validStatuses.join(', ')}` }; + } + + // Validate step structure + for (const step of this.steps) { + if (typeof step.stepNumber !== 'number' || step.stepNumber < 1) { + return { success: false, error: `CodingPlan step has invalid stepNumber: ${step.stepNumber}` }; + } + if (!step.action) { + return { success: false, error: `CodingPlan step ${step.stepNumber} is missing action` }; + } + if (!step.toolCall?.startsWith('code/')) { + return { success: false, error: `CodingPlan step ${step.stepNumber} has invalid toolCall: ${step.toolCall}` }; + } + } + + return { success: true }; + } + + // ── Convenience methods ─────────────────────────────────── + + /** Whether this is a sub-plan delegated from a parent */ + get isDelegated(): boolean { + return !!this.parentPlanId; + } + + /** Number of steps completed */ + get stepsCompleted(): number { + return this.steps.filter(s => s.status === 'completed').length; + } + + /** Number of steps failed */ + get stepsFailed(): number { + return this.steps.filter(s => s.status === 'failed').length; + } + + /** Number of steps still pending or executing */ + get stepsRemaining(): number { + return this.steps.filter(s => s.status === 'pending' || s.status === 'executing').length; + } + + /** Progress as a fraction (0.0 - 1.0) */ + get progress(): number { + if (this.steps.length === 0) return 0; + return this.stepsCompleted / this.steps.length; + } +} diff --git a/src/debug/jtag/system/shared/Constants.ts b/src/debug/jtag/system/shared/Constants.ts index 3b0a41b7a..25fa5e7d2 100644 --- a/src/debug/jtag/system/shared/Constants.ts +++ b/src/debug/jtag/system/shared/Constants.ts @@ -136,6 +136,9 @@ export const COLLECTIONS = { // Universal Handle System β€” persistent async operation references HANDLES: 'handles', + + // Coding Agent System (Phase 4: Multi-Agent Coordination) + CODING_PLANS: 'coding_plans', } as const; diff --git a/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts b/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts new file mode 100644 index 000000000..d890616ec --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts @@ -0,0 +1,317 @@ +/** + * CodingPlanEntity Unit Tests + * + * Tests the persistent coding plan entity: + * - Construction and default values + * - Validation (required fields, step structure, status enum) + * - Computed properties (progress, stepsCompleted, isDelegated) + * - Hierarchical plan relationships + * - Collection and pagination config + */ + +import { describe, it, expect } from 'vitest'; +import { + CodingPlanEntity, + type CodingStepSnapshot, + type CodingPlanStatus, +} from '../../../system/data/entities/CodingPlanEntity'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +function makeStep(overrides?: Partial): CodingStepSnapshot { + return { + stepNumber: 1, + action: 'read', + description: 'Read file', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'File content returned', + status: 'pending', + ...overrides, + }; +} + +function makePlan(overrides?: Partial): CodingPlanEntity { + const plan = new CodingPlanEntity(); + plan.taskId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; + plan.createdById = '11111111-2222-3333-4444-555555555555' as UUID; + plan.leadId = '11111111-2222-3333-4444-555555555555' as UUID; + plan.summary = 'Read, edit, verify'; + plan.taskDescription = 'Add greet function to utils.ts'; + plan.steps = [ + makeStep({ stepNumber: 1, action: 'read' }), + makeStep({ stepNumber: 2, action: 'edit', toolCall: 'code/edit', dependsOn: [1] }), + makeStep({ stepNumber: 3, action: 'verify', dependsOn: [2] }), + ]; + plan.estimatedToolCalls = 3; + plan.assignees = ['11111111-2222-3333-4444-555555555555' as UUID]; + plan.generatedBy = { provider: 'anthropic', model: 'claude-sonnet', temperature: 0.3, durationMs: 500 }; + plan.status = 'draft'; + + // Apply overrides + if (overrides) { + for (const [key, value] of Object.entries(overrides)) { + (plan as Record)[key] = value; + } + } + + return plan; +} + +describe('CodingPlanEntity', () => { + describe('construction and defaults', () => { + it('creates with default values', () => { + const plan = new CodingPlanEntity(); + + expect(plan.taskId).toBe(''); + expect(plan.createdById).toBe(''); + expect(plan.leadId).toBe(''); + expect(plan.summary).toBe(''); + expect(plan.taskDescription).toBe(''); + expect(plan.steps).toEqual([]); + expect(plan.estimatedToolCalls).toBe(0); + expect(plan.assignees).toEqual([]); + expect(plan.status).toBe('draft'); + expect(plan.filesModified).toEqual([]); + expect(plan.filesCreated).toEqual([]); + expect(plan.changeIds).toEqual([]); + expect(plan.errors).toEqual([]); + expect(plan.totalToolCalls).toBe(0); + expect(plan.totalDurationMs).toBe(0); + }); + + it('has correct collection name', () => { + const plan = new CodingPlanEntity(); + expect(plan.collection).toBe('coding_plans'); + expect(CodingPlanEntity.collection).toBe('coding_plans'); + }); + + it('has pagination config with newest first', () => { + const config = CodingPlanEntity.getPaginationConfig(); + expect(config.defaultSortField).toBe('createdAt'); + expect(config.defaultSortDirection).toBe('desc'); + expect(config.defaultPageSize).toBe(20); + }); + }); + + describe('validation', () => { + it('validates a complete plan', () => { + const plan = makePlan(); + const result = plan.validate(); + expect(result.success).toBe(true); + }); + + it('rejects missing taskId', () => { + const plan = makePlan({ taskId: '' as UUID }); + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('taskId'); + }); + + it('rejects missing createdById', () => { + const plan = makePlan({ createdById: '' as UUID }); + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('createdById'); + }); + + it('rejects missing leadId', () => { + const plan = makePlan({ leadId: '' as UUID }); + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('leadId'); + }); + + it('rejects missing summary', () => { + const plan = makePlan({ summary: '' }); + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('summary'); + }); + + it('rejects missing taskDescription', () => { + const plan = makePlan({ taskDescription: ' ' }); + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('taskDescription'); + }); + + it('rejects empty steps array', () => { + const plan = makePlan(); + plan.steps = []; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('at least one step'); + }); + + it('rejects empty assignees', () => { + const plan = makePlan(); + plan.assignees = []; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('at least one assignee'); + }); + + it('rejects invalid status', () => { + const plan = makePlan(); + plan.status = 'bogus' as CodingPlanStatus; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('status'); + }); + + it('validates all valid statuses', () => { + const validStatuses: CodingPlanStatus[] = [ + 'draft', 'proposed', 'approved', 'executing', + 'completed', 'partial', 'failed', 'cancelled', + ]; + + for (const status of validStatuses) { + const plan = makePlan({ status }); + const result = plan.validate(); + expect(result.success).toBe(true); + } + }); + + it('rejects step with invalid stepNumber', () => { + const plan = makePlan(); + plan.steps = [makeStep({ stepNumber: 0 })]; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('stepNumber'); + }); + + it('rejects step with missing action', () => { + const plan = makePlan(); + plan.steps = [makeStep({ action: '' as any })]; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('action'); + }); + + it('rejects step with non-code toolCall', () => { + const plan = makePlan(); + plan.steps = [makeStep({ toolCall: 'data/list' })]; + const result = plan.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('toolCall'); + }); + }); + + describe('computed properties', () => { + it('reports progress correctly', () => { + const plan = makePlan(); + expect(plan.progress).toBe(0); // All pending + + plan.steps[0].status = 'completed'; + expect(plan.progress).toBeCloseTo(1 / 3); + + plan.steps[1].status = 'completed'; + expect(plan.progress).toBeCloseTo(2 / 3); + + plan.steps[2].status = 'completed'; + expect(plan.progress).toBe(1); + }); + + it('counts completed steps', () => { + const plan = makePlan(); + expect(plan.stepsCompleted).toBe(0); + + plan.steps[0].status = 'completed'; + plan.steps[1].status = 'failed'; + plan.steps[2].status = 'skipped'; + expect(plan.stepsCompleted).toBe(1); + }); + + it('counts failed steps', () => { + const plan = makePlan(); + plan.steps[0].status = 'completed'; + plan.steps[1].status = 'failed'; + plan.steps[2].status = 'failed'; + expect(plan.stepsFailed).toBe(2); + }); + + it('counts remaining steps', () => { + const plan = makePlan(); + expect(plan.stepsRemaining).toBe(3); // All pending + + plan.steps[0].status = 'completed'; + plan.steps[1].status = 'executing'; + expect(plan.stepsRemaining).toBe(2); // 1 pending + 1 executing + }); + + it('progress is 0 for empty steps', () => { + const plan = new CodingPlanEntity(); + expect(plan.progress).toBe(0); + }); + }); + + describe('hierarchical structure', () => { + it('top-level plan has no parent', () => { + const plan = makePlan(); + expect(plan.parentPlanId).toBeUndefined(); + expect(plan.isDelegated).toBe(false); + }); + + it('sub-plan references parent', () => { + const plan = makePlan(); + plan.parentPlanId = 'parent-plan-id-1234' as UUID; + expect(plan.isDelegated).toBe(true); + }); + + it('sub-plan can have different lead than creator', () => { + const plan = makePlan(); + plan.createdById = 'lead-ai' as UUID; + plan.leadId = 'lead-ai' as UUID; + plan.assignees = ['specialist-ai' as UUID]; + // Sub-plan created by lead, assigned to specialist + expect(plan.assignees).not.toContain(plan.leadId); + }); + }); + + describe('execution tracking', () => { + it('tracks file modifications', () => { + const plan = makePlan({ status: 'completed' }); + plan.filesModified = ['src/utils.ts', 'src/index.ts']; + plan.filesCreated = ['src/greet.ts']; + plan.changeIds = ['change-001', 'change-002']; + + expect(plan.filesModified).toHaveLength(2); + expect(plan.filesCreated).toContain('src/greet.ts'); + expect(plan.changeIds).toContain('change-001'); + }); + + it('tracks errors', () => { + const plan = makePlan({ status: 'partial' }); + plan.errors = ['Step 2 (edit): Conflict', 'Step 3 (verify): Dependencies not met']; + expect(plan.errors).toHaveLength(2); + }); + + it('tracks execution timing', () => { + const plan = makePlan({ status: 'completed' }); + plan.executionStartedAt = 1000; + plan.executionCompletedAt = 5000; + plan.totalDurationMs = 4000; + plan.totalToolCalls = 5; + + expect(plan.executionStartedAt).toBe(1000); + expect(plan.executionCompletedAt).toBe(5000); + expect(plan.totalDurationMs).toBe(4000); + expect(plan.totalToolCalls).toBe(5); + }); + }); + + describe('governance', () => { + it('tracks proposal reference', () => { + const plan = makePlan({ status: 'proposed' }); + plan.proposalId = 'proposal-abc-123' as UUID; + expect(plan.proposalId).toBe('proposal-abc-123'); + }); + + it('plan without proposal has no proposalId', () => { + const plan = makePlan(); + expect(plan.proposalId).toBeUndefined(); + }); + }); +}); From 49903f8fda5fafeb8beb579a03e606bd20c9745d Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 19:51:19 -0600 Subject: [PATCH 05/14] Phases 4A-4C: Sandbox security, self-modifying skills, multi-agent coordination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4A β€” Sandbox & Security Tiers: - SecurityTier: 4-tier access control (discovery/read/write/system) - ToolAllowlistEnforcer: per-tier command filtering with glob matching - ExecutionSandbox: process-isolated code execution with timeout/output limits - Risk assessment integrated into PlanFormulator output Phase 4B β€” Self-Modifying Skills: - SkillEntity: persistent skill registry with full lifecycle - skill/propose: AI creates command specifications - skill/generate: programmatic CommandGenerator invocation - skill/validate: sandbox compilation + test execution - skill/activate: dynamic tool registration - skill/list: query skill registry Phase 4C β€” Multi-Agent Coordination & Delegation: - CodeCoordinationStream: file-level MUTEX via BaseCoordinationStream - PlanGovernance: risk-based approval routing (auto-approve low risk, require approval for multi-agent/high-risk/system-tier) - CodeTaskDelegator: union-find plan decomposition into parallel file clusters, load-balanced agent assignment, sub-plan creation, result consolidation - DryRun mode: execute plans read-only, mock write operations 342 tests across 12 test files, all passing. --- .../jtag/commands/skill/activate/.npmignore | 20 + .../jtag/commands/skill/activate/README.md | 157 ++++++ .../browser/SkillActivateBrowserCommand.ts | 21 + .../jtag/commands/skill/activate/package.json | 35 ++ .../server/SkillActivateServerCommand.ts | 124 ++++ .../activate/shared/SkillActivateTypes.ts | 105 ++++ .../SkillActivateIntegration.test.ts | 196 +++++++ .../test/unit/SkillActivateCommand.test.ts | 259 +++++++++ .../jtag/commands/skill/generate/.npmignore | 20 + .../jtag/commands/skill/generate/README.md | 159 ++++++ .../browser/SkillGenerateBrowserCommand.ts | 21 + .../jtag/commands/skill/generate/package.json | 35 ++ .../server/SkillGenerateServerCommand.ts | 127 +++++ .../generate/shared/SkillGenerateTypes.ts | 114 ++++ .../SkillGenerateIntegration.test.ts | 196 +++++++ .../test/unit/SkillGenerateCommand.test.ts | 259 +++++++++ src/debug/jtag/commands/skill/list/.npmignore | 20 + src/debug/jtag/commands/skill/list/README.md | 164 ++++++ .../list/browser/SkillListBrowserCommand.ts | 21 + .../jtag/commands/skill/list/package.json | 35 ++ .../list/server/SkillListServerCommand.ts | 65 +++ .../skill/list/shared/SkillListTypes.ts | 110 ++++ .../integration/SkillListIntegration.test.ts | 196 +++++++ .../list/test/unit/SkillListCommand.test.ts | 259 +++++++++ .../jtag/commands/skill/propose/.npmignore | 20 + .../jtag/commands/skill/propose/README.md | 165 ++++++ .../browser/SkillProposeBrowserCommand.ts | 21 + .../jtag/commands/skill/propose/package.json | 35 ++ .../server/SkillProposeServerCommand.ts | 128 +++++ .../skill/propose/shared/SkillProposeTypes.ts | 139 +++++ .../SkillProposeIntegration.test.ts | 196 +++++++ .../test/unit/SkillProposeCommand.test.ts | 259 +++++++++ .../jtag/commands/skill/validate/.npmignore | 20 + .../jtag/commands/skill/validate/README.md | 160 ++++++ .../browser/SkillValidateBrowserCommand.ts | 21 + .../jtag/commands/skill/validate/package.json | 35 ++ .../server/SkillValidateServerCommand.ts | 154 +++++ .../validate/shared/SkillValidateTypes.ts | 120 ++++ .../SkillValidateIntegration.test.ts | 196 +++++++ .../test/unit/SkillValidateCommand.test.ts | 259 +++++++++ .../data-daemon/server/EntityRegistry.ts | 3 + src/debug/jtag/generated-command-schemas.json | 2 +- src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/shared/version.ts | 2 +- .../code/server/CodeAgentOrchestrator.ts | 67 ++- .../system/code/server/CodeTaskDelegator.ts | 408 ++++++++++++++ .../system/code/server/ExecutionSandbox.ts | 219 ++++++++ .../jtag/system/code/server/PlanFormulator.ts | 24 +- .../jtag/system/code/server/PlanGovernance.ts | 151 +++++ .../jtag/system/code/server/SecurityTier.ts | 176 ++++++ .../code/server/ToolAllowlistEnforcer.ts | 174 ++++++ .../jtag/system/code/shared/CodingTypes.ts | 96 ++++ .../server/CodeCoordinationStream.ts | 349 ++++++++++++ .../system/data/entities/CodingPlanEntity.ts | 18 +- .../jtag/system/data/entities/SkillEntity.ts | 303 ++++++++++ src/debug/jtag/system/shared/Constants.ts | 3 + .../integration/coding-agent-workflow.test.ts | 412 ++++++++++++++ .../integration/sandbox-enforcement.test.ts | 302 ++++++++++ .../unit/code/CodeAgentOrchestrator.test.ts | 73 +++ .../unit/code/CodeCoordinationStream.test.ts | 328 +++++++++++ .../tests/unit/code/CodeTaskDelegator.test.ts | 530 ++++++++++++++++++ .../tests/unit/code/CodingPlanEntity.test.ts | 32 ++ .../tests/unit/code/ExecutionSandbox.test.ts | 286 ++++++++++ .../tests/unit/code/PlanFormulator.test.ts | 96 ++++ .../tests/unit/code/PlanGovernance.test.ts | 174 ++++++ .../jtag/tests/unit/code/SecurityTier.test.ts | 200 +++++++ .../jtag/tests/unit/code/SkillEntity.test.ts | 438 +++++++++++++++ .../tests/unit/code/SkillLifecycle.test.ts | 331 +++++++++++ .../unit/code/ToolAllowlistEnforcer.test.ts | 281 ++++++++++ 70 files changed, 10114 insertions(+), 16 deletions(-) create mode 100644 src/debug/jtag/commands/skill/activate/.npmignore create mode 100644 src/debug/jtag/commands/skill/activate/README.md create mode 100644 src/debug/jtag/commands/skill/activate/browser/SkillActivateBrowserCommand.ts create mode 100644 src/debug/jtag/commands/skill/activate/package.json create mode 100644 src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts create mode 100644 src/debug/jtag/commands/skill/activate/shared/SkillActivateTypes.ts create mode 100644 src/debug/jtag/commands/skill/activate/test/integration/SkillActivateIntegration.test.ts create mode 100644 src/debug/jtag/commands/skill/activate/test/unit/SkillActivateCommand.test.ts create mode 100644 src/debug/jtag/commands/skill/generate/.npmignore create mode 100644 src/debug/jtag/commands/skill/generate/README.md create mode 100644 src/debug/jtag/commands/skill/generate/browser/SkillGenerateBrowserCommand.ts create mode 100644 src/debug/jtag/commands/skill/generate/package.json create mode 100644 src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts create mode 100644 src/debug/jtag/commands/skill/generate/shared/SkillGenerateTypes.ts create mode 100644 src/debug/jtag/commands/skill/generate/test/integration/SkillGenerateIntegration.test.ts create mode 100644 src/debug/jtag/commands/skill/generate/test/unit/SkillGenerateCommand.test.ts create mode 100644 src/debug/jtag/commands/skill/list/.npmignore create mode 100644 src/debug/jtag/commands/skill/list/README.md create mode 100644 src/debug/jtag/commands/skill/list/browser/SkillListBrowserCommand.ts create mode 100644 src/debug/jtag/commands/skill/list/package.json create mode 100644 src/debug/jtag/commands/skill/list/server/SkillListServerCommand.ts create mode 100644 src/debug/jtag/commands/skill/list/shared/SkillListTypes.ts create mode 100644 src/debug/jtag/commands/skill/list/test/integration/SkillListIntegration.test.ts create mode 100644 src/debug/jtag/commands/skill/list/test/unit/SkillListCommand.test.ts create mode 100644 src/debug/jtag/commands/skill/propose/.npmignore create mode 100644 src/debug/jtag/commands/skill/propose/README.md create mode 100644 src/debug/jtag/commands/skill/propose/browser/SkillProposeBrowserCommand.ts create mode 100644 src/debug/jtag/commands/skill/propose/package.json create mode 100644 src/debug/jtag/commands/skill/propose/server/SkillProposeServerCommand.ts create mode 100644 src/debug/jtag/commands/skill/propose/shared/SkillProposeTypes.ts create mode 100644 src/debug/jtag/commands/skill/propose/test/integration/SkillProposeIntegration.test.ts create mode 100644 src/debug/jtag/commands/skill/propose/test/unit/SkillProposeCommand.test.ts create mode 100644 src/debug/jtag/commands/skill/validate/.npmignore create mode 100644 src/debug/jtag/commands/skill/validate/README.md create mode 100644 src/debug/jtag/commands/skill/validate/browser/SkillValidateBrowserCommand.ts create mode 100644 src/debug/jtag/commands/skill/validate/package.json create mode 100644 src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts create mode 100644 src/debug/jtag/commands/skill/validate/shared/SkillValidateTypes.ts create mode 100644 src/debug/jtag/commands/skill/validate/test/integration/SkillValidateIntegration.test.ts create mode 100644 src/debug/jtag/commands/skill/validate/test/unit/SkillValidateCommand.test.ts create mode 100644 src/debug/jtag/system/code/server/CodeTaskDelegator.ts create mode 100644 src/debug/jtag/system/code/server/ExecutionSandbox.ts create mode 100644 src/debug/jtag/system/code/server/PlanGovernance.ts create mode 100644 src/debug/jtag/system/code/server/SecurityTier.ts create mode 100644 src/debug/jtag/system/code/server/ToolAllowlistEnforcer.ts create mode 100644 src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts create mode 100644 src/debug/jtag/system/data/entities/SkillEntity.ts create mode 100644 src/debug/jtag/tests/integration/coding-agent-workflow.test.ts create mode 100644 src/debug/jtag/tests/integration/sandbox-enforcement.test.ts create mode 100644 src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts create mode 100644 src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts create mode 100644 src/debug/jtag/tests/unit/code/ExecutionSandbox.test.ts create mode 100644 src/debug/jtag/tests/unit/code/PlanGovernance.test.ts create mode 100644 src/debug/jtag/tests/unit/code/SecurityTier.test.ts create mode 100644 src/debug/jtag/tests/unit/code/SkillEntity.test.ts create mode 100644 src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts create mode 100644 src/debug/jtag/tests/unit/code/ToolAllowlistEnforcer.test.ts diff --git a/src/debug/jtag/commands/skill/activate/.npmignore b/src/debug/jtag/commands/skill/activate/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/skill/activate/README.md b/src/debug/jtag/commands/skill/activate/README.md new file mode 100644 index 000000000..062266a2c --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/README.md @@ -0,0 +1,157 @@ +# Skill Activate Command + +Activate a validated skill by registering it as a live command. The skill becomes available for use by the creator (personal) or all personas (team). + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag skill/activate --skillId= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('skill/activate', { + // your parameters here +}); +``` + +## Parameters + +- **skillId** (required): `string` - ID of the SkillEntity to activate + +## Result + +Returns `SkillActivateResult` with: + +Returns CommandResult with: +- **skillId**: `string` - ID of the SkillEntity +- **name**: `string` - Skill command name +- **status**: `string` - Lifecycle status after activation +- **activatedAt**: `number` - Timestamp when the skill was activated +- **message**: `string` - Human-readable result message + +## Examples + +### Activate a validated skill + +```bash +./jtag skill/activate --skillId="uuid-of-skill" +``` + +**Expected result:** +{ skillId: "uuid", name: "analysis/complexity", status: "active" } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help skill/activate +``` + +**Tool:** +```typescript +// Use your help tool with command name 'skill/activate' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme skill/activate +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'skill/activate' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Skill Activate/test/unit/SkillActivateCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Skill Activate/test/integration/SkillActivateIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/SkillActivateTypes.ts` +- **Browser**: Browser-specific implementation in `browser/SkillActivateBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/SkillActivateServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/SkillActivateCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/SkillActivateIntegration.test.ts` diff --git a/src/debug/jtag/commands/skill/activate/browser/SkillActivateBrowserCommand.ts b/src/debug/jtag/commands/skill/activate/browser/SkillActivateBrowserCommand.ts new file mode 100644 index 000000000..ec7a35acf --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/browser/SkillActivateBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Skill Activate Command - Browser Implementation + * + * Activate a validated skill by registering it as a live command. The skill becomes available for use by the creator (personal) or all personas (team). + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillActivateParams, SkillActivateResult } from '../shared/SkillActivateTypes'; + +export class SkillActivateBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/activate', context, subpath, commander); + } + + async execute(params: SkillActivateParams): Promise { + console.log('🌐 BROWSER: Delegating Skill Activate to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/skill/activate/package.json b/src/debug/jtag/commands/skill/activate/package.json new file mode 100644 index 000000000..fc1764ded --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/skill/activate", + "version": "1.0.0", + "description": "Activate a validated skill by registering it as a live command. The skill becomes available for use by the creator (personal) or all personas (team).", + "main": "server/SkillActivateServerCommand.ts", + "types": "shared/SkillActivateTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/SkillActivateIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "skill/activate" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts b/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts new file mode 100644 index 000000000..17f8c81d5 --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts @@ -0,0 +1,124 @@ +/** + * Skill Activate Command - Server Implementation + * + * Activates a validated skill by registering it as a live command. + * The skill becomes available for use by the creator (personal) or all personas (team). + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { SkillActivateParams, SkillActivateResult } from '../shared/SkillActivateTypes'; +import { createSkillActivateResultFromParams } from '../shared/SkillActivateTypes'; +import { SkillEntity } from '@system/data/entities/SkillEntity'; +import { DataDaemon } from '@daemons/data-daemon/shared/DataDaemon'; +import { COLLECTIONS } from '@system/shared/Constants'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class SkillActivateServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/activate', context, subpath, commander); + } + + async execute(params: SkillActivateParams): Promise { + const { skillId } = params; + + if (!skillId?.trim()) { + throw new ValidationError('skillId', "Missing required parameter 'skillId'."); + } + + // Load skill entity + const readResult = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!readResult.success || !readResult.data) { + throw new ValidationError('skillId', `Skill not found: ${skillId}`); + } + const skill = readResult.data.data as SkillEntity; + + if (skill.status !== 'validated') { + throw new ValidationError('skillId', + `Skill '${skill.name}' cannot be activated in status '${skill.status}'. Must be 'validated' first.`); + } + + if (!skill.outputDir) { + throw new ValidationError('skillId', `Skill '${skill.name}' has no outputDir.`); + } + + // For team-scoped skills, verify governance approval + if (skill.scope === 'team' && skill.proposalId) { + try { + const proposalResult = await DataDaemon.read(COLLECTIONS.DECISION_PROPOSALS, skill.proposalId); + if (proposalResult.success && proposalResult.data) { + const proposal = proposalResult.data.data as Record; + if (proposal.status !== 'approved' && proposal.status !== 'concluded') { + throw new ValidationError('skillId', + `Team skill '${skill.name}' has not been approved yet (proposal status: ${proposal.status}).`); + } + } + } catch (e) { + if (e instanceof ValidationError) throw e; + // If proposal lookup fails, proceed (governance is best-effort) + } + } + + // Activate: dynamically import the generated command server module + // For personal skills: register in the runtime command map + // For team skills: the generated files are already in commands/ and will be picked up on next build + const now = Date.now(); + + try { + if (skill.scope === 'personal') { + // Dynamic import of the generated server command + const serverPath = skill.generatedFiles?.find(f => f.includes('ServerCommand')); + if (serverPath) { + await this.registerPersonalSkill(skill, serverPath); + } + } + // Team skills: files are already in commands/ directory from generate step + // They'll be available after the next npm start / registry rebuild + } catch (e) { + await DataDaemon.update( + COLLECTIONS.SKILLS, + skill.id as UUID, + { + status: 'failed', + failureReason: `Activation failed: ${e instanceof Error ? e.message : String(e)}`, + } as Partial, + ); + + throw new ValidationError('skillId', + `Failed to activate skill '${skill.name}': ${e instanceof Error ? e.message : String(e)}`); + } + + // Update entity + await DataDaemon.update( + COLLECTIONS.SKILLS, + skill.id as UUID, + { + status: 'active', + activatedAt: now, + } as Partial, + ); + + return createSkillActivateResultFromParams(params, { + success: true, + skillId: skill.id, + name: skill.name, + status: 'active', + activatedAt: now, + message: skill.scope === 'team' + ? `Skill '${skill.name}' activated for all personas (available after next build)` + : `Skill '${skill.name}' activated for creator ${skill.createdById}`, + }); + } + + private async registerPersonalSkill(_skill: SkillEntity, _serverPath: string): Promise { + // Dynamic command registration for personal skills + // In the current architecture, commands are discovered from the file system + // Personal skills stored in .continuum/skills/ will need the command daemon + // to scan that directory on next refresh cycle + // + // For now, marking as active is sufficient β€” the skill files exist and can be + // loaded by the command daemon when it next scans for commands + } +} diff --git a/src/debug/jtag/commands/skill/activate/shared/SkillActivateTypes.ts b/src/debug/jtag/commands/skill/activate/shared/SkillActivateTypes.ts new file mode 100644 index 000000000..e8a9e7004 --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/shared/SkillActivateTypes.ts @@ -0,0 +1,105 @@ +/** + * Skill Activate Command - Shared Types + * + * Activate a validated skill by registering it as a live command. The skill becomes available for use by the creator (personal) or all personas (team). + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Skill Activate Command Parameters + */ +export interface SkillActivateParams extends CommandParams { + // ID of the SkillEntity to activate + skillId: string; +} + +/** + * Factory function for creating SkillActivateParams + */ +export const createSkillActivateParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // ID of the SkillEntity to activate + skillId: string; + } +): SkillActivateParams => createPayload(context, sessionId, { + + ...data +}); + +/** + * Skill Activate Command Result + */ +export interface SkillActivateResult extends CommandResult { + success: boolean; + // ID of the SkillEntity + skillId: string; + // Skill command name + name: string; + // Lifecycle status after activation + status: string; + // Timestamp when the skill was activated + activatedAt: number; + // Human-readable result message + message: string; + error?: JTAGError; +} + +/** + * Factory function for creating SkillActivateResult with defaults + */ +export const createSkillActivateResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // ID of the SkillEntity + skillId?: string; + // Skill command name + name?: string; + // Lifecycle status after activation + status?: string; + // Timestamp when the skill was activated + activatedAt?: number; + // Human-readable result message + message?: string; + error?: JTAGError; + } +): SkillActivateResult => createPayload(context, sessionId, { + skillId: data.skillId ?? '', + name: data.name ?? '', + status: data.status ?? '', + activatedAt: data.activatedAt ?? 0, + message: data.message ?? '', + ...data +}); + +/** + * Smart Skill Activate-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createSkillActivateResultFromParams = ( + params: SkillActivateParams, + differences: Omit +): SkillActivateResult => transformPayload(params, differences); + +/** + * Skill Activate β€” Type-safe command executor + * + * Usage: + * import { SkillActivate } from '...shared/SkillActivateTypes'; + * const result = await SkillActivate.execute({ ... }); + */ +export const SkillActivate = { + execute(params: CommandInput): Promise { + return Commands.execute('skill/activate', params as Partial); + }, + commandName: 'skill/activate' as const, +} as const; diff --git a/src/debug/jtag/commands/skill/activate/test/integration/SkillActivateIntegration.test.ts b/src/debug/jtag/commands/skill/activate/test/integration/SkillActivateIntegration.test.ts new file mode 100644 index 000000000..b01c688ef --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/test/integration/SkillActivateIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * SkillActivate Command Integration Tests + * + * Tests Skill Activate command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Activate/test/integration/SkillActivateIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ SkillActivate Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Skill Activate command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Skill Activate command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Skill Activate']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Skill Activate returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Skill Activate succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Skill Activate']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Skill Activate']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Skill Activate']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Skill Activate']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Skill Activate']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllSkillActivateIntegrationTests(): Promise { + console.log('πŸš€ Starting SkillActivate Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL SkillActivate INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ SkillActivate integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillActivateIntegrationTests(); +} else { + module.exports = { runAllSkillActivateIntegrationTests }; +} diff --git a/src/debug/jtag/commands/skill/activate/test/unit/SkillActivateCommand.test.ts b/src/debug/jtag/commands/skill/activate/test/unit/SkillActivateCommand.test.ts new file mode 100644 index 000000000..9b79f3c1c --- /dev/null +++ b/src/debug/jtag/commands/skill/activate/test/unit/SkillActivateCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * SkillActivate Command Unit Tests + * + * Tests Skill Activate command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Activate/test/unit/SkillActivateCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { SkillActivateParams, SkillActivateResult } from '../../shared/SkillActivateTypes'; + +console.log('πŸ§ͺ SkillActivate Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Skill Activate logic for testing + */ +async function mockSkillActivateCommand(params: SkillActivateParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Skill Activate' or see the Skill Activate README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as SkillActivateResult; +} + +/** + * Test 1: Command structure validation + */ +function testSkillActivateCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: SkillActivate command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Skill Activate command + const validParams: SkillActivateParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockSkillActivateExecution(): Promise { + console.log('\n⚑ Test 2: Mock Skill Activate command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: SkillActivateParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockSkillActivateCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testSkillActivateRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as SkillActivateParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as SkillActivateParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockSkillActivateCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testSkillActivateOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: SkillActivateParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockSkillActivateCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: SkillActivateParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockSkillActivateCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testSkillActivatePerformance(): Promise { + console.log('\n⚑ Test 5: SkillActivate performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockSkillActivateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillActivateParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `SkillActivate completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testSkillActivateResultStructure(): Promise { + console.log('\nπŸ” Test 6: SkillActivate result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockSkillActivateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillActivateParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllSkillActivateUnitTests(): Promise { + console.log('πŸš€ Starting SkillActivate Command Unit Tests\n'); + + try { + testSkillActivateCommandStructure(); + await testMockSkillActivateExecution(); + await testSkillActivateRequiredParams(); + await testSkillActivateOptionalParams(); + await testSkillActivatePerformance(); + await testSkillActivateResultStructure(); + + console.log('\nπŸŽ‰ ALL SkillActivate UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ SkillActivate unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillActivateUnitTests(); +} else { + module.exports = { runAllSkillActivateUnitTests }; +} diff --git a/src/debug/jtag/commands/skill/generate/.npmignore b/src/debug/jtag/commands/skill/generate/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/skill/generate/README.md b/src/debug/jtag/commands/skill/generate/README.md new file mode 100644 index 000000000..c1c120753 --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/README.md @@ -0,0 +1,159 @@ +# Skill Generate Command + +Generate code files for a proposed skill using the CommandGenerator. Retrieves the SkillEntity and produces source files. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag skill/generate --skillId= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('skill/generate', { + // your parameters here +}); +``` + +## Parameters + +- **skillId** (required): `string` - ID of the SkillEntity to generate code for +- **outputDir** (optional): `string` - Override output directory (default: persona workspace or commands/) + +## Result + +Returns `SkillGenerateResult` with: + +Returns CommandResult with: +- **skillId**: `string` - ID of the SkillEntity +- **name**: `string` - Skill command name +- **status**: `string` - Lifecycle status after generation +- **outputDir**: `string` - Directory where files were generated +- **generatedFiles**: `object` - Array of generated file paths +- **message**: `string` - Human-readable result message + +## Examples + +### Generate files for a proposed skill + +```bash +./jtag skill/generate --skillId="uuid-of-skill" +``` + +**Expected result:** +{ skillId: "uuid", name: "analysis/complexity", status: "generated", generatedFiles: [...] } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help skill/generate +``` + +**Tool:** +```typescript +// Use your help tool with command name 'skill/generate' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme skill/generate +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'skill/generate' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Skill Generate/test/unit/SkillGenerateCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Skill Generate/test/integration/SkillGenerateIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/SkillGenerateTypes.ts` +- **Browser**: Browser-specific implementation in `browser/SkillGenerateBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/SkillGenerateServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/SkillGenerateCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/SkillGenerateIntegration.test.ts` diff --git a/src/debug/jtag/commands/skill/generate/browser/SkillGenerateBrowserCommand.ts b/src/debug/jtag/commands/skill/generate/browser/SkillGenerateBrowserCommand.ts new file mode 100644 index 000000000..c9130a26c --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/browser/SkillGenerateBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Skill Generate Command - Browser Implementation + * + * Generate code files for a proposed skill using the CommandGenerator. Retrieves the SkillEntity and produces source files. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillGenerateParams, SkillGenerateResult } from '../shared/SkillGenerateTypes'; + +export class SkillGenerateBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/generate', context, subpath, commander); + } + + async execute(params: SkillGenerateParams): Promise { + console.log('🌐 BROWSER: Delegating Skill Generate to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/skill/generate/package.json b/src/debug/jtag/commands/skill/generate/package.json new file mode 100644 index 000000000..c7fa6d6e2 --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/skill/generate", + "version": "1.0.0", + "description": "Generate code files for a proposed skill using the CommandGenerator. Retrieves the SkillEntity and produces source files.", + "main": "server/SkillGenerateServerCommand.ts", + "types": "shared/SkillGenerateTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/SkillGenerateIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "skill/generate" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts b/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts new file mode 100644 index 000000000..c6b3904a6 --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts @@ -0,0 +1,127 @@ +/** + * Skill Generate Command - Server Implementation + * + * Retrieves a SkillEntity and runs CommandGenerator programmatically + * to produce the command source files. + */ + +import * as path from 'path'; +import * as fs from 'fs'; +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { SkillGenerateParams, SkillGenerateResult } from '../shared/SkillGenerateTypes'; +import { createSkillGenerateResultFromParams } from '../shared/SkillGenerateTypes'; +import { SkillEntity } from '@system/data/entities/SkillEntity'; +import { DataDaemon } from '@daemons/data-daemon/shared/DataDaemon'; +import { COLLECTIONS } from '@system/shared/Constants'; +import { CommandGenerator } from '@generator/CommandGenerator'; +import type { CommandSpec } from '@generator/CommandNaming'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class SkillGenerateServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/generate', context, subpath, commander); + } + + async execute(params: SkillGenerateParams): Promise { + const { skillId } = params; + + if (!skillId?.trim()) { + throw new ValidationError('skillId', "Missing required parameter 'skillId'."); + } + + // Load skill entity + const readResult = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!readResult.success || !readResult.data) { + throw new ValidationError('skillId', `Skill not found: ${skillId}`); + } + const skill = readResult.data.data as SkillEntity; + + // Verify lifecycle state: personal skills can skip approval, team skills need 'approved' + const canGenerate = + (skill.status === 'proposed' && skill.scope === 'personal') || + skill.status === 'approved'; + + if (!canGenerate) { + throw new ValidationError('skillId', + `Skill '${skill.name}' cannot be generated in status '${skill.status}' (scope: ${skill.scope}). ` + + (skill.scope === 'team' ? 'Team skills must be approved first.' : 'Expected status: proposed.')); + } + + // Build CommandSpec from SkillSpec + const commandSpec: CommandSpec = { + name: skill.spec.name, + description: skill.spec.description, + params: skill.spec.params.map(p => ({ + name: p.name, + type: p.type, + optional: p.optional, + description: p.description, + })), + results: skill.spec.results.map(r => ({ + name: r.name, + type: r.type, + description: r.description, + })), + examples: skill.spec.examples?.map(e => ({ + description: e.description, + command: e.command, + expectedResult: e.expectedResult, + })), + accessLevel: skill.spec.accessLevel ?? 'ai-safe', + }; + + // Determine output directory + const rootPath = path.resolve(__dirname, '../../../../'); + const outputDir = params.outputDir + ?? (skill.scope === 'team' + ? path.join(rootPath, 'commands', skill.spec.name) + : path.join(rootPath, '.continuum', 'skills', skill.createdById, skill.spec.name)); + + // Run CommandGenerator + const generator = new CommandGenerator(rootPath); + generator.generate(commandSpec, outputDir, { force: true }); + + // Collect generated files + const generatedFiles = this.collectFiles(outputDir); + + // Update entity + await DataDaemon.update( + COLLECTIONS.SKILLS, + skill.id as UUID, + { + status: 'generated', + outputDir, + generatedFiles, + } as Partial, + ); + + return createSkillGenerateResultFromParams(params, { + success: true, + skillId: skill.id, + name: skill.name, + status: 'generated', + outputDir, + generatedFiles, + message: `Generated ${generatedFiles.length} files for skill '${skill.name}' in ${outputDir}`, + }); + } + + private collectFiles(dir: string): string[] { + const files: string[] = []; + if (!fs.existsSync(dir)) return files; + + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const full = path.join(dir, entry.name); + if (entry.isDirectory()) { + files.push(...this.collectFiles(full)); + } else { + files.push(full); + } + } + return files; + } +} diff --git a/src/debug/jtag/commands/skill/generate/shared/SkillGenerateTypes.ts b/src/debug/jtag/commands/skill/generate/shared/SkillGenerateTypes.ts new file mode 100644 index 000000000..e6361dad4 --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/shared/SkillGenerateTypes.ts @@ -0,0 +1,114 @@ +/** + * Skill Generate Command - Shared Types + * + * Generate code files for a proposed skill using the CommandGenerator. Retrieves the SkillEntity and produces source files. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Skill Generate Command Parameters + */ +export interface SkillGenerateParams extends CommandParams { + // ID of the SkillEntity to generate code for + skillId: string; + // Override output directory (default: persona workspace or commands/) + outputDir?: string; +} + +/** + * Factory function for creating SkillGenerateParams + */ +export const createSkillGenerateParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // ID of the SkillEntity to generate code for + skillId: string; + // Override output directory (default: persona workspace or commands/) + outputDir?: string; + } +): SkillGenerateParams => createPayload(context, sessionId, { + outputDir: data.outputDir ?? '', + ...data +}); + +/** + * Skill Generate Command Result + */ +export interface SkillGenerateResult extends CommandResult { + success: boolean; + // ID of the SkillEntity + skillId: string; + // Skill command name + name: string; + // Lifecycle status after generation + status: string; + // Directory where files were generated + outputDir: string; + // Array of generated file paths + generatedFiles: string[]; + // Human-readable result message + message: string; + error?: JTAGError; +} + +/** + * Factory function for creating SkillGenerateResult with defaults + */ +export const createSkillGenerateResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // ID of the SkillEntity + skillId?: string; + // Skill command name + name?: string; + // Lifecycle status after generation + status?: string; + // Directory where files were generated + outputDir?: string; + // Array of generated file paths + generatedFiles?: string[]; + // Human-readable result message + message?: string; + error?: JTAGError; + } +): SkillGenerateResult => createPayload(context, sessionId, { + skillId: data.skillId ?? '', + name: data.name ?? '', + status: data.status ?? '', + outputDir: data.outputDir ?? '', + generatedFiles: data.generatedFiles ?? [], + message: data.message ?? '', + ...data +}); + +/** + * Smart Skill Generate-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createSkillGenerateResultFromParams = ( + params: SkillGenerateParams, + differences: Omit +): SkillGenerateResult => transformPayload(params, differences); + +/** + * Skill Generate β€” Type-safe command executor + * + * Usage: + * import { SkillGenerate } from '...shared/SkillGenerateTypes'; + * const result = await SkillGenerate.execute({ ... }); + */ +export const SkillGenerate = { + execute(params: CommandInput): Promise { + return Commands.execute('skill/generate', params as Partial); + }, + commandName: 'skill/generate' as const, +} as const; diff --git a/src/debug/jtag/commands/skill/generate/test/integration/SkillGenerateIntegration.test.ts b/src/debug/jtag/commands/skill/generate/test/integration/SkillGenerateIntegration.test.ts new file mode 100644 index 000000000..dfe47514e --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/test/integration/SkillGenerateIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * SkillGenerate Command Integration Tests + * + * Tests Skill Generate command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Generate/test/integration/SkillGenerateIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ SkillGenerate Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Skill Generate command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Skill Generate command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Skill Generate']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Skill Generate returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Skill Generate succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Skill Generate']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Skill Generate']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Skill Generate']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Skill Generate']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Skill Generate']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllSkillGenerateIntegrationTests(): Promise { + console.log('πŸš€ Starting SkillGenerate Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL SkillGenerate INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ SkillGenerate integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillGenerateIntegrationTests(); +} else { + module.exports = { runAllSkillGenerateIntegrationTests }; +} diff --git a/src/debug/jtag/commands/skill/generate/test/unit/SkillGenerateCommand.test.ts b/src/debug/jtag/commands/skill/generate/test/unit/SkillGenerateCommand.test.ts new file mode 100644 index 000000000..6aa6787c0 --- /dev/null +++ b/src/debug/jtag/commands/skill/generate/test/unit/SkillGenerateCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * SkillGenerate Command Unit Tests + * + * Tests Skill Generate command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Generate/test/unit/SkillGenerateCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { SkillGenerateParams, SkillGenerateResult } from '../../shared/SkillGenerateTypes'; + +console.log('πŸ§ͺ SkillGenerate Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Skill Generate logic for testing + */ +async function mockSkillGenerateCommand(params: SkillGenerateParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Skill Generate' or see the Skill Generate README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as SkillGenerateResult; +} + +/** + * Test 1: Command structure validation + */ +function testSkillGenerateCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: SkillGenerate command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Skill Generate command + const validParams: SkillGenerateParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockSkillGenerateExecution(): Promise { + console.log('\n⚑ Test 2: Mock Skill Generate command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: SkillGenerateParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockSkillGenerateCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testSkillGenerateRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as SkillGenerateParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as SkillGenerateParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockSkillGenerateCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testSkillGenerateOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: SkillGenerateParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockSkillGenerateCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: SkillGenerateParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockSkillGenerateCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testSkillGeneratePerformance(): Promise { + console.log('\n⚑ Test 5: SkillGenerate performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockSkillGenerateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillGenerateParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `SkillGenerate completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testSkillGenerateResultStructure(): Promise { + console.log('\nπŸ” Test 6: SkillGenerate result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockSkillGenerateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillGenerateParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllSkillGenerateUnitTests(): Promise { + console.log('πŸš€ Starting SkillGenerate Command Unit Tests\n'); + + try { + testSkillGenerateCommandStructure(); + await testMockSkillGenerateExecution(); + await testSkillGenerateRequiredParams(); + await testSkillGenerateOptionalParams(); + await testSkillGeneratePerformance(); + await testSkillGenerateResultStructure(); + + console.log('\nπŸŽ‰ ALL SkillGenerate UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ SkillGenerate unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillGenerateUnitTests(); +} else { + module.exports = { runAllSkillGenerateUnitTests }; +} diff --git a/src/debug/jtag/commands/skill/list/.npmignore b/src/debug/jtag/commands/skill/list/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/skill/list/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/skill/list/README.md b/src/debug/jtag/commands/skill/list/README.md new file mode 100644 index 000000000..a834ab17a --- /dev/null +++ b/src/debug/jtag/commands/skill/list/README.md @@ -0,0 +1,164 @@ +# Skill List Command + +List skills with optional filters by status, scope, and creator. Returns SkillEntity records from the database. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag skill/list [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('skill/list', { + // your parameters here +}); +``` + +## Parameters + +- **status** (optional): `string` - Filter by lifecycle status (proposed, approved, generated, validated, active, failed, deprecated) +- **scope** (optional): `string` - Filter by scope (personal, team) +- **createdById** (optional): `string` - Filter by creator persona ID +- **limit** (optional): `number` - Maximum results to return (default: 20) + +## Result + +Returns `SkillListResult` with: + +Returns CommandResult with: +- **skills**: `object` - Array of SkillEntity records matching the filter +- **total**: `number` - Total matching skills +- **message**: `string` - Human-readable result summary + +## Examples + +### List all active skills + +```bash +./jtag skill/list --status="active" +``` + +**Expected result:** +{ skills: [...], total: 5 } + +### List skills created by a specific persona + +```bash +./jtag skill/list --createdById="uuid-of-persona" --limit=10 +``` + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help skill/list +``` + +**Tool:** +```typescript +// Use your help tool with command name 'skill/list' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme skill/list +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'skill/list' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Skill List/test/unit/SkillListCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Skill List/test/integration/SkillListIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/SkillListTypes.ts` +- **Browser**: Browser-specific implementation in `browser/SkillListBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/SkillListServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/SkillListCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/SkillListIntegration.test.ts` diff --git a/src/debug/jtag/commands/skill/list/browser/SkillListBrowserCommand.ts b/src/debug/jtag/commands/skill/list/browser/SkillListBrowserCommand.ts new file mode 100644 index 000000000..81abe5da1 --- /dev/null +++ b/src/debug/jtag/commands/skill/list/browser/SkillListBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Skill List Command - Browser Implementation + * + * List skills with optional filters by status, scope, and creator. Returns SkillEntity records from the database. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillListParams, SkillListResult } from '../shared/SkillListTypes'; + +export class SkillListBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/list', context, subpath, commander); + } + + async execute(params: SkillListParams): Promise { + console.log('🌐 BROWSER: Delegating Skill List to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/skill/list/package.json b/src/debug/jtag/commands/skill/list/package.json new file mode 100644 index 000000000..8c434441e --- /dev/null +++ b/src/debug/jtag/commands/skill/list/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/skill/list", + "version": "1.0.0", + "description": "List skills with optional filters by status, scope, and creator. Returns SkillEntity records from the database.", + "main": "server/SkillListServerCommand.ts", + "types": "shared/SkillListTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/SkillListIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "skill/list" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/skill/list/server/SkillListServerCommand.ts b/src/debug/jtag/commands/skill/list/server/SkillListServerCommand.ts new file mode 100644 index 000000000..bb437152e --- /dev/null +++ b/src/debug/jtag/commands/skill/list/server/SkillListServerCommand.ts @@ -0,0 +1,65 @@ +/** + * Skill List Command - Server Implementation + * + * Lists skills with optional filters by status, scope, and creator. + * Returns SkillEntity records from the database. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillListParams, SkillListResult } from '../shared/SkillListTypes'; +import { createSkillListResultFromParams } from '../shared/SkillListTypes'; +import { SkillEntity } from '@system/data/entities/SkillEntity'; +import { DataDaemon } from '@daemons/data-daemon/shared/DataDaemon'; +import type { UniversalFilter } from '@daemons/data-daemon/shared/DataStorageAdapter'; +import { COLLECTIONS } from '@system/shared/Constants'; + +export class SkillListServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/list', context, subpath, commander); + } + + async execute(params: SkillListParams): Promise { + // Build filter from optional params + const filter: UniversalFilter = {}; + + if (params.status?.trim()) { + filter.status = params.status; + } + if (params.scope?.trim()) { + filter.scope = params.scope; + } + if (params.createdById?.trim()) { + filter.createdById = params.createdById; + } + + const limit = params.limit ?? 20; + + const queryResult = await DataDaemon.query({ + collection: COLLECTIONS.SKILLS, + filter, + sort: [{ field: 'createdAt', direction: 'desc' }], + limit, + }); + + const skills = queryResult.success && queryResult.data + ? queryResult.data.map(record => record.data) + : []; + const total = skills.length; + + // Build human-readable summary + const filterDesc = Object.entries(filter) + .map(([k, v]) => `${k}=${v}`) + .join(', '); + + return createSkillListResultFromParams(params, { + success: true, + skills, + total, + message: total > 0 + ? `Found ${total} skill${total !== 1 ? 's' : ''}${filterDesc ? ` (${filterDesc})` : ''}` + : `No skills found${filterDesc ? ` matching ${filterDesc}` : ''}`, + }); + } +} diff --git a/src/debug/jtag/commands/skill/list/shared/SkillListTypes.ts b/src/debug/jtag/commands/skill/list/shared/SkillListTypes.ts new file mode 100644 index 000000000..bff5df9d8 --- /dev/null +++ b/src/debug/jtag/commands/skill/list/shared/SkillListTypes.ts @@ -0,0 +1,110 @@ +/** + * Skill List Command - Shared Types + * + * List skills with optional filters by status, scope, and creator. Returns SkillEntity records from the database. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Skill List Command Parameters + */ +export interface SkillListParams extends CommandParams { + // Filter by lifecycle status (proposed, approved, generated, validated, active, failed, deprecated) + status?: string; + // Filter by scope (personal, team) + scope?: string; + // Filter by creator persona ID + createdById?: string; + // Maximum results to return (default: 20) + limit?: number; +} + +/** + * Factory function for creating SkillListParams + */ +export const createSkillListParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Filter by lifecycle status (proposed, approved, generated, validated, active, failed, deprecated) + status?: string; + // Filter by scope (personal, team) + scope?: string; + // Filter by creator persona ID + createdById?: string; + // Maximum results to return (default: 20) + limit?: number; + } +): SkillListParams => createPayload(context, sessionId, { + status: data.status ?? '', + scope: data.scope ?? '', + createdById: data.createdById ?? '', + limit: data.limit ?? 0, + ...data +}); + +/** + * Skill List Command Result + */ +export interface SkillListResult extends CommandResult { + success: boolean; + // Array of SkillEntity records matching the filter + skills: Record[]; + // Total matching skills + total: number; + // Human-readable result summary + message: string; + error?: JTAGError; +} + +/** + * Factory function for creating SkillListResult with defaults + */ +export const createSkillListResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Array of SkillEntity records matching the filter + skills?: Record[]; + // Total matching skills + total?: number; + // Human-readable result summary + message?: string; + error?: JTAGError; + } +): SkillListResult => createPayload(context, sessionId, { + skills: data.skills ?? [], + total: data.total ?? 0, + message: data.message ?? '', + ...data +}); + +/** + * Smart Skill List-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createSkillListResultFromParams = ( + params: SkillListParams, + differences: Omit +): SkillListResult => transformPayload(params, differences); + +/** + * Skill List β€” Type-safe command executor + * + * Usage: + * import { SkillList } from '...shared/SkillListTypes'; + * const result = await SkillList.execute({ ... }); + */ +export const SkillList = { + execute(params: CommandInput): Promise { + return Commands.execute('skill/list', params as Partial); + }, + commandName: 'skill/list' as const, +} as const; diff --git a/src/debug/jtag/commands/skill/list/test/integration/SkillListIntegration.test.ts b/src/debug/jtag/commands/skill/list/test/integration/SkillListIntegration.test.ts new file mode 100644 index 000000000..76008513e --- /dev/null +++ b/src/debug/jtag/commands/skill/list/test/integration/SkillListIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * SkillList Command Integration Tests + * + * Tests Skill List command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill List/test/integration/SkillListIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ SkillList Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Skill List command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Skill List command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Skill List']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Skill List returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Skill List succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Skill List']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Skill List']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Skill List']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Skill List']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Skill List']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllSkillListIntegrationTests(): Promise { + console.log('πŸš€ Starting SkillList Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL SkillList INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ SkillList integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillListIntegrationTests(); +} else { + module.exports = { runAllSkillListIntegrationTests }; +} diff --git a/src/debug/jtag/commands/skill/list/test/unit/SkillListCommand.test.ts b/src/debug/jtag/commands/skill/list/test/unit/SkillListCommand.test.ts new file mode 100644 index 000000000..6c375ddcf --- /dev/null +++ b/src/debug/jtag/commands/skill/list/test/unit/SkillListCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * SkillList Command Unit Tests + * + * Tests Skill List command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill List/test/unit/SkillListCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { SkillListParams, SkillListResult } from '../../shared/SkillListTypes'; + +console.log('πŸ§ͺ SkillList Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Skill List logic for testing + */ +async function mockSkillListCommand(params: SkillListParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Skill List' or see the Skill List README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as SkillListResult; +} + +/** + * Test 1: Command structure validation + */ +function testSkillListCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: SkillList command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Skill List command + const validParams: SkillListParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockSkillListExecution(): Promise { + console.log('\n⚑ Test 2: Mock Skill List command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: SkillListParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockSkillListCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testSkillListRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as SkillListParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as SkillListParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockSkillListCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testSkillListOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: SkillListParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockSkillListCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: SkillListParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockSkillListCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testSkillListPerformance(): Promise { + console.log('\n⚑ Test 5: SkillList performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockSkillListCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillListParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `SkillList completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testSkillListResultStructure(): Promise { + console.log('\nπŸ” Test 6: SkillList result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockSkillListCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillListParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllSkillListUnitTests(): Promise { + console.log('πŸš€ Starting SkillList Command Unit Tests\n'); + + try { + testSkillListCommandStructure(); + await testMockSkillListExecution(); + await testSkillListRequiredParams(); + await testSkillListOptionalParams(); + await testSkillListPerformance(); + await testSkillListResultStructure(); + + console.log('\nπŸŽ‰ ALL SkillList UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ SkillList unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillListUnitTests(); +} else { + module.exports = { runAllSkillListUnitTests }; +} diff --git a/src/debug/jtag/commands/skill/propose/.npmignore b/src/debug/jtag/commands/skill/propose/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/skill/propose/README.md b/src/debug/jtag/commands/skill/propose/README.md new file mode 100644 index 000000000..c3d948e5f --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/README.md @@ -0,0 +1,165 @@ +# Skill Propose Command + +Propose a new skill (command) specification. Creates a SkillEntity with status 'proposed'. For team-scoped skills, creates a DecisionProposal for governance approval. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag skill/propose --name= --description= --skillParams= --skillResults= --implementation= --personaId= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('skill/propose', { + // your parameters here +}); +``` + +## Parameters + +- **name** (required): `string` - Command name (e.g., 'analysis/complexity') +- **description** (required): `string` - What the skill does +- **skillParams** (required): `object` - Input parameters spec array [{name, type, optional?, description?}] +- **skillResults** (required): `object` - Output fields spec array [{name, type, description?}] +- **implementation** (required): `string` - Natural language description of the implementation logic +- **scope** (optional): `string` - Who can use it: 'personal' (default) or 'team' (requires approval) +- **examples** (optional): `object` - Usage examples array [{description, command, expectedResult?}] +- **personaId** (required): `string` - AI persona proposing this skill + +## Result + +Returns `SkillProposeResult` with: + +Returns CommandResult with: +- **skillId**: `string` - ID of the created SkillEntity +- **name**: `string` - Skill command name +- **status**: `string` - Lifecycle status after proposal +- **scope**: `string` - Skill scope (personal or team) +- **proposalId**: `string` - DecisionProposal ID if team-scoped +- **message**: `string` - Human-readable result message + +## Examples + +### Propose a personal analysis skill + +```bash +./jtag skill/propose --name="analysis/complexity" --description="Analyze code complexity" --implementation="Count cyclomatic complexity per function" --personaId="ai-001" +``` + +**Expected result:** +{ skillId: "uuid", name: "analysis/complexity", status: "proposed", scope: "personal" } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help skill/propose +``` + +**Tool:** +```typescript +// Use your help tool with command name 'skill/propose' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme skill/propose +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'skill/propose' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Skill Propose/test/unit/SkillProposeCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Skill Propose/test/integration/SkillProposeIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/SkillProposeTypes.ts` +- **Browser**: Browser-specific implementation in `browser/SkillProposeBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/SkillProposeServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/SkillProposeCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/SkillProposeIntegration.test.ts` diff --git a/src/debug/jtag/commands/skill/propose/browser/SkillProposeBrowserCommand.ts b/src/debug/jtag/commands/skill/propose/browser/SkillProposeBrowserCommand.ts new file mode 100644 index 000000000..0f16ff84c --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/browser/SkillProposeBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Skill Propose Command - Browser Implementation + * + * Propose a new skill (command) specification. Creates a SkillEntity with status 'proposed'. For team-scoped skills, creates a DecisionProposal for governance approval. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillProposeParams, SkillProposeResult } from '../shared/SkillProposeTypes'; + +export class SkillProposeBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/propose', context, subpath, commander); + } + + async execute(params: SkillProposeParams): Promise { + console.log('🌐 BROWSER: Delegating Skill Propose to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/skill/propose/package.json b/src/debug/jtag/commands/skill/propose/package.json new file mode 100644 index 000000000..0aec7e005 --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/skill/propose", + "version": "1.0.0", + "description": "Propose a new skill (command) specification. Creates a SkillEntity with status 'proposed'. For team-scoped skills, creates a DecisionProposal for governance approval.", + "main": "server/SkillProposeServerCommand.ts", + "types": "shared/SkillProposeTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/SkillProposeIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "skill/propose" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/skill/propose/server/SkillProposeServerCommand.ts b/src/debug/jtag/commands/skill/propose/server/SkillProposeServerCommand.ts new file mode 100644 index 000000000..c32c06290 --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/server/SkillProposeServerCommand.ts @@ -0,0 +1,128 @@ +/** + * Skill Propose Command - Server Implementation + * + * Creates a SkillEntity from an AI's proposed specification. + * For team-scoped skills, also creates a DecisionProposal for governance. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { SkillProposeParams, SkillProposeResult } from '../shared/SkillProposeTypes'; +import { createSkillProposeResultFromParams } from '../shared/SkillProposeTypes'; +import { SkillEntity } from '@system/data/entities/SkillEntity'; +import type { SkillSpec, SkillParamSpec, SkillResultSpec, SkillScope } from '@system/data/entities/SkillEntity'; +import { DataDaemon } from '@daemons/data-daemon/shared/DataDaemon'; +import { COLLECTIONS } from '@system/shared/Constants'; +import { DecisionPropose } from '@commands/collaboration/decision/propose/shared/DecisionProposeTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class SkillProposeServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/propose', context, subpath, commander); + } + + async execute(params: SkillProposeParams): Promise { + const { name, description, implementation, personaId } = params; + const scope: SkillScope = (params.scope === 'team' ? 'team' : 'personal'); + + if (!name?.trim()) { + throw new ValidationError('name', "Missing required parameter 'name'. Provide the command name (e.g., 'analysis/complexity')."); + } + if (!description?.trim()) { + throw new ValidationError('description', "Missing required parameter 'description'."); + } + if (!implementation?.trim()) { + throw new ValidationError('implementation', "Missing required parameter 'implementation'. Describe what the skill should do."); + } + if (!personaId?.trim()) { + throw new ValidationError('personaId', "Missing required parameter 'personaId'."); + } + + // Check for duplicate active skill + const existingResult = await DataDaemon.query({ + collection: COLLECTIONS.SKILLS, + filter: { name, status: 'active' }, + limit: 1, + }); + if (existingResult.success && existingResult.data && existingResult.data.length > 0) { + throw new ValidationError('name', `A skill named '${name}' is already active.`); + } + + // Build skill spec + const skillParams = Array.isArray(params.skillParams) ? params.skillParams as unknown as SkillParamSpec[] : []; + const skillResults = Array.isArray(params.skillResults) ? params.skillResults as unknown as SkillResultSpec[] : []; + const examples = Array.isArray(params.examples) + ? params.examples as Array<{ description: string; command: string; expectedResult?: string }> + : undefined; + + const spec: SkillSpec = { + name, + description, + params: skillParams, + results: skillResults, + examples, + implementation, + accessLevel: 'ai-safe', + }; + + // Create entity + const entity = new SkillEntity(); + entity.name = name; + entity.description = description; + entity.createdById = personaId as UUID; + entity.spec = spec; + entity.scope = scope; + entity.status = 'proposed'; + + const validation = entity.validate(); + if (!validation.success) { + throw new ValidationError('spec', validation.error ?? 'Skill validation failed'); + } + + // Persist + const stored = await DataDaemon.store(COLLECTIONS.SKILLS, entity); + + // For team-scoped skills, create a governance proposal via the decision/propose command + let proposalId = ''; + if (scope === 'team') { + try { + const proposalResult = await DecisionPropose.execute({ + topic: `New Skill Proposal: ${name}`, + rationale: `${description}\n\nImplementation: ${implementation}\n\nParams: ${JSON.stringify(spec.params)}\nResults: ${JSON.stringify(spec.results)}`, + options: [ + { label: 'Approve', description: `Approve skill '${name}' for team use` }, + { label: 'Request Changes', description: 'Suggest modifications before approval' }, + { label: 'Reject', description: 'Decline this skill proposal' }, + ], + scope: 'all', + significanceLevel: 'medium', + proposerId: personaId as UUID, + }); + proposalId = proposalResult.proposalId ?? ''; + if (proposalId) { + await DataDaemon.update( + COLLECTIONS.SKILLS, + stored.id, + { proposalId: proposalId as UUID } as Partial, + ); + } + } catch { + // Governance proposal is optional β€” skill still proceeds + } + } + + return createSkillProposeResultFromParams(params, { + success: true, + skillId: stored.id, + name: stored.name, + status: stored.status, + scope: stored.scope, + proposalId, + message: scope === 'team' + ? `Skill '${name}' proposed for team approval (proposal: ${proposalId || 'pending'})` + : `Skill '${name}' proposed β€” ready to generate`, + }); + } +} diff --git a/src/debug/jtag/commands/skill/propose/shared/SkillProposeTypes.ts b/src/debug/jtag/commands/skill/propose/shared/SkillProposeTypes.ts new file mode 100644 index 000000000..f7143b951 --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/shared/SkillProposeTypes.ts @@ -0,0 +1,139 @@ +/** + * Skill Propose Command - Shared Types + * + * Propose a new skill (command) specification. Creates a SkillEntity with status 'proposed'. For team-scoped skills, creates a DecisionProposal for governance approval. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Skill Propose Command Parameters + */ +export interface SkillProposeParams extends CommandParams { + // Command name (e.g., 'analysis/complexity') + name: string; + // What the skill does + description: string; + // Input parameters spec array [{name, type, optional?, description?}] + skillParams: Record[]; + // Output fields spec array [{name, type, description?}] + skillResults: Record[]; + // Natural language description of the implementation logic + implementation: string; + // Who can use it: 'personal' (default) or 'team' (requires approval) + scope?: string; + // Usage examples array [{description, command, expectedResult?}] + examples?: Record[]; + // AI persona proposing this skill + personaId: string; +} + +/** + * Factory function for creating SkillProposeParams + */ +export const createSkillProposeParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Command name (e.g., 'analysis/complexity') + name: string; + // What the skill does + description: string; + // Input parameters spec array [{name, type, optional?, description?}] + skillParams: Record[]; + // Output fields spec array [{name, type, description?}] + skillResults: Record[]; + // Natural language description of the implementation logic + implementation: string; + // Who can use it: 'personal' (default) or 'team' (requires approval) + scope?: string; + // Usage examples array [{description, command, expectedResult?}] + examples?: Record[]; + // AI persona proposing this skill + personaId: string; + } +): SkillProposeParams => createPayload(context, sessionId, { + scope: data.scope ?? '', + examples: data.examples ?? undefined, + ...data +}); + +/** + * Skill Propose Command Result + */ +export interface SkillProposeResult extends CommandResult { + success: boolean; + // ID of the created SkillEntity + skillId: string; + // Skill command name + name: string; + // Lifecycle status after proposal + status: string; + // Skill scope (personal or team) + scope: string; + // DecisionProposal ID if team-scoped + proposalId: string; + // Human-readable result message + message: string; + error?: JTAGError; +} + +/** + * Factory function for creating SkillProposeResult with defaults + */ +export const createSkillProposeResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // ID of the created SkillEntity + skillId?: string; + // Skill command name + name?: string; + // Lifecycle status after proposal + status?: string; + // Skill scope (personal or team) + scope?: string; + // DecisionProposal ID if team-scoped + proposalId?: string; + // Human-readable result message + message?: string; + error?: JTAGError; + } +): SkillProposeResult => createPayload(context, sessionId, { + skillId: data.skillId ?? '', + name: data.name ?? '', + status: data.status ?? '', + scope: data.scope ?? '', + proposalId: data.proposalId ?? '', + message: data.message ?? '', + ...data +}); + +/** + * Smart Skill Propose-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createSkillProposeResultFromParams = ( + params: SkillProposeParams, + differences: Omit +): SkillProposeResult => transformPayload(params, differences); + +/** + * Skill Propose β€” Type-safe command executor + * + * Usage: + * import { SkillPropose } from '...shared/SkillProposeTypes'; + * const result = await SkillPropose.execute({ ... }); + */ +export const SkillPropose = { + execute(params: CommandInput): Promise { + return Commands.execute('skill/propose', params as Partial); + }, + commandName: 'skill/propose' as const, +} as const; diff --git a/src/debug/jtag/commands/skill/propose/test/integration/SkillProposeIntegration.test.ts b/src/debug/jtag/commands/skill/propose/test/integration/SkillProposeIntegration.test.ts new file mode 100644 index 000000000..f4e7f0024 --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/test/integration/SkillProposeIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * SkillPropose Command Integration Tests + * + * Tests Skill Propose command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Propose/test/integration/SkillProposeIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ SkillPropose Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Skill Propose command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Skill Propose command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Skill Propose']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Skill Propose returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Skill Propose succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Skill Propose']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Skill Propose']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Skill Propose']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Skill Propose']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Skill Propose']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllSkillProposeIntegrationTests(): Promise { + console.log('πŸš€ Starting SkillPropose Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL SkillPropose INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ SkillPropose integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillProposeIntegrationTests(); +} else { + module.exports = { runAllSkillProposeIntegrationTests }; +} diff --git a/src/debug/jtag/commands/skill/propose/test/unit/SkillProposeCommand.test.ts b/src/debug/jtag/commands/skill/propose/test/unit/SkillProposeCommand.test.ts new file mode 100644 index 000000000..86a33bf25 --- /dev/null +++ b/src/debug/jtag/commands/skill/propose/test/unit/SkillProposeCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * SkillPropose Command Unit Tests + * + * Tests Skill Propose command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Propose/test/unit/SkillProposeCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { SkillProposeParams, SkillProposeResult } from '../../shared/SkillProposeTypes'; + +console.log('πŸ§ͺ SkillPropose Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Skill Propose logic for testing + */ +async function mockSkillProposeCommand(params: SkillProposeParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Skill Propose' or see the Skill Propose README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as SkillProposeResult; +} + +/** + * Test 1: Command structure validation + */ +function testSkillProposeCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: SkillPropose command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Skill Propose command + const validParams: SkillProposeParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockSkillProposeExecution(): Promise { + console.log('\n⚑ Test 2: Mock Skill Propose command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: SkillProposeParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockSkillProposeCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testSkillProposeRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as SkillProposeParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as SkillProposeParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockSkillProposeCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testSkillProposeOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: SkillProposeParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockSkillProposeCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: SkillProposeParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockSkillProposeCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testSkillProposePerformance(): Promise { + console.log('\n⚑ Test 5: SkillPropose performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockSkillProposeCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillProposeParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `SkillPropose completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testSkillProposeResultStructure(): Promise { + console.log('\nπŸ” Test 6: SkillPropose result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockSkillProposeCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillProposeParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllSkillProposeUnitTests(): Promise { + console.log('πŸš€ Starting SkillPropose Command Unit Tests\n'); + + try { + testSkillProposeCommandStructure(); + await testMockSkillProposeExecution(); + await testSkillProposeRequiredParams(); + await testSkillProposeOptionalParams(); + await testSkillProposePerformance(); + await testSkillProposeResultStructure(); + + console.log('\nπŸŽ‰ ALL SkillPropose UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ SkillPropose unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillProposeUnitTests(); +} else { + module.exports = { runAllSkillProposeUnitTests }; +} diff --git a/src/debug/jtag/commands/skill/validate/.npmignore b/src/debug/jtag/commands/skill/validate/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/skill/validate/README.md b/src/debug/jtag/commands/skill/validate/README.md new file mode 100644 index 000000000..1946c01a6 --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/README.md @@ -0,0 +1,160 @@ +# Skill Validate Command + +Validate a generated skill by running TypeScript compilation and tests in an ExecutionSandbox. Updates SkillEntity with validation results. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag skill/validate --skillId= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('skill/validate', { + // your parameters here +}); +``` + +## Parameters + +- **skillId** (required): `string` - ID of the SkillEntity to validate + +## Result + +Returns `SkillValidateResult` with: + +Returns CommandResult with: +- **skillId**: `string` - ID of the SkillEntity +- **name**: `string` - Skill command name +- **status**: `string` - Lifecycle status after validation +- **compiled**: `boolean` - Whether TypeScript compilation succeeded +- **testsRun**: `number` - Number of tests executed +- **testsPassed**: `number` - Number of tests that passed +- **errors**: `object` - Array of error messages from compilation or tests +- **message**: `string` - Human-readable result message + +## Examples + +### Validate a generated skill + +```bash +./jtag skill/validate --skillId="uuid-of-skill" +``` + +**Expected result:** +{ compiled: true, testsRun: 3, testsPassed: 3, status: "validated" } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help skill/validate +``` + +**Tool:** +```typescript +// Use your help tool with command name 'skill/validate' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme skill/validate +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'skill/validate' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Skill Validate/test/unit/SkillValidateCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Skill Validate/test/integration/SkillValidateIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/SkillValidateTypes.ts` +- **Browser**: Browser-specific implementation in `browser/SkillValidateBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/SkillValidateServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/SkillValidateCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/SkillValidateIntegration.test.ts` diff --git a/src/debug/jtag/commands/skill/validate/browser/SkillValidateBrowserCommand.ts b/src/debug/jtag/commands/skill/validate/browser/SkillValidateBrowserCommand.ts new file mode 100644 index 000000000..31c2a9872 --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/browser/SkillValidateBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Skill Validate Command - Browser Implementation + * + * Validate a generated skill by running TypeScript compilation and tests in an ExecutionSandbox. Updates SkillEntity with validation results. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { SkillValidateParams, SkillValidateResult } from '../shared/SkillValidateTypes'; + +export class SkillValidateBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/validate', context, subpath, commander); + } + + async execute(params: SkillValidateParams): Promise { + console.log('🌐 BROWSER: Delegating Skill Validate to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/skill/validate/package.json b/src/debug/jtag/commands/skill/validate/package.json new file mode 100644 index 000000000..9ee02105f --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/skill/validate", + "version": "1.0.0", + "description": "Validate a generated skill by running TypeScript compilation and tests in an ExecutionSandbox. Updates SkillEntity with validation results.", + "main": "server/SkillValidateServerCommand.ts", + "types": "shared/SkillValidateTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/SkillValidateIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "skill/validate" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts b/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts new file mode 100644 index 000000000..78af94c54 --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts @@ -0,0 +1,154 @@ +/** + * Skill Validate Command - Server Implementation + * + * Validates a generated skill by running TypeScript compilation and tests + * in an ExecutionSandbox. Updates SkillEntity with validation results. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { SkillValidateParams, SkillValidateResult } from '../shared/SkillValidateTypes'; +import { createSkillValidateResultFromParams } from '../shared/SkillValidateTypes'; +import { SkillEntity } from '@system/data/entities/SkillEntity'; +import type { SkillValidationResults } from '@system/data/entities/SkillEntity'; +import { DataDaemon } from '@daemons/data-daemon/shared/DataDaemon'; +import { COLLECTIONS } from '@system/shared/Constants'; +import { ExecutionSandbox } from '@system/code/server/ExecutionSandbox'; +import type { SandboxConfig } from '@system/code/server/ExecutionSandbox'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class SkillValidateServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('skill/validate', context, subpath, commander); + } + + async execute(params: SkillValidateParams): Promise { + const { skillId } = params; + + if (!skillId?.trim()) { + throw new ValidationError('skillId', "Missing required parameter 'skillId'."); + } + + // Load skill entity + const readResult = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!readResult.success || !readResult.data) { + throw new ValidationError('skillId', `Skill not found: ${skillId}`); + } + const skill = readResult.data.data as SkillEntity; + + if (skill.status !== 'generated') { + throw new ValidationError('skillId', + `Skill '${skill.name}' cannot be validated in status '${skill.status}'. Must be 'generated' first.`); + } + + if (!skill.outputDir) { + throw new ValidationError('skillId', `Skill '${skill.name}' has no outputDir β€” was it generated?`); + } + + const sandbox = new ExecutionSandbox(); + const errors: string[] = []; + const startTime = Date.now(); + + // Step 1: TypeScript compilation check + const compileConfig: SandboxConfig = { + command: 'npx', + args: ['tsc', '--noEmit', '--pretty', '--project', 'tsconfig.json'], + cwd: skill.outputDir, + timeoutMs: 30_000, + maxOutputBytes: 100_000, + personaId: skill.createdById, + }; + + let compiled = false; + try { + const compileResult = await sandbox.execute(compileConfig); + compiled = compileResult.exitCode === 0; + if (!compiled) { + errors.push(`Compilation failed (exit ${compileResult.exitCode}): ${compileResult.stderr || compileResult.stdout}`); + } + } catch (e) { + errors.push(`Compilation error: ${e instanceof Error ? e.message : String(e)}`); + } + + // Step 2: Run tests (only if compilation passed) + let testsRun = 0; + let testsPassed = 0; + + if (compiled) { + const testConfig: SandboxConfig = { + command: 'npx', + args: ['vitest', 'run', '--reporter=json'], + cwd: skill.outputDir, + timeoutMs: 60_000, + maxOutputBytes: 100_000, + personaId: skill.createdById, + }; + + try { + const testResult = await sandbox.execute(testConfig); + // Parse vitest JSON output + try { + const output = testResult.stdout; + const jsonMatch = output.match(/\{[\s\S]*"numTotalTests"[\s\S]*\}/); + if (jsonMatch) { + const parsed = JSON.parse(jsonMatch[0]); + testsRun = parsed.numTotalTests ?? 0; + testsPassed = parsed.numPassedTests ?? 0; + } + } catch { + // If JSON parsing fails, count from exit code + testsRun = testResult.exitCode === 0 ? 1 : 0; + testsPassed = testResult.exitCode === 0 ? 1 : 0; + } + + if (testResult.exitCode !== 0) { + errors.push(`Tests failed (exit ${testResult.exitCode}): ${testResult.stderr || testResult.stdout}`); + } + } catch (e) { + errors.push(`Test execution error: ${e instanceof Error ? e.message : String(e)}`); + } + } + + const durationMs = Date.now() - startTime; + const passed = compiled && errors.length === 0; + + // Build validation results + const validationResults: SkillValidationResults = { + compiled, + testsRun, + testsPassed, + errors, + durationMs, + }; + + // Update entity + const updateData: Partial = { + validationResults, + status: passed ? 'validated' : 'failed', + }; + if (!passed) { + updateData.failureReason = errors.join('; '); + } + await DataDaemon.update( + COLLECTIONS.SKILLS, + skill.id as UUID, + updateData, + ); + + return createSkillValidateResultFromParams(params, { + success: passed, + skillId: skill.id, + name: skill.name, + status: passed ? 'validated' : 'failed', + compiled, + testsRun, + testsPassed, + errors, + message: passed + ? `Skill '${skill.name}' validated: compiled + ${testsPassed}/${testsRun} tests passed (${durationMs}ms)` + : `Skill '${skill.name}' validation failed: ${errors[0] ?? 'unknown error'}`, + }); + } +} diff --git a/src/debug/jtag/commands/skill/validate/shared/SkillValidateTypes.ts b/src/debug/jtag/commands/skill/validate/shared/SkillValidateTypes.ts new file mode 100644 index 000000000..0da799725 --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/shared/SkillValidateTypes.ts @@ -0,0 +1,120 @@ +/** + * Skill Validate Command - Shared Types + * + * Validate a generated skill by running TypeScript compilation and tests in an ExecutionSandbox. Updates SkillEntity with validation results. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Skill Validate Command Parameters + */ +export interface SkillValidateParams extends CommandParams { + // ID of the SkillEntity to validate + skillId: string; +} + +/** + * Factory function for creating SkillValidateParams + */ +export const createSkillValidateParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // ID of the SkillEntity to validate + skillId: string; + } +): SkillValidateParams => createPayload(context, sessionId, { + + ...data +}); + +/** + * Skill Validate Command Result + */ +export interface SkillValidateResult extends CommandResult { + success: boolean; + // ID of the SkillEntity + skillId: string; + // Skill command name + name: string; + // Lifecycle status after validation + status: string; + // Whether TypeScript compilation succeeded + compiled: boolean; + // Number of tests executed + testsRun: number; + // Number of tests that passed + testsPassed: number; + // Array of error messages from compilation or tests + errors: string[]; + // Human-readable result message + message: string; + error?: JTAGError; +} + +/** + * Factory function for creating SkillValidateResult with defaults + */ +export const createSkillValidateResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // ID of the SkillEntity + skillId?: string; + // Skill command name + name?: string; + // Lifecycle status after validation + status?: string; + // Whether TypeScript compilation succeeded + compiled?: boolean; + // Number of tests executed + testsRun?: number; + // Number of tests that passed + testsPassed?: number; + // Array of error messages from compilation or tests + errors?: string[]; + // Human-readable result message + message?: string; + error?: JTAGError; + } +): SkillValidateResult => createPayload(context, sessionId, { + skillId: data.skillId ?? '', + name: data.name ?? '', + status: data.status ?? '', + compiled: data.compiled ?? false, + testsRun: data.testsRun ?? 0, + testsPassed: data.testsPassed ?? 0, + errors: data.errors ?? [], + message: data.message ?? '', + ...data +}); + +/** + * Smart Skill Validate-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createSkillValidateResultFromParams = ( + params: SkillValidateParams, + differences: Omit +): SkillValidateResult => transformPayload(params, differences); + +/** + * Skill Validate β€” Type-safe command executor + * + * Usage: + * import { SkillValidate } from '...shared/SkillValidateTypes'; + * const result = await SkillValidate.execute({ ... }); + */ +export const SkillValidate = { + execute(params: CommandInput): Promise { + return Commands.execute('skill/validate', params as Partial); + }, + commandName: 'skill/validate' as const, +} as const; diff --git a/src/debug/jtag/commands/skill/validate/test/integration/SkillValidateIntegration.test.ts b/src/debug/jtag/commands/skill/validate/test/integration/SkillValidateIntegration.test.ts new file mode 100644 index 000000000..f7b19f38c --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/test/integration/SkillValidateIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * SkillValidate Command Integration Tests + * + * Tests Skill Validate command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Validate/test/integration/SkillValidateIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ SkillValidate Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Skill Validate command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Skill Validate command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Skill Validate']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Skill Validate returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Skill Validate succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Skill Validate']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Skill Validate']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Skill Validate']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Skill Validate']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Skill Validate']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllSkillValidateIntegrationTests(): Promise { + console.log('πŸš€ Starting SkillValidate Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL SkillValidate INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ SkillValidate integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillValidateIntegrationTests(); +} else { + module.exports = { runAllSkillValidateIntegrationTests }; +} diff --git a/src/debug/jtag/commands/skill/validate/test/unit/SkillValidateCommand.test.ts b/src/debug/jtag/commands/skill/validate/test/unit/SkillValidateCommand.test.ts new file mode 100644 index 000000000..ac525136c --- /dev/null +++ b/src/debug/jtag/commands/skill/validate/test/unit/SkillValidateCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * SkillValidate Command Unit Tests + * + * Tests Skill Validate command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Skill Validate/test/unit/SkillValidateCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { SkillValidateParams, SkillValidateResult } from '../../shared/SkillValidateTypes'; + +console.log('πŸ§ͺ SkillValidate Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Skill Validate logic for testing + */ +async function mockSkillValidateCommand(params: SkillValidateParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Skill Validate' or see the Skill Validate README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as SkillValidateResult; +} + +/** + * Test 1: Command structure validation + */ +function testSkillValidateCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: SkillValidate command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Skill Validate command + const validParams: SkillValidateParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockSkillValidateExecution(): Promise { + console.log('\n⚑ Test 2: Mock Skill Validate command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: SkillValidateParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockSkillValidateCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testSkillValidateRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as SkillValidateParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as SkillValidateParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockSkillValidateCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testSkillValidateOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: SkillValidateParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockSkillValidateCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: SkillValidateParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockSkillValidateCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testSkillValidatePerformance(): Promise { + console.log('\n⚑ Test 5: SkillValidate performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockSkillValidateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillValidateParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `SkillValidate completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testSkillValidateResultStructure(): Promise { + console.log('\nπŸ” Test 6: SkillValidate result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockSkillValidateCommand({ + // TODO: Add your parameters + context, + sessionId + } as SkillValidateParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllSkillValidateUnitTests(): Promise { + console.log('πŸš€ Starting SkillValidate Command Unit Tests\n'); + + try { + testSkillValidateCommandStructure(); + await testMockSkillValidateExecution(); + await testSkillValidateRequiredParams(); + await testSkillValidateOptionalParams(); + await testSkillValidatePerformance(); + await testSkillValidateResultStructure(); + + console.log('\nπŸŽ‰ ALL SkillValidate UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ SkillValidate unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllSkillValidateUnitTests(); +} else { + module.exports = { runAllSkillValidateUnitTests }; +} diff --git a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts index 29a69db43..e099897d1 100644 --- a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts +++ b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts @@ -83,6 +83,7 @@ import { CallEntity } from '../../../system/data/entities/CallEntity'; import { SocialCredentialEntity } from '../../../system/social/shared/SocialCredentialEntity'; import { HandleEntity } from '../../../system/data/entities/HandleEntity'; import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; +import { SkillEntity } from '../../../system/data/entities/SkillEntity'; /** * Initialize entity registration for the storage adapter @@ -139,6 +140,7 @@ export function initializeEntityRegistry(): void { new SocialCredentialEntity(); new HandleEntity(); new CodingPlanEntity(); + new SkillEntity(); registerEntity(UserEntity.collection, UserEntity); registerEntity(RoomEntity.collection, RoomEntity); @@ -187,6 +189,7 @@ export function initializeEntityRegistry(): void { registerEntity(SocialCredentialEntity.collection, SocialCredentialEntity); registerEntity(HandleEntity.collection, HandleEntity); registerEntity(CodingPlanEntity.collection, CodingPlanEntity); + registerEntity(SkillEntity.collection, SkillEntity); log.info('All entities registered'); } \ No newline at end of file diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index b09735376..ad528125d 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-01T23:20:49.437Z", + "generated": "2026-02-02T00:56:45.003Z", "version": "1.0.0", "commands": [ { diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index 6e53e1b08..71784e6c6 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7515", + "version": "1.0.7516", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7515", + "version": "1.0.7516", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index cae63925c..33059f2a7 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7515", + "version": "1.0.7516", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index f4b65cf4f..d1e88768b 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7515'; +export const VERSION = '1.0.7516'; export const PACKAGE_NAME = '@continuum/jtag'; diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts index cb18f23f9..48a5fbec3 100644 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -32,9 +32,12 @@ import type { CodingResultStatus, StepResult, StepStatus, + ExecutionOptions, } from '../shared/CodingTypes'; import { PlanFormulator } from './PlanFormulator'; import { CodingModelSelector } from './CodingModelSelector'; +import { ToolAllowlistEnforcer, ToolDeniedError } from './ToolAllowlistEnforcer'; +import { getTier } from './SecurityTier'; import { Commands } from '../../core/shared/Commands'; import { Logger } from '../../core/logging/Logger'; import { CodingPlanEntity } from '../../data/entities/CodingPlanEntity'; @@ -109,14 +112,20 @@ export class CodeAgentOrchestrator { * 3. Persist the plan as a CodingPlanEntity * 4. Execute each step (updating entity in real-time) * 5. Return results + * + * Options: + * - dryRun: Execute read-only commands normally, but mock write/edit commands + * - securityTier: Override the plan's required tier + * - delegationEnabled: Enable multi-agent delegation (future) */ - async execute(task: CodingTask): Promise { + async execute(task: CodingTask, options?: ExecutionOptions): Promise { + const dryRun = options?.dryRun ?? false; const budget = new ExecutionBudget( task.maxDurationMs ?? DEFAULT_MAX_DURATION_MS, task.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS, ); - log.info(`Starting task: ${task.description.slice(0, 80)}... (budget: ${budget.remainingToolCalls} calls)`); + log.info(`Starting task${dryRun ? ' [DRY RUN]' : ''}: ${task.description.slice(0, 80)}... (budget: ${budget.remainingToolCalls} calls)`); const filesModified: string[] = []; const filesCreated: string[] = []; @@ -138,9 +147,13 @@ export class CodeAgentOrchestrator { } const plan = await this.planFormulator.formulate(task, codebaseContext); - log.info(`Plan: "${plan.summary}" β€” ${plan.steps.length} steps`); + log.info(`Plan: "${plan.summary}" β€” ${plan.steps.length} steps (risk: ${plan.riskLevel}, tier: ${plan.requiredTier})`); - // Phase 2b: Persist plan as entity (best-effort β€” works without DataDaemon) + // Phase 2b: Create security enforcer from plan's required tier (or override) + const tierLevel = options?.securityTier ?? plan.requiredTier; + const enforcer = new ToolAllowlistEnforcer(getTier(tierLevel)); + + // Phase 2c: Persist plan as entity (best-effort β€” works without DataDaemon) planEntity = await this.persistPlan(task, plan); // Phase 3: Execute plan steps in dependency order @@ -174,8 +187,8 @@ export class CodeAgentOrchestrator { continue; } - // Execute step with retry - const result = await this.executeStepWithRetry(step, task, budget); + // Execute step with retry (enforcer gates each tool call) + const result = await this.executeStepWithRetry(step, task, budget, enforcer, dryRun); stepResults.push(result); if (result.status === 'completed') { @@ -278,6 +291,8 @@ export class CodeAgentOrchestrator { step: CodingStep, task: CodingTask, budget: ExecutionBudget, + enforcer: ToolAllowlistEnforcer, + dryRun: boolean = false, ): Promise { let lastError: string | undefined; @@ -292,7 +307,7 @@ export class CodeAgentOrchestrator { }; } - const result = await this.executeStep(step, task, budget); + const result = await this.executeStep(step, task, budget, enforcer, dryRun); if (result.status === 'completed') { return result; @@ -315,16 +330,19 @@ export class CodeAgentOrchestrator { /** * Execute a single step via Commands.execute(). + * In dryRun mode, read-only commands execute normally but write commands return mock results. */ private async executeStep( step: CodingStep, task: CodingTask, budget: ExecutionBudget, + enforcer: ToolAllowlistEnforcer, + dryRun: boolean = false, ): Promise { const startTime = performance.now(); try { - log.debug(`Step ${step.stepNumber}: ${step.action} β€” ${step.description}`); + log.debug(`Step ${step.stepNumber}${dryRun ? ' [DRY]' : ''}: ${step.action} β€” ${step.description}`); // Inject personaId (userId) into params for workspace scoping const params = { @@ -332,6 +350,28 @@ export class CodeAgentOrchestrator { userId: task.personaId, }; + // Gate tool call through security tier enforcer + enforcer.enforce(step.toolCall, params); + + // DryRun: mock write/edit commands, execute read-only normally + if (dryRun && this.isWriteAction(step.action)) { + budget.recordToolCall(); + const durationMs = performance.now() - startTime; + return { + stepNumber: step.stepNumber, + status: 'completed', + output: { + success: true, + dryRun: true, + wouldModify: step.targetFiles, + action: step.action, + description: step.description, + }, + durationMs, + toolCall: step.toolCall, + }; + } + const result = await Commands.execute(step.toolCall, params); budget.recordToolCall(); @@ -398,6 +438,14 @@ export class CodeAgentOrchestrator { } } + /** + * Whether a coding action modifies files (write, edit, undo). + * DryRun mode mocks these actions instead of executing them. + */ + private isWriteAction(action: string): boolean { + return action === 'write' || action === 'edit' || action === 'undo'; + } + /** * Build the final CodingResult. */ @@ -452,6 +500,9 @@ export class CodeAgentOrchestrator { temperature: 0, durationMs: 0, }; + entity.riskLevel = plan.riskLevel; + entity.riskReason = plan.riskReason; + entity.securityTier = plan.requiredTier; entity.status = 'executing'; entity.executionStartedAt = Date.now(); diff --git a/src/debug/jtag/system/code/server/CodeTaskDelegator.ts b/src/debug/jtag/system/code/server/CodeTaskDelegator.ts new file mode 100644 index 000000000..c5e440837 --- /dev/null +++ b/src/debug/jtag/system/code/server/CodeTaskDelegator.ts @@ -0,0 +1,408 @@ +/** + * CodeTaskDelegator - Decomposes plans into sub-plans for parallel multi-agent execution + * + * A lead AI creates a top-level plan, then the delegator: + * 1. Analyzes the step DAG for independent file clusters + * 2. Assigns clusters to available agents based on capabilities + * 3. Creates sub-plan entities (parentPlanId = parent) + * 4. After execution, consolidates results from sub-plans + * + * File clusters: Groups of steps that share file dependencies. + * Two steps that touch the same file MUST be in the same cluster. + * Steps in different clusters CAN execute in parallel. + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import type { + AgentCapability, + DelegationResult, + CodingResult, + CodingResultStatus, +} from '../shared/CodingTypes'; +import { + CodingPlanEntity, + type CodingStepSnapshot, +} from '../../data/entities/CodingPlanEntity'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('CodeTaskDelegator', 'code'); + +// ──────────────────────────────────────────────────────────── +// File cluster β€” a group of steps that share file dependencies +// ──────────────────────────────────────────────────────────── + +export interface FileCluster { + /** Unique cluster index */ + readonly index: number; + + /** Step numbers in this cluster (topologically ordered) */ + readonly stepNumbers: number[]; + + /** All files touched by steps in this cluster */ + readonly files: string[]; + + /** Step numbers from other clusters that this cluster depends on */ + readonly externalDeps: number[]; +} + +// ──────────────────────────────────────────────────────────── +// Agent assignment β€” which agent gets which cluster +// ──────────────────────────────────────────────────────────── + +export interface AgentAssignment { + readonly agentId: UUID; + readonly agentName: string; + readonly clusters: FileCluster[]; + readonly totalSteps: number; + readonly files: string[]; +} + +// ──────────────────────────────────────────────────────────── +// Implementation +// ──────────────────────────────────────────────────────────── + +export class CodeTaskDelegator { + + /** + * Decompose a plan's step DAG into independent file clusters. + * + * Algorithm (union-find on files): + * 1. Each step has a set of target files + * 2. Steps that share ANY file belong to the same cluster + * 3. Steps connected via dependsOn also belong to the same cluster + * 4. Result: disjoint clusters that can execute in parallel + */ + decompose(plan: CodingPlanEntity): FileCluster[] { + if (plan.steps.length === 0) return []; + + // Union-Find on step indices + const parent = new Map(); + const rank = new Map(); + + const find = (x: number): number => { + if (!parent.has(x)) { parent.set(x, x); rank.set(x, 0); } + if (parent.get(x) !== x) parent.set(x, find(parent.get(x)!)); + return parent.get(x)!; + }; + + const union = (a: number, b: number): void => { + const ra = find(a), rb = find(b); + if (ra === rb) return; + const rkA = rank.get(ra) ?? 0, rkB = rank.get(rb) ?? 0; + if (rkA < rkB) { parent.set(ra, rb); } + else if (rkA > rkB) { parent.set(rb, ra); } + else { parent.set(rb, ra); rank.set(ra, rkA + 1); } + }; + + // Initialize all steps + for (const step of plan.steps) { + find(step.stepNumber); + } + + // Union steps that share files + const fileToStep = new Map(); + for (const step of plan.steps) { + for (const file of step.targetFiles) { + const existing = fileToStep.get(file); + if (existing !== undefined) { + union(existing, step.stepNumber); + } else { + fileToStep.set(file, step.stepNumber); + } + } + } + + // Union steps connected by dependencies + for (const step of plan.steps) { + for (const dep of step.dependsOn) { + union(step.stepNumber, dep); + } + } + + // Group steps by root + const clusterMap = new Map(); + for (const step of plan.steps) { + const root = find(step.stepNumber); + const group = clusterMap.get(root) ?? []; + group.push(step.stepNumber); + clusterMap.set(root, group); + } + + // Build FileCluster objects + const stepByNumber = new Map(); + for (const step of plan.steps) { + stepByNumber.set(step.stepNumber, step); + } + + const clusters: FileCluster[] = []; + let clusterIndex = 0; + + for (const [, stepNumbers] of clusterMap) { + // Collect all files in this cluster + const files = new Set(); + for (const sn of stepNumbers) { + const step = stepByNumber.get(sn)!; + for (const f of step.targetFiles) files.add(f); + } + + // Identify external dependencies (deps outside this cluster) + const stepSet = new Set(stepNumbers); + const externalDeps: number[] = []; + for (const sn of stepNumbers) { + const step = stepByNumber.get(sn)!; + for (const dep of step.dependsOn) { + if (!stepSet.has(dep) && !externalDeps.includes(dep)) { + externalDeps.push(dep); + } + } + } + + // Sort steps topologically within cluster + stepNumbers.sort((a, b) => a - b); + + clusters.push({ + index: clusterIndex++, + stepNumbers, + files: Array.from(files).sort(), + externalDeps, + }); + } + + log.info(`Decomposed ${plan.steps.length} steps into ${clusters.length} clusters`); + return clusters; + } + + /** + * Assign file clusters to available agents. + * + * Strategy: + * - Sort agents by load (least loaded first) + * - Sort clusters by size (largest first β€” greedy bin packing) + * - Assign each cluster to the least-loaded agent that has capacity + * - Respect agent security tier (cluster needs write β†’ agent needs write+) + */ + assign( + clusters: FileCluster[], + agents: AgentCapability[], + plan: CodingPlanEntity, + ): AgentAssignment[] { + if (clusters.length === 0 || agents.length === 0) return []; + + // Sort agents by load ascending (least loaded first) + const sortedAgents = [...agents].sort((a, b) => a.currentLoad - b.currentLoad); + + // Sort clusters by step count descending (largest first) + const sortedClusters = [...clusters].sort((a, b) => b.stepNumbers.length - a.stepNumbers.length); + + // Track assignments + const assignments = new Map(); + + for (const cluster of sortedClusters) { + // Find the least-loaded agent that hasn't been given too many clusters + let assigned = false; + for (const agent of sortedAgents) { + const existing = assignments.get(agent.personaId); + const currentClusterCount = existing?.clusters.length ?? 0; + + // Simple load balancing: distribute evenly + const maxClustersPerAgent = Math.ceil(sortedClusters.length / sortedAgents.length); + if (currentClusterCount >= maxClustersPerAgent) continue; + + if (!existing) { + assignments.set(agent.personaId, { agent, clusters: [cluster] }); + } else { + existing.clusters.push(cluster); + } + assigned = true; + break; + } + + // If no agent available, assign to least loaded + if (!assigned && sortedAgents.length > 0) { + const fallback = sortedAgents[0]; + const existing = assignments.get(fallback.personaId); + if (!existing) { + assignments.set(fallback.personaId, { agent: fallback, clusters: [cluster] }); + } else { + existing.clusters.push(cluster); + } + } + } + + // Build AgentAssignment objects + const result: AgentAssignment[] = []; + for (const [, { agent, clusters: agentClusters }] of assignments) { + const allSteps: number[] = []; + const allFiles = new Set(); + for (const cluster of agentClusters) { + allSteps.push(...cluster.stepNumbers); + for (const f of cluster.files) allFiles.add(f); + } + + result.push({ + agentId: agent.personaId, + agentName: agent.name, + clusters: agentClusters, + totalSteps: allSteps.length, + files: Array.from(allFiles).sort(), + }); + } + + log.info(`Assigned ${clusters.length} clusters to ${result.length} agents`); + return result; + } + + /** + * Create sub-plan entities from agent assignments. + * Each sub-plan contains only the steps assigned to that agent. + */ + createSubPlans( + parentPlan: CodingPlanEntity, + assignments: AgentAssignment[], + ): CodingPlanEntity[] { + const stepByNumber = new Map(); + for (const step of parentPlan.steps) { + stepByNumber.set(step.stepNumber, step); + } + + const subPlans: CodingPlanEntity[] = []; + + for (const assignment of assignments) { + const subPlan = new CodingPlanEntity(); + subPlan.taskId = parentPlan.taskId; + subPlan.parentPlanId = parentPlan.id as UUID; + subPlan.createdById = parentPlan.leadId; + subPlan.leadId = assignment.agentId; + subPlan.summary = `Sub-plan for ${assignment.agentName}: ${assignment.files.slice(0, 3).join(', ')}${assignment.files.length > 3 ? '...' : ''}`; + subPlan.taskDescription = parentPlan.taskDescription; + subPlan.estimatedToolCalls = assignment.totalSteps; + subPlan.assignees = [assignment.agentId]; + subPlan.generatedBy = parentPlan.generatedBy; + subPlan.riskLevel = parentPlan.riskLevel; + subPlan.riskReason = parentPlan.riskReason; + subPlan.securityTier = parentPlan.securityTier; + subPlan.status = 'approved'; // Sub-plans inherit parent approval + + // Copy only the assigned steps, renumber sequentially + const assignedStepNumbers = new Set(); + for (const cluster of assignment.clusters) { + for (const sn of cluster.stepNumbers) { + assignedStepNumbers.add(sn); + } + } + + subPlan.steps = Array.from(assignedStepNumbers) + .sort((a, b) => a - b) + .map(sn => { + const original = stepByNumber.get(sn)!; + return { + ...original, + // Filter dependsOn to only include steps within this sub-plan + dependsOn: original.dependsOn.filter(d => assignedStepNumbers.has(d)), + }; + }); + + subPlans.push(subPlan); + } + + log.info(`Created ${subPlans.length} sub-plans from parent ${parentPlan.id}`); + return subPlans; + } + + /** + * Consolidate results from sub-plans into the parent plan's CodingResult. + */ + consolidate( + parentPlan: CodingPlanEntity, + subPlans: CodingPlanEntity[], + ): CodingResult { + const filesModified = new Set(); + const filesCreated = new Set(); + const changeIds: string[] = []; + const errors: string[] = []; + let totalToolCalls = 0; + let totalDurationMs = 0; + + for (const sub of subPlans) { + for (const f of sub.filesModified) filesModified.add(f); + for (const f of sub.filesCreated) filesCreated.add(f); + changeIds.push(...sub.changeIds); + errors.push(...sub.errors); + totalToolCalls += sub.totalToolCalls; + totalDurationMs = Math.max(totalDurationMs, sub.totalDurationMs); // Parallel = max, not sum + } + + // Detect conflicts: same file modified by multiple sub-plans + const fileToSubPlan = new Map(); + for (const sub of subPlans) { + for (const f of sub.filesModified) { + const existing = fileToSubPlan.get(f) ?? []; + existing.push(sub.id as UUID); + fileToSubPlan.set(f, existing); + } + } + const conflicts = Array.from(fileToSubPlan.entries()) + .filter(([, ids]) => ids.length > 1) + .map(([file]) => file); + + if (conflicts.length > 0) { + errors.push(`File conflicts detected: ${conflicts.join(', ')}`); + } + + // Determine overall status + if (subPlans.length === 0) { + return { + taskId: parentPlan.taskId, + status: 'failed', + summary: 'No sub-plans to consolidate', + stepResults: [], + filesModified: [], + filesCreated: [], + totalToolCalls: 0, + totalDurationMs: 0, + changeIds: [], + errors: ['No sub-plans were executed'], + }; + } + + const allCompleted = subPlans.every(s => s.status === 'completed'); + const anyCompleted = subPlans.some(s => s.status === 'completed'); + const status: CodingResultStatus = allCompleted + ? 'completed' + : anyCompleted + ? 'partial' + : 'failed'; + + // Build step results from all sub-plans + const stepResults = subPlans.flatMap(sub => + sub.steps.map(step => ({ + stepNumber: step.stepNumber, + status: step.status === 'completed' ? 'completed' as const + : step.status === 'skipped' ? 'skipped' as const + : step.status === 'failed' ? 'failed' as const + : 'pending' as const, + output: step.output, + error: step.error, + durationMs: step.durationMs ?? 0, + toolCall: step.toolCall, + })), + ); + + const summary = allCompleted + ? `All ${subPlans.length} sub-plans completed` + : `${subPlans.filter(s => s.status === 'completed').length}/${subPlans.length} sub-plans completed`; + + return { + taskId: parentPlan.taskId, + status, + summary, + stepResults, + filesModified: Array.from(filesModified), + filesCreated: Array.from(filesCreated), + totalToolCalls, + totalDurationMs, + changeIds, + errors, + }; + } +} diff --git a/src/debug/jtag/system/code/server/ExecutionSandbox.ts b/src/debug/jtag/system/code/server/ExecutionSandbox.ts new file mode 100644 index 000000000..cf8e31d77 --- /dev/null +++ b/src/debug/jtag/system/code/server/ExecutionSandbox.ts @@ -0,0 +1,219 @@ +/** + * ExecutionSandbox - Process-isolated code execution for coding agents + * + * Runs commands in a restricted child process with: + * - Restricted PATH (only node, npx, tsc) + * - Timeout enforcement (SIGTERM on timeout, SIGKILL after grace period) + * - Output capture with size limits + * - Working directory scoped to persona workspace + * - Environment variable isolation + * + * Based on the existing SandboxExecuteServerCommand spawn pattern, + * extracted as a reusable utility for Phase 4A sandboxing. + */ + +import { spawn, type ChildProcess } from 'child_process'; +import * as path from 'path'; +import { Logger } from '../../core/logging/Logger'; +import type { UUID } from '../../core/types/CrossPlatformUUID'; + +const log = Logger.create('ExecutionSandbox', 'code'); + +// ──────────────────────────────────────────────────────────── +// Types +// ──────────────────────────────────────────────────────────── + +export interface SandboxConfig { + /** Command to execute (e.g., 'npx', 'node', 'tsc') */ + readonly command: string; + + /** Command arguments */ + readonly args: readonly string[]; + + /** Working directory β€” must be within persona workspace */ + readonly cwd: string; + + /** Maximum execution time in milliseconds */ + readonly timeoutMs: number; + + /** Maximum combined stdout+stderr size in bytes */ + readonly maxOutputBytes: number; + + /** Additional environment variables (merged with restricted base) */ + readonly env?: Readonly>; + + /** Persona executing this command (for audit logging) */ + readonly personaId: UUID; +} + +export interface SandboxResult { + readonly success: boolean; + readonly exitCode: number; + readonly stdout: string; + readonly stderr: string; + readonly durationMs: number; + readonly truncated: boolean; + readonly timedOut: boolean; + readonly error?: string; +} + +// ──────────────────────────────────────────────────────────── +// Defaults +// ──────────────────────────────────────────────────────────── + +const DEFAULT_TIMEOUT_MS = 30_000; +const DEFAULT_MAX_OUTPUT_BYTES = 102_400; // 100KB +const KILL_GRACE_PERIOD_MS = 5_000; + +/** Restricted set of allowed commands */ +const ALLOWED_COMMANDS = new Set(['node', 'npx', 'tsc', 'npm']); + +/** Restricted PATH β€” only common binary locations (includes Homebrew for macOS) */ +const RESTRICTED_PATH = [ + '/opt/homebrew/bin', // macOS Apple Silicon Homebrew + '/usr/local/bin', // macOS Intel Homebrew / standard + '/usr/bin', + '/bin', +].join(path.delimiter); + +// ──────────────────────────────────────────────────────────── +// Sandbox +// ──────────────────────────────────────────────────────────── + +export class ExecutionSandbox { + /** + * Execute a command in a sandboxed child process. + */ + async execute(config: SandboxConfig): Promise { + const startTime = performance.now(); + + // Validate command is in allowlist + const baseCommand = path.basename(config.command); + if (!ALLOWED_COMMANDS.has(baseCommand)) { + return { + success: false, + exitCode: -1, + stdout: '', + stderr: '', + durationMs: 0, + truncated: false, + timedOut: false, + error: `Command '${baseCommand}' is not in the sandbox allowlist. Allowed: ${[...ALLOWED_COMMANDS].join(', ')}`, + }; + } + + const timeoutMs = config.timeoutMs || DEFAULT_TIMEOUT_MS; + const maxOutputBytes = config.maxOutputBytes || DEFAULT_MAX_OUTPUT_BYTES; + + log.debug(`Sandbox exec: ${config.command} ${config.args.join(' ')} (timeout: ${timeoutMs}ms, persona: ${config.personaId})`); + + return new Promise((resolve) => { + let stdout = ''; + let stderr = ''; + let outputSize = 0; + let truncated = false; + let timedOut = false; + let child: ChildProcess; + + try { + child = spawn(config.command, [...config.args], { + cwd: config.cwd, + env: { + PATH: RESTRICTED_PATH, + NODE_ENV: 'sandbox', + HOME: config.cwd, + SANDBOX_EXECUTION: 'true', + PERSONA_ID: config.personaId, + ...config.env, + }, + shell: false, + stdio: ['ignore', 'pipe', 'pipe'], // No stdin + }); + } catch (error) { + const durationMs = performance.now() - startTime; + resolve({ + success: false, + exitCode: -1, + stdout: '', + stderr: '', + durationMs, + truncated: false, + timedOut: false, + error: `Failed to spawn: ${error instanceof Error ? error.message : String(error)}`, + }); + return; + } + + // Collect stdout with size limit + child.stdout?.on('data', (data: Buffer) => { + const chunk = data.toString(); + outputSize += chunk.length; + if (outputSize <= maxOutputBytes) { + stdout += chunk; + } else { + truncated = true; + } + }); + + // Collect stderr with size limit + child.stderr?.on('data', (data: Buffer) => { + const chunk = data.toString(); + outputSize += chunk.length; + if (outputSize <= maxOutputBytes) { + stderr += chunk; + } else { + truncated = true; + } + }); + + // Timeout: SIGTERM first, then SIGKILL after grace period + const timeoutHandle = setTimeout(() => { + timedOut = true; + log.warn(`Sandbox timeout: killing process after ${timeoutMs}ms`); + child.kill('SIGTERM'); + + setTimeout(() => { + if (!child.killed) { + child.kill('SIGKILL'); + } + }, KILL_GRACE_PERIOD_MS); + }, timeoutMs); + + // Handle process exit + child.on('close', (code: number | null) => { + clearTimeout(timeoutHandle); + const durationMs = performance.now() - startTime; + + log.debug(`Sandbox done: exit=${code ?? -1}, duration=${Math.round(durationMs)}ms, output=${outputSize}b`); + + resolve({ + success: !timedOut && code === 0, + exitCode: code ?? -1, + stdout: stdout.trim(), + stderr: stderr.trim(), + durationMs, + truncated, + timedOut, + error: timedOut ? `Timed out after ${timeoutMs}ms` : undefined, + }); + }); + + // Handle spawn errors + child.on('error', (error: Error) => { + clearTimeout(timeoutHandle); + const durationMs = performance.now() - startTime; + + resolve({ + success: false, + exitCode: -1, + stdout: stdout.trim(), + stderr: stderr.trim(), + durationMs, + truncated, + timedOut: false, + error: `Spawn error: ${error.message}`, + }); + }); + }); + } +} diff --git a/src/debug/jtag/system/code/server/PlanFormulator.ts b/src/debug/jtag/system/code/server/PlanFormulator.ts index 95d0dd46d..4dad3a09d 100644 --- a/src/debug/jtag/system/code/server/PlanFormulator.ts +++ b/src/debug/jtag/system/code/server/PlanFormulator.ts @@ -14,11 +14,12 @@ * The LLM returns a JSON CodingPlan that the CodeAgentOrchestrator executes. */ -import type { CodingTask, CodingPlan, CodingStep, CodingAction } from '../shared/CodingTypes'; +import type { CodingTask, CodingPlan, CodingStep, CodingAction, RiskLevel, SecurityTierLevel } from '../shared/CodingTypes'; import { CodingModelSelector } from './CodingModelSelector'; import { AIProviderDaemon } from '../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; import type { TextGenerationRequest, ChatMessage } from '../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2'; import { Logger } from '../../core/logging/Logger'; +import { riskToTier } from './SecurityTier'; const log = Logger.create('PlanFormulator', 'code'); @@ -159,6 +160,8 @@ ${toolDocs} Respond with ONLY a JSON object (no markdown, no explanation): { "summary": "Brief description of the approach", + "riskLevel": "low|medium|high|critical", + "riskReason": "Why this risk level was assigned", "steps": [ { "stepNumber": 1, @@ -173,6 +176,12 @@ Respond with ONLY a JSON object (no markdown, no explanation): ] } +## Risk Assessment Guidelines +- **low**: Read-only tasks, documentation, test-only changes, single-file edits +- **medium**: Multi-file edits, adding new functions, standard refactoring +- **high**: API/interface changes, security-sensitive code, cross-module refactoring +- **critical**: System configuration, build scripts, deployment, anything requiring shell execution + ## Rules 1. Steps are numbered starting from 1 2. dependsOn lists step numbers that must complete first (DAG) @@ -231,12 +240,20 @@ Respond with ONLY a JSON object (no markdown, no explanation): throw new Error(`PlanFormulator: Invalid JSON in LLM response: ${(e as Error).message}`); } - const parsed = raw as { summary?: string; steps?: unknown[] }; + const parsed = raw as { summary?: string; steps?: unknown[]; riskLevel?: string; riskReason?: string }; if (!parsed.summary || typeof parsed.summary !== 'string') { throw new Error('PlanFormulator: Plan missing "summary" field'); } + // Extract and validate risk assessment + const VALID_RISK_LEVELS: ReadonlySet = new Set(['low', 'medium', 'high', 'critical']); + const riskLevel: RiskLevel = VALID_RISK_LEVELS.has(parsed.riskLevel ?? '') + ? (parsed.riskLevel as RiskLevel) + : 'medium'; // Default to medium if LLM omits or gives invalid value + const riskReason = typeof parsed.riskReason === 'string' ? parsed.riskReason : 'No risk reason provided'; + const requiredTier: SecurityTierLevel = riskToTier(riskLevel); + if (!Array.isArray(parsed.steps) || parsed.steps.length === 0) { throw new Error('PlanFormulator: Plan has no steps'); } @@ -290,6 +307,9 @@ Respond with ONLY a JSON object (no markdown, no explanation): estimatedToolCalls: steps.length, generatedBy: { provider, model }, generatedAt: Date.now(), + riskLevel, + riskReason, + requiredTier, }; } } diff --git a/src/debug/jtag/system/code/server/PlanGovernance.ts b/src/debug/jtag/system/code/server/PlanGovernance.ts new file mode 100644 index 000000000..a75246468 --- /dev/null +++ b/src/debug/jtag/system/code/server/PlanGovernance.ts @@ -0,0 +1,151 @@ +/** + * PlanGovernance - Risk-based approval routing for coding plans + * + * Determines whether a plan needs team approval before execution, + * creates DecisionProposals for review, and handles governance callbacks. + * + * Approval rules: + * - Auto-approve: single-agent + riskLevel low/medium + * - Require approval: multi-agent OR riskLevel high/critical + * - Always require: system-tier operations + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import type { RiskLevel, SecurityTierLevel } from '../shared/CodingTypes'; +import { CodingPlanEntity, type CodingPlanStatus } from '../../data/entities/CodingPlanEntity'; +import { riskRequiresApproval } from './SecurityTier'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('PlanGovernance', 'code'); + +// ──────────────────────────────────────────────────────────── +// Governance decision outcomes +// ──────────────────────────────────────────────────────────── + +export type GovernanceOutcome = + | 'approved' + | 'approved_with_changes' + | 'changes_requested' + | 'rejected'; + +export interface GovernanceDecision { + readonly proposalId: UUID; + readonly outcome: GovernanceOutcome; + readonly reasoning: string; + readonly suggestedChanges?: string; +} + +// ──────────────────────────────────────────────────────────── +// Implementation +// ──────────────────────────────────────────────────────────── + +export class PlanGovernance { + + /** + * Determine if a plan needs team approval before execution. + */ + shouldRequireApproval(plan: CodingPlanEntity): boolean { + // System tier always requires approval + if (plan.securityTier === 'system') { + return true; + } + + // Delegate to SecurityTier's risk-based logic + const isMultiAgent = plan.assignees.length > 1; + return riskRequiresApproval(plan.riskLevel, isMultiAgent); + } + + /** + * Create a DecisionProposal for plan review. + * Returns the proposal ID, or undefined if proposal creation failed. + */ + async proposePlan(plan: CodingPlanEntity): Promise { + try { + // Dynamic import to avoid circular dependency + const { DecisionPropose } = await import( + '../../../commands/collaboration/decision/propose/shared/DecisionProposeTypes' + ); + + const fileList = this.extractTargetFiles(plan); + const stepSummary = plan.steps + .map(s => ` ${s.stepNumber}. [${s.action}] ${s.description}`) + .join('\n'); + + const rationale = [ + `**Task:** ${plan.taskDescription}`, + `**Approach:** ${plan.summary}`, + `**Risk Level:** ${plan.riskLevel} (${plan.riskReason ?? 'No reason provided'})`, + `**Security Tier:** ${plan.securityTier}`, + `**Assignees:** ${plan.assignees.length} agent(s)`, + `**Steps (${plan.steps.length}):**\n${stepSummary}`, + fileList.length > 0 ? `**Target Files:**\n${fileList.map(f => ` - ${f}`).join('\n')}` : '', + ].filter(Boolean).join('\n\n'); + + const result = await DecisionPropose.execute({ + topic: `Coding Plan: ${plan.summary}`, + rationale, + options: [ + { label: 'Approve', description: 'Approve the plan for execution' }, + { label: 'Approve with Changes', description: 'Approve with suggested modifications' }, + { label: 'Request Changes', description: 'Send back for revision' }, + { label: 'Reject', description: 'Decline this plan' }, + ], + scope: 'all', + significanceLevel: this.riskToSignificance(plan.riskLevel), + proposerId: plan.leadId, + }); + + if (result.success && result.proposalId) { + log.info(`Plan proposed for governance: ${result.proposalId} (plan: ${plan.taskId})`); + return result.proposalId; + } + + log.warn(`Governance proposal creation returned success=false`); + return undefined; + } catch (error) { + log.warn(`Governance proposal failed: ${error instanceof Error ? error.message : String(error)}`); + return undefined; + } + } + + /** + * Handle a governance decision callback. + * Returns the CodingPlanStatus the plan should transition to. + */ + resolveDecision(decision: GovernanceDecision): CodingPlanStatus { + switch (decision.outcome) { + case 'approved': + return 'approved'; + case 'approved_with_changes': + return 'approved'; + case 'changes_requested': + return 'draft'; + case 'rejected': + return 'cancelled'; + } + } + + // ──────────────────────────────────────────────────────────── + // Private helpers + // ──────────────────────────────────────────────────────────── + + /** + * Extract unique target files from all plan steps. + */ + private extractTargetFiles(plan: CodingPlanEntity): string[] { + const files = new Set(); + for (const step of plan.steps) { + for (const file of step.targetFiles) { + files.add(file); + } + } + return Array.from(files).sort(); + } + + /** + * Map risk level to governance significance. + */ + private riskToSignificance(risk: RiskLevel): 'low' | 'medium' | 'high' | 'critical' { + return risk; // 1:1 mapping β€” risk levels align with significance levels + } +} diff --git a/src/debug/jtag/system/code/server/SecurityTier.ts b/src/debug/jtag/system/code/server/SecurityTier.ts new file mode 100644 index 000000000..500a77343 --- /dev/null +++ b/src/debug/jtag/system/code/server/SecurityTier.ts @@ -0,0 +1,176 @@ +/** + * SecurityTier - Risk-based access control for coding agent execution + * + * Four tiers of access, each with explicit tool allowlists/denylists: + * - discovery: Read-only exploration (tree, search, read, history) + * - read: Analysis without modification (adds diff, data/list) + * - write: File modifications within persona workspace (adds write, edit, undo) + * - system: Full access including shell execution (requires governance approval) + * + * The PlanFormulator assesses risk and assigns a required tier. + * The ToolAllowlistEnforcer gates every tool call through the tier. + */ + +import type { SecurityTierLevel, RiskLevel } from '../shared/CodingTypes'; + +// Re-export for consumers that import from this module +export type { SecurityTierLevel, RiskLevel }; + +// ──────────────────────────────────────────────────────────── +// Types +// ──────────────────────────────────────────────────────────── + +export interface SecurityTier { + readonly level: SecurityTierLevel; + readonly allowedCommands: readonly string[]; + readonly deniedCommands: readonly string[]; + readonly maxToolCalls: number; + readonly maxDurationMs: number; + readonly maxFileSizeBytes: number; + readonly allowProcessSpawn: boolean; + readonly allowNetworkAccess: boolean; + readonly requiresApproval: boolean; +} + +// ──────────────────────────────────────────────────────────── +// Tier Definitions +// ──────────────────────────────────────────────────────────── + +const DISCOVERY_TIER: SecurityTier = { + level: 'discovery', + allowedCommands: [ + 'code/tree', + 'code/search', + 'code/read', + 'code/history', + ], + deniedCommands: [ + 'code/write', + 'code/edit', + 'code/undo', + 'code/delete', + 'development/*', + 'system/*', + ], + maxToolCalls: 30, + maxDurationMs: 60_000, + maxFileSizeBytes: 0, // No writes allowed + allowProcessSpawn: false, + allowNetworkAccess: false, + requiresApproval: false, +}; + +const READ_TIER: SecurityTier = { + level: 'read', + allowedCommands: [ + ...DISCOVERY_TIER.allowedCommands, + 'code/diff', + 'data/list', + 'data/read', + ], + deniedCommands: [ + 'code/write', + 'code/edit', + 'code/undo', + 'code/delete', + 'development/*', + 'system/*', + ], + maxToolCalls: 30, + maxDurationMs: 60_000, + maxFileSizeBytes: 0, // No writes allowed + allowProcessSpawn: false, + allowNetworkAccess: false, + requiresApproval: false, +}; + +const WRITE_TIER: SecurityTier = { + level: 'write', + allowedCommands: [ + ...READ_TIER.allowedCommands, + 'code/write', + 'code/edit', + 'code/undo', + 'code/diff', + ], + deniedCommands: [ + 'code/delete', + 'development/exec', + 'development/sandbox-execute', + 'system/*', + ], + maxToolCalls: 20, + maxDurationMs: 120_000, + maxFileSizeBytes: 1_048_576, // 1MB + allowProcessSpawn: false, + allowNetworkAccess: false, + requiresApproval: false, // Risk-based (PlanGovernance decides) +}; + +const SYSTEM_TIER: SecurityTier = { + level: 'system', + allowedCommands: ['*'], + deniedCommands: [], // No restrictions + maxToolCalls: 50, + maxDurationMs: 300_000, + maxFileSizeBytes: 10_485_760, // 10MB + allowProcessSpawn: true, + allowNetworkAccess: true, + requiresApproval: true, // Always requires governance approval +}; + +// ──────────────────────────────────────────────────────────── +// Tier Registry +// ──────────────────────────────────────────────────────────── + +const TIERS: Record = { + discovery: DISCOVERY_TIER, + read: READ_TIER, + write: WRITE_TIER, + system: SYSTEM_TIER, +}; + +/** + * Get the SecurityTier definition for a given level. + */ +export function getTier(level: SecurityTierLevel): SecurityTier { + return TIERS[level]; +} + +/** + * All tier levels in ascending order of privilege. + */ +export const TIER_LEVELS: readonly SecurityTierLevel[] = ['discovery', 'read', 'write', 'system']; + +/** + * Check if tier A has equal or greater privilege than tier B. + */ +export function tierAtLeast(a: SecurityTierLevel, b: SecurityTierLevel): boolean { + return TIER_LEVELS.indexOf(a) >= TIER_LEVELS.indexOf(b); +} + +// ──────────────────────────────────────────────────────────── +// Risk β†’ Tier Mapping +// ──────────────────────────────────────────────────────────── + +/** + * Map a risk level to the minimum security tier required. + * Higher risk β†’ higher tier β†’ more restrictions (and potentially approval). + */ +export function riskToTier(risk: RiskLevel): SecurityTierLevel { + switch (risk) { + case 'low': return 'write'; + case 'medium': return 'write'; + case 'high': return 'write'; // Same tier, but PlanGovernance requires approval at high+ + case 'critical': return 'system'; + } +} + +/** + * Whether a given risk level should require governance approval. + */ +export function riskRequiresApproval(risk: RiskLevel, isMultiAgent: boolean): boolean { + if (isMultiAgent) return true; + if (risk === 'high' || risk === 'critical') return true; + return false; +} diff --git a/src/debug/jtag/system/code/server/ToolAllowlistEnforcer.ts b/src/debug/jtag/system/code/server/ToolAllowlistEnforcer.ts new file mode 100644 index 000000000..8517f4716 --- /dev/null +++ b/src/debug/jtag/system/code/server/ToolAllowlistEnforcer.ts @@ -0,0 +1,174 @@ +/** + * ToolAllowlistEnforcer - Gateway that filters every tool call through a SecurityTier + * + * Sits between the CodeAgentOrchestrator and Commands.execute(). + * Every tool call passes through enforce() before execution. + * + * Enforcement rules (evaluated in order): + * 1. Denied commands always blocked (explicit deny wins over allow) + * 2. Allowed commands checked via glob matching + * 3. File paths validated against persona workspace + * 4. File size limits checked for write operations + * 5. Every decision logged for audit trail + */ + +import type { SecurityTier } from './SecurityTier'; +import { Logger } from '../../core/logging/Logger'; + +const log = Logger.create('ToolAllowlistEnforcer', 'code'); + +// ──────────────────────────────────────────────────────────── +// Types +// ──────────────────────────────────────────────────────────── + +export interface EnforcementResult { + readonly allowed: boolean; + readonly reason: string; + readonly toolName: string; + readonly tierLevel: string; +} + +// ──────────────────────────────────────────────────────────── +// Enforcer +// ──────────────────────────────────────────────────────────── + +export class ToolAllowlistEnforcer { + private readonly _tier: SecurityTier; + private readonly _auditLog: EnforcementResult[] = []; + + constructor(tier: SecurityTier) { + this._tier = tier; + } + + get tier(): SecurityTier { + return this._tier; + } + + get auditLog(): readonly EnforcementResult[] { + return this._auditLog; + } + + /** + * Check if a tool call is allowed under the current tier. + * Throws if the tool is denied. + */ + enforce(toolName: string, params?: Record): void { + const result = this.check(toolName, params); + this._auditLog.push(result); + + if (!result.allowed) { + log.warn(`BLOCKED: ${toolName} β€” ${result.reason} (tier: ${this._tier.level})`); + throw new ToolDeniedError(toolName, result.reason, this._tier.level); + } + + log.debug(`ALLOWED: ${toolName} (tier: ${this._tier.level})`); + } + + /** + * Non-throwing check β€” returns the enforcement result without blocking. + */ + check(toolName: string, params?: Record): EnforcementResult { + // 1. Check denied list (explicit deny always wins) + if (this.matchesAny(toolName, this._tier.deniedCommands)) { + return { + allowed: false, + reason: `Command '${toolName}' is explicitly denied in ${this._tier.level} tier`, + toolName, + tierLevel: this._tier.level, + }; + } + + // 2. Check allowed list + if (!this.matchesAny(toolName, this._tier.allowedCommands)) { + return { + allowed: false, + reason: `Command '${toolName}' is not in the allowed list for ${this._tier.level} tier`, + toolName, + tierLevel: this._tier.level, + }; + } + + // 3. Check process spawn restriction + if (!this._tier.allowProcessSpawn && this.isProcessSpawnCommand(toolName)) { + return { + allowed: false, + reason: `Process spawn commands are not allowed in ${this._tier.level} tier`, + toolName, + tierLevel: this._tier.level, + }; + } + + // 4. Check file size for write operations + if (this.isWriteCommand(toolName) && params) { + const content = params['content'] as string | undefined; + if (content && this._tier.maxFileSizeBytes > 0) { + const sizeBytes = new TextEncoder().encode(content).length; + if (sizeBytes > this._tier.maxFileSizeBytes) { + return { + allowed: false, + reason: `Content size ${sizeBytes} exceeds tier limit of ${this._tier.maxFileSizeBytes} bytes`, + toolName, + tierLevel: this._tier.level, + }; + } + } + } + + return { + allowed: true, + reason: 'Allowed by tier policy', + toolName, + tierLevel: this._tier.level, + }; + } + + /** + * Check if a tool name matches any pattern in the list. + * Supports exact match and trailing wildcard (e.g., 'code/*', '*'). + */ + private matchesAny(toolName: string, patterns: readonly string[]): boolean { + for (const pattern of patterns) { + if (pattern === '*') return true; + if (pattern === toolName) return true; + + // Glob: 'code/*' matches 'code/read', 'code/edit', etc. + if (pattern.endsWith('/*')) { + const prefix = pattern.slice(0, -2); + if (toolName.startsWith(prefix + '/')) return true; + } + } + return false; + } + + /** + * Commands that spawn child processes. + */ + private isProcessSpawnCommand(toolName: string): boolean { + return toolName === 'development/exec' || + toolName === 'development/sandbox-execute' || + toolName === 'development/build'; + } + + /** + * Commands that write to the filesystem. + */ + private isWriteCommand(toolName: string): boolean { + return toolName === 'code/write' || toolName === 'code/edit'; + } +} + +// ──────────────────────────────────────────────────────────── +// Error +// ──────────────────────────────────────────────────────────── + +export class ToolDeniedError extends Error { + readonly toolName: string; + readonly tierLevel: string; + + constructor(toolName: string, reason: string, tierLevel: string) { + super(`Tool '${toolName}' denied: ${reason}`); + this.name = 'ToolDeniedError'; + this.toolName = toolName; + this.tierLevel = tierLevel; + } +} diff --git a/src/debug/jtag/system/code/shared/CodingTypes.ts b/src/debug/jtag/system/code/shared/CodingTypes.ts index fa3775e45..12d67cfc1 100644 --- a/src/debug/jtag/system/code/shared/CodingTypes.ts +++ b/src/debug/jtag/system/code/shared/CodingTypes.ts @@ -11,6 +11,22 @@ import type { UUID } from '../../core/types/CrossPlatformUUID'; +// ============================================================================ +// Security & Risk +// ============================================================================ + +/** + * Risk level assessed by PlanFormulator for a coding plan. + * Determines security tier and whether governance approval is needed. + */ +export type RiskLevel = 'low' | 'medium' | 'high' | 'critical'; + +/** + * Security tier that governs which tools a plan can use. + * Assigned based on risk level; higher tiers require more oversight. + */ +export type SecurityTierLevel = 'discovery' | 'read' | 'write' | 'system'; + // ============================================================================ // Model Selection // ============================================================================ @@ -151,6 +167,15 @@ export interface CodingPlan { /** When the plan was generated */ readonly generatedAt: number; + + /** Risk level assessed by PlanFormulator */ + readonly riskLevel: RiskLevel; + + /** Why this risk level was assigned */ + readonly riskReason: string; + + /** Minimum security tier required for execution */ + readonly requiredTier: SecurityTierLevel; } // ============================================================================ @@ -222,3 +247,74 @@ export interface CodingResult { /** Errors encountered */ readonly errors: string[]; } + +// ============================================================================ +// Execution Options (Phase 4C: Multi-Agent Coordination) +// ============================================================================ + +/** + * Options controlling how a coding plan is executed. + * Passed to CodeAgentOrchestrator.execute(). + */ +export interface ExecutionOptions { + /** Execute but don't write β€” report what would happen */ + readonly dryRun?: boolean; + + /** Override the security tier (defaults to plan's requiredTier) */ + readonly securityTier?: SecurityTierLevel; + + /** Enable multi-agent delegation for this execution */ + readonly delegationEnabled?: boolean; +} + +// ============================================================================ +// Agent Capability (Phase 4C: Multi-Agent Delegation) +// ============================================================================ + +/** + * Describes an AI persona's capabilities for coding task delegation. + * Used by CodeTaskDelegator to match tasks to agents. + */ +export interface AgentCapability { + /** Persona ID */ + readonly personaId: UUID; + + /** Persona display name */ + readonly name: string; + + /** Coding specialties (e.g., 'typescript', 'testing', 'code-review') */ + readonly specialties: string[]; + + /** Current workload fraction (0.0 = idle, 1.0 = fully loaded) */ + readonly currentLoad: number; + + /** Security tier this agent is authorized for */ + readonly securityTier: SecurityTierLevel; +} + +// ============================================================================ +// Delegation Result (Phase 4C: Multi-Agent Coordination) +// ============================================================================ + +/** + * Result of delegating a plan to multiple agents. + */ +export interface DelegationResult { + /** Parent plan ID */ + readonly parentPlanId: UUID; + + /** Sub-plan IDs created for each agent cluster */ + readonly subPlanIds: UUID[]; + + /** Files assigned to each sub-plan */ + readonly assignments: ReadonlyArray<{ + readonly subPlanId: UUID; + readonly agentId: UUID; + readonly agentName: string; + readonly files: string[]; + readonly stepNumbers: number[]; + }>; + + /** Files with conflicts (claimed by multiple clusters) */ + readonly conflicts: string[]; +} diff --git a/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts b/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts new file mode 100644 index 000000000..bf4265a22 --- /dev/null +++ b/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts @@ -0,0 +1,349 @@ +/** + * Code Coordination Stream - File-level MUTEX for multi-agent coding + * + * Extends BaseCoordinationStream to coordinate coding agents: + * - File-level locking: multiple agents CAN work in parallel if they touch different files + * - Conflict detection: overlapping file claims are detected and resolved + * - Lock release: automatic on step completion or plan finalization + * + * RTOS analogy: + * - Each file is a MUTEX β€” only one agent can hold it + * - The coordination stream manages MUTEX acquisition/release + * - Agents broadcast their target files as "thoughts" + * - The decision grants non-overlapping claims, defers the rest + * + * Config differences from Chat: + * - maxResponders: 5 (more parallel coding workers) + * - intentionWindowMs: 3000ms (coding needs more coordination time) + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { + BaseCoordinationStream, + type BaseThought, + type BaseDecision, + type BaseStream, +} from '../shared/BaseCoordinationStream'; + +// ──────────────────────────────────────────────────────────── +// Domain-specific types +// ──────────────────────────────────────────────────────────── + +/** + * Code-specific thought β€” a persona's claim to work on specific files. + */ +export interface CodeThought extends BaseThought { + /** Plan this thought relates to */ + planId: UUID; + + /** Files this agent intends to modify */ + targetFiles: string[]; + + /** Which plan steps this agent intends to execute */ + stepNumbers: number[]; +} + +/** + * Code-specific decision β€” file lock assignments and conflict report. + */ +export interface CodeDecision extends BaseDecision { + /** Plan this decision relates to */ + planId: UUID; + + /** File β†’ persona ID mapping of granted locks */ + fileLocks: Map; + + /** Files that were claimed by multiple agents (conflict detected) */ + conflicts: string[]; +} + +/** + * Code-specific stream state. + */ +export interface CodeStream extends BaseStream { + /** Plan being coordinated */ + planId: UUID; + + /** Current file locks: file path β†’ persona holding the lock */ + fileLocks: Map; +} + +// ──────────────────────────────────────────────────────────── +// Implementation +// ──────────────────────────────────────────────────────────── + +export class CodeCoordinationStream extends BaseCoordinationStream { + + /** Global file locks across all streams (prevents cross-plan conflicts) */ + private _globalFileLocks = new Map(); + + constructor() { + super({ + intentionWindowMs: 3000, // 3 seconds β€” coding needs more coordination time + maxResponders: 5, // Up to 5 parallel coding agents + enableLogging: true, + cleanupIntervalMs: 60000, // 1 minute β€” coding streams live longer + }); + } + + // ════════════════════════════════════════════════════════════ + // ABSTRACT METHOD IMPLEMENTATIONS + // ════════════════════════════════════════════════════════════ + + protected getDomainName(): string { + return 'Code'; + } + + protected createStream(eventId: string, contextId: UUID): CodeStream { + const maxResponders = this.getMaxResponders(); + + return { + eventId, + contextId, + phase: 'gathering', + thoughts: [], + considerations: new Map(), + startTime: Date.now(), + availableSlots: maxResponders, + claimedBy: new Set(), + + // Code-specific + planId: contextId, // contextId IS the planId for coding + fileLocks: new Map(), + }; + } + + protected convertDecision(baseDecision: BaseDecision, stream: CodeStream): CodeDecision { + // Collect all conflicts: files claimed by multiple personas + const fileClaims = new Map(); + for (const thought of stream.thoughts) { + if (thought.type === 'claiming') { + for (const file of thought.targetFiles) { + const existing = fileClaims.get(file) ?? []; + existing.push(thought.personaId); + fileClaims.set(file, existing); + } + } + } + + const conflicts: string[] = []; + for (const [file, claimants] of fileClaims) { + if (claimants.length > 1) { + conflicts.push(file); + } + } + + return { + ...baseDecision, + planId: stream.planId, + fileLocks: new Map(stream.fileLocks), + conflicts, + }; + } + + protected getEventLogContext(eventId: string): string { + return `plan ${eventId.slice(0, 8)}`; + } + + // ════════════════════════════════════════════════════════════ + // HOOK OVERRIDES + // ════════════════════════════════════════════════════════════ + + /** + * Validate a claim: check that the persona's target files are not already locked + * by another persona (either in this stream or globally). + */ + protected onClaim(stream: CodeStream, thought: CodeThought): boolean { + for (const file of thought.targetFiles) { + // Check global locks (cross-plan) + const globalHolder = this._globalFileLocks.get(file); + if (globalHolder && globalHolder !== thought.personaId) { + this.log(`Claim rejected: ${file} globally locked by ${globalHolder.slice(0, 8)}`); + return false; + } + + // Check stream-level locks (within same plan) + const streamHolder = stream.fileLocks.get(file); + if (streamHolder && streamHolder !== thought.personaId) { + this.log(`Claim rejected: ${file} locked by ${streamHolder.slice(0, 8)} in stream`); + return false; + } + } + + // Acquire locks for all target files + for (const file of thought.targetFiles) { + stream.fileLocks.set(file, thought.personaId); + this._globalFileLocks.set(file, thought.personaId); + } + + return true; + } + + /** + * After decision: log file lock summary. + */ + protected onDecisionMade(stream: CodeStream, decision: CodeDecision): void { + if (decision.conflicts.length > 0) { + this.log(`Conflicts detected: ${decision.conflicts.join(', ')}`); + } + this.log(`File locks: ${stream.fileLocks.size} files locked across ${decision.granted.length} agents`); + } + + /** + * Coding tasks are often single-agent β€” decide immediately if only one thought. + * For multi-agent, wait for the intention window. + */ + protected canDecideEarly(stream: CodeStream): boolean { + // If only one claimer and no one else is expected, decide immediately + if (stream.thoughts.length >= 1 && stream.claimedBy.size >= 1) { + // But wait if we might get more thoughts + const elapsed = Date.now() - stream.startTime; + if (elapsed > 1000) return true; // 1s grace period + } + return stream.thoughts.length >= 5; // Max parallel agents + } + + /** + * Coding streams use deterministic slot allocation (not probabilistic). + * All available agents get a slot (up to maxResponders). + */ + protected getMaxResponders(): number { + return this.config.maxResponders; // Deterministic: 5 + } + + /** + * Coding streams live longer β€” plans take time to execute. + */ + protected getStreamMaxAge(stream: CodeStream): number { + if (stream.phase === 'decided') return 30000; // 30s after decision + return 300000; // 5 min for gathering + } + + // ════════════════════════════════════════════════════════════ + // PUBLIC CODE-SPECIFIC API + // ════════════════════════════════════════════════════════════ + + /** + * Broadcast a coding thought for file-level coordination. + */ + async broadcastCodeThought( + planId: UUID, + thought: CodeThought, + ): Promise { + thought.planId = planId; + await this.broadcastThought(planId, planId, thought); + } + + /** + * Wait for a coding coordination decision. + */ + async waitForCodeDecision(planId: UUID, timeoutMs?: number): Promise { + return this.waitForDecision(planId, timeoutMs ?? 5000); + } + + /** + * Check if persona can work on specific files within a plan. + */ + async canWorkOnFiles(personaId: UUID, planId: UUID, files: string[]): Promise { + const stream = this.getStream(planId); + if (!stream) return true; // No coordination active β€” allow + + for (const file of files) { + const holder = stream.fileLocks.get(file); + if (holder && holder !== personaId) { + return false; + } + } + return true; + } + + /** + * Release file locks held by a persona (called after step/plan completion). + */ + releaseLocks(personaId: UUID, planId?: UUID): void { + // Release global locks + for (const [file, holder] of Array.from(this._globalFileLocks.entries())) { + if (holder === personaId) { + this._globalFileLocks.delete(file); + } + } + + // Release stream-level locks + if (planId) { + const stream = this.getStream(planId); + if (stream) { + for (const [file, holder] of Array.from(stream.fileLocks.entries())) { + if (holder === personaId) { + stream.fileLocks.delete(file); + } + } + } + } else { + // Release from all streams + for (const stream of this.streams.values()) { + for (const [file, holder] of Array.from(stream.fileLocks.entries())) { + if (holder === personaId) { + stream.fileLocks.delete(file); + } + } + } + } + + this.log(`Released locks for persona ${personaId.slice(0, 8)}`); + } + + /** + * Get all files currently locked and who holds them. + */ + get globalFileLocks(): ReadonlyMap { + return this._globalFileLocks; + } + + /** + * Check if a specific file is locked. + */ + isFileLocked(filePath: string): boolean { + return this._globalFileLocks.has(filePath); + } + + /** + * Get the persona holding a lock on a file (if any). + */ + lockHolder(filePath: string): UUID | undefined { + return this._globalFileLocks.get(filePath); + } + + /** + * Override shutdown to clear global locks. + */ + override shutdown(): void { + this._globalFileLocks.clear(); + super.shutdown(); + } +} + +// ════════════════════════════════════════════════════════════ +// SINGLETON PATTERN +// ════════════════════════════════════════════════════════════ + +let codeCoordinatorInstance: CodeCoordinationStream | null = null; + +/** + * Get global code coordinator instance. + */ +export function getCodeCoordinator(): CodeCoordinationStream { + if (!codeCoordinatorInstance) { + codeCoordinatorInstance = new CodeCoordinationStream(); + } + return codeCoordinatorInstance; +} + +/** + * Reset code coordinator (for testing). + */ +export function resetCodeCoordinator(): void { + if (codeCoordinatorInstance) { + codeCoordinatorInstance.shutdown(); + codeCoordinatorInstance = null; + } +} diff --git a/src/debug/jtag/system/data/entities/CodingPlanEntity.ts b/src/debug/jtag/system/data/entities/CodingPlanEntity.ts index cbf62ad6c..286b83b0f 100644 --- a/src/debug/jtag/system/data/entities/CodingPlanEntity.ts +++ b/src/debug/jtag/system/data/entities/CodingPlanEntity.ts @@ -25,7 +25,7 @@ import { } from '../decorators/FieldDecorators'; import { BaseEntity } from './BaseEntity'; import { COLLECTIONS } from '../../shared/Constants'; -import type { CodingAction } from '../../code/shared/CodingTypes'; +import type { CodingAction, RiskLevel, SecurityTierLevel } from '../../code/shared/CodingTypes'; // ──────────────────────────────────────────────────────────── // Plan status lifecycle @@ -149,6 +149,20 @@ export class CodingPlanEntity extends BaseEntity { @JsonField() generatedBy!: PlanGenerationInfo; + // ── Risk & security ────────────────────────────────────── + + /** Risk level assessed by PlanFormulator */ + @EnumField() + riskLevel!: RiskLevel; + + /** Why this risk level was assigned */ + @TextField({ nullable: true }) + riskReason?: string; + + /** Security tier governing which tools this plan can use */ + @EnumField() + securityTier!: SecurityTierLevel; + // ── Status & lifecycle ──────────────────────────────────── @EnumField({ index: true }) @@ -212,6 +226,8 @@ export class CodingPlanEntity extends BaseEntity { this.estimatedToolCalls = 0; this.assignees = []; this.generatedBy = { provider: '', model: '', temperature: 0, durationMs: 0 }; + this.riskLevel = 'low'; + this.securityTier = 'write'; this.status = 'draft'; this.filesModified = []; this.filesCreated = []; diff --git a/src/debug/jtag/system/data/entities/SkillEntity.ts b/src/debug/jtag/system/data/entities/SkillEntity.ts new file mode 100644 index 000000000..9b873063f --- /dev/null +++ b/src/debug/jtag/system/data/entities/SkillEntity.ts @@ -0,0 +1,303 @@ +/** + * SkillEntity - Self-modifying skill definition and lifecycle tracking + * + * Represents a skill that an AI persona can propose, generate, validate, and activate. + * Skills are essentially new commands created by the AI team themselves. + * + * Lifecycle: proposed β†’ approved β†’ generated β†’ validated β†’ active + * (can fail at any stage β†’ 'failed', or be deprecated after activation) + * + * Scope: + * - 'personal': Only the creator can use it (stored in persona workspace) + * - 'team': All personas can use it (requires DecisionProposal approval, stored in commands/) + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { + TextField, + NumberField, + JsonField, + EnumField, + CompositeIndex, +} from '../decorators/FieldDecorators'; +import { BaseEntity } from './BaseEntity'; +import { COLLECTIONS } from '../../shared/Constants'; + +// ──────────────────────────────────────────────────────────── +// Skill status lifecycle +// ──────────────────────────────────────────────────────────── + +export type SkillStatus = + | 'proposed' // AI submitted skill spec, not yet reviewed (team) or ready to generate (personal) + | 'approved' // Team approved via DecisionProposal (team-scoped only) + | 'generated' // CommandGenerator produced the code files + | 'validated' // Compiled + tests passed in sandbox + | 'active' // Registered and available for use + | 'failed' // Failed at generation, validation, or activation + | 'deprecated'; // Was active, now retired + +export type SkillScope = 'personal' | 'team'; + +// ──────────────────────────────────────────────────────────── +// Skill spec (what gets passed to CommandGenerator) +// ──────────────────────────────────────────────────────────── + +export interface SkillParamSpec { + name: string; + type: string; + optional?: boolean; + description?: string; +} + +export interface SkillResultSpec { + name: string; + type: string; + description?: string; +} + +export interface SkillSpec { + /** Command name (e.g., 'analysis/complexity') */ + name: string; + /** What the command does */ + description: string; + /** Input parameters */ + params: SkillParamSpec[]; + /** Output fields */ + results: SkillResultSpec[]; + /** Usage examples */ + examples?: Array<{ + description: string; + command: string; + expectedResult?: string; + }>; + /** Natural language description of what the implementation should do */ + implementation: string; + /** Access level for the command */ + accessLevel?: 'ai-safe' | 'internal' | 'system'; +} + +// ──────────────────────────────────────────────────────────── +// Validation results (populated after skill/validate) +// ──────────────────────────────────────────────────────────── + +export interface SkillValidationResults { + compiled: boolean; + testsRun: number; + testsPassed: number; + errors: string[]; + durationMs: number; +} + +// ──────────────────────────────────────────────────────────── +// Entity +// ──────────────────────────────────────────────────────────── + +@CompositeIndex({ + name: 'idx_skills_creator_status', + fields: ['createdById', 'status'], + direction: 'DESC', +}) +@CompositeIndex({ + name: 'idx_skills_scope_status', + fields: ['scope', 'status'], + direction: 'DESC', +}) +export class SkillEntity extends BaseEntity { + static readonly collection = COLLECTIONS.SKILLS; + + // ── Identity ────────────────────────────────────────────── + + /** Command name (e.g., 'analysis/complexity', 'code/lint') */ + @TextField({ index: true }) + name!: string; + + /** Human-readable description of what the skill does */ + @TextField() + description!: string; + + /** AI persona that proposed this skill */ + @TextField({ index: true }) + createdById!: UUID; + + // ── Specification ───────────────────────────────────────── + + /** Full command specification (params, results, examples, implementation) */ + @JsonField() + spec!: SkillSpec; + + // ── Scope & governance ──────────────────────────────────── + + /** Who can use this skill: personal (creator only) or team (all, requires approval) */ + @EnumField({ index: true }) + scope!: SkillScope; + + /** DecisionProposal ID if team-scoped (requires governance approval) */ + @TextField({ nullable: true }) + proposalId?: UUID; + + // ── Lifecycle ───────────────────────────────────────────── + + @EnumField({ index: true }) + status!: SkillStatus; + + /** Error message if status is 'failed' */ + @TextField({ nullable: true }) + failureReason?: string; + + // ── Generation ──────────────────────────────────────────── + + /** Directory where generated files live */ + @TextField({ nullable: true }) + outputDir?: string; + + /** Paths of files created by CommandGenerator */ + @JsonField() + generatedFiles!: string[]; + + // ── Validation ──────────────────────────────────────────── + + /** Compilation and test results from sandbox validation */ + @JsonField({ nullable: true }) + validationResults?: SkillValidationResults; + + // ── Activation ──────────────────────────────────────────── + + /** When the skill was activated (registered as a command) */ + @NumberField({ nullable: true }) + activatedAt?: number; + + // ── Index signature ─────────────────────────────────────── + + [key: string]: unknown; + + // ── Constructor ─────────────────────────────────────────── + + constructor() { + super(); + + this.name = ''; + this.description = ''; + this.createdById = '' as UUID; + this.spec = { + name: '', + description: '', + params: [], + results: [], + implementation: '', + }; + this.scope = 'personal'; + this.status = 'proposed'; + this.generatedFiles = []; + } + + // ── BaseEntity implementation ───────────────────────────── + + get collection(): string { + return SkillEntity.collection; + } + + static override getPaginationConfig(): { + defaultSortField: string; + defaultSortDirection: 'asc' | 'desc'; + defaultPageSize: number; + cursorField: string; + } { + return { + defaultSortField: 'createdAt', + defaultSortDirection: 'desc', + defaultPageSize: 20, + cursorField: 'createdAt', + }; + } + + validate(): { success: boolean; error?: string } { + if (!this.name?.trim()) { + return { success: false, error: 'Skill name is required' }; + } + + // Validate command naming convention: category/name or just name + if (!/^[a-z][a-z0-9-]*(?:\/[a-z][a-z0-9-]*)*$/.test(this.name)) { + return { success: false, error: `Skill name must follow command naming convention (e.g., 'analysis/complexity'): ${this.name}` }; + } + + if (!this.description?.trim()) { + return { success: false, error: 'Skill description is required' }; + } + + if (!this.createdById?.trim()) { + return { success: false, error: 'Skill createdById is required' }; + } + + if (!this.spec || typeof this.spec !== 'object') { + return { success: false, error: 'Skill spec is required' }; + } + + if (!this.spec.name?.trim()) { + return { success: false, error: 'Skill spec.name is required' }; + } + + if (this.spec.name !== this.name) { + return { success: false, error: `Skill spec.name (${this.spec.name}) must match entity name (${this.name})` }; + } + + if (!this.spec.implementation?.trim()) { + return { success: false, error: 'Skill spec.implementation is required (natural language description)' }; + } + + if (!Array.isArray(this.spec.params)) { + return { success: false, error: 'Skill spec.params must be an array' }; + } + + if (!Array.isArray(this.spec.results)) { + return { success: false, error: 'Skill spec.results must be an array' }; + } + + const validScopes: SkillScope[] = ['personal', 'team']; + if (!validScopes.includes(this.scope)) { + return { success: false, error: `Skill scope must be one of: ${validScopes.join(', ')}` }; + } + + const validStatuses: SkillStatus[] = [ + 'proposed', 'approved', 'generated', 'validated', 'active', 'failed', 'deprecated', + ]; + if (!validStatuses.includes(this.status)) { + return { success: false, error: `Skill status must be one of: ${validStatuses.join(', ')}` }; + } + + return { success: true }; + } + + // ── Convenience properties ──────────────────────────────── + + /** Whether this skill has been activated and is available for use */ + get isActive(): boolean { + return this.status === 'active'; + } + + /** Whether this skill requires team approval */ + get requiresApproval(): boolean { + return this.scope === 'team'; + } + + /** Whether this skill can proceed to the next lifecycle stage */ + get canAdvance(): boolean { + switch (this.status) { + case 'proposed': return this.scope === 'personal' || !!this.proposalId; + case 'approved': return true; // Can generate + case 'generated': return true; // Can validate + case 'validated': return true; // Can activate + default: return false; + } + } + + /** The next expected status in the lifecycle */ + get nextStatus(): SkillStatus | undefined { + switch (this.status) { + case 'proposed': return this.scope === 'personal' ? 'generated' : 'approved'; + case 'approved': return 'generated'; + case 'generated': return 'validated'; + case 'validated': return 'active'; + default: return undefined; + } + } +} diff --git a/src/debug/jtag/system/shared/Constants.ts b/src/debug/jtag/system/shared/Constants.ts index 25fa5e7d2..cdf7fe933 100644 --- a/src/debug/jtag/system/shared/Constants.ts +++ b/src/debug/jtag/system/shared/Constants.ts @@ -139,6 +139,9 @@ export const COLLECTIONS = { // Coding Agent System (Phase 4: Multi-Agent Coordination) CODING_PLANS: 'coding_plans', + + // Self-Modifying Skills (Phase 4B: AI-Created Commands) + SKILLS: 'skills', } as const; diff --git a/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts b/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts new file mode 100644 index 000000000..a42addafb --- /dev/null +++ b/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts @@ -0,0 +1,412 @@ +/** + * Coding Agent Workflow Integration Test (TDD) + * + * Tests the complete plan β†’ execute β†’ persist lifecycle: + * 1. Orchestrator receives a coding task + * 2. PlanFormulator generates a step DAG (mocked LLM) + * 3. Steps execute via code/* commands (mocked) + * 4. CodingPlanEntity is persisted with initial state + * 5. Step statuses are updated during execution + * 6. Plan is finalized with results + * + * This is a workflow test β€” it exercises the real orchestrator logic + * with controlled inputs, verifying the full lifecycle including + * persistence. If any step in the chain breaks, this test catches it. + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { CodeAgentOrchestrator } from '../../system/code/server/CodeAgentOrchestrator'; +import { CodingPlanEntity } from '../../system/data/entities/CodingPlanEntity'; +import type { CodingTask } from '../../system/code/shared/CodingTypes'; +import type { UUID } from '../../system/core/types/CrossPlatformUUID'; + +// ── Mocks ────────────────────────────────────────────────── + +const mockGenerateText = vi.fn(); +vi.mock('../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ + AIProviderDaemon: { + generateText: (...args: unknown[]) => mockGenerateText(...args), + }, +})); + +const mockExecute = vi.fn(); +vi.mock('../../system/core/shared/Commands', () => ({ + Commands: { + execute: (...args: unknown[]) => mockExecute(...args), + }, +})); + +vi.mock('../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +// Track DataDaemon persistence calls +const mockDataDaemonStore = vi.fn(); +const mockDataDaemonUpdate = vi.fn(); + +vi.mock('../../daemons/data-daemon/shared/DataDaemon', () => ({ + DataDaemon: { + store: (...args: unknown[]) => mockDataDaemonStore(...args), + update: (...args: unknown[]) => mockDataDaemonUpdate(...args), + }, +})); + +// ── Helpers ───────────────────────────────────────────────── + +function makeTask(overrides?: Partial): CodingTask { + return { + id: 'task-0001-0001-0001-task00000001' as UUID, + personaId: 'ai-00-0001-0001-0001-ai0000000001' as UUID, + description: 'Add a greet function to utils.ts', + taskType: 'generation', + maxToolCalls: 20, + maxDurationMs: 120000, + createdAt: Date.now(), + ...overrides, + }; +} + +/** 3-step plan: read β†’ edit β†’ verify */ +function mockThreeStepPlan() { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Read utils.ts, add greet function, verify', + steps: [ + { + stepNumber: 1, + action: 'read', + description: 'Read utils.ts', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'File content returned', + }, + { + stepNumber: 2, + action: 'edit', + description: 'Add greet function', + targetFiles: ['utils.ts'], + toolCall: 'code/edit', + toolParams: { + filePath: 'utils.ts', + editMode: { type: 'append', content: 'function greet() {}' }, + }, + dependsOn: [1], + verification: 'Edit applied', + }, + { + stepNumber: 3, + action: 'verify', + description: 'Verify changes', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [2], + verification: 'greet function present', + }, + ], + }), + }); +} + +/** Simulate successful code/* command responses */ +function mockSuccessfulCodeCommands() { + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: { name: '.', children: [] } }; + if (cmd === 'code/read') return { success: true, content: 'export function greet() {}' }; + if (cmd === 'code/edit') return { success: true, changeId: 'change-abc-001' }; + return { success: true }; + }); +} + +// ── Tests ─────────────────────────────────────────────────── + +describe('Coding Agent Workflow', () => { + let orchestrator: CodeAgentOrchestrator; + + beforeEach(() => { + mockGenerateText.mockReset(); + mockExecute.mockReset(); + mockDataDaemonStore.mockReset(); + mockDataDaemonUpdate.mockReset(); + + // DataDaemon.store returns the entity with an id assigned + mockDataDaemonStore.mockImplementation(async (_collection: string, entity: CodingPlanEntity) => { + entity.id = 'plan-persisted-id-0001' as UUID; + return entity; + }); + mockDataDaemonUpdate.mockResolvedValue({}); + + orchestrator = new CodeAgentOrchestrator(); + }); + + describe('happy path: plan β†’ execute β†’ persist', () => { + it('persists a CodingPlanEntity on successful execution', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + const result = await orchestrator.execute(makeTask()); + + // ── Execution succeeded ── + expect(result.status).toBe('completed'); + expect(result.stepResults).toHaveLength(3); + expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); + + // ── Plan was persisted ── + expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); + const [collection, entity] = mockDataDaemonStore.mock.calls[0]; + expect(collection).toBe('coding_plans'); + expect(entity).toBeInstanceOf(CodingPlanEntity); + }); + + it('persisted plan has correct initial structure', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; + + expect(entity.taskId).toBe('task-0001-0001-0001-task00000001'); + expect(entity.createdById).toBe('ai-00-0001-0001-0001-ai0000000001'); + expect(entity.leadId).toBe('ai-00-0001-0001-0001-ai0000000001'); + expect(entity.summary).toBe('Read utils.ts, add greet function, verify'); + expect(entity.taskDescription).toBe('Add a greet function to utils.ts'); + expect(entity.status).toBe('executing'); + expect(entity.steps).toHaveLength(3); + expect(entity.assignees).toContain('ai-00-0001-0001-0001-ai0000000001'); + expect(entity.executionStartedAt).toBeGreaterThan(0); + }); + + it('step snapshots have correct structural properties', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; + + // Structural properties (immutable during execution) + expect(entity.steps).toHaveLength(3); + for (const step of entity.steps) { + expect(step.toolCall).toMatch(/^code\//); + expect(step.stepNumber).toBeGreaterThan(0); + expect(step.action).toBeTruthy(); + expect(step.description).toBeTruthy(); + expect(Array.isArray(step.dependsOn)).toBe(true); + } + + // Store is called before any update (ordering proof) + expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); + expect(mockDataDaemonUpdate).toHaveBeenCalled(); + }); + + it('updates step status during execution', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + // DataDaemon.update called for each step + finalization + // 3 step updates + 1 finalize = 4 calls + expect(mockDataDaemonUpdate).toHaveBeenCalledTimes(4); + + // Each step update includes the steps array + for (let i = 0; i < 3; i++) { + const updateCall = mockDataDaemonUpdate.mock.calls[i]; + expect(updateCall[0]).toBe('coding_plans'); // collection + expect(updateCall[1]).toBe('plan-persisted-id-0001'); // entity id + expect(updateCall[2]).toHaveProperty('steps'); + } + }); + + it('finalizes plan with execution results', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + // Last update call is finalization + const finalizeCall = mockDataDaemonUpdate.mock.calls[3]; + const finalizeData = finalizeCall[2]; + + expect(finalizeData.status).toBe('completed'); + expect(finalizeData.executionCompletedAt).toBeGreaterThan(0); + expect(finalizeData.filesModified).toContain('utils.ts'); + expect(finalizeData.changeIds).toContain('change-abc-001'); + expect(finalizeData.totalToolCalls).toBeGreaterThanOrEqual(4); + expect(finalizeData.totalDurationMs).toBeGreaterThan(0); + }); + + it('tracks changeIds from edit operations', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + const result = await orchestrator.execute(makeTask()); + + expect(result.changeIds).toContain('change-abc-001'); + expect(result.filesModified).toContain('utils.ts'); + }); + }); + + describe('partial completion: some steps fail', () => { + it('persists partial status when edit fails', async () => { + mockThreeStepPlan(); + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') return { success: false, error: 'Conflict' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('partial'); + expect(result.errors.length).toBeGreaterThan(0); + + // Plan was finalized as partial + const finalizeCall = mockDataDaemonUpdate.mock.calls.at(-1); + expect(finalizeCall?.[2].status).toBe('partial'); + }); + + it('skipped steps are recorded in persistence', async () => { + mockThreeStepPlan(); + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'data' }; + if (cmd === 'code/edit') return { success: false, error: 'Failed' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + // Step 3 (verify) depends on step 2 (edit) which failed β†’ skipped + const verifyStep = result.stepResults.find(r => r.stepNumber === 3); + expect(verifyStep?.status).toBe('skipped'); + }); + }); + + describe('plan formulation failure', () => { + it('persists failed status when LLM is unavailable', async () => { + mockGenerateText.mockRejectedValue(new Error('LLM unavailable')); + mockExecute.mockResolvedValue({ success: true }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('failed'); + expect(result.errors).toContain('LLM unavailable'); + + // No plan was created (failure happened before plan formulation) + // DataDaemon.store should NOT have been called + expect(mockDataDaemonStore).not.toHaveBeenCalled(); + }); + }); + + describe('persistence failure resilience', () => { + it('continues execution even if DataDaemon.store fails', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + mockDataDaemonStore.mockRejectedValue(new Error('DB unavailable')); + + const result = await orchestrator.execute(makeTask()); + + // Execution should still complete successfully + expect(result.status).toBe('completed'); + expect(result.stepResults).toHaveLength(3); + expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); + }); + + it('continues execution even if DataDaemon.update fails', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + mockDataDaemonStore.mockImplementation(async (_c: string, entity: CodingPlanEntity) => { + entity.id = 'plan-id' as UUID; + return entity; + }); + mockDataDaemonUpdate.mockRejectedValue(new Error('DB write error')); + + const result = await orchestrator.execute(makeTask()); + + // Execution should still complete despite persistence failures + expect(result.status).toBe('completed'); + }); + }); + + describe('budget enforcement with persistence', () => { + it('persists budget_exceeded as partial status', async () => { + // Plan with 5 sequential steps + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Five reads', + steps: Array.from({ length: 5 }, (_, i) => ({ + stepNumber: i + 1, + action: 'read', + targetFiles: [`file${i}.ts`], + toolCall: 'code/read', + toolParams: { filePath: `file${i}.ts` }, + dependsOn: i > 0 ? [i] : [], + verification: 'ok', + })), + }), + }); + mockSuccessfulCodeCommands(); + + const result = await orchestrator.execute(makeTask({ maxToolCalls: 5 })); + + expect(['partial', 'budget_exceeded']).toContain(result.status); + + // Plan was finalized + if (mockDataDaemonUpdate.mock.calls.length > 0) { + const finalizeCall = mockDataDaemonUpdate.mock.calls.at(-1); + expect(['partial', 'completed']).toContain(finalizeCall?.[2].status); + } + }); + }); + + describe('plan entity structure integrity', () => { + it('step snapshots preserve dependency DAG', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; + + expect(entity.steps[0].dependsOn).toEqual([]); + expect(entity.steps[1].dependsOn).toEqual([1]); + expect(entity.steps[2].dependsOn).toEqual([2]); + }); + + it('step snapshots preserve tool params', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; + + expect(entity.steps[0].toolParams).toEqual({ filePath: 'utils.ts' }); + expect(entity.steps[1].toolParams).toHaveProperty('editMode'); + }); + + it('generatedBy includes model info', async () => { + mockThreeStepPlan(); + mockSuccessfulCodeCommands(); + + await orchestrator.execute(makeTask()); + + const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; + + expect(entity.generatedBy.provider).toBeTruthy(); + expect(entity.generatedBy.model).toBeTruthy(); + }); + }); +}); diff --git a/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts b/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts new file mode 100644 index 000000000..742913d37 --- /dev/null +++ b/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts @@ -0,0 +1,302 @@ +/** + * Sandbox Enforcement Integration Test + * + * Tests that the CodeAgentOrchestrator respects security tiers: + * 1. Plans include riskLevel from PlanFormulator + * 2. ToolAllowlistEnforcer blocks disallowed tool calls + * 3. Risk level flows through to persisted CodingPlanEntity + * 4. Discovery-tier plans can't write files + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { CodeAgentOrchestrator } from '../../system/code/server/CodeAgentOrchestrator'; +import type { CodingTask } from '../../system/code/shared/CodingTypes'; +import type { UUID } from '../../system/core/types/CrossPlatformUUID'; + +// ── Mocks ────────────────────────────────────────────────── + +const mockGenerateText = vi.fn(); +vi.mock('../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ + AIProviderDaemon: { + generateText: (...args: unknown[]) => mockGenerateText(...args), + }, +})); + +const mockExecute = vi.fn(); +vi.mock('../../system/core/shared/Commands', () => ({ + Commands: { + execute: (...args: unknown[]) => mockExecute(...args), + }, +})); + +vi.mock('../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +const mockDataDaemonStore = vi.fn(); +const mockDataDaemonUpdate = vi.fn(); +vi.mock('../../daemons/data-daemon/shared/DataDaemon', () => ({ + DataDaemon: { + store: (...args: unknown[]) => mockDataDaemonStore(...args), + update: (...args: unknown[]) => mockDataDaemonUpdate(...args), + }, +})); + +// ── Helpers ───────────────────────────────────────────────── + +function makeTask(overrides?: Partial): CodingTask { + return { + id: 'task-enforce-0001-0001-task00000001' as UUID, + personaId: 'ai-00-0001-0001-0001-ai0000000001' as UUID, + description: 'Test sandbox enforcement', + taskType: 'generation', + maxToolCalls: 20, + maxDurationMs: 120000, + createdAt: Date.now(), + ...overrides, + }; +} + +function mockSuccessfulCommands() { + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: { name: '.', children: [] } }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'change-001' }; + if (cmd === 'code/write') return { success: true, changeId: 'change-002' }; + if (cmd === 'development/exec') return { success: true, output: 'npm output' }; + return { success: true }; + }); +} + +// ── Tests ─────────────────────────────────────────────────── + +describe('Sandbox Enforcement', () => { + let orchestrator: CodeAgentOrchestrator; + + beforeEach(() => { + mockGenerateText.mockReset(); + mockExecute.mockReset(); + mockDataDaemonStore.mockReset(); + mockDataDaemonUpdate.mockReset(); + + mockDataDaemonStore.mockImplementation(async (_c: string, entity: any) => { + entity.id = 'plan-enforce-id' as UUID; + return entity; + }); + mockDataDaemonUpdate.mockResolvedValue({}); + + orchestrator = new CodeAgentOrchestrator(); + }); + + describe('riskLevel flows from plan to entity', () => { + it('low-risk plan persists riskLevel and securityTier', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Read a single file', + riskLevel: 'low', + riskReason: 'Read-only, no modifications', + steps: [{ + stepNumber: 1, + action: 'read', + description: 'Read utils.ts', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'File read', + }], + }), + }); + mockSuccessfulCommands(); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('completed'); + + // Verify entity was persisted with risk info + expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); + const entity = mockDataDaemonStore.mock.calls[0][1]; + expect(entity.riskLevel).toBe('low'); + expect(entity.riskReason).toBe('Read-only, no modifications'); + expect(entity.securityTier).toBe('write'); // low β†’ write tier + }); + + it('critical-risk plan gets system tier', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Modify build system', + riskLevel: 'critical', + riskReason: 'Modifies build configuration and deployment scripts', + steps: [{ + stepNumber: 1, + action: 'read', + description: 'Read build config', + targetFiles: ['build.config.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'build.config.ts' }, + dependsOn: [], + verification: 'Config read', + }], + }), + }); + mockSuccessfulCommands(); + + await orchestrator.execute(makeTask()); + + const entity = mockDataDaemonStore.mock.calls[0][1]; + expect(entity.riskLevel).toBe('critical'); + expect(entity.securityTier).toBe('system'); // critical β†’ system tier + }); + }); + + describe('enforcer blocks disallowed tools', () => { + it('write-tier plan blocks code/delete steps', async () => { + // Plan with riskLevel=low (β†’ write tier) tries to use code/delete (explicitly denied) + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Delete old file', + riskLevel: 'low', + riskReason: 'Simple cleanup', + steps: [ + { + stepNumber: 1, + action: 'read', + description: 'Read old file', + targetFiles: ['old.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'old.ts' }, + dependsOn: [], + verification: 'File read', + }, + { + stepNumber: 2, + action: 'verify', + description: 'Delete old file', + targetFiles: ['old.ts'], + toolCall: 'code/delete', + toolParams: { filePath: 'old.ts' }, + dependsOn: [1], + verification: 'File deleted', + }, + ], + }), + }); + mockSuccessfulCommands(); + + const result = await orchestrator.execute(makeTask()); + + // Step 1 (read) should succeed, step 2 (code/delete) should fail (denied in write tier) + const readStep = result.stepResults.find(r => r.stepNumber === 1); + const deleteStep = result.stepResults.find(r => r.stepNumber === 2); + + expect(readStep?.status).toBe('completed'); + expect(deleteStep?.status).toBe('failed'); + expect(deleteStep?.error).toContain('denied'); + }); + + it('system-tier plan allows code/delete', async () => { + // Plan with riskLevel=critical (β†’ system tier) can use code/delete + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'System cleanup', + riskLevel: 'critical', + riskReason: 'Requires deletion capability', + steps: [ + { + stepNumber: 1, + action: 'verify', + description: 'Delete deprecated file', + targetFiles: ['deprecated.ts'], + toolCall: 'code/delete', + toolParams: { filePath: 'deprecated.ts' }, + dependsOn: [], + verification: 'File removed', + }, + ], + }), + }); + mockSuccessfulCommands(); + + const result = await orchestrator.execute(makeTask()); + + const deleteStep = result.stepResults.find(r => r.stepNumber === 1); + expect(deleteStep?.status).toBe('completed'); + }); + + it('write-tier plan allows code/write and code/edit', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Edit files', + riskLevel: 'medium', + riskReason: 'Standard file modifications', + steps: [ + { + stepNumber: 1, + action: 'read', + description: 'Read file', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'Read', + }, + { + stepNumber: 2, + action: 'edit', + description: 'Edit file', + targetFiles: ['utils.ts'], + toolCall: 'code/edit', + toolParams: { filePath: 'utils.ts', editMode: { type: 'append', content: 'new code' } }, + dependsOn: [1], + verification: 'Edited', + }, + ], + }), + }); + mockSuccessfulCommands(); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('completed'); + expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); + }); + }); + + describe('default risk handling', () => { + it('plan without riskLevel defaults to medium/write tier', async () => { + // Old-style plan without risk fields + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Legacy plan', + steps: [{ + stepNumber: 1, + action: 'read', + description: 'Read file', + targetFiles: ['utils.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'utils.ts' }, + dependsOn: [], + verification: 'Read', + }], + }), + }); + mockSuccessfulCommands(); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('completed'); + + // Entity should have default risk values + const entity = mockDataDaemonStore.mock.calls[0][1]; + expect(entity.riskLevel).toBe('medium'); + expect(entity.securityTier).toBe('write'); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts index 85256a972..8a0925844 100644 --- a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts +++ b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts @@ -300,4 +300,77 @@ describe('CodeAgentOrchestrator', () => { expect(readStep?.error).toContain('Connection lost'); }); }); + + describe('dryRun mode', () => { + it('executes read steps normally in dryRun', async () => { + mockSimplePlan(); + mockExecute + .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) + .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read + .mockResolvedValue({ success: true, content: 'data' }); // remaining reads + + const result = await orchestrator.execute(makeTask(), { dryRun: true }); + + // Step 1 (read) should execute normally + const readStep = result.stepResults.find(r => r.stepNumber === 1); + expect(readStep?.status).toBe('completed'); + }); + + it('mocks write/edit steps in dryRun', async () => { + mockSimplePlan(); + mockExecute + .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) + .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read + .mockResolvedValue({ success: true, content: 'data' }); // step 3: verify read + + const result = await orchestrator.execute(makeTask(), { dryRun: true }); + + // Step 2 (edit) should be mocked β€” completed but with dryRun flag + const editStep = result.stepResults.find(r => r.stepNumber === 2); + expect(editStep?.status).toBe('completed'); + + const output = editStep?.output as Record; + expect(output?.dryRun).toBe(true); + expect(output?.wouldModify).toEqual(['utils.ts']); + }); + + it('dryRun does not call Commands.execute for write steps', async () => { + mockSimplePlan(); + + const callLog: string[] = []; + mockExecute.mockImplementation(async (cmd: string) => { + callLog.push(cmd); + if (cmd === 'code/tree') return { success: true, root: {} }; + return { success: true, content: 'data' }; + }); + + await orchestrator.execute(makeTask(), { dryRun: true }); + + // code/edit should NOT appear in call log + expect(callLog).not.toContain('code/edit'); + // code/read and code/tree should appear + expect(callLog).toContain('code/tree'); + expect(callLog).toContain('code/read'); + }); + + it('dryRun completes all steps successfully', async () => { + mockSimplePlan(); + mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); + + const result = await orchestrator.execute(makeTask(), { dryRun: true }); + + expect(result.status).toBe('completed'); + expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); + }); + + it('dryRun does not produce changeIds', async () => { + mockSimplePlan(); + mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); + + const result = await orchestrator.execute(makeTask(), { dryRun: true }); + + // No real writes happened, so no changeIds + expect(result.changeIds).toHaveLength(0); + }); + }); }); diff --git a/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts b/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts new file mode 100644 index 000000000..e138c7974 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts @@ -0,0 +1,328 @@ +/** + * CodeCoordinationStream Unit Tests + * + * Tests the file-level MUTEX coordination for multi-agent coding: + * - Stream creation and configuration + * - File lock acquisition and release + * - Conflict detection (overlapping file claims) + * - Multi-agent parallel coordination (non-overlapping files) + * - Global lock management + * - Singleton pattern + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { + CodeCoordinationStream, + getCodeCoordinator, + resetCodeCoordinator, + type CodeThought, + type CodeDecision, + type CodeStream, +} from '../../../system/coordination/server/CodeCoordinationStream'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// ── Helpers ────────────────────────────────────────────────── + +const PLAN_ID = '11111111-2222-3333-4444-555555555555' as UUID; +const AGENT_A = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; +const AGENT_B = 'bbbbbbbb-cccc-dddd-eeee-ffffffffffff' as UUID; +const AGENT_C = 'cccccccc-dddd-eeee-ffff-111111111111' as UUID; + +function makeThought( + personaId: UUID, + targetFiles: string[], + overrides?: Partial, +): CodeThought { + return { + personaId, + personaName: `Agent-${personaId.slice(0, 4)}`, + type: 'claiming', + confidence: 0.8, + reasoning: `Claiming files: ${targetFiles.join(', ')}`, + timestamp: Date.now(), + planId: PLAN_ID, + targetFiles, + stepNumbers: [1, 2], + ...overrides, + }; +} + +// ── Tests ──────────────────────────────────────────────────── + +describe('CodeCoordinationStream', () => { + let coordinator: CodeCoordinationStream; + + beforeEach(() => { + resetCodeCoordinator(); + coordinator = new CodeCoordinationStream(); + }); + + afterEach(() => { + coordinator.shutdown(); + }); + + describe('construction and configuration', () => { + it('creates with coding-specific config', () => { + // Verify it's a proper instance + expect(coordinator).toBeInstanceOf(CodeCoordinationStream); + }); + + it('starts with no global file locks', () => { + expect(coordinator.globalFileLocks.size).toBe(0); + }); + + it('starts with no active streams', () => { + expect(coordinator.getStreams().size).toBe(0); + }); + }); + + describe('file lock acquisition', () => { + it('single agent acquires locks on broadcast', async () => { + const thought = makeThought(AGENT_A, ['src/main.ts', 'src/utils.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + expect(coordinator.globalFileLocks.size).toBe(2); + expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); + expect(coordinator.lockHolder('src/utils.ts')).toBe(AGENT_A); + }); + + it('isFileLocked returns correct status', async () => { + expect(coordinator.isFileLocked('src/main.ts')).toBe(false); + + const thought = makeThought(AGENT_A, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + expect(coordinator.isFileLocked('src/main.ts')).toBe(true); + expect(coordinator.isFileLocked('src/other.ts')).toBe(false); + }); + + it('lockHolder returns undefined for unlocked files', () => { + expect(coordinator.lockHolder('src/nonexistent.ts')).toBeUndefined(); + }); + }); + + describe('conflict detection', () => { + it('rejects claim when files already locked by another agent', async () => { + // Agent A claims main.ts + const thoughtA = makeThought(AGENT_A, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + + // Agent B tries to claim main.ts β€” should be rejected + const thoughtB = makeThought(AGENT_B, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + + // main.ts should still be locked by Agent A + expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); + }); + + it('allows same agent to reclaim their own files', async () => { + const thought1 = makeThought(AGENT_A, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought1); + + const thought2 = makeThought(AGENT_A, ['src/main.ts', 'src/extra.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought2); + + expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); + expect(coordinator.lockHolder('src/extra.ts')).toBe(AGENT_A); + }); + + it('rejects claim when any file in the set conflicts', async () => { + const thoughtA = makeThought(AGENT_A, ['src/shared.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + + // Agent B claims unique.ts + shared.ts β€” shared.ts conflicts + const thoughtB = makeThought(AGENT_B, ['src/unique.ts', 'src/shared.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + + // shared.ts still locked by A, unique.ts NOT locked (whole claim rejected) + expect(coordinator.lockHolder('src/shared.ts')).toBe(AGENT_A); + expect(coordinator.isFileLocked('src/unique.ts')).toBe(false); + }); + }); + + describe('parallel non-overlapping agents', () => { + it('multiple agents acquire non-overlapping file locks', async () => { + const thoughtA = makeThought(AGENT_A, ['src/moduleA.ts']); + const thoughtB = makeThought(AGENT_B, ['src/moduleB.ts']); + const thoughtC = makeThought(AGENT_C, ['src/moduleC.ts']); + + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtC); + + expect(coordinator.globalFileLocks.size).toBe(3); + expect(coordinator.lockHolder('src/moduleA.ts')).toBe(AGENT_A); + expect(coordinator.lockHolder('src/moduleB.ts')).toBe(AGENT_B); + expect(coordinator.lockHolder('src/moduleC.ts')).toBe(AGENT_C); + }); + + it('canWorkOnFiles checks correctly for non-overlapping', async () => { + const thought = makeThought(AGENT_A, ['src/moduleA.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + const canB = await coordinator.canWorkOnFiles(AGENT_B, PLAN_ID, ['src/moduleB.ts']); + expect(canB).toBe(true); + + const canBConflict = await coordinator.canWorkOnFiles(AGENT_B, PLAN_ID, ['src/moduleA.ts']); + expect(canBConflict).toBe(false); + }); + + it('canWorkOnFiles returns true when no stream exists', async () => { + const can = await coordinator.canWorkOnFiles(AGENT_A, 'no-such-plan' as UUID, ['anything.ts']); + expect(can).toBe(true); + }); + }); + + describe('lock release', () => { + it('releases all locks for a persona', async () => { + const thought = makeThought(AGENT_A, ['src/a.ts', 'src/b.ts', 'src/c.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + expect(coordinator.globalFileLocks.size).toBe(3); + + coordinator.releaseLocks(AGENT_A); + + expect(coordinator.globalFileLocks.size).toBe(0); + expect(coordinator.isFileLocked('src/a.ts')).toBe(false); + }); + + it('releases only the specified persona locks', async () => { + const thoughtA = makeThought(AGENT_A, ['src/a.ts']); + const thoughtB = makeThought(AGENT_B, ['src/b.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + + coordinator.releaseLocks(AGENT_A); + + expect(coordinator.isFileLocked('src/a.ts')).toBe(false); + expect(coordinator.isFileLocked('src/b.ts')).toBe(true); + expect(coordinator.lockHolder('src/b.ts')).toBe(AGENT_B); + }); + + it('releases locks for a specific plan only', async () => { + const PLAN_2 = '22222222-3333-4444-5555-666666666666' as UUID; + const thoughtA1 = makeThought(AGENT_A, ['src/plan1.ts']); + const thoughtA2 = makeThought(AGENT_A, ['src/plan2.ts']); + + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA1); + await coordinator.broadcastCodeThought(PLAN_2, thoughtA2); + + // Release only for PLAN_ID stream β€” global locks for PLAN_2 remain + coordinator.releaseLocks(AGENT_A, PLAN_ID); + + // Stream-level locks for plan1 should be gone + const stream1 = coordinator.getStream(PLAN_ID); + if (stream1) { + expect(stream1.fileLocks.has('src/plan1.ts')).toBe(false); + } + }); + }); + + describe('deferring', () => { + it('defer releases claimed slot', async () => { + const claim = makeThought(AGENT_A, ['src/main.ts'], { type: 'claiming' }); + await coordinator.broadcastCodeThought(PLAN_ID, claim); + + const stream = coordinator.getStream(PLAN_ID); + expect(stream).toBeDefined(); + expect(stream!.claimedBy.has(AGENT_A)).toBe(true); + + const defer = makeThought(AGENT_A, ['src/main.ts'], { type: 'deferring' }); + await coordinator.broadcastCodeThought(PLAN_ID, defer); + + expect(stream!.claimedBy.has(AGENT_A)).toBe(false); + }); + }); + + describe('stream lifecycle', () => { + it('creates stream on first thought', async () => { + expect(coordinator.getStreams().size).toBe(0); + + const thought = makeThought(AGENT_A, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + expect(coordinator.getStreams().size).toBe(1); + const stream = coordinator.getStream(PLAN_ID); + expect(stream).toBeDefined(); + expect(stream!.planId).toBe(PLAN_ID); + }); + + it('stream accumulates thoughts from multiple agents', async () => { + const thoughtA = makeThought(AGENT_A, ['src/a.ts']); + const thoughtB = makeThought(AGENT_B, ['src/b.ts']); + + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + + const stream = coordinator.getStream(PLAN_ID); + expect(stream!.thoughts).toHaveLength(2); + expect(stream!.considerations.size).toBe(2); + }); + }); + + describe('decision making', () => { + it('waitForCodeDecision returns null for non-existent stream', async () => { + const decision = await coordinator.waitForCodeDecision('no-such-plan' as UUID, 100); + expect(decision).toBeNull(); + }); + + it('decision includes file locks and conflicts', async () => { + // Set up two agents claiming different files + const thoughtA = makeThought(AGENT_A, ['src/a.ts'], { confidence: 0.9 }); + const thoughtB = makeThought(AGENT_B, ['src/b.ts'], { confidence: 0.8 }); + + await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); + await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); + + // Wait for decision (with short timeout since canDecideEarly may trigger) + const decision = await coordinator.waitForCodeDecision(PLAN_ID, 4000); + if (decision) { + expect(decision.planId).toBe(PLAN_ID); + expect(decision.fileLocks).toBeDefined(); + expect(decision.conflicts).toBeDefined(); + } + }); + }); + + describe('singleton pattern', () => { + it('getCodeCoordinator returns same instance', () => { + const a = getCodeCoordinator(); + const b = getCodeCoordinator(); + expect(a).toBe(b); + }); + + it('resetCodeCoordinator creates fresh instance', () => { + const a = getCodeCoordinator(); + resetCodeCoordinator(); + const b = getCodeCoordinator(); + expect(a).not.toBe(b); + }); + + it('reset clears global file locks', async () => { + const coord = getCodeCoordinator(); + const thought = makeThought(AGENT_A, ['src/locked.ts']); + await coord.broadcastCodeThought(PLAN_ID, thought); + + expect(coord.globalFileLocks.size).toBe(1); + resetCodeCoordinator(); + + const fresh = getCodeCoordinator(); + expect(fresh.globalFileLocks.size).toBe(0); + }); + }); + + describe('shutdown', () => { + it('clears all state on shutdown', async () => { + const thought = makeThought(AGENT_A, ['src/main.ts']); + await coordinator.broadcastCodeThought(PLAN_ID, thought); + + expect(coordinator.globalFileLocks.size).toBe(1); + expect(coordinator.getStreams().size).toBe(1); + + coordinator.shutdown(); + + expect(coordinator.globalFileLocks.size).toBe(0); + expect(coordinator.getStreams().size).toBe(0); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts b/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts new file mode 100644 index 000000000..5e9cb4d69 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts @@ -0,0 +1,530 @@ +/** + * CodeTaskDelegator Unit Tests + * + * Tests plan decomposition and multi-agent assignment: + * - decompose: step DAG β†’ file clusters (union-find) + * - assign: clusters β†’ agents (load-balanced) + * - createSubPlans: assignments β†’ CodingPlanEntity sub-plans + * - consolidate: sub-plan results β†’ parent CodingResult + */ + +import { describe, it, expect } from 'vitest'; +import { CodeTaskDelegator, type FileCluster, type AgentAssignment } from '../../../system/code/server/CodeTaskDelegator'; +import { CodingPlanEntity, type CodingStepSnapshot } from '../../../system/data/entities/CodingPlanEntity'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; +import type { AgentCapability } from '../../../system/code/shared/CodingTypes'; + +// ── Helpers ────────────────────────────────────────────────── + +const TASK_ID = '11111111-2222-3333-4444-555555555555' as UUID; +const LEAD_ID = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; +const AGENT_A = 'aaaaaaaa-1111-2222-3333-444444444444' as UUID; +const AGENT_B = 'bbbbbbbb-1111-2222-3333-444444444444' as UUID; +const AGENT_C = 'cccccccc-1111-2222-3333-444444444444' as UUID; + +function makeStep( + stepNumber: number, + targetFiles: string[], + dependsOn: number[] = [], + action: string = 'edit', +): CodingStepSnapshot { + return { + stepNumber, + action: action as any, + description: `Step ${stepNumber}: ${action} ${targetFiles.join(', ')}`, + targetFiles, + toolCall: `code/${action}`, + toolParams: {}, + dependsOn, + verification: 'Verify step', + status: 'pending', + }; +} + +function makePlan(steps: CodingStepSnapshot[]): CodingPlanEntity { + const plan = new CodingPlanEntity(); + plan.taskId = TASK_ID; + plan.createdById = LEAD_ID; + plan.leadId = LEAD_ID; + plan.summary = 'Test plan for delegation'; + plan.taskDescription = 'Multi-file refactoring task'; + plan.steps = steps; + plan.estimatedToolCalls = steps.length; + plan.assignees = [LEAD_ID]; + plan.generatedBy = { provider: 'test', model: 'test-model', temperature: 0, durationMs: 0 }; + plan.riskLevel = 'medium'; + plan.securityTier = 'write'; + plan.status = 'approved'; + return plan; +} + +function makeAgent(id: UUID, name: string, load: number = 0): AgentCapability { + return { + personaId: id, + name, + specialties: ['typescript'], + currentLoad: load, + securityTier: 'write', + }; +} + +// ── Tests ──────────────────────────────────────────────────── + +describe('CodeTaskDelegator', () => { + const delegator = new CodeTaskDelegator(); + + describe('decompose', () => { + it('empty plan produces no clusters', () => { + const plan = makePlan([]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(0); + }); + + it('single step produces one cluster', () => { + const plan = makePlan([ + makeStep(1, ['src/main.ts']), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + expect(clusters[0].stepNumbers).toEqual([1]); + expect(clusters[0].files).toEqual(['src/main.ts']); + }); + + it('independent files produce separate clusters', () => { + const plan = makePlan([ + makeStep(1, ['src/moduleA.ts']), + makeStep(2, ['src/moduleB.ts']), + makeStep(3, ['src/moduleC.ts']), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(3); + + const allFiles = clusters.flatMap(c => c.files); + expect(allFiles).toContain('src/moduleA.ts'); + expect(allFiles).toContain('src/moduleB.ts'); + expect(allFiles).toContain('src/moduleC.ts'); + }); + + it('shared file merges steps into one cluster', () => { + const plan = makePlan([ + makeStep(1, ['src/shared.ts', 'src/a.ts']), + makeStep(2, ['src/shared.ts', 'src/b.ts']), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + expect(clusters[0].stepNumbers).toContain(1); + expect(clusters[0].stepNumbers).toContain(2); + expect(clusters[0].files).toContain('src/shared.ts'); + expect(clusters[0].files).toContain('src/a.ts'); + expect(clusters[0].files).toContain('src/b.ts'); + }); + + it('dependencies merge steps into one cluster', () => { + const plan = makePlan([ + makeStep(1, ['src/a.ts']), + makeStep(2, ['src/b.ts'], [1]), // depends on step 1 + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + expect(clusters[0].stepNumbers).toContain(1); + expect(clusters[0].stepNumbers).toContain(2); + }); + + it('transitive file sharing merges all into one cluster', () => { + // A shares file with B, B shares file with C β†’ all in one cluster + const plan = makePlan([ + makeStep(1, ['src/a.ts', 'src/shared-ab.ts']), + makeStep(2, ['src/b.ts', 'src/shared-ab.ts', 'src/shared-bc.ts']), + makeStep(3, ['src/c.ts', 'src/shared-bc.ts']), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + }); + + it('mixed independent and dependent steps', () => { + const plan = makePlan([ + // Cluster 1: steps 1, 2 share moduleA.ts + makeStep(1, ['src/moduleA.ts'], []), + makeStep(2, ['src/moduleA.ts'], [1]), + // Cluster 2: step 3 is independent + makeStep(3, ['src/moduleB.ts'], []), + // Cluster 3: steps 4, 5 share moduleC.ts + makeStep(4, ['src/moduleC.ts'], []), + makeStep(5, ['src/moduleC.ts', 'src/moduleC-test.ts'], [4]), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(3); + }); + + it('external dependencies are tracked', () => { + // Step 2 depends on step 1, but they touch different files + // If we force them into different clusters (no shared files, no deps), + // they'd be separate. But dependsOn forces merge. + // Test external deps by having step 3 depend on step 1 from a different cluster + const plan = makePlan([ + makeStep(1, ['src/a.ts']), + makeStep(2, ['src/a.ts'], [1]), // Same cluster as 1 + makeStep(3, ['src/b.ts']), // Different cluster + ]); + const clusters = delegator.decompose(plan); + // Steps 1 and 2 in one cluster (shared file + dependency) + // Step 3 in separate cluster (no shared files, no deps) + expect(clusters).toHaveLength(2); + + const clusterB = clusters.find(c => c.files.includes('src/b.ts')); + expect(clusterB).toBeDefined(); + expect(clusterB!.externalDeps).toEqual([]); // No external deps + }); + + it('steps are sorted within clusters', () => { + const plan = makePlan([ + makeStep(3, ['src/shared.ts']), + makeStep(1, ['src/shared.ts']), + makeStep(2, ['src/shared.ts']), + ]); + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + expect(clusters[0].stepNumbers).toEqual([1, 2, 3]); + }); + }); + + describe('assign', () => { + it('empty clusters produces empty assignments', () => { + const agents = [makeAgent(AGENT_A, 'Agent A')]; + const assignments = delegator.assign([], agents, makePlan([])); + expect(assignments).toHaveLength(0); + }); + + it('empty agents produces empty assignments', () => { + const clusters: FileCluster[] = [{ + index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [], + }]; + const assignments = delegator.assign(clusters, [], makePlan([])); + expect(assignments).toHaveLength(0); + }); + + it('single cluster assigned to single agent', () => { + const clusters: FileCluster[] = [{ + index: 0, stepNumbers: [1, 2], files: ['src/main.ts'], externalDeps: [], + }]; + const agents = [makeAgent(AGENT_A, 'Agent A')]; + const assignments = delegator.assign(clusters, agents, makePlan([])); + + expect(assignments).toHaveLength(1); + expect(assignments[0].agentId).toBe(AGENT_A); + expect(assignments[0].totalSteps).toBe(2); + expect(assignments[0].files).toContain('src/main.ts'); + }); + + it('distributes clusters across agents evenly', () => { + const clusters: FileCluster[] = [ + { index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [] }, + { index: 1, stepNumbers: [2], files: ['b.ts'], externalDeps: [] }, + { index: 2, stepNumbers: [3], files: ['c.ts'], externalDeps: [] }, + ]; + const agents = [ + makeAgent(AGENT_A, 'Agent A', 0.1), + makeAgent(AGENT_B, 'Agent B', 0.2), + makeAgent(AGENT_C, 'Agent C', 0.3), + ]; + const assignments = delegator.assign(clusters, agents, makePlan([])); + + expect(assignments).toHaveLength(3); + // Each agent gets one cluster (evenly distributed) + for (const a of assignments) { + expect(a.totalSteps).toBe(1); + } + }); + + it('prefers least-loaded agents', () => { + const clusters: FileCluster[] = [ + { index: 0, stepNumbers: [1, 2, 3], files: ['big.ts'], externalDeps: [] }, + ]; + const agents = [ + makeAgent(AGENT_A, 'Agent A', 0.8), // Heavily loaded + makeAgent(AGENT_B, 'Agent B', 0.1), // Least loaded + ]; + const assignments = delegator.assign(clusters, agents, makePlan([])); + + expect(assignments).toHaveLength(1); + expect(assignments[0].agentId).toBe(AGENT_B); // Least loaded gets it + }); + + it('handles more clusters than agents', () => { + const clusters: FileCluster[] = [ + { index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [] }, + { index: 1, stepNumbers: [2], files: ['b.ts'], externalDeps: [] }, + { index: 2, stepNumbers: [3], files: ['c.ts'], externalDeps: [] }, + { index: 3, stepNumbers: [4], files: ['d.ts'], externalDeps: [] }, + ]; + const agents = [ + makeAgent(AGENT_A, 'Agent A'), + makeAgent(AGENT_B, 'Agent B'), + ]; + const assignments = delegator.assign(clusters, agents, makePlan([])); + + // 4 clusters, 2 agents β†’ each gets 2 + expect(assignments).toHaveLength(2); + const totalSteps = assignments.reduce((sum, a) => sum + a.totalSteps, 0); + expect(totalSteps).toBe(4); + }); + }); + + describe('createSubPlans', () => { + it('creates sub-plans from assignments', () => { + const plan = makePlan([ + makeStep(1, ['src/a.ts']), + makeStep(2, ['src/b.ts']), + ]); + + const assignments: AgentAssignment[] = [ + { + agentId: AGENT_A, + agentName: 'Agent A', + clusters: [{ index: 0, stepNumbers: [1], files: ['src/a.ts'], externalDeps: [] }], + totalSteps: 1, + files: ['src/a.ts'], + }, + { + agentId: AGENT_B, + agentName: 'Agent B', + clusters: [{ index: 1, stepNumbers: [2], files: ['src/b.ts'], externalDeps: [] }], + totalSteps: 1, + files: ['src/b.ts'], + }, + ]; + + const subPlans = delegator.createSubPlans(plan, assignments); + expect(subPlans).toHaveLength(2); + + // Sub-plan for Agent A + const subA = subPlans.find(s => s.leadId === AGENT_A); + expect(subA).toBeDefined(); + expect(subA!.steps).toHaveLength(1); + expect(subA!.steps[0].stepNumber).toBe(1); + expect(subA!.assignees).toEqual([AGENT_A]); + expect(subA!.status).toBe('approved'); + + // Sub-plan for Agent B + const subB = subPlans.find(s => s.leadId === AGENT_B); + expect(subB).toBeDefined(); + expect(subB!.steps).toHaveLength(1); + expect(subB!.steps[0].stepNumber).toBe(2); + }); + + it('sub-plans inherit parent metadata', () => { + const plan = makePlan([makeStep(1, ['src/a.ts'])]); + plan.riskLevel = 'high'; + plan.securityTier = 'write'; + + const assignments: AgentAssignment[] = [{ + agentId: AGENT_A, agentName: 'Agent A', + clusters: [{ index: 0, stepNumbers: [1], files: ['src/a.ts'], externalDeps: [] }], + totalSteps: 1, files: ['src/a.ts'], + }]; + + const subPlans = delegator.createSubPlans(plan, assignments); + expect(subPlans[0].taskId).toBe(plan.taskId); + expect(subPlans[0].riskLevel).toBe('high'); + expect(subPlans[0].securityTier).toBe('write'); + expect(subPlans[0].taskDescription).toBe(plan.taskDescription); + }); + + it('sub-plans filter dependsOn to only internal steps', () => { + const plan = makePlan([ + makeStep(1, ['src/a.ts']), + makeStep(2, ['src/a.ts'], [1]), // Depends on step 1 + makeStep(3, ['src/b.ts'], [1]), // Depends on step 1 (external dep) + ]); + + // Steps 1 and 2 go to Agent A (shared file), step 3 to Agent B + const assignments: AgentAssignment[] = [ + { + agentId: AGENT_A, agentName: 'Agent A', + clusters: [{ index: 0, stepNumbers: [1, 2], files: ['src/a.ts'], externalDeps: [] }], + totalSteps: 2, files: ['src/a.ts'], + }, + { + agentId: AGENT_B, agentName: 'Agent B', + clusters: [{ index: 1, stepNumbers: [3], files: ['src/b.ts'], externalDeps: [1] }], + totalSteps: 1, files: ['src/b.ts'], + }, + ]; + + const subPlans = delegator.createSubPlans(plan, assignments); + const subB = subPlans.find(s => s.leadId === AGENT_B)!; + + // Step 3's dependency on step 1 should be filtered out (step 1 is not in this sub-plan) + expect(subB.steps[0].dependsOn).toEqual([]); + }); + }); + + describe('consolidate', () => { + it('all completed β†’ completed', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['a.ts'])]); + sub1.status = 'completed'; + sub1.filesModified = ['a.ts']; + sub1.totalToolCalls = 3; + sub1.totalDurationMs = 1000; + sub1.steps[0].status = 'completed'; + + const sub2 = makePlan([makeStep(2, ['b.ts'])]); + sub2.status = 'completed'; + sub2.filesModified = ['b.ts']; + sub2.totalToolCalls = 2; + sub2.totalDurationMs = 800; + sub2.steps[0].status = 'completed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + expect(result.status).toBe('completed'); + expect(result.filesModified).toContain('a.ts'); + expect(result.filesModified).toContain('b.ts'); + expect(result.totalToolCalls).toBe(5); + // Duration is max (parallel), not sum + expect(result.totalDurationMs).toBe(1000); + }); + + it('some completed β†’ partial', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['a.ts'])]); + sub1.status = 'completed'; + sub1.steps[0].status = 'completed'; + + const sub2 = makePlan([makeStep(2, ['b.ts'])]); + sub2.status = 'failed'; + sub2.errors = ['Compilation failed']; + sub2.steps[0].status = 'failed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + expect(result.status).toBe('partial'); + expect(result.errors).toContain('Compilation failed'); + }); + + it('all failed β†’ failed', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['a.ts'])]); + sub1.status = 'failed'; + sub1.steps[0].status = 'failed'; + + const sub2 = makePlan([makeStep(2, ['b.ts'])]); + sub2.status = 'failed'; + sub2.steps[0].status = 'failed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + expect(result.status).toBe('failed'); + }); + + it('detects file conflicts across sub-plans', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['shared.ts'])]); + sub1.status = 'completed'; + sub1.filesModified = ['shared.ts']; + sub1.steps[0].status = 'completed'; + + const sub2 = makePlan([makeStep(2, ['shared.ts'])]); + sub2.status = 'completed'; + sub2.filesModified = ['shared.ts']; + sub2.steps[0].status = 'completed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + expect(result.errors.some(e => e.includes('conflict'))).toBe(true); + expect(result.errors.some(e => e.includes('shared.ts'))).toBe(true); + }); + + it('aggregates change IDs from all sub-plans', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['a.ts'])]); + sub1.status = 'completed'; + sub1.changeIds = ['change-1', 'change-2']; + sub1.steps[0].status = 'completed'; + + const sub2 = makePlan([makeStep(2, ['b.ts'])]); + sub2.status = 'completed'; + sub2.changeIds = ['change-3']; + sub2.steps[0].status = 'completed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + expect(result.changeIds).toEqual(['change-1', 'change-2', 'change-3']); + }); + + it('deduplicates modified files', () => { + const plan = makePlan([]); + const sub1 = makePlan([makeStep(1, ['shared.ts'])]); + sub1.status = 'completed'; + sub1.filesModified = ['shared.ts']; + sub1.steps[0].status = 'completed'; + + const sub2 = makePlan([makeStep(2, ['shared.ts'])]); + sub2.status = 'completed'; + sub2.filesModified = ['shared.ts']; + sub2.steps[0].status = 'completed'; + + const result = delegator.consolidate(plan, [sub1, sub2]); + // Set-based dedup: shared.ts appears once + expect(result.filesModified.filter(f => f === 'shared.ts')).toHaveLength(1); + }); + + it('empty sub-plans β†’ failed', () => { + const plan = makePlan([]); + const result = delegator.consolidate(plan, []); + expect(result.status).toBe('failed'); + }); + }); + + describe('full pipeline: decompose β†’ assign β†’ createSubPlans', () => { + it('end-to-end with 3 independent file groups', () => { + const plan = makePlan([ + // Group A: src/auth/* + makeStep(1, ['src/auth/login.ts'], [], 'read'), + makeStep(2, ['src/auth/login.ts'], [1], 'edit'), + // Group B: src/api/* + makeStep(3, ['src/api/routes.ts'], [], 'read'), + makeStep(4, ['src/api/routes.ts'], [3], 'edit'), + // Group C: src/utils/* + makeStep(5, ['src/utils/helpers.ts'], [], 'read'), + makeStep(6, ['src/utils/helpers.ts'], [5], 'edit'), + ]); + + const agents = [ + makeAgent(AGENT_A, 'Auth Specialist', 0.1), + makeAgent(AGENT_B, 'API Specialist', 0.2), + makeAgent(AGENT_C, 'Utils Specialist', 0.3), + ]; + + // Step 1: Decompose + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(3); + + // Step 2: Assign + const assignments = delegator.assign(clusters, agents, plan); + expect(assignments).toHaveLength(3); + + // Step 3: Create sub-plans + const subPlans = delegator.createSubPlans(plan, assignments); + expect(subPlans).toHaveLength(3); + + // Each sub-plan has exactly 2 steps + for (const sub of subPlans) { + expect(sub.steps).toHaveLength(2); + expect(sub.status).toBe('approved'); + } + + // All 6 steps are accounted for + const allSteps = subPlans.flatMap(s => s.steps.map(st => st.stepNumber)); + expect(allSteps.sort()).toEqual([1, 2, 3, 4, 5, 6]); + }); + + it('single monolithic plan stays as one cluster', () => { + const plan = makePlan([ + makeStep(1, ['src/index.ts']), + makeStep(2, ['src/index.ts', 'src/types.ts'], [1]), + makeStep(3, ['src/types.ts', 'src/index.ts'], [2]), + ]); + + const clusters = delegator.decompose(plan); + expect(clusters).toHaveLength(1); + expect(clusters[0].stepNumbers).toEqual([1, 2, 3]); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts b/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts index d890616ec..b337da3f2 100644 --- a/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts +++ b/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts @@ -302,6 +302,38 @@ describe('CodingPlanEntity', () => { }); }); + describe('risk and security', () => { + it('defaults riskLevel to low', () => { + const plan = new CodingPlanEntity(); + expect(plan.riskLevel).toBe('low'); + }); + + it('defaults securityTier to write', () => { + const plan = new CodingPlanEntity(); + expect(plan.securityTier).toBe('write'); + }); + + it('stores risk assessment data', () => { + const plan = makePlan(); + plan.riskLevel = 'high'; + plan.riskReason = 'Modifies API interfaces'; + plan.securityTier = 'write'; + + expect(plan.riskLevel).toBe('high'); + expect(plan.riskReason).toBe('Modifies API interfaces'); + expect(plan.securityTier).toBe('write'); + }); + + it('critical risk with system tier', () => { + const plan = makePlan(); + plan.riskLevel = 'critical'; + plan.securityTier = 'system'; + + expect(plan.riskLevel).toBe('critical'); + expect(plan.securityTier).toBe('system'); + }); + }); + describe('governance', () => { it('tracks proposal reference', () => { const plan = makePlan({ status: 'proposed' }); diff --git a/src/debug/jtag/tests/unit/code/ExecutionSandbox.test.ts b/src/debug/jtag/tests/unit/code/ExecutionSandbox.test.ts new file mode 100644 index 000000000..221ed7d9d --- /dev/null +++ b/src/debug/jtag/tests/unit/code/ExecutionSandbox.test.ts @@ -0,0 +1,286 @@ +/** + * ExecutionSandbox Unit Tests + * + * Tests process-isolated code execution: + * - Command allowlist enforcement + * - Successful execution with output capture + * - Timeout enforcement (SIGTERM β†’ SIGKILL) + * - Output size truncation + * - Restricted environment variables + * - Spawn error handling + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { ExecutionSandbox, type SandboxConfig, type SandboxResult } from '../../../system/code/server/ExecutionSandbox'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// Mock Logger +vi.mock('../../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +function makeConfig(overrides?: Partial): SandboxConfig { + return { + command: 'node', + args: ['-e', 'console.log("hello")'], + cwd: '/tmp', + timeoutMs: 5000, + maxOutputBytes: 10240, + personaId: 'test-persona-0001' as UUID, + ...overrides, + }; +} + +describe('ExecutionSandbox', () => { + let sandbox: ExecutionSandbox; + + beforeEach(() => { + sandbox = new ExecutionSandbox(); + }); + + describe('command allowlist', () => { + it('rejects commands not in allowlist', async () => { + const config = makeConfig({ command: 'rm' }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(false); + expect(result.exitCode).toBe(-1); + expect(result.error).toContain('not in the sandbox allowlist'); + expect(result.error).toContain('rm'); + }); + + it('rejects arbitrary shell commands', async () => { + const config = makeConfig({ command: 'bash' }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(false); + expect(result.error).toContain('not in the sandbox allowlist'); + }); + + it('rejects curl/wget', async () => { + for (const cmd of ['curl', 'wget']) { + const config = makeConfig({ command: cmd }); + const result = await sandbox.execute(config); + expect(result.success).toBe(false); + expect(result.error).toContain('not in the sandbox allowlist'); + } + }); + + it('allows node', async () => { + const config = makeConfig({ command: 'node', args: ['-e', 'process.exit(0)'] }); + const result = await sandbox.execute(config); + // May fail if node not at expected path, but should NOT fail with allowlist error + expect(result.error ?? '').not.toContain('not in the sandbox allowlist'); + }); + + it('allows npx', async () => { + const config = makeConfig({ command: 'npx', args: ['--version'] }); + const result = await sandbox.execute(config); + expect(result.error ?? '').not.toContain('not in the sandbox allowlist'); + }); + + it('allows tsc', async () => { + const config = makeConfig({ command: 'tsc', args: ['--version'] }); + const result = await sandbox.execute(config); + expect(result.error ?? '').not.toContain('not in the sandbox allowlist'); + }); + + it('allows npm', async () => { + const config = makeConfig({ command: 'npm', args: ['--version'] }); + const result = await sandbox.execute(config); + expect(result.error ?? '').not.toContain('not in the sandbox allowlist'); + }); + + it('extracts basename for path commands', async () => { + // /usr/local/bin/node should still match "node" in allowlist + const config = makeConfig({ command: '/usr/local/bin/node', args: ['-e', 'process.exit(0)'] }); + const result = await sandbox.execute(config); + expect(result.error ?? '').not.toContain('not in the sandbox allowlist'); + }); + }); + + describe('successful execution', () => { + it('captures stdout', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log("sandbox-output")'], + }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(true); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain('sandbox-output'); + expect(result.timedOut).toBe(false); + expect(result.truncated).toBe(false); + }); + + it('captures stderr', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.error("err-msg")'], + }); + const result = await sandbox.execute(config); + + expect(result.exitCode).toBe(0); + expect(result.stderr).toContain('err-msg'); + }); + + it('tracks duration', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'setTimeout(() => {}, 50)'], + }); + const result = await sandbox.execute(config); + + expect(result.durationMs).toBeGreaterThan(0); + }); + + it('reports non-zero exit code as failure', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'process.exit(42)'], + }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(false); + expect(result.exitCode).toBe(42); + expect(result.timedOut).toBe(false); + }); + }); + + describe('timeout enforcement', () => { + it('kills process on timeout', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'setTimeout(() => {}, 60000)'], // Would run 60s + timeoutMs: 500, // Kill after 500ms + }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(false); + expect(result.timedOut).toBe(true); + expect(result.error).toContain('Timed out'); + }, 10_000); + }); + + describe('output size limits', () => { + it('truncates output exceeding maxOutputBytes', async () => { + // Generate output larger than limit + const config = makeConfig({ + command: 'node', + args: ['-e', `for(let i=0;i<500;i++) console.log("x".repeat(100))`], + maxOutputBytes: 1024, // 1KB limit + }); + const result = await sandbox.execute(config); + + expect(result.truncated).toBe(true); + // stdout should be capped near maxOutputBytes + expect(result.stdout.length).toBeLessThanOrEqual(1200); // some tolerance + }); + }); + + describe('environment isolation', () => { + it('sets SANDBOX_EXECUTION env var', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log(process.env.SANDBOX_EXECUTION)'], + }); + const result = await sandbox.execute(config); + + expect(result.stdout).toContain('true'); + }); + + it('sets NODE_ENV to sandbox', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log(process.env.NODE_ENV)'], + }); + const result = await sandbox.execute(config); + + expect(result.stdout).toContain('sandbox'); + }); + + it('sets PERSONA_ID', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log(process.env.PERSONA_ID)'], + personaId: 'test-persona-xyz' as UUID, + }); + const result = await sandbox.execute(config); + + expect(result.stdout).toContain('test-persona-xyz'); + }); + + it('restricts PATH', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log(process.env.PATH)'], + }); + const result = await sandbox.execute(config); + + // PATH should only contain restricted locations + const pathDirs = result.stdout.trim().split(':'); + const allowedDirs = ['/opt/homebrew/bin', '/usr/local/bin', '/usr/bin', '/bin']; + for (const dir of pathDirs) { + expect(allowedDirs).toContain(dir); + } + }); + + it('merges custom env vars', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log(process.env.CUSTOM_VAR)'], + env: { CUSTOM_VAR: 'test-value' }, + }); + const result = await sandbox.execute(config); + + expect(result.stdout).toContain('test-value'); + }); + }); + + describe('result structure', () => { + it('returns all required fields on success', async () => { + const config = makeConfig({ + command: 'node', + args: ['-e', 'console.log("ok")'], + }); + const result = await sandbox.execute(config); + + expect(result).toHaveProperty('success'); + expect(result).toHaveProperty('exitCode'); + expect(result).toHaveProperty('stdout'); + expect(result).toHaveProperty('stderr'); + expect(result).toHaveProperty('durationMs'); + expect(result).toHaveProperty('truncated'); + expect(result).toHaveProperty('timedOut'); + expect(typeof result.success).toBe('boolean'); + expect(typeof result.exitCode).toBe('number'); + expect(typeof result.stdout).toBe('string'); + expect(typeof result.stderr).toBe('string'); + expect(typeof result.durationMs).toBe('number'); + expect(typeof result.truncated).toBe('boolean'); + expect(typeof result.timedOut).toBe('boolean'); + }); + + it('returns all required fields on allowlist rejection', async () => { + const config = makeConfig({ command: 'forbidden-cmd' }); + const result = await sandbox.execute(config); + + expect(result.success).toBe(false); + expect(result.exitCode).toBe(-1); + expect(result.stdout).toBe(''); + expect(result.stderr).toBe(''); + expect(result.durationMs).toBe(0); + expect(result.truncated).toBe(false); + expect(result.timedOut).toBe(false); + expect(result.error).toBeTruthy(); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts b/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts index d71792ba0..ffe2d2a72 100644 --- a/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts +++ b/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts @@ -277,6 +277,19 @@ describe('PlanFormulator', () => { await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid step'); }); + it('throws on self-dependency reference', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Self dep', + steps: [ + { stepNumber: 1, action: 'read', toolCall: 'code/read', dependsOn: [1] }, + ], + }), + }); + + await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid step'); + }); + it('extracts JSON from markdown code blocks', async () => { const planJson = JSON.stringify({ summary: 'Wrapped in markdown', @@ -298,4 +311,87 @@ describe('PlanFormulator', () => { expect(plan.steps).toHaveLength(1); }); }); + + describe('risk assessment', () => { + it('parses riskLevel from LLM response', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Low risk read-only task', + riskLevel: 'low', + riskReason: 'Read-only operation, no file modifications', + steps: [{ + stepNumber: 1, + action: 'read', + toolCall: 'code/read', + toolParams: { filePath: 'test.ts' }, + dependsOn: [], + }], + }), + }); + + const plan = await formulator.formulate(makeTask()); + expect(plan.riskLevel).toBe('low'); + expect(plan.riskReason).toBe('Read-only operation, no file modifications'); + expect(plan.requiredTier).toBe('write'); // low β†’ write tier + }); + + it('defaults riskLevel to medium when omitted', async () => { + mockValidPlan(); // doesn't include riskLevel + + const plan = await formulator.formulate(makeTask()); + expect(plan.riskLevel).toBe('medium'); + expect(plan.requiredTier).toBe('write'); + }); + + it('defaults riskLevel to medium for invalid values', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Bad risk', + riskLevel: 'extreme', + steps: [{ + stepNumber: 1, + action: 'read', + toolCall: 'code/read', + toolParams: {}, + dependsOn: [], + }], + }), + }); + + const plan = await formulator.formulate(makeTask()); + expect(plan.riskLevel).toBe('medium'); + }); + + it('critical risk maps to system tier', async () => { + mockGenerateText.mockResolvedValue({ + text: JSON.stringify({ + summary: 'Critical system change', + riskLevel: 'critical', + riskReason: 'Modifies build configuration', + steps: [{ + stepNumber: 1, + action: 'edit', + toolCall: 'code/edit', + toolParams: { filePath: 'build.config.ts' }, + dependsOn: [], + }], + }), + }); + + const plan = await formulator.formulate(makeTask()); + expect(plan.riskLevel).toBe('critical'); + expect(plan.requiredTier).toBe('system'); + }); + + it('includes risk assessment guidelines in prompt', async () => { + mockValidPlan(); + + await formulator.formulate(makeTask()); + + const request = mockGenerateText.mock.calls[0][0]; + const systemMsg = request.messages.find((m: any) => m.role === 'system'); + expect(systemMsg.content).toContain('riskLevel'); + expect(systemMsg.content).toContain('Risk Assessment Guidelines'); + }); + }); }); diff --git a/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts b/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts new file mode 100644 index 000000000..d835d9004 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts @@ -0,0 +1,174 @@ +/** + * PlanGovernance Unit Tests + * + * Tests risk-based approval routing: + * - shouldRequireApproval: risk level + multi-agent logic + * - resolveDecision: governance outcome β†’ plan status mapping + * - proposePlan: governance proposal creation (integration tested separately) + */ + +import { describe, it, expect } from 'vitest'; +import { PlanGovernance, type GovernanceDecision, type GovernanceOutcome } from '../../../system/code/server/PlanGovernance'; +import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; +import type { RiskLevel, SecurityTierLevel } from '../../../system/code/shared/CodingTypes'; + +// ── Helpers ────────────────────────────────────────────────── + +const PERSONA_A = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; +const PERSONA_B = 'bbbbbbbb-cccc-dddd-eeee-ffffffffffff' as UUID; +const TASK_ID = '11111111-2222-3333-4444-555555555555' as UUID; + +function makePlan(overrides?: { + riskLevel?: RiskLevel; + securityTier?: SecurityTierLevel; + assignees?: UUID[]; +}): CodingPlanEntity { + const plan = new CodingPlanEntity(); + plan.taskId = TASK_ID; + plan.createdById = PERSONA_A; + plan.leadId = PERSONA_A; + plan.summary = 'Test plan'; + plan.taskDescription = 'Test task description'; + plan.assignees = overrides?.assignees ?? [PERSONA_A]; + plan.riskLevel = overrides?.riskLevel ?? 'low'; + plan.securityTier = overrides?.securityTier ?? 'write'; + plan.generatedBy = { provider: 'test', model: 'test-model', temperature: 0, durationMs: 0 }; + plan.steps = [{ + stepNumber: 1, + action: 'read', + description: 'Read main.ts', + targetFiles: ['src/main.ts'], + toolCall: 'code/read', + toolParams: { filePath: 'src/main.ts' }, + dependsOn: [], + verification: 'File content returned', + status: 'pending', + }]; + return plan; +} + +function makeDecision(outcome: GovernanceOutcome): GovernanceDecision { + return { + proposalId: '99999999-8888-7777-6666-555555555555' as UUID, + outcome, + reasoning: `Decision: ${outcome}`, + }; +} + +// ── Tests ──────────────────────────────────────────────────── + +describe('PlanGovernance', () => { + const governance = new PlanGovernance(); + + describe('shouldRequireApproval', () => { + describe('single-agent plans', () => { + it('low risk β†’ no approval required', () => { + const plan = makePlan({ riskLevel: 'low' }); + expect(governance.shouldRequireApproval(plan)).toBe(false); + }); + + it('medium risk β†’ no approval required', () => { + const plan = makePlan({ riskLevel: 'medium' }); + expect(governance.shouldRequireApproval(plan)).toBe(false); + }); + + it('high risk β†’ approval required', () => { + const plan = makePlan({ riskLevel: 'high' }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + + it('critical risk β†’ approval required', () => { + const plan = makePlan({ riskLevel: 'critical' }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + }); + + describe('multi-agent plans', () => { + it('low risk + multi-agent β†’ approval required', () => { + const plan = makePlan({ riskLevel: 'low', assignees: [PERSONA_A, PERSONA_B] }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + + it('medium risk + multi-agent β†’ approval required', () => { + const plan = makePlan({ riskLevel: 'medium', assignees: [PERSONA_A, PERSONA_B] }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + + it('high risk + multi-agent β†’ approval required', () => { + const plan = makePlan({ riskLevel: 'high', assignees: [PERSONA_A, PERSONA_B] }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + }); + + describe('system tier', () => { + it('system tier always requires approval regardless of risk', () => { + const plan = makePlan({ riskLevel: 'low', securityTier: 'system' }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + + it('system tier + single agent still requires approval', () => { + const plan = makePlan({ riskLevel: 'low', securityTier: 'system', assignees: [PERSONA_A] }); + expect(governance.shouldRequireApproval(plan)).toBe(true); + }); + }); + }); + + describe('resolveDecision', () => { + it('approved β†’ approved', () => { + const result = governance.resolveDecision(makeDecision('approved')); + expect(result).toBe('approved'); + }); + + it('approved_with_changes β†’ approved', () => { + const result = governance.resolveDecision(makeDecision('approved_with_changes')); + expect(result).toBe('approved'); + }); + + it('changes_requested β†’ draft', () => { + const result = governance.resolveDecision(makeDecision('changes_requested')); + expect(result).toBe('draft'); + }); + + it('rejected β†’ cancelled', () => { + const result = governance.resolveDecision(makeDecision('rejected')); + expect(result).toBe('cancelled'); + }); + }); + + describe('all outcomes map to valid plan statuses', () => { + const outcomes: GovernanceOutcome[] = ['approved', 'approved_with_changes', 'changes_requested', 'rejected']; + const validStatuses = ['draft', 'proposed', 'approved', 'executing', 'completed', 'partial', 'failed', 'cancelled']; + + for (const outcome of outcomes) { + it(`${outcome} maps to a valid CodingPlanStatus`, () => { + const result = governance.resolveDecision(makeDecision(outcome)); + expect(validStatuses).toContain(result); + }); + } + }); + + describe('approval matrix (exhaustive)', () => { + const riskLevels: RiskLevel[] = ['low', 'medium', 'high', 'critical']; + const tiers: SecurityTierLevel[] = ['discovery', 'read', 'write', 'system']; + + for (const risk of riskLevels) { + for (const tier of tiers) { + for (const multiAgent of [false, true]) { + it(`risk=${risk}, tier=${tier}, multiAgent=${multiAgent}`, () => { + const assignees = multiAgent ? [PERSONA_A, PERSONA_B] : [PERSONA_A]; + const plan = makePlan({ riskLevel: risk, securityTier: tier, assignees }); + const result = governance.shouldRequireApproval(plan); + expect(typeof result).toBe('boolean'); + + // Verify specific cases + if (tier === 'system') expect(result).toBe(true); + if (multiAgent) expect(result).toBe(true); + if (risk === 'high' || risk === 'critical') expect(result).toBe(true); + if (risk === 'low' && tier !== 'system' && !multiAgent) expect(result).toBe(false); + }); + } + } + } + }); +}); diff --git a/src/debug/jtag/tests/unit/code/SecurityTier.test.ts b/src/debug/jtag/tests/unit/code/SecurityTier.test.ts new file mode 100644 index 000000000..b0079d56e --- /dev/null +++ b/src/debug/jtag/tests/unit/code/SecurityTier.test.ts @@ -0,0 +1,200 @@ +/** + * SecurityTier Unit Tests + * + * Tests the risk-based access control tier system: + * - Tier definitions (discovery, read, write, system) + * - Tier lookups and ordering + * - Risk β†’ tier mapping + * - Risk β†’ approval requirement mapping + */ + +import { describe, it, expect } from 'vitest'; +import { + getTier, + tierAtLeast, + riskToTier, + riskRequiresApproval, + TIER_LEVELS, + type SecurityTierLevel, + type RiskLevel, +} from '../../../system/code/server/SecurityTier'; + +describe('SecurityTier', () => { + describe('getTier()', () => { + it('returns discovery tier', () => { + const tier = getTier('discovery'); + expect(tier.level).toBe('discovery'); + expect(tier.allowProcessSpawn).toBe(false); + expect(tier.allowNetworkAccess).toBe(false); + expect(tier.requiresApproval).toBe(false); + expect(tier.maxFileSizeBytes).toBe(0); + }); + + it('returns read tier', () => { + const tier = getTier('read'); + expect(tier.level).toBe('read'); + expect(tier.allowProcessSpawn).toBe(false); + expect(tier.maxFileSizeBytes).toBe(0); + }); + + it('returns write tier', () => { + const tier = getTier('write'); + expect(tier.level).toBe('write'); + expect(tier.allowProcessSpawn).toBe(false); + expect(tier.maxFileSizeBytes).toBeGreaterThan(0); + }); + + it('returns system tier', () => { + const tier = getTier('system'); + expect(tier.level).toBe('system'); + expect(tier.allowProcessSpawn).toBe(true); + expect(tier.allowNetworkAccess).toBe(true); + expect(tier.requiresApproval).toBe(true); + }); + }); + + describe('tier allowlists', () => { + it('discovery tier allows only read-type commands', () => { + const tier = getTier('discovery'); + expect(tier.allowedCommands).toContain('code/tree'); + expect(tier.allowedCommands).toContain('code/search'); + expect(tier.allowedCommands).toContain('code/read'); + expect(tier.allowedCommands).toContain('code/history'); + expect(tier.allowedCommands).not.toContain('code/write'); + expect(tier.allowedCommands).not.toContain('code/edit'); + }); + + it('discovery tier explicitly denies write and system commands', () => { + const tier = getTier('discovery'); + expect(tier.deniedCommands).toContain('code/write'); + expect(tier.deniedCommands).toContain('code/edit'); + expect(tier.deniedCommands).toContain('development/*'); + expect(tier.deniedCommands).toContain('system/*'); + }); + + it('read tier extends discovery with analysis commands', () => { + const tier = getTier('read'); + expect(tier.allowedCommands).toContain('code/tree'); + expect(tier.allowedCommands).toContain('code/diff'); + expect(tier.allowedCommands).toContain('data/list'); + expect(tier.allowedCommands).toContain('data/read'); + expect(tier.allowedCommands).not.toContain('code/write'); + }); + + it('write tier adds mutation commands', () => { + const tier = getTier('write'); + expect(tier.allowedCommands).toContain('code/write'); + expect(tier.allowedCommands).toContain('code/edit'); + expect(tier.allowedCommands).toContain('code/undo'); + }); + + it('write tier denies shell and system commands', () => { + const tier = getTier('write'); + expect(tier.deniedCommands).toContain('development/exec'); + expect(tier.deniedCommands).toContain('development/sandbox-execute'); + expect(tier.deniedCommands).toContain('system/*'); + }); + + it('system tier allows everything', () => { + const tier = getTier('system'); + expect(tier.allowedCommands).toContain('*'); + expect(tier.deniedCommands).toEqual([]); + }); + }); + + describe('tier budgets', () => { + it('discovery tier has moderate budget', () => { + const tier = getTier('discovery'); + expect(tier.maxToolCalls).toBe(30); + expect(tier.maxDurationMs).toBe(60_000); + }); + + it('write tier has tighter tool call budget', () => { + const tier = getTier('write'); + expect(tier.maxToolCalls).toBe(20); + expect(tier.maxDurationMs).toBe(120_000); + }); + + it('system tier has generous budget', () => { + const tier = getTier('system'); + expect(tier.maxToolCalls).toBe(50); + expect(tier.maxDurationMs).toBe(300_000); + }); + }); + + describe('TIER_LEVELS ordering', () => { + it('lists tiers in ascending privilege order', () => { + expect(TIER_LEVELS).toEqual(['discovery', 'read', 'write', 'system']); + }); + }); + + describe('tierAtLeast()', () => { + it('same tier is at least itself', () => { + for (const level of TIER_LEVELS) { + expect(tierAtLeast(level, level)).toBe(true); + } + }); + + it('system is at least every tier', () => { + for (const level of TIER_LEVELS) { + expect(tierAtLeast('system', level)).toBe(true); + } + }); + + it('discovery is not at least write', () => { + expect(tierAtLeast('discovery', 'write')).toBe(false); + }); + + it('write is at least read', () => { + expect(tierAtLeast('write', 'read')).toBe(true); + }); + + it('read is not at least write', () => { + expect(tierAtLeast('read', 'write')).toBe(false); + }); + }); + + describe('riskToTier()', () => { + it('low risk maps to write tier', () => { + expect(riskToTier('low')).toBe('write'); + }); + + it('medium risk maps to write tier', () => { + expect(riskToTier('medium')).toBe('write'); + }); + + it('high risk maps to write tier (governance decides approval)', () => { + expect(riskToTier('high')).toBe('write'); + }); + + it('critical risk maps to system tier', () => { + expect(riskToTier('critical')).toBe('system'); + }); + }); + + describe('riskRequiresApproval()', () => { + it('low risk single-agent does not require approval', () => { + expect(riskRequiresApproval('low', false)).toBe(false); + }); + + it('medium risk single-agent does not require approval', () => { + expect(riskRequiresApproval('medium', false)).toBe(false); + }); + + it('high risk single-agent requires approval', () => { + expect(riskRequiresApproval('high', false)).toBe(true); + }); + + it('critical risk always requires approval', () => { + expect(riskRequiresApproval('critical', false)).toBe(true); + expect(riskRequiresApproval('critical', true)).toBe(true); + }); + + it('multi-agent always requires approval regardless of risk', () => { + const risks: RiskLevel[] = ['low', 'medium', 'high', 'critical']; + for (const risk of risks) { + expect(riskRequiresApproval(risk, true)).toBe(true); + } + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/SkillEntity.test.ts b/src/debug/jtag/tests/unit/code/SkillEntity.test.ts new file mode 100644 index 000000000..772779422 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/SkillEntity.test.ts @@ -0,0 +1,438 @@ +/** + * SkillEntity Unit Tests + * + * Tests the self-modifying skill entity: + * - Construction and default values + * - Validation (required fields, naming convention, spec consistency) + * - Status lifecycle transitions + * - Computed properties (isActive, requiresApproval, canAdvance, nextStatus) + * - Collection and pagination config + */ + +import { describe, it, expect } from 'vitest'; +import { + SkillEntity, + type SkillSpec, + type SkillStatus, + type SkillScope, + type SkillParamSpec, + type SkillResultSpec, + type SkillValidationResults, +} from '../../../system/data/entities/SkillEntity'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; +import { COLLECTIONS } from '../../../system/shared/Constants'; + +function makeSpec(overrides?: Partial): SkillSpec { + return { + name: 'analysis/complexity', + description: 'Analyzes code complexity metrics', + params: [ + { name: 'filePath', type: 'string', description: 'Path to analyze' }, + ], + results: [ + { name: 'complexity', type: 'number', description: 'Cyclomatic complexity score' }, + { name: 'message', type: 'string', description: 'Human-readable summary' }, + ], + implementation: 'Parse the file AST and count decision branches for cyclomatic complexity.', + accessLevel: 'ai-safe', + ...overrides, + }; +} + +function makeSkill(overrides?: Partial): SkillEntity { + const entity = new SkillEntity(); + entity.name = 'analysis/complexity'; + entity.description = 'Analyzes code complexity metrics'; + entity.createdById = '11111111-2222-3333-4444-555555555555' as UUID; + entity.spec = makeSpec(); + entity.scope = 'personal'; + entity.status = 'proposed'; + + if (overrides) { + for (const [key, value] of Object.entries(overrides)) { + (entity as Record)[key] = value; + } + } + + return entity; +} + +describe('SkillEntity', () => { + describe('construction and defaults', () => { + it('creates with default values', () => { + const skill = new SkillEntity(); + + expect(skill.name).toBe(''); + expect(skill.description).toBe(''); + expect(skill.createdById).toBe(''); + expect(skill.scope).toBe('personal'); + expect(skill.status).toBe('proposed'); + expect(skill.generatedFiles).toEqual([]); + expect(skill.proposalId).toBeUndefined(); + expect(skill.outputDir).toBeUndefined(); + expect(skill.validationResults).toBeUndefined(); + expect(skill.activatedAt).toBeUndefined(); + expect(skill.failureReason).toBeUndefined(); + }); + + it('has default spec with empty fields', () => { + const skill = new SkillEntity(); + + expect(skill.spec.name).toBe(''); + expect(skill.spec.description).toBe(''); + expect(skill.spec.params).toEqual([]); + expect(skill.spec.results).toEqual([]); + expect(skill.spec.implementation).toBe(''); + }); + }); + + describe('collection and pagination', () => { + it('has correct static collection', () => { + expect(SkillEntity.collection).toBe(COLLECTIONS.SKILLS); + }); + + it('has correct instance collection', () => { + const skill = new SkillEntity(); + expect(skill.collection).toBe(COLLECTIONS.SKILLS); + }); + + it('returns pagination config', () => { + const config = SkillEntity.getPaginationConfig(); + expect(config.defaultSortField).toBe('createdAt'); + expect(config.defaultSortDirection).toBe('desc'); + expect(config.defaultPageSize).toBe(20); + expect(config.cursorField).toBe('createdAt'); + }); + }); + + describe('validation', () => { + it('validates a well-formed personal skill', () => { + const skill = makeSkill(); + const result = skill.validate(); + expect(result.success).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('validates a well-formed team skill', () => { + const skill = makeSkill({ scope: 'team' }); + const result = skill.validate(); + expect(result.success).toBe(true); + }); + + it('rejects missing name', () => { + const skill = makeSkill({ name: '' }); + skill.spec = makeSpec({ name: '' }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('name'); + }); + + it('rejects invalid naming convention', () => { + const skill = makeSkill({ name: 'InvalidName' }); + skill.spec = makeSpec({ name: 'InvalidName' }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('naming convention'); + }); + + it('accepts simple names without slashes', () => { + const skill = makeSkill({ name: 'lint' }); + skill.spec = makeSpec({ name: 'lint' }); + const result = skill.validate(); + expect(result.success).toBe(true); + }); + + it('accepts multi-level names', () => { + const skill = makeSkill({ name: 'code/analysis/deep' }); + skill.spec = makeSpec({ name: 'code/analysis/deep' }); + const result = skill.validate(); + expect(result.success).toBe(true); + }); + + it('rejects missing description', () => { + const skill = makeSkill({ description: '' }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('description'); + }); + + it('rejects missing createdById', () => { + const skill = makeSkill({ createdById: '' as UUID }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('createdById'); + }); + + it('rejects mismatched spec.name and entity name', () => { + const skill = makeSkill(); + skill.spec = makeSpec({ name: 'different/name' }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('must match'); + }); + + it('rejects missing implementation in spec', () => { + const skill = makeSkill(); + skill.spec = makeSpec({ implementation: '' }); + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('implementation'); + }); + + it('rejects invalid scope', () => { + const skill = makeSkill(); + (skill as Record).scope = 'invalid'; + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('scope'); + }); + + it('rejects invalid status', () => { + const skill = makeSkill(); + (skill as Record).status = 'invalid'; + const result = skill.validate(); + expect(result.success).toBe(false); + expect(result.error).toContain('status'); + }); + + it('validates all valid statuses', () => { + const statuses: SkillStatus[] = [ + 'proposed', 'approved', 'generated', 'validated', 'active', 'failed', 'deprecated', + ]; + for (const status of statuses) { + const skill = makeSkill({ status }); + const result = skill.validate(); + expect(result.success).toBe(true); + } + }); + + it('validates all valid scopes', () => { + const scopes: SkillScope[] = ['personal', 'team']; + for (const scope of scopes) { + const skill = makeSkill({ scope }); + const result = skill.validate(); + expect(result.success).toBe(true); + } + }); + }); + + describe('computed properties', () => { + it('isActive returns true for active skills', () => { + const skill = makeSkill({ status: 'active' }); + expect(skill.isActive).toBe(true); + }); + + it('isActive returns false for non-active skills', () => { + const statuses: SkillStatus[] = ['proposed', 'approved', 'generated', 'validated', 'failed', 'deprecated']; + for (const status of statuses) { + const skill = makeSkill({ status }); + expect(skill.isActive).toBe(false); + } + }); + + it('requiresApproval returns true for team scope', () => { + const skill = makeSkill({ scope: 'team' }); + expect(skill.requiresApproval).toBe(true); + }); + + it('requiresApproval returns false for personal scope', () => { + const skill = makeSkill({ scope: 'personal' }); + expect(skill.requiresApproval).toBe(false); + }); + + describe('canAdvance', () => { + it('personal proposed can advance', () => { + const skill = makeSkill({ status: 'proposed', scope: 'personal' }); + expect(skill.canAdvance).toBe(true); + }); + + it('team proposed without proposal cannot advance', () => { + const skill = makeSkill({ status: 'proposed', scope: 'team' }); + expect(skill.canAdvance).toBe(false); + }); + + it('team proposed with proposal can advance', () => { + const skill = makeSkill({ + status: 'proposed', + scope: 'team', + proposalId: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, + }); + expect(skill.canAdvance).toBe(true); + }); + + it('approved can advance', () => { + const skill = makeSkill({ status: 'approved' }); + expect(skill.canAdvance).toBe(true); + }); + + it('generated can advance', () => { + const skill = makeSkill({ status: 'generated' }); + expect(skill.canAdvance).toBe(true); + }); + + it('validated can advance', () => { + const skill = makeSkill({ status: 'validated' }); + expect(skill.canAdvance).toBe(true); + }); + + it('active cannot advance', () => { + const skill = makeSkill({ status: 'active' }); + expect(skill.canAdvance).toBe(false); + }); + + it('failed cannot advance', () => { + const skill = makeSkill({ status: 'failed' }); + expect(skill.canAdvance).toBe(false); + }); + + it('deprecated cannot advance', () => { + const skill = makeSkill({ status: 'deprecated' }); + expect(skill.canAdvance).toBe(false); + }); + }); + + describe('nextStatus', () => { + it('personal proposed β†’ generated', () => { + const skill = makeSkill({ status: 'proposed', scope: 'personal' }); + expect(skill.nextStatus).toBe('generated'); + }); + + it('team proposed β†’ approved', () => { + const skill = makeSkill({ status: 'proposed', scope: 'team' }); + expect(skill.nextStatus).toBe('approved'); + }); + + it('approved β†’ generated', () => { + const skill = makeSkill({ status: 'approved' }); + expect(skill.nextStatus).toBe('generated'); + }); + + it('generated β†’ validated', () => { + const skill = makeSkill({ status: 'generated' }); + expect(skill.nextStatus).toBe('validated'); + }); + + it('validated β†’ active', () => { + const skill = makeSkill({ status: 'validated' }); + expect(skill.nextStatus).toBe('active'); + }); + + it('active has no next status', () => { + const skill = makeSkill({ status: 'active' }); + expect(skill.nextStatus).toBeUndefined(); + }); + + it('failed has no next status', () => { + const skill = makeSkill({ status: 'failed' }); + expect(skill.nextStatus).toBeUndefined(); + }); + + it('deprecated has no next status', () => { + const skill = makeSkill({ status: 'deprecated' }); + expect(skill.nextStatus).toBeUndefined(); + }); + }); + }); + + describe('spec types', () => { + it('supports param specs with optional fields', () => { + const params: SkillParamSpec[] = [ + { name: 'required', type: 'string' }, + { name: 'optional', type: 'number', optional: true, description: 'An optional param' }, + ]; + const skill = makeSkill(); + skill.spec = makeSpec({ params }); + const result = skill.validate(); + expect(result.success).toBe(true); + expect(skill.spec.params).toHaveLength(2); + expect(skill.spec.params[1].optional).toBe(true); + }); + + it('supports result specs', () => { + const results: SkillResultSpec[] = [ + { name: 'output', type: 'string', description: 'The output' }, + ]; + const skill = makeSkill(); + skill.spec = makeSpec({ results }); + const result = skill.validate(); + expect(result.success).toBe(true); + expect(skill.spec.results).toHaveLength(1); + }); + + it('supports examples in spec', () => { + const skill = makeSkill(); + skill.spec = makeSpec({ + examples: [ + { + description: 'Analyze a simple file', + command: 'skill/execute --name=analysis/complexity --filePath=utils.ts', + expectedResult: 'Complexity: 3', + }, + ], + }); + const result = skill.validate(); + expect(result.success).toBe(true); + expect(skill.spec.examples).toHaveLength(1); + }); + + it('supports different access levels', () => { + for (const level of ['ai-safe', 'internal', 'system'] as const) { + const skill = makeSkill(); + skill.spec = makeSpec({ accessLevel: level }); + const result = skill.validate(); + expect(result.success).toBe(true); + } + }); + }); + + describe('validation results', () => { + it('stores validation results', () => { + const validation: SkillValidationResults = { + compiled: true, + testsRun: 5, + testsPassed: 4, + errors: ['Test 3 failed: expected 42 got 41'], + durationMs: 1200, + }; + const skill = makeSkill({ validationResults: validation }); + expect(skill.validationResults).toEqual(validation); + expect(skill.validationResults!.compiled).toBe(true); + expect(skill.validationResults!.testsRun).toBe(5); + expect(skill.validationResults!.testsPassed).toBe(4); + expect(skill.validationResults!.errors).toHaveLength(1); + }); + }); + + describe('lifecycle tracking fields', () => { + it('tracks generated files', () => { + const files = ['/path/to/ServerCommand.ts', '/path/to/Types.ts']; + const skill = makeSkill({ generatedFiles: files }); + expect(skill.generatedFiles).toEqual(files); + }); + + it('tracks output directory', () => { + const skill = makeSkill({ outputDir: '/tmp/generated/analysis/complexity' }); + expect(skill.outputDir).toBe('/tmp/generated/analysis/complexity'); + }); + + it('tracks activation timestamp', () => { + const now = Date.now(); + const skill = makeSkill({ activatedAt: now }); + expect(skill.activatedAt).toBe(now); + }); + + it('tracks failure reason', () => { + const skill = makeSkill({ + status: 'failed', + failureReason: 'Compilation error: missing import', + }); + expect(skill.failureReason).toBe('Compilation error: missing import'); + }); + + it('tracks proposal ID for team skills', () => { + const proposalId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; + const skill = makeSkill({ scope: 'team', proposalId }); + expect(skill.proposalId).toBe(proposalId); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts b/src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts new file mode 100644 index 000000000..e2d204b13 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts @@ -0,0 +1,331 @@ +/** + * Skill Lifecycle Tests + * + * Tests the skill lifecycle state machine: + * - Personal skill: proposed β†’ generated β†’ validated β†’ active + * - Team skill: proposed β†’ approved β†’ generated β†’ validated β†’ active + * - Failure paths at each stage + * - Validation results tracking + * - Scope and governance rules + */ + +import { describe, it, expect } from 'vitest'; +import { + SkillEntity, + type SkillSpec, + type SkillStatus, + type SkillValidationResults, +} from '../../../system/data/entities/SkillEntity'; +import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; + +// ── Helpers ────────────────────────────────────────────────── + +const PERSONA_ID = '11111111-2222-3333-4444-555555555555' as UUID; +const PROPOSAL_ID = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; + +function makeSpec(name = 'analysis/complexity'): SkillSpec { + return { + name, + description: 'Analyzes code complexity', + params: [{ name: 'filePath', type: 'string' }], + results: [{ name: 'complexity', type: 'number' }], + implementation: 'Count decision branches in AST', + accessLevel: 'ai-safe', + }; +} + +function makeSkillEntity(status: SkillStatus = 'proposed', scope: 'personal' | 'team' = 'personal'): SkillEntity { + const entity = new SkillEntity(); + entity.name = 'analysis/complexity'; + entity.description = 'Analyzes code complexity'; + entity.createdById = PERSONA_ID; + entity.spec = makeSpec(); + entity.scope = scope; + entity.status = status; + return entity; +} + +// ── Tests ──────────────────────────────────────────────────── + +describe('Skill Lifecycle', () => { + describe('personal skill: full lifecycle', () => { + it('proposed β†’ generated β†’ validated β†’ active', () => { + const entity = makeSkillEntity('proposed', 'personal'); + + // Step 1: proposed + expect(entity.status).toBe('proposed'); + expect(entity.canAdvance).toBe(true); + expect(entity.nextStatus).toBe('generated'); + expect(entity.requiresApproval).toBe(false); + + // Step 2: generate + entity.status = 'generated'; + entity.outputDir = '/tmp/skills/analysis/complexity'; + entity.generatedFiles = ['ServerCommand.ts', 'Types.ts', 'BrowserCommand.ts']; + expect(entity.canAdvance).toBe(true); + expect(entity.nextStatus).toBe('validated'); + expect(entity.generatedFiles).toHaveLength(3); + + // Step 3: validate + entity.status = 'validated'; + entity.validationResults = { + compiled: true, + testsRun: 3, + testsPassed: 3, + errors: [], + durationMs: 500, + }; + expect(entity.canAdvance).toBe(true); + expect(entity.nextStatus).toBe('active'); + + // Step 4: activate + entity.status = 'active'; + entity.activatedAt = Date.now(); + expect(entity.isActive).toBe(true); + expect(entity.canAdvance).toBe(false); + expect(entity.nextStatus).toBeUndefined(); + + // Entity still validates at every stage + expect(entity.validate().success).toBe(true); + }); + }); + + describe('team skill: full lifecycle with governance', () => { + it('proposed β†’ approved β†’ generated β†’ validated β†’ active', () => { + const entity = makeSkillEntity('proposed', 'team'); + + // Step 1: proposed β€” cannot advance without proposal + expect(entity.requiresApproval).toBe(true); + expect(entity.canAdvance).toBe(false); + expect(entity.nextStatus).toBe('approved'); + + // Set proposal ID β†’ now can advance + entity.proposalId = PROPOSAL_ID; + expect(entity.canAdvance).toBe(true); + + // Step 2: approved + entity.status = 'approved'; + expect(entity.canAdvance).toBe(true); + expect(entity.nextStatus).toBe('generated'); + + // Step 3: generated + entity.status = 'generated'; + entity.outputDir = '/tmp/commands/analysis/complexity'; + entity.generatedFiles = ['ServerCommand.ts', 'Types.ts']; + expect(entity.nextStatus).toBe('validated'); + + // Step 4: validated + entity.status = 'validated'; + entity.validationResults = { + compiled: true, + testsRun: 5, + testsPassed: 5, + errors: [], + durationMs: 1200, + }; + + // Step 5: activated + entity.status = 'active'; + entity.activatedAt = Date.now(); + expect(entity.isActive).toBe(true); + expect(entity.validate().success).toBe(true); + }); + }); + + describe('failure paths', () => { + it('failure at generation stage', () => { + const entity = makeSkillEntity('proposed', 'personal'); + + entity.status = 'failed'; + entity.failureReason = 'CommandGenerator error: invalid spec'; + + expect(entity.canAdvance).toBe(false); + expect(entity.nextStatus).toBeUndefined(); + expect(entity.isActive).toBe(false); + expect(entity.failureReason).toContain('CommandGenerator'); + expect(entity.validate().success).toBe(true); + }); + + it('failure at validation β€” compilation error', () => { + const entity = makeSkillEntity('generated'); + entity.outputDir = '/tmp/skills/test'; + entity.generatedFiles = ['ServerCommand.ts']; + + entity.status = 'failed'; + entity.failureReason = 'Compilation failed: TS2345 - Argument type mismatch'; + entity.validationResults = { + compiled: false, + testsRun: 0, + testsPassed: 0, + errors: ['Compilation failed: TS2345 - Argument type mismatch'], + durationMs: 200, + }; + + expect(entity.canAdvance).toBe(false); + expect(entity.validationResults.compiled).toBe(false); + expect(entity.validationResults.errors).toHaveLength(1); + }); + + it('failure at validation β€” tests fail', () => { + const entity = makeSkillEntity('generated'); + entity.outputDir = '/tmp/skills/test'; + entity.generatedFiles = ['ServerCommand.ts']; + + entity.status = 'failed'; + entity.validationResults = { + compiled: true, + testsRun: 10, + testsPassed: 7, + errors: [ + 'Test "edge case" failed: expected 0, got -1', + 'Test "null input" failed: TypeError', + 'Test "large input" failed: timeout after 60000ms', + ], + durationMs: 60500, + }; + entity.failureReason = entity.validationResults.errors.join('; '); + + expect(entity.validationResults.compiled).toBe(true); + expect(entity.validationResults.testsPassed).toBe(7); + expect(entity.validationResults.testsRun).toBe(10); + expect(entity.validationResults.errors).toHaveLength(3); + }); + + it('failure at activation', () => { + const entity = makeSkillEntity('validated'); + entity.outputDir = '/tmp/skills/test'; + entity.generatedFiles = ['ServerCommand.ts']; + entity.validationResults = { + compiled: true, testsRun: 1, testsPassed: 1, errors: [], durationMs: 100, + }; + + entity.status = 'failed'; + entity.failureReason = 'Activation failed: dynamic import error'; + + expect(entity.canAdvance).toBe(false); + expect(entity.isActive).toBe(false); + }); + }); + + describe('deprecation', () => { + it('active skill can be deprecated', () => { + const entity = makeSkillEntity('active'); + entity.activatedAt = Date.now() - 86400000; // 1 day ago + + expect(entity.isActive).toBe(true); + + entity.status = 'deprecated'; + expect(entity.isActive).toBe(false); + expect(entity.canAdvance).toBe(false); + expect(entity.nextStatus).toBeUndefined(); + expect(entity.validate().success).toBe(true); + }); + }); + + describe('validation results tracking', () => { + it('tracks successful validation with full metrics', () => { + const results: SkillValidationResults = { + compiled: true, + testsRun: 10, + testsPassed: 10, + errors: [], + durationMs: 2500, + }; + + const entity = makeSkillEntity('generated'); + entity.validationResults = results; + entity.status = 'validated'; + + expect(entity.validationResults.compiled).toBe(true); + expect(entity.validationResults.testsRun).toBe(10); + expect(entity.validationResults.testsPassed).toBe(10); + expect(entity.validationResults.errors).toHaveLength(0); + expect(entity.validationResults.durationMs).toBe(2500); + }); + }); + + describe('scope and governance rules', () => { + it('personal skill does not require approval', () => { + const entity = makeSkillEntity('proposed', 'personal'); + expect(entity.requiresApproval).toBe(false); + expect(entity.canAdvance).toBe(true); + }); + + it('team skill requires approval and governance', () => { + const entity = makeSkillEntity('proposed', 'team'); + expect(entity.requiresApproval).toBe(true); + expect(entity.canAdvance).toBe(false); // No proposal yet + + entity.proposalId = PROPOSAL_ID; + expect(entity.canAdvance).toBe(true); + }); + + it('team skills go through approved state', () => { + const entity = makeSkillEntity('proposed', 'team'); + expect(entity.nextStatus).toBe('approved'); + }); + + it('personal skills skip approved state', () => { + const entity = makeSkillEntity('proposed', 'personal'); + expect(entity.nextStatus).toBe('generated'); + }); + }); + + describe('entity validation consistency across all stages', () => { + it('all lifecycle stages produce valid entities', () => { + const stages: Array<{ status: SkillStatus; extras?: Record }> = [ + { status: 'proposed' }, + { status: 'approved' }, + { status: 'generated', extras: { outputDir: '/tmp/out', generatedFiles: ['a.ts'] } }, + { status: 'validated', extras: { + outputDir: '/tmp/out', + generatedFiles: ['a.ts'], + validationResults: { compiled: true, testsRun: 1, testsPassed: 1, errors: [], durationMs: 100 }, + }}, + { status: 'active', extras: { + outputDir: '/tmp/out', + generatedFiles: ['a.ts'], + activatedAt: Date.now(), + }}, + { status: 'failed', extras: { failureReason: 'Something went wrong' } }, + { status: 'deprecated' }, + ]; + + for (const { status, extras } of stages) { + const entity = makeSkillEntity(status); + if (extras) { + for (const [key, value] of Object.entries(extras)) { + (entity as Record)[key] = value; + } + } + const result = entity.validate(); + expect(result.success).toBe(true); + } + }); + }); + + describe('multiple skills with different names', () => { + it('supports various command naming patterns', () => { + const names = [ + 'lint', + 'code/lint', + 'analysis/complexity', + 'code/analysis/deep-scan', + 'my-tool', + ]; + + for (const name of names) { + const entity = new SkillEntity(); + entity.name = name; + entity.description = `A skill called ${name}`; + entity.createdById = PERSONA_ID; + entity.spec = makeSpec(name); + entity.scope = 'personal'; + entity.status = 'proposed'; + + const result = entity.validate(); + expect(result.success).toBe(true); + } + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/ToolAllowlistEnforcer.test.ts b/src/debug/jtag/tests/unit/code/ToolAllowlistEnforcer.test.ts new file mode 100644 index 000000000..6ca2e7d4a --- /dev/null +++ b/src/debug/jtag/tests/unit/code/ToolAllowlistEnforcer.test.ts @@ -0,0 +1,281 @@ +/** + * ToolAllowlistEnforcer Unit Tests + * + * Tests the per-tier tool filtering gateway: + * - Denied commands always blocked + * - Allowed commands checked via glob matching + * - Process spawn restrictions + * - File size limits for write operations + * - Audit logging + * - Throwing vs non-throwing check modes + */ + +import { describe, it, expect } from 'vitest'; +import { ToolAllowlistEnforcer, ToolDeniedError } from '../../../system/code/server/ToolAllowlistEnforcer'; +import { getTier } from '../../../system/code/server/SecurityTier'; +import type { SecurityTier } from '../../../system/code/server/SecurityTier'; + +describe('ToolAllowlistEnforcer', () => { + describe('discovery tier', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + + it('allows code/read', () => { + expect(() => enforcer.enforce('code/read')).not.toThrow(); + }); + + it('allows code/tree', () => { + expect(() => enforcer.enforce('code/tree')).not.toThrow(); + }); + + it('allows code/search', () => { + expect(() => enforcer.enforce('code/search')).not.toThrow(); + }); + + it('allows code/history', () => { + expect(() => enforcer.enforce('code/history')).not.toThrow(); + }); + + it('blocks code/write (explicit deny)', () => { + expect(() => enforcer.enforce('code/write')).toThrow(ToolDeniedError); + }); + + it('blocks code/edit (explicit deny)', () => { + expect(() => enforcer.enforce('code/edit')).toThrow(ToolDeniedError); + }); + + it('blocks development/* (glob deny)', () => { + expect(() => enforcer.enforce('development/exec')).toThrow(ToolDeniedError); + expect(() => enforcer.enforce('development/sandbox-execute')).toThrow(ToolDeniedError); + }); + + it('blocks system/* (glob deny)', () => { + expect(() => enforcer.enforce('system/anything')).toThrow(ToolDeniedError); + }); + + it('blocks unknown commands not in allowlist', () => { + expect(() => enforcer.enforce('data/list')).toThrow(ToolDeniedError); + }); + }); + + describe('read tier', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('read')); + + it('allows discovery commands', () => { + expect(() => enforcer.enforce('code/read')).not.toThrow(); + expect(() => enforcer.enforce('code/tree')).not.toThrow(); + }); + + it('allows data/list and data/read', () => { + expect(() => enforcer.enforce('data/list')).not.toThrow(); + expect(() => enforcer.enforce('data/read')).not.toThrow(); + }); + + it('allows code/diff', () => { + expect(() => enforcer.enforce('code/diff')).not.toThrow(); + }); + + it('blocks code/write', () => { + expect(() => enforcer.enforce('code/write')).toThrow(ToolDeniedError); + }); + }); + + describe('write tier', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + + it('allows read + write commands', () => { + expect(() => enforcer.enforce('code/read')).not.toThrow(); + expect(() => enforcer.enforce('code/write')).not.toThrow(); + expect(() => enforcer.enforce('code/edit')).not.toThrow(); + expect(() => enforcer.enforce('code/undo')).not.toThrow(); + }); + + it('blocks development/exec (explicit deny)', () => { + expect(() => enforcer.enforce('development/exec')).toThrow(ToolDeniedError); + }); + + it('blocks development/sandbox-execute (explicit deny)', () => { + expect(() => enforcer.enforce('development/sandbox-execute')).toThrow(ToolDeniedError); + }); + + it('blocks system/* commands', () => { + expect(() => enforcer.enforce('system/shell')).toThrow(ToolDeniedError); + }); + }); + + describe('system tier', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('system')); + + it('allows everything (wildcard)', () => { + expect(() => enforcer.enforce('code/read')).not.toThrow(); + expect(() => enforcer.enforce('code/write')).not.toThrow(); + expect(() => enforcer.enforce('development/exec')).not.toThrow(); + expect(() => enforcer.enforce('system/anything')).not.toThrow(); + expect(() => enforcer.enforce('whatever/command')).not.toThrow(); + }); + }); + + describe('file size enforcement', () => { + it('write tier blocks oversized writes', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + const oversizedContent = 'x'.repeat(2_000_000); // 2MB > 1MB limit + + const result = enforcer.check('code/write', { content: oversizedContent }); + expect(result.allowed).toBe(false); + expect(result.reason).toContain('exceeds tier limit'); + }); + + it('write tier allows content within size limit', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + const content = 'x'.repeat(1000); + + const result = enforcer.check('code/write', { content }); + expect(result.allowed).toBe(true); + }); + + it('code/edit also checks file size', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + const oversizedContent = 'x'.repeat(2_000_000); + + const result = enforcer.check('code/edit', { content: oversizedContent }); + expect(result.allowed).toBe(false); + }); + + it('discovery tier skips size check (no writes allowed anyway)', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + // code/write is denied in discovery, so even a small write is blocked + const result = enforcer.check('code/write', { content: 'small' }); + expect(result.allowed).toBe(false); + expect(result.reason).toContain('denied'); + }); + }); + + describe('process spawn restriction', () => { + it('write tier blocks process spawn commands', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + // development/exec is already in denied list for write tier, but also checked via allowProcessSpawn + const result = enforcer.check('development/exec'); + expect(result.allowed).toBe(false); + }); + + it('system tier allows process spawn', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('system')); + const result = enforcer.check('development/exec'); + expect(result.allowed).toBe(true); + }); + }); + + describe('check() (non-throwing)', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + + it('returns allowed=true for permitted commands', () => { + const result = enforcer.check('code/read'); + expect(result.allowed).toBe(true); + expect(result.toolName).toBe('code/read'); + expect(result.tierLevel).toBe('discovery'); + }); + + it('returns allowed=false for denied commands', () => { + const result = enforcer.check('code/write'); + expect(result.allowed).toBe(false); + expect(result.toolName).toBe('code/write'); + expect(result.tierLevel).toBe('discovery'); + expect(result.reason).toBeTruthy(); + }); + }); + + describe('audit log', () => { + it('records every enforce() call', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('write')); + + enforcer.enforce('code/read'); + enforcer.enforce('code/write'); + try { enforcer.enforce('development/exec'); } catch { /* expected */ } + + expect(enforcer.auditLog).toHaveLength(3); + expect(enforcer.auditLog[0].allowed).toBe(true); + expect(enforcer.auditLog[0].toolName).toBe('code/read'); + expect(enforcer.auditLog[1].allowed).toBe(true); + expect(enforcer.auditLog[1].toolName).toBe('code/write'); + expect(enforcer.auditLog[2].allowed).toBe(false); + expect(enforcer.auditLog[2].toolName).toBe('development/exec'); + }); + + it('check() does NOT record to audit log', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + + enforcer.check('code/read'); + enforcer.check('code/write'); + + expect(enforcer.auditLog).toHaveLength(0); + }); + }); + + describe('ToolDeniedError', () => { + it('has correct properties', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + + try { + enforcer.enforce('code/write'); + expect.unreachable('Should have thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ToolDeniedError); + const denied = error as ToolDeniedError; + expect(denied.toolName).toBe('code/write'); + expect(denied.tierLevel).toBe('discovery'); + expect(denied.message).toContain('code/write'); + expect(denied.message).toContain('denied'); + } + }); + }); + + describe('glob matching', () => { + it('exact match works', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + const result = enforcer.check('code/read'); + expect(result.allowed).toBe(true); + }); + + it('wildcard * matches everything', () => { + const enforcer = new ToolAllowlistEnforcer(getTier('system')); + const result = enforcer.check('literally/anything'); + expect(result.allowed).toBe(true); + }); + + it('prefix/* matches prefix/anything', () => { + // discovery tier denies development/* + const enforcer = new ToolAllowlistEnforcer(getTier('discovery')); + expect(enforcer.check('development/exec').allowed).toBe(false); + expect(enforcer.check('development/build').allowed).toBe(false); + expect(enforcer.check('development/sandbox-execute').allowed).toBe(false); + }); + + it('prefix/* does not match the prefix itself', () => { + // Create a custom tier for testing + const customTier: SecurityTier = { + level: 'write', + allowedCommands: ['code/*'], + deniedCommands: [], + maxToolCalls: 10, + maxDurationMs: 60_000, + maxFileSizeBytes: 0, + allowProcessSpawn: false, + allowNetworkAccess: false, + requiresApproval: false, + }; + const enforcer = new ToolAllowlistEnforcer(customTier); + + // 'code/*' should match 'code/read' but NOT 'code' itself + expect(enforcer.check('code/read').allowed).toBe(true); + expect(enforcer.check('code').allowed).toBe(false); + }); + }); + + describe('tier property access', () => { + it('exposes the tier', () => { + const tier = getTier('write'); + const enforcer = new ToolAllowlistEnforcer(tier); + expect(enforcer.tier).toBe(tier); + expect(enforcer.tier.level).toBe('write'); + }); + }); +}); From e71ff665b6e0f3c0ac21f6574ff789194316eac0 Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 20:44:22 -0600 Subject: [PATCH 06/14] Fix coordination system killing AI engagement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two mechanical throttle layers were overriding AI cognition: 1. Temperature system: each AI "servicing" a room subtracted -0.2, so 14 personas crashed rooms to 0.00 in 35 seconds. Fixed by flipping to +0.05 warmth (active conversation stays alive). Removed hard -0.1 priority penalty for cold rooms. 2. InferenceCoordinator: gating calls consumed per-message "cards" in messageResponders, so when actual response generation tried to acquire a slot with the same messageId, every persona was denied. Rewrote from 489β†’197 lines, removing 6 mechanical rules (card dealing, responder caps, reserved slots, cooldowns, stagger delays, auto-thinning). Kept only hardware capacity protection. Result: AIs respond within seconds instead of being silenced. --- .../server/ChatCoordinationStream.ts | 11 +- .../server/InferenceCoordinator.ts | 432 +++--------------- .../user/server/modules/PersonaInbox.ts | 18 +- .../server/modules/PersonaMessageEvaluator.ts | 2 +- 4 files changed, 87 insertions(+), 376 deletions(-) diff --git a/src/debug/jtag/system/coordination/server/ChatCoordinationStream.ts b/src/debug/jtag/system/coordination/server/ChatCoordinationStream.ts index 55d434667..71c85810c 100644 --- a/src/debug/jtag/system/coordination/server/ChatCoordinationStream.ts +++ b/src/debug/jtag/system/coordination/server/ChatCoordinationStream.ts @@ -213,14 +213,19 @@ export class ChatCoordinationStream extends BaseCoordinationStream = { 'local': 'local-inference', }; -const DEFAULT_PROVIDER_LIMITS: Record = { - // LOCAL INFERENCE GROUP: Worker pool with multiple model instances - // Default 3 concurrent to match auto-detected workers (can be configured via INFERENCE_WORKERS) - 'local-inference': { - maxConcurrent: 3, // Worker pool handles concurrent requests - staggerDelayMs: 50, // Minimal stagger with pool - cooldownMs: 200 // Reduced cooldown with concurrent capacity - }, - 'anthropic': { - maxConcurrent: 15, // API rate limits are generous - staggerDelayMs: 100, - cooldownMs: 100 - }, - 'openai': { - maxConcurrent: 15, - staggerDelayMs: 100, - cooldownMs: 100 - }, - 'groq': { - maxConcurrent: 5, // Groq has aggressive rate limits but still decent - staggerDelayMs: 500, - cooldownMs: 1000 - }, - 'deepseek': { - maxConcurrent: 8, - staggerDelayMs: 200, - cooldownMs: 300 - }, - 'xai': { - maxConcurrent: 8, - staggerDelayMs: 200, - cooldownMs: 300 - }, - 'together': { - maxConcurrent: 10, - staggerDelayMs: 200, - cooldownMs: 300 - } +/** + * Per-provider hardware/API concurrency limits. + * These represent REAL constraints β€” not policy throttles. + */ +const PROVIDER_CAPACITY: Record = { + 'local-inference': 3, // Worker pool with multiple model instances + 'anthropic': 15, // Generous API limits + 'openai': 15, + 'groq': 5, // Aggressive rate limits but decent concurrency + 'deepseek': 8, + 'xai': 8, + 'together': 10, + 'google': 10, + 'fireworks': 10, // REST API, decent concurrency + 'alibaba': 8, // Qwen/DashScope REST API }; -// ========== RTOS SCHEDULING CONSTANTS ========== - -// Maximum responders per message (across all providers) -const MAX_RESPONDERS_PER_MESSAGE = 5; - -// Reserved slots for local-inference (guaranteed seats at table) -// With worker pool, local-inference can handle multiple concurrent requests -const RESERVED_LOCAL_INFERENCE_SLOTS = 2; // 2 of 5 slots reserved for local-inference -const MAX_CLOUD_RESPONDERS = MAX_RESPONDERS_PER_MESSAGE - RESERVED_LOCAL_INFERENCE_SLOTS; - -// Stale request timeout - kick requests waiting too long (RTOS preemption) -const STALE_WAIT_TIMEOUT_MS = 20000; // 20 seconds max wait (faster than before) - -// Auto-thinning: Max pending requests per provider before dropping oldest -// When queue exceeds this, oldest entries are evicted (newest-first priority) -const MAX_PENDING_PER_PROVIDER = 3; - -// Message age cutoff - messages older than this are deprioritized -const MESSAGE_FRESHNESS_MS = 30000; // 30 seconds - newer messages get priority - -// Card dealing: Max slots per persona per message window -// Ensures no single persona hogs all slots -const MAX_SLOTS_PER_PERSONA_PER_MESSAGE = 1; - class InferenceCoordinatorImpl { - private activeSlots: Map = new Map(); // slotKey -> slots - private messageResponders: Map> = new Map(); // messageId -> persona IDs - private messageProviders: Map> = new Map(); // messageId -> provider slot keys (for diversity) - private lastRequestTime: Map = new Map(); // personaId -> timestamp - private providerLimits: Map = new Map(); - private waitQueue: Map = new Map(); // messageId -> waiting personas + private activeSlots: Map = new Map(); constructor() { - // Initialize provider limits - for (const [provider, limits] of Object.entries(DEFAULT_PROVIDER_LIMITS)) { - this.providerLimits.set(provider, limits); + for (const provider of Object.keys(PROVIDER_CAPACITY)) { this.activeSlots.set(provider, []); } } - /** - * Check if provider is local-inference group - */ - private isLocalInference(provider: string): boolean { - const slotKey = this.getSlotKey(provider); - return slotKey === 'local-inference'; - } - - /** - * Auto-thin queue when overloaded (RTOS preemption) - * - * Strategy: Newest-first priority - * - When queue exceeds MAX_PENDING_PER_PROVIDER, drop oldest entries - * - Stale messages (older than MESSAGE_FRESHNESS_MS) get deprioritized - * - This ensures the system stays responsive even under load - */ - private autoThinQueue(slotKey: string): number { - const slots = this.activeSlots.get(slotKey) || []; - const now = Date.now(); - let evicted = 0; - - // If under limit, no thinning needed - if (slots.length <= MAX_PENDING_PER_PROVIDER) { - return 0; - } - - // Sort by age (oldest first) so we can evict oldest - const sortedSlots = [...slots].sort((a, b) => a.acquiredAt - b.acquiredAt); - - // Evict oldest entries until under limit - while (sortedSlots.length > MAX_PENDING_PER_PROVIDER) { - const oldest = sortedSlots.shift()!; - const age = now - oldest.acquiredAt; - - // Check if this is stale (older than freshness cutoff) - if (age > MESSAGE_FRESHNESS_MS) { - console.log(`🎰 AUTO-THIN: Evicting stale ${oldest.personaId} (age ${Math.round(age / 1000)}s > ${MESSAGE_FRESHNESS_MS / 1000}s freshness cutoff)`); - evicted++; - } else { - // Even fresh entries get evicted if queue is too long - console.log(`🎰 AUTO-THIN: Evicting ${oldest.personaId} to make room (queue ${slots.length} > max ${MAX_PENDING_PER_PROVIDER})`); - evicted++; - } - } - - // Update slots with thinned list - if (evicted > 0) { - this.activeSlots.set(slotKey, sortedSlots); - } - - return evicted; - } - - /** - * Check if persona has already responded to this message - * (Card dealing: max 1 slot per persona per message) - */ - private hasPersonaRespondedToMessage(personaId: string, messageId: string): boolean { - const responders = this.messageResponders.get(messageId); - return responders?.has(personaId) ?? false; - } - /** * Resolve provider to its slot group key. * Providers in the same group share the same slot pool. @@ -212,15 +78,24 @@ class InferenceCoordinatorImpl { } /** - * Request permission to perform inference + * Get hardware capacity for a provider slot group. + */ + private capacity(slotKey: string): number { + return PROVIDER_CAPACITY[slotKey] ?? 3; + } + + /** + * Request permission to perform inference. * - * RTOS-style fair scheduling: - * 1. @mentioned personas always get through (explicit user request) - * 2. Local-inference has 1 reserved slot out of 5 responders - * 3. Cloud providers share the remaining 4 slots - * 4. Wait queue tracks who's been waiting longest for priority + * Only checks hardware capacity β€” can the provider handle another concurrent request? + * All cognitive decisions (who responds, how many) are made upstream by + * the coordination stream and should-respond LLM calls. * - * @returns true if slot acquired, false if should skip + * @param personaId - The persona requesting the slot + * @param messageId - The message being processed (for tracking/debugging) + * @param provider - The inference provider (e.g., 'groq', 'ollama', 'anthropic') + * @param options - Reserved for future use (isMentioned no longer affects scheduling) + * @returns true if slot acquired, false if provider at hardware capacity */ async requestSlot( personaId: string, @@ -228,148 +103,35 @@ class InferenceCoordinatorImpl { provider: string, options?: { isMentioned?: boolean } ): Promise { - // Resolve provider to slot group (e.g., 'ollama' β†’ 'local-inference') const slotKey = this.getSlotKey(provider); - const limits = this.providerLimits.get(slotKey) || DEFAULT_PROVIDER_LIMITS['local-inference']; + const maxConcurrent = this.capacity(slotKey); const slots = this.activeSlots.get(slotKey) || []; - const isLocal = this.isLocalInference(provider); - - // Get current message state - const responders = this.messageResponders.get(messageId) || new Set(); - const providersResponded = this.messageProviders.get(messageId) || new Set(); - - // Count local vs cloud responders for this message - const localRespondersForMessage = Array.from(responders).filter(pid => { - // Check if this persona responded via local-inference - // (We track this in messageProviders) - return providersResponded.has('local-inference'); - }).length; - const cloudRespondersForMessage = responders.size - localRespondersForMessage; - - // ========== RTOS FAIR SCHEDULING LOGIC ========== - - // AUTO-THIN: Keep queue lean by evicting oldest entries - const evicted = this.autoThinQueue(slotKey); - if (evicted > 0) { - console.log(`🎰 InferenceCoordinator: Auto-thinned ${evicted} stale entries from ${slotKey}`); - } - // Rule 0: @mentioned PRIORITY - but still respect hardware limits - // CRITICAL FIX: @mentioned must STILL respect local-inference maxConcurrent - // because the Rust gRPC backend can only process 1 request at a time (write lock) - // Allowing multiple @mentioned to bypass causes 90s timeout cascade - let skipOtherChecks = false; - if (options?.isMentioned) { - // For local-inference: respect maxConcurrent even for @mentioned - if (isLocal && slots.length >= limits.maxConcurrent) { - console.log(`🎰 InferenceCoordinator: ${personaId} @mentioned but local-inference at capacity (${slots.length}/${limits.maxConcurrent}) - DENIED`); - return false; // Cannot bypass hardware limits - } else { - console.log(`🎰 InferenceCoordinator: ${personaId} PRIORITY (@mentioned) for ${slotKey}`); - skipOtherChecks = true; // Skip other checks for mentioned personas - } + // The one rule: hardware capacity + if (slots.length >= maxConcurrent) { + console.log(`🎰 InferenceCoordinator: ${personaId.slice(0, 8)} denied β€” ${slotKey} at hardware capacity (${slots.length}/${maxConcurrent})`); + return false; } - // Non-mentioned personas (and @mentioned local that was denied above) go through full checks - if (!skipOtherChecks) { - // Rule 1: CARD DEALING - Max 1 response per persona per message - if (this.hasPersonaRespondedToMessage(personaId, messageId)) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - already responded to ${messageId.slice(0, 8)} (card dealing: 1 per persona)`); - return false; - } - - // Rule 2: Check absolute max responders - if (responders.size >= MAX_RESPONDERS_PER_MESSAGE) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - message ${messageId.slice(0, 8)} at max responders (${responders.size}/${MAX_RESPONDERS_PER_MESSAGE})`); - return false; - } - - // Rule 3: RESERVED SLOT - Local-inference gets guaranteed 1 slot - if (isLocal) { - // Local persona: check if reserved slot is available - // Reserved slot means: even if 4 cloud responders, local still gets in - const localAlreadyResponded = providersResponded.has('local-inference'); - if (localAlreadyResponded) { - // Another local persona already responded - apply normal limit - if (responders.size >= MAX_RESPONDERS_PER_MESSAGE) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - local reserved slot already used`); - return false; - } - } - // Local persona gets through if under max (reserved slot guarantees access) - console.log(`🎰 InferenceCoordinator: ${personaId} 🏠 using reserved local-inference slot`); - } else { - // Cloud persona: check against cloud-specific limit - // Cloud can only use (MAX - reserved) slots = 4 slots - if (cloudRespondersForMessage >= MAX_CLOUD_RESPONDERS) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - cloud slots full (${cloudRespondersForMessage}/${MAX_CLOUD_RESPONDERS}), 1 reserved for local`); - return false; - } - } - - // Rule 4: Per-provider concurrency limit - if (slots.length >= limits.maxConcurrent) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - ${slotKey} at capacity (${slots.length}/${limits.maxConcurrent})`); - return false; - } - - // Rule 5: Per-persona cooldown - const lastRequest = this.lastRequestTime.get(personaId) || 0; - const timeSinceLastRequest = Date.now() - lastRequest; - if (timeSinceLastRequest < limits.cooldownMs) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied - cooldown (${timeSinceLastRequest}ms < ${limits.cooldownMs}ms)`); - return false; - } - - // Rule 6: Stagger delay (spread out requests) - const staggerDelay = Math.random() * limits.staggerDelayMs; - if (staggerDelay > 50) { - console.log(`🎰 InferenceCoordinator: ${personaId} waiting ${Math.round(staggerDelay)}ms stagger`); - await this.delay(staggerDelay); - - // Re-check after stagger - const slotsAfterStagger = this.activeSlots.get(slotKey) || []; - if (slotsAfterStagger.length >= limits.maxConcurrent) { - console.log(`🎰 InferenceCoordinator: ${personaId} denied after stagger - ${slotKey} now full`); - return false; - } - } - } - - // ========== ACQUIRE SLOT ========== - - // Get current slots (re-fetch for freshness) - const currentSlots = this.activeSlots.get(slotKey) || []; - - // Create slot + // Acquire slot const slot: InferenceSlot = { personaId, messageId, provider, acquiredAt: Date.now() }; - currentSlots.push(slot); - this.activeSlots.set(slotKey, currentSlots); - - // Track responders and which providers responded - responders.add(personaId); - this.messageResponders.set(messageId, responders); - providersResponded.add(slotKey); - this.messageProviders.set(messageId, providersResponded); - - // Update last request time - this.lastRequestTime.set(personaId, Date.now()); + slots.push(slot); + this.activeSlots.set(slotKey, slots); - const slotType = isLocal ? '🏠 LOCAL' : '☁️ CLOUD'; - console.log(`🎰 InferenceCoordinator: ${personaId} GRANTED ${slotType} slot (${currentSlots.length}/${limits.maxConcurrent}) [responders: ${responders.size}/${MAX_RESPONDERS_PER_MESSAGE}]`); + console.log(`🎰 InferenceCoordinator: ${personaId.slice(0, 8)} GRANTED ${slotKey} slot (${slots.length}/${maxConcurrent})`); return true; } /** - * Release slot after inference completes (success or failure) + * Release slot after inference completes (success or failure). + * MUST be called in both success and error paths. */ releaseSlot(personaId: string, provider: string): void { - // Resolve provider to slot group const slotKey = this.getSlotKey(provider); const slots = this.activeSlots.get(slotKey); if (!slots) return; @@ -377,54 +139,33 @@ class InferenceCoordinatorImpl { const index = slots.findIndex(s => s.personaId === personaId); if (index !== -1) { const slot = slots[index]; + const duration = Date.now() - slot.acquiredAt; slots.splice(index, 1); this.activeSlots.set(slotKey, slots); - const duration = Date.now() - slot.acquiredAt; - console.log(`🎰 InferenceCoordinator: ${personaId} RELEASED ${slotKey} slot after ${duration}ms (${slots.length} remaining)`); + console.log(`🎰 InferenceCoordinator: ${personaId.slice(0, 8)} RELEASED ${slotKey} slot after ${duration}ms (${slots.length} remaining)`); } } /** - * Get current coordinator stats for monitoring + * Get current coordinator stats for monitoring. */ getStats(): { providers: Record; - scheduling: { - maxResponders: number; - reservedLocalSlots: number; - maxCloudSlots: number; - maxPendingPerProvider: number; - messageFreshnessMs: number; - maxSlotsPerPersona: number; - activeMessages: number; - }; } { const providers: Record = {}; for (const [provider, slots] of this.activeSlots) { - const limits = this.providerLimits.get(provider); providers[provider] = { active: slots.length, - max: limits?.maxConcurrent || 0 + max: this.capacity(provider) }; } - return { - providers, - scheduling: { - maxResponders: MAX_RESPONDERS_PER_MESSAGE, - reservedLocalSlots: RESERVED_LOCAL_INFERENCE_SLOTS, - maxCloudSlots: MAX_CLOUD_RESPONDERS, - maxPendingPerProvider: MAX_PENDING_PER_PROVIDER, - messageFreshnessMs: MESSAGE_FRESHNESS_MS, - maxSlotsPerPersona: MAX_SLOTS_PER_PERSONA_PER_MESSAGE, - activeMessages: this.messageResponders.size - } - }; + return { providers }; } /** - * Clean up stale slots (safety valve if releases are missed) - * Call periodically to prevent slot leaks + * Clean up stale slots (safety valve if releaseSlot is missed due to crash). + * Called periodically to prevent slot leaks. */ cleanupStaleSlots(maxAgeMs: number = 180000): number { let cleaned = 0; @@ -433,7 +174,7 @@ class InferenceCoordinatorImpl { for (const [provider, slots] of this.activeSlots) { const validSlots = slots.filter(slot => { if (now - slot.acquiredAt > maxAgeMs) { - console.log(`🎰 InferenceCoordinator: Cleaning stale slot for ${slot.personaId} (${provider}, age ${now - slot.acquiredAt}ms)`); + console.log(`🎰 InferenceCoordinator: Cleaning stale slot for ${slot.personaId.slice(0, 8)} (${provider}, age ${Math.round((now - slot.acquiredAt) / 1000)}s)`); cleaned++; return false; } @@ -442,47 +183,14 @@ class InferenceCoordinatorImpl { this.activeSlots.set(provider, validSlots); } - // Also clean up old message responder/provider tracking - const messageIds = Array.from(this.messageResponders.keys()); - // We don't have timestamps for messages, so just limit map size - if (messageIds.length > 100) { - // Keep newest 50 - const toRemove = messageIds.slice(0, messageIds.length - 50); - for (const id of toRemove) { - this.messageResponders.delete(id); - this.messageProviders.delete(id); - } - } - - // Clean up wait queue (stale entries) - for (const [messageId, queue] of this.waitQueue) { - const validEntries = queue.filter(entry => { - if (now - entry.requestedAt > STALE_WAIT_TIMEOUT_MS) { - console.log(`🎰 InferenceCoordinator: Kicking stale wait entry for ${entry.personaId} (waited ${now - entry.requestedAt}ms)`); - cleaned++; - return false; - } - return true; - }); - if (validEntries.length === 0) { - this.waitQueue.delete(messageId); - } else { - this.waitQueue.set(messageId, validEntries); - } - } - return cleaned; } - - private delay(ms: number): Promise { - return new Promise(resolve => setTimeout(resolve, ms)); - } } // Global singleton export const InferenceCoordinator = new InferenceCoordinatorImpl(); -// Start cleanup interval (every 60 seconds) +// Safety valve: clean stale slots every 60 seconds setInterval(() => { InferenceCoordinator.cleanupStaleSlots(); }, 60000); diff --git a/src/debug/jtag/system/user/server/modules/PersonaInbox.ts b/src/debug/jtag/system/user/server/modules/PersonaInbox.ts index 08dc97497..b75cd12dc 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaInbox.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaInbox.ts @@ -447,8 +447,7 @@ export class PersonaInbox { * - Recent message: +0.2 (fresher = more relevant) * - Active conversation: +0.1 (persona recently active in room) * - Relevant expertise: +0.1 (matches persona's domain) - * - Hot conversation (temp β‰₯ 0.7): +0.15 (PHASE 3BIS) - * - Cold conversation (temp ≀ 0.3): -0.1 (PHASE 3BIS) + * - Hot conversation (temp β‰₯ 0.7): +0.1 (activity signal, not a gate) * * Base: 0.2 (all messages have baseline relevance) */ @@ -492,18 +491,17 @@ export function calculateMessagePriority( } } - // PHASE 3BIS: Temperature-based priority adjustment (activity ambient state) - // Hot conversations = more responsive, Cold conversations = less urgent + // Temperature is informational context β€” the AI's own cognition decides + // whether to respond, not a formula. Hot rooms get a small boost but + // cold rooms are NOT penalized. The AI might have something important + // to say regardless of room temperature. const temperature = getChatCoordinator().getTemperature(message.roomId); if (temperature >= 0.7) { - // Hot conversation - be more responsive - priority += 0.15; - } else if (temperature <= 0.3) { - // Cold conversation - less urgent (but still respond to mentions) - priority -= 0.1; + // Hot conversation - slight boost for responsiveness + priority += 0.1; } - // Neutral temperature (0.3-0.7) - no adjustment + // Cold/neutral: no penalty β€” let the AI's cognition decide return Math.min(1.0, priority); // Cap at 1.0 } diff --git a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts index af527cb3c..130532d07 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts @@ -714,7 +714,7 @@ export class PersonaMessageEvaluator { this.log(`πŸ”§ TRACE-POINT-C: After respondToMessage returned (timestamp=${Date.now()})`); this.log(`βœ… ${this.personaUser.displayName}: [PHASE 3/3] Response posted successfully`); - // PHASE 3BIS: Notify coordinator that message was serviced (lowers temperature) + // Signal conversation activity (warms room β€” active conversation stays alive) getChatCoordinator().onMessageServiced(messageEntity.roomId, this.personaUser.id); // Track response for rate limiting From 4de45190e98dcd2a92f7f67da084fabb50fb7f1b Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 20:47:07 -0600 Subject: [PATCH 07/14] Fix missing parameter validation in should-respond-fast, activity/join, activity/create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - should-respond-fast: params.messageText crash (toLowerCase on undefined) when AI calls without messageText. Now returns graceful false result. - activity/join: activityId undefined β†’ "Activity not found: undefined" Now validates activityId before DB lookup. - activity/create: recipeId undefined β†’ "Recipe not found: undefined" Now validates recipeId before DB lookup. All three were AIs calling tools with missing params, getting either crashes or confusing error messages instead of clear validation errors. --- .../server/ShouldRespondFastServerCommand.ts | 6 ++++++ .../activity/create/server/ActivityCreateServerCommand.ts | 7 +++++++ .../activity/join/server/ActivityJoinServerCommand.ts | 7 +++++++ 3 files changed, 20 insertions(+) diff --git a/src/debug/jtag/commands/ai/should-respond-fast/server/ShouldRespondFastServerCommand.ts b/src/debug/jtag/commands/ai/should-respond-fast/server/ShouldRespondFastServerCommand.ts index a28127036..e308dac0c 100644 --- a/src/debug/jtag/commands/ai/should-respond-fast/server/ShouldRespondFastServerCommand.ts +++ b/src/debug/jtag/commands/ai/should-respond-fast/server/ShouldRespondFastServerCommand.ts @@ -38,6 +38,12 @@ export class ShouldRespondFastServerCommand extends ShouldRespondFastCommand { }); } + if (!params.messageText) { + return this.buildResult(params, false, 0, { + reasoning: 'Missing required parameter: messageText' + }); + } + // Default contextId to a placeholder if not provided (allows tool to work) const contextId = params.contextId ?? 'default-context'; diff --git a/src/debug/jtag/commands/collaboration/activity/create/server/ActivityCreateServerCommand.ts b/src/debug/jtag/commands/collaboration/activity/create/server/ActivityCreateServerCommand.ts index 435a59e9f..4abceb36b 100644 --- a/src/debug/jtag/commands/collaboration/activity/create/server/ActivityCreateServerCommand.ts +++ b/src/debug/jtag/commands/collaboration/activity/create/server/ActivityCreateServerCommand.ts @@ -30,6 +30,13 @@ export class ActivityCreateServerCommand extends CommandBase Date: Sun, 1 Feb 2026 21:56:52 -0600 Subject: [PATCH 08/14] Register skill/* commands in generated files, version bump --- src/debug/jtag/browser/generated.ts | 32 ++++- src/debug/jtag/generated-command-schemas.json | 112 +++++++++++++++++- src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/server/generated.ts | 32 ++++- .../shared/generated-command-constants.ts | 5 + src/debug/jtag/shared/version.ts | 2 +- 7 files changed, 182 insertions(+), 7 deletions(-) diff --git a/src/debug/jtag/browser/generated.ts b/src/debug/jtag/browser/generated.ts index bcca8e98e..cffcb66ff 100644 --- a/src/debug/jtag/browser/generated.ts +++ b/src/debug/jtag/browser/generated.ts @@ -1,7 +1,7 @@ /** * Browser Structure Registry - Auto-generated * - * Contains 11 daemons and 174 commands and 2 adapters and 27 widgets. + * Contains 11 daemons and 179 commands and 2 adapters and 27 widgets. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -149,6 +149,11 @@ import { SessionCreateBrowserCommand } from './../commands/session/create/browse import { SessionDestroyBrowserCommand } from './../commands/session/destroy/browser/SessionDestroyBrowserCommand'; import { SessionGetIdBrowserCommand } from './../commands/session/get-id/browser/SessionGetIdBrowserCommand'; import { SessionGetUserBrowserCommand } from './../commands/session/get-user/browser/SessionGetUserBrowserCommand'; +import { SkillActivateBrowserCommand } from './../commands/skill/activate/browser/SkillActivateBrowserCommand'; +import { SkillGenerateBrowserCommand } from './../commands/skill/generate/browser/SkillGenerateBrowserCommand'; +import { SkillListBrowserCommand } from './../commands/skill/list/browser/SkillListBrowserCommand'; +import { SkillProposeBrowserCommand } from './../commands/skill/propose/browser/SkillProposeBrowserCommand'; +import { SkillValidateBrowserCommand } from './../commands/skill/validate/browser/SkillValidateBrowserCommand'; import { SocialBrowseBrowserCommand } from './../commands/social/browse/browser/SocialBrowseBrowserCommand'; import { SocialClassifyBrowserCommand } from './../commands/social/classify/browser/SocialClassifyBrowserCommand'; import { SocialCommentBrowserCommand } from './../commands/social/comment/browser/SocialCommentBrowserCommand'; @@ -945,6 +950,31 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'SessionGetUserBrowserCommand', commandClass: SessionGetUserBrowserCommand }, +{ + name: 'skill/activate', + className: 'SkillActivateBrowserCommand', + commandClass: SkillActivateBrowserCommand + }, +{ + name: 'skill/generate', + className: 'SkillGenerateBrowserCommand', + commandClass: SkillGenerateBrowserCommand + }, +{ + name: 'skill/list', + className: 'SkillListBrowserCommand', + commandClass: SkillListBrowserCommand + }, +{ + name: 'skill/propose', + className: 'SkillProposeBrowserCommand', + commandClass: SkillProposeBrowserCommand + }, +{ + name: 'skill/validate', + className: 'SkillValidateBrowserCommand', + commandClass: SkillValidateBrowserCommand + }, { name: 'social/browse', className: 'SocialBrowseBrowserCommand', diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index ad528125d..0298a0e92 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-02T00:56:45.003Z", + "generated": "2026-02-02T03:02:21.661Z", "version": "1.0.0", "commands": [ { @@ -1318,6 +1318,116 @@ } } }, + { + "name": "skill/validate", + "description": "Skill Validate Command - Shared Types\n *\n * Validate a generated skill by running TypeScript compilation and tests in an ExecutionSandbox. Updates SkillEntity with validation results.", + "params": { + "skillId": { + "type": "string", + "required": true, + "description": "skillId parameter" + } + } + }, + { + "name": "skill/propose", + "description": "Skill Propose Command - Shared Types\n *\n * Propose a new skill (command) specification. Creates a SkillEntity with status 'proposed'. For team-scoped skills, creates a DecisionProposal for governance approval.", + "params": { + "name": { + "type": "string", + "required": true, + "description": "name parameter" + }, + "description": { + "type": "string", + "required": true, + "description": "description parameter" + }, + "skillParams": { + "type": "array", + "required": true, + "description": "skillParams parameter" + }, + "skillResults": { + "type": "array", + "required": true, + "description": "skillResults parameter" + }, + "implementation": { + "type": "string", + "required": true, + "description": "implementation parameter" + }, + "scope": { + "type": "string", + "required": false, + "description": "scope parameter" + }, + "examples": { + "type": "array", + "required": false, + "description": "examples parameter" + }, + "personaId": { + "type": "string", + "required": true, + "description": "personaId parameter" + } + } + }, + { + "name": "skill/list", + "description": "Skill List Command - Shared Types\n *\n * List skills with optional filters by status, scope, and creator. Returns SkillEntity records from the database.", + "params": { + "status": { + "type": "string", + "required": false, + "description": "status parameter" + }, + "scope": { + "type": "string", + "required": false, + "description": "scope parameter" + }, + "createdById": { + "type": "string", + "required": false, + "description": "createdById parameter" + }, + "limit": { + "type": "number", + "required": false, + "description": "limit parameter" + } + } + }, + { + "name": "skill/generate", + "description": "Skill Generate Command - Shared Types\n *\n * Generate code files for a proposed skill using the CommandGenerator. Retrieves the SkillEntity and produces source files.", + "params": { + "skillId": { + "type": "string", + "required": true, + "description": "skillId parameter" + }, + "outputDir": { + "type": "string", + "required": false, + "description": "outputDir parameter" + } + } + }, + { + "name": "skill/activate", + "description": "Skill Activate Command - Shared Types\n *\n * Activate a validated skill by registering it as a live command. The skill becomes available for use by the creator (personal) or all personas (team).", + "params": { + "skillId": { + "type": "string", + "required": true, + "description": "skillId parameter" + } + } + }, { "name": "session/get-user", "description": "session/get-user command", diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index 71784e6c6..5ad7f8c2d 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7516", + "version": "1.0.7521", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7516", + "version": "1.0.7521", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index 33059f2a7..3bf6bd005 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7516", + "version": "1.0.7521", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/server/generated.ts b/src/debug/jtag/server/generated.ts index 8d24e08f5..7c91f9438 100644 --- a/src/debug/jtag/server/generated.ts +++ b/src/debug/jtag/server/generated.ts @@ -1,7 +1,7 @@ /** * Server Structure Registry - Auto-generated * - * Contains 18 daemons and 204 commands and 3 adapters. + * Contains 18 daemons and 209 commands and 3 adapters. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -185,6 +185,11 @@ import { SessionCreateServerCommand } from './../commands/session/create/server/ import { SessionDestroyServerCommand } from './../commands/session/destroy/server/SessionDestroyServerCommand'; import { SessionGetIdServerCommand } from './../commands/session/get-id/server/SessionGetIdServerCommand'; import { SessionGetUserServerCommand } from './../commands/session/get-user/server/SessionGetUserServerCommand'; +import { SkillActivateServerCommand } from './../commands/skill/activate/server/SkillActivateServerCommand'; +import { SkillGenerateServerCommand } from './../commands/skill/generate/server/SkillGenerateServerCommand'; +import { SkillListServerCommand } from './../commands/skill/list/server/SkillListServerCommand'; +import { SkillProposeServerCommand } from './../commands/skill/propose/server/SkillProposeServerCommand'; +import { SkillValidateServerCommand } from './../commands/skill/validate/server/SkillValidateServerCommand'; import { SocialBrowseServerCommand } from './../commands/social/browse/server/SocialBrowseServerCommand'; import { SocialClassifyServerCommand } from './../commands/social/classify/server/SocialClassifyServerCommand'; import { SocialCommentServerCommand } from './../commands/social/comment/server/SocialCommentServerCommand'; @@ -1133,6 +1138,31 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'SessionGetUserServerCommand', commandClass: SessionGetUserServerCommand }, +{ + name: 'skill/activate', + className: 'SkillActivateServerCommand', + commandClass: SkillActivateServerCommand + }, +{ + name: 'skill/generate', + className: 'SkillGenerateServerCommand', + commandClass: SkillGenerateServerCommand + }, +{ + name: 'skill/list', + className: 'SkillListServerCommand', + commandClass: SkillListServerCommand + }, +{ + name: 'skill/propose', + className: 'SkillProposeServerCommand', + commandClass: SkillProposeServerCommand + }, +{ + name: 'skill/validate', + className: 'SkillValidateServerCommand', + commandClass: SkillValidateServerCommand + }, { name: 'social/browse', className: 'SocialBrowseServerCommand', diff --git a/src/debug/jtag/shared/generated-command-constants.ts b/src/debug/jtag/shared/generated-command-constants.ts index 1d0f509c7..411895708 100644 --- a/src/debug/jtag/shared/generated-command-constants.ts +++ b/src/debug/jtag/shared/generated-command-constants.ts @@ -185,6 +185,11 @@ export const COMMANDS = { SESSION_DESTROY: 'session/destroy', SESSION_GET_ID: 'session/get-id', SESSION_GET_USER: 'session/get-user', + SKILL_ACTIVATE: 'skill/activate', + SKILL_GENERATE: 'skill/generate', + SKILL_LIST: 'skill/list', + SKILL_PROPOSE: 'skill/propose', + SKILL_VALIDATE: 'skill/validate', SOCIAL_BROWSE: 'social/browse', SOCIAL_CLASSIFY: 'social/classify', SOCIAL_COMMENT: 'social/comment', diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index d1e88768b..6ba3b9258 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7516'; +export const VERSION = '1.0.7521'; export const PACKAGE_NAME = '@continuum/jtag'; From f8e03c650cc52cb69fcd7470a90fcccc729b453a Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 22:37:03 -0600 Subject: [PATCH 09/14] code/task entry point: wire CodeAgentOrchestrator into command system Add code/task command as the entry point for the full coding agent pipeline. Wire PlanGovernance approval flow and CodeTaskDelegator into orchestrator. Add pending_approval status to CodingResult for high-risk plan gating. --- src/debug/jtag/browser/generated.ts | 8 +- src/debug/jtag/commands/code/task/.npmignore | 20 ++ src/debug/jtag/commands/code/task/README.md | 200 ++++++++++++++ .../task/browser/CodeTaskBrowserCommand.ts | 21 ++ .../jtag/commands/code/task/package.json | 35 +++ .../code/task/server/CodeTaskServerCommand.ts | 115 ++++++++ .../code/task/shared/CodeTaskTypes.ts | 184 +++++++++++++ .../integration/CodeTaskIntegration.test.ts | 196 +++++++++++++ .../task/test/unit/CodeTaskCommand.test.ts | 259 ++++++++++++++++++ src/debug/jtag/generated-command-schemas.json | 48 +++- src/debug/jtag/generator/specs/code-task.json | 150 ++++++++++ src/debug/jtag/server/generated.ts | 8 +- .../shared/generated-command-constants.ts | 1 + .../code/server/CodeAgentOrchestrator.ts | 78 +++++- .../jtag/system/code/shared/CodingTypes.ts | 12 +- 15 files changed, 1320 insertions(+), 15 deletions(-) create mode 100644 src/debug/jtag/commands/code/task/.npmignore create mode 100644 src/debug/jtag/commands/code/task/README.md create mode 100644 src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/task/package.json create mode 100644 src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts create mode 100644 src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts create mode 100644 src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts create mode 100644 src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts create mode 100644 src/debug/jtag/generator/specs/code-task.json diff --git a/src/debug/jtag/browser/generated.ts b/src/debug/jtag/browser/generated.ts index cffcb66ff..254e61cd7 100644 --- a/src/debug/jtag/browser/generated.ts +++ b/src/debug/jtag/browser/generated.ts @@ -1,7 +1,7 @@ /** * Browser Structure Registry - Auto-generated * - * Contains 11 daemons and 179 commands and 2 adapters and 27 widgets. + * Contains 11 daemons and 180 commands and 2 adapters and 27 widgets. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -48,6 +48,7 @@ import { CodeEditBrowserCommand } from './../commands/code/edit/browser/CodeEdit import { CodeHistoryBrowserCommand } from './../commands/code/history/browser/CodeHistoryBrowserCommand'; import { CodeReadBrowserCommand } from './../commands/code/read/browser/CodeReadBrowserCommand'; import { CodeSearchBrowserCommand } from './../commands/code/search/browser/CodeSearchBrowserCommand'; +import { CodeTaskBrowserCommand } from './../commands/code/task/browser/CodeTaskBrowserCommand'; import { CodeTreeBrowserCommand } from './../commands/code/tree/browser/CodeTreeBrowserCommand'; import { CodeUndoBrowserCommand } from './../commands/code/undo/browser/CodeUndoBrowserCommand'; import { CodeWriteBrowserCommand } from './../commands/code/write/browser/CodeWriteBrowserCommand'; @@ -445,6 +446,11 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CodeSearchBrowserCommand', commandClass: CodeSearchBrowserCommand }, +{ + name: 'code/task', + className: 'CodeTaskBrowserCommand', + commandClass: CodeTaskBrowserCommand + }, { name: 'code/tree', className: 'CodeTreeBrowserCommand', diff --git a/src/debug/jtag/commands/code/task/.npmignore b/src/debug/jtag/commands/code/task/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/code/task/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/code/task/README.md b/src/debug/jtag/commands/code/task/README.md new file mode 100644 index 000000000..1c5d2228b --- /dev/null +++ b/src/debug/jtag/commands/code/task/README.md @@ -0,0 +1,200 @@ +# Code Task Command + +Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/task --description= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/task', { + // your parameters here +}); +``` + +## Parameters + +- **description** (required): `string` - What the coding task should accomplish (natural language) +- **taskType** (optional): `string` - Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' +- **relevantFiles** (optional): `string[]` - File paths already known to be relevant (hints for discovery phase) +- **dryRun** (optional): `boolean` - Execute read-only commands normally but mock writes. Returns predicted changes without modifying files +- **securityTier** (optional): `string` - Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level +- **delegationEnabled** (optional): `boolean` - Enable multi-agent delegation for parallel execution across file clusters +- **maxDurationMs** (optional): `number` - Maximum execution time in milliseconds (default: 120000) +- **maxToolCalls** (optional): `number` - Maximum number of tool calls allowed (default: 15) + +## Result + +Returns `CodeTaskResult` with: + +Returns CommandResult with: +- **status**: `string` - Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' +- **summary**: `string` - Human-readable summary of what was accomplished +- **planSummary**: `string` - The LLM-generated plan summary +- **riskLevel**: `string` - Assessed risk level: 'low' | 'medium' | 'high' | 'critical' +- **securityTier**: `string` - Security tier used for execution +- **stepsTotal**: `number` - Total number of steps in the plan +- **stepsCompleted**: `number` - Number of steps that completed successfully +- **filesModified**: `string[]` - Files that were modified during execution +- **filesCreated**: `string[]` - Files that were created during execution +- **totalToolCalls**: `number` - Total tool calls used +- **totalDurationMs**: `number` - Total execution time in milliseconds +- **changeIds**: `string[]` - Change IDs from file operations (for potential undo) +- **errors**: `string[]` - Errors encountered during execution +- **proposalId**: `string` - Governance proposal ID if plan requires approval (status='pending_approval') + +## Examples + +### Simple code edit task + +```bash +./jtag code/task --description="Add input validation to the login function in auth.ts" +``` + +**Expected result:** +{ status: "completed", stepsCompleted: 3, filesModified: ["auth.ts"] } + +### Dry run to preview changes + +```bash +./jtag code/task --description="Refactor UserService to use dependency injection" --dryRun=true +``` + +**Expected result:** +{ status: "completed", filesModified: [], summary: "Dry run: would modify 3 files" } + +### Discovery-only task + +```bash +./jtag code/task --description="Find all files using deprecated API" --taskType="discovery" --securityTier="discovery" +``` + +**Expected result:** +{ status: "completed", stepsCompleted: 2, filesModified: [] } + +### With relevant file hints + +```bash +./jtag code/task --description="Fix the off-by-one error" --relevantFiles='["src/utils/pagination.ts"]' +``` + +**Expected result:** +{ status: "completed", filesModified: ["src/utils/pagination.ts"] } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/task +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/task' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/task +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/task' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Task/test/unit/CodeTaskCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Task/test/integration/CodeTaskIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeTaskTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeTaskBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeTaskServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeTaskCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeTaskIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts b/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts new file mode 100644 index 000000000..839c5eb8f --- /dev/null +++ b/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Task Command - Browser Implementation + * + * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeTaskParams, CodeTaskResult } from '../shared/CodeTaskTypes'; + +export class CodeTaskBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/task', context, subpath, commander); + } + + async execute(params: CodeTaskParams): Promise { + console.log('🌐 BROWSER: Delegating Code Task to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/task/package.json b/src/debug/jtag/commands/code/task/package.json new file mode 100644 index 000000000..4a3f54659 --- /dev/null +++ b/src/debug/jtag/commands/code/task/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/code/task", + "version": "1.0.0", + "description": "Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation.", + "main": "server/CodeTaskServerCommand.ts", + "types": "shared/CodeTaskTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/CodeTaskIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/task" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts b/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts new file mode 100644 index 000000000..4c53d08f4 --- /dev/null +++ b/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts @@ -0,0 +1,115 @@ +/** + * Code Task Command - Server Implementation + * + * Entry point for the full coding agent pipeline: + * 1. Validates parameters + * 2. Builds a CodingTask + * 3. Invokes CodeAgentOrchestrator.execute() + * 4. Maps CodingResult β†’ CodeTaskResult + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeTaskParams, CodeTaskResult } from '../shared/CodeTaskTypes'; +import { createCodeTaskResultFromParams } from '../shared/CodeTaskTypes'; +import { CodeAgentOrchestrator } from '@system/code/server/CodeAgentOrchestrator'; +import type { CodingTask, CodingTaskType, SecurityTierLevel, ExecutionOptions } from '@system/code/shared/CodingTypes'; +import { v4 as uuidv4 } from 'uuid'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +const VALID_TASK_TYPES = new Set(['planning', 'generation', 'editing', 'review', 'quick-fix', 'discovery']); +const VALID_TIERS = new Set(['discovery', 'read', 'write', 'system']); + +export class CodeTaskServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/task', context, subpath, commander); + } + + async execute(params: CodeTaskParams): Promise { + // Validate required parameters + if (!params.description || params.description.trim() === '') { + throw new ValidationError( + 'description', + `Missing required parameter 'description'. Provide a natural language description of the coding task. See the code/task README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError( + 'userId', + 'Workspace operations require a userId (auto-injected for persona tool calls).' + ); + } + + // Validate optional enum parameters + const taskType: CodingTaskType = this.resolveTaskType(params.taskType); + const securityTierOverride = this.resolveSecurityTier(params.securityTier); + + // Build CodingTask + const task: CodingTask = { + id: uuidv4() as UUID, + personaId: params.userId as UUID, + description: params.description.trim(), + taskType, + contextId: params.sessionId as UUID | undefined, + relevantFiles: params.relevantFiles, + maxDurationMs: params.maxDurationMs || undefined, + maxToolCalls: params.maxToolCalls || undefined, + createdAt: Date.now(), + }; + + // Build execution options + const options: ExecutionOptions = { + dryRun: params.dryRun ?? false, + securityTier: securityTierOverride, + delegationEnabled: params.delegationEnabled ?? false, + }; + + // Execute via orchestrator + const orchestrator = new CodeAgentOrchestrator(); + const result = await orchestrator.execute(task, options); + + // Map CodingResult β†’ CodeTaskResult + return createCodeTaskResultFromParams(params, { + success: result.status === 'completed', + status: result.status, + summary: result.summary, + planSummary: result.planMetadata?.planSummary ?? result.summary, + riskLevel: result.planMetadata?.riskLevel ?? '', + securityTier: result.planMetadata?.requiredTier ?? securityTierOverride ?? '', + stepsTotal: result.stepResults.length, + stepsCompleted: result.stepResults.filter(s => s.status === 'completed').length, + filesModified: result.filesModified, + filesCreated: result.filesCreated, + totalToolCalls: result.totalToolCalls, + totalDurationMs: result.totalDurationMs, + changeIds: result.changeIds, + errors: result.errors, + proposalId: result.proposalId ?? '', + }); + } + + private resolveTaskType(raw?: string): CodingTaskType { + if (!raw || raw.trim() === '') return 'generation'; + if (!VALID_TASK_TYPES.has(raw)) { + throw new ValidationError( + 'taskType', + `Invalid taskType '${raw}'. Must be one of: ${Array.from(VALID_TASK_TYPES).join(', ')}` + ); + } + return raw as CodingTaskType; + } + + private resolveSecurityTier(raw?: string): SecurityTierLevel | undefined { + if (!raw || raw.trim() === '') return undefined; + if (!VALID_TIERS.has(raw)) { + throw new ValidationError( + 'securityTier', + `Invalid securityTier '${raw}'. Must be one of: ${Array.from(VALID_TIERS).join(', ')}` + ); + } + return raw as SecurityTierLevel; + } +} diff --git a/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts b/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts new file mode 100644 index 000000000..403995419 --- /dev/null +++ b/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts @@ -0,0 +1,184 @@ +/** + * Code Task Command - Shared Types + * + * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Code Task Command Parameters + */ +export interface CodeTaskParams extends CommandParams { + // What the coding task should accomplish (natural language) + description: string; + // Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' + taskType?: string; + // File paths already known to be relevant (hints for discovery phase) + relevantFiles?: string[]; + // Execute read-only commands normally but mock writes. Returns predicted changes without modifying files + dryRun?: boolean; + // Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level + securityTier?: string; + // Enable multi-agent delegation for parallel execution across file clusters + delegationEnabled?: boolean; + // Maximum execution time in milliseconds (default: 120000) + maxDurationMs?: number; + // Maximum number of tool calls allowed (default: 15) + maxToolCalls?: number; +} + +/** + * Factory function for creating CodeTaskParams + */ +export const createCodeTaskParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // What the coding task should accomplish (natural language) + description: string; + // Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' + taskType?: string; + // File paths already known to be relevant (hints for discovery phase) + relevantFiles?: string[]; + // Execute read-only commands normally but mock writes. Returns predicted changes without modifying files + dryRun?: boolean; + // Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level + securityTier?: string; + // Enable multi-agent delegation for parallel execution across file clusters + delegationEnabled?: boolean; + // Maximum execution time in milliseconds (default: 120000) + maxDurationMs?: number; + // Maximum number of tool calls allowed (default: 15) + maxToolCalls?: number; + } +): CodeTaskParams => createPayload(context, sessionId, { + taskType: data.taskType ?? '', + relevantFiles: data.relevantFiles ?? undefined, + dryRun: data.dryRun ?? false, + securityTier: data.securityTier ?? '', + delegationEnabled: data.delegationEnabled ?? false, + maxDurationMs: data.maxDurationMs ?? 0, + maxToolCalls: data.maxToolCalls ?? 0, + ...data +}); + +/** + * Code Task Command Result + */ +export interface CodeTaskResult extends CommandResult { + success: boolean; + // Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' + status: string; + // Human-readable summary of what was accomplished + summary: string; + // The LLM-generated plan summary + planSummary: string; + // Assessed risk level: 'low' | 'medium' | 'high' | 'critical' + riskLevel: string; + // Security tier used for execution + securityTier: string; + // Total number of steps in the plan + stepsTotal: number; + // Number of steps that completed successfully + stepsCompleted: number; + // Files that were modified during execution + filesModified: string[]; + // Files that were created during execution + filesCreated: string[]; + // Total tool calls used + totalToolCalls: number; + // Total execution time in milliseconds + totalDurationMs: number; + // Change IDs from file operations (for potential undo) + changeIds: string[]; + // Errors encountered during execution + errors: string[]; + // Governance proposal ID if plan requires approval (status='pending_approval') + proposalId: string; + error?: JTAGError; +} + +/** + * Factory function for creating CodeTaskResult with defaults + */ +export const createCodeTaskResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' + status?: string; + // Human-readable summary of what was accomplished + summary?: string; + // The LLM-generated plan summary + planSummary?: string; + // Assessed risk level: 'low' | 'medium' | 'high' | 'critical' + riskLevel?: string; + // Security tier used for execution + securityTier?: string; + // Total number of steps in the plan + stepsTotal?: number; + // Number of steps that completed successfully + stepsCompleted?: number; + // Files that were modified during execution + filesModified?: string[]; + // Files that were created during execution + filesCreated?: string[]; + // Total tool calls used + totalToolCalls?: number; + // Total execution time in milliseconds + totalDurationMs?: number; + // Change IDs from file operations (for potential undo) + changeIds?: string[]; + // Errors encountered during execution + errors?: string[]; + // Governance proposal ID if plan requires approval (status='pending_approval') + proposalId?: string; + error?: JTAGError; + } +): CodeTaskResult => createPayload(context, sessionId, { + status: data.status ?? '', + summary: data.summary ?? '', + planSummary: data.planSummary ?? '', + riskLevel: data.riskLevel ?? '', + securityTier: data.securityTier ?? '', + stepsTotal: data.stepsTotal ?? 0, + stepsCompleted: data.stepsCompleted ?? 0, + filesModified: data.filesModified ?? [], + filesCreated: data.filesCreated ?? [], + totalToolCalls: data.totalToolCalls ?? 0, + totalDurationMs: data.totalDurationMs ?? 0, + changeIds: data.changeIds ?? [], + errors: data.errors ?? [], + proposalId: data.proposalId ?? '', + ...data +}); + +/** + * Smart Code Task-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeTaskResultFromParams = ( + params: CodeTaskParams, + differences: Omit +): CodeTaskResult => transformPayload(params, differences); + +/** + * Code Task β€” Type-safe command executor + * + * Usage: + * import { CodeTask } from '...shared/CodeTaskTypes'; + * const result = await CodeTask.execute({ ... }); + */ +export const CodeTask = { + execute(params: CommandInput): Promise { + return Commands.execute('code/task', params as Partial); + }, + commandName: 'code/task' as const, +} as const; diff --git a/src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts b/src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts new file mode 100644 index 000000000..863ca708d --- /dev/null +++ b/src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * CodeTask Command Integration Tests + * + * Tests Code Task command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Task/test/integration/CodeTaskIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ CodeTask Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Code Task command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Code Task command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Code Task']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Code Task returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Code Task succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Code Task']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Code Task']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Code Task']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Code Task']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Code Task']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllCodeTaskIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeTask Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL CodeTask INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ CodeTask integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeTaskIntegrationTests(); +} else { + module.exports = { runAllCodeTaskIntegrationTests }; +} diff --git a/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts b/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts new file mode 100644 index 000000000..0011dabe6 --- /dev/null +++ b/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * CodeTask Command Unit Tests + * + * Tests Code Task command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Code Task/test/unit/CodeTaskCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { CodeTaskParams, CodeTaskResult } from '../../shared/CodeTaskTypes'; + +console.log('πŸ§ͺ CodeTask Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Code Task logic for testing + */ +async function mockCodeTaskCommand(params: CodeTaskParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Code Task' or see the Code Task README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as CodeTaskResult; +} + +/** + * Test 1: Command structure validation + */ +function testCodeTaskCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: CodeTask command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Code Task command + const validParams: CodeTaskParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockCodeTaskExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Task command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: CodeTaskParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockCodeTaskCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testCodeTaskRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as CodeTaskParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeTaskParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockCodeTaskCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testCodeTaskOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: CodeTaskParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockCodeTaskCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: CodeTaskParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockCodeTaskCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testCodeTaskPerformance(): Promise { + console.log('\n⚑ Test 5: CodeTask performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockCodeTaskCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeTaskParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `CodeTask completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testCodeTaskResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeTask result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockCodeTaskCommand({ + // TODO: Add your parameters + context, + sessionId + } as CodeTaskParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllCodeTaskUnitTests(): Promise { + console.log('πŸš€ Starting CodeTask Command Unit Tests\n'); + + try { + testCodeTaskCommandStructure(); + await testMockCodeTaskExecution(); + await testCodeTaskRequiredParams(); + await testCodeTaskOptionalParams(); + await testCodeTaskPerformance(); + await testCodeTaskResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeTask UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ CodeTask unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllCodeTaskUnitTests(); +} else { + module.exports = { runAllCodeTaskUnitTests }; +} diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index 0298a0e92..a88f6b103 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-02T03:02:21.661Z", + "generated": "2026-02-02T04:27:03.817Z", "version": "1.0.0", "commands": [ { @@ -5266,6 +5266,52 @@ } } }, + { + "name": "code/task", + "description": "Code Task Command - Shared Types\n *\n * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation.", + "params": { + "description": { + "type": "string", + "required": true, + "description": "description parameter" + }, + "taskType": { + "type": "string", + "required": false, + "description": "taskType parameter" + }, + "relevantFiles": { + "type": "array", + "required": false, + "description": "relevantFiles parameter" + }, + "dryRun": { + "type": "boolean", + "required": false, + "description": "dryRun parameter" + }, + "securityTier": { + "type": "string", + "required": false, + "description": "securityTier parameter" + }, + "delegationEnabled": { + "type": "boolean", + "required": false, + "description": "delegationEnabled parameter" + }, + "maxDurationMs": { + "type": "number", + "required": false, + "description": "maxDurationMs parameter" + }, + "maxToolCalls": { + "type": "number", + "required": false, + "description": "maxToolCalls parameter" + } + } + }, { "name": "code/search", "description": "Code Search Command - Shared Types\n *\n * Search for a regex pattern across workspace files. Respects .gitignore, supports glob-based file filtering. Returns matching lines with context.", diff --git a/src/debug/jtag/generator/specs/code-task.json b/src/debug/jtag/generator/specs/code-task.json new file mode 100644 index 000000000..a477fe58f --- /dev/null +++ b/src/debug/jtag/generator/specs/code-task.json @@ -0,0 +1,150 @@ +{ + "name": "code/task", + "description": "Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation.", + "params": [ + { + "name": "description", + "type": "string", + "optional": false, + "description": "What the coding task should accomplish (natural language)" + }, + { + "name": "taskType", + "type": "string", + "optional": true, + "description": "Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation'" + }, + { + "name": "relevantFiles", + "type": "string[]", + "optional": true, + "description": "File paths already known to be relevant (hints for discovery phase)" + }, + { + "name": "dryRun", + "type": "boolean", + "optional": true, + "description": "Execute read-only commands normally but mock writes. Returns predicted changes without modifying files" + }, + { + "name": "securityTier", + "type": "string", + "optional": true, + "description": "Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level" + }, + { + "name": "delegationEnabled", + "type": "boolean", + "optional": true, + "description": "Enable multi-agent delegation for parallel execution across file clusters" + }, + { + "name": "maxDurationMs", + "type": "number", + "optional": true, + "description": "Maximum execution time in milliseconds (default: 120000)" + }, + { + "name": "maxToolCalls", + "type": "number", + "optional": true, + "description": "Maximum number of tool calls allowed (default: 15)" + } + ], + "results": [ + { + "name": "status", + "type": "string", + "description": "Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval'" + }, + { + "name": "summary", + "type": "string", + "description": "Human-readable summary of what was accomplished" + }, + { + "name": "planSummary", + "type": "string", + "description": "The LLM-generated plan summary" + }, + { + "name": "riskLevel", + "type": "string", + "description": "Assessed risk level: 'low' | 'medium' | 'high' | 'critical'" + }, + { + "name": "securityTier", + "type": "string", + "description": "Security tier used for execution" + }, + { + "name": "stepsTotal", + "type": "number", + "description": "Total number of steps in the plan" + }, + { + "name": "stepsCompleted", + "type": "number", + "description": "Number of steps that completed successfully" + }, + { + "name": "filesModified", + "type": "string[]", + "description": "Files that were modified during execution" + }, + { + "name": "filesCreated", + "type": "string[]", + "description": "Files that were created during execution" + }, + { + "name": "totalToolCalls", + "type": "number", + "description": "Total tool calls used" + }, + { + "name": "totalDurationMs", + "type": "number", + "description": "Total execution time in milliseconds" + }, + { + "name": "changeIds", + "type": "string[]", + "description": "Change IDs from file operations (for potential undo)" + }, + { + "name": "errors", + "type": "string[]", + "description": "Errors encountered during execution" + }, + { + "name": "proposalId", + "type": "string", + "description": "Governance proposal ID if plan requires approval (status='pending_approval')" + } + ], + "examples": [ + { + "description": "Simple code edit task", + "command": "./jtag code/task --description=\"Add input validation to the login function in auth.ts\"", + "expectedResult": "{ status: \"completed\", stepsCompleted: 3, filesModified: [\"auth.ts\"] }" + }, + { + "description": "Dry run to preview changes", + "command": "./jtag code/task --description=\"Refactor UserService to use dependency injection\" --dryRun=true", + "expectedResult": "{ status: \"completed\", filesModified: [], summary: \"Dry run: would modify 3 files\" }" + }, + { + "description": "Discovery-only task", + "command": "./jtag code/task --description=\"Find all files using deprecated API\" --taskType=\"discovery\" --securityTier=\"discovery\"", + "expectedResult": "{ status: \"completed\", stepsCompleted: 2, filesModified: [] }" + }, + { + "description": "With relevant file hints", + "command": "./jtag code/task --description=\"Fix the off-by-one error\" --relevantFiles='[\"src/utils/pagination.ts\"]'", + "expectedResult": "{ status: \"completed\", filesModified: [\"src/utils/pagination.ts\"] }" + } + ], + "accessLevel": "ai-safe", + "environment": "server" +} diff --git a/src/debug/jtag/server/generated.ts b/src/debug/jtag/server/generated.ts index 7c91f9438..81480557f 100644 --- a/src/debug/jtag/server/generated.ts +++ b/src/debug/jtag/server/generated.ts @@ -1,7 +1,7 @@ /** * Server Structure Registry - Auto-generated * - * Contains 18 daemons and 209 commands and 3 adapters. + * Contains 18 daemons and 210 commands and 3 adapters. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -65,6 +65,7 @@ import { CodeEditServerCommand } from './../commands/code/edit/server/CodeEditSe import { CodeHistoryServerCommand } from './../commands/code/history/server/CodeHistoryServerCommand'; import { CodeReadServerCommand } from './../commands/code/read/server/CodeReadServerCommand'; import { CodeSearchServerCommand } from './../commands/code/search/server/CodeSearchServerCommand'; +import { CodeTaskServerCommand } from './../commands/code/task/server/CodeTaskServerCommand'; import { CodeTreeServerCommand } from './../commands/code/tree/server/CodeTreeServerCommand'; import { CodeUndoServerCommand } from './../commands/code/undo/server/CodeUndoServerCommand'; import { CodeWriteServerCommand } from './../commands/code/write/server/CodeWriteServerCommand'; @@ -538,6 +539,11 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CodeSearchServerCommand', commandClass: CodeSearchServerCommand }, +{ + name: 'code/task', + className: 'CodeTaskServerCommand', + commandClass: CodeTaskServerCommand + }, { name: 'code/tree', className: 'CodeTreeServerCommand', diff --git a/src/debug/jtag/shared/generated-command-constants.ts b/src/debug/jtag/shared/generated-command-constants.ts index 411895708..d4dd944e2 100644 --- a/src/debug/jtag/shared/generated-command-constants.ts +++ b/src/debug/jtag/shared/generated-command-constants.ts @@ -64,6 +64,7 @@ export const COMMANDS = { CODE_HISTORY: 'code/history', CODE_READ: 'code/read', CODE_SEARCH: 'code/search', + CODE_TASK: 'code/task', CODE_TREE: 'code/tree', CODE_UNDO: 'code/undo', CODE_WRITE: 'code/write', diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts index 48a5fbec3..eda066488 100644 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -7,11 +7,12 @@ * Execution lifecycle: * 1. Discover β€” code/tree + code/search to understand codebase * 2. Read β€” code/read to gather context - * 3. Plan β€” PlanFormulator decomposes task (already done before orchestrator runs) - * 4. Execute β€” Run each step via code/* commands - * 5. Verify β€” After each write/edit, read back to confirm - * 6. Fix β€” If verification fails, retry (max 3 attempts per step) - * 7. Report β€” Summarize changes via code/history + * 3. Plan β€” PlanFormulator decomposes task via LLM + * 4. Governance β€” Check if plan requires team approval (high-risk/system-tier) + * 5. Execute β€” Run each step via code/* commands + * 6. Verify β€” After each write/edit, read back to confirm + * 7. Fix β€” If verification fails, retry (max 3 attempts per step) + * 8. Report β€” Summarize changes via code/history * * Persistence: * - Plans are persisted as CodingPlanEntity via DataDaemon @@ -33,11 +34,15 @@ import type { StepResult, StepStatus, ExecutionOptions, + RiskLevel, + SecurityTierLevel, } from '../shared/CodingTypes'; import { PlanFormulator } from './PlanFormulator'; import { CodingModelSelector } from './CodingModelSelector'; import { ToolAllowlistEnforcer, ToolDeniedError } from './ToolAllowlistEnforcer'; import { getTier } from './SecurityTier'; +import { PlanGovernance } from './PlanGovernance'; +import { CodeTaskDelegator } from './CodeTaskDelegator'; import { Commands } from '../../core/shared/Commands'; import { Logger } from '../../core/logging/Logger'; import { CodingPlanEntity } from '../../data/entities/CodingPlanEntity'; @@ -99,24 +104,29 @@ class ExecutionBudget { export class CodeAgentOrchestrator { private readonly modelSelector: CodingModelSelector; private readonly planFormulator: PlanFormulator; + private readonly governance: PlanGovernance; + private readonly delegator: CodeTaskDelegator; constructor(modelSelector?: CodingModelSelector) { this.modelSelector = modelSelector ?? new CodingModelSelector(); this.planFormulator = new PlanFormulator(this.modelSelector); + this.governance = new PlanGovernance(); + this.delegator = new CodeTaskDelegator(); } /** * Execute a coding task end-to-end: * 1. Optionally discover codebase context * 2. Formulate a plan via LLM - * 3. Persist the plan as a CodingPlanEntity - * 4. Execute each step (updating entity in real-time) - * 5. Return results + * 3. Check governance (high-risk plans require team approval) + * 4. Persist the plan as a CodingPlanEntity + * 5. Execute each step (updating entity in real-time) + * 6. Return results * * Options: * - dryRun: Execute read-only commands normally, but mock write/edit commands * - securityTier: Override the plan's required tier - * - delegationEnabled: Enable multi-agent delegation (future) + * - delegationEnabled: Enable multi-agent delegation for parallel execution */ async execute(task: CodingTask, options?: ExecutionOptions): Promise { const dryRun = options?.dryRun ?? false; @@ -156,6 +166,26 @@ export class CodeAgentOrchestrator { // Phase 2c: Persist plan as entity (best-effort β€” works without DataDaemon) planEntity = await this.persistPlan(task, plan); + // Phase 2d: Governance β€” check if plan requires approval + if (planEntity && this.governance.shouldRequireApproval(planEntity)) { + log.info(`Plan requires governance approval (risk: ${plan.riskLevel}, tier: ${tierLevel})`); + const proposalId = await this.governance.proposePlan(planEntity); + + if (proposalId) { + // Update plan status to 'proposed' and return early + await this.updatePlanStatus(planEntity, 'proposed'); + return this.buildResult( + task, 'pending_approval', + `Plan submitted for governance approval: ${plan.summary}`, + [], filesModified, filesCreated, changeIds, errors, budget, + { proposalId: proposalId as string, planMetadata: { riskLevel: plan.riskLevel, requiredTier: plan.requiredTier, planSummary: plan.summary } }, + ); + } + + // Governance proposal failed β€” log and continue (auto-approve) + log.warn('Governance proposal creation failed, auto-approving plan'); + } + // Phase 3: Execute plan steps in dependency order const completedSteps = new Set(); @@ -219,7 +249,10 @@ export class CodeAgentOrchestrator { ? `Completed: ${plan.summary}` : `${status}: ${stepResults.filter(r => r.status === 'completed').length}/${plan.steps.length} steps completed`; - const codingResult = this.buildResult(task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget); + const codingResult = this.buildResult( + task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget, + { planMetadata: { riskLevel: plan.riskLevel, requiredTier: plan.requiredTier, planSummary: plan.summary } }, + ); // Finalize persisted plan await this.finalizePlan(planEntity, codingResult); @@ -459,6 +492,7 @@ export class CodeAgentOrchestrator { changeIds: string[], errors: string[], budget: ExecutionBudget, + extra?: { proposalId?: string; planMetadata?: CodingResult['planMetadata'] }, ): CodingResult { return { taskId: task.id, @@ -471,6 +505,8 @@ export class CodeAgentOrchestrator { totalDurationMs: budget.elapsedMs, changeIds, errors, + proposalId: extra?.proposalId, + planMetadata: extra?.planMetadata, }; } @@ -564,6 +600,25 @@ export class CodeAgentOrchestrator { } } + /** + * Update the plan's top-level status. + */ + private async updatePlanStatus( + planEntity: CodingPlanEntity, + status: CodingPlanStatus, + ): Promise { + try { + const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); + await DataDaemon.update( + COLLECTIONS.CODING_PLANS, + planEntity.id as UUID, + { status } as Partial, + ); + } catch { + // Best-effort + } + } + /** * Finalize the persisted plan with execution results. */ @@ -576,11 +631,12 @@ export class CodeAgentOrchestrator { try { const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); - const statusMap: Record = { + const statusMap: Record = { completed: 'completed', partial: 'partial', failed: 'failed', budget_exceeded: 'partial', + pending_approval: 'proposed', }; await DataDaemon.update( diff --git a/src/debug/jtag/system/code/shared/CodingTypes.ts b/src/debug/jtag/system/code/shared/CodingTypes.ts index 12d67cfc1..03151a204 100644 --- a/src/debug/jtag/system/code/shared/CodingTypes.ts +++ b/src/debug/jtag/system/code/shared/CodingTypes.ts @@ -211,7 +211,7 @@ export interface StepResult { // Coding Result (Final Output) // ============================================================================ -export type CodingResultStatus = 'completed' | 'partial' | 'failed' | 'budget_exceeded'; +export type CodingResultStatus = 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval'; /** * Final result of executing a coding task. @@ -246,6 +246,16 @@ export interface CodingResult { /** Errors encountered */ readonly errors: string[]; + + /** Governance proposal ID (set when status is 'pending_approval') */ + readonly proposalId?: string; + + /** Plan metadata (risk level, tier, plan summary) */ + readonly planMetadata?: { + readonly riskLevel: RiskLevel; + readonly requiredTier: SecurityTierLevel; + readonly planSummary: string; + }; } // ============================================================================ From e80783a3f0867d776a27c99e8ec66045f2949c8e Mon Sep 17 00:00:00 2001 From: Joel Date: Sun, 1 Feb 2026 23:06:13 -0600 Subject: [PATCH 10/14] Workspace bootstrapping: auto-create per-persona Rust workspaces CodeAgentOrchestrator.ensureWorkspace() now creates workspace directory and registers it in Rust backend before first code/* operation. Personas get writable workspace + read-only codebase access for discovery. --- .../code/server/CodeAgentOrchestrator.ts | 33 +++++++++++++++++++ .../unit/code/CodeAgentOrchestrator.test.ts | 13 ++++++++ 2 files changed, 46 insertions(+) diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts index eda066488..09e61d360 100644 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -49,6 +49,9 @@ import { CodingPlanEntity } from '../../data/entities/CodingPlanEntity'; import type { CodingStepSnapshot, CodingPlanStatus } from '../../data/entities/CodingPlanEntity'; import { COLLECTIONS } from '../../shared/Constants'; import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import * as fs from 'fs'; +import * as path from 'path'; const log = Logger.create('CodeAgentOrchestrator', 'code'); @@ -101,6 +104,9 @@ class ExecutionBudget { } } +/** Track which personas have workspaces initialized this process lifetime */ +const initializedWorkspaces = new Set(); + export class CodeAgentOrchestrator { private readonly modelSelector: CodingModelSelector; private readonly planFormulator: PlanFormulator; @@ -114,6 +120,30 @@ export class CodeAgentOrchestrator { this.delegator = new CodeTaskDelegator(); } + /** + * Ensure a workspace exists in the Rust backend for this persona. + * Creates the workspace directory and registers it with PathSecurity. + * The persona gets a writable workspace under .continuum/personas/{id}/workspace/ + * and read-only access to the main codebase for discovery. + */ + private async ensureWorkspace(personaId: string): Promise { + if (initializedWorkspaces.has(personaId)) return; + + const jtagRoot = process.cwd(); + const workspaceDir = path.join(jtagRoot, '.continuum', 'personas', personaId, 'workspace'); + + // Create workspace directory if it doesn't exist + if (!fs.existsSync(workspaceDir)) { + fs.mkdirSync(workspaceDir, { recursive: true }); + log.info(`Created workspace directory: ${workspaceDir}`); + } + + // Register with Rust backend β€” writable workspace + read-only codebase access + await CodeDaemon.createWorkspace(personaId, workspaceDir, [jtagRoot]); + initializedWorkspaces.add(personaId); + log.info(`Workspace initialized for persona ${personaId}`); + } + /** * Execute a coding task end-to-end: * 1. Optionally discover codebase context @@ -145,6 +175,9 @@ export class CodeAgentOrchestrator { let planEntity: CodingPlanEntity | undefined; try { + // Phase 0: Ensure workspace exists in Rust backend + await this.ensureWorkspace(task.personaId as string); + // Phase 1: Discovery (optional β€” gather codebase context for planning) let codebaseContext: string | undefined; if (!budget.exceeded) { diff --git a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts index 8a0925844..2e4417f1e 100644 --- a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts +++ b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts @@ -43,6 +43,19 @@ vi.mock('../../../system/core/logging/Logger', () => ({ }, })); +// Mock CodeDaemon.createWorkspace (workspace bootstrap) +vi.mock('../../../daemons/code-daemon/shared/CodeDaemon', () => ({ + CodeDaemon: { + createWorkspace: vi.fn().mockResolvedValue(undefined), + }, +})); + +// Mock fs for workspace directory creation +vi.mock('fs', () => ({ + existsSync: vi.fn().mockReturnValue(true), + mkdirSync: vi.fn(), +})); + function makeTask(overrides?: Partial): CodingTask { return { id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, From c73d2eae486d577064036c38296152699b723836 Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 2 Feb 2026 05:52:56 -0600 Subject: [PATCH 11/14] Coding pipeline: architecture context, build verification, git worktrees, git write ops, iterative dev loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1: Orchestrator reads CLAUDE.md + architecture docs during discovery so AI plans follow project conventions. Phase 2: New code/verify command β€” runs tsc --noEmit via ExecutionSandbox, parses TypeScript errors, auto-verifies after write/edit steps. Added to write tier. Phase 3: WorkspaceStrategy abstraction routes sandbox (isolated dir) vs worktree (git sparse checkout on real repo). CodingTask extended with workspaceMode/sparsePaths. code/task command validates and passes through. Phase 4: Rust git_bridge extended with git_add/commit/push. IPC handlers + CodeDaemon methods for all git write ops. New code/git command (status/diff/log/add/commit/push) with SecurityTier gating. PlanFormulator knows commit action. Phase 5: Verifyβ†’re-plan iteration loop in orchestrator. When auto-verify fails, re-plans with error context (quick-fix mode), executes fix, re-verifies. Configurable via autoVerify/maxVerifyIterations on ExecutionOptions. 387 TypeScript tests (15 files), 362 Rust tests β€” all passing. --- src/debug/jtag/browser/generated.ts | 26 +- src/debug/jtag/cli.ts | 7 +- .../jtag/commands/challenge/list/.npmignore | 20 + .../jtag/commands/challenge/list/README.md | 156 ++++++ .../browser/ChallengeListBrowserCommand.ts | 21 + .../jtag/commands/challenge/list/package.json | 35 ++ .../list/server/ChallengeListServerCommand.ts | 115 +++++ .../list/shared/ChallengeListTypes.ts | 123 +++++ .../ChallengeListIntegration.test.ts | 196 ++++++++ .../test/unit/ChallengeListCommand.test.ts | 259 ++++++++++ .../jtag/commands/challenge/run/.npmignore | 20 + .../jtag/commands/challenge/run/README.md | 183 +++++++ .../run/browser/ChallengeRunBrowserCommand.ts | 21 + .../jtag/commands/challenge/run/package.json | 35 ++ .../run/server/ChallengeRunServerCommand.ts | 177 +++++++ .../challenge/run/shared/ChallengeRunTypes.ts | 145 ++++++ .../ChallengeRunIntegration.test.ts | 196 ++++++++ .../run/test/unit/ChallengeRunCommand.test.ts | 259 ++++++++++ src/debug/jtag/commands/code/git/README.md | 50 ++ .../code/git/browser/CodeGitBrowserCommand.ts | 20 + src/debug/jtag/commands/code/git/package.json | 34 ++ .../code/git/server/CodeGitServerCommand.ts | 155 ++++++ .../commands/code/git/shared/CodeGitTypes.ts | 146 ++++++ .../code/task/server/CodeTaskServerCommand.ts | 15 + .../code/task/shared/CodeTaskTypes.ts | 10 + src/debug/jtag/commands/code/verify/README.md | 69 +++ .../browser/CodeVerifyBrowserCommand.ts | 22 + .../jtag/commands/code/verify/package.json | 34 ++ .../verify/server/CodeVerifyServerCommand.ts | 250 ++++++++++ .../code/verify/shared/CodeVerifyTypes.ts | 128 +++++ .../code-daemon/server/CodeDaemonServer.ts | 16 + .../daemons/code-daemon/shared/CodeDaemon.ts | 28 ++ .../data-daemon/server/EntityRegistry.ts | 3 + src/debug/jtag/generated-command-schemas.json | 121 ++++- .../jtag/generator/specs/challenge-list.json | 44 ++ .../jtag/generator/specs/challenge-run.json | 101 ++++ src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/server/generated.ts | 26 +- .../shared/generated-command-constants.ts | 4 + src/debug/jtag/shared/version.ts | 2 +- .../code/challenges/ChallengeDefinitions.ts | 445 ++++++++++++++++++ .../code/server/CodeAgentOrchestrator.ts | 272 +++++++++-- .../code/server/CodingChallengeRunner.ts | 239 ++++++++++ .../jtag/system/code/server/CodingJudge.ts | 288 ++++++++++++ .../jtag/system/code/server/PlanFormulator.ts | 62 ++- .../jtag/system/code/server/SecurityTier.ts | 2 + .../system/code/server/WorkspaceStrategy.ts | 195 ++++++++ .../jtag/system/code/shared/CodingTypes.ts | 27 +- .../data/entities/CodingChallengeEntity.ts | 276 +++++++++++ src/debug/jtag/system/shared/Constants.ts | 3 + .../unit/code/CodeAgentOrchestrator.test.ts | 129 ++++- .../tests/unit/code/CodeGitCommand.test.ts | 57 +++ .../tests/unit/code/CodeVerifyCommand.test.ts | 132 ++++++ .../jtag/tests/unit/code/SecurityTier.test.ts | 5 + .../tests/unit/code/WorkspaceStrategy.test.ts | 334 +++++++++++++ .../continuum-core/bindings/RustCoreIPC.ts | 69 +++ .../continuum-core/src/code/git_bridge.rs | 82 ++++ .../continuum-core/src/code/path_security.rs | 73 ++- .../workers/continuum-core/src/ipc/mod.rs | 104 ++++ 60 files changed, 5972 insertions(+), 100 deletions(-) create mode 100644 src/debug/jtag/commands/challenge/list/.npmignore create mode 100644 src/debug/jtag/commands/challenge/list/README.md create mode 100644 src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts create mode 100644 src/debug/jtag/commands/challenge/list/package.json create mode 100644 src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts create mode 100644 src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts create mode 100644 src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts create mode 100644 src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts create mode 100644 src/debug/jtag/commands/challenge/run/.npmignore create mode 100644 src/debug/jtag/commands/challenge/run/README.md create mode 100644 src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts create mode 100644 src/debug/jtag/commands/challenge/run/package.json create mode 100644 src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts create mode 100644 src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts create mode 100644 src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts create mode 100644 src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts create mode 100644 src/debug/jtag/commands/code/git/README.md create mode 100644 src/debug/jtag/commands/code/git/browser/CodeGitBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/git/package.json create mode 100644 src/debug/jtag/commands/code/git/server/CodeGitServerCommand.ts create mode 100644 src/debug/jtag/commands/code/git/shared/CodeGitTypes.ts create mode 100644 src/debug/jtag/commands/code/verify/README.md create mode 100644 src/debug/jtag/commands/code/verify/browser/CodeVerifyBrowserCommand.ts create mode 100644 src/debug/jtag/commands/code/verify/package.json create mode 100644 src/debug/jtag/commands/code/verify/server/CodeVerifyServerCommand.ts create mode 100644 src/debug/jtag/commands/code/verify/shared/CodeVerifyTypes.ts create mode 100644 src/debug/jtag/generator/specs/challenge-list.json create mode 100644 src/debug/jtag/generator/specs/challenge-run.json create mode 100644 src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts create mode 100644 src/debug/jtag/system/code/server/CodingChallengeRunner.ts create mode 100644 src/debug/jtag/system/code/server/CodingJudge.ts create mode 100644 src/debug/jtag/system/code/server/WorkspaceStrategy.ts create mode 100644 src/debug/jtag/system/data/entities/CodingChallengeEntity.ts create mode 100644 src/debug/jtag/tests/unit/code/CodeGitCommand.test.ts create mode 100644 src/debug/jtag/tests/unit/code/CodeVerifyCommand.test.ts create mode 100644 src/debug/jtag/tests/unit/code/WorkspaceStrategy.test.ts diff --git a/src/debug/jtag/browser/generated.ts b/src/debug/jtag/browser/generated.ts index 254e61cd7..cb4e79c23 100644 --- a/src/debug/jtag/browser/generated.ts +++ b/src/debug/jtag/browser/generated.ts @@ -1,7 +1,7 @@ /** * Browser Structure Registry - Auto-generated * - * Contains 11 daemons and 180 commands and 2 adapters and 27 widgets. + * Contains 11 daemons and 184 commands and 2 adapters and 27 widgets. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -43,14 +43,18 @@ import { AIValidateResponseBrowserCommand } from './../commands/ai/validate-resp import { CanvasStrokeAddBrowserCommand } from './../commands/canvas/stroke/add/browser/CanvasStrokeAddBrowserCommand'; import { CanvasStrokeListBrowserCommand } from './../commands/canvas/stroke/list/browser/CanvasStrokeListBrowserCommand'; import { CanvasVisionBrowserCommand } from './../commands/canvas/vision/browser/CanvasVisionBrowserCommand'; +import { ChallengeListBrowserCommand } from './../commands/challenge/list/browser/ChallengeListBrowserCommand'; +import { ChallengeRunBrowserCommand } from './../commands/challenge/run/browser/ChallengeRunBrowserCommand'; import { CodeDiffBrowserCommand } from './../commands/code/diff/browser/CodeDiffBrowserCommand'; import { CodeEditBrowserCommand } from './../commands/code/edit/browser/CodeEditBrowserCommand'; +import { CodeGitBrowserCommand } from './../commands/code/git/browser/CodeGitBrowserCommand'; import { CodeHistoryBrowserCommand } from './../commands/code/history/browser/CodeHistoryBrowserCommand'; import { CodeReadBrowserCommand } from './../commands/code/read/browser/CodeReadBrowserCommand'; import { CodeSearchBrowserCommand } from './../commands/code/search/browser/CodeSearchBrowserCommand'; import { CodeTaskBrowserCommand } from './../commands/code/task/browser/CodeTaskBrowserCommand'; import { CodeTreeBrowserCommand } from './../commands/code/tree/browser/CodeTreeBrowserCommand'; import { CodeUndoBrowserCommand } from './../commands/code/undo/browser/CodeUndoBrowserCommand'; +import { CodeVerifyBrowserCommand } from './../commands/code/verify/browser/CodeVerifyBrowserCommand'; import { CodeWriteBrowserCommand } from './../commands/code/write/browser/CodeWriteBrowserCommand'; import { ActivityUserPresentCommand } from './../commands/collaboration/activity/user-present/browser/ActivityUserPresentCommand'; import { ChatAnalyzeBrowserCommand } from './../commands/collaboration/chat/analyze/browser/ChatAnalyzeBrowserCommand'; @@ -421,6 +425,16 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionBrowserCommand', commandClass: CanvasVisionBrowserCommand }, +{ + name: 'challenge/list', + className: 'ChallengeListBrowserCommand', + commandClass: ChallengeListBrowserCommand + }, +{ + name: 'challenge/run', + className: 'ChallengeRunBrowserCommand', + commandClass: ChallengeRunBrowserCommand + }, { name: 'code/diff', className: 'CodeDiffBrowserCommand', @@ -431,6 +445,11 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CodeEditBrowserCommand', commandClass: CodeEditBrowserCommand }, +{ + name: 'code/git', + className: 'CodeGitBrowserCommand', + commandClass: CodeGitBrowserCommand + }, { name: 'code/history', className: 'CodeHistoryBrowserCommand', @@ -461,6 +480,11 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CodeUndoBrowserCommand', commandClass: CodeUndoBrowserCommand }, +{ + name: 'code/verify', + className: 'CodeVerifyBrowserCommand', + commandClass: CodeVerifyBrowserCommand + }, { name: 'code/write', className: 'CodeWriteBrowserCommand', diff --git a/src/debug/jtag/cli.ts b/src/debug/jtag/cli.ts index 37d813654..36031b37d 100644 --- a/src/debug/jtag/cli.ts +++ b/src/debug/jtag/cli.ts @@ -388,8 +388,11 @@ async function main() { const isInferenceCommand = command.startsWith('inference/'); const isSocialCommand = command.startsWith('social/'); const isCollaborationCommand = command.startsWith('collaboration/'); - const needsLongerTimeout = isAICommand || isInferenceCommand || isSocialCommand || isInterfaceCommand || isCollaborationCommand; - const timeoutMs = isGenomeCommand ? 300000 : needsLongerTimeout ? 60000 : 10000; // 5min for genome, 60s for AI/inference/social/interface/collaboration, 10s for others + const isChallengeCommand = command.startsWith('challenge/'); + const isCodeCommand = command.startsWith('code/'); + const needsLongerTimeout = isAICommand || isInferenceCommand || isSocialCommand || isInterfaceCommand || isCollaborationCommand || isCodeCommand; + const needsLongTimeout = isGenomeCommand || isChallengeCommand; + const timeoutMs = needsLongTimeout ? 300000 : needsLongerTimeout ? 60000 : 10000; // 5min for genome/challenge, 60s for AI/inference/social/interface/collaboration/code, 10s for others const timeoutSeconds = timeoutMs / 1000; const commandTimeout = new Promise((_, reject) => diff --git a/src/debug/jtag/commands/challenge/list/.npmignore b/src/debug/jtag/commands/challenge/list/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/challenge/list/README.md b/src/debug/jtag/commands/challenge/list/README.md new file mode 100644 index 000000000..a42ea610d --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/README.md @@ -0,0 +1,156 @@ +# Challenge List Command + +List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag challenge/list [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('challenge/list', { + // your parameters here +}); +``` + +## Parameters + +- **difficulty** (optional): `string` - Filter by difficulty: beginner, intermediate, advanced, expert +- **personaId** (optional): `string` - Show scores for a specific persona + +## Result + +Returns `ChallengeListResult` with: + +Returns CommandResult with: +- **challenges**: `object[]` - Array of challenge summaries with name, difficulty, sequence, attempts, best score +- **totalChallenges**: `number` - Total number of challenges +- **completedByPersona**: `number` - Number of challenges passed by the specified persona + +## Examples + +### List all challenges + +```bash +./jtag challenge/list +``` + +**Expected result:** +{ totalChallenges: 5, challenges: [{ name: "Add a function...", difficulty: "beginner", ... }] } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help challenge/list +``` + +**Tool:** +```typescript +// Use your help tool with command name 'challenge/list' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme challenge/list +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'challenge/list' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Challenge List/test/unit/ChallengeListCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Challenge List/test/integration/ChallengeListIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/ChallengeListTypes.ts` +- **Browser**: Browser-specific implementation in `browser/ChallengeListBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/ChallengeListServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/ChallengeListCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/ChallengeListIntegration.test.ts` diff --git a/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts b/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts new file mode 100644 index 000000000..916f38953 --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Challenge List Command - Browser Implementation + * + * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { ChallengeListParams, ChallengeListResult } from '../shared/ChallengeListTypes'; + +export class ChallengeListBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('challenge/list', context, subpath, commander); + } + + async execute(params: ChallengeListParams): Promise { + console.log('🌐 BROWSER: Delegating Challenge List to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/challenge/list/package.json b/src/debug/jtag/commands/challenge/list/package.json new file mode 100644 index 000000000..f3e571ec9 --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/challenge/list", + "version": "1.0.0", + "description": "List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training.", + "main": "server/ChallengeListServerCommand.ts", + "types": "shared/ChallengeListTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/ChallengeListIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "challenge/list" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts b/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts new file mode 100644 index 000000000..d1b1c28e9 --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts @@ -0,0 +1,115 @@ +/** + * Challenge List Command - Server Implementation + * + * Lists available coding challenges with difficulty, status, and best scores. + * Loads challenge definitions and enriches with attempt data from the database. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { ChallengeListParams, ChallengeListResult, ChallengeSummary } from '../shared/ChallengeListTypes'; +import { createChallengeListResultFromParams } from '../shared/ChallengeListTypes'; +import { ALL_CHALLENGES } from '@system/code/challenges/ChallengeDefinitions'; +import { CodingChallengeEntity } from '@system/data/entities/CodingChallengeEntity'; +import { Commands } from '@system/core/shared/Commands'; +import { COLLECTIONS } from '@system/shared/Constants'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class ChallengeListServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('challenge/list', context, subpath, commander); + } + + async execute(params: ChallengeListParams): Promise { + const personaId = (params.personaId ?? params.userId) as UUID | undefined; + + // Filter definitions by difficulty if specified + let definitions = ALL_CHALLENGES; + if (params.difficulty) { + definitions = definitions.filter(d => d.difficulty === params.difficulty); + } + + // Load persisted entities for attempt data (best-effort) + const entityMap = await this.loadPersistedEntities(); + + // Build summaries + const challenges: ChallengeSummary[] = definitions.map(def => { + const entity = entityMap.get(def.name); + + const summary: ChallengeSummary = { + name: def.name, + sequenceNumber: def.sequenceNumber, + difficulty: def.difficulty, + category: def.category, + description: def.description, + timeLimitMs: def.timeLimitMs, + toolCallLimit: def.toolCallLimit, + totalAttempts: entity?.totalAttempts ?? 0, + totalPasses: entity?.totalPasses ?? 0, + highScore: entity?.highScore ?? 0, + passRate: entity?.passRate ?? 0, + }; + + // Add persona-specific data if requested + if (personaId && entity) { + const best = entity.bestAttemptFor(personaId); + if (best) { + summary.personaBestScore = best.score; + summary.personaBestStatus = best.status; + summary.personaAttempts = entity.attempts.filter(a => a.personaId === personaId).length; + } + } + + return summary; + }); + + // Count completed challenges for persona + let completedByPersona = 0; + if (personaId) { + for (const def of ALL_CHALLENGES) { + const entity = entityMap.get(def.name); + if (entity) { + const best = entity.bestAttemptFor(personaId); + if (best?.status === 'passed') { + completedByPersona++; + } + } + } + } + + return createChallengeListResultFromParams(params, { + success: true, + challenges, + totalChallenges: definitions.length, + completedByPersona, + }); + } + + /** + * Load all persisted challenge entities from the database. + * Returns a map keyed by challenge name for easy lookup. + */ + private async loadPersistedEntities(): Promise> { + const map = new Map(); + + try { + const result = await Commands.execute('data/list', { + collection: COLLECTIONS.CODING_CHALLENGES, + limit: 100, + }); + + if (result?.success && Array.isArray(result.items)) { + for (const item of result.items) { + const entity = new CodingChallengeEntity(); + Object.assign(entity, item); + map.set(entity.name, entity); + } + } + } catch { + // Database not available β€” return empty map (all stats will be zero) + } + + return map; + } +} diff --git a/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts b/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts new file mode 100644 index 000000000..fae0cf6f9 --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts @@ -0,0 +1,123 @@ +/** + * Challenge List Command - Shared Types + * + * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Challenge List Command Parameters + */ +export interface ChallengeListParams extends CommandParams { + // Filter by difficulty: beginner, intermediate, advanced, expert + difficulty?: string; + // Show scores for a specific persona + personaId?: string; +} + +/** + * Factory function for creating ChallengeListParams + */ +export const createChallengeListParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Filter by difficulty: beginner, intermediate, advanced, expert + difficulty?: string; + // Show scores for a specific persona + personaId?: string; + } +): ChallengeListParams => createPayload(context, sessionId, { + difficulty: data.difficulty ?? '', + personaId: data.personaId ?? '', + ...data +}); + +/** + * Summary of a single challenge for list display + */ +export interface ChallengeSummary { + name: string; + sequenceNumber: number; + difficulty: string; + category: string; + description: string; + timeLimitMs: number; + toolCallLimit: number; + totalAttempts: number; + totalPasses: number; + highScore: number; + passRate: number; + /** Best score by the queried persona (if personaId provided) */ + personaBestScore?: number; + /** Best status by the queried persona */ + personaBestStatus?: string; + /** Number of attempts by the queried persona */ + personaAttempts?: number; +} + +/** + * Challenge List Command Result + */ +export interface ChallengeListResult extends CommandResult { + success: boolean; + // Array of challenge summaries with name, difficulty, sequence, attempts, best score + challenges: ChallengeSummary[]; + // Total number of challenges + totalChallenges: number; + // Number of challenges passed by the specified persona + completedByPersona: number; + error?: JTAGError; +} + +/** + * Factory function for creating ChallengeListResult with defaults + */ +export const createChallengeListResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Array of challenge summaries with name, difficulty, sequence, attempts, best score + challenges?: ChallengeSummary[]; + // Total number of challenges + totalChallenges?: number; + // Number of challenges passed by the specified persona + completedByPersona?: number; + error?: JTAGError; + } +): ChallengeListResult => createPayload(context, sessionId, { + challenges: data.challenges ?? [], + totalChallenges: data.totalChallenges ?? 0, + completedByPersona: data.completedByPersona ?? 0, + ...data +}); + +/** + * Smart Challenge List-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createChallengeListResultFromParams = ( + params: ChallengeListParams, + differences: Omit +): ChallengeListResult => transformPayload(params, differences); + +/** + * Challenge List β€” Type-safe command executor + * + * Usage: + * import { ChallengeList } from '...shared/ChallengeListTypes'; + * const result = await ChallengeList.execute({ ... }); + */ +export const ChallengeList = { + execute(params: CommandInput): Promise { + return Commands.execute('challenge/list', params as Partial); + }, + commandName: 'challenge/list' as const, +} as const; diff --git a/src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts b/src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts new file mode 100644 index 000000000..4d007ce5d --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * ChallengeList Command Integration Tests + * + * Tests Challenge List command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Challenge List/test/integration/ChallengeListIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ ChallengeList Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Challenge List command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Challenge List command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Challenge List']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Challenge List returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Challenge List succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Challenge List']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Challenge List']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Challenge List']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Challenge List']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Challenge List']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllChallengeListIntegrationTests(): Promise { + console.log('πŸš€ Starting ChallengeList Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL ChallengeList INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ ChallengeList integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllChallengeListIntegrationTests(); +} else { + module.exports = { runAllChallengeListIntegrationTests }; +} diff --git a/src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts b/src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts new file mode 100644 index 000000000..e5b44f93f --- /dev/null +++ b/src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * ChallengeList Command Unit Tests + * + * Tests Challenge List command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Challenge List/test/unit/ChallengeListCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { ChallengeListParams, ChallengeListResult } from '../../shared/ChallengeListTypes'; + +console.log('πŸ§ͺ ChallengeList Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Challenge List logic for testing + */ +async function mockChallengeListCommand(params: ChallengeListParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Challenge List' or see the Challenge List README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as ChallengeListResult; +} + +/** + * Test 1: Command structure validation + */ +function testChallengeListCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: ChallengeList command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Challenge List command + const validParams: ChallengeListParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockChallengeListExecution(): Promise { + console.log('\n⚑ Test 2: Mock Challenge List command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: ChallengeListParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockChallengeListCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testChallengeListRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as ChallengeListParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as ChallengeListParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockChallengeListCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testChallengeListOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: ChallengeListParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockChallengeListCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: ChallengeListParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockChallengeListCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testChallengeListPerformance(): Promise { + console.log('\n⚑ Test 5: ChallengeList performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockChallengeListCommand({ + // TODO: Add your parameters + context, + sessionId + } as ChallengeListParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `ChallengeList completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testChallengeListResultStructure(): Promise { + console.log('\nπŸ” Test 6: ChallengeList result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockChallengeListCommand({ + // TODO: Add your parameters + context, + sessionId + } as ChallengeListParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllChallengeListUnitTests(): Promise { + console.log('πŸš€ Starting ChallengeList Command Unit Tests\n'); + + try { + testChallengeListCommandStructure(); + await testMockChallengeListExecution(); + await testChallengeListRequiredParams(); + await testChallengeListOptionalParams(); + await testChallengeListPerformance(); + await testChallengeListResultStructure(); + + console.log('\nπŸŽ‰ ALL ChallengeList UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ ChallengeList unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllChallengeListUnitTests(); +} else { + module.exports = { runAllChallengeListUnitTests }; +} diff --git a/src/debug/jtag/commands/challenge/run/.npmignore b/src/debug/jtag/commands/challenge/run/.npmignore new file mode 100644 index 000000000..f74ad6b8a --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/.npmignore @@ -0,0 +1,20 @@ +# Development files +.eslintrc* +tsconfig*.json +vitest.config.ts + +# Build artifacts +*.js.map +*.d.ts.map + +# IDE +.vscode/ +.idea/ + +# Logs +*.log +npm-debug.log* + +# OS files +.DS_Store +Thumbs.db diff --git a/src/debug/jtag/commands/challenge/run/README.md b/src/debug/jtag/commands/challenge/run/README.md new file mode 100644 index 000000000..18c9e2ec9 --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/README.md @@ -0,0 +1,183 @@ +# Challenge Run Command + +Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag challenge/run [options] +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('challenge/run', { + // your parameters here +}); +``` + +## Parameters + +- **challengeId** (optional): `string` - Specific challenge ID to run. If not provided, runs the next unbeaten challenge +- **challengeNumber** (optional): `number` - Run challenge by sequence number (1-5) +- **personaId** (optional): `string` - Which AI persona runs the challenge. Defaults to the calling user +- **skipJudge** (optional): `boolean` - Skip AI judge evaluation (faster, just checks execution success) + +## Result + +Returns `ChallengeRunResult` with: + +Returns CommandResult with: +- **challengeName**: `string` - Name of the challenge that was run +- **difficulty**: `string` - Challenge difficulty level +- **status**: `string` - Attempt outcome: passed, failed, partial, timeout, error +- **score**: `number` - Judge score from 0-100 +- **feedback**: `string` - Judge feedback on the attempt +- **durationMs**: `number` - Total execution time in milliseconds +- **toolCallsUsed**: `number` - Number of tool calls consumed +- **filesModified**: `string[]` - Files modified during the attempt +- **filesCreated**: `string[]` - Files created during the attempt +- **errors**: `string[]` - Errors encountered during execution + +## Examples + +### Run the next unbeaten challenge + +```bash +./jtag challenge/run +``` + +**Expected result:** +{ status: "passed", score: 85, challengeName: "Add a function to a single file" } + +### Run a specific challenge by number + +```bash +./jtag challenge/run --challengeNumber=3 +``` + +**Expected result:** +{ status: "partial", score: 60, challengeName: "Extract shared utility from duplicate code" } + +### Quick run without AI judge + +```bash +./jtag challenge/run --challengeNumber=1 --skipJudge=true +``` + +**Expected result:** +{ status: "passed", score: 70, feedback: "Pipeline completed." } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help challenge/run +``` + +**Tool:** +```typescript +// Use your help tool with command name 'challenge/run' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme challenge/run +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'challenge/run' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Challenge Run/test/unit/ChallengeRunCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Challenge Run/test/integration/ChallengeRunIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/ChallengeRunTypes.ts` +- **Browser**: Browser-specific implementation in `browser/ChallengeRunBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/ChallengeRunServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/ChallengeRunCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/ChallengeRunIntegration.test.ts` diff --git a/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts b/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts new file mode 100644 index 000000000..d2303b12f --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Challenge Run Command - Browser Implementation + * + * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { ChallengeRunParams, ChallengeRunResult } from '../shared/ChallengeRunTypes'; + +export class ChallengeRunBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('challenge/run', context, subpath, commander); + } + + async execute(params: ChallengeRunParams): Promise { + console.log('🌐 BROWSER: Delegating Challenge Run to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/challenge/run/package.json b/src/debug/jtag/commands/challenge/run/package.json new file mode 100644 index 000000000..944ee6330 --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/package.json @@ -0,0 +1,35 @@ +{ + "name": "@jtag-commands/challenge/run", + "version": "1.0.0", + "description": "Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt.", + "main": "server/ChallengeRunServerCommand.ts", + "types": "shared/ChallengeRunTypes.ts", + "scripts": { + "test": "npm run test:unit && npm run test:integration", + "test:unit": "npx vitest run test/unit/*.test.ts", + "test:integration": "npx tsx test/integration/ChallengeRunIntegration.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "challenge/run" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts b/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts new file mode 100644 index 000000000..8ff5d583a --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts @@ -0,0 +1,177 @@ +/** + * Challenge Run Command - Server Implementation + * + * Runs a coding challenge: + * 1. Loads challenge (by ID, sequence number, or next unbeaten) + * 2. Sets up fresh workspace with challenge files + * 3. Executes via CodingChallengeRunner β†’ CodeAgentOrchestrator + * 4. Evaluates via CodingJudge + * 5. Records attempt and returns results + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { ChallengeRunParams, ChallengeRunResult } from '../shared/ChallengeRunTypes'; +import { createChallengeRunResultFromParams } from '../shared/ChallengeRunTypes'; +import { CodingChallengeRunner } from '@system/code/server/CodingChallengeRunner'; +import { CodingChallengeEntity } from '@system/data/entities/CodingChallengeEntity'; +import { ALL_CHALLENGES } from '@system/code/challenges/ChallengeDefinitions'; +import { Commands } from '@system/core/shared/Commands'; +import { COLLECTIONS } from '@system/shared/Constants'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +export class ChallengeRunServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('challenge/run', context, subpath, commander); + } + + async execute(params: ChallengeRunParams): Promise { + const personaId = (params.personaId ?? params.userId) as UUID; + if (!personaId) { + throw new ValidationError('personaId', 'A persona ID is required to run a challenge.'); + } + + // Load or create the challenge entity + const challenge = await this.resolveChallenge(params, personaId); + + // Run the challenge + const runner = new CodingChallengeRunner(); + const result = await runner.run(challenge, { + personaId, + skipJudge: params.skipJudge ?? false, + }); + + // Persist updated challenge (with new attempt recorded) + await this.persistChallenge(challenge); + + return createChallengeRunResultFromParams(params, { + success: result.success, + challengeName: challenge.name, + difficulty: challenge.difficulty, + status: result.attempt.status, + score: result.attempt.score, + feedback: result.attempt.feedback, + durationMs: result.attempt.durationMs, + toolCallsUsed: result.attempt.toolCallsUsed, + filesModified: result.attempt.filesModified, + filesCreated: result.attempt.filesCreated, + errors: result.attempt.errors, + }); + } + + /** + * Resolve which challenge to run: + * 1. By challengeId (exact match) + * 2. By challengeNumber (sequence number) + * 3. Next unbeaten challenge for this persona + */ + private async resolveChallenge(params: ChallengeRunParams, personaId: UUID): Promise { + // Try loading from database first + if (params.challengeId) { + return await this.loadOrCreateChallenge(params.challengeId); + } + + if (params.challengeNumber) { + const def = ALL_CHALLENGES.find(c => c.sequenceNumber === params.challengeNumber); + if (!def) { + throw new ValidationError( + 'challengeNumber', + `No challenge with sequence number ${params.challengeNumber}. Valid: 1-${ALL_CHALLENGES.length}`, + ); + } + return await this.ensureChallengeEntity(def); + } + + // Find next unbeaten challenge + for (const def of ALL_CHALLENGES) { + const entity = await this.ensureChallengeEntity(def); + const best = entity.bestAttemptFor(personaId); + if (!best || best.status !== 'passed') { + return entity; + } + } + + // All beaten β€” run the hardest one again + return await this.ensureChallengeEntity(ALL_CHALLENGES[ALL_CHALLENGES.length - 1]); + } + + /** + * Ensure a challenge definition exists as a persisted entity. + * Creates it if it doesn't exist in the database. + */ + private async ensureChallengeEntity(def: typeof ALL_CHALLENGES[0]): Promise { + // Try to find existing entity by name + try { + const existing = await Commands.execute('data/list', { + collection: COLLECTIONS.CODING_CHALLENGES, + filter: { name: def.name }, + limit: 1, + }); + + if (existing?.success && existing.items?.length > 0) { + const entity = new CodingChallengeEntity(); + Object.assign(entity, existing.items[0]); + return entity; + } + } catch { + // Database not available β€” create in-memory entity + } + + // Create new entity from definition + const entity = new CodingChallengeEntity(); + entity.name = def.name; + entity.description = def.description; + entity.sequenceNumber = def.sequenceNumber; + entity.difficulty = def.difficulty; + entity.category = def.category; + entity.setupFiles = def.setupFiles; + entity.expectedOutcome = def.expectedOutcome; + entity.evaluationCriteria = def.evaluationCriteria; + entity.expectedFiles = def.expectedFiles; + entity.timeLimitMs = def.timeLimitMs; + entity.toolCallLimit = def.toolCallLimit; + + // Persist (best-effort) + await this.persistChallenge(entity); + + return entity; + } + + private async loadOrCreateChallenge(challengeId: string): Promise { + try { + const result = await Commands.execute('data/read', { + collection: COLLECTIONS.CODING_CHALLENGES, + id: challengeId, + }); + if (result?.success && result.item) { + const entity = new CodingChallengeEntity(); + Object.assign(entity, result.item); + return entity; + } + } catch { + // Not found + } + throw new ValidationError('challengeId', `Challenge not found: ${challengeId}`); + } + + private async persistChallenge(entity: CodingChallengeEntity): Promise { + try { + if (entity.id) { + await Commands.execute('data/update', { + collection: COLLECTIONS.CODING_CHALLENGES, + id: entity.id, + data: { ...entity }, + }); + } else { + await Commands.execute('data/create', { + collection: COLLECTIONS.CODING_CHALLENGES, + data: { ...entity }, + }); + } + } catch { + // Best-effort persistence + } + } +} diff --git a/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts b/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts new file mode 100644 index 000000000..738950f47 --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts @@ -0,0 +1,145 @@ +/** + * Challenge Run Command - Shared Types + * + * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Challenge Run Command Parameters + */ +export interface ChallengeRunParams extends CommandParams { + // Specific challenge ID to run. If not provided, runs the next unbeaten challenge + challengeId?: string; + // Run challenge by sequence number (1-5) + challengeNumber?: number; + // Which AI persona runs the challenge. Defaults to the calling user + personaId?: string; + // Skip AI judge evaluation (faster, just checks execution success) + skipJudge?: boolean; +} + +/** + * Factory function for creating ChallengeRunParams + */ +export const createChallengeRunParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + // Specific challenge ID to run. If not provided, runs the next unbeaten challenge + challengeId?: string; + // Run challenge by sequence number (1-5) + challengeNumber?: number; + // Which AI persona runs the challenge. Defaults to the calling user + personaId?: string; + // Skip AI judge evaluation (faster, just checks execution success) + skipJudge?: boolean; + } +): ChallengeRunParams => createPayload(context, sessionId, { + challengeId: data.challengeId ?? '', + challengeNumber: data.challengeNumber ?? 0, + personaId: data.personaId ?? '', + skipJudge: data.skipJudge ?? false, + ...data +}); + +/** + * Challenge Run Command Result + */ +export interface ChallengeRunResult extends CommandResult { + success: boolean; + // Name of the challenge that was run + challengeName: string; + // Challenge difficulty level + difficulty: string; + // Attempt outcome: passed, failed, partial, timeout, error + status: string; + // Judge score from 0-100 + score: number; + // Judge feedback on the attempt + feedback: string; + // Total execution time in milliseconds + durationMs: number; + // Number of tool calls consumed + toolCallsUsed: number; + // Files modified during the attempt + filesModified: string[]; + // Files created during the attempt + filesCreated: string[]; + // Errors encountered during execution + errors: string[]; + error?: JTAGError; +} + +/** + * Factory function for creating ChallengeRunResult with defaults + */ +export const createChallengeRunResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + // Name of the challenge that was run + challengeName?: string; + // Challenge difficulty level + difficulty?: string; + // Attempt outcome: passed, failed, partial, timeout, error + status?: string; + // Judge score from 0-100 + score?: number; + // Judge feedback on the attempt + feedback?: string; + // Total execution time in milliseconds + durationMs?: number; + // Number of tool calls consumed + toolCallsUsed?: number; + // Files modified during the attempt + filesModified?: string[]; + // Files created during the attempt + filesCreated?: string[]; + // Errors encountered during execution + errors?: string[]; + error?: JTAGError; + } +): ChallengeRunResult => createPayload(context, sessionId, { + challengeName: data.challengeName ?? '', + difficulty: data.difficulty ?? '', + status: data.status ?? '', + score: data.score ?? 0, + feedback: data.feedback ?? '', + durationMs: data.durationMs ?? 0, + toolCallsUsed: data.toolCallsUsed ?? 0, + filesModified: data.filesModified ?? [], + filesCreated: data.filesCreated ?? [], + errors: data.errors ?? [], + ...data +}); + +/** + * Smart Challenge Run-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createChallengeRunResultFromParams = ( + params: ChallengeRunParams, + differences: Omit +): ChallengeRunResult => transformPayload(params, differences); + +/** + * Challenge Run β€” Type-safe command executor + * + * Usage: + * import { ChallengeRun } from '...shared/ChallengeRunTypes'; + * const result = await ChallengeRun.execute({ ... }); + */ +export const ChallengeRun = { + execute(params: CommandInput): Promise { + return Commands.execute('challenge/run', params as Partial); + }, + commandName: 'challenge/run' as const, +} as const; diff --git a/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts b/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts new file mode 100644 index 000000000..d23febfce --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts @@ -0,0 +1,196 @@ +#!/usr/bin/env tsx +/** + * ChallengeRun Command Integration Tests + * + * Tests Challenge Run command against the LIVE RUNNING SYSTEM. + * This is NOT a mock test - it tests real commands, real events, real widgets. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Challenge Run/test/integration/ChallengeRunIntegration.test.ts + * + * PREREQUISITES: + * - Server must be running: npm start (wait 90+ seconds) + * - Browser client connected via http://localhost:9003 + */ + +import { jtag } from '@server/server-index'; + +console.log('πŸ§ͺ ChallengeRun Command Integration Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Test 1: Connect to live system + */ +async function testSystemConnection(): Promise>> { + console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); + + const client = await jtag.connect(); + + assert(client !== null, 'Connected to live system'); + console.log(' βœ… Connected successfully'); + + return client; +} + +/** + * Test 2: Execute Challenge Run command on live system + */ +async function testCommandExecution(client: Awaited>): Promise { + console.log('\n⚑ Test 2: Executing Challenge Run command'); + + // TODO: Replace with your actual command parameters + const result = await client.commands['Challenge Run']({ + // Add your required parameters here + // Example: name: 'test-value' + }); + + console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); + + assert(result !== null, 'Challenge Run returned result'); + // TODO: Add assertions for your specific result fields + // assert(result.success === true, 'Challenge Run succeeded'); + // assert(result.yourField !== undefined, 'Result has yourField'); +} + +/** + * Test 3: Validate required parameters + */ +async function testRequiredParameters(_client: Awaited>): Promise { + console.log('\n🚨 Test 3: Testing required parameter validation'); + + // TODO: Uncomment and test missing required parameters + // try { + // await _client.commands['Challenge Run']({ + // // Missing required param + // }); + // assert(false, 'Should have thrown validation error'); + // } catch (error) { + // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); + // console.log(' βœ… ValidationError thrown correctly'); + // } + + console.log(' ⚠️ TODO: Add required parameter validation test'); +} + +/** + * Test 4: Test optional parameters + */ +async function testOptionalParameters(_client: Awaited>): Promise { + console.log('\nπŸ”§ Test 4: Testing optional parameters'); + + // TODO: Uncomment to test with and without optional parameters + // const withOptional = await client.commands['Challenge Run']({ + // requiredParam: 'test', + // optionalParam: true + // }); + // + // const withoutOptional = await client.commands['Challenge Run']({ + // requiredParam: 'test' + // }); + // + // assert(withOptional.success === true, 'Works with optional params'); + // assert(withoutOptional.success === true, 'Works without optional params'); + + console.log(' ⚠️ TODO: Add optional parameter tests'); +} + +/** + * Test 5: Performance test + */ +async function testPerformance(_client: Awaited>): Promise { + console.log('\n⚑ Test 5: Performance under load'); + + // TODO: Uncomment to test command performance + // const iterations = 10; + // const times: number[] = []; + // + // for (let i = 0; i < iterations; i++) { + // const start = Date.now(); + // await _client.commands['Challenge Run']({ /* params */ }); + // times.push(Date.now() - start); + // } + // + // const avg = times.reduce((a, b) => a + b, 0) / iterations; + // const max = Math.max(...times); + // + // console.log(` Average: ${avg.toFixed(2)}ms`); + // console.log(` Max: ${max}ms`); + // + // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); + // assert(max < 1000, `Max ${max}ms under 1000ms`); + + console.log(' ⚠️ TODO: Add performance test'); +} + +/** + * Test 6: Widget/Event integration (if applicable) + */ +async function testWidgetIntegration(_client: Awaited>): Promise { + console.log('\n🎨 Test 6: Widget/Event integration'); + + // TODO: Uncomment if your command emits events or updates widgets + // Example: + // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // await client.commands['Challenge Run']({ /* params */ }); + // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation + // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); + // + // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); + + console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); +} + +/** + * Run all integration tests + */ +async function runAllChallengeRunIntegrationTests(): Promise { + console.log('πŸš€ Starting ChallengeRun Integration Tests\n'); + console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); + + try { + const client = await testSystemConnection(); + await testCommandExecution(client); + await testRequiredParameters(client); + await testOptionalParameters(client); + await testPerformance(client); + await testWidgetIntegration(client); + + console.log('\nπŸŽ‰ ALL ChallengeRun INTEGRATION TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Live system connection'); + console.log(' βœ… Command execution on real system'); + console.log(' βœ… Parameter validation'); + console.log(' βœ… Optional parameter handling'); + console.log(' βœ… Performance benchmarks'); + console.log(' βœ… Widget/Event integration'); + console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); + console.log(' - Real database operations'); + console.log(' - Real event propagation'); + console.log(' - Real widget updates'); + console.log(' - Real cross-daemon communication'); + + } catch (error) { + console.error('\n❌ ChallengeRun integration tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + console.error('\nπŸ’‘ Make sure:'); + console.error(' 1. Server is running: npm start'); + console.error(' 2. Wait 90+ seconds for deployment'); + console.error(' 3. Browser is connected to http://localhost:9003'); + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllChallengeRunIntegrationTests(); +} else { + module.exports = { runAllChallengeRunIntegrationTests }; +} diff --git a/src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts b/src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts new file mode 100644 index 000000000..bc8c01289 --- /dev/null +++ b/src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts @@ -0,0 +1,259 @@ +#!/usr/bin/env tsx +/** + * ChallengeRun Command Unit Tests + * + * Tests Challenge Run command logic in isolation using mock dependencies. + * This is a REFERENCE EXAMPLE showing best practices for command testing. + * + * Generated by: ./jtag generate + * Run with: npx tsx commands/Challenge Run/test/unit/ChallengeRunCommand.test.ts + * + * NOTE: This is a self-contained test (no external test utilities needed). + * Use this as a template for your own command tests. + */ + +// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests +import { generateUUID } from '@system/core/types/CrossPlatformUUID'; +import type { ChallengeRunParams, ChallengeRunResult } from '../../shared/ChallengeRunTypes'; + +console.log('πŸ§ͺ ChallengeRun Command Unit Tests'); + +function assert(condition: boolean, message: string): void { + if (!condition) { + throw new Error(`❌ Assertion failed: ${message}`); + } + console.log(`βœ… ${message}`); +} + +/** + * Mock command that implements Challenge Run logic for testing + */ +async function mockChallengeRunCommand(params: ChallengeRunParams): Promise { + // TODO: Validate required parameters (BEST PRACTICE) + // Example: + // if (!params.requiredParam || params.requiredParam.trim() === '') { + // throw new ValidationError( + // 'requiredParam', + // `Missing required parameter 'requiredParam'. ` + + // `Use the help tool with 'Challenge Run' or see the Challenge Run README for usage information.` + // ); + // } + + // TODO: Handle optional parameters with sensible defaults + // const optionalParam = params.optionalParam ?? defaultValue; + + // TODO: Implement your command logic here + return { + success: true, + // TODO: Add your result fields with actual computed values + context: params.context, + sessionId: params.sessionId + } as ChallengeRunResult; +} + +/** + * Test 1: Command structure validation + */ +function testChallengeRunCommandStructure(): void { + console.log('\nπŸ“‹ Test 1: ChallengeRun command structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Create valid params for Challenge Run command + const validParams: ChallengeRunParams = { + // TODO: Add your required parameters here + context, + sessionId + }; + + // Validate param structure + assert(validParams.context !== undefined, 'Params have context'); + assert(validParams.sessionId !== undefined, 'Params have sessionId'); + // TODO: Add assertions for your specific parameters + // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); +} + +/** + * Test 2: Mock command execution + */ +async function testMockChallengeRunExecution(): Promise { + console.log('\n⚑ Test 2: Mock Challenge Run command execution'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test mock execution + const params: ChallengeRunParams = { + // TODO: Add your parameters here + context, + sessionId + }; + + const result = await mockChallengeRunCommand(params); + + // Validate result structure + assert(result.success === true, 'Mock result shows success'); + // TODO: Add assertions for your result fields + // assert(typeof result.yourField === 'string', 'yourField is string'); +} + +/** + * Test 3: Required parameter validation (CRITICAL) + * + * This test ensures your command throws ValidationError + * when required parameters are missing (BEST PRACTICE) + */ +async function testChallengeRunRequiredParams(): Promise { + console.log('\n🚨 Test 3: Required parameter validation'); + + // TODO: Uncomment when implementing validation + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test cases that should throw ValidationError + // Example: + // const testCases = [ + // { params: {} as ChallengeRunParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as ChallengeRunParams, desc: 'Empty requiredParam' }, + // ]; + // + // for (const testCase of testCases) { + // try { + // await mockChallengeRunCommand({ ...testCase.params, context, sessionId }); + // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); + // } catch (error) { + // if (error instanceof ValidationError) { + // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); + // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); + // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); + // } else { + // throw error; // Re-throw if not ValidationError + // } + // } + // } + + console.log('βœ… All required parameter validations work correctly'); +} + +/** + * Test 4: Optional parameter handling + */ +async function testChallengeRunOptionalParams(): Promise { + console.log('\nπŸ”§ Test 4: Optional parameter handling'); + + // TODO: Uncomment when implementing optional param tests + // const context = { environment: 'server' as const }; + // const sessionId = generateUUID(); + + // TODO: Test WITHOUT optional param (should use default) + // const paramsWithoutOptional: ChallengeRunParams = { + // requiredParam: 'test', + // context, + // sessionId + // }; + // + // const resultWithoutOptional = await mockChallengeRunCommand(paramsWithoutOptional); + // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); + + // TODO: Test WITH optional param + // const paramsWithOptional: ChallengeRunParams = { + // requiredParam: 'test', + // optionalParam: true, + // context, + // sessionId + // }; + // + // const resultWithOptional = await mockChallengeRunCommand(paramsWithOptional); + // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); + + console.log('βœ… Optional parameter handling validated'); +} + +/** + * Test 5: Performance validation + */ +async function testChallengeRunPerformance(): Promise { + console.log('\n⚑ Test 5: ChallengeRun performance validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + const startTime = Date.now(); + + await mockChallengeRunCommand({ + // TODO: Add your parameters + context, + sessionId + } as ChallengeRunParams); + + const executionTime = Date.now() - startTime; + + assert(executionTime < 100, `ChallengeRun completed in ${executionTime}ms (under 100ms limit)`); +} + +/** + * Test 6: Result structure validation + */ +async function testChallengeRunResultStructure(): Promise { + console.log('\nπŸ” Test 6: ChallengeRun result structure validation'); + + const context = { environment: 'server' as const }; + const sessionId = generateUUID(); + + // Test various scenarios + const basicResult = await mockChallengeRunCommand({ + // TODO: Add your parameters + context, + sessionId + } as ChallengeRunParams); + + assert(basicResult.success === true, 'Result has success field'); + // TODO: Add assertions for your result fields + // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); + assert(basicResult.context === context, 'Result includes context'); + assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); + + console.log('βœ… All result structure validations pass'); +} + +/** + * Run all unit tests + */ +async function runAllChallengeRunUnitTests(): Promise { + console.log('πŸš€ Starting ChallengeRun Command Unit Tests\n'); + + try { + testChallengeRunCommandStructure(); + await testMockChallengeRunExecution(); + await testChallengeRunRequiredParams(); + await testChallengeRunOptionalParams(); + await testChallengeRunPerformance(); + await testChallengeRunResultStructure(); + + console.log('\nπŸŽ‰ ALL ChallengeRun UNIT TESTS PASSED!'); + console.log('πŸ“‹ Validated:'); + console.log(' βœ… Command structure and parameter validation'); + console.log(' βœ… Mock command execution patterns'); + console.log(' βœ… Required parameter validation (throws ValidationError)'); + console.log(' βœ… Optional parameter handling (sensible defaults)'); + console.log(' βœ… Performance requirements (< 100ms)'); + console.log(' βœ… Result structure validation'); + console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); + console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); + + } catch (error) { + console.error('\n❌ ChallengeRun unit tests failed:', (error as Error).message); + if ((error as Error).stack) { + console.error((error as Error).stack); + } + process.exit(1); + } +} + +// Run if called directly +if (require.main === module) { + void runAllChallengeRunUnitTests(); +} else { + module.exports = { runAllChallengeRunUnitTests }; +} diff --git a/src/debug/jtag/commands/code/git/README.md b/src/debug/jtag/commands/code/git/README.md new file mode 100644 index 000000000..e87c0688d --- /dev/null +++ b/src/debug/jtag/commands/code/git/README.md @@ -0,0 +1,50 @@ +# code/git + +Workspace-scoped git operations for the coding agent pipeline. All operations route through the Rust IPC backend for per-persona workspace isolation. + +## Operations + +| Operation | Description | Required Params | +|-----------|-------------|-----------------| +| `status` | Show workspace git status | - | +| `diff` | Show uncommitted changes | `staged?` | +| `log` | Show recent commits | `count?` | +| `add` | Stage files for commit | `paths` | +| `commit` | Create a commit | `message` | +| `push` | Push to remote | `remote?`, `branch?` | + +## Usage + +```bash +# Check workspace status +./jtag code/git --userId="persona-id" --operation=status + +# View changes +./jtag code/git --userId="persona-id" --operation=diff +./jtag code/git --userId="persona-id" --operation=diff --staged=true + +# View history +./jtag code/git --userId="persona-id" --operation=log --count=5 + +# Stage and commit +./jtag code/git --userId="persona-id" --operation=add --paths='["."]' +./jtag code/git --userId="persona-id" --operation=commit --message="Add feature" + +# Push (requires system tier in coding pipeline) +./jtag code/git --userId="persona-id" --operation=push +``` + +## Security Tiers + +- `status`, `diff`, `log`: Read tier (read-only operations) +- `add`, `commit`: Write tier (modifies repository state) +- `push`: Write tier via CLI; system tier when used in coding pipeline plans + +## Programmatic Usage + +```typescript +import { CodeGit } from './shared/CodeGitTypes'; + +const status = await CodeGit.execute({ userId: 'persona-id', operation: 'status' }); +console.log(status.status?.branch, status.status?.modified); +``` diff --git a/src/debug/jtag/commands/code/git/browser/CodeGitBrowserCommand.ts b/src/debug/jtag/commands/code/git/browser/CodeGitBrowserCommand.ts new file mode 100644 index 000000000..80be3536a --- /dev/null +++ b/src/debug/jtag/commands/code/git/browser/CodeGitBrowserCommand.ts @@ -0,0 +1,20 @@ +/** + * Code Git Command - Browser Implementation + * + * Workspace-scoped git operations for the coding agent pipeline. Operations: status, diff, log, add, commit, push. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeGitParams, CodeGitResult } from '../shared/CodeGitTypes'; + +export class CodeGitBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/git', context, subpath, commander); + } + + async execute(params: CodeGitParams): Promise { + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/git/package.json b/src/debug/jtag/commands/code/git/package.json new file mode 100644 index 000000000..15fa821c0 --- /dev/null +++ b/src/debug/jtag/commands/code/git/package.json @@ -0,0 +1,34 @@ +{ + "name": "@jtag-commands/code/git", + "version": "1.0.0", + "description": "Workspace-scoped git operations for the coding agent pipeline. Operations: status, diff, log, add, commit, push. All operations route through Rust IPC for per-persona isolation.", + "main": "server/CodeGitServerCommand.ts", + "types": "shared/CodeGitTypes.ts", + "scripts": { + "test": "npm run test:unit", + "test:unit": "npx vitest run test/unit/*.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/git" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/git/server/CodeGitServerCommand.ts b/src/debug/jtag/commands/code/git/server/CodeGitServerCommand.ts new file mode 100644 index 000000000..6496aa01a --- /dev/null +++ b/src/debug/jtag/commands/code/git/server/CodeGitServerCommand.ts @@ -0,0 +1,155 @@ +/** + * Code Git Command - Server Implementation + * + * Routes git operations through the Rust IPC backend for workspace isolation. + * All operations are scoped to the persona's registered workspace. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeGitParams, CodeGitResult, GitOperation } from '../shared/CodeGitTypes'; +import { createCodeGitResultFromParams } from '../shared/CodeGitTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +const VALID_OPERATIONS = new Set(['status', 'diff', 'log', 'add', 'commit', 'push']); + +export class CodeGitServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/git', context, subpath, commander); + } + + async execute(params: CodeGitParams): Promise { + // Validate required parameters + if (!params.userId) { + throw new ValidationError( + 'userId', + 'Missing required parameter userId (persona/workspace handle).' + ); + } + + if (!params.operation || !VALID_OPERATIONS.has(params.operation)) { + throw new ValidationError( + 'operation', + `Invalid operation '${params.operation ?? ''}'. Must be one of: ${Array.from(VALID_OPERATIONS).join(', ')}` + ); + } + + const operation = params.operation as GitOperation; + + switch (operation) { + case 'status': + return this.handleStatus(params); + case 'diff': + return this.handleDiff(params); + case 'log': + return this.handleLog(params); + case 'add': + return this.handleAdd(params); + case 'commit': + return this.handleCommit(params); + case 'push': + return this.handlePush(params); + } + } + + private async handleStatus(params: CodeGitParams): Promise { + const statusInfo = await CodeDaemon.workspaceGitStatus(params.userId); + + const totalChanges = statusInfo.modified.length + statusInfo.added.length + + statusInfo.deleted.length + statusInfo.untracked.length; + + return createCodeGitResultFromParams(params, { + success: statusInfo.success, + operation: 'status', + status: { + branch: statusInfo.branch, + modified: statusInfo.modified, + added: statusInfo.added, + deleted: statusInfo.deleted, + untracked: statusInfo.untracked, + }, + summary: statusInfo.success + ? `Branch: ${statusInfo.branch ?? 'unknown'}, ${totalChanges} changed file(s)` + : `Git status failed: ${statusInfo.error ?? 'unknown error'}`, + }); + } + + private async handleDiff(params: CodeGitParams): Promise { + const result = await CodeDaemon.workspaceGitDiff(params.userId, params.staged ?? false); + + return createCodeGitResultFromParams(params, { + success: result.success, + operation: 'diff', + diff: result.diff, + summary: result.diff + ? `${result.diff.split('\n').length} lines of diff output` + : 'No changes', + }); + } + + private async handleLog(params: CodeGitParams): Promise { + const result = await CodeDaemon.workspaceGitLog(params.userId, params.count ?? 10); + + return createCodeGitResultFromParams(params, { + success: result.success, + operation: 'log', + log: result.log, + summary: result.log + ? `${result.log.trim().split('\n').length} commit(s)` + : 'No commits', + }); + } + + private async handleAdd(params: CodeGitParams): Promise { + if (!params.paths || params.paths.length === 0) { + throw new ValidationError( + 'paths', + 'The add operation requires at least one path. Use ["."] to stage all changes.' + ); + } + + const result = await CodeDaemon.workspaceGitAdd(params.userId, params.paths); + + return createCodeGitResultFromParams(params, { + success: true, + operation: 'add', + staged: result.staged, + summary: `Staged ${result.staged.length} path(s)`, + }); + } + + private async handleCommit(params: CodeGitParams): Promise { + if (!params.message || params.message.trim() === '') { + throw new ValidationError( + 'message', + 'The commit operation requires a non-empty message.' + ); + } + + const result = await CodeDaemon.workspaceGitCommit(params.userId, params.message.trim()); + + return createCodeGitResultFromParams(params, { + success: true, + operation: 'commit', + commitHash: result.hash, + summary: `Committed: ${result.hash.substring(0, 8)}`, + }); + } + + private async handlePush(params: CodeGitParams): Promise { + const result = await CodeDaemon.workspaceGitPush( + params.userId, + params.remote ?? 'origin', + params.branch ?? '' + ); + + return createCodeGitResultFromParams(params, { + success: true, + operation: 'push', + pushOutput: result.output, + summary: `Pushed to ${params.remote ?? 'origin'}${params.branch ? '/' + params.branch : ''}`, + }); + } +} diff --git a/src/debug/jtag/commands/code/git/shared/CodeGitTypes.ts b/src/debug/jtag/commands/code/git/shared/CodeGitTypes.ts new file mode 100644 index 000000000..e63e144b2 --- /dev/null +++ b/src/debug/jtag/commands/code/git/shared/CodeGitTypes.ts @@ -0,0 +1,146 @@ +/** + * Code Git Command - Shared Types + * + * Workspace-scoped git operations for the coding agent pipeline. + * Operations: status, diff, log, add, commit, push. + * All operations are routed through the Rust IPC backend for per-persona workspace isolation. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Supported git operations. + */ +export type GitOperation = 'status' | 'diff' | 'log' | 'add' | 'commit' | 'push'; + +/** + * Code Git Command Parameters + */ +export interface CodeGitParams extends CommandParams { + /** Persona/workspace handle */ + userId: string; + /** Git operation to perform */ + operation: string; + /** File paths to stage (for 'add' operation) */ + paths?: string[]; + /** Commit message (for 'commit' operation) */ + message?: string; + /** Remote name (for 'push' operation, default: 'origin') */ + remote?: string; + /** Branch name (for 'push' operation) */ + branch?: string; + /** Show staged changes (for 'diff' operation) */ + staged?: boolean; + /** Number of commits to show (for 'log' operation, default: 10) */ + count?: number; +} + +/** + * Factory function for creating CodeGitParams + */ +export const createCodeGitParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + userId: string; + operation: string; + paths?: string[]; + message?: string; + remote?: string; + branch?: string; + staged?: boolean; + count?: number; + } +): CodeGitParams => createPayload(context, sessionId, { + paths: data.paths ?? [], + message: data.message ?? '', + remote: data.remote ?? '', + branch: data.branch ?? '', + staged: data.staged ?? false, + count: data.count ?? 0, + ...data +}); + +/** + * Git status information + */ +export interface GitStatusInfo { + branch?: string; + modified: string[]; + added: string[]; + deleted: string[]; + untracked: string[]; +} + +/** + * Code Git Command Result + */ +export interface CodeGitResult extends CommandResult { + success: boolean; + /** Which operation was performed */ + operation: string; + /** Git status info (for 'status' operation) */ + status?: GitStatusInfo; + /** Diff output (for 'diff' operation) */ + diff?: string; + /** Log output (for 'log' operation) */ + log?: string; + /** Staged file paths (for 'add' operation) */ + staged?: string[]; + /** Commit hash (for 'commit' operation) */ + commitHash?: string; + /** Push output (for 'push' operation) */ + pushOutput?: string; + /** Human-readable summary */ + summary: string; + error?: JTAGError; +} + +/** + * Factory function for creating CodeGitResult with defaults + */ +export const createCodeGitResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + operation: string; + status?: GitStatusInfo; + diff?: string; + log?: string; + staged?: string[]; + commitHash?: string; + pushOutput?: string; + summary?: string; + error?: JTAGError; + } +): CodeGitResult => createPayload(context, sessionId, { + summary: data.summary ?? '', + ...data +}); + +/** + * Smart result inheritance from params + */ +export const createCodeGitResultFromParams = ( + params: CodeGitParams, + differences: Omit +): CodeGitResult => transformPayload(params, differences); + +/** + * Code Git - Type-safe command executor + * + * Usage: + * import { CodeGit } from '...shared/CodeGitTypes'; + * const result = await CodeGit.execute({ userId: 'persona-id', operation: 'status' }); + */ +export const CodeGit = { + execute(params: CommandInput): Promise { + return Commands.execute('code/git', params as Partial); + }, + commandName: 'code/git' as const, +} as const; diff --git a/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts b/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts index 4c53d08f4..241397011 100644 --- a/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts +++ b/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts @@ -47,6 +47,19 @@ export class CodeTaskServerCommand extends CommandBase createPayload(context, sessionId, { taskType: data.taskType ?? '', @@ -64,6 +72,8 @@ export const createCodeTaskParams = ( delegationEnabled: data.delegationEnabled ?? false, maxDurationMs: data.maxDurationMs ?? 0, maxToolCalls: data.maxToolCalls ?? 0, + workspaceMode: data.workspaceMode ?? '', + sparsePaths: data.sparsePaths ?? [], ...data }); diff --git a/src/debug/jtag/commands/code/verify/README.md b/src/debug/jtag/commands/code/verify/README.md new file mode 100644 index 000000000..513c24b29 --- /dev/null +++ b/src/debug/jtag/commands/code/verify/README.md @@ -0,0 +1,69 @@ +# code/verify + +Run TypeScript compilation checks and optionally execute tests against a persona workspace. Returns structured errors with file, line, column, and message. + +## Usage + +```bash +# Check TypeScript compilation in persona workspace +./jtag code/verify --userId="persona-uuid" + +# Check with explicit working directory +./jtag code/verify --userId="persona-uuid" --cwd="/path/to/workspace" + +# Skip type checking, only run tests +./jtag code/verify --userId="persona-uuid" --typeCheck=false --testFiles='["tests/unit/foo.test.ts"]' + +# Type check + run specific tests +./jtag code/verify --userId="persona-uuid" --testFiles='["tests/unit/foo.test.ts"]' +``` + +## Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `userId` | string | *required* | Persona ID or workspace handle | +| `typeCheck` | boolean | `true` | Run TypeScript compilation check | +| `testFiles` | string[] | `[]` | Test files to run via vitest | +| `cwd` | string | *auto* | Working directory override | + +## Result + +```typescript +{ + success: boolean; + typeCheck?: { + passed: boolean; + errorCount: number; + errors: TypeScriptError[]; + }; + tests?: { + passed: boolean; + total: number; + passedCount: number; + failedCount: number; + failures: string[]; + }; + durationMs: number; + output: string; +} +``` + +## TypeScript Error Format + +```typescript +{ + file: "src/utils.ts", + line: 42, + column: 5, + code: "TS2345", + message: "Argument of type 'string' is not assignable to parameter of type 'number'." +} +``` + +## Security + +- Uses `ExecutionSandbox` for process isolation (restricted PATH, timeout enforcement) +- Allowed commands: `npx tsc`, `npx vitest` (via sandbox allowlist) +- No file modification β€” verification is read-only +- Available at **write** security tier (same tier as code/edit) diff --git a/src/debug/jtag/commands/code/verify/browser/CodeVerifyBrowserCommand.ts b/src/debug/jtag/commands/code/verify/browser/CodeVerifyBrowserCommand.ts new file mode 100644 index 000000000..e229c84e3 --- /dev/null +++ b/src/debug/jtag/commands/code/verify/browser/CodeVerifyBrowserCommand.ts @@ -0,0 +1,22 @@ +/** + * Code Verify Command - Browser Implementation + * + * Run TypeScript compilation checks and optionally execute tests against a persona workspace. + * Delegates to server β€” verification requires file system access and process execution. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeVerifyParams, CodeVerifyResult } from '../shared/CodeVerifyTypes'; + +export class CodeVerifyBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/verify', context, subpath, commander); + } + + async execute(params: CodeVerifyParams): Promise { + console.log('🌐 BROWSER: Delegating Code Verify to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/code/verify/package.json b/src/debug/jtag/commands/code/verify/package.json new file mode 100644 index 000000000..0e28b7dc0 --- /dev/null +++ b/src/debug/jtag/commands/code/verify/package.json @@ -0,0 +1,34 @@ +{ + "name": "@jtag-commands/code/verify", + "version": "1.0.0", + "description": "Run TypeScript compilation checks and optionally execute tests against a persona workspace. Returns structured errors with file, line, column, and message.", + "main": "server/CodeVerifyServerCommand.ts", + "types": "shared/CodeVerifyTypes.ts", + "scripts": { + "test": "npm run test:unit", + "test:unit": "npx vitest run test/unit/*.test.ts", + "lint": "npx eslint **/*.ts", + "typecheck": "npx tsc --noEmit" + }, + "peerDependencies": { + "@jtag/core": "*" + }, + "files": [ + "shared/**/*.ts", + "browser/**/*.ts", + "server/**/*.ts", + "test/**/*.ts", + "README.md" + ], + "keywords": [ + "jtag", + "command", + "code/verify" + ], + "license": "MIT", + "author": "", + "repository": { + "type": "git", + "url": "" + } +} diff --git a/src/debug/jtag/commands/code/verify/server/CodeVerifyServerCommand.ts b/src/debug/jtag/commands/code/verify/server/CodeVerifyServerCommand.ts new file mode 100644 index 000000000..f69fe8691 --- /dev/null +++ b/src/debug/jtag/commands/code/verify/server/CodeVerifyServerCommand.ts @@ -0,0 +1,250 @@ +/** + * Code Verify Command - Server Implementation + * + * Runs TypeScript compilation checks and optionally executes tests + * via ExecutionSandbox (process-isolated, timeout-enforced). + * + * Workspace resolution: + * - If `cwd` param is provided, use it directly + * - Otherwise, resolve from userId: {jtagRoot}/.continuum/personas/{userId}/workspace/ + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeVerifyParams, CodeVerifyResult, TypeScriptError, TestResult } from '../shared/CodeVerifyTypes'; +import { createCodeVerifyResultFromParams } from '../shared/CodeVerifyTypes'; +import { ExecutionSandbox } from '@system/code/server/ExecutionSandbox'; +import type { SandboxResult } from '@system/code/server/ExecutionSandbox'; +import * as path from 'path'; +import * as fs from 'fs'; + +/** TypeScript error regex: file(line,col): error TSxxxx: message */ +const TS_ERROR_REGEX = /^(.+?)\((\d+),(\d+)\):\s*error\s+(TS\d+):\s*(.+)$/gm; + +export class CodeVerifyServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/verify', context, subpath, commander); + } + + async execute(params: CodeVerifyParams): Promise { + const startTime = Date.now(); + + if (!params.userId) { + throw new ValidationError('userId', 'Verification requires a userId (auto-injected for persona tool calls).'); + } + + const workspaceDir = this.resolveWorkspaceDir(params); + const sandbox = new ExecutionSandbox(); + const doTypeCheck = params.typeCheck !== false; + const doTests = params.testFiles && params.testFiles.length > 0; + + let typeCheckResult: CodeVerifyResult['typeCheck'] | undefined; + let testsResult: TestResult | undefined; + let output = ''; + let allPassed = true; + + // Phase 1: TypeScript compilation check + if (doTypeCheck) { + const tscResult = await this.runTypeCheck(sandbox, workspaceDir, params.userId); + const errors = this.parseTypeScriptErrors(tscResult.stdout + tscResult.stderr); + + typeCheckResult = { + passed: tscResult.success, + errorCount: errors.length, + errors, + }; + + output += tscResult.stdout + tscResult.stderr; + if (!tscResult.success) allPassed = false; + } + + // Phase 2: Test execution (optional) + if (doTests && params.testFiles) { + const testRunResult = await this.runTests(sandbox, workspaceDir, params.testFiles, params.userId); + testsResult = this.parseTestResult(testRunResult); + + output += '\n' + testRunResult.stdout + testRunResult.stderr; + if (!testsResult.passed) allPassed = false; + } + + const durationMs = Date.now() - startTime; + + return createCodeVerifyResultFromParams(params, { + success: allPassed, + typeCheck: typeCheckResult, + tests: testsResult, + durationMs, + output, + }); + } + + /** + * Resolve the workspace directory from params. + * Uses explicit cwd if provided, otherwise resolves from userId convention. + */ + private resolveWorkspaceDir(params: CodeVerifyParams): string { + if (params.cwd && params.cwd.trim()) { + return params.cwd; + } + + const jtagRoot = process.cwd(); + const personaId = params.userId!; + + // Standard persona workspace path + const workspaceDir = path.join(jtagRoot, '.continuum', 'personas', personaId, 'workspace'); + + if (fs.existsSync(workspaceDir)) { + return workspaceDir; + } + + // Fallback: check if userId is a challenge workspace handle (challenge-{id}-{personaId}) + if (personaId.startsWith('challenge-')) { + const parts = personaId.split('-'); + // Handle: challenge-{challengeId}-{personaId} + // The challengeId and personaId are UUIDs, so we need the full pattern + const challengeIdStart = 'challenge-'.length; + // Find the persona ID (last UUID in the handle) + const uuidLen = 36; // Standard UUID length + if (personaId.length > challengeIdStart + uuidLen + 1) { + const actualPersonaId = personaId.slice(-(uuidLen)); + const challengeId = personaId.slice(challengeIdStart, personaId.length - uuidLen - 1); + const challengeDir = path.join(jtagRoot, '.continuum', 'personas', actualPersonaId, 'challenges', challengeId); + if (fs.existsSync(challengeDir)) { + return challengeDir; + } + } + } + + // Last resort: use the standard workspace path even if it doesn't exist yet + return workspaceDir; + } + + /** + * Run TypeScript compilation check via ExecutionSandbox. + */ + private async runTypeCheck(sandbox: ExecutionSandbox, workspaceDir: string, personaId: string): Promise { + // Check if workspace has a tsconfig.json β€” if so, tsc uses it automatically + const hasTsConfig = fs.existsSync(path.join(workspaceDir, 'tsconfig.json')); + + const args = hasTsConfig + ? ['tsc', '--noEmit'] + : ['tsc', '--noEmit', '--strict', ...this.findTypeScriptFiles(workspaceDir)]; + + return sandbox.execute({ + command: 'npx', + args, + cwd: workspaceDir, + timeoutMs: 120_000, + maxOutputBytes: 102_400, + personaId: personaId as any, + }); + } + + /** + * Run test files via vitest in sandbox. + */ + private async runTests( + sandbox: ExecutionSandbox, + workspaceDir: string, + testFiles: string[], + personaId: string, + ): Promise { + return sandbox.execute({ + command: 'npx', + args: ['vitest', 'run', ...testFiles, '--reporter=json'], + cwd: workspaceDir, + timeoutMs: 120_000, + maxOutputBytes: 102_400, + personaId: personaId as any, + }); + } + + /** + * Find .ts files in workspace for compilation without tsconfig. + */ + private findTypeScriptFiles(workspaceDir: string): string[] { + const files: string[] = []; + try { + const entries = fs.readdirSync(workspaceDir, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isFile() && entry.name.endsWith('.ts') && !entry.name.endsWith('.d.ts')) { + files.push(entry.name); + } + } + } catch { + // Directory doesn't exist or isn't readable + } + return files; + } + + /** + * Parse TypeScript compiler output into structured errors. + * Format: file(line,col): error TSxxxx: message + */ + private parseTypeScriptErrors(output: string): TypeScriptError[] { + const errors: TypeScriptError[] = []; + let match; + + // Reset regex state + TS_ERROR_REGEX.lastIndex = 0; + + while ((match = TS_ERROR_REGEX.exec(output)) !== null) { + errors.push({ + file: match[1], + line: parseInt(match[2], 10), + column: parseInt(match[3], 10), + code: match[4], + message: match[5], + }); + } + + return errors; + } + + /** + * Parse vitest JSON output into a TestResult. + */ + private parseTestResult(sandboxResult: SandboxResult): TestResult { + if (sandboxResult.timedOut) { + return { + passed: false, + total: 0, + passedCount: 0, + failedCount: 0, + failures: ['Test execution timed out'], + }; + } + + try { + // vitest --reporter=json outputs JSON to stdout + const json = JSON.parse(sandboxResult.stdout); + const numPassed = json.numPassedTests ?? 0; + const numFailed = json.numFailedTests ?? 0; + const total = json.numTotalTests ?? (numPassed + numFailed); + const failures = (json.testResults ?? []) + .flatMap((suite: any) => (suite.assertionResults ?? []) + .filter((t: any) => t.status === 'failed') + .map((t: any) => `${t.ancestorTitles?.join(' > ')} > ${t.title}: ${t.failureMessages?.[0] ?? 'Failed'}`) + ); + + return { + passed: numFailed === 0, + total, + passedCount: numPassed, + failedCount: numFailed, + failures, + }; + } catch { + // Non-JSON output β€” treat as failure + return { + passed: sandboxResult.success, + total: 0, + passedCount: 0, + failedCount: sandboxResult.success ? 0 : 1, + failures: sandboxResult.success ? [] : [sandboxResult.stderr || sandboxResult.stdout || 'Unknown test failure'], + }; + } + } +} diff --git a/src/debug/jtag/commands/code/verify/shared/CodeVerifyTypes.ts b/src/debug/jtag/commands/code/verify/shared/CodeVerifyTypes.ts new file mode 100644 index 000000000..19d1eab15 --- /dev/null +++ b/src/debug/jtag/commands/code/verify/shared/CodeVerifyTypes.ts @@ -0,0 +1,128 @@ +/** + * Code Verify Command - Shared Types + * + * Run TypeScript compilation checks and optionally execute tests against a persona workspace. + * Returns structured errors with file, line, column, and message for each issue found. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; + +/** + * Parsed TypeScript compilation error + */ +export interface TypeScriptError { + file: string; + line: number; + column: number; + code: string; + message: string; +} + +/** + * Code Verify Command Parameters + */ +export interface CodeVerifyParams extends CommandParams { + /** Run TypeScript compilation check (default: true) */ + typeCheck?: boolean; + /** Specific test files to run via vitest (optional) */ + testFiles?: string[]; + /** Working directory override β€” bypasses workspace resolution */ + cwd?: string; +} + +/** + * Factory function for creating CodeVerifyParams + */ +export const createCodeVerifyParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + typeCheck?: boolean; + testFiles?: string[]; + cwd?: string; + } +): CodeVerifyParams => createPayload(context, sessionId, { + typeCheck: data.typeCheck ?? true, + testFiles: data.testFiles ?? [], + cwd: data.cwd ?? '', + ...data +}); + +/** + * Test execution result + */ +export interface TestResult { + passed: boolean; + total: number; + passedCount: number; + failedCount: number; + failures: string[]; +} + +/** + * Code Verify Command Result + */ +export interface CodeVerifyResult extends CommandResult { + success: boolean; + /** TypeScript compilation result (if typeCheck was requested) */ + typeCheck?: { + passed: boolean; + errorCount: number; + errors: TypeScriptError[]; + }; + /** Test execution result (if testFiles were specified) */ + tests?: TestResult; + /** Total verification time in milliseconds */ + durationMs: number; + /** Raw compiler/test output */ + output: string; + error?: JTAGError; +} + +/** + * Factory function for creating CodeVerifyResult with defaults + */ +export const createCodeVerifyResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + typeCheck?: CodeVerifyResult['typeCheck']; + tests?: TestResult; + durationMs?: number; + output?: string; + error?: JTAGError; + } +): CodeVerifyResult => createPayload(context, sessionId, { + durationMs: data.durationMs ?? 0, + output: data.output ?? '', + ...data +}); + +/** + * Smart Code Verify-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeVerifyResultFromParams = ( + params: CodeVerifyParams, + differences: Omit +): CodeVerifyResult => transformPayload(params, differences); + +/** + * Code Verify β€” Type-safe command executor + * + * Usage: + * import { CodeVerify } from '...shared/CodeVerifyTypes'; + * const result = await CodeVerify.execute({ typeCheck: true }); + */ +export const CodeVerify = { + execute(params: CommandInput): Promise { + return Commands.execute('code/verify', params as Partial); + }, + commandName: 'code/verify' as const, +} as const; diff --git a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts index 5ebd52a14..b42078ad5 100644 --- a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts +++ b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts @@ -81,5 +81,21 @@ export async function initializeCodeDaemon(jtagContext: JTAGContext): Promise { + return await rustClient.codeGitLog(personaId, count); + }; + + CodeDaemon.workspaceGitAdd = async (personaId: string, paths: string[]) => { + return await rustClient.codeGitAdd(personaId, paths); + }; + + CodeDaemon.workspaceGitCommit = async (personaId: string, message: string) => { + return await rustClient.codeGitCommit(personaId, message); + }; + + CodeDaemon.workspaceGitPush = async (personaId: string, remote?: string, branch?: string) => { + return await rustClient.codeGitPush(personaId, remote, branch); + }; + log.info('Initialized successfully (workspace operations via Rust IPC)'); } diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts index b9f7da737..1258c5cc9 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts @@ -112,4 +112,32 @@ export class CodeDaemon { static async workspaceGitDiff(personaId: string, staged?: boolean): Promise<{ success: boolean; diff: string }> { throw new Error('CodeDaemon.workspaceGitDiff() must be implemented by server'); } + + /** + * Get git log for the workspace (last N commits). + */ + static async workspaceGitLog(personaId: string, count?: number): Promise<{ success: boolean; log: string }> { + throw new Error('CodeDaemon.workspaceGitLog() must be implemented by server'); + } + + /** + * Stage files for commit in the workspace. + */ + static async workspaceGitAdd(personaId: string, paths: string[]): Promise<{ staged: string[] }> { + throw new Error('CodeDaemon.workspaceGitAdd() must be implemented by server'); + } + + /** + * Create a git commit in the workspace. + */ + static async workspaceGitCommit(personaId: string, message: string): Promise<{ hash: string }> { + throw new Error('CodeDaemon.workspaceGitCommit() must be implemented by server'); + } + + /** + * Push the workspace branch to remote. + */ + static async workspaceGitPush(personaId: string, remote?: string, branch?: string): Promise<{ output: string }> { + throw new Error('CodeDaemon.workspaceGitPush() must be implemented by server'); + } } diff --git a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts index e099897d1..49998fd94 100644 --- a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts +++ b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts @@ -84,6 +84,7 @@ import { SocialCredentialEntity } from '../../../system/social/shared/SocialCred import { HandleEntity } from '../../../system/data/entities/HandleEntity'; import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; import { SkillEntity } from '../../../system/data/entities/SkillEntity'; +import { CodingChallengeEntity } from '../../../system/data/entities/CodingChallengeEntity'; /** * Initialize entity registration for the storage adapter @@ -141,6 +142,7 @@ export function initializeEntityRegistry(): void { new HandleEntity(); new CodingPlanEntity(); new SkillEntity(); + new CodingChallengeEntity(); registerEntity(UserEntity.collection, UserEntity); registerEntity(RoomEntity.collection, RoomEntity); @@ -190,6 +192,7 @@ export function initializeEntityRegistry(): void { registerEntity(HandleEntity.collection, HandleEntity); registerEntity(CodingPlanEntity.collection, CodingPlanEntity); registerEntity(SkillEntity.collection, SkillEntity); + registerEntity(CodingChallengeEntity.collection, CodingChallengeEntity); log.info('All entities registered'); } \ No newline at end of file diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index a88f6b103..c08e59914 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-02T04:27:03.817Z", + "generated": "2026-02-02T11:46:40.136Z", "version": "1.0.0", "commands": [ { @@ -5229,6 +5229,27 @@ } } }, + { + "name": "code/verify", + "description": "Code Verify Command - Shared Types\n *\n * Run TypeScript compilation checks and optionally execute tests against a persona workspace.\n * Returns structured errors with file, line, column, and message for each issue found.", + "params": { + "typeCheck": { + "type": "boolean", + "required": false, + "description": "typeCheck parameter" + }, + "testFiles": { + "type": "array", + "required": false, + "description": "testFiles parameter" + }, + "cwd": { + "type": "string", + "required": false, + "description": "cwd parameter" + } + } + }, { "name": "code/undo", "description": "Code Undo Command - Shared Types\n *\n * Undo a specific change or the last N changes. Applies reverse diffs from the change graph to restore previous file state.", @@ -5309,6 +5330,16 @@ "type": "number", "required": false, "description": "maxToolCalls parameter" + }, + "workspaceMode": { + "type": "string", + "required": false, + "description": "workspaceMode parameter" + }, + "sparsePaths": { + "type": "array", + "required": false, + "description": "sparsePaths parameter" } } }, @@ -5370,6 +5401,52 @@ } } }, + { + "name": "code/git", + "description": "Code Git Command - Shared Types\n *\n * Workspace-scoped git operations for the coding agent pipeline.\n * Operations: status, diff, log, add, commit, push.\n * All operations are routed through the Rust IPC backend for per-persona workspace isolation.", + "params": { + "userId": { + "type": "string", + "required": true, + "description": "userId parameter" + }, + "operation": { + "type": "string", + "required": true, + "description": "operation parameter" + }, + "paths": { + "type": "array", + "required": false, + "description": "paths parameter" + }, + "message": { + "type": "string", + "required": false, + "description": "message parameter" + }, + "remote": { + "type": "string", + "required": false, + "description": "remote parameter" + }, + "branch": { + "type": "string", + "required": false, + "description": "branch parameter" + }, + "staged": { + "type": "boolean", + "required": false, + "description": "staged parameter" + }, + "count": { + "type": "number", + "required": false, + "description": "count parameter" + } + } + }, { "name": "code/edit", "description": "Code Edit Command - Shared Types\n *\n * Edit a file using search-replace, line-range replacement, insert-at, or append. Creates a ChangeNode for undo. Safer than full file write for targeted modifications.", @@ -5487,6 +5564,48 @@ } } }, + { + "name": "challenge/run", + "description": "Challenge Run Command - Shared Types\n *\n * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt.", + "params": { + "challengeId": { + "type": "string", + "required": false, + "description": "challengeId parameter" + }, + "challengeNumber": { + "type": "number", + "required": false, + "description": "challengeNumber parameter" + }, + "personaId": { + "type": "string", + "required": false, + "description": "personaId parameter" + }, + "skipJudge": { + "type": "boolean", + "required": false, + "description": "skipJudge parameter" + } + } + }, + { + "name": "challenge/list", + "description": "Challenge List Command - Shared Types\n *\n * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training.", + "params": { + "difficulty": { + "type": "string", + "required": false, + "description": "difficulty parameter" + }, + "personaId": { + "type": "string", + "required": false, + "description": "personaId parameter" + } + } + }, { "name": "canvas/vision", "description": "Canvas Vision Command Types\n *\n * Enables AIs to \"see\" and interact with the drawing canvas:\n * - describe: Vision AI describes what's on the canvas\n * - transform: Use image generation to transform the sketch\n * - analyze: Structured analysis of the drawing", diff --git a/src/debug/jtag/generator/specs/challenge-list.json b/src/debug/jtag/generator/specs/challenge-list.json new file mode 100644 index 000000000..a3c602e1e --- /dev/null +++ b/src/debug/jtag/generator/specs/challenge-list.json @@ -0,0 +1,44 @@ +{ + "name": "challenge/list", + "description": "List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training.", + "params": [ + { + "name": "difficulty", + "type": "string", + "optional": true, + "description": "Filter by difficulty: beginner, intermediate, advanced, expert" + }, + { + "name": "personaId", + "type": "string", + "optional": true, + "description": "Show scores for a specific persona" + } + ], + "results": [ + { + "name": "challenges", + "type": "object[]", + "description": "Array of challenge summaries with name, difficulty, sequence, attempts, best score" + }, + { + "name": "totalChallenges", + "type": "number", + "description": "Total number of challenges" + }, + { + "name": "completedByPersona", + "type": "number", + "description": "Number of challenges passed by the specified persona" + } + ], + "examples": [ + { + "description": "List all challenges", + "command": "./jtag challenge/list", + "expectedResult": "{ totalChallenges: 5, challenges: [{ name: \"Add a function...\", difficulty: \"beginner\", ... }] }" + } + ], + "accessLevel": "ai-safe", + "environment": "server" +} diff --git a/src/debug/jtag/generator/specs/challenge-run.json b/src/debug/jtag/generator/specs/challenge-run.json new file mode 100644 index 000000000..ee76f5266 --- /dev/null +++ b/src/debug/jtag/generator/specs/challenge-run.json @@ -0,0 +1,101 @@ +{ + "name": "challenge/run", + "description": "Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt.", + "params": [ + { + "name": "challengeId", + "type": "string", + "optional": true, + "description": "Specific challenge ID to run. If not provided, runs the next unbeaten challenge" + }, + { + "name": "challengeNumber", + "type": "number", + "optional": true, + "description": "Run challenge by sequence number (1-5)" + }, + { + "name": "personaId", + "type": "string", + "optional": true, + "description": "Which AI persona runs the challenge. Defaults to the calling user" + }, + { + "name": "skipJudge", + "type": "boolean", + "optional": true, + "description": "Skip AI judge evaluation (faster, just checks execution success)" + } + ], + "results": [ + { + "name": "challengeName", + "type": "string", + "description": "Name of the challenge that was run" + }, + { + "name": "difficulty", + "type": "string", + "description": "Challenge difficulty level" + }, + { + "name": "status", + "type": "string", + "description": "Attempt outcome: passed, failed, partial, timeout, error" + }, + { + "name": "score", + "type": "number", + "description": "Judge score from 0-100" + }, + { + "name": "feedback", + "type": "string", + "description": "Judge feedback on the attempt" + }, + { + "name": "durationMs", + "type": "number", + "description": "Total execution time in milliseconds" + }, + { + "name": "toolCallsUsed", + "type": "number", + "description": "Number of tool calls consumed" + }, + { + "name": "filesModified", + "type": "string[]", + "description": "Files modified during the attempt" + }, + { + "name": "filesCreated", + "type": "string[]", + "description": "Files created during the attempt" + }, + { + "name": "errors", + "type": "string[]", + "description": "Errors encountered during execution" + } + ], + "examples": [ + { + "description": "Run the next unbeaten challenge", + "command": "./jtag challenge/run", + "expectedResult": "{ status: \"passed\", score: 85, challengeName: \"Add a function to a single file\" }" + }, + { + "description": "Run a specific challenge by number", + "command": "./jtag challenge/run --challengeNumber=3", + "expectedResult": "{ status: \"partial\", score: 60, challengeName: \"Extract shared utility from duplicate code\" }" + }, + { + "description": "Quick run without AI judge", + "command": "./jtag challenge/run --challengeNumber=1 --skipJudge=true", + "expectedResult": "{ status: \"passed\", score: 70, feedback: \"Pipeline completed.\" }" + } + ], + "accessLevel": "ai-safe", + "environment": "server" +} diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index 5ad7f8c2d..32d3089f5 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7521", + "version": "1.0.7530", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7521", + "version": "1.0.7530", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index 3bf6bd005..214377b6c 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7521", + "version": "1.0.7530", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/server/generated.ts b/src/debug/jtag/server/generated.ts index 81480557f..c75048cde 100644 --- a/src/debug/jtag/server/generated.ts +++ b/src/debug/jtag/server/generated.ts @@ -1,7 +1,7 @@ /** * Server Structure Registry - Auto-generated * - * Contains 18 daemons and 210 commands and 3 adapters. + * Contains 18 daemons and 214 commands and 3 adapters. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -60,14 +60,18 @@ import { AIValidateResponseServerCommand } from './../commands/ai/validate-respo import { CanvasStrokeAddServerCommand } from './../commands/canvas/stroke/add/server/CanvasStrokeAddServerCommand'; import { CanvasStrokeListServerCommand } from './../commands/canvas/stroke/list/server/CanvasStrokeListServerCommand'; import { CanvasVisionServerCommand } from './../commands/canvas/vision/server/CanvasVisionServerCommand'; +import { ChallengeListServerCommand } from './../commands/challenge/list/server/ChallengeListServerCommand'; +import { ChallengeRunServerCommand } from './../commands/challenge/run/server/ChallengeRunServerCommand'; import { CodeDiffServerCommand } from './../commands/code/diff/server/CodeDiffServerCommand'; import { CodeEditServerCommand } from './../commands/code/edit/server/CodeEditServerCommand'; +import { CodeGitServerCommand } from './../commands/code/git/server/CodeGitServerCommand'; import { CodeHistoryServerCommand } from './../commands/code/history/server/CodeHistoryServerCommand'; import { CodeReadServerCommand } from './../commands/code/read/server/CodeReadServerCommand'; import { CodeSearchServerCommand } from './../commands/code/search/server/CodeSearchServerCommand'; import { CodeTaskServerCommand } from './../commands/code/task/server/CodeTaskServerCommand'; import { CodeTreeServerCommand } from './../commands/code/tree/server/CodeTreeServerCommand'; import { CodeUndoServerCommand } from './../commands/code/undo/server/CodeUndoServerCommand'; +import { CodeVerifyServerCommand } from './../commands/code/verify/server/CodeVerifyServerCommand'; import { CodeWriteServerCommand } from './../commands/code/write/server/CodeWriteServerCommand'; import { ActivityCreateServerCommand } from './../commands/collaboration/activity/create/server/ActivityCreateServerCommand'; import { ActivityGetServerCommand } from './../commands/collaboration/activity/get/server/ActivityGetServerCommand'; @@ -514,6 +518,16 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionServerCommand', commandClass: CanvasVisionServerCommand }, +{ + name: 'challenge/list', + className: 'ChallengeListServerCommand', + commandClass: ChallengeListServerCommand + }, +{ + name: 'challenge/run', + className: 'ChallengeRunServerCommand', + commandClass: ChallengeRunServerCommand + }, { name: 'code/diff', className: 'CodeDiffServerCommand', @@ -524,6 +538,11 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CodeEditServerCommand', commandClass: CodeEditServerCommand }, +{ + name: 'code/git', + className: 'CodeGitServerCommand', + commandClass: CodeGitServerCommand + }, { name: 'code/history', className: 'CodeHistoryServerCommand', @@ -554,6 +573,11 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CodeUndoServerCommand', commandClass: CodeUndoServerCommand }, +{ + name: 'code/verify', + className: 'CodeVerifyServerCommand', + commandClass: CodeVerifyServerCommand + }, { name: 'code/write', className: 'CodeWriteServerCommand', diff --git a/src/debug/jtag/shared/generated-command-constants.ts b/src/debug/jtag/shared/generated-command-constants.ts index d4dd944e2..41d85ae15 100644 --- a/src/debug/jtag/shared/generated-command-constants.ts +++ b/src/debug/jtag/shared/generated-command-constants.ts @@ -59,14 +59,18 @@ export const COMMANDS = { CANVAS_STROKE_ADD: 'canvas/stroke/add', CANVAS_STROKE_LIST: 'canvas/stroke/list', CANVAS_VISION: 'canvas/vision', + CHALLENGE_LIST: 'challenge/list', + CHALLENGE_RUN: 'challenge/run', CODE_DIFF: 'code/diff', CODE_EDIT: 'code/edit', + CODE_GIT: 'code/git', CODE_HISTORY: 'code/history', CODE_READ: 'code/read', CODE_SEARCH: 'code/search', CODE_TASK: 'code/task', CODE_TREE: 'code/tree', CODE_UNDO: 'code/undo', + CODE_VERIFY: 'code/verify', CODE_WRITE: 'code/write', COLLABORATION_ACTIVITY_CREATE: 'collaboration/activity/create', COLLABORATION_ACTIVITY_GET: 'collaboration/activity/get', diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index 6ba3b9258..bbfd2a50b 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7521'; +export const VERSION = '1.0.7530'; export const PACKAGE_NAME = '@continuum/jtag'; diff --git a/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts b/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts new file mode 100644 index 000000000..5594e3190 --- /dev/null +++ b/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts @@ -0,0 +1,445 @@ +/** + * Challenge Definitions - Progressive coding challenges for AI training + * + * Challenges are ordered by difficulty: + * 1-2: Beginner (single file, simple operations) + * 3-4: Intermediate (multi-file, dependency chains) + * 5-6: Advanced (bug tracing, multi-agent) + * 7: Expert (architecture migration) + * + * Each definition contains everything needed to create a CodingChallengeEntity. + */ + +import type { ChallengeDifficulty, ChallengeCategory } from '../../data/entities/CodingChallengeEntity'; + +export interface ChallengeDefinition { + name: string; + sequenceNumber: number; + difficulty: ChallengeDifficulty; + category: ChallengeCategory; + description: string; + setupFiles: Record; + expectedOutcome: string; + evaluationCriteria: string[]; + expectedFiles?: Record; + timeLimitMs: number; + toolCallLimit: number; +} + +// ──────────────────────────────────────────────────────────── +// Challenge 1: Single-File Function Addition (Beginner) +// ──────────────────────────────────────────────────────────── + +export const CHALLENGE_1_FUNCTION_ADD: ChallengeDefinition = { + name: 'Add a function to a single file', + sequenceNumber: 1, + difficulty: 'beginner', + category: 'single-file', + description: `Read the file "math-utils.ts" and add a new exported function called "factorial" that computes the factorial of a non-negative integer. It should throw an error for negative inputs. Do not modify the existing functions.`, + setupFiles: { + 'math-utils.ts': `/** + * Math utility functions + */ + +export function add(a: number, b: number): number { + return a + b; +} + +export function multiply(a: number, b: number): number { + return a * b; +} + +export function isPrime(n: number): boolean { + if (n < 2) return false; + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false; + } + return true; +} +`, + }, + expectedOutcome: 'The file math-utils.ts should contain the original three functions plus a new "factorial" function that handles edge cases correctly.', + evaluationCriteria: [ + 'factorial function is exported and correctly computes factorial for n >= 0', + 'factorial(0) returns 1 (base case)', + 'factorial throws an error for negative input', + 'Existing functions (add, multiply, isPrime) are unchanged', + 'Code follows the existing style (TypeScript, exported functions)', + ], + expectedFiles: { + 'math-utils.ts': `/** + * Math utility functions + */ + +export function add(a: number, b: number): number { + return a + b; +} + +export function multiply(a: number, b: number): number { + return a * b; +} + +export function isPrime(n: number): boolean { + if (n < 2) return false; + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false; + } + return true; +} + +export function factorial(n: number): number { + if (n < 0) throw new Error('factorial requires a non-negative integer'); + if (n === 0 || n === 1) return 1; + let result = 1; + for (let i = 2; i <= n; i++) { + result *= i; + } + return result; +} +`, + }, + timeLimitMs: 60_000, + toolCallLimit: 8, +}; + +// ──────────────────────────────────────────────────────────── +// Challenge 2: Create File + Unit Test (Beginner) +// ──────────────────────────────────────────────────────────── + +export const CHALLENGE_2_FILE_PLUS_TEST: ChallengeDefinition = { + name: 'Create a function and its unit test', + sequenceNumber: 2, + difficulty: 'beginner', + category: 'multi-file', + description: `Create two files: +1. "string-utils.ts" β€” export a function "slugify(input: string): string" that converts a string to a URL-safe slug (lowercase, spaces/special chars replaced with hyphens, no leading/trailing hyphens, no consecutive hyphens). +2. "string-utils.test.ts" β€” write tests for slugify covering: basic conversion, multiple spaces, special characters, leading/trailing spaces, empty string, already-slugified input. + +Use simple assertion statements (no test framework needed). Each test should be a function that throws if the assertion fails.`, + setupFiles: { + 'README.md': '# String Utils\n\nCreate string-utils.ts and string-utils.test.ts as described.', + }, + expectedOutcome: 'Two files created: string-utils.ts with a working slugify function, and string-utils.test.ts with comprehensive tests.', + evaluationCriteria: [ + 'string-utils.ts exports a slugify function with correct signature', + 'slugify converts "Hello World" to "hello-world"', + 'slugify handles special characters (e.g., "Hello, World!" β†’ "hello-world")', + 'slugify removes leading/trailing hyphens', + 'slugify collapses consecutive hyphens', + 'string-utils.test.ts exists and contains meaningful test cases', + 'Tests cover edge cases: empty string, already-slugified, special chars', + ], + timeLimitMs: 90_000, + toolCallLimit: 12, +}; + +// ──────────────────────────────────────────────────────────── +// Challenge 3: Multi-File Refactor (Intermediate) +// ──────────────────────────────────────────────────────────── + +export const CHALLENGE_3_EXTRACT_SHARED: ChallengeDefinition = { + name: 'Extract shared utility from duplicate code', + sequenceNumber: 3, + difficulty: 'intermediate', + category: 'refactoring', + description: `Three files (user-service.ts, order-service.ts, product-service.ts) each contain a duplicated "formatCurrency" function with identical logic. Refactor by: +1. Creating a new "shared/format-utils.ts" that exports the single canonical formatCurrency function +2. Updating all three service files to import from shared/format-utils.ts instead of having their own copy +3. Do NOT change the function's behavior β€” only move it + +The three service files also have other functions that should NOT be changed.`, + setupFiles: { + 'user-service.ts': `import type { User } from './types'; + +function formatCurrency(amount: number, currency: string = 'USD'): string { + return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); +} + +export function getUserBalance(user: User): string { + return formatCurrency(user.balance); +} + +export function getUserSummary(user: User): string { + return \`\${user.name}: \${formatCurrency(user.balance)}\`; +} +`, + 'order-service.ts': `import type { Order } from './types'; + +function formatCurrency(amount: number, currency: string = 'USD'): string { + return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); +} + +export function getOrderTotal(order: Order): string { + const total = order.items.reduce((sum, item) => sum + item.price * item.quantity, 0); + return formatCurrency(total, order.currency); +} + +export function formatOrderLine(name: string, price: number): string { + return \`\${name}: \${formatCurrency(price)}\`; +} +`, + 'product-service.ts': `import type { Product } from './types'; + +function formatCurrency(amount: number, currency: string = 'USD'): string { + return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); +} + +export function getProductPrice(product: Product): string { + return formatCurrency(product.price, product.currency); +} + +export function getDiscountedPrice(product: Product, discount: number): string { + const discounted = product.price * (1 - discount); + return formatCurrency(discounted, product.currency); +} +`, + 'types.ts': `export interface User { + name: string; + balance: number; +} + +export interface OrderItem { + name: string; + price: number; + quantity: number; +} + +export interface Order { + items: OrderItem[]; + currency: string; +} + +export interface Product { + name: string; + price: number; + currency: string; +} +`, + }, + expectedOutcome: 'A new shared/format-utils.ts file containing the single formatCurrency function, with all three service files updated to import from it. No behavior changes.', + evaluationCriteria: [ + 'shared/format-utils.ts exists and exports formatCurrency', + 'formatCurrency function signature and behavior is preserved exactly', + 'user-service.ts imports formatCurrency from shared/format-utils', + 'order-service.ts imports formatCurrency from shared/format-utils', + 'product-service.ts imports formatCurrency from shared/format-utils', + 'No duplicate formatCurrency definitions remain in any service file', + 'All other functions in service files are unchanged', + 'types.ts is unmodified', + ], + timeLimitMs: 120_000, + toolCallLimit: 15, +}; + +// ──────────────────────────────────────────────────────────── +// Challenge 4: Add Feature with Types + Handler + Test (Intermediate) +// ──────────────────────────────────────────────────────────── + +export const CHALLENGE_4_FEATURE_ENDPOINT: ChallengeDefinition = { + name: 'Add a feature across types, handler, and test', + sequenceNumber: 4, + difficulty: 'intermediate', + category: 'feature', + description: `Add a "search" feature to the existing todo application: +1. Add a "SearchParams" interface to types.ts with fields: query (string), completed (boolean | undefined) +2. Add a "searchTodos" function to todo-service.ts that filters todos by title substring match and optional completed status +3. Add tests for searchTodos in todo-service.test.ts covering: text search, completed filter, combined search+filter, empty results, empty query returns all + +Follow the existing patterns in each file.`, + setupFiles: { + 'types.ts': `export interface Todo { + id: string; + title: string; + completed: boolean; + createdAt: number; +} + +export interface CreateTodoParams { + title: string; +} +`, + 'todo-service.ts': `import type { Todo, CreateTodoParams } from './types'; + +const todos: Todo[] = []; +let nextId = 1; + +export function createTodo(params: CreateTodoParams): Todo { + const todo: Todo = { + id: String(nextId++), + title: params.title, + completed: false, + createdAt: Date.now(), + }; + todos.push(todo); + return todo; +} + +export function getTodos(): Todo[] { + return [...todos]; +} + +export function completeTodo(id: string): Todo | undefined { + const todo = todos.find(t => t.id === id); + if (todo) todo.completed = true; + return todo; +} +`, + 'todo-service.test.ts': `import { createTodo, getTodos, completeTodo } from './todo-service'; + +function assert(condition: boolean, message: string): void { + if (!condition) throw new Error(\`Assertion failed: \${message}\`); +} + +// Test createTodo +const todo = createTodo({ title: 'Buy groceries' }); +assert(todo.title === 'Buy groceries', 'createTodo should set title'); +assert(todo.completed === false, 'createTodo should default to incomplete'); +assert(typeof todo.id === 'string', 'createTodo should assign string id'); + +// Test getTodos +const allTodos = getTodos(); +assert(allTodos.length >= 1, 'getTodos should return created todos'); + +// Test completeTodo +const completed = completeTodo(todo.id); +assert(completed?.completed === true, 'completeTodo should mark as complete'); + +console.log('All tests passed!'); +`, + }, + expectedOutcome: 'types.ts has SearchParams, todo-service.ts has searchTodos function, todo-service.test.ts has comprehensive search tests.', + evaluationCriteria: [ + 'SearchParams interface added to types.ts with correct fields', + 'searchTodos function added to todo-service.ts', + 'searchTodos filters by title substring (case-insensitive)', + 'searchTodos filters by completed status when provided', + 'searchTodos returns all when query is empty and no filter', + 'Tests added for all search scenarios', + 'Existing code in all three files is preserved', + ], + timeLimitMs: 120_000, + toolCallLimit: 15, +}; + +// ──────────────────────────────────────────────────────────── +// Challenge 5: Bug Fix by Call Chain Tracing (Advanced) +// ──────────────────────────────────────────────────────────── + +export const CHALLENGE_5_BUG_FIX: ChallengeDefinition = { + name: 'Find and fix a bug by tracing the call chain', + sequenceNumber: 5, + difficulty: 'advanced', + category: 'bug-fix', + description: `There is a bug in the discount calculation system. When a user applies a percentage discount coupon, the final price is sometimes negative for large discounts. + +The bug report: "When I apply a 50% discount coupon to a $10 item, the price shows as -$5.00 instead of $5.00" + +Trace through the code files to find the root cause and fix it. The bug is in the calculation logic, not the formatting. Hint: look at how the discount is applied.`, + setupFiles: { + 'cart.ts': `import { applyDiscount } from './pricing'; +import type { CartItem, Coupon } from './types'; + +export function calculateCartTotal(items: CartItem[], coupon?: Coupon): number { + let total = items.reduce((sum, item) => sum + item.price * item.quantity, 0); + if (coupon) { + total = applyDiscount(total, coupon); + } + return total; +} +`, + 'pricing.ts': `import type { Coupon } from './types'; +import { calculatePercentageDiscount, calculateFixedDiscount } from './discounts'; + +export function applyDiscount(total: number, coupon: Coupon): number { + switch (coupon.type) { + case 'percentage': + return calculatePercentageDiscount(total, coupon.value); + case 'fixed': + return calculateFixedDiscount(total, coupon.value); + default: + return total; + } +} +`, + 'discounts.ts': `/** + * Calculate the discounted price after applying a percentage discount. + * @param total - Original price + * @param percentage - Discount percentage (e.g., 50 for 50%) + * @returns Discounted price + */ +export function calculatePercentageDiscount(total: number, percentage: number): number { + // BUG: subtracts percentage as a raw number instead of computing the percentage + const discount = percentage; + return total - discount; +} + +/** + * Calculate the discounted price after applying a fixed amount discount. + * @param total - Original price + * @param amount - Fixed discount amount + * @returns Discounted price (minimum 0) + */ +export function calculateFixedDiscount(total: number, amount: number): number { + return Math.max(0, total - amount); +} +`, + 'types.ts': `export interface CartItem { + name: string; + price: number; + quantity: number; +} + +export interface Coupon { + code: string; + type: 'percentage' | 'fixed'; + value: number; +} +`, + }, + expectedOutcome: 'The calculatePercentageDiscount function should compute the actual percentage discount (total * percentage / 100) and ensure the result is non-negative.', + evaluationCriteria: [ + 'Root cause identified: calculatePercentageDiscount subtracts raw percentage instead of computing percentage of total', + 'Fix: discount = total * (percentage / 100)', + 'Result includes Math.max(0, ...) to prevent negative prices', + 'Only discounts.ts is modified (other files have no bugs)', + 'calculateFixedDiscount is unchanged (it already works correctly)', + 'Function signature and JSDoc are preserved', + ], + expectedFiles: { + 'discounts.ts': `/** + * Calculate the discounted price after applying a percentage discount. + * @param total - Original price + * @param percentage - Discount percentage (e.g., 50 for 50%) + * @returns Discounted price + */ +export function calculatePercentageDiscount(total: number, percentage: number): number { + const discount = total * (percentage / 100); + return Math.max(0, total - discount); +} + +/** + * Calculate the discounted price after applying a fixed amount discount. + * @param total - Original price + * @param amount - Fixed discount amount + * @returns Discounted price (minimum 0) + */ +export function calculateFixedDiscount(total: number, amount: number): number { + return Math.max(0, total - amount); +} +`, + }, + timeLimitMs: 120_000, + toolCallLimit: 15, +}; + +// ──────────────────────────────────────────────────────────── +// All challenges in order +// ──────────────────────────────────────────────────────────── + +export const ALL_CHALLENGES: ChallengeDefinition[] = [ + CHALLENGE_1_FUNCTION_ADD, + CHALLENGE_2_FILE_PLUS_TEST, + CHALLENGE_3_EXTRACT_SHARED, + CHALLENGE_4_FEATURE_ENDPOINT, + CHALLENGE_5_BUG_FIX, +]; diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts index 09e61d360..6161ccebb 100644 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts @@ -50,6 +50,8 @@ import type { CodingStepSnapshot, CodingPlanStatus } from '../../data/entities/C import { COLLECTIONS } from '../../shared/Constants'; import type { UUID } from '../../core/types/CrossPlatformUUID'; import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import { WorkspaceStrategy } from './WorkspaceStrategy'; +import type { WorkspaceResult } from './WorkspaceStrategy'; import * as fs from 'fs'; import * as path from 'path'; @@ -104,9 +106,6 @@ class ExecutionBudget { } } -/** Track which personas have workspaces initialized this process lifetime */ -const initializedWorkspaces = new Set(); - export class CodeAgentOrchestrator { private readonly modelSelector: CodingModelSelector; private readonly planFormulator: PlanFormulator; @@ -121,27 +120,20 @@ export class CodeAgentOrchestrator { } /** - * Ensure a workspace exists in the Rust backend for this persona. - * Creates the workspace directory and registers it with PathSecurity. - * The persona gets a writable workspace under .continuum/personas/{id}/workspace/ - * and read-only access to the main codebase for discovery. + * Ensure a workspace exists for this task. + * Delegates to WorkspaceStrategy which handles sandbox (default) and worktree modes. + * Returns the workspace result with handle and directory path. */ - private async ensureWorkspace(personaId: string): Promise { - if (initializedWorkspaces.has(personaId)) return; - - const jtagRoot = process.cwd(); - const workspaceDir = path.join(jtagRoot, '.continuum', 'personas', personaId, 'workspace'); - - // Create workspace directory if it doesn't exist - if (!fs.existsSync(workspaceDir)) { - fs.mkdirSync(workspaceDir, { recursive: true }); - log.info(`Created workspace directory: ${workspaceDir}`); - } - - // Register with Rust backend β€” writable workspace + read-only codebase access - await CodeDaemon.createWorkspace(personaId, workspaceDir, [jtagRoot]); - initializedWorkspaces.add(personaId); - log.info(`Workspace initialized for persona ${personaId}`); + private async ensureWorkspace(task: CodingTask): Promise { + const mode = task.workspaceMode ?? 'sandbox'; + const slug = task.description?.slice(0, 30).replace(/\W+/g, '-').toLowerCase() ?? 'work'; + + return WorkspaceStrategy.create({ + personaId: task.personaId as string, + mode, + taskSlug: slug, + sparsePaths: task.sparsePaths, + }); } /** @@ -176,7 +168,13 @@ export class CodeAgentOrchestrator { try { // Phase 0: Ensure workspace exists in Rust backend - await this.ensureWorkspace(task.personaId as string); + // Skip if task has a pre-configured workspace handle (e.g., challenges) + if (!task.workspaceHandle) { + const workspace = await this.ensureWorkspace(task); + // Use the workspace handle for all subsequent code/* operations + // Override the task reference with the resolved handle + task = { ...task, workspaceHandle: workspace.handle } as CodingTask; + } // Phase 1: Discovery (optional β€” gather codebase context for planning) let codebaseContext: string | undefined; @@ -267,6 +265,77 @@ export class CodeAgentOrchestrator { await this.updatePlanStep(planEntity, step.stepNumber, result); } + // Phase 4: Verifyβ†’Re-plan iteration loop + // After write/edit steps, verify compilation. If it fails, re-plan with error + // context and execute a fix plan. Repeat until verification passes or budget/iterations exhausted. + const autoVerify = options?.autoVerify ?? true; + const maxVerifyIterations = options?.maxVerifyIterations ?? 2; + const hasWriteSteps = stepResults.some( + r => r.status === 'completed' && (r.toolCall === 'code/write' || r.toolCall === 'code/edit') + ); + + if (hasWriteSteps && !budget.exceeded && !dryRun && autoVerify) { + for (let iteration = 0; iteration < maxVerifyIterations; iteration++) { + if (budget.exceeded) break; + + // Verify + const verifyErrors = await this.runVerification(task, budget); + + if (verifyErrors.length === 0) { + log.info(`Verification passed${iteration > 0 ? ` (after ${iteration} fix iteration(s))` : ''}`); + break; + } + + log.warn(`Verification failed (iteration ${iteration + 1}/${maxVerifyIterations}): ${verifyErrors.length} error(s)`); + + // Last iteration β€” just record errors, don't re-plan + if (iteration >= maxVerifyIterations - 1 || budget.exceeded) { + errors.push(...verifyErrors); + break; + } + + // Re-plan with error context + try { + const errorContext = verifyErrors.join('\n'); + const fixTask: CodingTask = { + ...task, + description: `Fix compilation errors from previous changes:\n${errorContext}\n\nOriginal task: ${task.description}`, + taskType: 'quick-fix', + }; + + const fixPlan = await this.planFormulator.formulate(fixTask, codebaseContext); + log.info(`Fix plan: ${fixPlan.steps.length} steps β€” "${fixPlan.summary}"`); + + // Execute fix plan steps + for (const step of fixPlan.steps) { + if (budget.exceeded) break; + + const depsOk = step.dependsOn.every(dep => + stepResults.some(r => r.stepNumber === dep && r.status === 'completed') + || completedSteps.has(dep) + ); + // For fix plans, skip dependency checks for step 1 (always execute first step) + if (!depsOk && step.stepNumber > 1) continue; + + const result = await this.executeStepWithRetry(step, task, budget, enforcer, false); + stepResults.push(result); + + if (result.status === 'completed') { + completedSteps.add(step.stepNumber + 1000 * (iteration + 1)); // Offset to avoid collisions + this.trackChanges(step, result, filesModified, filesCreated, changeIds); + } else { + errors.push(`Fix step ${step.stepNumber}: ${result.error ?? 'unknown'}`); + } + } + } catch (fixError) { + const msg = fixError instanceof Error ? fixError.message : String(fixError); + log.warn(`Re-plan failed (iteration ${iteration + 1}): ${msg}`); + errors.push(`Re-plan failed: ${msg}`); + break; + } + } + } + // Determine overall status const allCompleted = stepResults.every(r => r.status === 'completed'); const anyCompleted = stepResults.some(r => r.status === 'completed'); @@ -310,7 +379,7 @@ export class CodeAgentOrchestrator { try { // Get workspace tree const treeResult = await Commands.execute('code/tree', { - userId: task.personaId, + userId: task.workspaceHandle ?? task.personaId, path: '', maxDepth: 3, }); @@ -322,27 +391,32 @@ export class CodeAgentOrchestrator { let context = `## Workspace Tree\n${JSON.stringify(treeResult.root, null, 2).slice(0, 2000)}`; - // If relevant files are specified, read their contents - if (task.relevantFiles && task.relevantFiles.length > 0 && !budget.exceeded) { - for (const file of task.relevantFiles.slice(0, 3)) { // Max 3 files for context - if (budget.exceeded) break; + // Read relevant files for context β€” the LLM needs exact contents for precise edits + const filesToRead = task.relevantFiles && task.relevantFiles.length > 0 + ? task.relevantFiles + : this.extractFilesFromTree(treeResult.root); - const readResult = await Commands.execute('code/read', { - userId: task.personaId, - filePath: file, - }); - budget.recordToolCall(); - - if (readResult?.success && readResult.content) { - // Truncate large files - const content = readResult.content.length > 3000 - ? readResult.content.slice(0, 3000) + '\n... (truncated)' - : readResult.content; - context += `\n\n## ${file}\n\`\`\`\n${content}\n\`\`\``; - } + for (const file of filesToRead.slice(0, 8)) { // Max 8 files for context + if (budget.exceeded) break; + + const readResult = await Commands.execute('code/read', { + userId: task.workspaceHandle ?? task.personaId, + filePath: file, + }); + budget.recordToolCall(); + + if (readResult?.success && readResult.content) { + // Truncate large files + const content = readResult.content.length > 3000 + ? readResult.content.slice(0, 3000) + '\n... (truncated)' + : readResult.content; + context += `\n\n## ${file}\n\`\`\`\n${content}\n\`\`\``; } } + // Load architecture documentation for convention-aware planning + context += await this.loadArchitectureContext(task, budget); + return context; } catch (error) { log.warn(`Discovery failed: ${error instanceof Error ? error.message : String(error)}`); @@ -350,6 +424,86 @@ export class CodeAgentOrchestrator { } } + /** + * Load architecture documentation so the LLM plans follow project conventions. + * + * Reads CLAUDE.md from disk (it lives at the repo root, above the workspace read root) + * and key architecture docs from the jtag docs/ directory via code/read. + */ + private async loadArchitectureContext(task: CodingTask, budget: ExecutionBudget): Promise { + let archContext = ''; + + // CLAUDE.md lives at the repo root β€” read directly from disk since it's above read roots + const jtagRoot = process.cwd(); + const repoRoot = path.resolve(jtagRoot, '..', '..', '..'); + const claudeMdPath = path.join(repoRoot, 'CLAUDE.md'); + + try { + if (fs.existsSync(claudeMdPath)) { + let content = fs.readFileSync(claudeMdPath, 'utf-8'); + // Truncate to essential sections β€” full CLAUDE.md is ~20k chars + if (content.length > 6000) { + content = content.slice(0, 6000) + '\n... (truncated β€” see full CLAUDE.md for details)'; + } + archContext += `\n\n## Project Conventions (CLAUDE.md)\n\`\`\`\n${content}\n\`\`\``; + } + } catch { + // Non-critical β€” continue without CLAUDE.md + } + + // Read architecture docs from within the read root (jtag/docs/) + const archDocs = [ + 'docs/ARCHITECTURE-RULES.md', + 'docs/UNIVERSAL-PRIMITIVES.md', + ]; + + for (const doc of archDocs) { + if (budget.exceeded) break; + try { + const readResult = await Commands.execute('code/read', { + userId: task.workspaceHandle ?? task.personaId, + filePath: doc, + }); + budget.recordToolCall(); + + if (readResult?.success && readResult.content) { + const content = readResult.content.length > 3000 + ? readResult.content.slice(0, 3000) + '\n... (truncated)' + : readResult.content; + archContext += `\n\n## Architecture: ${doc}\n\`\`\`\n${content}\n\`\`\``; + } + } catch { + // Non-critical β€” continue without this doc + } + } + + return archContext; + } + + /** + * Extract file paths from a tree result for auto-discovery. + * For small workspaces (≀8 files), reads all files to give the LLM full context. + */ + private extractFilesFromTree(root: Record): string[] { + const files: string[] = []; + const walk = (node: Record, prefix: string) => { + const children = node.children as Record[] | undefined; + if (!children) return; + for (const child of children) { + const name = child.name as string; + const type = child.type as string; + const path = prefix ? `${prefix}/${name}` : name; + if (type === 'file') { + files.push(path); + } else if (type === 'directory') { + walk(child, path); + } + } + }; + walk(root, ''); + return files; + } + /** * Execute a single step with retry logic. */ @@ -410,10 +564,10 @@ export class CodeAgentOrchestrator { try { log.debug(`Step ${step.stepNumber}${dryRun ? ' [DRY]' : ''}: ${step.action} β€” ${step.description}`); - // Inject personaId (userId) into params for workspace scoping + // Inject workspace handle (userId) into params for workspace scoping const params = { ...step.toolParams, - userId: task.personaId, + userId: task.workspaceHandle ?? task.personaId, }; // Gate tool call through security tier enforcer @@ -512,6 +666,36 @@ export class CodeAgentOrchestrator { return action === 'write' || action === 'edit' || action === 'undo'; } + /** + * Run TypeScript verification and return error strings. + * Empty array means verification passed. + */ + private async runVerification(task: CodingTask, budget: ExecutionBudget): Promise { + try { + const verifyResult = await Commands.execute('code/verify', { + userId: task.workspaceHandle ?? task.personaId, + typeCheck: true, + }); + budget.recordToolCall(); + + if (verifyResult?.success) { + return []; + } + + if (verifyResult?.typeCheck?.errors?.length > 0) { + return verifyResult.typeCheck.errors.map( + (e: { file: string; line: number; code: string; message: string }) => + `${e.file}:${e.line} ${e.code}: ${e.message}` + ); + } + + return ['TypeScript compilation failed (no detailed errors)']; + } catch (error) { + log.warn(`Verification error: ${error instanceof Error ? error.message : String(error)}`); + return [`Verification error: ${error instanceof Error ? error.message : String(error)}`]; + } + } + /** * Build the final CodingResult. */ diff --git a/src/debug/jtag/system/code/server/CodingChallengeRunner.ts b/src/debug/jtag/system/code/server/CodingChallengeRunner.ts new file mode 100644 index 000000000..4bca5b76b --- /dev/null +++ b/src/debug/jtag/system/code/server/CodingChallengeRunner.ts @@ -0,0 +1,239 @@ +/** + * CodingChallengeRunner - Execute coding challenges and capture results + * + * Runs a coding challenge against the code/task pipeline: + * 1. Set up workspace with challenge files + * 2. Execute code/task with the challenge description + * 3. Collect result files from workspace + * 4. Pass to CodingJudge for evaluation + * 5. Record attempt on entity + * + * Each challenge gets a fresh workspace to prevent state leakage. + */ + +import { Logger } from '../../core/logging/Logger'; +import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import { CodeAgentOrchestrator } from './CodeAgentOrchestrator'; +import { CodingJudge } from './CodingJudge'; +import type { CodingTask, ExecutionOptions } from '../shared/CodingTypes'; +import type { CodingChallengeEntity, ChallengeAttempt, AttemptStatus } from '../../data/entities/CodingChallengeEntity'; +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { v4 as uuidv4 } from 'uuid'; +import * as fs from 'fs'; +import * as path from 'path'; + +const log = Logger.create('CodingChallengeRunner', 'code'); + +export interface ChallengeRunOptions { + /** Which AI persona runs the challenge */ + personaId: UUID; + /** Skip AI judge evaluation (just check execution success) */ + skipJudge?: boolean; + /** Override security tier (default: write) */ + securityTier?: string; +} + +export interface ChallengeRunResult { + success: boolean; + attempt: ChallengeAttempt; + /** Raw code/task result */ + taskResult?: Record; +} + +export class CodingChallengeRunner { + private readonly orchestrator: CodeAgentOrchestrator; + private readonly judge: CodingJudge; + + constructor() { + this.orchestrator = new CodeAgentOrchestrator(); + this.judge = new CodingJudge(); + } + + /** + * Execute a coding challenge for a persona. + * + * Creates a fresh workspace, seeds it with challenge files, + * runs the coding pipeline, evaluates results, and records the attempt. + */ + async run(challenge: CodingChallengeEntity, options: ChallengeRunOptions): Promise { + const { personaId } = options; + const startedAt = Date.now(); + + log.info(`Running challenge "${challenge.name}" (${challenge.difficulty}) for persona ${personaId}`); + + try { + // Phase 1: Set up challenge workspace with unique handle + const workspaceHandle = `challenge-${(challenge.id ?? challenge.sequenceNumber)}-${personaId}`; + const workspaceDir = await this.setupChallengeWorkspace(challenge, personaId, workspaceHandle); + + // Phase 2: Execute the coding task + const task: CodingTask = { + id: uuidv4() as UUID, + personaId, + description: challenge.description, + taskType: 'generation', + maxDurationMs: challenge.timeLimitMs, + maxToolCalls: challenge.toolCallLimit, + workspaceHandle, + relevantFiles: Object.keys(challenge.setupFiles), + createdAt: Date.now(), + }; + + const execOptions: ExecutionOptions = { + dryRun: false, + securityTier: (options.securityTier as any) ?? 'write', + }; + + const result = await this.orchestrator.execute(task, execOptions); + + // Phase 3: Collect result files from workspace + const resultFiles = await this.collectResultFiles(workspaceDir, challenge); + + // Phase 4: Judge evaluation + const completedAt = Date.now(); + let score = 0; + let feedback = ''; + let status: AttemptStatus; + + if (result.status === 'completed' || result.status === 'partial') { + if (options.skipJudge) { + score = result.status === 'completed' ? 70 : 40; + feedback = `Pipeline ${result.status}. ${result.stepResults.filter(s => s.status === 'completed').length}/${result.stepResults.length} steps completed.`; + status = result.status === 'completed' ? 'passed' : 'partial'; + } else { + const evaluation = await this.judge.evaluate(challenge, resultFiles, result); + score = evaluation.score; + feedback = evaluation.feedback; + status = evaluation.passed ? 'passed' : evaluation.score >= 40 ? 'partial' : 'failed'; + } + } else if (result.status === 'budget_exceeded') { + status = 'timeout'; + feedback = `Budget exceeded: ${result.errors.join('; ')}`; + } else { + status = 'failed'; + feedback = `Execution failed: ${result.errors.join('; ')}`; + } + + const attempt: ChallengeAttempt = { + personaId, + planId: task.id, + startedAt, + completedAt, + status, + score, + feedback, + filesModified: result.filesModified, + filesCreated: result.filesCreated, + errors: result.errors, + toolCallsUsed: result.totalToolCalls, + durationMs: result.totalDurationMs, + resultFiles, + }; + + // Phase 5: Record attempt on entity + challenge.recordAttempt(attempt); + + log.info(`Challenge "${challenge.name}" ${status}: score=${score}, duration=${result.totalDurationMs}ms`); + + return { + success: status === 'passed', + attempt, + taskResult: result as unknown as Record, + }; + + } catch (error) { + const completedAt = Date.now(); + const message = error instanceof Error ? error.message : String(error); + log.error(`Challenge "${challenge.name}" error: ${message}`); + + const attempt: ChallengeAttempt = { + personaId, + startedAt, + completedAt, + status: 'error', + score: 0, + feedback: `Runner error: ${message}`, + filesModified: [], + filesCreated: [], + errors: [message], + toolCallsUsed: 0, + durationMs: completedAt - startedAt, + }; + + challenge.recordAttempt(attempt); + + return { success: false, attempt }; + } + } + + /** + * Set up a fresh workspace with challenge files. + * Creates the workspace directory and writes all setup files. + */ + private async setupChallengeWorkspace( + challenge: CodingChallengeEntity, + personaId: UUID, + workspaceHandle: string, + ): Promise { + const jtagRoot = process.cwd(); + const challengeWorkspace = path.join( + jtagRoot, '.continuum', 'personas', personaId as string, + 'challenges', challenge.id as string, + ); + + // Create fresh workspace + if (fs.existsSync(challengeWorkspace)) { + fs.rmSync(challengeWorkspace, { recursive: true }); + } + fs.mkdirSync(challengeWorkspace, { recursive: true }); + + // Write setup files + for (const [filePath, content] of Object.entries(challenge.setupFiles)) { + const fullPath = path.join(challengeWorkspace, filePath); + const dir = path.dirname(fullPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + fs.writeFileSync(fullPath, content, 'utf-8'); + } + + // Register workspace in Rust backend using unique handle (writable, no read roots) + await CodeDaemon.createWorkspace(workspaceHandle, challengeWorkspace); + + log.debug(`Challenge workspace set up at ${challengeWorkspace} with ${Object.keys(challenge.setupFiles).length} files`); + + return challengeWorkspace; + } + + /** + * Collect result files from workspace after execution. + * Reads all files that were part of the challenge setup, plus any new files. + */ + private async collectResultFiles( + workspaceDir: string, + challenge: CodingChallengeEntity, + ): Promise> { + const resultFiles: Record = {}; + + const collectDir = (dir: string, prefix: string = ''): void => { + if (!fs.existsSync(dir)) return; + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name; + const fullPath = path.join(dir, entry.name); + if (entry.isDirectory()) { + collectDir(fullPath, relativePath); + } else if (entry.isFile()) { + try { + resultFiles[relativePath] = fs.readFileSync(fullPath, 'utf-8'); + } catch { + // Skip unreadable files + } + } + } + }; + + collectDir(workspaceDir); + return resultFiles; + } +} diff --git a/src/debug/jtag/system/code/server/CodingJudge.ts b/src/debug/jtag/system/code/server/CodingJudge.ts new file mode 100644 index 000000000..e78549ff2 --- /dev/null +++ b/src/debug/jtag/system/code/server/CodingJudge.ts @@ -0,0 +1,288 @@ +/** + * CodingJudge - AI evaluation of coding challenge attempts + * + * Uses a reasoning-class model to evaluate challenge solutions against rubric criteria. + * Returns a score (0-100) and detailed feedback. + * + * Evaluation considers: + * - Correctness: Does the code do what was asked? + * - Completeness: Were all requirements met? + * - Code quality: Is the code clean and idiomatic? + * - Efficiency: Were resources (tool calls, time) used well? + */ + +import { Logger } from '../../core/logging/Logger'; +import { AIProviderDaemon } from '../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; +import type { CodingChallengeEntity } from '../../data/entities/CodingChallengeEntity'; +import type { CodingResult } from '../shared/CodingTypes'; + +const log = Logger.create('CodingJudge', 'code'); + +export interface JudgeEvaluation { + /** Score from 0 to 100 */ + score: number; + /** Whether the challenge is considered passed (score >= 70) */ + passed: boolean; + /** Detailed feedback */ + feedback: string; + /** Per-criterion scores */ + criteriaScores: Array<{ criterion: string; score: number; comment: string }>; + /** Strengths identified */ + strengths: string[]; + /** Weaknesses identified */ + weaknesses: string[]; +} + +/** Minimum score to pass a challenge */ +const PASS_THRESHOLD = 70; + +export class CodingJudge { + + /** + * Evaluate a coding challenge attempt. + * + * Sends the challenge spec, result files, and execution metrics to a + * reasoning model that scores the attempt against the rubric. + */ + async evaluate( + challenge: CodingChallengeEntity, + resultFiles: Record, + executionResult: CodingResult, + ): Promise { + log.info(`Judging challenge "${challenge.name}" β€” ${Object.keys(resultFiles).length} result files`); + + const prompt = this.buildJudgePrompt(challenge, resultFiles, executionResult); + + try { + const response = await AIProviderDaemon.generateText({ + messages: [{ role: 'user', content: prompt }], + systemPrompt: JUDGE_SYSTEM_PROMPT, + preferredProvider: 'anthropic', + model: 'claude-sonnet-4-5-20250514', + temperature: 0.2, + maxTokens: 2000, + }); + + return this.parseJudgeResponse(response.text, challenge); + + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + log.error(`Judge evaluation failed: ${message}`); + + // Fallback: simple heuristic scoring when LLM unavailable + return this.heuristicScore(challenge, resultFiles, executionResult); + } + } + + /** + * Build the evaluation prompt for the judge model. + */ + private buildJudgePrompt( + challenge: CodingChallengeEntity, + resultFiles: Record, + executionResult: CodingResult, + ): string { + const setupFilesStr = Object.entries(challenge.setupFiles) + .map(([path, content]) => `### ${path} (BEFORE)\n\`\`\`\n${content}\n\`\`\``) + .join('\n\n'); + + const resultFilesStr = Object.entries(resultFiles) + .map(([path, content]) => `### ${path} (AFTER)\n\`\`\`\n${content}\n\`\`\``) + .join('\n\n'); + + const expectedFilesStr = challenge.expectedFiles + ? Object.entries(challenge.expectedFiles) + .map(([path, content]) => `### ${path} (EXPECTED)\n\`\`\`\n${content}\n\`\`\``) + .join('\n\n') + : 'No expected files provided β€” evaluate based on description and criteria.'; + + const criteriaList = challenge.evaluationCriteria + .map((c, i) => `${i + 1}. ${c}`) + .join('\n'); + + return `## Challenge: ${challenge.name} +**Difficulty**: ${challenge.difficulty} +**Category**: ${challenge.category} + +## Task Description +${challenge.description} + +## Expected Outcome +${challenge.expectedOutcome} + +## Evaluation Criteria +${criteriaList} + +## Setup Files (Initial State) +${setupFilesStr} + +## Result Files (After Execution) +${resultFilesStr} + +## Expected Files (Reference Solution) +${expectedFilesStr} + +## Execution Metrics +- Status: ${executionResult.status} +- Steps completed: ${executionResult.stepResults.filter(s => s.status === 'completed').length}/${executionResult.stepResults.length} +- Tool calls used: ${executionResult.totalToolCalls} +- Duration: ${executionResult.totalDurationMs}ms +- Files modified: ${executionResult.filesModified.join(', ') || 'none'} +- Files created: ${executionResult.filesCreated.join(', ') || 'none'} +- Errors: ${executionResult.errors.join('; ') || 'none'} + +## Instructions +Evaluate this coding challenge attempt. Score each criterion from 0-100, then provide an overall score. Respond with valid JSON matching this schema: + +\`\`\`json +{ + "score": , + "feedback": "", + "criteriaScores": [ + { "criterion": "", "score": <0-100>, "comment": "" } + ], + "strengths": ["", ...], + "weaknesses": ["", ...] +} +\`\`\``; + } + + /** + * Parse the LLM judge response into a JudgeEvaluation. + */ + private parseJudgeResponse(text: string, challenge: CodingChallengeEntity): JudgeEvaluation { + try { + // Extract JSON from response (may be wrapped in markdown code block) + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + throw new Error('No JSON found in judge response'); + } + + const parsed = JSON.parse(jsonMatch[0]); + const score = Math.max(0, Math.min(100, Math.round(parsed.score ?? 0))); + + return { + score, + passed: score >= PASS_THRESHOLD, + feedback: parsed.feedback ?? 'No feedback provided', + criteriaScores: Array.isArray(parsed.criteriaScores) ? parsed.criteriaScores : [], + strengths: Array.isArray(parsed.strengths) ? parsed.strengths : [], + weaknesses: Array.isArray(parsed.weaknesses) ? parsed.weaknesses : [], + }; + } catch (error) { + log.warn(`Failed to parse judge response: ${error instanceof Error ? error.message : String(error)}`); + return { + score: 0, + passed: false, + feedback: `Judge response parsing failed: ${text.slice(0, 200)}`, + criteriaScores: [], + strengths: [], + weaknesses: [], + }; + } + } + + /** + * Simple heuristic scoring when LLM judge is unavailable. + * Based on execution success, file presence, and basic content checks. + */ + private heuristicScore( + challenge: CodingChallengeEntity, + resultFiles: Record, + executionResult: CodingResult, + ): JudgeEvaluation { + let score = 0; + const strengths: string[] = []; + const weaknesses: string[] = []; + + // Base score from execution status + if (executionResult.status === 'completed') { + score += 30; + strengths.push('All plan steps completed'); + } else if (executionResult.status === 'partial') { + score += 15; + weaknesses.push('Only partial execution completed'); + } else { + weaknesses.push(`Execution ${executionResult.status}`); + } + + // File presence check (30 points) + if (challenge.expectedFiles) { + const expectedPaths = Object.keys(challenge.expectedFiles); + const foundPaths = expectedPaths.filter(p => resultFiles[p] !== undefined); + const fileScore = expectedPaths.length > 0 + ? Math.round((foundPaths.length / expectedPaths.length) * 30) + : 0; + score += fileScore; + if (foundPaths.length === expectedPaths.length) { + strengths.push('All expected files present'); + } else { + weaknesses.push(`Missing ${expectedPaths.length - foundPaths.length} expected files`); + } + } else { + // No expected files β€” award points if any files were created/modified + if (executionResult.filesCreated.length > 0 || executionResult.filesModified.length > 0) { + score += 20; + strengths.push('Files were created/modified'); + } + } + + // Content match check (30 points) + if (challenge.expectedFiles) { + let contentMatches = 0; + let totalChecks = 0; + for (const [filePath, expectedContent] of Object.entries(challenge.expectedFiles)) { + if (resultFiles[filePath]) { + totalChecks++; + const actual = resultFiles[filePath].trim(); + const expected = expectedContent.trim(); + if (actual === expected) { + contentMatches++; + } else if (actual.includes(expected.split('\n')[0])) { + contentMatches += 0.5; + } + } + } + if (totalChecks > 0) { + score += Math.round((contentMatches / totalChecks) * 30); + } + } + + // Efficiency bonus (10 points) + const toolEfficiency = challenge.toolCallLimit > 0 + ? 1 - (executionResult.totalToolCalls / challenge.toolCallLimit) + : 0; + if (toolEfficiency > 0.5) { + score += 10; + strengths.push('Efficient tool call usage'); + } else if (toolEfficiency > 0.2) { + score += 5; + } + + score = Math.min(100, Math.max(0, score)); + + return { + score, + passed: score >= PASS_THRESHOLD, + feedback: `Heuristic evaluation (LLM judge unavailable): score=${score}`, + criteriaScores: challenge.evaluationCriteria.map(c => ({ + criterion: c, + score: score, + comment: 'Heuristic scoring β€” LLM judge unavailable', + })), + strengths, + weaknesses, + }; + } +} + +const JUDGE_SYSTEM_PROMPT = `You are a coding challenge evaluator. You assess AI-generated code solutions against specific criteria. + +Be strict but fair: +- Score 90-100: Excellent β€” meets all criteria, clean code, efficient +- Score 70-89: Good β€” meets most criteria, minor issues +- Score 50-69: Partial β€” some criteria met, significant gaps +- Score 30-49: Poor β€” major issues, few criteria met +- Score 0-29: Failed β€” solution doesn't address the task + +Always respond with valid JSON matching the requested schema. Be specific in feedback.`; diff --git a/src/debug/jtag/system/code/server/PlanFormulator.ts b/src/debug/jtag/system/code/server/PlanFormulator.ts index 4dad3a09d..a99b6d590 100644 --- a/src/debug/jtag/system/code/server/PlanFormulator.ts +++ b/src/debug/jtag/system/code/server/PlanFormulator.ts @@ -50,13 +50,13 @@ const CODE_TOOL_SCHEMAS: readonly { name: string; description: string; params: s }, { name: 'code/edit', - description: 'Edit a file using search-replace, line-range, insert-at, or append. Records a ChangeNode.', - params: 'filePath: string, editMode: { type: "search_replace", search: string, replace: string, replaceAll?: boolean } | { type: "line_range", startLine: number, endLine: number, newContent: string } | { type: "insert_at", line: number, content: string } | { type: "append", content: string }, description?: string', + description: 'Edit a file. Flat params β€” choose ONE editType. search_replace: { editType: "search_replace", search, replace, replaceAll? }. line_range: { editType: "line_range", startLine, endLine, newContent }. insert_at: { editType: "insert_at", line, content }. append: { editType: "append", content }.', + params: 'filePath: string, editType: "search_replace"|"line_range"|"insert_at"|"append", search?: string, replace?: string, replaceAll?: boolean, startLine?: number, endLine?: number, newContent?: string, line?: number, content?: string, description?: string', }, { name: 'code/diff', - description: 'Preview an edit as unified diff without applying it.', - params: 'filePath: string, editMode: (same as code/edit)', + description: 'Preview an edit as unified diff without applying it. Same params as code/edit.', + params: 'filePath: string, editType: "search_replace"|"line_range"|"insert_at"|"append", (same params as code/edit)', }, { name: 'code/undo', @@ -68,11 +68,21 @@ const CODE_TOOL_SCHEMAS: readonly { name: string; description: string; params: s description: 'View change history for a file or workspace.', params: 'filePath?: string, limit?: number', }, + { + name: 'code/verify', + description: 'Run TypeScript compilation check and optionally run tests. Use after editing files to verify changes compile correctly.', + params: 'typeCheck?: boolean, testFiles?: string[]', + }, + { + name: 'code/git', + description: 'Workspace-scoped git operations. Use after verifying changes to stage and commit them. Operations: status, diff, log, add, commit.', + params: 'operation: "status"|"diff"|"log"|"add"|"commit", paths?: string[], message?: string, staged?: boolean, count?: number', + }, ] as const; /** Valid actions the LLM can use in plan steps */ const VALID_ACTIONS: ReadonlySet = new Set([ - 'discover', 'search', 'read', 'write', 'edit', 'diff', 'undo', 'verify', 'report', + 'discover', 'search', 'read', 'write', 'edit', 'diff', 'undo', 'verify', 'commit', 'report', ]); /** Map from action to the expected code/* command */ @@ -84,7 +94,8 @@ const ACTION_TO_COMMAND: Record = { edit: 'code/edit', diff: 'code/diff', undo: 'code/undo', - verify: 'code/read', // Verify by reading back + verify: 'code/verify', + commit: 'code/git', report: 'code/history', }; @@ -152,7 +163,7 @@ ${toolDocs} - Maximum ${maxToolCalls} tool calls total - Maximum ${maxDurationSec} seconds execution time - Always read files before editing them -- Always verify changes after editing (read back or diff) +- Always verify changes after editing β€” use code/verify for compilation checks, or code/read to verify content - Prefer code/edit over code/write for existing files - Use code/tree and code/search for discovery before making changes @@ -165,7 +176,7 @@ Respond with ONLY a JSON object (no markdown, no explanation): "steps": [ { "stepNumber": 1, - "action": "discover|search|read|write|edit|diff|undo|verify|report", + "action": "discover|search|read|write|edit|diff|undo|verify|commit|report", "description": "What this step does", "targetFiles": ["path/to/file.ts"], "toolCall": "code/tree", @@ -176,21 +187,52 @@ Respond with ONLY a JSON object (no markdown, no explanation): ] } +## Tool Param Examples +- code/edit append: { "filePath": "main.ts", "editType": "append", "content": "\\nexport function foo() {}" } +- code/edit search_replace: { "filePath": "main.ts", "editType": "search_replace", "search": "old text", "replace": "new text" } +- code/edit line_range: { "filePath": "main.ts", "editType": "line_range", "startLine": 5, "endLine": 10, "newContent": "replacement lines" } +- code/write: { "filePath": "new-file.ts", "content": "export const x = 1;" } +- code/read: { "filePath": "main.ts" } +- code/verify: { "typeCheck": true } +- code/verify with tests: { "typeCheck": true, "testFiles": ["tests/utils.test.ts"] } +- code/git status: { "operation": "status" } +- code/git add: { "operation": "add", "paths": ["."] } +- code/git commit: { "operation": "commit", "message": "Add feature X" } + +## CRITICAL: search_replace Rules +- The "search" string must be the EXACT, COMPLETE text from the file β€” never truncated, never abbreviated +- NEVER use "..." or ellipsis in search strings. The search is a literal text match +- For replacing large blocks of code (functions, classes), prefer code/write to rewrite the ENTIRE file + with the desired content, rather than trying to search_replace multi-line blocks +- For small, precise changes (renaming, adding an import line), search_replace works well +- When removing code and adding an import, use code/write to output the complete new file content + ## Risk Assessment Guidelines - **low**: Read-only tasks, documentation, test-only changes, single-file edits - **medium**: Multi-file edits, adding new functions, standard refactoring - **high**: API/interface changes, security-sensitive code, cross-module refactoring - **critical**: System configuration, build scripts, deployment, anything requiring shell execution +## Architecture Awareness +If architecture documentation is provided in the codebase context, follow its conventions strictly: +- Use the project's established patterns (Commands.execute, Events, path aliases, etc.) +- Respect module structure (shared/browser/server separation) +- Follow the compression principle (one logical decision, one place β€” no duplication) +- Use strict typing β€” never use \`any\` or \`unknown\`, import correct types +- Follow naming conventions visible in existing code +- When creating new files, match the structure of similar existing files + ## Rules 1. Steps are numbered starting from 1 2. dependsOn lists step numbers that must complete first (DAG) 3. Independent steps CAN have the same dependsOn (parallel execution) 4. Every write/edit MUST have a preceding read of the same file -5. action must be one of: discover, search, read, write, edit, diff, undo, verify, report +5. action must be one of: discover, search, read, write, edit, diff, undo, verify, commit, report 6. toolCall must match a code/* command from the tools list 7. toolParams must match the command's parameter schema -8. Keep plans minimal β€” don't add unnecessary steps`; +8. Keep plans minimal β€” don't add unnecessary steps +9. For multi-file refactoring: use code/write to rewrite entire files rather than search_replace on large blocks +10. NEVER truncate or abbreviate text in search_replace "search" strings β€” they must be EXACT literal matches`; const messages: ChatMessage[] = [ { role: 'system', content: systemPrompt }, diff --git a/src/debug/jtag/system/code/server/SecurityTier.ts b/src/debug/jtag/system/code/server/SecurityTier.ts index 500a77343..c36d318ac 100644 --- a/src/debug/jtag/system/code/server/SecurityTier.ts +++ b/src/debug/jtag/system/code/server/SecurityTier.ts @@ -65,6 +65,7 @@ const READ_TIER: SecurityTier = { allowedCommands: [ ...DISCOVERY_TIER.allowedCommands, 'code/diff', + 'code/git', 'data/list', 'data/read', ], @@ -92,6 +93,7 @@ const WRITE_TIER: SecurityTier = { 'code/edit', 'code/undo', 'code/diff', + 'code/verify', ], deniedCommands: [ 'code/delete', diff --git a/src/debug/jtag/system/code/server/WorkspaceStrategy.ts b/src/debug/jtag/system/code/server/WorkspaceStrategy.ts new file mode 100644 index 000000000..34c39faf0 --- /dev/null +++ b/src/debug/jtag/system/code/server/WorkspaceStrategy.ts @@ -0,0 +1,195 @@ +/** + * WorkspaceStrategy - Unified workspace creation for coding tasks + * + * Abstracts the three workspace patterns into a single interface: + * - sandbox: Isolated directory for persona work (default) + * - worktree: Git worktree on real repo with sparse checkout + * - challenge: Pre-seeded isolated workspace (handled by CodingChallengeRunner) + * + * Each strategy creates a directory, registers it with the Rust backend + * via CodeDaemon.createWorkspace(), and returns a handle + path. + */ + +import { Commands } from '../../core/shared/Commands'; +import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import { Logger } from '../../core/logging/Logger'; +import * as fs from 'fs'; +import * as path from 'path'; + +const log = Logger.create('WorkspaceStrategy', 'code'); + +// ──────────────────────────────────────────────────────────── +// Types +// ──────────────────────────────────────────────────────────── + +export type WorkspaceMode = 'sandbox' | 'worktree'; + +export interface WorkspaceConfig { + /** Persona ID creating the workspace */ + readonly personaId: string; + + /** Which workspace strategy to use */ + readonly mode: WorkspaceMode; + + /** Short slug for branch naming (worktree mode): ai/{persona}/{slug} */ + readonly taskSlug?: string; + + /** Paths to sparse-checkout (worktree mode) */ + readonly sparsePaths?: string[]; +} + +export interface WorkspaceResult { + /** Handle to pass to code/* commands as userId */ + readonly handle: string; + + /** Absolute path to the workspace directory */ + readonly workspaceDir: string; + + /** Git branch name (worktree mode only) */ + readonly branch?: string; + + /** Which mode was used */ + readonly mode: WorkspaceMode; +} + +// ──────────────────────────────────────────────────────────── +// Track initialized workspaces to avoid re-creation +// ──────────────────────────────────────────────────────────── + +const initializedWorkspaces = new Set(); + +// ──────────────────────────────────────────────────────────── +// WorkspaceStrategy +// ──────────────────────────────────────────────────────────── + +export class WorkspaceStrategy { + + /** + * Create a workspace for a coding task. + * + * @param config - Workspace configuration + * @returns Handle, directory path, and optional branch name + */ + static async create(config: WorkspaceConfig): Promise { + if (config.mode === 'worktree') { + return this.createWorktree(config); + } + return this.createSandbox(config); + } + + /** + * Check if a workspace has been initialized for the given handle. + */ + static isInitialized(handle: string): boolean { + return initializedWorkspaces.has(handle); + } + + /** + * Reset all tracked workspace handles. + * Used by tests to ensure clean state between runs. + */ + static resetTracking(): void { + initializedWorkspaces.clear(); + } + + /** + * Create an isolated sandbox workspace (current default behavior). + * Directory: .continuum/personas/{personaId}/workspace/ + * Registered with Rust backend as writable + read-only codebase access. + */ + private static async createSandbox(config: WorkspaceConfig): Promise { + const handle = config.personaId; + + if (initializedWorkspaces.has(handle)) { + const jtagRoot = process.cwd(); + const workspaceDir = path.join(jtagRoot, '.continuum', 'personas', config.personaId, 'workspace'); + return { handle, workspaceDir, mode: 'sandbox' }; + } + + const jtagRoot = process.cwd(); + const workspaceDir = path.join(jtagRoot, '.continuum', 'personas', config.personaId, 'workspace'); + + // Create workspace directory if it doesn't exist + if (!fs.existsSync(workspaceDir)) { + fs.mkdirSync(workspaceDir, { recursive: true }); + log.info(`Created sandbox workspace: ${workspaceDir}`); + } + + // Register with Rust backend β€” writable workspace + read-only codebase access + await CodeDaemon.createWorkspace(handle, workspaceDir, [jtagRoot]); + initializedWorkspaces.add(handle); + log.info(`Sandbox workspace initialized for persona ${config.personaId}`); + + return { handle, workspaceDir, mode: 'sandbox' }; + } + + /** + * Create a git worktree workspace for working on real repo source. + * Uses workspace/git/workspace/init to create a sparse-checkout worktree, + * then registers it with the Rust backend. + */ + private static async createWorktree(config: WorkspaceConfig): Promise { + const slug = config.taskSlug ?? 'work'; + const handle = `worktree-${config.personaId}-${slug}`; + + if (initializedWorkspaces.has(handle)) { + // Already initialized β€” resolve path from convention + const jtagRoot = process.cwd(); + const workspaceDir = path.join( + jtagRoot, '.continuum', 'sessions', 'user', 'shared', config.personaId, 'workspace', + ); + return { handle, workspaceDir, mode: 'worktree' }; + } + + if (!config.sparsePaths || config.sparsePaths.length === 0) { + throw new Error('WorkspaceStrategy: worktree mode requires sparsePaths (which directories to checkout)'); + } + + log.info(`Creating worktree workspace for persona ${config.personaId} β€” paths: ${config.sparsePaths.join(', ')}`); + + // Call the existing workspace/git/workspace/init command + const initResult = await Commands.execute('workspace/git/workspace/init', { + personaId: config.personaId, + branch: `ai/${slug}`, + paths: config.sparsePaths, + }); + + if (!initResult?.success) { + throw new Error(`WorkspaceStrategy: worktree creation failed: ${initResult?.error?.message ?? 'Unknown error'}`); + } + + const workspaceDir = initResult.workspacePath as string; + const branch = initResult.branch as string; + + // Register with Rust backend β€” worktree IS the repo, no separate read roots needed + // (the worktree contains the checked-out source files directly) + await CodeDaemon.createWorkspace(handle, workspaceDir, []); + initializedWorkspaces.add(handle); + + log.info(`Worktree workspace created: ${workspaceDir} (branch: ${branch})`); + + return { handle, workspaceDir, branch, mode: 'worktree' }; + } + + /** + * Clean up a worktree workspace. + * Calls workspace/git/workspace/clean and removes the handle from tracking. + */ + static async cleanup(handle: string, options?: { force?: boolean; deleteBranch?: boolean }): Promise { + if (!handle.startsWith('worktree-')) { + log.debug(`Skipping cleanup for non-worktree handle: ${handle}`); + return; + } + + try { + await Commands.execute('workspace/git/workspace/clean', { + force: options?.force ?? false, + deleteBranch: options?.deleteBranch ?? false, + }); + initializedWorkspaces.delete(handle); + log.info(`Worktree workspace cleaned up: ${handle}`); + } catch (error) { + log.warn(`Worktree cleanup failed for ${handle}: ${error instanceof Error ? error.message : String(error)}`); + } + } +} diff --git a/src/debug/jtag/system/code/shared/CodingTypes.ts b/src/debug/jtag/system/code/shared/CodingTypes.ts index 03151a204..a643eed82 100644 --- a/src/debug/jtag/system/code/shared/CodingTypes.ts +++ b/src/debug/jtag/system/code/shared/CodingTypes.ts @@ -89,6 +89,24 @@ export interface CodingTask { /** Maximum number of tool calls allowed (default: 15) */ readonly maxToolCalls?: number; + /** + * Workspace handle β€” identifies which Rust workspace to use for code/* operations. + * Defaults to personaId (general persona workspace). + * Challenges and other isolated contexts register their own handle via + * CodeDaemon.createWorkspace(handle, dir) and pass it here. + */ + readonly workspaceHandle?: string; + + /** + * Workspace mode for this task: + * - 'sandbox': Isolated directory under .continuum/personas/{id}/workspace/ (default) + * - 'worktree': Git worktree on real repo with sparse checkout + */ + readonly workspaceMode?: 'sandbox' | 'worktree'; + + /** Paths to sparse-checkout when using worktree mode (e.g., ["src/system/code/", "docs/"]) */ + readonly sparsePaths?: string[]; + /** When the task was created */ readonly createdAt: number; } @@ -109,7 +127,8 @@ export type CodingAction = | 'edit' // code/edit β€” partial edit | 'diff' // code/diff β€” preview changes | 'undo' // code/undo β€” revert changes - | 'verify' // Meta: check results (build, test, read-back) + | 'verify' // code/verify β€” build/test verification + | 'commit' // code/git β€” stage and commit changes | 'report'; // Meta: summarize what was done /** @@ -275,6 +294,12 @@ export interface ExecutionOptions { /** Enable multi-agent delegation for this execution */ readonly delegationEnabled?: boolean; + + /** Run TypeScript verification after write/edit steps (default: true) */ + readonly autoVerify?: boolean; + + /** Max verifyβ†’re-plan iterations when verification fails (default: 2) */ + readonly maxVerifyIterations?: number; } // ============================================================================ diff --git a/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts b/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts new file mode 100644 index 000000000..c163cb130 --- /dev/null +++ b/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts @@ -0,0 +1,276 @@ +/** + * CodingChallengeEntity - Progressive coding challenges for AI training + * + * Defines challenge specifications and tracks attempt results. + * Challenges are progressive: beginner β†’ intermediate β†’ advanced β†’ expert. + * Each challenge has: + * - Setup files (initial codebase state) + * - Expected outcome description + * - Evaluation criteria (rubric for AI judge) + * - Resource limits (time, tool calls) + * - Attempt history with scores + * + * Used by CodingChallengeRunner to execute and CodingJudge to evaluate. + * Failed attempts feed into LoRA training data capture. + */ + +import type { UUID } from '../../core/types/CrossPlatformUUID'; +import { + TextField, + NumberField, + JsonField, + EnumField, + CompositeIndex, +} from '../decorators/FieldDecorators'; +import { BaseEntity } from './BaseEntity'; +import { COLLECTIONS } from '../../shared/Constants'; + +// ──────────────────────────────────────────────────────────── +// Challenge difficulty +// ──────────────────────────────────────────────────────────── + +export type ChallengeDifficulty = 'beginner' | 'intermediate' | 'advanced' | 'expert'; + +// ──────────────────────────────────────────────────────────── +// Challenge category +// ──────────────────────────────────────────────────────────── + +export type ChallengeCategory = + | 'single-file' // Operations on one file + | 'multi-file' // Cross-file coordination + | 'refactoring' // Extract, rename, restructure + | 'bug-fix' // Find and fix defects + | 'feature' // Add new functionality + | 'architecture' // Large-scale structural changes + | 'discovery'; // Codebase exploration and analysis + +// ──────────────────────────────────────────────────────────── +// Challenge attempt result +// ──────────────────────────────────────────────────────────── + +export type AttemptStatus = 'passed' | 'failed' | 'partial' | 'timeout' | 'error'; + +export interface ChallengeAttempt { + /** Which AI attempted this */ + personaId: UUID; + /** CodingPlan that was executed */ + planId?: UUID; + /** When the attempt started */ + startedAt: number; + /** When the attempt finished */ + completedAt: number; + /** Outcome */ + status: AttemptStatus; + /** AI judge score (0-100) */ + score: number; + /** AI judge feedback */ + feedback: string; + /** Files modified during the attempt */ + filesModified: string[]; + /** Files created during the attempt */ + filesCreated: string[]; + /** Errors encountered */ + errors: string[]; + /** Tool calls consumed */ + toolCallsUsed: number; + /** Total duration in milliseconds */ + durationMs: number; + /** File contents after execution (for judge evaluation) */ + resultFiles?: Record; +} + +// ──────────────────────────────────────────────────────────── +// Entity +// ──────────────────────────────────────────────────────────── + +@CompositeIndex({ + name: 'idx_coding_challenges_difficulty', + fields: ['difficulty', 'category'], + direction: 'ASC', +}) +@CompositeIndex({ + name: 'idx_coding_challenges_order', + fields: ['sequenceNumber'], + direction: 'ASC', +}) +export class CodingChallengeEntity extends BaseEntity { + static readonly collection = COLLECTIONS.CODING_CHALLENGES; + + // ── Identity ────────────────────────────────────────────── + + /** Human-readable challenge name */ + @TextField({ index: true }) + name!: string; + + /** Challenge description β€” what the AI needs to accomplish */ + @TextField() + description!: string; + + /** Ordering for progressive difficulty */ + @NumberField() + sequenceNumber!: number; + + // ── Classification ──────────────────────────────────────── + + @EnumField() + difficulty!: ChallengeDifficulty; + + @EnumField() + category!: ChallengeCategory; + + // ── Challenge specification ─────────────────────────────── + + /** Initial file contents that define the challenge workspace */ + @JsonField() + setupFiles!: Record; + + /** What success looks like (natural language for AI judge) */ + @TextField() + expectedOutcome!: string; + + /** Rubric criteria for the AI judge to evaluate */ + @JsonField() + evaluationCriteria!: string[]; + + /** Optional: expected file contents after successful completion */ + @JsonField() + expectedFiles?: Record; + + // ── Resource limits ─────────────────────────────────────── + + /** Maximum execution time in milliseconds */ + @NumberField() + timeLimitMs!: number; + + /** Maximum tool calls allowed */ + @NumberField() + toolCallLimit!: number; + + // ── Attempt history ─────────────────────────────────────── + + /** All attempts made against this challenge */ + @JsonField() + attempts!: ChallengeAttempt[]; + + // ── Statistics ──────────────────────────────────────────── + + /** Number of times this challenge has been attempted */ + @NumberField() + totalAttempts!: number; + + /** Number of times this challenge has been passed */ + @NumberField() + totalPasses!: number; + + /** Highest score achieved */ + @NumberField() + highScore!: number; + + // ── Index signature ─────────────────────────────────────── + + [key: string]: unknown; + + // ── Constructor ─────────────────────────────────────────── + + constructor() { + super(); + + this.name = ''; + this.description = ''; + this.sequenceNumber = 0; + this.difficulty = 'beginner'; + this.category = 'single-file'; + this.setupFiles = {}; + this.expectedOutcome = ''; + this.evaluationCriteria = []; + this.timeLimitMs = 60_000; + this.toolCallLimit = 10; + this.attempts = []; + this.totalAttempts = 0; + this.totalPasses = 0; + this.highScore = 0; + } + + // ── BaseEntity implementation ───────────────────────────── + + get collection(): string { + return CodingChallengeEntity.collection; + } + + static override getPaginationConfig(): { + defaultSortField: string; + defaultSortDirection: 'asc' | 'desc'; + defaultPageSize: number; + cursorField: string; + } { + return { + defaultSortField: 'sequenceNumber', + defaultSortDirection: 'asc', + defaultPageSize: 20, + cursorField: 'sequenceNumber', + }; + } + + validate(): { success: boolean; error?: string } { + if (!this.name?.trim()) { + return { success: false, error: 'Challenge name is required' }; + } + if (!this.description?.trim()) { + return { success: false, error: 'Challenge description is required' }; + } + if (typeof this.sequenceNumber !== 'number' || this.sequenceNumber < 1) { + return { success: false, error: 'Challenge sequenceNumber must be a positive integer' }; + } + if (!this.expectedOutcome?.trim()) { + return { success: false, error: 'Challenge expectedOutcome is required' }; + } + if (!Array.isArray(this.evaluationCriteria) || this.evaluationCriteria.length === 0) { + return { success: false, error: 'Challenge must have at least one evaluation criterion' }; + } + if (Object.keys(this.setupFiles).length === 0) { + return { success: false, error: 'Challenge must have at least one setup file' }; + } + if (this.timeLimitMs < 5000) { + return { success: false, error: 'Challenge time limit must be at least 5 seconds' }; + } + if (this.toolCallLimit < 2) { + return { success: false, error: 'Challenge tool call limit must be at least 2' }; + } + + return { success: true }; + } + + // ── Convenience methods ─────────────────────────────────── + + /** Pass rate as a percentage (0-100) */ + get passRate(): number { + if (this.totalAttempts === 0) return 0; + return Math.round((this.totalPasses / this.totalAttempts) * 100); + } + + /** Average score across all attempts */ + get averageScore(): number { + if (this.attempts.length === 0) return 0; + const total = this.attempts.reduce((sum, a) => sum + a.score, 0); + return Math.round(total / this.attempts.length); + } + + /** Best attempt for a specific persona */ + bestAttemptFor(personaId: UUID): ChallengeAttempt | undefined { + return this.attempts + .filter(a => a.personaId === personaId) + .sort((a, b) => b.score - a.score)[0]; + } + + /** Record a new attempt and update statistics */ + recordAttempt(attempt: ChallengeAttempt): void { + this.attempts.push(attempt); + this.totalAttempts++; + if (attempt.status === 'passed') { + this.totalPasses++; + } + if (attempt.score > this.highScore) { + this.highScore = attempt.score; + } + } +} diff --git a/src/debug/jtag/system/shared/Constants.ts b/src/debug/jtag/system/shared/Constants.ts index cdf7fe933..95d5acd8a 100644 --- a/src/debug/jtag/system/shared/Constants.ts +++ b/src/debug/jtag/system/shared/Constants.ts @@ -142,6 +142,9 @@ export const COLLECTIONS = { // Self-Modifying Skills (Phase 4B: AI-Created Commands) SKILLS: 'skills', + + // Coding Challenges & Learning (Phase 4D: Progressive Training) + CODING_CHALLENGES: 'coding_challenges', } as const; diff --git a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts index 2e4417f1e..014070be3 100644 --- a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts +++ b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts @@ -50,10 +50,11 @@ vi.mock('../../../daemons/code-daemon/shared/CodeDaemon', () => ({ }, })); -// Mock fs for workspace directory creation +// Mock fs for workspace directory creation + CLAUDE.md reading vi.mock('fs', () => ({ existsSync: vi.fn().mockReturnValue(true), mkdirSync: vi.fn(), + readFileSync: vi.fn().mockReturnValue('# Project Conventions\nCompression principle applies.'), })); function makeTask(overrides?: Partial): CodingTask { @@ -123,28 +124,31 @@ describe('CodeAgentOrchestrator', () => { it('executes all plan steps and returns completed', async () => { mockSimplePlan(); - // Discovery (code/tree) + 3 plan steps - mockExecute - .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) - .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read - .mockResolvedValueOnce({ success: true, changeId: 'c1' }) // step 2: code/edit - .mockResolvedValueOnce({ success: true, content: 'new' }); // step 3: code/read (verify) + // Use mockImplementation to handle discovery + architecture doc reads + plan steps + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; + return { success: true }; + }); const result = await orchestrator.execute(makeTask()); expect(result.status).toBe('completed'); expect(result.stepResults).toHaveLength(3); expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - expect(result.totalToolCalls).toBeGreaterThanOrEqual(4); // 1 discovery + 3 steps + expect(result.totalToolCalls).toBeGreaterThanOrEqual(4); // 1 discovery + arch reads + 3 steps }); it('tracks modified files from edit steps', async () => { mockSimplePlan(); - mockExecute - .mockResolvedValueOnce({ success: true, root: {} }) - .mockResolvedValueOnce({ success: true, content: 'old' }) - .mockResolvedValueOnce({ success: true, changeId: 'change-123' }) - .mockResolvedValueOnce({ success: true, content: 'new' }); + + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'change-123' }; + return { success: true }; + }); const result = await orchestrator.execute(makeTask()); @@ -386,4 +390,103 @@ describe('CodeAgentOrchestrator', () => { expect(result.changeIds).toHaveLength(0); }); }); + + describe('verifyβ†’re-plan iteration loop', () => { + it('skips verification when autoVerify is false', async () => { + mockSimplePlan(); + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask(), { autoVerify: false }); + + expect(result.status).toBe('completed'); + // code/verify should NOT have been called + const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); + expect(calls).not.toContain('code/verify'); + }); + + it('skips verification in dryRun mode', async () => { + mockSimplePlan(); + mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); + + const result = await orchestrator.execute(makeTask(), { dryRun: true }); + + // code/verify should NOT have been called + const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); + expect(calls).not.toContain('code/verify'); + }); + + it('runs verification after write steps and passes', async () => { + mockSimplePlan(); + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; + if (cmd === 'code/verify') return { success: true, typeCheck: { passed: true, errorCount: 0, errors: [] } }; + return { success: true }; + }); + + const result = await orchestrator.execute(makeTask()); + + expect(result.status).toBe('completed'); + expect(result.errors).toHaveLength(0); + const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); + expect(calls).toContain('code/verify'); + }); + + it('records errors when verification fails and iterations exhausted', async () => { + mockSimplePlan(); + + // First call for planning, then always fail verification + let verifyCallCount = 0; + mockExecute.mockImplementation(async (cmd: string) => { + if (cmd === 'code/tree') return { success: true, root: {} }; + if (cmd === 'code/read') return { success: true, content: 'file content' }; + if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; + if (cmd === 'code/verify') { + verifyCallCount++; + return { + success: false, + typeCheck: { + passed: false, + errorCount: 1, + errors: [{ file: 'utils.ts', line: 5, column: 1, code: 'TS2345', message: 'Type error' }], + }, + }; + } + return { success: true }; + }); + + // Allow re-plan β€” the LLM mock needs to return a fix plan too + mockGenerateText + .mockResolvedValueOnce({ + text: JSON.stringify({ + summary: 'Original plan', + steps: [ + { stepNumber: 1, action: 'read', targetFiles: ['utils.ts'], toolCall: 'code/read', toolParams: { filePath: 'utils.ts' }, dependsOn: [], verification: 'ok' }, + { stepNumber: 2, action: 'edit', targetFiles: ['utils.ts'], toolCall: 'code/edit', toolParams: { filePath: 'utils.ts', editType: 'append', content: 'x' }, dependsOn: [1], verification: 'ok' }, + ], + }), + }) + .mockResolvedValueOnce({ + text: JSON.stringify({ + summary: 'Fix type error', + steps: [ + { stepNumber: 1, action: 'edit', targetFiles: ['utils.ts'], toolCall: 'code/edit', toolParams: { filePath: 'utils.ts', editType: 'search_replace', search: 'x', replace: 'y' }, dependsOn: [], verification: 'ok' }, + ], + }), + }); + + const result = await orchestrator.execute(makeTask({ maxToolCalls: 30 }), { maxVerifyIterations: 2 }); + + // Should have verification errors recorded + expect(result.errors.some((e: string) => e.includes('TS2345'))).toBe(true); + // Should have called verify at least twice (initial + after fix) + expect(verifyCallCount).toBeGreaterThanOrEqual(2); + }); + }); }); diff --git a/src/debug/jtag/tests/unit/code/CodeGitCommand.test.ts b/src/debug/jtag/tests/unit/code/CodeGitCommand.test.ts new file mode 100644 index 000000000..4db76cd23 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodeGitCommand.test.ts @@ -0,0 +1,57 @@ +/** + * Code Git Command Unit Tests + * + * Tests SecurityTier integration, PlanFormulator tool schema, + * and CodingAction/ACTION_TO_COMMAND for the commit action. + */ + +import { describe, it, expect } from 'vitest'; +import { getTier } from '../../../system/code/server/SecurityTier'; + +describe('CodeGitCommand', () => { + describe('SecurityTier integration', () => { + it('code/git is allowed at read tier', () => { + const tier = getTier('read'); + expect(tier.allowedCommands).toContain('code/git'); + }); + + it('code/git is allowed at write tier (inherited from read)', () => { + const tier = getTier('write'); + expect(tier.allowedCommands).toContain('code/git'); + }); + + it('code/git is NOT allowed at discovery tier', () => { + const tier = getTier('discovery'); + expect(tier.allowedCommands).not.toContain('code/git'); + }); + + it('code/git is allowed at system tier (wildcard)', () => { + const tier = getTier('system'); + expect(tier.allowedCommands).toContain('*'); + }); + }); + + describe('CodingAction commit type', () => { + it('commit is a valid CodingAction', () => { + // Type check β€” if this compiles, the type exists + const action: import('../../../system/code/shared/CodingTypes').CodingAction = 'commit'; + expect(action).toBe('commit'); + }); + }); + + describe('operation validation', () => { + const VALID_OPS = ['status', 'diff', 'log', 'add', 'commit', 'push']; + + for (const op of VALID_OPS) { + it(`'${op}' is a valid operation`, () => { + expect(VALID_OPS).toContain(op); + }); + } + + it('invalid operations are rejected', () => { + expect(VALID_OPS).not.toContain('rebase'); + expect(VALID_OPS).not.toContain('merge'); + expect(VALID_OPS).not.toContain(''); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/CodeVerifyCommand.test.ts b/src/debug/jtag/tests/unit/code/CodeVerifyCommand.test.ts new file mode 100644 index 000000000..4d311eb28 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/CodeVerifyCommand.test.ts @@ -0,0 +1,132 @@ +/** + * Code Verify Command Unit Tests + * + * Tests the code/verify types, SecurityTier integration, and PlanFormulator + * tool schema registration. The actual server command logic is tested + * indirectly through CodeAgentOrchestrator (auto-verify) and via + * integration tests against the running system. + * + * Direct server command testing requires vitest path alias resolution + * for @daemons/* imports β€” planned when vitest.config.ts is added. + */ + +import { describe, it, expect } from 'vitest'; +import { getTier } from '../../../system/code/server/SecurityTier'; + +describe('code/verify β€” SecurityTier integration', () => { + it('code/verify is allowed at write tier', () => { + const tier = getTier('write'); + expect(tier.allowedCommands).toContain('code/verify'); + }); + + it('code/verify is NOT allowed at discovery tier', () => { + const tier = getTier('discovery'); + expect(tier.allowedCommands).not.toContain('code/verify'); + }); + + it('code/verify is NOT allowed at read tier', () => { + const tier = getTier('read'); + expect(tier.allowedCommands).not.toContain('code/verify'); + }); + + it('code/verify is allowed at system tier (wildcard)', () => { + const tier = getTier('system'); + expect(tier.allowedCommands).toContain('*'); + }); +}); + +describe('code/verify β€” TypeScript error parsing', () => { + // Test the regex pattern used by CodeVerifyServerCommand + const TS_ERROR_REGEX = /^(.+?)\((\d+),(\d+)\):\s*error\s+(TS\d+):\s*(.+)$/gm; + + function parseErrors(output: string): Array<{ file: string; line: number; column: number; code: string; message: string }> { + const errors: Array<{ file: string; line: number; column: number; code: string; message: string }> = []; + TS_ERROR_REGEX.lastIndex = 0; + let match; + while ((match = TS_ERROR_REGEX.exec(output)) !== null) { + errors.push({ + file: match[1], + line: parseInt(match[2], 10), + column: parseInt(match[3], 10), + code: match[4], + message: match[5], + }); + } + return errors; + } + + it('parses single TypeScript error', () => { + const output = "src/utils.ts(10,5): error TS2345: Argument of type 'string' is not assignable to parameter of type 'number'."; + const errors = parseErrors(output); + + expect(errors).toHaveLength(1); + expect(errors[0]).toEqual({ + file: 'src/utils.ts', + line: 10, + column: 5, + code: 'TS2345', + message: "Argument of type 'string' is not assignable to parameter of type 'number'.", + }); + }); + + it('parses multiple errors', () => { + const output = [ + "src/utils.ts(10,5): error TS2345: Type error A.", + "src/main.ts(42,12): error TS2304: Cannot find name 'foo'.", + "lib/helpers.ts(1,1): error TS1005: Missing semicolon.", + ].join('\n'); + + const errors = parseErrors(output); + expect(errors).toHaveLength(3); + expect(errors[0].file).toBe('src/utils.ts'); + expect(errors[1].file).toBe('src/main.ts'); + expect(errors[2].file).toBe('lib/helpers.ts'); + }); + + it('handles empty output (no errors)', () => { + const errors = parseErrors(''); + expect(errors).toHaveLength(0); + }); + + it('handles mixed output with non-error lines', () => { + const output = [ + 'Starting TypeScript compilation...', + "src/index.ts(5,3): error TS7006: Parameter 'x' implicitly has an 'any' type.", + 'Found 1 error.', + ].join('\n'); + + const errors = parseErrors(output); + expect(errors).toHaveLength(1); + expect(errors[0].code).toBe('TS7006'); + }); + + it('parses file paths with spaces', () => { + const output = "src/my module/file.ts(3,7): error TS2322: Type mismatch."; + const errors = parseErrors(output); + + expect(errors).toHaveLength(1); + expect(errors[0].file).toBe('src/my module/file.ts'); + }); +}); + +describe('code/verify β€” PlanFormulator tool schema', () => { + // Verify the action β†’ command mapping includes code/verify + it('verify action maps to code/verify in plan', () => { + // The ACTION_TO_COMMAND map in PlanFormulator maps 'verify' β†’ 'code/verify' + // We test this indirectly through the PlanFormulator test suite + // This test validates the expected behavior at the plan level + const ACTION_TO_COMMAND: Record = { + discover: 'code/tree', + search: 'code/search', + read: 'code/read', + write: 'code/write', + edit: 'code/edit', + diff: 'code/diff', + undo: 'code/undo', + verify: 'code/verify', + report: 'code/history', + }; + + expect(ACTION_TO_COMMAND.verify).toBe('code/verify'); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/SecurityTier.test.ts b/src/debug/jtag/tests/unit/code/SecurityTier.test.ts index b0079d56e..0c7211ea5 100644 --- a/src/debug/jtag/tests/unit/code/SecurityTier.test.ts +++ b/src/debug/jtag/tests/unit/code/SecurityTier.test.ts @@ -88,6 +88,11 @@ describe('SecurityTier', () => { expect(tier.allowedCommands).toContain('code/undo'); }); + it('write tier includes code/verify for build verification', () => { + const tier = getTier('write'); + expect(tier.allowedCommands).toContain('code/verify'); + }); + it('write tier denies shell and system commands', () => { const tier = getTier('write'); expect(tier.deniedCommands).toContain('development/exec'); diff --git a/src/debug/jtag/tests/unit/code/WorkspaceStrategy.test.ts b/src/debug/jtag/tests/unit/code/WorkspaceStrategy.test.ts new file mode 100644 index 000000000..ddde533b2 --- /dev/null +++ b/src/debug/jtag/tests/unit/code/WorkspaceStrategy.test.ts @@ -0,0 +1,334 @@ +/** + * WorkspaceStrategy Unit Tests + * + * Tests workspace creation routing (sandbox vs worktree), + * handle tracking, deduplication, and cleanup. + */ + +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { WorkspaceStrategy } from '../../../system/code/server/WorkspaceStrategy'; +import type { WorkspaceConfig } from '../../../system/code/server/WorkspaceStrategy'; + +// Mock Commands.execute (used for worktree init/clean) +const mockExecute = vi.fn(); +vi.mock('../../../system/core/shared/Commands', () => ({ + Commands: { + execute: (...args: unknown[]) => mockExecute(...args), + }, +})); + +// Mock CodeDaemon.createWorkspace +const mockCreateWorkspace = vi.fn().mockResolvedValue(undefined); +vi.mock('../../../daemons/code-daemon/shared/CodeDaemon', () => ({ + CodeDaemon: { + createWorkspace: (...args: unknown[]) => mockCreateWorkspace(...args), + }, +})); + +// Mock Logger +vi.mock('../../../system/core/logging/Logger', () => ({ + Logger: { + create: () => ({ + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + }), + }, +})); + +// Mock fs +vi.mock('fs', () => ({ + existsSync: vi.fn().mockReturnValue(false), + mkdirSync: vi.fn(), +})); + +describe('WorkspaceStrategy', () => { + beforeEach(() => { + mockExecute.mockReset(); + mockCreateWorkspace.mockReset().mockResolvedValue(undefined); + WorkspaceStrategy.resetTracking(); + }); + + describe('sandbox mode', () => { + it('creates sandbox workspace with correct handle and path', async () => { + const config: WorkspaceConfig = { + personaId: 'persona-123', + mode: 'sandbox', + }; + + const result = await WorkspaceStrategy.create(config); + + expect(result.mode).toBe('sandbox'); + expect(result.handle).toBe('persona-123'); + expect(result.workspaceDir).toContain('.continuum/personas/persona-123/workspace'); + expect(result.branch).toBeUndefined(); + }); + + it('registers with CodeDaemon including jtagRoot as read root', async () => { + const config: WorkspaceConfig = { + personaId: 'persona-456', + mode: 'sandbox', + }; + + await WorkspaceStrategy.create(config); + + expect(mockCreateWorkspace).toHaveBeenCalledTimes(1); + const [handle, workspaceDir, readRoots] = mockCreateWorkspace.mock.calls[0]; + expect(handle).toBe('persona-456'); + expect(workspaceDir).toContain('.continuum/personas/persona-456/workspace'); + expect(readRoots).toHaveLength(1); + expect(readRoots[0]).toBe(process.cwd()); + }); + + it('deduplicates β€” second call returns cached result without re-registering', async () => { + const config: WorkspaceConfig = { + personaId: 'persona-789', + mode: 'sandbox', + }; + + const first = await WorkspaceStrategy.create(config); + const second = await WorkspaceStrategy.create(config); + + expect(first.handle).toBe(second.handle); + expect(first.workspaceDir).toBe(second.workspaceDir); + // CodeDaemon.createWorkspace only called once + expect(mockCreateWorkspace).toHaveBeenCalledTimes(1); + }); + + it('tracks initialized state via isInitialized', async () => { + expect(WorkspaceStrategy.isInitialized('persona-abc')).toBe(false); + + await WorkspaceStrategy.create({ + personaId: 'persona-abc', + mode: 'sandbox', + }); + + expect(WorkspaceStrategy.isInitialized('persona-abc')).toBe(true); + }); + }); + + describe('worktree mode', () => { + it('creates worktree workspace via workspace/git/workspace/init', async () => { + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-branch', + branch: 'ai/fix-bug', + }); + + const config: WorkspaceConfig = { + personaId: 'persona-wt', + mode: 'worktree', + taskSlug: 'fix-bug', + sparsePaths: ['src/system/code/', 'docs/'], + }; + + const result = await WorkspaceStrategy.create(config); + + expect(result.mode).toBe('worktree'); + expect(result.handle).toBe('worktree-persona-wt-fix-bug'); + expect(result.workspaceDir).toBe('/tmp/worktrees/ai-branch'); + expect(result.branch).toBe('ai/fix-bug'); + }); + + it('calls workspace/git/workspace/init with correct params', async () => { + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-work', + branch: 'ai/work', + }); + + await WorkspaceStrategy.create({ + personaId: 'persona-wt2', + mode: 'worktree', + taskSlug: 'work', + sparsePaths: ['src/'], + }); + + expect(mockExecute).toHaveBeenCalledWith( + 'workspace/git/workspace/init', + { + personaId: 'persona-wt2', + branch: 'ai/work', + paths: ['src/'], + } + ); + }); + + it('registers with CodeDaemon with empty read roots (worktree IS the repo)', async () => { + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-test', + branch: 'ai/test', + }); + + await WorkspaceStrategy.create({ + personaId: 'persona-wt3', + mode: 'worktree', + taskSlug: 'test', + sparsePaths: ['src/'], + }); + + expect(mockCreateWorkspace).toHaveBeenCalledWith( + 'worktree-persona-wt3-test', + '/tmp/worktrees/ai-test', + [] + ); + }); + + it('throws when sparsePaths is empty', async () => { + await expect( + WorkspaceStrategy.create({ + personaId: 'persona-fail', + mode: 'worktree', + sparsePaths: [], + }) + ).rejects.toThrow('worktree mode requires sparsePaths'); + }); + + it('throws when sparsePaths is undefined', async () => { + await expect( + WorkspaceStrategy.create({ + personaId: 'persona-fail2', + mode: 'worktree', + }) + ).rejects.toThrow('worktree mode requires sparsePaths'); + }); + + it('throws when workspace/git/workspace/init fails', async () => { + mockExecute.mockResolvedValue({ + success: false, + error: { message: 'Git worktree creation failed: branch already exists' }, + }); + + await expect( + WorkspaceStrategy.create({ + personaId: 'persona-fail3', + mode: 'worktree', + sparsePaths: ['src/'], + }) + ).rejects.toThrow('worktree creation failed'); + }); + + it('defaults taskSlug to work when not provided', async () => { + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-work', + branch: 'ai/work', + }); + + const result = await WorkspaceStrategy.create({ + personaId: 'persona-default', + mode: 'worktree', + sparsePaths: ['src/'], + }); + + expect(result.handle).toBe('worktree-persona-default-work'); + expect(mockExecute).toHaveBeenCalledWith( + 'workspace/git/workspace/init', + expect.objectContaining({ branch: 'ai/work' }) + ); + }); + + it('deduplicates worktree workspaces', async () => { + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-dedup', + branch: 'ai/dedup', + }); + + const config: WorkspaceConfig = { + personaId: 'persona-dedup', + mode: 'worktree', + taskSlug: 'dedup', + sparsePaths: ['src/'], + }; + + await WorkspaceStrategy.create(config); + const second = await WorkspaceStrategy.create(config); + + // Only one init call + expect(mockExecute).toHaveBeenCalledTimes(1); + expect(second.mode).toBe('worktree'); + }); + }); + + describe('cleanup', () => { + it('calls workspace/git/workspace/clean for worktree handles', async () => { + mockExecute.mockResolvedValue({ success: true }); + + await WorkspaceStrategy.cleanup('worktree-persona-abc-task'); + + expect(mockExecute).toHaveBeenCalledWith( + 'workspace/git/workspace/clean', + { force: false, deleteBranch: false } + ); + }); + + it('passes force and deleteBranch options', async () => { + mockExecute.mockResolvedValue({ success: true }); + + await WorkspaceStrategy.cleanup('worktree-persona-abc-task', { + force: true, + deleteBranch: true, + }); + + expect(mockExecute).toHaveBeenCalledWith( + 'workspace/git/workspace/clean', + { force: true, deleteBranch: true } + ); + }); + + it('skips cleanup for non-worktree handles', async () => { + await WorkspaceStrategy.cleanup('persona-123'); + + expect(mockExecute).not.toHaveBeenCalled(); + }); + + it('removes handle from tracking after cleanup', async () => { + // First create a worktree workspace + mockExecute.mockResolvedValue({ + success: true, + workspacePath: '/tmp/worktrees/ai-cleanup', + branch: 'ai/cleanup', + }); + + await WorkspaceStrategy.create({ + personaId: 'persona-cleanup', + mode: 'worktree', + taskSlug: 'cleanup', + sparsePaths: ['src/'], + }); + + expect(WorkspaceStrategy.isInitialized('worktree-persona-cleanup-cleanup')).toBe(true); + + // Now clean up + mockExecute.mockResolvedValue({ success: true }); + await WorkspaceStrategy.cleanup('worktree-persona-cleanup-cleanup'); + + expect(WorkspaceStrategy.isInitialized('worktree-persona-cleanup-cleanup')).toBe(false); + }); + + it('handles cleanup errors gracefully without throwing', async () => { + mockExecute.mockRejectedValue(new Error('Git error')); + + // Should not throw + await WorkspaceStrategy.cleanup('worktree-persona-err-task'); + }); + }); + + describe('resetTracking', () => { + it('clears all tracked workspaces', async () => { + await WorkspaceStrategy.create({ + personaId: 'persona-reset', + mode: 'sandbox', + }); + + expect(WorkspaceStrategy.isInitialized('persona-reset')).toBe(true); + + WorkspaceStrategy.resetTracking(); + + expect(WorkspaceStrategy.isInitialized('persona-reset')).toBe(false); + }); + }); +}); diff --git a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts index 135a1df13..055748025 100644 --- a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts +++ b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts @@ -1018,6 +1018,75 @@ export class RustCoreIPCClient extends EventEmitter { return response.result as { success: boolean; diff: string }; } + /** + * Get git log for the workspace. + */ + async codeGitLog(personaId: string, count?: number): Promise<{ success: boolean; log: string }> { + const response = await this.request({ + command: 'code/git-log', + persona_id: personaId, + count: count ?? 10, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to get git log'); + } + + return response.result as { success: boolean; log: string }; + } + + /** + * Stage files for commit. + */ + async codeGitAdd(personaId: string, paths: string[]): Promise<{ staged: string[] }> { + const response = await this.request({ + command: 'code/git-add', + persona_id: personaId, + paths, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to stage files'); + } + + return response.result as { staged: string[] }; + } + + /** + * Create a git commit. + */ + async codeGitCommit(personaId: string, message: string): Promise<{ hash: string }> { + const response = await this.request({ + command: 'code/git-commit', + persona_id: personaId, + message, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to create commit'); + } + + return response.result as { hash: string }; + } + + /** + * Push to remote. + */ + async codeGitPush(personaId: string, remote?: string, branch?: string): Promise<{ output: string }> { + const response = await this.request({ + command: 'code/git-push', + persona_id: personaId, + remote: remote ?? '', + branch: branch ?? '', + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to push'); + } + + return response.result as { output: string }; + } + /** * Disconnect from server */ diff --git a/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs b/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs index ce07d0ed9..770206641 100644 --- a/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs +++ b/src/debug/jtag/workers/continuum-core/src/code/git_bridge.rs @@ -100,6 +100,39 @@ pub fn git_log(workspace_root: &Path, count: u32) -> Result { ) } +/// Stage files for commit. +/// +/// Pass specific file paths, or `&["--all"]` / `&["."]` to stage everything. +pub fn git_add(workspace_root: &Path, paths: &[&str]) -> Result { + let mut args = vec!["add"]; + args.extend_from_slice(paths); + run_git(workspace_root, &args) +} + +/// Create a commit with the given message. +/// +/// Returns the full commit hash on success. +pub fn git_commit(workspace_root: &Path, message: &str) -> Result { + // Commit (skip hooks β€” AI-authored commits are verified separately) + run_git(workspace_root, &["commit", "--no-verify", "-m", message])?; + + // Return the commit hash + run_git(workspace_root, &["rev-parse", "HEAD"]) + .map(|s| s.trim().to_string()) +} + +/// Push the current branch to a remote. +/// +/// Defaults to `origin` if remote is empty. +pub fn git_push(workspace_root: &Path, remote: &str, branch: &str) -> Result { + let remote = if remote.is_empty() { "origin" } else { remote }; + let mut args = vec!["push", remote]; + if !branch.is_empty() { + args.push(branch); + } + run_git(workspace_root, &args) +} + /// Run a git command in the workspace directory. fn run_git(workspace_root: &Path, args: &[&str]) -> Result { let output = Command::new("git") @@ -201,4 +234,53 @@ mod tests { // git status in non-repo returns error assert!(!status.success || status.branch.is_none()); } + + #[test] + fn test_git_add_and_commit() { + let dir = setup_git_repo(); + + // Create a new file + fs::write(dir.path().join("feature.txt"), "new feature\n").unwrap(); + + // Stage it + git_add(dir.path(), &["feature.txt"]).expect("git add should work"); + + // Status should show it as added + let status = git_status(dir.path()); + assert!(status.added.contains(&"feature.txt".to_string())); + + // Commit it + let hash = git_commit(dir.path(), "Add feature").expect("git commit should work"); + assert!(!hash.is_empty()); + assert!(hash.len() >= 7); // At least a short hash + + // Status should be clean now + let status_after = git_status(dir.path()); + assert!(status_after.modified.is_empty()); + assert!(status_after.added.is_empty()); + assert!(status_after.untracked.is_empty()); + } + + #[test] + fn test_git_commit_empty_fails() { + let dir = setup_git_repo(); + // Nothing staged β€” commit should fail + let result = git_commit(dir.path(), "Empty commit"); + assert!(result.is_err()); + } + + #[test] + fn test_git_add_all() { + let dir = setup_git_repo(); + + fs::write(dir.path().join("a.txt"), "aaa\n").unwrap(); + fs::write(dir.path().join("b.txt"), "bbb\n").unwrap(); + + git_add(dir.path(), &["."]).expect("git add . should work"); + + let status = git_status(dir.path()); + // Both files should be staged (added) + assert!(status.added.contains(&"a.txt".to_string())); + assert!(status.added.contains(&"b.txt".to_string())); + } } diff --git a/src/debug/jtag/workers/continuum-core/src/code/path_security.rs b/src/debug/jtag/workers/continuum-core/src/code/path_security.rs index 01f7f7bf1..54af47ebc 100644 --- a/src/debug/jtag/workers/continuum-core/src/code/path_security.rs +++ b/src/debug/jtag/workers/continuum-core/src/code/path_security.rs @@ -212,25 +212,37 @@ impl PathSecurity { return Ok(canonical); } - // For new files: canonicalize the parent, then append filename - if let Some(parent) = joined.parent() { - if parent.exists() { - let canonical_parent = parent.canonicalize().map_err(|_| { - PathSecurityError::InvalidPath { - path: relative_path.to_string(), + // For new files: walk up the parent chain to find the nearest existing + // ancestor, canonicalize it, and verify it's within the workspace. + // This handles creating files in not-yet-existing subdirectories + // (e.g., "shared/format-utils.ts" when "shared/" doesn't exist yet). + { + let mut ancestor = joined.clone(); + // Walk up until we find an existing directory + while let Some(parent) = ancestor.parent() { + if parent.exists() { + let canonical_ancestor = parent.canonicalize().map_err(|_| { + PathSecurityError::InvalidPath { + path: relative_path.to_string(), + } + })?; + + if !canonical_ancestor.starts_with(&self.workspace_root) { + return Err(PathSecurityError::TraversalBlocked { + path: relative_path.to_string(), + workspace: self.workspace_root.display().to_string(), + }); } - })?; - if !canonical_parent.starts_with(&self.workspace_root) { - return Err(PathSecurityError::TraversalBlocked { - path: relative_path.to_string(), - workspace: self.workspace_root.display().to_string(), - }); - } - - if let Some(filename) = joined.file_name() { - return Ok(canonical_parent.join(filename)); + // Reconstruct: canonical ancestor + remaining relative components + let remaining = joined.strip_prefix(parent).map_err(|_| { + PathSecurityError::InvalidPath { + path: relative_path.to_string(), + } + })?; + return Ok(canonical_ancestor.join(remaining)); } + ancestor = parent.to_path_buf(); } } @@ -261,16 +273,28 @@ impl PathSecurity { /// Normalize a path by collapsing `.` and `..` components without I/O. /// /// This is a pre-check before any filesystem operations. + /// Returns the normalized path. If `..` underflows (tries to go above root), + /// the result will start with `..` to signal a traversal attempt. fn normalize_path(&self, path: &str) -> String { let mut components = Vec::new(); + let mut depth: i32 = 0; // Track depth relative to root for part in path.split('/') { match part { "" | "." => continue, ".." => { - components.pop(); + if depth > 0 { + components.pop(); + depth -= 1; + } else { + // Underflow: trying to go above workspace root + components.push(".."); + } + } + other => { + components.push(other); + depth += 1; } - other => components.push(other), } } @@ -382,7 +406,7 @@ mod tests { } #[test] - fn test_cannot_write_to_read_root() { + fn test_write_creates_in_workspace_not_read_root() { let dir = tempfile::tempdir().unwrap(); let read_dir = tempfile::tempdir().unwrap(); fs::create_dir_all(dir.path().join("src")).unwrap(); @@ -396,11 +420,14 @@ mod tests { let read_result = security.validate_read("libs/external.ts"); assert!(read_result.is_ok()); - // Cannot write to a path that only exists under read root. - // "libs/" doesn't exist in the workspace, so the parent - // directory check fails and write validation rejects it. + // Writing "libs/external.ts" creates it in the WORKSPACE (writable), + // not in the read root. This is valid β€” the file will be at + // workspace/libs/external.ts. The read root is untouched. let write_result = security.validate_write("libs/external.ts"); - assert!(write_result.is_err(), "Should not be able to write to path only in read root"); + assert!(write_result.is_ok(), "Should be able to write new file in workspace subdirectory"); + let resolved = write_result.unwrap(); + let canonical_dir = dir.path().canonicalize().unwrap(); + assert!(resolved.starts_with(&canonical_dir), "Write should resolve within workspace, not read root"); } #[test] diff --git a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs index 4aa067dee..a9586631a 100644 --- a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs +++ b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs @@ -390,6 +390,37 @@ enum Request { staged: bool, }, + /// Get git log (last N commits). + #[serde(rename = "code/git-log")] + CodeGitLog { + persona_id: String, + count: Option, + }, + + /// Stage files for commit. + #[serde(rename = "code/git-add")] + CodeGitAdd { + persona_id: String, + paths: Vec, + }, + + /// Create a git commit. + #[serde(rename = "code/git-commit")] + CodeGitCommit { + persona_id: String, + message: String, + }, + + /// Push to remote. + #[serde(rename = "code/git-push")] + CodeGitPush { + persona_id: String, + #[serde(default)] + remote: String, + #[serde(default)] + branch: String, + }, + #[serde(rename = "health-check")] HealthCheck, @@ -1550,6 +1581,79 @@ impl ServerState { } } + Request::CodeGitLog { persona_id, count } => { + let _timer = TimingGuard::new("ipc", "code_git_log"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match code::git_bridge::git_log(&engine.workspace_root(), count.unwrap_or(10)) { + Ok(log) => HandleResult::Json(Response::success(serde_json::json!({ + "log": log + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeGitAdd { persona_id, paths } => { + let _timer = TimingGuard::new("ipc", "code_git_add"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + let path_refs: Vec<&str> = paths.iter().map(|s| s.as_str()).collect(); + match code::git_bridge::git_add(&engine.workspace_root(), &path_refs) { + Ok(_) => HandleResult::Json(Response::success(serde_json::json!({ + "staged": paths + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeGitCommit { persona_id, message } => { + let _timer = TimingGuard::new("ipc", "code_git_commit"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match code::git_bridge::git_commit(&engine.workspace_root(), &message) { + Ok(hash) => HandleResult::Json(Response::success(serde_json::json!({ + "hash": hash + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeGitPush { persona_id, remote, branch } => { + let _timer = TimingGuard::new("ipc", "code_git_push"); + + let engine = match self.file_engines.get(&persona_id) { + Some(e) => e, + None => return HandleResult::Json(Response::error( + format!("No workspace for persona {}", persona_id) + )), + }; + + match code::git_bridge::git_push(&engine.workspace_root(), &remote, &branch) { + Ok(output) => HandleResult::Json(Response::success(serde_json::json!({ + "output": output + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + Request::HealthCheck => { HandleResult::Json(Response::success(serde_json::json!({ "healthy": true }))) } From 2e5c090160cd074bc46baf1113d579d0e52a5bae Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 2 Feb 2026 11:31:12 -0600 Subject: [PATCH 12/14] Shell Watch + Sentinel: event-driven output streaming for coding workspaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rust: Notify-based blocking watch (no timeouts, no polling). Each ExecutionState gets Arc β€” reader tasks call notify_one() on every output line. watch_execution() blocks on Notify until output arrives, classifies lines through CompiledSentinel (pre-compiled regex, first-match-wins), advances cursors, returns classified batch. Types (ts-rs exported): OutputClassification, SentinelAction, SentinelRule, ClassifiedLine, ShellWatchResponse β€” all generated from Rust structs to shared/generated/code/*.ts. IPC: code/shell-watch (async bridge via rt_handle.block_on after releasing DashMap lock) and code/shell-sentinel (synchronous, brief lock). TS bridge: RustCoreIPC, CodeDaemon, CodeDaemonServer wired. Commands: code/shell/watch and code/shell/sentinel generated via CommandGenerator with full server implementations delegating to CodeDaemon. Registered in schemas, constants, executors, structure. Workspace handle: sentinel(), watch(), execWatch(cmd, rules?, onLine?) β€” composed convenience that runs exec β†’ sentinel β†’ watch loop. Tests: 389 Rust lib tests pass, 183 TS unit tests pass (36 Workspace tests including 5 new watch/sentinel/execWatch tests), 0 failures. --- src/debug/jtag/api/data-seed/RoomDataSeed.ts | 31 + src/debug/jtag/browser/generated.ts | 28 +- .../browser/ChallengeListBrowserCommand.ts | 21 - .../list/server/ChallengeListServerCommand.ts | 115 -- .../list/shared/ChallengeListTypes.ts | 123 -- .../jtag/commands/challenge/run/README.md | 183 --- .../run/browser/ChallengeRunBrowserCommand.ts | 21 - .../run/server/ChallengeRunServerCommand.ts | 177 --- .../challenge/run/shared/ChallengeRunTypes.ts | 145 --- .../ChallengeRunIntegration.test.ts | 196 --- .../list => code/shell/sentinel}/.npmignore | 0 .../list => code/shell/sentinel}/README.md | 47 +- .../CodeShellSentinelBrowserCommand.ts | 21 + .../run => code/shell/sentinel}/package.json | 12 +- .../server/CodeShellSentinelServerCommand.ts | 53 + .../sentinel/shared/CodeShellSentinelTypes.ts | 92 ++ .../CodeShellSentinelIntegration.test.ts} | 40 +- .../unit/CodeShellSentinelCommand.test.ts} | 96 +- .../run => code/shell/watch}/.npmignore | 0 .../jtag/commands/code/shell/watch/README.md | 165 +++ .../browser/CodeShellWatchBrowserCommand.ts | 21 + .../list => code/shell/watch}/package.json | 12 +- .../server/CodeShellWatchServerCommand.ts | 47 + .../shell/watch/shared/CodeShellWatchTypes.ts | 96 ++ .../CodeShellWatchIntegration.test.ts} | 40 +- .../test/unit/CodeShellWatchCommand.test.ts} | 96 +- src/debug/jtag/commands/code/task/.npmignore | 20 - src/debug/jtag/commands/code/task/README.md | 200 --- .../task/browser/CodeTaskBrowserCommand.ts | 21 - .../jtag/commands/code/task/package.json | 35 - .../code/task/server/CodeTaskServerCommand.ts | 130 -- .../code/task/shared/CodeTaskTypes.ts | 194 --- .../task/test/unit/CodeTaskCommand.test.ts | 259 ---- .../code-daemon/server/CodeDaemonServer.ts | 46 +- .../daemons/code-daemon/shared/CodeDaemon.ts | 91 ++ .../code-daemon/shared/CodeDaemonTypes.ts | 14 + .../data-daemon/server/EntityRegistry.ts | 6 - src/debug/jtag/generated-command-schemas.json | 107 +- .../generator/specs/code-shell-sentinel.json | 38 + .../generator/specs/code-shell-watch.json | 47 + src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/server/generated.ts | 28 +- .../shared/generated-command-constants.ts | 5 +- .../shared/generated/code/ClassifiedLine.ts | 27 + .../generated/code/OutputClassification.ts | 6 + .../shared/generated/code/SentinelAction.ts | 6 + .../shared/generated/code/SentinelRule.ts | 23 + .../generated/code/ShellExecuteResponse.ts | 22 + .../generated/code/ShellExecutionStatus.ts | 6 + .../generated/code/ShellHistoryEntry.ts | 6 + .../generated/code/ShellPollResponse.ts | 26 + .../shared/generated/code/ShellSessionInfo.ts | 6 + .../generated/code/ShellWatchResponse.ts | 23 + src/debug/jtag/shared/generated/code/index.ts | 14 + .../generated/persona/ActivityDomain.ts | 2 +- .../persona/ChannelEnqueueRequest.ts | 2 +- src/debug/jtag/shared/version.ts | 2 +- .../code/challenges/ChallengeDefinitions.ts | 445 ------- .../code/server/CodeAgentOrchestrator.ts | 879 ------------- .../system/code/server/CodeTaskDelegator.ts | 408 ------- .../code/server/CodingChallengeRunner.ts | 239 ---- .../jtag/system/code/server/CodingJudge.ts | 288 ----- .../jtag/system/code/server/PlanFormulator.ts | 357 ------ .../jtag/system/code/server/PlanGovernance.ts | 151 --- .../jtag/system/code/server/Workspace.ts | 310 +++++ .../jtag/system/code/shared/CodingTypes.ts | 295 +---- .../server/CodeCoordinationStream.ts | 349 ------ .../system/data/constants/RoomConstants.ts | 3 +- .../data/entities/CodingChallengeEntity.ts | 276 ----- .../system/data/entities/CodingPlanEntity.ts | 340 ------ src/debug/jtag/system/recipes/coding.json | 95 ++ .../jtag/system/user/server/PersonaUser.ts | 41 + .../server/modules/PersonaAutonomousLoop.ts | 8 +- .../server/modules/PersonaTaskExecutor.ts | 24 + .../user/server/modules/QueueItemTypes.ts | 23 + .../integration/coding-agent-workflow.test.ts | 412 ------- .../integration/sandbox-enforcement.test.ts | 302 ----- .../unit/code/CodeAgentOrchestrator.test.ts | 492 -------- .../unit/code/CodeCoordinationStream.test.ts | 328 ----- .../tests/unit/code/CodeTaskDelegator.test.ts | 530 -------- .../tests/unit/code/CodingPlanEntity.test.ts | 349 ------ .../tests/unit/code/PlanFormulator.test.ts | 397 ------ .../tests/unit/code/PlanGovernance.test.ts | 174 --- .../jtag/tests/unit/code/Workspace.test.ts | 644 ++++++++++ .../unit/{code => skill}/SkillEntity.test.ts | 0 .../{code => skill}/SkillLifecycle.test.ts | 0 .../continuum-core/bindings/RustCoreIPC.ts | 170 +++ .../workers/continuum-core/src/code/mod.rs | 7 +- .../continuum-core/src/code/shell_session.rs | 1082 +++++++++++++++++ .../continuum-core/src/code/shell_types.rs | 161 +++ .../workers/continuum-core/src/ipc/mod.rs | 274 ++++- .../src/persona/channel_items.rs | 93 ++ .../src/persona/channel_registry.rs | 9 +- .../src/persona/channel_types.rs | 11 +- 95 files changed, 4122 insertions(+), 9140 deletions(-) delete mode 100644 src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts delete mode 100644 src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts delete mode 100644 src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts delete mode 100644 src/debug/jtag/commands/challenge/run/README.md delete mode 100644 src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts delete mode 100644 src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts delete mode 100644 src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts delete mode 100644 src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts rename src/debug/jtag/commands/{challenge/list => code/shell/sentinel}/.npmignore (100%) rename src/debug/jtag/commands/{challenge/list => code/shell/sentinel}/README.md (54%) create mode 100644 src/debug/jtag/commands/code/shell/sentinel/browser/CodeShellSentinelBrowserCommand.ts rename src/debug/jtag/commands/{challenge/run => code/shell/sentinel}/package.json (52%) create mode 100644 src/debug/jtag/commands/code/shell/sentinel/server/CodeShellSentinelServerCommand.ts create mode 100644 src/debug/jtag/commands/code/shell/sentinel/shared/CodeShellSentinelTypes.ts rename src/debug/jtag/commands/code/{task/test/integration/CodeTaskIntegration.test.ts => shell/sentinel/test/integration/CodeShellSentinelIntegration.test.ts} (80%) rename src/debug/jtag/commands/{challenge/run/test/unit/ChallengeRunCommand.test.ts => code/shell/sentinel/test/unit/CodeShellSentinelCommand.test.ts} (65%) rename src/debug/jtag/commands/{challenge/run => code/shell/watch}/.npmignore (100%) create mode 100644 src/debug/jtag/commands/code/shell/watch/README.md create mode 100644 src/debug/jtag/commands/code/shell/watch/browser/CodeShellWatchBrowserCommand.ts rename src/debug/jtag/commands/{challenge/list => code/shell/watch}/package.json (53%) create mode 100644 src/debug/jtag/commands/code/shell/watch/server/CodeShellWatchServerCommand.ts create mode 100644 src/debug/jtag/commands/code/shell/watch/shared/CodeShellWatchTypes.ts rename src/debug/jtag/commands/{challenge/list/test/integration/ChallengeListIntegration.test.ts => code/shell/watch/test/integration/CodeShellWatchIntegration.test.ts} (80%) rename src/debug/jtag/commands/{challenge/list/test/unit/ChallengeListCommand.test.ts => code/shell/watch/test/unit/CodeShellWatchCommand.test.ts} (67%) delete mode 100644 src/debug/jtag/commands/code/task/.npmignore delete mode 100644 src/debug/jtag/commands/code/task/README.md delete mode 100644 src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts delete mode 100644 src/debug/jtag/commands/code/task/package.json delete mode 100644 src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts delete mode 100644 src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts delete mode 100644 src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts create mode 100644 src/debug/jtag/generator/specs/code-shell-sentinel.json create mode 100644 src/debug/jtag/generator/specs/code-shell-watch.json create mode 100644 src/debug/jtag/shared/generated/code/ClassifiedLine.ts create mode 100644 src/debug/jtag/shared/generated/code/OutputClassification.ts create mode 100644 src/debug/jtag/shared/generated/code/SentinelAction.ts create mode 100644 src/debug/jtag/shared/generated/code/SentinelRule.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellExecuteResponse.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellExecutionStatus.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellHistoryEntry.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellPollResponse.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellSessionInfo.ts create mode 100644 src/debug/jtag/shared/generated/code/ShellWatchResponse.ts delete mode 100644 src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts delete mode 100644 src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts delete mode 100644 src/debug/jtag/system/code/server/CodeTaskDelegator.ts delete mode 100644 src/debug/jtag/system/code/server/CodingChallengeRunner.ts delete mode 100644 src/debug/jtag/system/code/server/CodingJudge.ts delete mode 100644 src/debug/jtag/system/code/server/PlanFormulator.ts delete mode 100644 src/debug/jtag/system/code/server/PlanGovernance.ts create mode 100644 src/debug/jtag/system/code/server/Workspace.ts delete mode 100644 src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts delete mode 100644 src/debug/jtag/system/data/entities/CodingChallengeEntity.ts delete mode 100644 src/debug/jtag/system/data/entities/CodingPlanEntity.ts create mode 100644 src/debug/jtag/system/recipes/coding.json delete mode 100644 src/debug/jtag/tests/integration/coding-agent-workflow.test.ts delete mode 100644 src/debug/jtag/tests/integration/sandbox-enforcement.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/PlanFormulator.test.ts delete mode 100644 src/debug/jtag/tests/unit/code/PlanGovernance.test.ts create mode 100644 src/debug/jtag/tests/unit/code/Workspace.test.ts rename src/debug/jtag/tests/unit/{code => skill}/SkillEntity.test.ts (100%) rename src/debug/jtag/tests/unit/{code => skill}/SkillLifecycle.test.ts (100%) create mode 100644 src/debug/jtag/workers/continuum-core/src/code/shell_session.rs create mode 100644 src/debug/jtag/workers/continuum-core/src/code/shell_types.rs diff --git a/src/debug/jtag/api/data-seed/RoomDataSeed.ts b/src/debug/jtag/api/data-seed/RoomDataSeed.ts index a38977d2e..77f20523f 100644 --- a/src/debug/jtag/api/data-seed/RoomDataSeed.ts +++ b/src/debug/jtag/api/data-seed/RoomDataSeed.ts @@ -215,6 +215,37 @@ export class RoomDataSeed { newsroom.tags = ['news', 'current-events', 'awareness']; rooms.push(newsroom); + // Code room - collaborative software development + const code = new RoomEntity(); + code.uniqueId = ROOM_UNIQUE_IDS.CODE; + code.name = 'code'; + code.displayName = 'Code'; + code.description = 'Collaborative coding β€” reading, writing, reviewing, and shipping code as a team'; + code.topic = 'Software development with real tools and real agent loops'; + code.type = 'public'; + code.status = 'active'; + code.ownerId = humanUserId; + code.lastMessageAt = now; + code.recipeId = 'coding'; + code.privacy = { + isPublic: true, + requiresInvite: false, + allowGuestAccess: false, + searchable: true + }; + code.settings = { + allowThreads: true, + allowReactions: true, + allowFileSharing: true, + messageRetentionDays: 365, + slowMode: 0 + }; + code.members = [ + { userId: humanUserId, role: 'owner', joinedAt: now } + ]; + code.tags = ['coding', 'development', 'engineering']; + rooms.push(code); + return { rooms: rooms as readonly RoomEntity[], totalCount: rooms.length, diff --git a/src/debug/jtag/browser/generated.ts b/src/debug/jtag/browser/generated.ts index cb4e79c23..1a3ea5b9c 100644 --- a/src/debug/jtag/browser/generated.ts +++ b/src/debug/jtag/browser/generated.ts @@ -1,7 +1,7 @@ /** * Browser Structure Registry - Auto-generated * - * Contains 11 daemons and 184 commands and 2 adapters and 27 widgets. + * Contains 11 daemons and 183 commands and 2 adapters and 27 widgets. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -43,15 +43,14 @@ import { AIValidateResponseBrowserCommand } from './../commands/ai/validate-resp import { CanvasStrokeAddBrowserCommand } from './../commands/canvas/stroke/add/browser/CanvasStrokeAddBrowserCommand'; import { CanvasStrokeListBrowserCommand } from './../commands/canvas/stroke/list/browser/CanvasStrokeListBrowserCommand'; import { CanvasVisionBrowserCommand } from './../commands/canvas/vision/browser/CanvasVisionBrowserCommand'; -import { ChallengeListBrowserCommand } from './../commands/challenge/list/browser/ChallengeListBrowserCommand'; -import { ChallengeRunBrowserCommand } from './../commands/challenge/run/browser/ChallengeRunBrowserCommand'; import { CodeDiffBrowserCommand } from './../commands/code/diff/browser/CodeDiffBrowserCommand'; import { CodeEditBrowserCommand } from './../commands/code/edit/browser/CodeEditBrowserCommand'; import { CodeGitBrowserCommand } from './../commands/code/git/browser/CodeGitBrowserCommand'; import { CodeHistoryBrowserCommand } from './../commands/code/history/browser/CodeHistoryBrowserCommand'; import { CodeReadBrowserCommand } from './../commands/code/read/browser/CodeReadBrowserCommand'; import { CodeSearchBrowserCommand } from './../commands/code/search/browser/CodeSearchBrowserCommand'; -import { CodeTaskBrowserCommand } from './../commands/code/task/browser/CodeTaskBrowserCommand'; +import { CodeShellSentinelBrowserCommand } from './../commands/code/shell/sentinel/browser/CodeShellSentinelBrowserCommand'; +import { CodeShellWatchBrowserCommand } from './../commands/code/shell/watch/browser/CodeShellWatchBrowserCommand'; import { CodeTreeBrowserCommand } from './../commands/code/tree/browser/CodeTreeBrowserCommand'; import { CodeUndoBrowserCommand } from './../commands/code/undo/browser/CodeUndoBrowserCommand'; import { CodeVerifyBrowserCommand } from './../commands/code/verify/browser/CodeVerifyBrowserCommand'; @@ -425,16 +424,6 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionBrowserCommand', commandClass: CanvasVisionBrowserCommand }, -{ - name: 'challenge/list', - className: 'ChallengeListBrowserCommand', - commandClass: ChallengeListBrowserCommand - }, -{ - name: 'challenge/run', - className: 'ChallengeRunBrowserCommand', - commandClass: ChallengeRunBrowserCommand - }, { name: 'code/diff', className: 'CodeDiffBrowserCommand', @@ -466,9 +455,14 @@ export const BROWSER_COMMANDS: CommandEntry[] = [ commandClass: CodeSearchBrowserCommand }, { - name: 'code/task', - className: 'CodeTaskBrowserCommand', - commandClass: CodeTaskBrowserCommand + name: 'code/shell/sentinel', + className: 'CodeShellSentinelBrowserCommand', + commandClass: CodeShellSentinelBrowserCommand + }, +{ + name: 'code/shell/watch', + className: 'CodeShellWatchBrowserCommand', + commandClass: CodeShellWatchBrowserCommand }, { name: 'code/tree', diff --git a/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts b/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts deleted file mode 100644 index 916f38953..000000000 --- a/src/debug/jtag/commands/challenge/list/browser/ChallengeListBrowserCommand.ts +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Challenge List Command - Browser Implementation - * - * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ChallengeListParams, ChallengeListResult } from '../shared/ChallengeListTypes'; - -export class ChallengeListBrowserCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('challenge/list', context, subpath, commander); - } - - async execute(params: ChallengeListParams): Promise { - console.log('🌐 BROWSER: Delegating Challenge List to server'); - return await this.remoteExecute(params); - } -} diff --git a/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts b/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts deleted file mode 100644 index d1b1c28e9..000000000 --- a/src/debug/jtag/commands/challenge/list/server/ChallengeListServerCommand.ts +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Challenge List Command - Server Implementation - * - * Lists available coding challenges with difficulty, status, and best scores. - * Loads challenge definitions and enriches with attempt data from the database. - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ChallengeListParams, ChallengeListResult, ChallengeSummary } from '../shared/ChallengeListTypes'; -import { createChallengeListResultFromParams } from '../shared/ChallengeListTypes'; -import { ALL_CHALLENGES } from '@system/code/challenges/ChallengeDefinitions'; -import { CodingChallengeEntity } from '@system/data/entities/CodingChallengeEntity'; -import { Commands } from '@system/core/shared/Commands'; -import { COLLECTIONS } from '@system/shared/Constants'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -export class ChallengeListServerCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('challenge/list', context, subpath, commander); - } - - async execute(params: ChallengeListParams): Promise { - const personaId = (params.personaId ?? params.userId) as UUID | undefined; - - // Filter definitions by difficulty if specified - let definitions = ALL_CHALLENGES; - if (params.difficulty) { - definitions = definitions.filter(d => d.difficulty === params.difficulty); - } - - // Load persisted entities for attempt data (best-effort) - const entityMap = await this.loadPersistedEntities(); - - // Build summaries - const challenges: ChallengeSummary[] = definitions.map(def => { - const entity = entityMap.get(def.name); - - const summary: ChallengeSummary = { - name: def.name, - sequenceNumber: def.sequenceNumber, - difficulty: def.difficulty, - category: def.category, - description: def.description, - timeLimitMs: def.timeLimitMs, - toolCallLimit: def.toolCallLimit, - totalAttempts: entity?.totalAttempts ?? 0, - totalPasses: entity?.totalPasses ?? 0, - highScore: entity?.highScore ?? 0, - passRate: entity?.passRate ?? 0, - }; - - // Add persona-specific data if requested - if (personaId && entity) { - const best = entity.bestAttemptFor(personaId); - if (best) { - summary.personaBestScore = best.score; - summary.personaBestStatus = best.status; - summary.personaAttempts = entity.attempts.filter(a => a.personaId === personaId).length; - } - } - - return summary; - }); - - // Count completed challenges for persona - let completedByPersona = 0; - if (personaId) { - for (const def of ALL_CHALLENGES) { - const entity = entityMap.get(def.name); - if (entity) { - const best = entity.bestAttemptFor(personaId); - if (best?.status === 'passed') { - completedByPersona++; - } - } - } - } - - return createChallengeListResultFromParams(params, { - success: true, - challenges, - totalChallenges: definitions.length, - completedByPersona, - }); - } - - /** - * Load all persisted challenge entities from the database. - * Returns a map keyed by challenge name for easy lookup. - */ - private async loadPersistedEntities(): Promise> { - const map = new Map(); - - try { - const result = await Commands.execute('data/list', { - collection: COLLECTIONS.CODING_CHALLENGES, - limit: 100, - }); - - if (result?.success && Array.isArray(result.items)) { - for (const item of result.items) { - const entity = new CodingChallengeEntity(); - Object.assign(entity, item); - map.set(entity.name, entity); - } - } - } catch { - // Database not available β€” return empty map (all stats will be zero) - } - - return map; - } -} diff --git a/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts b/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts deleted file mode 100644 index fae0cf6f9..000000000 --- a/src/debug/jtag/commands/challenge/list/shared/ChallengeListTypes.ts +++ /dev/null @@ -1,123 +0,0 @@ -/** - * Challenge List Command - Shared Types - * - * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. - */ - -import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; -import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; -import { Commands } from '@system/core/shared/Commands'; -import type { JTAGError } from '@system/core/types/ErrorTypes'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -/** - * Challenge List Command Parameters - */ -export interface ChallengeListParams extends CommandParams { - // Filter by difficulty: beginner, intermediate, advanced, expert - difficulty?: string; - // Show scores for a specific persona - personaId?: string; -} - -/** - * Factory function for creating ChallengeListParams - */ -export const createChallengeListParams = ( - context: JTAGContext, - sessionId: UUID, - data: { - // Filter by difficulty: beginner, intermediate, advanced, expert - difficulty?: string; - // Show scores for a specific persona - personaId?: string; - } -): ChallengeListParams => createPayload(context, sessionId, { - difficulty: data.difficulty ?? '', - personaId: data.personaId ?? '', - ...data -}); - -/** - * Summary of a single challenge for list display - */ -export interface ChallengeSummary { - name: string; - sequenceNumber: number; - difficulty: string; - category: string; - description: string; - timeLimitMs: number; - toolCallLimit: number; - totalAttempts: number; - totalPasses: number; - highScore: number; - passRate: number; - /** Best score by the queried persona (if personaId provided) */ - personaBestScore?: number; - /** Best status by the queried persona */ - personaBestStatus?: string; - /** Number of attempts by the queried persona */ - personaAttempts?: number; -} - -/** - * Challenge List Command Result - */ -export interface ChallengeListResult extends CommandResult { - success: boolean; - // Array of challenge summaries with name, difficulty, sequence, attempts, best score - challenges: ChallengeSummary[]; - // Total number of challenges - totalChallenges: number; - // Number of challenges passed by the specified persona - completedByPersona: number; - error?: JTAGError; -} - -/** - * Factory function for creating ChallengeListResult with defaults - */ -export const createChallengeListResult = ( - context: JTAGContext, - sessionId: UUID, - data: { - success: boolean; - // Array of challenge summaries with name, difficulty, sequence, attempts, best score - challenges?: ChallengeSummary[]; - // Total number of challenges - totalChallenges?: number; - // Number of challenges passed by the specified persona - completedByPersona?: number; - error?: JTAGError; - } -): ChallengeListResult => createPayload(context, sessionId, { - challenges: data.challenges ?? [], - totalChallenges: data.totalChallenges ?? 0, - completedByPersona: data.completedByPersona ?? 0, - ...data -}); - -/** - * Smart Challenge List-specific inheritance from params - * Auto-inherits context and sessionId from params - * Must provide all required result fields - */ -export const createChallengeListResultFromParams = ( - params: ChallengeListParams, - differences: Omit -): ChallengeListResult => transformPayload(params, differences); - -/** - * Challenge List β€” Type-safe command executor - * - * Usage: - * import { ChallengeList } from '...shared/ChallengeListTypes'; - * const result = await ChallengeList.execute({ ... }); - */ -export const ChallengeList = { - execute(params: CommandInput): Promise { - return Commands.execute('challenge/list', params as Partial); - }, - commandName: 'challenge/list' as const, -} as const; diff --git a/src/debug/jtag/commands/challenge/run/README.md b/src/debug/jtag/commands/challenge/run/README.md deleted file mode 100644 index 18c9e2ec9..000000000 --- a/src/debug/jtag/commands/challenge/run/README.md +++ /dev/null @@ -1,183 +0,0 @@ -# Challenge Run Command - -Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. - -## Table of Contents - -- [Usage](#usage) - - [CLI Usage](#cli-usage) - - [Tool Usage](#tool-usage) -- [Parameters](#parameters) -- [Result](#result) -- [Examples](#examples) -- [Testing](#testing) - - [Unit Tests](#unit-tests) - - [Integration Tests](#integration-tests) -- [Getting Help](#getting-help) -- [Access Level](#access-level) -- [Implementation Notes](#implementation-notes) - -## Usage - -### CLI Usage - -From the command line using the jtag CLI: - -```bash -./jtag challenge/run [options] -``` - -### Tool Usage - -From Persona tools or programmatic access using `Commands.execute()`: - -```typescript -import { Commands } from '@system/core/shared/Commands'; - -const result = await Commands.execute('challenge/run', { - // your parameters here -}); -``` - -## Parameters - -- **challengeId** (optional): `string` - Specific challenge ID to run. If not provided, runs the next unbeaten challenge -- **challengeNumber** (optional): `number` - Run challenge by sequence number (1-5) -- **personaId** (optional): `string` - Which AI persona runs the challenge. Defaults to the calling user -- **skipJudge** (optional): `boolean` - Skip AI judge evaluation (faster, just checks execution success) - -## Result - -Returns `ChallengeRunResult` with: - -Returns CommandResult with: -- **challengeName**: `string` - Name of the challenge that was run -- **difficulty**: `string` - Challenge difficulty level -- **status**: `string` - Attempt outcome: passed, failed, partial, timeout, error -- **score**: `number` - Judge score from 0-100 -- **feedback**: `string` - Judge feedback on the attempt -- **durationMs**: `number` - Total execution time in milliseconds -- **toolCallsUsed**: `number` - Number of tool calls consumed -- **filesModified**: `string[]` - Files modified during the attempt -- **filesCreated**: `string[]` - Files created during the attempt -- **errors**: `string[]` - Errors encountered during execution - -## Examples - -### Run the next unbeaten challenge - -```bash -./jtag challenge/run -``` - -**Expected result:** -{ status: "passed", score: 85, challengeName: "Add a function to a single file" } - -### Run a specific challenge by number - -```bash -./jtag challenge/run --challengeNumber=3 -``` - -**Expected result:** -{ status: "partial", score: 60, challengeName: "Extract shared utility from duplicate code" } - -### Quick run without AI judge - -```bash -./jtag challenge/run --challengeNumber=1 --skipJudge=true -``` - -**Expected result:** -{ status: "passed", score: 70, feedback: "Pipeline completed." } - -## Getting Help - -### Using the Help Tool - -Get detailed usage information for this command: - -**CLI:** -```bash -./jtag help challenge/run -``` - -**Tool:** -```typescript -// Use your help tool with command name 'challenge/run' -``` - -### Using the README Tool - -Access this README programmatically: - -**CLI:** -```bash -./jtag readme challenge/run -``` - -**Tool:** -```typescript -// Use your readme tool with command name 'challenge/run' -``` - -## Testing - -### Unit Tests - -Test command logic in isolation using mock dependencies: - -```bash -# Run unit tests (no server required) -npx tsx commands/Challenge Run/test/unit/ChallengeRunCommand.test.ts -``` - -**What's tested:** -- Command structure and parameter validation -- Mock command execution patterns -- Required parameter validation (throws ValidationError) -- Optional parameter handling (sensible defaults) -- Performance requirements -- Assertion utility helpers - -**TDD Workflow:** -1. Write/modify unit test first (test-driven development) -2. Run test, see it fail -3. Implement feature -4. Run test, see it pass -5. Refactor if needed - -### Integration Tests - -Test command with real client connections and system integration: - -```bash -# Prerequisites: Server must be running -npm start # Wait 90+ seconds for deployment - -# Run integration tests -npx tsx commands/Challenge Run/test/integration/ChallengeRunIntegration.test.ts -``` - -**What's tested:** -- Client connection to live system -- Real command execution via WebSocket -- ValidationError handling for missing params -- Optional parameter defaults -- Performance under load -- Various parameter combinations - -**Best Practice:** -Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). - -## Access Level - -**ai-safe** - Safe for AI personas to call autonomously - -## Implementation Notes - -- **Shared Logic**: Core business logic in `shared/ChallengeRunTypes.ts` -- **Browser**: Browser-specific implementation in `browser/ChallengeRunBrowserCommand.ts` -- **Server**: Server-specific implementation in `server/ChallengeRunServerCommand.ts` -- **Unit Tests**: Isolated testing in `test/unit/ChallengeRunCommand.test.ts` -- **Integration Tests**: System testing in `test/integration/ChallengeRunIntegration.test.ts` diff --git a/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts b/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts deleted file mode 100644 index d2303b12f..000000000 --- a/src/debug/jtag/commands/challenge/run/browser/ChallengeRunBrowserCommand.ts +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Challenge Run Command - Browser Implementation - * - * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { ChallengeRunParams, ChallengeRunResult } from '../shared/ChallengeRunTypes'; - -export class ChallengeRunBrowserCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('challenge/run', context, subpath, commander); - } - - async execute(params: ChallengeRunParams): Promise { - console.log('🌐 BROWSER: Delegating Challenge Run to server'); - return await this.remoteExecute(params); - } -} diff --git a/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts b/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts deleted file mode 100644 index 8ff5d583a..000000000 --- a/src/debug/jtag/commands/challenge/run/server/ChallengeRunServerCommand.ts +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Challenge Run Command - Server Implementation - * - * Runs a coding challenge: - * 1. Loads challenge (by ID, sequence number, or next unbeaten) - * 2. Sets up fresh workspace with challenge files - * 3. Executes via CodingChallengeRunner β†’ CodeAgentOrchestrator - * 4. Evaluates via CodingJudge - * 5. Records attempt and returns results - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import { ValidationError } from '@system/core/types/ErrorTypes'; -import type { ChallengeRunParams, ChallengeRunResult } from '../shared/ChallengeRunTypes'; -import { createChallengeRunResultFromParams } from '../shared/ChallengeRunTypes'; -import { CodingChallengeRunner } from '@system/code/server/CodingChallengeRunner'; -import { CodingChallengeEntity } from '@system/data/entities/CodingChallengeEntity'; -import { ALL_CHALLENGES } from '@system/code/challenges/ChallengeDefinitions'; -import { Commands } from '@system/core/shared/Commands'; -import { COLLECTIONS } from '@system/shared/Constants'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -export class ChallengeRunServerCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('challenge/run', context, subpath, commander); - } - - async execute(params: ChallengeRunParams): Promise { - const personaId = (params.personaId ?? params.userId) as UUID; - if (!personaId) { - throw new ValidationError('personaId', 'A persona ID is required to run a challenge.'); - } - - // Load or create the challenge entity - const challenge = await this.resolveChallenge(params, personaId); - - // Run the challenge - const runner = new CodingChallengeRunner(); - const result = await runner.run(challenge, { - personaId, - skipJudge: params.skipJudge ?? false, - }); - - // Persist updated challenge (with new attempt recorded) - await this.persistChallenge(challenge); - - return createChallengeRunResultFromParams(params, { - success: result.success, - challengeName: challenge.name, - difficulty: challenge.difficulty, - status: result.attempt.status, - score: result.attempt.score, - feedback: result.attempt.feedback, - durationMs: result.attempt.durationMs, - toolCallsUsed: result.attempt.toolCallsUsed, - filesModified: result.attempt.filesModified, - filesCreated: result.attempt.filesCreated, - errors: result.attempt.errors, - }); - } - - /** - * Resolve which challenge to run: - * 1. By challengeId (exact match) - * 2. By challengeNumber (sequence number) - * 3. Next unbeaten challenge for this persona - */ - private async resolveChallenge(params: ChallengeRunParams, personaId: UUID): Promise { - // Try loading from database first - if (params.challengeId) { - return await this.loadOrCreateChallenge(params.challengeId); - } - - if (params.challengeNumber) { - const def = ALL_CHALLENGES.find(c => c.sequenceNumber === params.challengeNumber); - if (!def) { - throw new ValidationError( - 'challengeNumber', - `No challenge with sequence number ${params.challengeNumber}. Valid: 1-${ALL_CHALLENGES.length}`, - ); - } - return await this.ensureChallengeEntity(def); - } - - // Find next unbeaten challenge - for (const def of ALL_CHALLENGES) { - const entity = await this.ensureChallengeEntity(def); - const best = entity.bestAttemptFor(personaId); - if (!best || best.status !== 'passed') { - return entity; - } - } - - // All beaten β€” run the hardest one again - return await this.ensureChallengeEntity(ALL_CHALLENGES[ALL_CHALLENGES.length - 1]); - } - - /** - * Ensure a challenge definition exists as a persisted entity. - * Creates it if it doesn't exist in the database. - */ - private async ensureChallengeEntity(def: typeof ALL_CHALLENGES[0]): Promise { - // Try to find existing entity by name - try { - const existing = await Commands.execute('data/list', { - collection: COLLECTIONS.CODING_CHALLENGES, - filter: { name: def.name }, - limit: 1, - }); - - if (existing?.success && existing.items?.length > 0) { - const entity = new CodingChallengeEntity(); - Object.assign(entity, existing.items[0]); - return entity; - } - } catch { - // Database not available β€” create in-memory entity - } - - // Create new entity from definition - const entity = new CodingChallengeEntity(); - entity.name = def.name; - entity.description = def.description; - entity.sequenceNumber = def.sequenceNumber; - entity.difficulty = def.difficulty; - entity.category = def.category; - entity.setupFiles = def.setupFiles; - entity.expectedOutcome = def.expectedOutcome; - entity.evaluationCriteria = def.evaluationCriteria; - entity.expectedFiles = def.expectedFiles; - entity.timeLimitMs = def.timeLimitMs; - entity.toolCallLimit = def.toolCallLimit; - - // Persist (best-effort) - await this.persistChallenge(entity); - - return entity; - } - - private async loadOrCreateChallenge(challengeId: string): Promise { - try { - const result = await Commands.execute('data/read', { - collection: COLLECTIONS.CODING_CHALLENGES, - id: challengeId, - }); - if (result?.success && result.item) { - const entity = new CodingChallengeEntity(); - Object.assign(entity, result.item); - return entity; - } - } catch { - // Not found - } - throw new ValidationError('challengeId', `Challenge not found: ${challengeId}`); - } - - private async persistChallenge(entity: CodingChallengeEntity): Promise { - try { - if (entity.id) { - await Commands.execute('data/update', { - collection: COLLECTIONS.CODING_CHALLENGES, - id: entity.id, - data: { ...entity }, - }); - } else { - await Commands.execute('data/create', { - collection: COLLECTIONS.CODING_CHALLENGES, - data: { ...entity }, - }); - } - } catch { - // Best-effort persistence - } - } -} diff --git a/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts b/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts deleted file mode 100644 index 738950f47..000000000 --- a/src/debug/jtag/commands/challenge/run/shared/ChallengeRunTypes.ts +++ /dev/null @@ -1,145 +0,0 @@ -/** - * Challenge Run Command - Shared Types - * - * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt. - */ - -import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; -import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; -import { Commands } from '@system/core/shared/Commands'; -import type { JTAGError } from '@system/core/types/ErrorTypes'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -/** - * Challenge Run Command Parameters - */ -export interface ChallengeRunParams extends CommandParams { - // Specific challenge ID to run. If not provided, runs the next unbeaten challenge - challengeId?: string; - // Run challenge by sequence number (1-5) - challengeNumber?: number; - // Which AI persona runs the challenge. Defaults to the calling user - personaId?: string; - // Skip AI judge evaluation (faster, just checks execution success) - skipJudge?: boolean; -} - -/** - * Factory function for creating ChallengeRunParams - */ -export const createChallengeRunParams = ( - context: JTAGContext, - sessionId: UUID, - data: { - // Specific challenge ID to run. If not provided, runs the next unbeaten challenge - challengeId?: string; - // Run challenge by sequence number (1-5) - challengeNumber?: number; - // Which AI persona runs the challenge. Defaults to the calling user - personaId?: string; - // Skip AI judge evaluation (faster, just checks execution success) - skipJudge?: boolean; - } -): ChallengeRunParams => createPayload(context, sessionId, { - challengeId: data.challengeId ?? '', - challengeNumber: data.challengeNumber ?? 0, - personaId: data.personaId ?? '', - skipJudge: data.skipJudge ?? false, - ...data -}); - -/** - * Challenge Run Command Result - */ -export interface ChallengeRunResult extends CommandResult { - success: boolean; - // Name of the challenge that was run - challengeName: string; - // Challenge difficulty level - difficulty: string; - // Attempt outcome: passed, failed, partial, timeout, error - status: string; - // Judge score from 0-100 - score: number; - // Judge feedback on the attempt - feedback: string; - // Total execution time in milliseconds - durationMs: number; - // Number of tool calls consumed - toolCallsUsed: number; - // Files modified during the attempt - filesModified: string[]; - // Files created during the attempt - filesCreated: string[]; - // Errors encountered during execution - errors: string[]; - error?: JTAGError; -} - -/** - * Factory function for creating ChallengeRunResult with defaults - */ -export const createChallengeRunResult = ( - context: JTAGContext, - sessionId: UUID, - data: { - success: boolean; - // Name of the challenge that was run - challengeName?: string; - // Challenge difficulty level - difficulty?: string; - // Attempt outcome: passed, failed, partial, timeout, error - status?: string; - // Judge score from 0-100 - score?: number; - // Judge feedback on the attempt - feedback?: string; - // Total execution time in milliseconds - durationMs?: number; - // Number of tool calls consumed - toolCallsUsed?: number; - // Files modified during the attempt - filesModified?: string[]; - // Files created during the attempt - filesCreated?: string[]; - // Errors encountered during execution - errors?: string[]; - error?: JTAGError; - } -): ChallengeRunResult => createPayload(context, sessionId, { - challengeName: data.challengeName ?? '', - difficulty: data.difficulty ?? '', - status: data.status ?? '', - score: data.score ?? 0, - feedback: data.feedback ?? '', - durationMs: data.durationMs ?? 0, - toolCallsUsed: data.toolCallsUsed ?? 0, - filesModified: data.filesModified ?? [], - filesCreated: data.filesCreated ?? [], - errors: data.errors ?? [], - ...data -}); - -/** - * Smart Challenge Run-specific inheritance from params - * Auto-inherits context and sessionId from params - * Must provide all required result fields - */ -export const createChallengeRunResultFromParams = ( - params: ChallengeRunParams, - differences: Omit -): ChallengeRunResult => transformPayload(params, differences); - -/** - * Challenge Run β€” Type-safe command executor - * - * Usage: - * import { ChallengeRun } from '...shared/ChallengeRunTypes'; - * const result = await ChallengeRun.execute({ ... }); - */ -export const ChallengeRun = { - execute(params: CommandInput): Promise { - return Commands.execute('challenge/run', params as Partial); - }, - commandName: 'challenge/run' as const, -} as const; diff --git a/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts b/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts deleted file mode 100644 index d23febfce..000000000 --- a/src/debug/jtag/commands/challenge/run/test/integration/ChallengeRunIntegration.test.ts +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env tsx -/** - * ChallengeRun Command Integration Tests - * - * Tests Challenge Run command against the LIVE RUNNING SYSTEM. - * This is NOT a mock test - it tests real commands, real events, real widgets. - * - * Generated by: ./jtag generate - * Run with: npx tsx commands/Challenge Run/test/integration/ChallengeRunIntegration.test.ts - * - * PREREQUISITES: - * - Server must be running: npm start (wait 90+ seconds) - * - Browser client connected via http://localhost:9003 - */ - -import { jtag } from '@server/server-index'; - -console.log('πŸ§ͺ ChallengeRun Command Integration Tests'); - -function assert(condition: boolean, message: string): void { - if (!condition) { - throw new Error(`❌ Assertion failed: ${message}`); - } - console.log(`βœ… ${message}`); -} - -/** - * Test 1: Connect to live system - */ -async function testSystemConnection(): Promise>> { - console.log('\nπŸ”Œ Test 1: Connecting to live JTAG system'); - - const client = await jtag.connect(); - - assert(client !== null, 'Connected to live system'); - console.log(' βœ… Connected successfully'); - - return client; -} - -/** - * Test 2: Execute Challenge Run command on live system - */ -async function testCommandExecution(client: Awaited>): Promise { - console.log('\n⚑ Test 2: Executing Challenge Run command'); - - // TODO: Replace with your actual command parameters - const result = await client.commands['Challenge Run']({ - // Add your required parameters here - // Example: name: 'test-value' - }); - - console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); - - assert(result !== null, 'Challenge Run returned result'); - // TODO: Add assertions for your specific result fields - // assert(result.success === true, 'Challenge Run succeeded'); - // assert(result.yourField !== undefined, 'Result has yourField'); -} - -/** - * Test 3: Validate required parameters - */ -async function testRequiredParameters(_client: Awaited>): Promise { - console.log('\n🚨 Test 3: Testing required parameter validation'); - - // TODO: Uncomment and test missing required parameters - // try { - // await _client.commands['Challenge Run']({ - // // Missing required param - // }); - // assert(false, 'Should have thrown validation error'); - // } catch (error) { - // assert((error as Error).message.includes('required'), 'Error mentions required parameter'); - // console.log(' βœ… ValidationError thrown correctly'); - // } - - console.log(' ⚠️ TODO: Add required parameter validation test'); -} - -/** - * Test 4: Test optional parameters - */ -async function testOptionalParameters(_client: Awaited>): Promise { - console.log('\nπŸ”§ Test 4: Testing optional parameters'); - - // TODO: Uncomment to test with and without optional parameters - // const withOptional = await client.commands['Challenge Run']({ - // requiredParam: 'test', - // optionalParam: true - // }); - // - // const withoutOptional = await client.commands['Challenge Run']({ - // requiredParam: 'test' - // }); - // - // assert(withOptional.success === true, 'Works with optional params'); - // assert(withoutOptional.success === true, 'Works without optional params'); - - console.log(' ⚠️ TODO: Add optional parameter tests'); -} - -/** - * Test 5: Performance test - */ -async function testPerformance(_client: Awaited>): Promise { - console.log('\n⚑ Test 5: Performance under load'); - - // TODO: Uncomment to test command performance - // const iterations = 10; - // const times: number[] = []; - // - // for (let i = 0; i < iterations; i++) { - // const start = Date.now(); - // await _client.commands['Challenge Run']({ /* params */ }); - // times.push(Date.now() - start); - // } - // - // const avg = times.reduce((a, b) => a + b, 0) / iterations; - // const max = Math.max(...times); - // - // console.log(` Average: ${avg.toFixed(2)}ms`); - // console.log(` Max: ${max}ms`); - // - // assert(avg < 500, `Average ${avg.toFixed(2)}ms under 500ms`); - // assert(max < 1000, `Max ${max}ms under 1000ms`); - - console.log(' ⚠️ TODO: Add performance test'); -} - -/** - * Test 6: Widget/Event integration (if applicable) - */ -async function testWidgetIntegration(_client: Awaited>): Promise { - console.log('\n🎨 Test 6: Widget/Event integration'); - - // TODO: Uncomment if your command emits events or updates widgets - // Example: - // const before = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); - // await client.commands['Challenge Run']({ /* params */ }); - // await new Promise(resolve => setTimeout(resolve, 1000)); // Wait for event propagation - // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); - // - // assert(after.state.someValue !== before.state.someValue, 'Widget state updated'); - - console.log(' ⚠️ TODO: Add widget/event integration test (if applicable)'); -} - -/** - * Run all integration tests - */ -async function runAllChallengeRunIntegrationTests(): Promise { - console.log('πŸš€ Starting ChallengeRun Integration Tests\n'); - console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); - - try { - const client = await testSystemConnection(); - await testCommandExecution(client); - await testRequiredParameters(client); - await testOptionalParameters(client); - await testPerformance(client); - await testWidgetIntegration(client); - - console.log('\nπŸŽ‰ ALL ChallengeRun INTEGRATION TESTS PASSED!'); - console.log('πŸ“‹ Validated:'); - console.log(' βœ… Live system connection'); - console.log(' βœ… Command execution on real system'); - console.log(' βœ… Parameter validation'); - console.log(' βœ… Optional parameter handling'); - console.log(' βœ… Performance benchmarks'); - console.log(' βœ… Widget/Event integration'); - console.log('\nπŸ’‘ NOTE: This test uses the REAL running system'); - console.log(' - Real database operations'); - console.log(' - Real event propagation'); - console.log(' - Real widget updates'); - console.log(' - Real cross-daemon communication'); - - } catch (error) { - console.error('\n❌ ChallengeRun integration tests failed:', (error as Error).message); - if ((error as Error).stack) { - console.error((error as Error).stack); - } - console.error('\nπŸ’‘ Make sure:'); - console.error(' 1. Server is running: npm start'); - console.error(' 2. Wait 90+ seconds for deployment'); - console.error(' 3. Browser is connected to http://localhost:9003'); - process.exit(1); - } -} - -// Run if called directly -if (require.main === module) { - void runAllChallengeRunIntegrationTests(); -} else { - module.exports = { runAllChallengeRunIntegrationTests }; -} diff --git a/src/debug/jtag/commands/challenge/list/.npmignore b/src/debug/jtag/commands/code/shell/sentinel/.npmignore similarity index 100% rename from src/debug/jtag/commands/challenge/list/.npmignore rename to src/debug/jtag/commands/code/shell/sentinel/.npmignore diff --git a/src/debug/jtag/commands/challenge/list/README.md b/src/debug/jtag/commands/code/shell/sentinel/README.md similarity index 54% rename from src/debug/jtag/commands/challenge/list/README.md rename to src/debug/jtag/commands/code/shell/sentinel/README.md index a42ea610d..4d0c3b142 100644 --- a/src/debug/jtag/commands/challenge/list/README.md +++ b/src/debug/jtag/commands/code/shell/sentinel/README.md @@ -1,6 +1,6 @@ -# Challenge List Command +# Code Shell Sentinel Command -List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training. +Configure sentinel filter rules on a shell execution. Rules classify output lines and control which lines are emitted or suppressed during watch. Patterns are compiled to regex on the Rust side for performance. ## Table of Contents @@ -24,7 +24,7 @@ List available coding challenges with their difficulty, status, and best scores. From the command line using the jtag CLI: ```bash -./jtag challenge/list [options] +./jtag code/shell/sentinel --executionId= --rules= ``` ### Tool Usage @@ -34,35 +34,34 @@ From Persona tools or programmatic access using `Commands.execute()`: ```typescript import { Commands } from '@system/core/shared/Commands'; -const result = await Commands.execute('challenge/list', { +const result = await Commands.execute('code/shell/sentinel', { // your parameters here }); ``` ## Parameters -- **difficulty** (optional): `string` - Filter by difficulty: beginner, intermediate, advanced, expert -- **personaId** (optional): `string` - Show scores for a specific persona +- **executionId** (required): `string` - Execution handle to attach sentinel rules to +- **rules** (required): `SentinelRule[]` - Array of classification rules: { pattern: string, classification: OutputClassification, action: SentinelAction } ## Result -Returns `ChallengeListResult` with: +Returns `CodeShellSentinelResult` with: Returns CommandResult with: -- **challenges**: `object[]` - Array of challenge summaries with name, difficulty, sequence, attempts, best score -- **totalChallenges**: `number` - Total number of challenges -- **completedByPersona**: `number` - Number of challenges passed by the specified persona +- **applied**: `boolean` - Whether rules were applied successfully +- **ruleCount**: `number` - Number of sentinel rules configured ## Examples -### List all challenges +### Filter build output to only errors and warnings ```bash -./jtag challenge/list +./jtag code/shell/sentinel --executionId="exec-abc123" --rules='[{"pattern":"^error","classification":"Error","action":"Emit"},{"pattern":".*","classification":"Verbose","action":"Suppress"}]' ``` **Expected result:** -{ totalChallenges: 5, challenges: [{ name: "Add a function...", difficulty: "beginner", ... }] } +{ applied: true, ruleCount: 2 } ## Getting Help @@ -72,12 +71,12 @@ Get detailed usage information for this command: **CLI:** ```bash -./jtag help challenge/list +./jtag help code/shell/sentinel ``` **Tool:** ```typescript -// Use your help tool with command name 'challenge/list' +// Use your help tool with command name 'code/shell/sentinel' ``` ### Using the README Tool @@ -86,12 +85,12 @@ Access this README programmatically: **CLI:** ```bash -./jtag readme challenge/list +./jtag readme code/shell/sentinel ``` **Tool:** ```typescript -// Use your readme tool with command name 'challenge/list' +// Use your readme tool with command name 'code/shell/sentinel' ``` ## Testing @@ -102,7 +101,7 @@ Test command logic in isolation using mock dependencies: ```bash # Run unit tests (no server required) -npx tsx commands/Challenge List/test/unit/ChallengeListCommand.test.ts +npx tsx commands/Code Shell Sentinel/test/unit/CodeShellSentinelCommand.test.ts ``` **What's tested:** @@ -129,7 +128,7 @@ Test command with real client connections and system integration: npm start # Wait 90+ seconds for deployment # Run integration tests -npx tsx commands/Challenge List/test/integration/ChallengeListIntegration.test.ts +npx tsx commands/Code Shell Sentinel/test/integration/CodeShellSentinelIntegration.test.ts ``` **What's tested:** @@ -149,8 +148,8 @@ Run unit tests frequently during development (fast feedback). Run integration te ## Implementation Notes -- **Shared Logic**: Core business logic in `shared/ChallengeListTypes.ts` -- **Browser**: Browser-specific implementation in `browser/ChallengeListBrowserCommand.ts` -- **Server**: Server-specific implementation in `server/ChallengeListServerCommand.ts` -- **Unit Tests**: Isolated testing in `test/unit/ChallengeListCommand.test.ts` -- **Integration Tests**: System testing in `test/integration/ChallengeListIntegration.test.ts` +- **Shared Logic**: Core business logic in `shared/CodeShellSentinelTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeShellSentinelBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeShellSentinelServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeShellSentinelCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeShellSentinelIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/shell/sentinel/browser/CodeShellSentinelBrowserCommand.ts b/src/debug/jtag/commands/code/shell/sentinel/browser/CodeShellSentinelBrowserCommand.ts new file mode 100644 index 000000000..557508f1b --- /dev/null +++ b/src/debug/jtag/commands/code/shell/sentinel/browser/CodeShellSentinelBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Shell Sentinel Command - Browser Implementation + * + * Configure sentinel filter rules on a shell execution. Rules classify output lines and control which lines are emitted or suppressed during watch. Patterns are compiled to regex on the Rust side for performance. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeShellSentinelParams, CodeShellSentinelResult } from '../shared/CodeShellSentinelTypes'; + +export class CodeShellSentinelBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/shell/sentinel', context, subpath, commander); + } + + async execute(params: CodeShellSentinelParams): Promise { + console.log('🌐 BROWSER: Delegating Code Shell Sentinel to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/challenge/run/package.json b/src/debug/jtag/commands/code/shell/sentinel/package.json similarity index 52% rename from src/debug/jtag/commands/challenge/run/package.json rename to src/debug/jtag/commands/code/shell/sentinel/package.json index 944ee6330..e1bd9a1cd 100644 --- a/src/debug/jtag/commands/challenge/run/package.json +++ b/src/debug/jtag/commands/code/shell/sentinel/package.json @@ -1,13 +1,13 @@ { - "name": "@jtag-commands/challenge/run", + "name": "@jtag-commands/code/shell/sentinel", "version": "1.0.0", - "description": "Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt.", - "main": "server/ChallengeRunServerCommand.ts", - "types": "shared/ChallengeRunTypes.ts", + "description": "Configure sentinel filter rules on a shell execution. Rules classify output lines and control which lines are emitted or suppressed during watch. Patterns are compiled to regex on the Rust side for performance.", + "main": "server/CodeShellSentinelServerCommand.ts", + "types": "shared/CodeShellSentinelTypes.ts", "scripts": { "test": "npm run test:unit && npm run test:integration", "test:unit": "npx vitest run test/unit/*.test.ts", - "test:integration": "npx tsx test/integration/ChallengeRunIntegration.test.ts", + "test:integration": "npx tsx test/integration/CodeShellSentinelIntegration.test.ts", "lint": "npx eslint **/*.ts", "typecheck": "npx tsc --noEmit" }, @@ -24,7 +24,7 @@ "keywords": [ "jtag", "command", - "challenge/run" + "code/shell/sentinel" ], "license": "MIT", "author": "", diff --git a/src/debug/jtag/commands/code/shell/sentinel/server/CodeShellSentinelServerCommand.ts b/src/debug/jtag/commands/code/shell/sentinel/server/CodeShellSentinelServerCommand.ts new file mode 100644 index 000000000..50cc48e20 --- /dev/null +++ b/src/debug/jtag/commands/code/shell/sentinel/server/CodeShellSentinelServerCommand.ts @@ -0,0 +1,53 @@ +/** + * Code Shell Sentinel Command - Server Implementation + * + * Configure sentinel filter rules on a shell execution. Rules classify output lines + * and control which lines are emitted or suppressed during watch. + * Patterns are compiled to regex on the Rust side for performance. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeShellSentinelParams, CodeShellSentinelResult } from '../shared/CodeShellSentinelTypes'; +import { createCodeShellSentinelResultFromParams } from '../shared/CodeShellSentinelTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeShellSentinelServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/shell/sentinel', context, subpath, commander); + } + + async execute(params: CodeShellSentinelParams): Promise { + if (!params.executionId || params.executionId.trim() === '') { + throw new ValidationError( + 'executionId', + `Missing required parameter 'executionId'. Use the help tool with 'Code Shell Sentinel' or see the code/shell/sentinel README for usage.` + ); + } + + if (!params.rules || !Array.isArray(params.rules)) { + throw new ValidationError( + 'rules', + `Missing required parameter 'rules'. Provide an array of SentinelRule objects. See the code/shell/sentinel README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError( + 'userId', + 'Shell sentinel operations require a userId (auto-injected for persona tool calls).' + ); + } + const personaId = params.userId; + + const result = await CodeDaemon.shellSentinel(personaId, params.executionId, params.rules); + + return createCodeShellSentinelResultFromParams(params, { + success: true, + applied: result.applied, + ruleCount: result.ruleCount, + }); + } +} diff --git a/src/debug/jtag/commands/code/shell/sentinel/shared/CodeShellSentinelTypes.ts b/src/debug/jtag/commands/code/shell/sentinel/shared/CodeShellSentinelTypes.ts new file mode 100644 index 000000000..2a16127b2 --- /dev/null +++ b/src/debug/jtag/commands/code/shell/sentinel/shared/CodeShellSentinelTypes.ts @@ -0,0 +1,92 @@ +/** + * Code Shell Sentinel Command - Shared Types + * + * Configure sentinel filter rules on a shell execution. Rules classify output lines + * and control which lines are emitted or suppressed during watch. + * Patterns are compiled to regex on the Rust side for performance. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { SentinelRule } from '@shared/generated/code/SentinelRule'; + +/** + * Code Shell Sentinel Command Parameters + */ +export interface CodeShellSentinelParams extends CommandParams { + /** Execution handle to attach sentinel rules to */ + executionId: string; + /** Array of classification rules (pattern, classification, action) */ + rules: SentinelRule[]; +} + +/** + * Factory function for creating CodeShellSentinelParams + */ +export const createCodeShellSentinelParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + executionId: string; + rules: SentinelRule[]; + } +): CodeShellSentinelParams => createPayload(context, sessionId, { + ...data +}); + +/** + * Code Shell Sentinel Command Result + */ +export interface CodeShellSentinelResult extends CommandResult { + success: boolean; + /** Whether rules were applied successfully */ + applied: boolean; + /** Number of sentinel rules configured */ + ruleCount: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeShellSentinelResult with defaults + */ +export const createCodeShellSentinelResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + applied?: boolean; + ruleCount?: number; + error?: JTAGError; + } +): CodeShellSentinelResult => createPayload(context, sessionId, { + applied: data.applied ?? false, + ruleCount: data.ruleCount ?? 0, + ...data +}); + +/** + * Smart Code Shell Sentinel-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeShellSentinelResultFromParams = ( + params: CodeShellSentinelParams, + differences: Omit +): CodeShellSentinelResult => transformPayload(params, differences); + +/** + * Code Shell Sentinel β€” Type-safe command executor + * + * Usage: + * import { CodeShellSentinel } from '...shared/CodeShellSentinelTypes'; + * const result = await CodeShellSentinel.execute({ ... }); + */ +export const CodeShellSentinel = { + execute(params: CommandInput): Promise { + return Commands.execute('code/shell/sentinel', params as Partial); + }, + commandName: 'code/shell/sentinel' as const, +} as const; diff --git a/src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts b/src/debug/jtag/commands/code/shell/sentinel/test/integration/CodeShellSentinelIntegration.test.ts similarity index 80% rename from src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts rename to src/debug/jtag/commands/code/shell/sentinel/test/integration/CodeShellSentinelIntegration.test.ts index 863ca708d..50e939331 100644 --- a/src/debug/jtag/commands/code/task/test/integration/CodeTaskIntegration.test.ts +++ b/src/debug/jtag/commands/code/shell/sentinel/test/integration/CodeShellSentinelIntegration.test.ts @@ -1,12 +1,12 @@ #!/usr/bin/env tsx /** - * CodeTask Command Integration Tests + * CodeShellSentinel Command Integration Tests * - * Tests Code Task command against the LIVE RUNNING SYSTEM. + * Tests Code Shell Sentinel command against the LIVE RUNNING SYSTEM. * This is NOT a mock test - it tests real commands, real events, real widgets. * * Generated by: ./jtag generate - * Run with: npx tsx commands/Code Task/test/integration/CodeTaskIntegration.test.ts + * Run with: npx tsx commands/Code Shell Sentinel/test/integration/CodeShellSentinelIntegration.test.ts * * PREREQUISITES: * - Server must be running: npm start (wait 90+ seconds) @@ -15,7 +15,7 @@ import { jtag } from '@server/server-index'; -console.log('πŸ§ͺ CodeTask Command Integration Tests'); +console.log('πŸ§ͺ CodeShellSentinel Command Integration Tests'); function assert(condition: boolean, message: string): void { if (!condition) { @@ -39,22 +39,22 @@ async function testSystemConnection(): Promise>): Promise { - console.log('\n⚑ Test 2: Executing Code Task command'); + console.log('\n⚑ Test 2: Executing Code Shell Sentinel command'); // TODO: Replace with your actual command parameters - const result = await client.commands['Code Task']({ + const result = await client.commands['Code Shell Sentinel']({ // Add your required parameters here // Example: name: 'test-value' }); console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); - assert(result !== null, 'Code Task returned result'); + assert(result !== null, 'Code Shell Sentinel returned result'); // TODO: Add assertions for your specific result fields - // assert(result.success === true, 'Code Task succeeded'); + // assert(result.success === true, 'Code Shell Sentinel succeeded'); // assert(result.yourField !== undefined, 'Result has yourField'); } @@ -66,7 +66,7 @@ async function testRequiredParameters(_client: Awaited> // // for (let i = 0; i < iterations; i++) { // const start = Date.now(); - // await _client.commands['Code Task']({ /* params */ }); + // await _client.commands['Code Shell Sentinel']({ /* params */ }); // times.push(Date.now() - start); // } // @@ -137,7 +137,7 @@ async function testWidgetIntegration(_client: Awaited setTimeout(resolve, 1000)); // Wait for event propagation // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); // @@ -149,8 +149,8 @@ async function testWidgetIntegration(_client: Awaited { - console.log('πŸš€ Starting CodeTask Integration Tests\n'); +async function runAllCodeShellSentinelIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeShellSentinel Integration Tests\n'); console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); try { @@ -161,7 +161,7 @@ async function runAllCodeTaskIntegrationTests(): Promise { await testPerformance(client); await testWidgetIntegration(client); - console.log('\nπŸŽ‰ ALL CodeTask INTEGRATION TESTS PASSED!'); + console.log('\nπŸŽ‰ ALL CodeShellSentinel INTEGRATION TESTS PASSED!'); console.log('πŸ“‹ Validated:'); console.log(' βœ… Live system connection'); console.log(' βœ… Command execution on real system'); @@ -176,7 +176,7 @@ async function runAllCodeTaskIntegrationTests(): Promise { console.log(' - Real cross-daemon communication'); } catch (error) { - console.error('\n❌ CodeTask integration tests failed:', (error as Error).message); + console.error('\n❌ CodeShellSentinel integration tests failed:', (error as Error).message); if ((error as Error).stack) { console.error((error as Error).stack); } @@ -190,7 +190,7 @@ async function runAllCodeTaskIntegrationTests(): Promise { // Run if called directly if (require.main === module) { - void runAllCodeTaskIntegrationTests(); + void runAllCodeShellSentinelIntegrationTests(); } else { - module.exports = { runAllCodeTaskIntegrationTests }; + module.exports = { runAllCodeShellSentinelIntegrationTests }; } diff --git a/src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts b/src/debug/jtag/commands/code/shell/sentinel/test/unit/CodeShellSentinelCommand.test.ts similarity index 65% rename from src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts rename to src/debug/jtag/commands/code/shell/sentinel/test/unit/CodeShellSentinelCommand.test.ts index bc8c01289..a068632ec 100644 --- a/src/debug/jtag/commands/challenge/run/test/unit/ChallengeRunCommand.test.ts +++ b/src/debug/jtag/commands/code/shell/sentinel/test/unit/CodeShellSentinelCommand.test.ts @@ -1,12 +1,12 @@ #!/usr/bin/env tsx /** - * ChallengeRun Command Unit Tests + * CodeShellSentinel Command Unit Tests * - * Tests Challenge Run command logic in isolation using mock dependencies. + * Tests Code Shell Sentinel command logic in isolation using mock dependencies. * This is a REFERENCE EXAMPLE showing best practices for command testing. * * Generated by: ./jtag generate - * Run with: npx tsx commands/Challenge Run/test/unit/ChallengeRunCommand.test.ts + * Run with: npx tsx commands/Code Shell Sentinel/test/unit/CodeShellSentinelCommand.test.ts * * NOTE: This is a self-contained test (no external test utilities needed). * Use this as a template for your own command tests. @@ -14,9 +14,9 @@ // import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests import { generateUUID } from '@system/core/types/CrossPlatformUUID'; -import type { ChallengeRunParams, ChallengeRunResult } from '../../shared/ChallengeRunTypes'; +import type { CodeShellSentinelParams, CodeShellSentinelResult } from '../../shared/CodeShellSentinelTypes'; -console.log('πŸ§ͺ ChallengeRun Command Unit Tests'); +console.log('πŸ§ͺ CodeShellSentinel Command Unit Tests'); function assert(condition: boolean, message: string): void { if (!condition) { @@ -26,16 +26,16 @@ function assert(condition: boolean, message: string): void { } /** - * Mock command that implements Challenge Run logic for testing + * Mock command that implements Code Shell Sentinel logic for testing */ -async function mockChallengeRunCommand(params: ChallengeRunParams): Promise { +async function mockCodeShellSentinelCommand(params: CodeShellSentinelParams): Promise { // TODO: Validate required parameters (BEST PRACTICE) // Example: // if (!params.requiredParam || params.requiredParam.trim() === '') { // throw new ValidationError( // 'requiredParam', // `Missing required parameter 'requiredParam'. ` + - // `Use the help tool with 'Challenge Run' or see the Challenge Run README for usage information.` + // `Use the help tool with 'Code Shell Sentinel' or see the Code Shell Sentinel README for usage information.` // ); // } @@ -48,20 +48,20 @@ async function mockChallengeRunCommand(params: ChallengeRunParams): Promise { - console.log('\n⚑ Test 2: Mock Challenge Run command execution'); +async function testMockCodeShellSentinelExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Shell Sentinel command execution'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); // Test mock execution - const params: ChallengeRunParams = { + const params: CodeShellSentinelParams = { // TODO: Add your parameters here context, sessionId }; - const result = await mockChallengeRunCommand(params); + const result = await mockCodeShellSentinelCommand(params); // Validate result structure assert(result.success === true, 'Mock result shows success'); @@ -104,7 +104,7 @@ async function testMockChallengeRunExecution(): Promise { * This test ensures your command throws ValidationError * when required parameters are missing (BEST PRACTICE) */ -async function testChallengeRunRequiredParams(): Promise { +async function testCodeShellSentinelRequiredParams(): Promise { console.log('\n🚨 Test 3: Required parameter validation'); // TODO: Uncomment when implementing validation @@ -114,13 +114,13 @@ async function testChallengeRunRequiredParams(): Promise { // TODO: Test cases that should throw ValidationError // Example: // const testCases = [ - // { params: {} as ChallengeRunParams, desc: 'Missing requiredParam' }, - // { params: { requiredParam: '' } as ChallengeRunParams, desc: 'Empty requiredParam' }, + // { params: {} as CodeShellSentinelParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeShellSentinelParams, desc: 'Empty requiredParam' }, // ]; // // for (const testCase of testCases) { // try { - // await mockChallengeRunCommand({ ...testCase.params, context, sessionId }); + // await mockCodeShellSentinelCommand({ ...testCase.params, context, sessionId }); // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); // } catch (error) { // if (error instanceof ValidationError) { @@ -139,7 +139,7 @@ async function testChallengeRunRequiredParams(): Promise { /** * Test 4: Optional parameter handling */ -async function testChallengeRunOptionalParams(): Promise { +async function testCodeShellSentinelOptionalParams(): Promise { console.log('\nπŸ”§ Test 4: Optional parameter handling'); // TODO: Uncomment when implementing optional param tests @@ -147,24 +147,24 @@ async function testChallengeRunOptionalParams(): Promise { // const sessionId = generateUUID(); // TODO: Test WITHOUT optional param (should use default) - // const paramsWithoutOptional: ChallengeRunParams = { + // const paramsWithoutOptional: CodeShellSentinelParams = { // requiredParam: 'test', // context, // sessionId // }; // - // const resultWithoutOptional = await mockChallengeRunCommand(paramsWithoutOptional); + // const resultWithoutOptional = await mockCodeShellSentinelCommand(paramsWithoutOptional); // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); // TODO: Test WITH optional param - // const paramsWithOptional: ChallengeRunParams = { + // const paramsWithOptional: CodeShellSentinelParams = { // requiredParam: 'test', // optionalParam: true, // context, // sessionId // }; // - // const resultWithOptional = await mockChallengeRunCommand(paramsWithOptional); + // const resultWithOptional = await mockCodeShellSentinelCommand(paramsWithOptional); // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); console.log('βœ… Optional parameter handling validated'); @@ -173,40 +173,40 @@ async function testChallengeRunOptionalParams(): Promise { /** * Test 5: Performance validation */ -async function testChallengeRunPerformance(): Promise { - console.log('\n⚑ Test 5: ChallengeRun performance validation'); +async function testCodeShellSentinelPerformance(): Promise { + console.log('\n⚑ Test 5: CodeShellSentinel performance validation'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); const startTime = Date.now(); - await mockChallengeRunCommand({ + await mockCodeShellSentinelCommand({ // TODO: Add your parameters context, sessionId - } as ChallengeRunParams); + } as CodeShellSentinelParams); const executionTime = Date.now() - startTime; - assert(executionTime < 100, `ChallengeRun completed in ${executionTime}ms (under 100ms limit)`); + assert(executionTime < 100, `CodeShellSentinel completed in ${executionTime}ms (under 100ms limit)`); } /** * Test 6: Result structure validation */ -async function testChallengeRunResultStructure(): Promise { - console.log('\nπŸ” Test 6: ChallengeRun result structure validation'); +async function testCodeShellSentinelResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeShellSentinel result structure validation'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); // Test various scenarios - const basicResult = await mockChallengeRunCommand({ + const basicResult = await mockCodeShellSentinelCommand({ // TODO: Add your parameters context, sessionId - } as ChallengeRunParams); + } as CodeShellSentinelParams); assert(basicResult.success === true, 'Result has success field'); // TODO: Add assertions for your result fields @@ -220,18 +220,18 @@ async function testChallengeRunResultStructure(): Promise { /** * Run all unit tests */ -async function runAllChallengeRunUnitTests(): Promise { - console.log('πŸš€ Starting ChallengeRun Command Unit Tests\n'); +async function runAllCodeShellSentinelUnitTests(): Promise { + console.log('πŸš€ Starting CodeShellSentinel Command Unit Tests\n'); try { - testChallengeRunCommandStructure(); - await testMockChallengeRunExecution(); - await testChallengeRunRequiredParams(); - await testChallengeRunOptionalParams(); - await testChallengeRunPerformance(); - await testChallengeRunResultStructure(); - - console.log('\nπŸŽ‰ ALL ChallengeRun UNIT TESTS PASSED!'); + testCodeShellSentinelCommandStructure(); + await testMockCodeShellSentinelExecution(); + await testCodeShellSentinelRequiredParams(); + await testCodeShellSentinelOptionalParams(); + await testCodeShellSentinelPerformance(); + await testCodeShellSentinelResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeShellSentinel UNIT TESTS PASSED!'); console.log('πŸ“‹ Validated:'); console.log(' βœ… Command structure and parameter validation'); console.log(' βœ… Mock command execution patterns'); @@ -243,7 +243,7 @@ async function runAllChallengeRunUnitTests(): Promise { console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); } catch (error) { - console.error('\n❌ ChallengeRun unit tests failed:', (error as Error).message); + console.error('\n❌ CodeShellSentinel unit tests failed:', (error as Error).message); if ((error as Error).stack) { console.error((error as Error).stack); } @@ -253,7 +253,7 @@ async function runAllChallengeRunUnitTests(): Promise { // Run if called directly if (require.main === module) { - void runAllChallengeRunUnitTests(); + void runAllCodeShellSentinelUnitTests(); } else { - module.exports = { runAllChallengeRunUnitTests }; + module.exports = { runAllCodeShellSentinelUnitTests }; } diff --git a/src/debug/jtag/commands/challenge/run/.npmignore b/src/debug/jtag/commands/code/shell/watch/.npmignore similarity index 100% rename from src/debug/jtag/commands/challenge/run/.npmignore rename to src/debug/jtag/commands/code/shell/watch/.npmignore diff --git a/src/debug/jtag/commands/code/shell/watch/README.md b/src/debug/jtag/commands/code/shell/watch/README.md new file mode 100644 index 000000000..7d3723cdc --- /dev/null +++ b/src/debug/jtag/commands/code/shell/watch/README.md @@ -0,0 +1,165 @@ +# Code Shell Watch Command + +Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true. + +## Table of Contents + +- [Usage](#usage) + - [CLI Usage](#cli-usage) + - [Tool Usage](#tool-usage) +- [Parameters](#parameters) +- [Result](#result) +- [Examples](#examples) +- [Testing](#testing) + - [Unit Tests](#unit-tests) + - [Integration Tests](#integration-tests) +- [Getting Help](#getting-help) +- [Access Level](#access-level) +- [Implementation Notes](#implementation-notes) + +## Usage + +### CLI Usage + +From the command line using the jtag CLI: + +```bash +./jtag code/shell/watch --executionId= +``` + +### Tool Usage + +From Persona tools or programmatic access using `Commands.execute()`: + +```typescript +import { Commands } from '@system/core/shared/Commands'; + +const result = await Commands.execute('code/shell/watch', { + // your parameters here +}); +``` + +## Parameters + +- **executionId** (required): `string` - Execution handle from shell/exec + +## Result + +Returns `CodeShellWatchResult` with: + +Returns CommandResult with: +- **executionId**: `string` - Echo of the execution handle +- **lines**: `ClassifiedLine[]` - New output lines since last watch call (classified and filtered) +- **finished**: `boolean` - True when execution is complete +- **exitCode**: `number` - Process exit code (present when finished) + +## Examples + +### Watch a running build for new output + +```bash +./jtag code/shell/watch --executionId="exec-abc123" +``` + +**Expected result:** +{ executionId: "exec-abc123", lines: [{text: "Compiling...", classification: "Info"}], finished: false } + +### Final watch call when execution completes + +```bash +./jtag code/shell/watch --executionId="exec-abc123" +``` + +**Expected result:** +{ executionId: "exec-abc123", lines: [], finished: true, exitCode: 0 } + +## Getting Help + +### Using the Help Tool + +Get detailed usage information for this command: + +**CLI:** +```bash +./jtag help code/shell/watch +``` + +**Tool:** +```typescript +// Use your help tool with command name 'code/shell/watch' +``` + +### Using the README Tool + +Access this README programmatically: + +**CLI:** +```bash +./jtag readme code/shell/watch +``` + +**Tool:** +```typescript +// Use your readme tool with command name 'code/shell/watch' +``` + +## Testing + +### Unit Tests + +Test command logic in isolation using mock dependencies: + +```bash +# Run unit tests (no server required) +npx tsx commands/Code Shell Watch/test/unit/CodeShellWatchCommand.test.ts +``` + +**What's tested:** +- Command structure and parameter validation +- Mock command execution patterns +- Required parameter validation (throws ValidationError) +- Optional parameter handling (sensible defaults) +- Performance requirements +- Assertion utility helpers + +**TDD Workflow:** +1. Write/modify unit test first (test-driven development) +2. Run test, see it fail +3. Implement feature +4. Run test, see it pass +5. Refactor if needed + +### Integration Tests + +Test command with real client connections and system integration: + +```bash +# Prerequisites: Server must be running +npm start # Wait 90+ seconds for deployment + +# Run integration tests +npx tsx commands/Code Shell Watch/test/integration/CodeShellWatchIntegration.test.ts +``` + +**What's tested:** +- Client connection to live system +- Real command execution via WebSocket +- ValidationError handling for missing params +- Optional parameter defaults +- Performance under load +- Various parameter combinations + +**Best Practice:** +Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). + +## Access Level + +**ai-safe** - Safe for AI personas to call autonomously + +## Implementation Notes + +- **Shared Logic**: Core business logic in `shared/CodeShellWatchTypes.ts` +- **Browser**: Browser-specific implementation in `browser/CodeShellWatchBrowserCommand.ts` +- **Server**: Server-specific implementation in `server/CodeShellWatchServerCommand.ts` +- **Unit Tests**: Isolated testing in `test/unit/CodeShellWatchCommand.test.ts` +- **Integration Tests**: System testing in `test/integration/CodeShellWatchIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/shell/watch/browser/CodeShellWatchBrowserCommand.ts b/src/debug/jtag/commands/code/shell/watch/browser/CodeShellWatchBrowserCommand.ts new file mode 100644 index 000000000..95b35707d --- /dev/null +++ b/src/debug/jtag/commands/code/shell/watch/browser/CodeShellWatchBrowserCommand.ts @@ -0,0 +1,21 @@ +/** + * Code Shell Watch Command - Browser Implementation + * + * Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import type { CodeShellWatchParams, CodeShellWatchResult } from '../shared/CodeShellWatchTypes'; + +export class CodeShellWatchBrowserCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/shell/watch', context, subpath, commander); + } + + async execute(params: CodeShellWatchParams): Promise { + console.log('🌐 BROWSER: Delegating Code Shell Watch to server'); + return await this.remoteExecute(params); + } +} diff --git a/src/debug/jtag/commands/challenge/list/package.json b/src/debug/jtag/commands/code/shell/watch/package.json similarity index 53% rename from src/debug/jtag/commands/challenge/list/package.json rename to src/debug/jtag/commands/code/shell/watch/package.json index f3e571ec9..c24cc6f88 100644 --- a/src/debug/jtag/commands/challenge/list/package.json +++ b/src/debug/jtag/commands/code/shell/watch/package.json @@ -1,13 +1,13 @@ { - "name": "@jtag-commands/challenge/list", + "name": "@jtag-commands/code/shell/watch", "version": "1.0.0", - "description": "List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training.", - "main": "server/ChallengeListServerCommand.ts", - "types": "shared/ChallengeListTypes.ts", + "description": "Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true.", + "main": "server/CodeShellWatchServerCommand.ts", + "types": "shared/CodeShellWatchTypes.ts", "scripts": { "test": "npm run test:unit && npm run test:integration", "test:unit": "npx vitest run test/unit/*.test.ts", - "test:integration": "npx tsx test/integration/ChallengeListIntegration.test.ts", + "test:integration": "npx tsx test/integration/CodeShellWatchIntegration.test.ts", "lint": "npx eslint **/*.ts", "typecheck": "npx tsc --noEmit" }, @@ -24,7 +24,7 @@ "keywords": [ "jtag", "command", - "challenge/list" + "code/shell/watch" ], "license": "MIT", "author": "", diff --git a/src/debug/jtag/commands/code/shell/watch/server/CodeShellWatchServerCommand.ts b/src/debug/jtag/commands/code/shell/watch/server/CodeShellWatchServerCommand.ts new file mode 100644 index 000000000..3134b1486 --- /dev/null +++ b/src/debug/jtag/commands/code/shell/watch/server/CodeShellWatchServerCommand.ts @@ -0,0 +1,47 @@ +/** + * Code Shell Watch Command - Server Implementation + * + * Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. + * Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true. + */ + +import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; +import type { JTAGContext } from '@system/core/types/JTAGTypes'; +import { ValidationError } from '@system/core/types/ErrorTypes'; +import type { CodeShellWatchParams, CodeShellWatchResult } from '../shared/CodeShellWatchTypes'; +import { createCodeShellWatchResultFromParams } from '../shared/CodeShellWatchTypes'; +import { CodeDaemon } from '@daemons/code-daemon/shared/CodeDaemon'; + +export class CodeShellWatchServerCommand extends CommandBase { + + constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { + super('code/shell/watch', context, subpath, commander); + } + + async execute(params: CodeShellWatchParams): Promise { + if (!params.executionId || params.executionId.trim() === '') { + throw new ValidationError( + 'executionId', + `Missing required parameter 'executionId'. Use the help tool with 'Code Shell Watch' or see the code/shell/watch README for usage.` + ); + } + + if (!params.userId) { + throw new ValidationError( + 'userId', + 'Shell watch operations require a userId (auto-injected for persona tool calls).' + ); + } + const personaId = params.userId; + + const result = await CodeDaemon.shellWatch(personaId, params.executionId); + + return createCodeShellWatchResultFromParams(params, { + success: true, + executionId: result.execution_id, + lines: result.lines, + finished: result.finished, + exitCode: result.exit_code, + }); + } +} diff --git a/src/debug/jtag/commands/code/shell/watch/shared/CodeShellWatchTypes.ts b/src/debug/jtag/commands/code/shell/watch/shared/CodeShellWatchTypes.ts new file mode 100644 index 000000000..168e38b6f --- /dev/null +++ b/src/debug/jtag/commands/code/shell/watch/shared/CodeShellWatchTypes.ts @@ -0,0 +1,96 @@ +/** + * Code Shell Watch Command - Shared Types + * + * Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. + * Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true. + */ + +import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; +import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; +import { Commands } from '@system/core/shared/Commands'; +import type { JTAGError } from '@system/core/types/ErrorTypes'; +import type { UUID } from '@system/core/types/CrossPlatformUUID'; +import type { ClassifiedLine } from '@shared/generated/code/ClassifiedLine'; + +/** + * Code Shell Watch Command Parameters + */ +export interface CodeShellWatchParams extends CommandParams { + /** Execution handle from shell/exec */ + executionId: string; +} + +/** + * Factory function for creating CodeShellWatchParams + */ +export const createCodeShellWatchParams = ( + context: JTAGContext, + sessionId: UUID, + data: { + executionId: string; + } +): CodeShellWatchParams => createPayload(context, sessionId, { + ...data +}); + +/** + * Code Shell Watch Command Result + */ +export interface CodeShellWatchResult extends CommandResult { + success: boolean; + /** Echo of the execution handle */ + executionId: string; + /** New output lines since last watch call (classified and filtered) */ + lines: ClassifiedLine[]; + /** True when execution is complete */ + finished: boolean; + /** Process exit code (present when finished) */ + exitCode?: number; + error?: JTAGError; +} + +/** + * Factory function for creating CodeShellWatchResult with defaults + */ +export const createCodeShellWatchResult = ( + context: JTAGContext, + sessionId: UUID, + data: { + success: boolean; + executionId?: string; + lines?: ClassifiedLine[]; + finished?: boolean; + exitCode?: number; + error?: JTAGError; + } +): CodeShellWatchResult => createPayload(context, sessionId, { + executionId: data.executionId ?? '', + lines: data.lines ?? [], + finished: data.finished ?? false, + exitCode: data.exitCode, + ...data +}); + +/** + * Smart Code Shell Watch-specific inheritance from params + * Auto-inherits context and sessionId from params + * Must provide all required result fields + */ +export const createCodeShellWatchResultFromParams = ( + params: CodeShellWatchParams, + differences: Omit +): CodeShellWatchResult => transformPayload(params, differences); + +/** + * Code Shell Watch β€” Type-safe command executor + * + * Usage: + * import { CodeShellWatch } from '...shared/CodeShellWatchTypes'; + * const result = await CodeShellWatch.execute({ ... }); + */ +export const CodeShellWatch = { + execute(params: CommandInput): Promise { + return Commands.execute('code/shell/watch', params as Partial); + }, + commandName: 'code/shell/watch' as const, +} as const; diff --git a/src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts b/src/debug/jtag/commands/code/shell/watch/test/integration/CodeShellWatchIntegration.test.ts similarity index 80% rename from src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts rename to src/debug/jtag/commands/code/shell/watch/test/integration/CodeShellWatchIntegration.test.ts index 4d007ce5d..a7c7e53f5 100644 --- a/src/debug/jtag/commands/challenge/list/test/integration/ChallengeListIntegration.test.ts +++ b/src/debug/jtag/commands/code/shell/watch/test/integration/CodeShellWatchIntegration.test.ts @@ -1,12 +1,12 @@ #!/usr/bin/env tsx /** - * ChallengeList Command Integration Tests + * CodeShellWatch Command Integration Tests * - * Tests Challenge List command against the LIVE RUNNING SYSTEM. + * Tests Code Shell Watch command against the LIVE RUNNING SYSTEM. * This is NOT a mock test - it tests real commands, real events, real widgets. * * Generated by: ./jtag generate - * Run with: npx tsx commands/Challenge List/test/integration/ChallengeListIntegration.test.ts + * Run with: npx tsx commands/Code Shell Watch/test/integration/CodeShellWatchIntegration.test.ts * * PREREQUISITES: * - Server must be running: npm start (wait 90+ seconds) @@ -15,7 +15,7 @@ import { jtag } from '@server/server-index'; -console.log('πŸ§ͺ ChallengeList Command Integration Tests'); +console.log('πŸ§ͺ CodeShellWatch Command Integration Tests'); function assert(condition: boolean, message: string): void { if (!condition) { @@ -39,22 +39,22 @@ async function testSystemConnection(): Promise>): Promise { - console.log('\n⚑ Test 2: Executing Challenge List command'); + console.log('\n⚑ Test 2: Executing Code Shell Watch command'); // TODO: Replace with your actual command parameters - const result = await client.commands['Challenge List']({ + const result = await client.commands['Code Shell Watch']({ // Add your required parameters here // Example: name: 'test-value' }); console.log(' πŸ“Š Result:', JSON.stringify(result, null, 2)); - assert(result !== null, 'Challenge List returned result'); + assert(result !== null, 'Code Shell Watch returned result'); // TODO: Add assertions for your specific result fields - // assert(result.success === true, 'Challenge List succeeded'); + // assert(result.success === true, 'Code Shell Watch succeeded'); // assert(result.yourField !== undefined, 'Result has yourField'); } @@ -66,7 +66,7 @@ async function testRequiredParameters(_client: Awaited> // // for (let i = 0; i < iterations; i++) { // const start = Date.now(); - // await _client.commands['Challenge List']({ /* params */ }); + // await _client.commands['Code Shell Watch']({ /* params */ }); // times.push(Date.now() - start); // } // @@ -137,7 +137,7 @@ async function testWidgetIntegration(_client: Awaited setTimeout(resolve, 1000)); // Wait for event propagation // const after = await client.commands['debug/widget-state']({ widgetSelector: 'your-widget' }); // @@ -149,8 +149,8 @@ async function testWidgetIntegration(_client: Awaited { - console.log('πŸš€ Starting ChallengeList Integration Tests\n'); +async function runAllCodeShellWatchIntegrationTests(): Promise { + console.log('πŸš€ Starting CodeShellWatch Integration Tests\n'); console.log('πŸ“‹ Testing against LIVE system (not mocks)\n'); try { @@ -161,7 +161,7 @@ async function runAllChallengeListIntegrationTests(): Promise { await testPerformance(client); await testWidgetIntegration(client); - console.log('\nπŸŽ‰ ALL ChallengeList INTEGRATION TESTS PASSED!'); + console.log('\nπŸŽ‰ ALL CodeShellWatch INTEGRATION TESTS PASSED!'); console.log('πŸ“‹ Validated:'); console.log(' βœ… Live system connection'); console.log(' βœ… Command execution on real system'); @@ -176,7 +176,7 @@ async function runAllChallengeListIntegrationTests(): Promise { console.log(' - Real cross-daemon communication'); } catch (error) { - console.error('\n❌ ChallengeList integration tests failed:', (error as Error).message); + console.error('\n❌ CodeShellWatch integration tests failed:', (error as Error).message); if ((error as Error).stack) { console.error((error as Error).stack); } @@ -190,7 +190,7 @@ async function runAllChallengeListIntegrationTests(): Promise { // Run if called directly if (require.main === module) { - void runAllChallengeListIntegrationTests(); + void runAllCodeShellWatchIntegrationTests(); } else { - module.exports = { runAllChallengeListIntegrationTests }; + module.exports = { runAllCodeShellWatchIntegrationTests }; } diff --git a/src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts b/src/debug/jtag/commands/code/shell/watch/test/unit/CodeShellWatchCommand.test.ts similarity index 67% rename from src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts rename to src/debug/jtag/commands/code/shell/watch/test/unit/CodeShellWatchCommand.test.ts index e5b44f93f..fdff54e58 100644 --- a/src/debug/jtag/commands/challenge/list/test/unit/ChallengeListCommand.test.ts +++ b/src/debug/jtag/commands/code/shell/watch/test/unit/CodeShellWatchCommand.test.ts @@ -1,12 +1,12 @@ #!/usr/bin/env tsx /** - * ChallengeList Command Unit Tests + * CodeShellWatch Command Unit Tests * - * Tests Challenge List command logic in isolation using mock dependencies. + * Tests Code Shell Watch command logic in isolation using mock dependencies. * This is a REFERENCE EXAMPLE showing best practices for command testing. * * Generated by: ./jtag generate - * Run with: npx tsx commands/Challenge List/test/unit/ChallengeListCommand.test.ts + * Run with: npx tsx commands/Code Shell Watch/test/unit/CodeShellWatchCommand.test.ts * * NOTE: This is a self-contained test (no external test utilities needed). * Use this as a template for your own command tests. @@ -14,9 +14,9 @@ // import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests import { generateUUID } from '@system/core/types/CrossPlatformUUID'; -import type { ChallengeListParams, ChallengeListResult } from '../../shared/ChallengeListTypes'; +import type { CodeShellWatchParams, CodeShellWatchResult } from '../../shared/CodeShellWatchTypes'; -console.log('πŸ§ͺ ChallengeList Command Unit Tests'); +console.log('πŸ§ͺ CodeShellWatch Command Unit Tests'); function assert(condition: boolean, message: string): void { if (!condition) { @@ -26,16 +26,16 @@ function assert(condition: boolean, message: string): void { } /** - * Mock command that implements Challenge List logic for testing + * Mock command that implements Code Shell Watch logic for testing */ -async function mockChallengeListCommand(params: ChallengeListParams): Promise { +async function mockCodeShellWatchCommand(params: CodeShellWatchParams): Promise { // TODO: Validate required parameters (BEST PRACTICE) // Example: // if (!params.requiredParam || params.requiredParam.trim() === '') { // throw new ValidationError( // 'requiredParam', // `Missing required parameter 'requiredParam'. ` + - // `Use the help tool with 'Challenge List' or see the Challenge List README for usage information.` + // `Use the help tool with 'Code Shell Watch' or see the Code Shell Watch README for usage information.` // ); // } @@ -48,20 +48,20 @@ async function mockChallengeListCommand(params: ChallengeListParams): Promise { - console.log('\n⚑ Test 2: Mock Challenge List command execution'); +async function testMockCodeShellWatchExecution(): Promise { + console.log('\n⚑ Test 2: Mock Code Shell Watch command execution'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); // Test mock execution - const params: ChallengeListParams = { + const params: CodeShellWatchParams = { // TODO: Add your parameters here context, sessionId }; - const result = await mockChallengeListCommand(params); + const result = await mockCodeShellWatchCommand(params); // Validate result structure assert(result.success === true, 'Mock result shows success'); @@ -104,7 +104,7 @@ async function testMockChallengeListExecution(): Promise { * This test ensures your command throws ValidationError * when required parameters are missing (BEST PRACTICE) */ -async function testChallengeListRequiredParams(): Promise { +async function testCodeShellWatchRequiredParams(): Promise { console.log('\n🚨 Test 3: Required parameter validation'); // TODO: Uncomment when implementing validation @@ -114,13 +114,13 @@ async function testChallengeListRequiredParams(): Promise { // TODO: Test cases that should throw ValidationError // Example: // const testCases = [ - // { params: {} as ChallengeListParams, desc: 'Missing requiredParam' }, - // { params: { requiredParam: '' } as ChallengeListParams, desc: 'Empty requiredParam' }, + // { params: {} as CodeShellWatchParams, desc: 'Missing requiredParam' }, + // { params: { requiredParam: '' } as CodeShellWatchParams, desc: 'Empty requiredParam' }, // ]; // // for (const testCase of testCases) { // try { - // await mockChallengeListCommand({ ...testCase.params, context, sessionId }); + // await mockCodeShellWatchCommand({ ...testCase.params, context, sessionId }); // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); // } catch (error) { // if (error instanceof ValidationError) { @@ -139,7 +139,7 @@ async function testChallengeListRequiredParams(): Promise { /** * Test 4: Optional parameter handling */ -async function testChallengeListOptionalParams(): Promise { +async function testCodeShellWatchOptionalParams(): Promise { console.log('\nπŸ”§ Test 4: Optional parameter handling'); // TODO: Uncomment when implementing optional param tests @@ -147,24 +147,24 @@ async function testChallengeListOptionalParams(): Promise { // const sessionId = generateUUID(); // TODO: Test WITHOUT optional param (should use default) - // const paramsWithoutOptional: ChallengeListParams = { + // const paramsWithoutOptional: CodeShellWatchParams = { // requiredParam: 'test', // context, // sessionId // }; // - // const resultWithoutOptional = await mockChallengeListCommand(paramsWithoutOptional); + // const resultWithoutOptional = await mockCodeShellWatchCommand(paramsWithoutOptional); // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); // TODO: Test WITH optional param - // const paramsWithOptional: ChallengeListParams = { + // const paramsWithOptional: CodeShellWatchParams = { // requiredParam: 'test', // optionalParam: true, // context, // sessionId // }; // - // const resultWithOptional = await mockChallengeListCommand(paramsWithOptional); + // const resultWithOptional = await mockCodeShellWatchCommand(paramsWithOptional); // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); console.log('βœ… Optional parameter handling validated'); @@ -173,40 +173,40 @@ async function testChallengeListOptionalParams(): Promise { /** * Test 5: Performance validation */ -async function testChallengeListPerformance(): Promise { - console.log('\n⚑ Test 5: ChallengeList performance validation'); +async function testCodeShellWatchPerformance(): Promise { + console.log('\n⚑ Test 5: CodeShellWatch performance validation'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); const startTime = Date.now(); - await mockChallengeListCommand({ + await mockCodeShellWatchCommand({ // TODO: Add your parameters context, sessionId - } as ChallengeListParams); + } as CodeShellWatchParams); const executionTime = Date.now() - startTime; - assert(executionTime < 100, `ChallengeList completed in ${executionTime}ms (under 100ms limit)`); + assert(executionTime < 100, `CodeShellWatch completed in ${executionTime}ms (under 100ms limit)`); } /** * Test 6: Result structure validation */ -async function testChallengeListResultStructure(): Promise { - console.log('\nπŸ” Test 6: ChallengeList result structure validation'); +async function testCodeShellWatchResultStructure(): Promise { + console.log('\nπŸ” Test 6: CodeShellWatch result structure validation'); const context = { environment: 'server' as const }; const sessionId = generateUUID(); // Test various scenarios - const basicResult = await mockChallengeListCommand({ + const basicResult = await mockCodeShellWatchCommand({ // TODO: Add your parameters context, sessionId - } as ChallengeListParams); + } as CodeShellWatchParams); assert(basicResult.success === true, 'Result has success field'); // TODO: Add assertions for your result fields @@ -220,18 +220,18 @@ async function testChallengeListResultStructure(): Promise { /** * Run all unit tests */ -async function runAllChallengeListUnitTests(): Promise { - console.log('πŸš€ Starting ChallengeList Command Unit Tests\n'); +async function runAllCodeShellWatchUnitTests(): Promise { + console.log('πŸš€ Starting CodeShellWatch Command Unit Tests\n'); try { - testChallengeListCommandStructure(); - await testMockChallengeListExecution(); - await testChallengeListRequiredParams(); - await testChallengeListOptionalParams(); - await testChallengeListPerformance(); - await testChallengeListResultStructure(); - - console.log('\nπŸŽ‰ ALL ChallengeList UNIT TESTS PASSED!'); + testCodeShellWatchCommandStructure(); + await testMockCodeShellWatchExecution(); + await testCodeShellWatchRequiredParams(); + await testCodeShellWatchOptionalParams(); + await testCodeShellWatchPerformance(); + await testCodeShellWatchResultStructure(); + + console.log('\nπŸŽ‰ ALL CodeShellWatch UNIT TESTS PASSED!'); console.log('πŸ“‹ Validated:'); console.log(' βœ… Command structure and parameter validation'); console.log(' βœ… Mock command execution patterns'); @@ -243,7 +243,7 @@ async function runAllChallengeListUnitTests(): Promise { console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); } catch (error) { - console.error('\n❌ ChallengeList unit tests failed:', (error as Error).message); + console.error('\n❌ CodeShellWatch unit tests failed:', (error as Error).message); if ((error as Error).stack) { console.error((error as Error).stack); } @@ -253,7 +253,7 @@ async function runAllChallengeListUnitTests(): Promise { // Run if called directly if (require.main === module) { - void runAllChallengeListUnitTests(); + void runAllCodeShellWatchUnitTests(); } else { - module.exports = { runAllChallengeListUnitTests }; + module.exports = { runAllCodeShellWatchUnitTests }; } diff --git a/src/debug/jtag/commands/code/task/.npmignore b/src/debug/jtag/commands/code/task/.npmignore deleted file mode 100644 index f74ad6b8a..000000000 --- a/src/debug/jtag/commands/code/task/.npmignore +++ /dev/null @@ -1,20 +0,0 @@ -# Development files -.eslintrc* -tsconfig*.json -vitest.config.ts - -# Build artifacts -*.js.map -*.d.ts.map - -# IDE -.vscode/ -.idea/ - -# Logs -*.log -npm-debug.log* - -# OS files -.DS_Store -Thumbs.db diff --git a/src/debug/jtag/commands/code/task/README.md b/src/debug/jtag/commands/code/task/README.md deleted file mode 100644 index 1c5d2228b..000000000 --- a/src/debug/jtag/commands/code/task/README.md +++ /dev/null @@ -1,200 +0,0 @@ -# Code Task Command - -Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. - -## Table of Contents - -- [Usage](#usage) - - [CLI Usage](#cli-usage) - - [Tool Usage](#tool-usage) -- [Parameters](#parameters) -- [Result](#result) -- [Examples](#examples) -- [Testing](#testing) - - [Unit Tests](#unit-tests) - - [Integration Tests](#integration-tests) -- [Getting Help](#getting-help) -- [Access Level](#access-level) -- [Implementation Notes](#implementation-notes) - -## Usage - -### CLI Usage - -From the command line using the jtag CLI: - -```bash -./jtag code/task --description= -``` - -### Tool Usage - -From Persona tools or programmatic access using `Commands.execute()`: - -```typescript -import { Commands } from '@system/core/shared/Commands'; - -const result = await Commands.execute('code/task', { - // your parameters here -}); -``` - -## Parameters - -- **description** (required): `string` - What the coding task should accomplish (natural language) -- **taskType** (optional): `string` - Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' -- **relevantFiles** (optional): `string[]` - File paths already known to be relevant (hints for discovery phase) -- **dryRun** (optional): `boolean` - Execute read-only commands normally but mock writes. Returns predicted changes without modifying files -- **securityTier** (optional): `string` - Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level -- **delegationEnabled** (optional): `boolean` - Enable multi-agent delegation for parallel execution across file clusters -- **maxDurationMs** (optional): `number` - Maximum execution time in milliseconds (default: 120000) -- **maxToolCalls** (optional): `number` - Maximum number of tool calls allowed (default: 15) - -## Result - -Returns `CodeTaskResult` with: - -Returns CommandResult with: -- **status**: `string` - Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' -- **summary**: `string` - Human-readable summary of what was accomplished -- **planSummary**: `string` - The LLM-generated plan summary -- **riskLevel**: `string` - Assessed risk level: 'low' | 'medium' | 'high' | 'critical' -- **securityTier**: `string` - Security tier used for execution -- **stepsTotal**: `number` - Total number of steps in the plan -- **stepsCompleted**: `number` - Number of steps that completed successfully -- **filesModified**: `string[]` - Files that were modified during execution -- **filesCreated**: `string[]` - Files that were created during execution -- **totalToolCalls**: `number` - Total tool calls used -- **totalDurationMs**: `number` - Total execution time in milliseconds -- **changeIds**: `string[]` - Change IDs from file operations (for potential undo) -- **errors**: `string[]` - Errors encountered during execution -- **proposalId**: `string` - Governance proposal ID if plan requires approval (status='pending_approval') - -## Examples - -### Simple code edit task - -```bash -./jtag code/task --description="Add input validation to the login function in auth.ts" -``` - -**Expected result:** -{ status: "completed", stepsCompleted: 3, filesModified: ["auth.ts"] } - -### Dry run to preview changes - -```bash -./jtag code/task --description="Refactor UserService to use dependency injection" --dryRun=true -``` - -**Expected result:** -{ status: "completed", filesModified: [], summary: "Dry run: would modify 3 files" } - -### Discovery-only task - -```bash -./jtag code/task --description="Find all files using deprecated API" --taskType="discovery" --securityTier="discovery" -``` - -**Expected result:** -{ status: "completed", stepsCompleted: 2, filesModified: [] } - -### With relevant file hints - -```bash -./jtag code/task --description="Fix the off-by-one error" --relevantFiles='["src/utils/pagination.ts"]' -``` - -**Expected result:** -{ status: "completed", filesModified: ["src/utils/pagination.ts"] } - -## Getting Help - -### Using the Help Tool - -Get detailed usage information for this command: - -**CLI:** -```bash -./jtag help code/task -``` - -**Tool:** -```typescript -// Use your help tool with command name 'code/task' -``` - -### Using the README Tool - -Access this README programmatically: - -**CLI:** -```bash -./jtag readme code/task -``` - -**Tool:** -```typescript -// Use your readme tool with command name 'code/task' -``` - -## Testing - -### Unit Tests - -Test command logic in isolation using mock dependencies: - -```bash -# Run unit tests (no server required) -npx tsx commands/Code Task/test/unit/CodeTaskCommand.test.ts -``` - -**What's tested:** -- Command structure and parameter validation -- Mock command execution patterns -- Required parameter validation (throws ValidationError) -- Optional parameter handling (sensible defaults) -- Performance requirements -- Assertion utility helpers - -**TDD Workflow:** -1. Write/modify unit test first (test-driven development) -2. Run test, see it fail -3. Implement feature -4. Run test, see it pass -5. Refactor if needed - -### Integration Tests - -Test command with real client connections and system integration: - -```bash -# Prerequisites: Server must be running -npm start # Wait 90+ seconds for deployment - -# Run integration tests -npx tsx commands/Code Task/test/integration/CodeTaskIntegration.test.ts -``` - -**What's tested:** -- Client connection to live system -- Real command execution via WebSocket -- ValidationError handling for missing params -- Optional parameter defaults -- Performance under load -- Various parameter combinations - -**Best Practice:** -Run unit tests frequently during development (fast feedback). Run integration tests before committing (verify system integration). - -## Access Level - -**ai-safe** - Safe for AI personas to call autonomously - -## Implementation Notes - -- **Shared Logic**: Core business logic in `shared/CodeTaskTypes.ts` -- **Browser**: Browser-specific implementation in `browser/CodeTaskBrowserCommand.ts` -- **Server**: Server-specific implementation in `server/CodeTaskServerCommand.ts` -- **Unit Tests**: Isolated testing in `test/unit/CodeTaskCommand.test.ts` -- **Integration Tests**: System testing in `test/integration/CodeTaskIntegration.test.ts` diff --git a/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts b/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts deleted file mode 100644 index 839c5eb8f..000000000 --- a/src/debug/jtag/commands/code/task/browser/CodeTaskBrowserCommand.ts +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Code Task Command - Browser Implementation - * - * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import type { CodeTaskParams, CodeTaskResult } from '../shared/CodeTaskTypes'; - -export class CodeTaskBrowserCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('code/task', context, subpath, commander); - } - - async execute(params: CodeTaskParams): Promise { - console.log('🌐 BROWSER: Delegating Code Task to server'); - return await this.remoteExecute(params); - } -} diff --git a/src/debug/jtag/commands/code/task/package.json b/src/debug/jtag/commands/code/task/package.json deleted file mode 100644 index 4a3f54659..000000000 --- a/src/debug/jtag/commands/code/task/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "@jtag-commands/code/task", - "version": "1.0.0", - "description": "Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation.", - "main": "server/CodeTaskServerCommand.ts", - "types": "shared/CodeTaskTypes.ts", - "scripts": { - "test": "npm run test:unit && npm run test:integration", - "test:unit": "npx vitest run test/unit/*.test.ts", - "test:integration": "npx tsx test/integration/CodeTaskIntegration.test.ts", - "lint": "npx eslint **/*.ts", - "typecheck": "npx tsc --noEmit" - }, - "peerDependencies": { - "@jtag/core": "*" - }, - "files": [ - "shared/**/*.ts", - "browser/**/*.ts", - "server/**/*.ts", - "test/**/*.ts", - "README.md" - ], - "keywords": [ - "jtag", - "command", - "code/task" - ], - "license": "MIT", - "author": "", - "repository": { - "type": "git", - "url": "" - } -} diff --git a/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts b/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts deleted file mode 100644 index 241397011..000000000 --- a/src/debug/jtag/commands/code/task/server/CodeTaskServerCommand.ts +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Code Task Command - Server Implementation - * - * Entry point for the full coding agent pipeline: - * 1. Validates parameters - * 2. Builds a CodingTask - * 3. Invokes CodeAgentOrchestrator.execute() - * 4. Maps CodingResult β†’ CodeTaskResult - */ - -import { CommandBase, type ICommandDaemon } from '@daemons/command-daemon/shared/CommandBase'; -import type { JTAGContext } from '@system/core/types/JTAGTypes'; -import { ValidationError } from '@system/core/types/ErrorTypes'; -import type { CodeTaskParams, CodeTaskResult } from '../shared/CodeTaskTypes'; -import { createCodeTaskResultFromParams } from '../shared/CodeTaskTypes'; -import { CodeAgentOrchestrator } from '@system/code/server/CodeAgentOrchestrator'; -import type { CodingTask, CodingTaskType, SecurityTierLevel, ExecutionOptions } from '@system/code/shared/CodingTypes'; -import { v4 as uuidv4 } from 'uuid'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -const VALID_TASK_TYPES = new Set(['planning', 'generation', 'editing', 'review', 'quick-fix', 'discovery']); -const VALID_TIERS = new Set(['discovery', 'read', 'write', 'system']); - -export class CodeTaskServerCommand extends CommandBase { - - constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { - super('code/task', context, subpath, commander); - } - - async execute(params: CodeTaskParams): Promise { - // Validate required parameters - if (!params.description || params.description.trim() === '') { - throw new ValidationError( - 'description', - `Missing required parameter 'description'. Provide a natural language description of the coding task. See the code/task README for usage.` - ); - } - - if (!params.userId) { - throw new ValidationError( - 'userId', - 'Workspace operations require a userId (auto-injected for persona tool calls).' - ); - } - - // Validate optional enum parameters - const taskType: CodingTaskType = this.resolveTaskType(params.taskType); - const securityTierOverride = this.resolveSecurityTier(params.securityTier); - - // Validate workspace mode - const validModes = new Set(['sandbox', 'worktree']); - const workspaceMode = params.workspaceMode && validModes.has(params.workspaceMode) - ? params.workspaceMode as 'sandbox' | 'worktree' - : undefined; - - if (workspaceMode === 'worktree' && (!params.sparsePaths || params.sparsePaths.length === 0)) { - throw new ValidationError( - 'sparsePaths', - `Worktree mode requires sparsePaths β€” specify which directories to checkout (e.g., ["src/system/code/", "docs/"])` - ); - } - - // Build CodingTask - const task: CodingTask = { - id: uuidv4() as UUID, - personaId: params.userId as UUID, - description: params.description.trim(), - taskType, - contextId: params.sessionId as UUID | undefined, - relevantFiles: params.relevantFiles, - maxDurationMs: params.maxDurationMs || undefined, - maxToolCalls: params.maxToolCalls || undefined, - workspaceMode, - sparsePaths: params.sparsePaths, - createdAt: Date.now(), - }; - - // Build execution options - const options: ExecutionOptions = { - dryRun: params.dryRun ?? false, - securityTier: securityTierOverride, - delegationEnabled: params.delegationEnabled ?? false, - }; - - // Execute via orchestrator - const orchestrator = new CodeAgentOrchestrator(); - const result = await orchestrator.execute(task, options); - - // Map CodingResult β†’ CodeTaskResult - return createCodeTaskResultFromParams(params, { - success: result.status === 'completed', - status: result.status, - summary: result.summary, - planSummary: result.planMetadata?.planSummary ?? result.summary, - riskLevel: result.planMetadata?.riskLevel ?? '', - securityTier: result.planMetadata?.requiredTier ?? securityTierOverride ?? '', - stepsTotal: result.stepResults.length, - stepsCompleted: result.stepResults.filter(s => s.status === 'completed').length, - filesModified: result.filesModified, - filesCreated: result.filesCreated, - totalToolCalls: result.totalToolCalls, - totalDurationMs: result.totalDurationMs, - changeIds: result.changeIds, - errors: result.errors, - proposalId: result.proposalId ?? '', - }); - } - - private resolveTaskType(raw?: string): CodingTaskType { - if (!raw || raw.trim() === '') return 'generation'; - if (!VALID_TASK_TYPES.has(raw)) { - throw new ValidationError( - 'taskType', - `Invalid taskType '${raw}'. Must be one of: ${Array.from(VALID_TASK_TYPES).join(', ')}` - ); - } - return raw as CodingTaskType; - } - - private resolveSecurityTier(raw?: string): SecurityTierLevel | undefined { - if (!raw || raw.trim() === '') return undefined; - if (!VALID_TIERS.has(raw)) { - throw new ValidationError( - 'securityTier', - `Invalid securityTier '${raw}'. Must be one of: ${Array.from(VALID_TIERS).join(', ')}` - ); - } - return raw as SecurityTierLevel; - } -} diff --git a/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts b/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts deleted file mode 100644 index 35531775d..000000000 --- a/src/debug/jtag/commands/code/task/shared/CodeTaskTypes.ts +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Code Task Command - Shared Types - * - * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation. - */ - -import type { CommandParams, CommandResult, CommandInput, JTAGContext } from '@system/core/types/JTAGTypes'; -import { createPayload, transformPayload } from '@system/core/types/JTAGTypes'; -import { Commands } from '@system/core/shared/Commands'; -import type { JTAGError } from '@system/core/types/ErrorTypes'; -import type { UUID } from '@system/core/types/CrossPlatformUUID'; - -/** - * Code Task Command Parameters - */ -export interface CodeTaskParams extends CommandParams { - // What the coding task should accomplish (natural language) - description: string; - // Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' - taskType?: string; - // File paths already known to be relevant (hints for discovery phase) - relevantFiles?: string[]; - // Execute read-only commands normally but mock writes. Returns predicted changes without modifying files - dryRun?: boolean; - // Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level - securityTier?: string; - // Enable multi-agent delegation for parallel execution across file clusters - delegationEnabled?: boolean; - // Maximum execution time in milliseconds (default: 120000) - maxDurationMs?: number; - // Maximum number of tool calls allowed (default: 15) - maxToolCalls?: number; - // Workspace mode: 'sandbox' (isolated directory, default) or 'worktree' (git worktree on real repo) - workspaceMode?: string; - // Paths to sparse-checkout when using worktree mode (e.g., ["src/system/code/", "docs/"]) - sparsePaths?: string[]; -} - -/** - * Factory function for creating CodeTaskParams - */ -export const createCodeTaskParams = ( - context: JTAGContext, - sessionId: UUID, - data: { - // What the coding task should accomplish (natural language) - description: string; - // Task type for model selection: 'planning' | 'generation' | 'editing' | 'review' | 'quick-fix' | 'discovery'. Defaults to 'generation' - taskType?: string; - // File paths already known to be relevant (hints for discovery phase) - relevantFiles?: string[]; - // Execute read-only commands normally but mock writes. Returns predicted changes without modifying files - dryRun?: boolean; - // Override security tier: 'discovery' | 'read' | 'write' | 'system'. Defaults to plan's assessed risk level - securityTier?: string; - // Enable multi-agent delegation for parallel execution across file clusters - delegationEnabled?: boolean; - // Maximum execution time in milliseconds (default: 120000) - maxDurationMs?: number; - // Maximum number of tool calls allowed (default: 15) - maxToolCalls?: number; - // Workspace mode: 'sandbox' (isolated directory, default) or 'worktree' (git worktree on real repo) - workspaceMode?: string; - // Paths to sparse-checkout when using worktree mode - sparsePaths?: string[]; - } -): CodeTaskParams => createPayload(context, sessionId, { - taskType: data.taskType ?? '', - relevantFiles: data.relevantFiles ?? undefined, - dryRun: data.dryRun ?? false, - securityTier: data.securityTier ?? '', - delegationEnabled: data.delegationEnabled ?? false, - maxDurationMs: data.maxDurationMs ?? 0, - maxToolCalls: data.maxToolCalls ?? 0, - workspaceMode: data.workspaceMode ?? '', - sparsePaths: data.sparsePaths ?? [], - ...data -}); - -/** - * Code Task Command Result - */ -export interface CodeTaskResult extends CommandResult { - success: boolean; - // Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' - status: string; - // Human-readable summary of what was accomplished - summary: string; - // The LLM-generated plan summary - planSummary: string; - // Assessed risk level: 'low' | 'medium' | 'high' | 'critical' - riskLevel: string; - // Security tier used for execution - securityTier: string; - // Total number of steps in the plan - stepsTotal: number; - // Number of steps that completed successfully - stepsCompleted: number; - // Files that were modified during execution - filesModified: string[]; - // Files that were created during execution - filesCreated: string[]; - // Total tool calls used - totalToolCalls: number; - // Total execution time in milliseconds - totalDurationMs: number; - // Change IDs from file operations (for potential undo) - changeIds: string[]; - // Errors encountered during execution - errors: string[]; - // Governance proposal ID if plan requires approval (status='pending_approval') - proposalId: string; - error?: JTAGError; -} - -/** - * Factory function for creating CodeTaskResult with defaults - */ -export const createCodeTaskResult = ( - context: JTAGContext, - sessionId: UUID, - data: { - success: boolean; - // Overall status: 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval' - status?: string; - // Human-readable summary of what was accomplished - summary?: string; - // The LLM-generated plan summary - planSummary?: string; - // Assessed risk level: 'low' | 'medium' | 'high' | 'critical' - riskLevel?: string; - // Security tier used for execution - securityTier?: string; - // Total number of steps in the plan - stepsTotal?: number; - // Number of steps that completed successfully - stepsCompleted?: number; - // Files that were modified during execution - filesModified?: string[]; - // Files that were created during execution - filesCreated?: string[]; - // Total tool calls used - totalToolCalls?: number; - // Total execution time in milliseconds - totalDurationMs?: number; - // Change IDs from file operations (for potential undo) - changeIds?: string[]; - // Errors encountered during execution - errors?: string[]; - // Governance proposal ID if plan requires approval (status='pending_approval') - proposalId?: string; - error?: JTAGError; - } -): CodeTaskResult => createPayload(context, sessionId, { - status: data.status ?? '', - summary: data.summary ?? '', - planSummary: data.planSummary ?? '', - riskLevel: data.riskLevel ?? '', - securityTier: data.securityTier ?? '', - stepsTotal: data.stepsTotal ?? 0, - stepsCompleted: data.stepsCompleted ?? 0, - filesModified: data.filesModified ?? [], - filesCreated: data.filesCreated ?? [], - totalToolCalls: data.totalToolCalls ?? 0, - totalDurationMs: data.totalDurationMs ?? 0, - changeIds: data.changeIds ?? [], - errors: data.errors ?? [], - proposalId: data.proposalId ?? '', - ...data -}); - -/** - * Smart Code Task-specific inheritance from params - * Auto-inherits context and sessionId from params - * Must provide all required result fields - */ -export const createCodeTaskResultFromParams = ( - params: CodeTaskParams, - differences: Omit -): CodeTaskResult => transformPayload(params, differences); - -/** - * Code Task β€” Type-safe command executor - * - * Usage: - * import { CodeTask } from '...shared/CodeTaskTypes'; - * const result = await CodeTask.execute({ ... }); - */ -export const CodeTask = { - execute(params: CommandInput): Promise { - return Commands.execute('code/task', params as Partial); - }, - commandName: 'code/task' as const, -} as const; diff --git a/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts b/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts deleted file mode 100644 index 0011dabe6..000000000 --- a/src/debug/jtag/commands/code/task/test/unit/CodeTaskCommand.test.ts +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env tsx -/** - * CodeTask Command Unit Tests - * - * Tests Code Task command logic in isolation using mock dependencies. - * This is a REFERENCE EXAMPLE showing best practices for command testing. - * - * Generated by: ./jtag generate - * Run with: npx tsx commands/Code Task/test/unit/CodeTaskCommand.test.ts - * - * NOTE: This is a self-contained test (no external test utilities needed). - * Use this as a template for your own command tests. - */ - -// import { ValidationError } from '@system/core/types/ErrorTypes'; // Uncomment when adding validation tests -import { generateUUID } from '@system/core/types/CrossPlatformUUID'; -import type { CodeTaskParams, CodeTaskResult } from '../../shared/CodeTaskTypes'; - -console.log('πŸ§ͺ CodeTask Command Unit Tests'); - -function assert(condition: boolean, message: string): void { - if (!condition) { - throw new Error(`❌ Assertion failed: ${message}`); - } - console.log(`βœ… ${message}`); -} - -/** - * Mock command that implements Code Task logic for testing - */ -async function mockCodeTaskCommand(params: CodeTaskParams): Promise { - // TODO: Validate required parameters (BEST PRACTICE) - // Example: - // if (!params.requiredParam || params.requiredParam.trim() === '') { - // throw new ValidationError( - // 'requiredParam', - // `Missing required parameter 'requiredParam'. ` + - // `Use the help tool with 'Code Task' or see the Code Task README for usage information.` - // ); - // } - - // TODO: Handle optional parameters with sensible defaults - // const optionalParam = params.optionalParam ?? defaultValue; - - // TODO: Implement your command logic here - return { - success: true, - // TODO: Add your result fields with actual computed values - context: params.context, - sessionId: params.sessionId - } as CodeTaskResult; -} - -/** - * Test 1: Command structure validation - */ -function testCodeTaskCommandStructure(): void { - console.log('\nπŸ“‹ Test 1: CodeTask command structure validation'); - - const context = { environment: 'server' as const }; - const sessionId = generateUUID(); - - // Create valid params for Code Task command - const validParams: CodeTaskParams = { - // TODO: Add your required parameters here - context, - sessionId - }; - - // Validate param structure - assert(validParams.context !== undefined, 'Params have context'); - assert(validParams.sessionId !== undefined, 'Params have sessionId'); - // TODO: Add assertions for your specific parameters - // assert(typeof validParams.requiredParam === 'string', 'requiredParam is string'); -} - -/** - * Test 2: Mock command execution - */ -async function testMockCodeTaskExecution(): Promise { - console.log('\n⚑ Test 2: Mock Code Task command execution'); - - const context = { environment: 'server' as const }; - const sessionId = generateUUID(); - - // Test mock execution - const params: CodeTaskParams = { - // TODO: Add your parameters here - context, - sessionId - }; - - const result = await mockCodeTaskCommand(params); - - // Validate result structure - assert(result.success === true, 'Mock result shows success'); - // TODO: Add assertions for your result fields - // assert(typeof result.yourField === 'string', 'yourField is string'); -} - -/** - * Test 3: Required parameter validation (CRITICAL) - * - * This test ensures your command throws ValidationError - * when required parameters are missing (BEST PRACTICE) - */ -async function testCodeTaskRequiredParams(): Promise { - console.log('\n🚨 Test 3: Required parameter validation'); - - // TODO: Uncomment when implementing validation - // const context = { environment: 'server' as const }; - // const sessionId = generateUUID(); - - // TODO: Test cases that should throw ValidationError - // Example: - // const testCases = [ - // { params: {} as CodeTaskParams, desc: 'Missing requiredParam' }, - // { params: { requiredParam: '' } as CodeTaskParams, desc: 'Empty requiredParam' }, - // ]; - // - // for (const testCase of testCases) { - // try { - // await mockCodeTaskCommand({ ...testCase.params, context, sessionId }); - // throw new Error(`Should have thrown ValidationError for: ${testCase.desc}`); - // } catch (error) { - // if (error instanceof ValidationError) { - // assert(error.field === 'requiredParam', `ValidationError field is 'requiredParam' for: ${testCase.desc}`); - // assert(error.message.includes('required parameter'), `Error message mentions 'required parameter' for: ${testCase.desc}`); - // assert(error.message.includes('help tool'), `Error message is tool-agnostic for: ${testCase.desc}`); - // } else { - // throw error; // Re-throw if not ValidationError - // } - // } - // } - - console.log('βœ… All required parameter validations work correctly'); -} - -/** - * Test 4: Optional parameter handling - */ -async function testCodeTaskOptionalParams(): Promise { - console.log('\nπŸ”§ Test 4: Optional parameter handling'); - - // TODO: Uncomment when implementing optional param tests - // const context = { environment: 'server' as const }; - // const sessionId = generateUUID(); - - // TODO: Test WITHOUT optional param (should use default) - // const paramsWithoutOptional: CodeTaskParams = { - // requiredParam: 'test', - // context, - // sessionId - // }; - // - // const resultWithoutOptional = await mockCodeTaskCommand(paramsWithoutOptional); - // assert(resultWithoutOptional.success === true, 'Command succeeds without optional params'); - - // TODO: Test WITH optional param - // const paramsWithOptional: CodeTaskParams = { - // requiredParam: 'test', - // optionalParam: true, - // context, - // sessionId - // }; - // - // const resultWithOptional = await mockCodeTaskCommand(paramsWithOptional); - // assert(resultWithOptional.success === true, 'Command succeeds with optional params'); - - console.log('βœ… Optional parameter handling validated'); -} - -/** - * Test 5: Performance validation - */ -async function testCodeTaskPerformance(): Promise { - console.log('\n⚑ Test 5: CodeTask performance validation'); - - const context = { environment: 'server' as const }; - const sessionId = generateUUID(); - - const startTime = Date.now(); - - await mockCodeTaskCommand({ - // TODO: Add your parameters - context, - sessionId - } as CodeTaskParams); - - const executionTime = Date.now() - startTime; - - assert(executionTime < 100, `CodeTask completed in ${executionTime}ms (under 100ms limit)`); -} - -/** - * Test 6: Result structure validation - */ -async function testCodeTaskResultStructure(): Promise { - console.log('\nπŸ” Test 6: CodeTask result structure validation'); - - const context = { environment: 'server' as const }; - const sessionId = generateUUID(); - - // Test various scenarios - const basicResult = await mockCodeTaskCommand({ - // TODO: Add your parameters - context, - sessionId - } as CodeTaskParams); - - assert(basicResult.success === true, 'Result has success field'); - // TODO: Add assertions for your result fields - // assert(typeof basicResult.yourField === 'string', 'Result has yourField (string)'); - assert(basicResult.context === context, 'Result includes context'); - assert(basicResult.sessionId === sessionId, 'Result includes sessionId'); - - console.log('βœ… All result structure validations pass'); -} - -/** - * Run all unit tests - */ -async function runAllCodeTaskUnitTests(): Promise { - console.log('πŸš€ Starting CodeTask Command Unit Tests\n'); - - try { - testCodeTaskCommandStructure(); - await testMockCodeTaskExecution(); - await testCodeTaskRequiredParams(); - await testCodeTaskOptionalParams(); - await testCodeTaskPerformance(); - await testCodeTaskResultStructure(); - - console.log('\nπŸŽ‰ ALL CodeTask UNIT TESTS PASSED!'); - console.log('πŸ“‹ Validated:'); - console.log(' βœ… Command structure and parameter validation'); - console.log(' βœ… Mock command execution patterns'); - console.log(' βœ… Required parameter validation (throws ValidationError)'); - console.log(' βœ… Optional parameter handling (sensible defaults)'); - console.log(' βœ… Performance requirements (< 100ms)'); - console.log(' βœ… Result structure validation'); - console.log('\nπŸ“ This is a REFERENCE EXAMPLE - use as a template for your commands!'); - console.log('πŸ’‘ TIP: Copy this test structure and modify for your command logic'); - - } catch (error) { - console.error('\n❌ CodeTask unit tests failed:', (error as Error).message); - if ((error as Error).stack) { - console.error((error as Error).stack); - } - process.exit(1); - } -} - -// Run if called directly -if (require.main === module) { - void runAllCodeTaskUnitTests(); -} else { - module.exports = { runAllCodeTaskUnitTests }; -} diff --git a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts index b42078ad5..e9b7300f0 100644 --- a/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts +++ b/src/debug/jtag/daemons/code-daemon/server/CodeDaemonServer.ts @@ -97,5 +97,49 @@ export async function initializeCodeDaemon(jtagContext: JTAGContext): Promise { + return await rustClient.shellCreate(personaId, workspaceRoot); + }; + + CodeDaemon.shellExecute = async (personaId: string, cmd: string, options?: { timeoutMs?: number; wait?: boolean }) => { + return await rustClient.shellExecute(personaId, cmd, options); + }; + + CodeDaemon.shellPoll = async (personaId: string, executionId: string) => { + return await rustClient.shellPoll(personaId, executionId); + }; + + CodeDaemon.shellKill = async (personaId: string, executionId: string) => { + await rustClient.shellKill(personaId, executionId); + }; + + CodeDaemon.shellCd = async (personaId: string, path: string) => { + return await rustClient.shellCd(personaId, path); + }; + + CodeDaemon.shellStatus = async (personaId: string) => { + return await rustClient.shellStatus(personaId); + }; + + CodeDaemon.shellDestroy = async (personaId: string) => { + await rustClient.shellDestroy(personaId); + }; + + // ======================================================================== + // Shell Watch + Sentinel (Event-driven output streaming) + // ======================================================================== + + CodeDaemon.shellWatch = async (personaId: string, executionId: string) => { + return await rustClient.shellWatch(personaId, executionId); + }; + + CodeDaemon.shellSentinel = async (personaId: string, executionId: string, rules) => { + return await rustClient.shellSentinel(personaId, executionId, rules); + }; + + log.info('Initialized successfully (workspace + shell + watch/sentinel operations via Rust IPC)'); } diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts index 1258c5cc9..77947c554 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemon.ts @@ -14,6 +14,11 @@ import type { WorkspaceUndoResult, WorkspaceHistoryResult, WorkspaceGitStatusInfo, + WorkspaceShellExecuteResponse, + WorkspaceShellPollResponse, + WorkspaceShellSessionInfo, + WorkspaceShellWatchResponse, + WorkspaceSentinelRule, } from './CodeDaemonTypes'; /** @@ -140,4 +145,90 @@ export class CodeDaemon { static async workspaceGitPush(personaId: string, remote?: string, branch?: string): Promise<{ output: string }> { throw new Error('CodeDaemon.workspaceGitPush() must be implemented by server'); } + + // ======================================================================== + // Shell Session Operations (Handle + Poll pattern) + // ======================================================================== + + /** + * Create a shell session for a workspace. + * The session persists cwd and env across command executions. + */ + static async shellCreate(personaId: string, workspaceRoot: string): Promise { + throw new Error('CodeDaemon.shellCreate() must be implemented by server'); + } + + /** + * Execute a command in a shell session. + * + * Two modes: + * - Handle mode (default): returns immediately with execution_id. Poll for output. + * - Wait mode (wait=true): blocks until completion, returns full stdout/stderr. + */ + static async shellExecute( + personaId: string, + cmd: string, + options?: { timeoutMs?: number; wait?: boolean }, + ): Promise { + throw new Error('CodeDaemon.shellExecute() must be implemented by server'); + } + + /** + * Poll an execution for new output since last poll. + * Returns new stdout/stderr lines and status. Call until `finished` is true. + */ + static async shellPoll(personaId: string, executionId: string): Promise { + throw new Error('CodeDaemon.shellPoll() must be implemented by server'); + } + + /** + * Kill a running execution. + */ + static async shellKill(personaId: string, executionId: string): Promise { + throw new Error('CodeDaemon.shellKill() must be implemented by server'); + } + + /** + * Change shell session working directory (validated against workspace boundary). + */ + static async shellCd(personaId: string, path: string): Promise<{ cwd: string }> { + throw new Error('CodeDaemon.shellCd() must be implemented by server'); + } + + /** + * Get shell session status/info. + */ + static async shellStatus(personaId: string): Promise { + throw new Error('CodeDaemon.shellStatus() must be implemented by server'); + } + + /** + * Destroy shell session (kills all running executions). + */ + static async shellDestroy(personaId: string): Promise { + throw new Error('CodeDaemon.shellDestroy() must be implemented by server'); + } + + // ======================================================================== + // Shell Watch + Sentinel (Event-driven output streaming) + // ======================================================================== + + /** + * Watch a shell execution for new output. + * Blocks until output is available β€” no timeout, no polling. + * Returns classified output lines filtered through sentinel rules. + * Call in a loop until `finished` is true. + */ + static async shellWatch(personaId: string, executionId: string): Promise { + throw new Error('CodeDaemon.shellWatch() must be implemented by server'); + } + + /** + * Configure sentinel filter rules on a shell execution. + * Rules classify output lines and control which are emitted or suppressed during watch. + * Patterns are compiled to regex on the Rust side for performance. + */ + static async shellSentinel(personaId: string, executionId: string, rules: WorkspaceSentinelRule[]): Promise<{ applied: boolean; ruleCount: number }> { + throw new Error('CodeDaemon.shellSentinel() must be implemented by server'); + } } diff --git a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts index 460254003..da10c1563 100644 --- a/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts +++ b/src/debug/jtag/daemons/code-daemon/shared/CodeDaemonTypes.ts @@ -16,3 +16,17 @@ export type { UndoResult as WorkspaceUndoResult } from '../../../shared/generate export type { ChangeNode as WorkspaceChangeNode } from '../../../shared/generated/code/ChangeNode'; export type { HistoryResult as WorkspaceHistoryResult } from '../../../shared/generated/code/HistoryResult'; export type { GitStatusInfo as WorkspaceGitStatusInfo } from '../../../shared/generated/code/GitStatusInfo'; + +// Shell session types +export type { ShellExecutionStatus as WorkspaceShellExecutionStatus } from '../../../shared/generated/code/ShellExecutionStatus'; +export type { ShellExecuteResponse as WorkspaceShellExecuteResponse } from '../../../shared/generated/code/ShellExecuteResponse'; +export type { ShellPollResponse as WorkspaceShellPollResponse } from '../../../shared/generated/code/ShellPollResponse'; +export type { ShellSessionInfo as WorkspaceShellSessionInfo } from '../../../shared/generated/code/ShellSessionInfo'; +export type { ShellHistoryEntry as WorkspaceShellHistoryEntry } from '../../../shared/generated/code/ShellHistoryEntry'; + +// Shell watch + sentinel types +export type { OutputClassification as WorkspaceOutputClassification } from '../../../shared/generated/code/OutputClassification'; +export type { SentinelAction as WorkspaceSentinelAction } from '../../../shared/generated/code/SentinelAction'; +export type { SentinelRule as WorkspaceSentinelRule } from '../../../shared/generated/code/SentinelRule'; +export type { ClassifiedLine as WorkspaceClassifiedLine } from '../../../shared/generated/code/ClassifiedLine'; +export type { ShellWatchResponse as WorkspaceShellWatchResponse } from '../../../shared/generated/code/ShellWatchResponse'; diff --git a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts index 49998fd94..687f391f1 100644 --- a/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts +++ b/src/debug/jtag/daemons/data-daemon/server/EntityRegistry.ts @@ -82,9 +82,7 @@ import { FeedbackEntity } from '../../../system/data/entities/FeedbackEntity'; import { CallEntity } from '../../../system/data/entities/CallEntity'; import { SocialCredentialEntity } from '../../../system/social/shared/SocialCredentialEntity'; import { HandleEntity } from '../../../system/data/entities/HandleEntity'; -import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; import { SkillEntity } from '../../../system/data/entities/SkillEntity'; -import { CodingChallengeEntity } from '../../../system/data/entities/CodingChallengeEntity'; /** * Initialize entity registration for the storage adapter @@ -140,9 +138,7 @@ export function initializeEntityRegistry(): void { new CallEntity(); new SocialCredentialEntity(); new HandleEntity(); - new CodingPlanEntity(); new SkillEntity(); - new CodingChallengeEntity(); registerEntity(UserEntity.collection, UserEntity); registerEntity(RoomEntity.collection, RoomEntity); @@ -190,9 +186,7 @@ export function initializeEntityRegistry(): void { registerEntity(CallEntity.collection, CallEntity); registerEntity(SocialCredentialEntity.collection, SocialCredentialEntity); registerEntity(HandleEntity.collection, HandleEntity); - registerEntity(CodingPlanEntity.collection, CodingPlanEntity); registerEntity(SkillEntity.collection, SkillEntity); - registerEntity(CodingChallengeEntity.collection, CodingChallengeEntity); log.info('All entities registered'); } \ No newline at end of file diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index c08e59914..febbf304f 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-02T11:46:40.136Z", + "generated": "2026-02-02T17:26:41.508Z", "version": "1.0.0", "commands": [ { @@ -5288,58 +5288,29 @@ } }, { - "name": "code/task", - "description": "Code Task Command - Shared Types\n *\n * Execute a coding task end-to-end via the coding agent pipeline. Formulates a plan using LLM reasoning, enforces security tiers, and executes steps via code/* commands. Supports dry-run mode, governance approval for high-risk plans, and multi-agent delegation.", + "name": "code/shell/watch", + "description": "Code Shell Watch Command - Shared Types\n *\n * Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling.\n * Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true.", "params": { - "description": { + "executionId": { "type": "string", "required": true, - "description": "description parameter" - }, - "taskType": { - "type": "string", - "required": false, - "description": "taskType parameter" - }, - "relevantFiles": { - "type": "array", - "required": false, - "description": "relevantFiles parameter" - }, - "dryRun": { - "type": "boolean", - "required": false, - "description": "dryRun parameter" - }, - "securityTier": { - "type": "string", - "required": false, - "description": "securityTier parameter" - }, - "delegationEnabled": { - "type": "boolean", - "required": false, - "description": "delegationEnabled parameter" - }, - "maxDurationMs": { - "type": "number", - "required": false, - "description": "maxDurationMs parameter" - }, - "maxToolCalls": { - "type": "number", - "required": false, - "description": "maxToolCalls parameter" - }, - "workspaceMode": { + "description": "executionId parameter" + } + } + }, + { + "name": "code/shell/sentinel", + "description": "Code Shell Sentinel Command - Shared Types\n *\n * Configure sentinel filter rules on a shell execution. Rules classify output lines\n * and control which lines are emitted or suppressed during watch.\n * Patterns are compiled to regex on the Rust side for performance.", + "params": { + "executionId": { "type": "string", - "required": false, - "description": "workspaceMode parameter" + "required": true, + "description": "executionId parameter" }, - "sparsePaths": { + "rules": { "type": "array", - "required": false, - "description": "sparsePaths parameter" + "required": true, + "description": "rules parameter" } } }, @@ -5564,48 +5535,6 @@ } } }, - { - "name": "challenge/run", - "description": "Challenge Run Command - Shared Types\n *\n * Run a coding challenge against the AI coding pipeline. Sets up a fresh workspace, executes the challenge via code/task, evaluates with AI judge, and records the attempt.", - "params": { - "challengeId": { - "type": "string", - "required": false, - "description": "challengeId parameter" - }, - "challengeNumber": { - "type": "number", - "required": false, - "description": "challengeNumber parameter" - }, - "personaId": { - "type": "string", - "required": false, - "description": "personaId parameter" - }, - "skipJudge": { - "type": "boolean", - "required": false, - "description": "skipJudge parameter" - } - } - }, - { - "name": "challenge/list", - "description": "Challenge List Command - Shared Types\n *\n * List available coding challenges with their difficulty, status, and best scores. Shows progressive challenge sequence for AI training.", - "params": { - "difficulty": { - "type": "string", - "required": false, - "description": "difficulty parameter" - }, - "personaId": { - "type": "string", - "required": false, - "description": "personaId parameter" - } - } - }, { "name": "canvas/vision", "description": "Canvas Vision Command Types\n *\n * Enables AIs to \"see\" and interact with the drawing canvas:\n * - describe: Vision AI describes what's on the canvas\n * - transform: Use image generation to transform the sketch\n * - analyze: Structured analysis of the drawing", diff --git a/src/debug/jtag/generator/specs/code-shell-sentinel.json b/src/debug/jtag/generator/specs/code-shell-sentinel.json new file mode 100644 index 000000000..4bb9bc54c --- /dev/null +++ b/src/debug/jtag/generator/specs/code-shell-sentinel.json @@ -0,0 +1,38 @@ +{ + "name": "code/shell/sentinel", + "description": "Configure sentinel filter rules on a shell execution. Rules classify output lines and control which lines are emitted or suppressed during watch. Patterns are compiled to regex on the Rust side for performance.", + "params": [ + { + "name": "executionId", + "type": "string", + "optional": false, + "description": "Execution handle to attach sentinel rules to" + }, + { + "name": "rules", + "type": "SentinelRule[]", + "optional": false, + "description": "Array of classification rules: { pattern: string, classification: OutputClassification, action: SentinelAction }" + } + ], + "results": [ + { + "name": "applied", + "type": "boolean", + "description": "Whether rules were applied successfully" + }, + { + "name": "ruleCount", + "type": "number", + "description": "Number of sentinel rules configured" + } + ], + "examples": [ + { + "description": "Filter build output to only errors and warnings", + "command": "./jtag code/shell/sentinel --executionId=\"exec-abc123\" --rules='[{\"pattern\":\"^error\",\"classification\":\"Error\",\"action\":\"Emit\"},{\"pattern\":\".*\",\"classification\":\"Verbose\",\"action\":\"Suppress\"}]'", + "expectedResult": "{ applied: true, ruleCount: 2 }" + } + ], + "accessLevel": "ai-safe" +} diff --git a/src/debug/jtag/generator/specs/code-shell-watch.json b/src/debug/jtag/generator/specs/code-shell-watch.json new file mode 100644 index 000000000..f198d4b59 --- /dev/null +++ b/src/debug/jtag/generator/specs/code-shell-watch.json @@ -0,0 +1,47 @@ +{ + "name": "code/shell/watch", + "description": "Watch a shell execution for new output. Blocks until output is available β€” no timeout, no polling. Returns classified output lines filtered through sentinel rules. Call in a loop until finished is true.", + "params": [ + { + "name": "executionId", + "type": "string", + "optional": false, + "description": "Execution handle from shell/exec" + } + ], + "results": [ + { + "name": "executionId", + "type": "string", + "description": "Echo of the execution handle" + }, + { + "name": "lines", + "type": "ClassifiedLine[]", + "description": "New output lines since last watch call (classified and filtered)" + }, + { + "name": "finished", + "type": "boolean", + "description": "True when execution is complete" + }, + { + "name": "exitCode", + "type": "number", + "description": "Process exit code (present when finished)" + } + ], + "examples": [ + { + "description": "Watch a running build for new output", + "command": "./jtag code/shell/watch --executionId=\"exec-abc123\"", + "expectedResult": "{ executionId: \"exec-abc123\", lines: [{text: \"Compiling...\", classification: \"Info\"}], finished: false }" + }, + { + "description": "Final watch call when execution completes", + "command": "./jtag code/shell/watch --executionId=\"exec-abc123\"", + "expectedResult": "{ executionId: \"exec-abc123\", lines: [], finished: true, exitCode: 0 }" + } + ], + "accessLevel": "ai-safe" +} diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index 32d3089f5..d371a2685 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7530", + "version": "1.0.7533", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7530", + "version": "1.0.7533", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index 214377b6c..4777095c5 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7530", + "version": "1.0.7533", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/server/generated.ts b/src/debug/jtag/server/generated.ts index c75048cde..a7f859b8c 100644 --- a/src/debug/jtag/server/generated.ts +++ b/src/debug/jtag/server/generated.ts @@ -1,7 +1,7 @@ /** * Server Structure Registry - Auto-generated * - * Contains 18 daemons and 214 commands and 3 adapters. + * Contains 18 daemons and 213 commands and 3 adapters. * Generated by scripts/generate-structure.ts - DO NOT EDIT MANUALLY */ @@ -60,15 +60,14 @@ import { AIValidateResponseServerCommand } from './../commands/ai/validate-respo import { CanvasStrokeAddServerCommand } from './../commands/canvas/stroke/add/server/CanvasStrokeAddServerCommand'; import { CanvasStrokeListServerCommand } from './../commands/canvas/stroke/list/server/CanvasStrokeListServerCommand'; import { CanvasVisionServerCommand } from './../commands/canvas/vision/server/CanvasVisionServerCommand'; -import { ChallengeListServerCommand } from './../commands/challenge/list/server/ChallengeListServerCommand'; -import { ChallengeRunServerCommand } from './../commands/challenge/run/server/ChallengeRunServerCommand'; import { CodeDiffServerCommand } from './../commands/code/diff/server/CodeDiffServerCommand'; import { CodeEditServerCommand } from './../commands/code/edit/server/CodeEditServerCommand'; import { CodeGitServerCommand } from './../commands/code/git/server/CodeGitServerCommand'; import { CodeHistoryServerCommand } from './../commands/code/history/server/CodeHistoryServerCommand'; import { CodeReadServerCommand } from './../commands/code/read/server/CodeReadServerCommand'; import { CodeSearchServerCommand } from './../commands/code/search/server/CodeSearchServerCommand'; -import { CodeTaskServerCommand } from './../commands/code/task/server/CodeTaskServerCommand'; +import { CodeShellSentinelServerCommand } from './../commands/code/shell/sentinel/server/CodeShellSentinelServerCommand'; +import { CodeShellWatchServerCommand } from './../commands/code/shell/watch/server/CodeShellWatchServerCommand'; import { CodeTreeServerCommand } from './../commands/code/tree/server/CodeTreeServerCommand'; import { CodeUndoServerCommand } from './../commands/code/undo/server/CodeUndoServerCommand'; import { CodeVerifyServerCommand } from './../commands/code/verify/server/CodeVerifyServerCommand'; @@ -518,16 +517,6 @@ export const SERVER_COMMANDS: CommandEntry[] = [ className: 'CanvasVisionServerCommand', commandClass: CanvasVisionServerCommand }, -{ - name: 'challenge/list', - className: 'ChallengeListServerCommand', - commandClass: ChallengeListServerCommand - }, -{ - name: 'challenge/run', - className: 'ChallengeRunServerCommand', - commandClass: ChallengeRunServerCommand - }, { name: 'code/diff', className: 'CodeDiffServerCommand', @@ -559,9 +548,14 @@ export const SERVER_COMMANDS: CommandEntry[] = [ commandClass: CodeSearchServerCommand }, { - name: 'code/task', - className: 'CodeTaskServerCommand', - commandClass: CodeTaskServerCommand + name: 'code/shell/sentinel', + className: 'CodeShellSentinelServerCommand', + commandClass: CodeShellSentinelServerCommand + }, +{ + name: 'code/shell/watch', + className: 'CodeShellWatchServerCommand', + commandClass: CodeShellWatchServerCommand }, { name: 'code/tree', diff --git a/src/debug/jtag/shared/generated-command-constants.ts b/src/debug/jtag/shared/generated-command-constants.ts index 41d85ae15..5f9e0a376 100644 --- a/src/debug/jtag/shared/generated-command-constants.ts +++ b/src/debug/jtag/shared/generated-command-constants.ts @@ -59,15 +59,14 @@ export const COMMANDS = { CANVAS_STROKE_ADD: 'canvas/stroke/add', CANVAS_STROKE_LIST: 'canvas/stroke/list', CANVAS_VISION: 'canvas/vision', - CHALLENGE_LIST: 'challenge/list', - CHALLENGE_RUN: 'challenge/run', CODE_DIFF: 'code/diff', CODE_EDIT: 'code/edit', CODE_GIT: 'code/git', CODE_HISTORY: 'code/history', CODE_READ: 'code/read', CODE_SEARCH: 'code/search', - CODE_TASK: 'code/task', + CODE_SHELL_SENTINEL: 'code/shell/sentinel', + CODE_SHELL_WATCH: 'code/shell/watch', CODE_TREE: 'code/tree', CODE_UNDO: 'code/undo', CODE_VERIFY: 'code/verify', diff --git a/src/debug/jtag/shared/generated/code/ClassifiedLine.ts b/src/debug/jtag/shared/generated/code/ClassifiedLine.ts new file mode 100644 index 000000000..ca9785451 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ClassifiedLine.ts @@ -0,0 +1,27 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { OutputClassification } from "./OutputClassification"; + +/** + * A single line of classified shell output. + */ +export type ClassifiedLine = { +/** + * The raw text content of the line. + */ +text: string, +/** + * Classification assigned by sentinel rules. + */ +classification: OutputClassification, +/** + * Line number within the stream (0-indexed from execution start). + */ +line_number: number, +/** + * Which stream this line came from: "stdout" or "stderr". + */ +stream: string, +/** + * Unix timestamp in milliseconds when the line was classified. + */ +timestamp: number, }; diff --git a/src/debug/jtag/shared/generated/code/OutputClassification.ts b/src/debug/jtag/shared/generated/code/OutputClassification.ts new file mode 100644 index 000000000..89b9396d5 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/OutputClassification.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Classification level for a line of shell output. + */ +export type OutputClassification = "Error" | "Warning" | "Info" | "Success" | "Verbose"; diff --git a/src/debug/jtag/shared/generated/code/SentinelAction.ts b/src/debug/jtag/shared/generated/code/SentinelAction.ts new file mode 100644 index 000000000..cd6f65aa1 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/SentinelAction.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * What to do with a line that matches a sentinel rule. + */ +export type SentinelAction = "Emit" | "Suppress"; diff --git a/src/debug/jtag/shared/generated/code/SentinelRule.ts b/src/debug/jtag/shared/generated/code/SentinelRule.ts new file mode 100644 index 000000000..5524c117d --- /dev/null +++ b/src/debug/jtag/shared/generated/code/SentinelRule.ts @@ -0,0 +1,23 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { OutputClassification } from "./OutputClassification"; +import type { SentinelAction } from "./SentinelAction"; + +/** + * A sentinel filter rule: regex pattern β†’ classification + action. + * + * Wire type for IPC. Patterns are compiled to `regex::Regex` on the Rust side + * when `set_sentinel()` is called. + */ +export type SentinelRule = { +/** + * Regex pattern to match against each output line. + */ +pattern: string, +/** + * Classification to assign when this rule matches. + */ +classification: OutputClassification, +/** + * Whether to include or suppress the matched line. + */ +action: SentinelAction, }; diff --git a/src/debug/jtag/shared/generated/code/ShellExecuteResponse.ts b/src/debug/jtag/shared/generated/code/ShellExecuteResponse.ts new file mode 100644 index 000000000..2f74b0c16 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellExecuteResponse.ts @@ -0,0 +1,22 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ShellExecutionStatus } from "./ShellExecutionStatus"; + +/** + * Response from `code/shell-execute`. + * + * Always returns immediately with the execution handle. + * If `wait: true` was specified, also includes the completed result. + */ +export type ShellExecuteResponse = { execution_id: string, status: ShellExecutionStatus, +/** + * Full stdout (only present when `wait: true` and execution completed). + */ +stdout?: string, +/** + * Full stderr (only present when `wait: true` and execution completed). + */ +stderr?: string, +/** + * Exit code (only present when execution completed). + */ +exit_code?: number, }; diff --git a/src/debug/jtag/shared/generated/code/ShellExecutionStatus.ts b/src/debug/jtag/shared/generated/code/ShellExecutionStatus.ts new file mode 100644 index 000000000..cfd88cc51 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellExecutionStatus.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Status of a shell command execution. + */ +export type ShellExecutionStatus = "running" | "completed" | "failed" | "timed_out" | "killed"; diff --git a/src/debug/jtag/shared/generated/code/ShellHistoryEntry.ts b/src/debug/jtag/shared/generated/code/ShellHistoryEntry.ts new file mode 100644 index 000000000..5984d5ab5 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellHistoryEntry.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * A history entry for a completed execution. + */ +export type ShellHistoryEntry = { execution_id: string, command: string, exit_code?: number, started_at: number, finished_at?: number, }; diff --git a/src/debug/jtag/shared/generated/code/ShellPollResponse.ts b/src/debug/jtag/shared/generated/code/ShellPollResponse.ts new file mode 100644 index 000000000..9fbf317e3 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellPollResponse.ts @@ -0,0 +1,26 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ShellExecutionStatus } from "./ShellExecutionStatus"; + +/** + * Response from `code/shell-poll`. + * + * Returns new output since the last poll (cursor-based). + * Call repeatedly until `finished` is true. + */ +export type ShellPollResponse = { execution_id: string, status: ShellExecutionStatus, +/** + * New stdout lines since last poll. + */ +new_stdout: Array, +/** + * New stderr lines since last poll. + */ +new_stderr: Array, +/** + * Exit code (present when finished). + */ +exit_code?: number, +/** + * True when the execution is no longer running. + */ +finished: boolean, }; diff --git a/src/debug/jtag/shared/generated/code/ShellSessionInfo.ts b/src/debug/jtag/shared/generated/code/ShellSessionInfo.ts new file mode 100644 index 000000000..9101eb5ed --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellSessionInfo.ts @@ -0,0 +1,6 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Response from `code/shell-status` β€” session metadata. + */ +export type ShellSessionInfo = { session_id: string, persona_id: string, cwd: string, workspace_root: string, active_executions: number, total_executions: number, }; diff --git a/src/debug/jtag/shared/generated/code/ShellWatchResponse.ts b/src/debug/jtag/shared/generated/code/ShellWatchResponse.ts new file mode 100644 index 000000000..120185d46 --- /dev/null +++ b/src/debug/jtag/shared/generated/code/ShellWatchResponse.ts @@ -0,0 +1,23 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ClassifiedLine } from "./ClassifiedLine"; + +/** + * Response from `code/shell-watch`. + * + * Returns classified output lines since the last watch call. + * Blocks until output is available (no timeout, no polling). + * Call in a loop until `finished` is true. + */ +export type ShellWatchResponse = { execution_id: string, +/** + * Classified output lines (filtered through sentinel rules). + */ +lines: Array, +/** + * True when the execution is no longer running. + */ +finished: boolean, +/** + * Exit code (present when finished). + */ +exit_code?: number, }; diff --git a/src/debug/jtag/shared/generated/code/index.ts b/src/debug/jtag/shared/generated/code/index.ts index 8e6396c5e..d258627e9 100644 --- a/src/debug/jtag/shared/generated/code/index.ts +++ b/src/debug/jtag/shared/generated/code/index.ts @@ -26,3 +26,17 @@ export type { TreeResult } from './TreeResult'; // Git export type { GitStatusInfo } from './GitStatusInfo'; + +// Shell Session +export type { ShellExecutionStatus } from './ShellExecutionStatus'; +export type { ShellExecuteResponse } from './ShellExecuteResponse'; +export type { ShellPollResponse } from './ShellPollResponse'; +export type { ShellSessionInfo } from './ShellSessionInfo'; +export type { ShellHistoryEntry } from './ShellHistoryEntry'; + +// Shell Watch + Sentinel +export type { OutputClassification } from './OutputClassification'; +export type { SentinelAction } from './SentinelAction'; +export type { SentinelRule } from './SentinelRule'; +export type { ClassifiedLine } from './ClassifiedLine'; +export type { ShellWatchResponse } from './ShellWatchResponse'; diff --git a/src/debug/jtag/shared/generated/persona/ActivityDomain.ts b/src/debug/jtag/shared/generated/persona/ActivityDomain.ts index 83b423021..d8bc0a79a 100644 --- a/src/debug/jtag/shared/generated/persona/ActivityDomain.ts +++ b/src/debug/jtag/shared/generated/persona/ActivityDomain.ts @@ -4,4 +4,4 @@ * Activity domain for channel routing. * Each domain has one ChannelQueue. Items route to their domain's queue. */ -export type ActivityDomain = "AUDIO" | "CHAT" | "BACKGROUND"; +export type ActivityDomain = "AUDIO" | "CHAT" | "CODE" | "BACKGROUND"; diff --git a/src/debug/jtag/shared/generated/persona/ChannelEnqueueRequest.ts b/src/debug/jtag/shared/generated/persona/ChannelEnqueueRequest.ts index b32f31d2b..fa0d4f42b 100644 --- a/src/debug/jtag/shared/generated/persona/ChannelEnqueueRequest.ts +++ b/src/debug/jtag/shared/generated/persona/ChannelEnqueueRequest.ts @@ -3,4 +3,4 @@ /** * IPC request to enqueue any item type. Discriminated by `item_type` field. */ -export type ChannelEnqueueRequest = { "item_type": "voice", id: string, room_id: string, content: string, sender_id: string, sender_name: string, sender_type: string, voice_session_id: string, timestamp: number, priority: number, } | { "item_type": "chat", id: string, room_id: string, content: string, sender_id: string, sender_name: string, sender_type: string, mentions: boolean, timestamp: number, priority: number, } | { "item_type": "task", id: string, task_id: string, assignee_id: string, created_by: string, task_domain: string, task_type: string, context_id: string, description: string, priority: number, status: string, timestamp: number, due_date: bigint | null, estimated_duration: bigint | null, depends_on: Array, blocked_by: Array, }; +export type ChannelEnqueueRequest = { "item_type": "voice", id: string, room_id: string, content: string, sender_id: string, sender_name: string, sender_type: string, voice_session_id: string, timestamp: number, priority: number, } | { "item_type": "chat", id: string, room_id: string, content: string, sender_id: string, sender_name: string, sender_type: string, mentions: boolean, timestamp: number, priority: number, } | { "item_type": "task", id: string, task_id: string, assignee_id: string, created_by: string, task_domain: string, task_type: string, context_id: string, description: string, priority: number, status: string, timestamp: number, due_date: bigint | null, estimated_duration: bigint | null, depends_on: Array, blocked_by: Array, } | { "item_type": "code", id: string, room_id: string, persona_id: string, task_description: string, workspace_handle: string, priority: number, is_review: boolean, timestamp: number, }; diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index bbfd2a50b..92353370f 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7530'; +export const VERSION = '1.0.7533'; export const PACKAGE_NAME = '@continuum/jtag'; diff --git a/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts b/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts deleted file mode 100644 index 5594e3190..000000000 --- a/src/debug/jtag/system/code/challenges/ChallengeDefinitions.ts +++ /dev/null @@ -1,445 +0,0 @@ -/** - * Challenge Definitions - Progressive coding challenges for AI training - * - * Challenges are ordered by difficulty: - * 1-2: Beginner (single file, simple operations) - * 3-4: Intermediate (multi-file, dependency chains) - * 5-6: Advanced (bug tracing, multi-agent) - * 7: Expert (architecture migration) - * - * Each definition contains everything needed to create a CodingChallengeEntity. - */ - -import type { ChallengeDifficulty, ChallengeCategory } from '../../data/entities/CodingChallengeEntity'; - -export interface ChallengeDefinition { - name: string; - sequenceNumber: number; - difficulty: ChallengeDifficulty; - category: ChallengeCategory; - description: string; - setupFiles: Record; - expectedOutcome: string; - evaluationCriteria: string[]; - expectedFiles?: Record; - timeLimitMs: number; - toolCallLimit: number; -} - -// ──────────────────────────────────────────────────────────── -// Challenge 1: Single-File Function Addition (Beginner) -// ──────────────────────────────────────────────────────────── - -export const CHALLENGE_1_FUNCTION_ADD: ChallengeDefinition = { - name: 'Add a function to a single file', - sequenceNumber: 1, - difficulty: 'beginner', - category: 'single-file', - description: `Read the file "math-utils.ts" and add a new exported function called "factorial" that computes the factorial of a non-negative integer. It should throw an error for negative inputs. Do not modify the existing functions.`, - setupFiles: { - 'math-utils.ts': `/** - * Math utility functions - */ - -export function add(a: number, b: number): number { - return a + b; -} - -export function multiply(a: number, b: number): number { - return a * b; -} - -export function isPrime(n: number): boolean { - if (n < 2) return false; - for (let i = 2; i * i <= n; i++) { - if (n % i === 0) return false; - } - return true; -} -`, - }, - expectedOutcome: 'The file math-utils.ts should contain the original three functions plus a new "factorial" function that handles edge cases correctly.', - evaluationCriteria: [ - 'factorial function is exported and correctly computes factorial for n >= 0', - 'factorial(0) returns 1 (base case)', - 'factorial throws an error for negative input', - 'Existing functions (add, multiply, isPrime) are unchanged', - 'Code follows the existing style (TypeScript, exported functions)', - ], - expectedFiles: { - 'math-utils.ts': `/** - * Math utility functions - */ - -export function add(a: number, b: number): number { - return a + b; -} - -export function multiply(a: number, b: number): number { - return a * b; -} - -export function isPrime(n: number): boolean { - if (n < 2) return false; - for (let i = 2; i * i <= n; i++) { - if (n % i === 0) return false; - } - return true; -} - -export function factorial(n: number): number { - if (n < 0) throw new Error('factorial requires a non-negative integer'); - if (n === 0 || n === 1) return 1; - let result = 1; - for (let i = 2; i <= n; i++) { - result *= i; - } - return result; -} -`, - }, - timeLimitMs: 60_000, - toolCallLimit: 8, -}; - -// ──────────────────────────────────────────────────────────── -// Challenge 2: Create File + Unit Test (Beginner) -// ──────────────────────────────────────────────────────────── - -export const CHALLENGE_2_FILE_PLUS_TEST: ChallengeDefinition = { - name: 'Create a function and its unit test', - sequenceNumber: 2, - difficulty: 'beginner', - category: 'multi-file', - description: `Create two files: -1. "string-utils.ts" β€” export a function "slugify(input: string): string" that converts a string to a URL-safe slug (lowercase, spaces/special chars replaced with hyphens, no leading/trailing hyphens, no consecutive hyphens). -2. "string-utils.test.ts" β€” write tests for slugify covering: basic conversion, multiple spaces, special characters, leading/trailing spaces, empty string, already-slugified input. - -Use simple assertion statements (no test framework needed). Each test should be a function that throws if the assertion fails.`, - setupFiles: { - 'README.md': '# String Utils\n\nCreate string-utils.ts and string-utils.test.ts as described.', - }, - expectedOutcome: 'Two files created: string-utils.ts with a working slugify function, and string-utils.test.ts with comprehensive tests.', - evaluationCriteria: [ - 'string-utils.ts exports a slugify function with correct signature', - 'slugify converts "Hello World" to "hello-world"', - 'slugify handles special characters (e.g., "Hello, World!" β†’ "hello-world")', - 'slugify removes leading/trailing hyphens', - 'slugify collapses consecutive hyphens', - 'string-utils.test.ts exists and contains meaningful test cases', - 'Tests cover edge cases: empty string, already-slugified, special chars', - ], - timeLimitMs: 90_000, - toolCallLimit: 12, -}; - -// ──────────────────────────────────────────────────────────── -// Challenge 3: Multi-File Refactor (Intermediate) -// ──────────────────────────────────────────────────────────── - -export const CHALLENGE_3_EXTRACT_SHARED: ChallengeDefinition = { - name: 'Extract shared utility from duplicate code', - sequenceNumber: 3, - difficulty: 'intermediate', - category: 'refactoring', - description: `Three files (user-service.ts, order-service.ts, product-service.ts) each contain a duplicated "formatCurrency" function with identical logic. Refactor by: -1. Creating a new "shared/format-utils.ts" that exports the single canonical formatCurrency function -2. Updating all three service files to import from shared/format-utils.ts instead of having their own copy -3. Do NOT change the function's behavior β€” only move it - -The three service files also have other functions that should NOT be changed.`, - setupFiles: { - 'user-service.ts': `import type { User } from './types'; - -function formatCurrency(amount: number, currency: string = 'USD'): string { - return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); -} - -export function getUserBalance(user: User): string { - return formatCurrency(user.balance); -} - -export function getUserSummary(user: User): string { - return \`\${user.name}: \${formatCurrency(user.balance)}\`; -} -`, - 'order-service.ts': `import type { Order } from './types'; - -function formatCurrency(amount: number, currency: string = 'USD'): string { - return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); -} - -export function getOrderTotal(order: Order): string { - const total = order.items.reduce((sum, item) => sum + item.price * item.quantity, 0); - return formatCurrency(total, order.currency); -} - -export function formatOrderLine(name: string, price: number): string { - return \`\${name}: \${formatCurrency(price)}\`; -} -`, - 'product-service.ts': `import type { Product } from './types'; - -function formatCurrency(amount: number, currency: string = 'USD'): string { - return new Intl.NumberFormat('en-US', { style: 'currency', currency }).format(amount); -} - -export function getProductPrice(product: Product): string { - return formatCurrency(product.price, product.currency); -} - -export function getDiscountedPrice(product: Product, discount: number): string { - const discounted = product.price * (1 - discount); - return formatCurrency(discounted, product.currency); -} -`, - 'types.ts': `export interface User { - name: string; - balance: number; -} - -export interface OrderItem { - name: string; - price: number; - quantity: number; -} - -export interface Order { - items: OrderItem[]; - currency: string; -} - -export interface Product { - name: string; - price: number; - currency: string; -} -`, - }, - expectedOutcome: 'A new shared/format-utils.ts file containing the single formatCurrency function, with all three service files updated to import from it. No behavior changes.', - evaluationCriteria: [ - 'shared/format-utils.ts exists and exports formatCurrency', - 'formatCurrency function signature and behavior is preserved exactly', - 'user-service.ts imports formatCurrency from shared/format-utils', - 'order-service.ts imports formatCurrency from shared/format-utils', - 'product-service.ts imports formatCurrency from shared/format-utils', - 'No duplicate formatCurrency definitions remain in any service file', - 'All other functions in service files are unchanged', - 'types.ts is unmodified', - ], - timeLimitMs: 120_000, - toolCallLimit: 15, -}; - -// ──────────────────────────────────────────────────────────── -// Challenge 4: Add Feature with Types + Handler + Test (Intermediate) -// ──────────────────────────────────────────────────────────── - -export const CHALLENGE_4_FEATURE_ENDPOINT: ChallengeDefinition = { - name: 'Add a feature across types, handler, and test', - sequenceNumber: 4, - difficulty: 'intermediate', - category: 'feature', - description: `Add a "search" feature to the existing todo application: -1. Add a "SearchParams" interface to types.ts with fields: query (string), completed (boolean | undefined) -2. Add a "searchTodos" function to todo-service.ts that filters todos by title substring match and optional completed status -3. Add tests for searchTodos in todo-service.test.ts covering: text search, completed filter, combined search+filter, empty results, empty query returns all - -Follow the existing patterns in each file.`, - setupFiles: { - 'types.ts': `export interface Todo { - id: string; - title: string; - completed: boolean; - createdAt: number; -} - -export interface CreateTodoParams { - title: string; -} -`, - 'todo-service.ts': `import type { Todo, CreateTodoParams } from './types'; - -const todos: Todo[] = []; -let nextId = 1; - -export function createTodo(params: CreateTodoParams): Todo { - const todo: Todo = { - id: String(nextId++), - title: params.title, - completed: false, - createdAt: Date.now(), - }; - todos.push(todo); - return todo; -} - -export function getTodos(): Todo[] { - return [...todos]; -} - -export function completeTodo(id: string): Todo | undefined { - const todo = todos.find(t => t.id === id); - if (todo) todo.completed = true; - return todo; -} -`, - 'todo-service.test.ts': `import { createTodo, getTodos, completeTodo } from './todo-service'; - -function assert(condition: boolean, message: string): void { - if (!condition) throw new Error(\`Assertion failed: \${message}\`); -} - -// Test createTodo -const todo = createTodo({ title: 'Buy groceries' }); -assert(todo.title === 'Buy groceries', 'createTodo should set title'); -assert(todo.completed === false, 'createTodo should default to incomplete'); -assert(typeof todo.id === 'string', 'createTodo should assign string id'); - -// Test getTodos -const allTodos = getTodos(); -assert(allTodos.length >= 1, 'getTodos should return created todos'); - -// Test completeTodo -const completed = completeTodo(todo.id); -assert(completed?.completed === true, 'completeTodo should mark as complete'); - -console.log('All tests passed!'); -`, - }, - expectedOutcome: 'types.ts has SearchParams, todo-service.ts has searchTodos function, todo-service.test.ts has comprehensive search tests.', - evaluationCriteria: [ - 'SearchParams interface added to types.ts with correct fields', - 'searchTodos function added to todo-service.ts', - 'searchTodos filters by title substring (case-insensitive)', - 'searchTodos filters by completed status when provided', - 'searchTodos returns all when query is empty and no filter', - 'Tests added for all search scenarios', - 'Existing code in all three files is preserved', - ], - timeLimitMs: 120_000, - toolCallLimit: 15, -}; - -// ──────────────────────────────────────────────────────────── -// Challenge 5: Bug Fix by Call Chain Tracing (Advanced) -// ──────────────────────────────────────────────────────────── - -export const CHALLENGE_5_BUG_FIX: ChallengeDefinition = { - name: 'Find and fix a bug by tracing the call chain', - sequenceNumber: 5, - difficulty: 'advanced', - category: 'bug-fix', - description: `There is a bug in the discount calculation system. When a user applies a percentage discount coupon, the final price is sometimes negative for large discounts. - -The bug report: "When I apply a 50% discount coupon to a $10 item, the price shows as -$5.00 instead of $5.00" - -Trace through the code files to find the root cause and fix it. The bug is in the calculation logic, not the formatting. Hint: look at how the discount is applied.`, - setupFiles: { - 'cart.ts': `import { applyDiscount } from './pricing'; -import type { CartItem, Coupon } from './types'; - -export function calculateCartTotal(items: CartItem[], coupon?: Coupon): number { - let total = items.reduce((sum, item) => sum + item.price * item.quantity, 0); - if (coupon) { - total = applyDiscount(total, coupon); - } - return total; -} -`, - 'pricing.ts': `import type { Coupon } from './types'; -import { calculatePercentageDiscount, calculateFixedDiscount } from './discounts'; - -export function applyDiscount(total: number, coupon: Coupon): number { - switch (coupon.type) { - case 'percentage': - return calculatePercentageDiscount(total, coupon.value); - case 'fixed': - return calculateFixedDiscount(total, coupon.value); - default: - return total; - } -} -`, - 'discounts.ts': `/** - * Calculate the discounted price after applying a percentage discount. - * @param total - Original price - * @param percentage - Discount percentage (e.g., 50 for 50%) - * @returns Discounted price - */ -export function calculatePercentageDiscount(total: number, percentage: number): number { - // BUG: subtracts percentage as a raw number instead of computing the percentage - const discount = percentage; - return total - discount; -} - -/** - * Calculate the discounted price after applying a fixed amount discount. - * @param total - Original price - * @param amount - Fixed discount amount - * @returns Discounted price (minimum 0) - */ -export function calculateFixedDiscount(total: number, amount: number): number { - return Math.max(0, total - amount); -} -`, - 'types.ts': `export interface CartItem { - name: string; - price: number; - quantity: number; -} - -export interface Coupon { - code: string; - type: 'percentage' | 'fixed'; - value: number; -} -`, - }, - expectedOutcome: 'The calculatePercentageDiscount function should compute the actual percentage discount (total * percentage / 100) and ensure the result is non-negative.', - evaluationCriteria: [ - 'Root cause identified: calculatePercentageDiscount subtracts raw percentage instead of computing percentage of total', - 'Fix: discount = total * (percentage / 100)', - 'Result includes Math.max(0, ...) to prevent negative prices', - 'Only discounts.ts is modified (other files have no bugs)', - 'calculateFixedDiscount is unchanged (it already works correctly)', - 'Function signature and JSDoc are preserved', - ], - expectedFiles: { - 'discounts.ts': `/** - * Calculate the discounted price after applying a percentage discount. - * @param total - Original price - * @param percentage - Discount percentage (e.g., 50 for 50%) - * @returns Discounted price - */ -export function calculatePercentageDiscount(total: number, percentage: number): number { - const discount = total * (percentage / 100); - return Math.max(0, total - discount); -} - -/** - * Calculate the discounted price after applying a fixed amount discount. - * @param total - Original price - * @param amount - Fixed discount amount - * @returns Discounted price (minimum 0) - */ -export function calculateFixedDiscount(total: number, amount: number): number { - return Math.max(0, total - amount); -} -`, - }, - timeLimitMs: 120_000, - toolCallLimit: 15, -}; - -// ──────────────────────────────────────────────────────────── -// All challenges in order -// ──────────────────────────────────────────────────────────── - -export const ALL_CHALLENGES: ChallengeDefinition[] = [ - CHALLENGE_1_FUNCTION_ADD, - CHALLENGE_2_FILE_PLUS_TEST, - CHALLENGE_3_EXTRACT_SHARED, - CHALLENGE_4_FEATURE_ENDPOINT, - CHALLENGE_5_BUG_FIX, -]; diff --git a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts b/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts deleted file mode 100644 index 6161ccebb..000000000 --- a/src/debug/jtag/system/code/server/CodeAgentOrchestrator.ts +++ /dev/null @@ -1,879 +0,0 @@ -/** - * CodeAgentOrchestrator - Executes CodingPlans step-by-step - * - * Takes a CodingPlan (DAG of steps) and executes each step via Commands.execute(), - * respecting dependency ordering. Independent steps could execute in parallel. - * - * Execution lifecycle: - * 1. Discover β€” code/tree + code/search to understand codebase - * 2. Read β€” code/read to gather context - * 3. Plan β€” PlanFormulator decomposes task via LLM - * 4. Governance β€” Check if plan requires team approval (high-risk/system-tier) - * 5. Execute β€” Run each step via code/* commands - * 6. Verify β€” After each write/edit, read back to confirm - * 7. Fix β€” If verification fails, retry (max 3 attempts per step) - * 8. Report β€” Summarize changes via code/history - * - * Persistence: - * - Plans are persisted as CodingPlanEntity via DataDaemon - * - Status updated in real-time during execution - * - Persistence is best-effort (orchestrator works without DataDaemon) - * - * Budget enforcement: - * - Max duration (default 120s) - * - Max tool calls (default 15) - * - Stops gracefully when budget exceeded - */ - -import type { - CodingTask, - CodingPlan, - CodingStep, - CodingResult, - CodingResultStatus, - StepResult, - StepStatus, - ExecutionOptions, - RiskLevel, - SecurityTierLevel, -} from '../shared/CodingTypes'; -import { PlanFormulator } from './PlanFormulator'; -import { CodingModelSelector } from './CodingModelSelector'; -import { ToolAllowlistEnforcer, ToolDeniedError } from './ToolAllowlistEnforcer'; -import { getTier } from './SecurityTier'; -import { PlanGovernance } from './PlanGovernance'; -import { CodeTaskDelegator } from './CodeTaskDelegator'; -import { Commands } from '../../core/shared/Commands'; -import { Logger } from '../../core/logging/Logger'; -import { CodingPlanEntity } from '../../data/entities/CodingPlanEntity'; -import type { CodingStepSnapshot, CodingPlanStatus } from '../../data/entities/CodingPlanEntity'; -import { COLLECTIONS } from '../../shared/Constants'; -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; -import { WorkspaceStrategy } from './WorkspaceStrategy'; -import type { WorkspaceResult } from './WorkspaceStrategy'; -import * as fs from 'fs'; -import * as path from 'path'; - -const log = Logger.create('CodeAgentOrchestrator', 'code'); - -/** Maximum retries per failed step */ -const MAX_RETRIES_PER_STEP = 3; - -/** Default budget limits */ -const DEFAULT_MAX_DURATION_MS = 120_000; -const DEFAULT_MAX_TOOL_CALLS = 15; - -/** - * Runtime budget tracker for execution limits. - */ -class ExecutionBudget { - private readonly startTime: number; - private readonly maxDurationMs: number; - private readonly maxToolCalls: number; - private _toolCallsUsed = 0; - - constructor(maxDurationMs: number, maxToolCalls: number) { - this.startTime = performance.now(); - this.maxDurationMs = maxDurationMs; - this.maxToolCalls = maxToolCalls; - } - - recordToolCall(): void { - this._toolCallsUsed++; - } - - get toolCallsUsed(): number { - return this._toolCallsUsed; - } - - get elapsedMs(): number { - return performance.now() - this.startTime; - } - - get exceeded(): boolean { - return this.elapsedMs >= this.maxDurationMs || this._toolCallsUsed >= this.maxToolCalls; - } - - get remainingToolCalls(): number { - return Math.max(0, this.maxToolCalls - this._toolCallsUsed); - } - - get reason(): string { - if (this.elapsedMs >= this.maxDurationMs) return 'time_exceeded'; - if (this._toolCallsUsed >= this.maxToolCalls) return 'tool_calls_exceeded'; - return 'ok'; - } -} - -export class CodeAgentOrchestrator { - private readonly modelSelector: CodingModelSelector; - private readonly planFormulator: PlanFormulator; - private readonly governance: PlanGovernance; - private readonly delegator: CodeTaskDelegator; - - constructor(modelSelector?: CodingModelSelector) { - this.modelSelector = modelSelector ?? new CodingModelSelector(); - this.planFormulator = new PlanFormulator(this.modelSelector); - this.governance = new PlanGovernance(); - this.delegator = new CodeTaskDelegator(); - } - - /** - * Ensure a workspace exists for this task. - * Delegates to WorkspaceStrategy which handles sandbox (default) and worktree modes. - * Returns the workspace result with handle and directory path. - */ - private async ensureWorkspace(task: CodingTask): Promise { - const mode = task.workspaceMode ?? 'sandbox'; - const slug = task.description?.slice(0, 30).replace(/\W+/g, '-').toLowerCase() ?? 'work'; - - return WorkspaceStrategy.create({ - personaId: task.personaId as string, - mode, - taskSlug: slug, - sparsePaths: task.sparsePaths, - }); - } - - /** - * Execute a coding task end-to-end: - * 1. Optionally discover codebase context - * 2. Formulate a plan via LLM - * 3. Check governance (high-risk plans require team approval) - * 4. Persist the plan as a CodingPlanEntity - * 5. Execute each step (updating entity in real-time) - * 6. Return results - * - * Options: - * - dryRun: Execute read-only commands normally, but mock write/edit commands - * - securityTier: Override the plan's required tier - * - delegationEnabled: Enable multi-agent delegation for parallel execution - */ - async execute(task: CodingTask, options?: ExecutionOptions): Promise { - const dryRun = options?.dryRun ?? false; - const budget = new ExecutionBudget( - task.maxDurationMs ?? DEFAULT_MAX_DURATION_MS, - task.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS, - ); - - log.info(`Starting task${dryRun ? ' [DRY RUN]' : ''}: ${task.description.slice(0, 80)}... (budget: ${budget.remainingToolCalls} calls)`); - - const filesModified: string[] = []; - const filesCreated: string[] = []; - const changeIds: string[] = []; - const errors: string[] = []; - const stepResults: StepResult[] = []; - let planEntity: CodingPlanEntity | undefined; - - try { - // Phase 0: Ensure workspace exists in Rust backend - // Skip if task has a pre-configured workspace handle (e.g., challenges) - if (!task.workspaceHandle) { - const workspace = await this.ensureWorkspace(task); - // Use the workspace handle for all subsequent code/* operations - // Override the task reference with the resolved handle - task = { ...task, workspaceHandle: workspace.handle } as CodingTask; - } - - // Phase 1: Discovery (optional β€” gather codebase context for planning) - let codebaseContext: string | undefined; - if (!budget.exceeded) { - codebaseContext = await this.discoverContext(task, budget); - } - - // Phase 2: Plan formulation - if (budget.exceeded) { - return this.buildResult(task, 'budget_exceeded', 'Budget exceeded before planning', stepResults, filesModified, filesCreated, changeIds, errors, budget); - } - - const plan = await this.planFormulator.formulate(task, codebaseContext); - log.info(`Plan: "${plan.summary}" β€” ${plan.steps.length} steps (risk: ${plan.riskLevel}, tier: ${plan.requiredTier})`); - - // Phase 2b: Create security enforcer from plan's required tier (or override) - const tierLevel = options?.securityTier ?? plan.requiredTier; - const enforcer = new ToolAllowlistEnforcer(getTier(tierLevel)); - - // Phase 2c: Persist plan as entity (best-effort β€” works without DataDaemon) - planEntity = await this.persistPlan(task, plan); - - // Phase 2d: Governance β€” check if plan requires approval - if (planEntity && this.governance.shouldRequireApproval(planEntity)) { - log.info(`Plan requires governance approval (risk: ${plan.riskLevel}, tier: ${tierLevel})`); - const proposalId = await this.governance.proposePlan(planEntity); - - if (proposalId) { - // Update plan status to 'proposed' and return early - await this.updatePlanStatus(planEntity, 'proposed'); - return this.buildResult( - task, 'pending_approval', - `Plan submitted for governance approval: ${plan.summary}`, - [], filesModified, filesCreated, changeIds, errors, budget, - { proposalId: proposalId as string, planMetadata: { riskLevel: plan.riskLevel, requiredTier: plan.requiredTier, planSummary: plan.summary } }, - ); - } - - // Governance proposal failed β€” log and continue (auto-approve) - log.warn('Governance proposal creation failed, auto-approving plan'); - } - - // Phase 3: Execute plan steps in dependency order - const completedSteps = new Set(); - - for (const step of plan.steps) { - if (budget.exceeded) { - log.warn(`Budget exceeded at step ${step.stepNumber}, stopping`); - stepResults.push({ - stepNumber: step.stepNumber, - status: 'skipped', - durationMs: 0, - toolCall: step.toolCall, - error: `Budget exceeded (${budget.reason})`, - }); - continue; - } - - // Check dependencies are met - const depsOk = step.dependsOn.every(dep => completedSteps.has(dep)); - if (!depsOk) { - const missingDeps = step.dependsOn.filter(d => !completedSteps.has(d)); - log.warn(`Step ${step.stepNumber} skipped β€” dependencies not met: ${missingDeps.join(', ')}`); - stepResults.push({ - stepNumber: step.stepNumber, - status: 'skipped', - durationMs: 0, - toolCall: step.toolCall, - error: `Dependencies not met: steps ${missingDeps.join(', ')}`, - }); - continue; - } - - // Execute step with retry (enforcer gates each tool call) - const result = await this.executeStepWithRetry(step, task, budget, enforcer, dryRun); - stepResults.push(result); - - if (result.status === 'completed') { - completedSteps.add(step.stepNumber); - - // Track file changes - this.trackChanges(step, result, filesModified, filesCreated, changeIds); - } else { - errors.push(`Step ${step.stepNumber} (${step.action}): ${result.error ?? 'unknown error'}`); - } - - // Update persisted plan step status - await this.updatePlanStep(planEntity, step.stepNumber, result); - } - - // Phase 4: Verifyβ†’Re-plan iteration loop - // After write/edit steps, verify compilation. If it fails, re-plan with error - // context and execute a fix plan. Repeat until verification passes or budget/iterations exhausted. - const autoVerify = options?.autoVerify ?? true; - const maxVerifyIterations = options?.maxVerifyIterations ?? 2; - const hasWriteSteps = stepResults.some( - r => r.status === 'completed' && (r.toolCall === 'code/write' || r.toolCall === 'code/edit') - ); - - if (hasWriteSteps && !budget.exceeded && !dryRun && autoVerify) { - for (let iteration = 0; iteration < maxVerifyIterations; iteration++) { - if (budget.exceeded) break; - - // Verify - const verifyErrors = await this.runVerification(task, budget); - - if (verifyErrors.length === 0) { - log.info(`Verification passed${iteration > 0 ? ` (after ${iteration} fix iteration(s))` : ''}`); - break; - } - - log.warn(`Verification failed (iteration ${iteration + 1}/${maxVerifyIterations}): ${verifyErrors.length} error(s)`); - - // Last iteration β€” just record errors, don't re-plan - if (iteration >= maxVerifyIterations - 1 || budget.exceeded) { - errors.push(...verifyErrors); - break; - } - - // Re-plan with error context - try { - const errorContext = verifyErrors.join('\n'); - const fixTask: CodingTask = { - ...task, - description: `Fix compilation errors from previous changes:\n${errorContext}\n\nOriginal task: ${task.description}`, - taskType: 'quick-fix', - }; - - const fixPlan = await this.planFormulator.formulate(fixTask, codebaseContext); - log.info(`Fix plan: ${fixPlan.steps.length} steps β€” "${fixPlan.summary}"`); - - // Execute fix plan steps - for (const step of fixPlan.steps) { - if (budget.exceeded) break; - - const depsOk = step.dependsOn.every(dep => - stepResults.some(r => r.stepNumber === dep && r.status === 'completed') - || completedSteps.has(dep) - ); - // For fix plans, skip dependency checks for step 1 (always execute first step) - if (!depsOk && step.stepNumber > 1) continue; - - const result = await this.executeStepWithRetry(step, task, budget, enforcer, false); - stepResults.push(result); - - if (result.status === 'completed') { - completedSteps.add(step.stepNumber + 1000 * (iteration + 1)); // Offset to avoid collisions - this.trackChanges(step, result, filesModified, filesCreated, changeIds); - } else { - errors.push(`Fix step ${step.stepNumber}: ${result.error ?? 'unknown'}`); - } - } - } catch (fixError) { - const msg = fixError instanceof Error ? fixError.message : String(fixError); - log.warn(`Re-plan failed (iteration ${iteration + 1}): ${msg}`); - errors.push(`Re-plan failed: ${msg}`); - break; - } - } - } - - // Determine overall status - const allCompleted = stepResults.every(r => r.status === 'completed'); - const anyCompleted = stepResults.some(r => r.status === 'completed'); - const status: CodingResultStatus = allCompleted - ? 'completed' - : anyCompleted - ? 'partial' - : budget.exceeded - ? 'budget_exceeded' - : 'failed'; - - const summary = allCompleted - ? `Completed: ${plan.summary}` - : `${status}: ${stepResults.filter(r => r.status === 'completed').length}/${plan.steps.length} steps completed`; - - const codingResult = this.buildResult( - task, status, summary, stepResults, filesModified, filesCreated, changeIds, errors, budget, - { planMetadata: { riskLevel: plan.riskLevel, requiredTier: plan.requiredTier, planSummary: plan.summary } }, - ); - - // Finalize persisted plan - await this.finalizePlan(planEntity, codingResult); - - return codingResult; - - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - log.error(`Task failed: ${message}`); - errors.push(message); - const codingResult = this.buildResult(task, 'failed', `Failed: ${message}`, stepResults, filesModified, filesCreated, changeIds, errors, budget); - await this.finalizePlan(planEntity, codingResult); - return codingResult; - } - } - - /** - * Discover codebase context for planning. - * Runs code/tree on the workspace root (or relevant paths). - */ - private async discoverContext(task: CodingTask, budget: ExecutionBudget): Promise { - try { - // Get workspace tree - const treeResult = await Commands.execute('code/tree', { - userId: task.workspaceHandle ?? task.personaId, - path: '', - maxDepth: 3, - }); - budget.recordToolCall(); - - if (!treeResult?.success) { - return undefined; - } - - let context = `## Workspace Tree\n${JSON.stringify(treeResult.root, null, 2).slice(0, 2000)}`; - - // Read relevant files for context β€” the LLM needs exact contents for precise edits - const filesToRead = task.relevantFiles && task.relevantFiles.length > 0 - ? task.relevantFiles - : this.extractFilesFromTree(treeResult.root); - - for (const file of filesToRead.slice(0, 8)) { // Max 8 files for context - if (budget.exceeded) break; - - const readResult = await Commands.execute('code/read', { - userId: task.workspaceHandle ?? task.personaId, - filePath: file, - }); - budget.recordToolCall(); - - if (readResult?.success && readResult.content) { - // Truncate large files - const content = readResult.content.length > 3000 - ? readResult.content.slice(0, 3000) + '\n... (truncated)' - : readResult.content; - context += `\n\n## ${file}\n\`\`\`\n${content}\n\`\`\``; - } - } - - // Load architecture documentation for convention-aware planning - context += await this.loadArchitectureContext(task, budget); - - return context; - } catch (error) { - log.warn(`Discovery failed: ${error instanceof Error ? error.message : String(error)}`); - return undefined; - } - } - - /** - * Load architecture documentation so the LLM plans follow project conventions. - * - * Reads CLAUDE.md from disk (it lives at the repo root, above the workspace read root) - * and key architecture docs from the jtag docs/ directory via code/read. - */ - private async loadArchitectureContext(task: CodingTask, budget: ExecutionBudget): Promise { - let archContext = ''; - - // CLAUDE.md lives at the repo root β€” read directly from disk since it's above read roots - const jtagRoot = process.cwd(); - const repoRoot = path.resolve(jtagRoot, '..', '..', '..'); - const claudeMdPath = path.join(repoRoot, 'CLAUDE.md'); - - try { - if (fs.existsSync(claudeMdPath)) { - let content = fs.readFileSync(claudeMdPath, 'utf-8'); - // Truncate to essential sections β€” full CLAUDE.md is ~20k chars - if (content.length > 6000) { - content = content.slice(0, 6000) + '\n... (truncated β€” see full CLAUDE.md for details)'; - } - archContext += `\n\n## Project Conventions (CLAUDE.md)\n\`\`\`\n${content}\n\`\`\``; - } - } catch { - // Non-critical β€” continue without CLAUDE.md - } - - // Read architecture docs from within the read root (jtag/docs/) - const archDocs = [ - 'docs/ARCHITECTURE-RULES.md', - 'docs/UNIVERSAL-PRIMITIVES.md', - ]; - - for (const doc of archDocs) { - if (budget.exceeded) break; - try { - const readResult = await Commands.execute('code/read', { - userId: task.workspaceHandle ?? task.personaId, - filePath: doc, - }); - budget.recordToolCall(); - - if (readResult?.success && readResult.content) { - const content = readResult.content.length > 3000 - ? readResult.content.slice(0, 3000) + '\n... (truncated)' - : readResult.content; - archContext += `\n\n## Architecture: ${doc}\n\`\`\`\n${content}\n\`\`\``; - } - } catch { - // Non-critical β€” continue without this doc - } - } - - return archContext; - } - - /** - * Extract file paths from a tree result for auto-discovery. - * For small workspaces (≀8 files), reads all files to give the LLM full context. - */ - private extractFilesFromTree(root: Record): string[] { - const files: string[] = []; - const walk = (node: Record, prefix: string) => { - const children = node.children as Record[] | undefined; - if (!children) return; - for (const child of children) { - const name = child.name as string; - const type = child.type as string; - const path = prefix ? `${prefix}/${name}` : name; - if (type === 'file') { - files.push(path); - } else if (type === 'directory') { - walk(child, path); - } - } - }; - walk(root, ''); - return files; - } - - /** - * Execute a single step with retry logic. - */ - private async executeStepWithRetry( - step: CodingStep, - task: CodingTask, - budget: ExecutionBudget, - enforcer: ToolAllowlistEnforcer, - dryRun: boolean = false, - ): Promise { - let lastError: string | undefined; - - for (let attempt = 0; attempt < MAX_RETRIES_PER_STEP; attempt++) { - if (budget.exceeded) { - return { - stepNumber: step.stepNumber, - status: 'failed', - durationMs: 0, - toolCall: step.toolCall, - error: `Budget exceeded before retry ${attempt + 1}`, - }; - } - - const result = await this.executeStep(step, task, budget, enforcer, dryRun); - - if (result.status === 'completed') { - return result; - } - - lastError = result.error; - if (attempt < MAX_RETRIES_PER_STEP - 1) { - log.warn(`Step ${step.stepNumber} failed (attempt ${attempt + 1}/${MAX_RETRIES_PER_STEP}): ${lastError}`); - } - } - - return { - stepNumber: step.stepNumber, - status: 'failed', - durationMs: 0, - toolCall: step.toolCall, - error: `Failed after ${MAX_RETRIES_PER_STEP} attempts: ${lastError}`, - }; - } - - /** - * Execute a single step via Commands.execute(). - * In dryRun mode, read-only commands execute normally but write commands return mock results. - */ - private async executeStep( - step: CodingStep, - task: CodingTask, - budget: ExecutionBudget, - enforcer: ToolAllowlistEnforcer, - dryRun: boolean = false, - ): Promise { - const startTime = performance.now(); - - try { - log.debug(`Step ${step.stepNumber}${dryRun ? ' [DRY]' : ''}: ${step.action} β€” ${step.description}`); - - // Inject workspace handle (userId) into params for workspace scoping - const params = { - ...step.toolParams, - userId: task.workspaceHandle ?? task.personaId, - }; - - // Gate tool call through security tier enforcer - enforcer.enforce(step.toolCall, params); - - // DryRun: mock write/edit commands, execute read-only normally - if (dryRun && this.isWriteAction(step.action)) { - budget.recordToolCall(); - const durationMs = performance.now() - startTime; - return { - stepNumber: step.stepNumber, - status: 'completed', - output: { - success: true, - dryRun: true, - wouldModify: step.targetFiles, - action: step.action, - description: step.description, - }, - durationMs, - toolCall: step.toolCall, - }; - } - - const result = await Commands.execute(step.toolCall, params); - budget.recordToolCall(); - - const durationMs = performance.now() - startTime; - const success = result?.success === true; - - if (!success) { - const error = result?.error?.message ?? result?.error ?? 'Command returned success=false'; - return { - stepNumber: step.stepNumber, - status: 'failed', - output: result, - error: typeof error === 'string' ? error : JSON.stringify(error), - durationMs, - toolCall: step.toolCall, - }; - } - - return { - stepNumber: step.stepNumber, - status: 'completed', - output: result, - durationMs, - toolCall: step.toolCall, - }; - } catch (error) { - const durationMs = performance.now() - startTime; - const message = error instanceof Error ? error.message : String(error); - return { - stepNumber: step.stepNumber, - status: 'failed', - error: message, - durationMs, - toolCall: step.toolCall, - }; - } - } - - /** - * Track file modifications and change IDs from step results. - */ - private trackChanges( - step: CodingStep, - result: StepResult, - filesModified: string[], - filesCreated: string[], - changeIds: string[], - ): void { - const output = result.output as Record | undefined; - - if (step.action === 'write' || step.action === 'edit') { - for (const file of step.targetFiles) { - if (step.action === 'write' && !filesModified.includes(file)) { - filesCreated.push(file); - } else if (!filesModified.includes(file)) { - filesModified.push(file); - } - } - - // Extract changeId from write/edit results - if (output?.changeId && typeof output.changeId === 'string') { - changeIds.push(output.changeId); - } - } - } - - /** - * Whether a coding action modifies files (write, edit, undo). - * DryRun mode mocks these actions instead of executing them. - */ - private isWriteAction(action: string): boolean { - return action === 'write' || action === 'edit' || action === 'undo'; - } - - /** - * Run TypeScript verification and return error strings. - * Empty array means verification passed. - */ - private async runVerification(task: CodingTask, budget: ExecutionBudget): Promise { - try { - const verifyResult = await Commands.execute('code/verify', { - userId: task.workspaceHandle ?? task.personaId, - typeCheck: true, - }); - budget.recordToolCall(); - - if (verifyResult?.success) { - return []; - } - - if (verifyResult?.typeCheck?.errors?.length > 0) { - return verifyResult.typeCheck.errors.map( - (e: { file: string; line: number; code: string; message: string }) => - `${e.file}:${e.line} ${e.code}: ${e.message}` - ); - } - - return ['TypeScript compilation failed (no detailed errors)']; - } catch (error) { - log.warn(`Verification error: ${error instanceof Error ? error.message : String(error)}`); - return [`Verification error: ${error instanceof Error ? error.message : String(error)}`]; - } - } - - /** - * Build the final CodingResult. - */ - private buildResult( - task: CodingTask, - status: CodingResultStatus, - summary: string, - stepResults: StepResult[], - filesModified: string[], - filesCreated: string[], - changeIds: string[], - errors: string[], - budget: ExecutionBudget, - extra?: { proposalId?: string; planMetadata?: CodingResult['planMetadata'] }, - ): CodingResult { - return { - taskId: task.id, - status, - summary, - stepResults, - filesModified, - filesCreated, - totalToolCalls: budget.toolCallsUsed, - totalDurationMs: budget.elapsedMs, - changeIds, - errors, - proposalId: extra?.proposalId, - planMetadata: extra?.planMetadata, - }; - } - - // ──────────────────────────────────────────────────────────── - // Plan Persistence (best-effort via DataDaemon) - // ──────────────────────────────────────────────────────────── - - /** - * Persist a newly formulated plan as a CodingPlanEntity. - * Returns the entity if persistence succeeded, undefined otherwise. - */ - private async persistPlan(task: CodingTask, plan: CodingPlan): Promise { - try { - const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); - - const entity = new CodingPlanEntity(); - entity.taskId = task.id; - entity.createdById = task.personaId; - entity.leadId = task.personaId; - entity.summary = plan.summary; - entity.taskDescription = task.description; - entity.estimatedToolCalls = plan.estimatedToolCalls; - entity.assignees = [task.personaId]; - entity.generatedBy = { - provider: plan.generatedBy.provider, - model: plan.generatedBy.model, - temperature: 0, - durationMs: 0, - }; - entity.riskLevel = plan.riskLevel; - entity.riskReason = plan.riskReason; - entity.securityTier = plan.requiredTier; - entity.status = 'executing'; - entity.executionStartedAt = Date.now(); - - // Convert plan steps to snapshots - entity.steps = plan.steps.map(step => ({ - stepNumber: step.stepNumber, - action: step.action, - description: step.description, - targetFiles: step.targetFiles, - toolCall: step.toolCall, - toolParams: step.toolParams, - dependsOn: step.dependsOn, - verification: step.verification, - status: 'pending' as const, - })); - - const stored = await DataDaemon.store(COLLECTIONS.CODING_PLANS, entity); - log.info(`Plan persisted: ${stored.id}`); - return stored; - } catch { - log.debug('Plan persistence skipped (DataDaemon not available)'); - return undefined; - } - } - - /** - * Update a step's status in the persisted plan entity. - */ - private async updatePlanStep( - planEntity: CodingPlanEntity | undefined, - stepNumber: number, - result: StepResult, - ): Promise { - if (!planEntity) return; - - try { - const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); - - const stepIndex = planEntity.steps.findIndex(s => s.stepNumber === stepNumber); - if (stepIndex === -1) return; - - // Update step snapshot in-place - const snapshot = planEntity.steps[stepIndex]; - snapshot.status = result.status === 'completed' ? 'completed' - : result.status === 'skipped' ? 'skipped' - : 'failed'; - snapshot.completedAt = Date.now(); - snapshot.durationMs = result.durationMs; - snapshot.output = result.output; - snapshot.error = result.error; - - await DataDaemon.update( - COLLECTIONS.CODING_PLANS, - planEntity.id as UUID, - { steps: planEntity.steps } as Partial, - ); - } catch { - // Best-effort β€” don't interrupt execution for persistence failures - } - } - - /** - * Update the plan's top-level status. - */ - private async updatePlanStatus( - planEntity: CodingPlanEntity, - status: CodingPlanStatus, - ): Promise { - try { - const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); - await DataDaemon.update( - COLLECTIONS.CODING_PLANS, - planEntity.id as UUID, - { status } as Partial, - ); - } catch { - // Best-effort - } - } - - /** - * Finalize the persisted plan with execution results. - */ - private async finalizePlan( - planEntity: CodingPlanEntity | undefined, - result: CodingResult, - ): Promise { - if (!planEntity) return; - - try { - const { DataDaemon } = await import('../../../daemons/data-daemon/shared/DataDaemon'); - - const statusMap: Record = { - completed: 'completed', - partial: 'partial', - failed: 'failed', - budget_exceeded: 'partial', - pending_approval: 'proposed', - }; - - await DataDaemon.update( - COLLECTIONS.CODING_PLANS, - planEntity.id as UUID, - { - status: statusMap[result.status] ?? 'failed', - executionCompletedAt: Date.now(), - filesModified: result.filesModified, - filesCreated: result.filesCreated, - changeIds: result.changeIds, - errors: result.errors, - totalToolCalls: result.totalToolCalls, - totalDurationMs: result.totalDurationMs, - } as Partial, - ); - - log.info(`Plan finalized: ${planEntity.id} β†’ ${result.status}`); - } catch { - // Best-effort - } - } -} diff --git a/src/debug/jtag/system/code/server/CodeTaskDelegator.ts b/src/debug/jtag/system/code/server/CodeTaskDelegator.ts deleted file mode 100644 index c5e440837..000000000 --- a/src/debug/jtag/system/code/server/CodeTaskDelegator.ts +++ /dev/null @@ -1,408 +0,0 @@ -/** - * CodeTaskDelegator - Decomposes plans into sub-plans for parallel multi-agent execution - * - * A lead AI creates a top-level plan, then the delegator: - * 1. Analyzes the step DAG for independent file clusters - * 2. Assigns clusters to available agents based on capabilities - * 3. Creates sub-plan entities (parentPlanId = parent) - * 4. After execution, consolidates results from sub-plans - * - * File clusters: Groups of steps that share file dependencies. - * Two steps that touch the same file MUST be in the same cluster. - * Steps in different clusters CAN execute in parallel. - */ - -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import type { - AgentCapability, - DelegationResult, - CodingResult, - CodingResultStatus, -} from '../shared/CodingTypes'; -import { - CodingPlanEntity, - type CodingStepSnapshot, -} from '../../data/entities/CodingPlanEntity'; -import { Logger } from '../../core/logging/Logger'; - -const log = Logger.create('CodeTaskDelegator', 'code'); - -// ──────────────────────────────────────────────────────────── -// File cluster β€” a group of steps that share file dependencies -// ──────────────────────────────────────────────────────────── - -export interface FileCluster { - /** Unique cluster index */ - readonly index: number; - - /** Step numbers in this cluster (topologically ordered) */ - readonly stepNumbers: number[]; - - /** All files touched by steps in this cluster */ - readonly files: string[]; - - /** Step numbers from other clusters that this cluster depends on */ - readonly externalDeps: number[]; -} - -// ──────────────────────────────────────────────────────────── -// Agent assignment β€” which agent gets which cluster -// ──────────────────────────────────────────────────────────── - -export interface AgentAssignment { - readonly agentId: UUID; - readonly agentName: string; - readonly clusters: FileCluster[]; - readonly totalSteps: number; - readonly files: string[]; -} - -// ──────────────────────────────────────────────────────────── -// Implementation -// ──────────────────────────────────────────────────────────── - -export class CodeTaskDelegator { - - /** - * Decompose a plan's step DAG into independent file clusters. - * - * Algorithm (union-find on files): - * 1. Each step has a set of target files - * 2. Steps that share ANY file belong to the same cluster - * 3. Steps connected via dependsOn also belong to the same cluster - * 4. Result: disjoint clusters that can execute in parallel - */ - decompose(plan: CodingPlanEntity): FileCluster[] { - if (plan.steps.length === 0) return []; - - // Union-Find on step indices - const parent = new Map(); - const rank = new Map(); - - const find = (x: number): number => { - if (!parent.has(x)) { parent.set(x, x); rank.set(x, 0); } - if (parent.get(x) !== x) parent.set(x, find(parent.get(x)!)); - return parent.get(x)!; - }; - - const union = (a: number, b: number): void => { - const ra = find(a), rb = find(b); - if (ra === rb) return; - const rkA = rank.get(ra) ?? 0, rkB = rank.get(rb) ?? 0; - if (rkA < rkB) { parent.set(ra, rb); } - else if (rkA > rkB) { parent.set(rb, ra); } - else { parent.set(rb, ra); rank.set(ra, rkA + 1); } - }; - - // Initialize all steps - for (const step of plan.steps) { - find(step.stepNumber); - } - - // Union steps that share files - const fileToStep = new Map(); - for (const step of plan.steps) { - for (const file of step.targetFiles) { - const existing = fileToStep.get(file); - if (existing !== undefined) { - union(existing, step.stepNumber); - } else { - fileToStep.set(file, step.stepNumber); - } - } - } - - // Union steps connected by dependencies - for (const step of plan.steps) { - for (const dep of step.dependsOn) { - union(step.stepNumber, dep); - } - } - - // Group steps by root - const clusterMap = new Map(); - for (const step of plan.steps) { - const root = find(step.stepNumber); - const group = clusterMap.get(root) ?? []; - group.push(step.stepNumber); - clusterMap.set(root, group); - } - - // Build FileCluster objects - const stepByNumber = new Map(); - for (const step of plan.steps) { - stepByNumber.set(step.stepNumber, step); - } - - const clusters: FileCluster[] = []; - let clusterIndex = 0; - - for (const [, stepNumbers] of clusterMap) { - // Collect all files in this cluster - const files = new Set(); - for (const sn of stepNumbers) { - const step = stepByNumber.get(sn)!; - for (const f of step.targetFiles) files.add(f); - } - - // Identify external dependencies (deps outside this cluster) - const stepSet = new Set(stepNumbers); - const externalDeps: number[] = []; - for (const sn of stepNumbers) { - const step = stepByNumber.get(sn)!; - for (const dep of step.dependsOn) { - if (!stepSet.has(dep) && !externalDeps.includes(dep)) { - externalDeps.push(dep); - } - } - } - - // Sort steps topologically within cluster - stepNumbers.sort((a, b) => a - b); - - clusters.push({ - index: clusterIndex++, - stepNumbers, - files: Array.from(files).sort(), - externalDeps, - }); - } - - log.info(`Decomposed ${plan.steps.length} steps into ${clusters.length} clusters`); - return clusters; - } - - /** - * Assign file clusters to available agents. - * - * Strategy: - * - Sort agents by load (least loaded first) - * - Sort clusters by size (largest first β€” greedy bin packing) - * - Assign each cluster to the least-loaded agent that has capacity - * - Respect agent security tier (cluster needs write β†’ agent needs write+) - */ - assign( - clusters: FileCluster[], - agents: AgentCapability[], - plan: CodingPlanEntity, - ): AgentAssignment[] { - if (clusters.length === 0 || agents.length === 0) return []; - - // Sort agents by load ascending (least loaded first) - const sortedAgents = [...agents].sort((a, b) => a.currentLoad - b.currentLoad); - - // Sort clusters by step count descending (largest first) - const sortedClusters = [...clusters].sort((a, b) => b.stepNumbers.length - a.stepNumbers.length); - - // Track assignments - const assignments = new Map(); - - for (const cluster of sortedClusters) { - // Find the least-loaded agent that hasn't been given too many clusters - let assigned = false; - for (const agent of sortedAgents) { - const existing = assignments.get(agent.personaId); - const currentClusterCount = existing?.clusters.length ?? 0; - - // Simple load balancing: distribute evenly - const maxClustersPerAgent = Math.ceil(sortedClusters.length / sortedAgents.length); - if (currentClusterCount >= maxClustersPerAgent) continue; - - if (!existing) { - assignments.set(agent.personaId, { agent, clusters: [cluster] }); - } else { - existing.clusters.push(cluster); - } - assigned = true; - break; - } - - // If no agent available, assign to least loaded - if (!assigned && sortedAgents.length > 0) { - const fallback = sortedAgents[0]; - const existing = assignments.get(fallback.personaId); - if (!existing) { - assignments.set(fallback.personaId, { agent: fallback, clusters: [cluster] }); - } else { - existing.clusters.push(cluster); - } - } - } - - // Build AgentAssignment objects - const result: AgentAssignment[] = []; - for (const [, { agent, clusters: agentClusters }] of assignments) { - const allSteps: number[] = []; - const allFiles = new Set(); - for (const cluster of agentClusters) { - allSteps.push(...cluster.stepNumbers); - for (const f of cluster.files) allFiles.add(f); - } - - result.push({ - agentId: agent.personaId, - agentName: agent.name, - clusters: agentClusters, - totalSteps: allSteps.length, - files: Array.from(allFiles).sort(), - }); - } - - log.info(`Assigned ${clusters.length} clusters to ${result.length} agents`); - return result; - } - - /** - * Create sub-plan entities from agent assignments. - * Each sub-plan contains only the steps assigned to that agent. - */ - createSubPlans( - parentPlan: CodingPlanEntity, - assignments: AgentAssignment[], - ): CodingPlanEntity[] { - const stepByNumber = new Map(); - for (const step of parentPlan.steps) { - stepByNumber.set(step.stepNumber, step); - } - - const subPlans: CodingPlanEntity[] = []; - - for (const assignment of assignments) { - const subPlan = new CodingPlanEntity(); - subPlan.taskId = parentPlan.taskId; - subPlan.parentPlanId = parentPlan.id as UUID; - subPlan.createdById = parentPlan.leadId; - subPlan.leadId = assignment.agentId; - subPlan.summary = `Sub-plan for ${assignment.agentName}: ${assignment.files.slice(0, 3).join(', ')}${assignment.files.length > 3 ? '...' : ''}`; - subPlan.taskDescription = parentPlan.taskDescription; - subPlan.estimatedToolCalls = assignment.totalSteps; - subPlan.assignees = [assignment.agentId]; - subPlan.generatedBy = parentPlan.generatedBy; - subPlan.riskLevel = parentPlan.riskLevel; - subPlan.riskReason = parentPlan.riskReason; - subPlan.securityTier = parentPlan.securityTier; - subPlan.status = 'approved'; // Sub-plans inherit parent approval - - // Copy only the assigned steps, renumber sequentially - const assignedStepNumbers = new Set(); - for (const cluster of assignment.clusters) { - for (const sn of cluster.stepNumbers) { - assignedStepNumbers.add(sn); - } - } - - subPlan.steps = Array.from(assignedStepNumbers) - .sort((a, b) => a - b) - .map(sn => { - const original = stepByNumber.get(sn)!; - return { - ...original, - // Filter dependsOn to only include steps within this sub-plan - dependsOn: original.dependsOn.filter(d => assignedStepNumbers.has(d)), - }; - }); - - subPlans.push(subPlan); - } - - log.info(`Created ${subPlans.length} sub-plans from parent ${parentPlan.id}`); - return subPlans; - } - - /** - * Consolidate results from sub-plans into the parent plan's CodingResult. - */ - consolidate( - parentPlan: CodingPlanEntity, - subPlans: CodingPlanEntity[], - ): CodingResult { - const filesModified = new Set(); - const filesCreated = new Set(); - const changeIds: string[] = []; - const errors: string[] = []; - let totalToolCalls = 0; - let totalDurationMs = 0; - - for (const sub of subPlans) { - for (const f of sub.filesModified) filesModified.add(f); - for (const f of sub.filesCreated) filesCreated.add(f); - changeIds.push(...sub.changeIds); - errors.push(...sub.errors); - totalToolCalls += sub.totalToolCalls; - totalDurationMs = Math.max(totalDurationMs, sub.totalDurationMs); // Parallel = max, not sum - } - - // Detect conflicts: same file modified by multiple sub-plans - const fileToSubPlan = new Map(); - for (const sub of subPlans) { - for (const f of sub.filesModified) { - const existing = fileToSubPlan.get(f) ?? []; - existing.push(sub.id as UUID); - fileToSubPlan.set(f, existing); - } - } - const conflicts = Array.from(fileToSubPlan.entries()) - .filter(([, ids]) => ids.length > 1) - .map(([file]) => file); - - if (conflicts.length > 0) { - errors.push(`File conflicts detected: ${conflicts.join(', ')}`); - } - - // Determine overall status - if (subPlans.length === 0) { - return { - taskId: parentPlan.taskId, - status: 'failed', - summary: 'No sub-plans to consolidate', - stepResults: [], - filesModified: [], - filesCreated: [], - totalToolCalls: 0, - totalDurationMs: 0, - changeIds: [], - errors: ['No sub-plans were executed'], - }; - } - - const allCompleted = subPlans.every(s => s.status === 'completed'); - const anyCompleted = subPlans.some(s => s.status === 'completed'); - const status: CodingResultStatus = allCompleted - ? 'completed' - : anyCompleted - ? 'partial' - : 'failed'; - - // Build step results from all sub-plans - const stepResults = subPlans.flatMap(sub => - sub.steps.map(step => ({ - stepNumber: step.stepNumber, - status: step.status === 'completed' ? 'completed' as const - : step.status === 'skipped' ? 'skipped' as const - : step.status === 'failed' ? 'failed' as const - : 'pending' as const, - output: step.output, - error: step.error, - durationMs: step.durationMs ?? 0, - toolCall: step.toolCall, - })), - ); - - const summary = allCompleted - ? `All ${subPlans.length} sub-plans completed` - : `${subPlans.filter(s => s.status === 'completed').length}/${subPlans.length} sub-plans completed`; - - return { - taskId: parentPlan.taskId, - status, - summary, - stepResults, - filesModified: Array.from(filesModified), - filesCreated: Array.from(filesCreated), - totalToolCalls, - totalDurationMs, - changeIds, - errors, - }; - } -} diff --git a/src/debug/jtag/system/code/server/CodingChallengeRunner.ts b/src/debug/jtag/system/code/server/CodingChallengeRunner.ts deleted file mode 100644 index 4bca5b76b..000000000 --- a/src/debug/jtag/system/code/server/CodingChallengeRunner.ts +++ /dev/null @@ -1,239 +0,0 @@ -/** - * CodingChallengeRunner - Execute coding challenges and capture results - * - * Runs a coding challenge against the code/task pipeline: - * 1. Set up workspace with challenge files - * 2. Execute code/task with the challenge description - * 3. Collect result files from workspace - * 4. Pass to CodingJudge for evaluation - * 5. Record attempt on entity - * - * Each challenge gets a fresh workspace to prevent state leakage. - */ - -import { Logger } from '../../core/logging/Logger'; -import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; -import { CodeAgentOrchestrator } from './CodeAgentOrchestrator'; -import { CodingJudge } from './CodingJudge'; -import type { CodingTask, ExecutionOptions } from '../shared/CodingTypes'; -import type { CodingChallengeEntity, ChallengeAttempt, AttemptStatus } from '../../data/entities/CodingChallengeEntity'; -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import { v4 as uuidv4 } from 'uuid'; -import * as fs from 'fs'; -import * as path from 'path'; - -const log = Logger.create('CodingChallengeRunner', 'code'); - -export interface ChallengeRunOptions { - /** Which AI persona runs the challenge */ - personaId: UUID; - /** Skip AI judge evaluation (just check execution success) */ - skipJudge?: boolean; - /** Override security tier (default: write) */ - securityTier?: string; -} - -export interface ChallengeRunResult { - success: boolean; - attempt: ChallengeAttempt; - /** Raw code/task result */ - taskResult?: Record; -} - -export class CodingChallengeRunner { - private readonly orchestrator: CodeAgentOrchestrator; - private readonly judge: CodingJudge; - - constructor() { - this.orchestrator = new CodeAgentOrchestrator(); - this.judge = new CodingJudge(); - } - - /** - * Execute a coding challenge for a persona. - * - * Creates a fresh workspace, seeds it with challenge files, - * runs the coding pipeline, evaluates results, and records the attempt. - */ - async run(challenge: CodingChallengeEntity, options: ChallengeRunOptions): Promise { - const { personaId } = options; - const startedAt = Date.now(); - - log.info(`Running challenge "${challenge.name}" (${challenge.difficulty}) for persona ${personaId}`); - - try { - // Phase 1: Set up challenge workspace with unique handle - const workspaceHandle = `challenge-${(challenge.id ?? challenge.sequenceNumber)}-${personaId}`; - const workspaceDir = await this.setupChallengeWorkspace(challenge, personaId, workspaceHandle); - - // Phase 2: Execute the coding task - const task: CodingTask = { - id: uuidv4() as UUID, - personaId, - description: challenge.description, - taskType: 'generation', - maxDurationMs: challenge.timeLimitMs, - maxToolCalls: challenge.toolCallLimit, - workspaceHandle, - relevantFiles: Object.keys(challenge.setupFiles), - createdAt: Date.now(), - }; - - const execOptions: ExecutionOptions = { - dryRun: false, - securityTier: (options.securityTier as any) ?? 'write', - }; - - const result = await this.orchestrator.execute(task, execOptions); - - // Phase 3: Collect result files from workspace - const resultFiles = await this.collectResultFiles(workspaceDir, challenge); - - // Phase 4: Judge evaluation - const completedAt = Date.now(); - let score = 0; - let feedback = ''; - let status: AttemptStatus; - - if (result.status === 'completed' || result.status === 'partial') { - if (options.skipJudge) { - score = result.status === 'completed' ? 70 : 40; - feedback = `Pipeline ${result.status}. ${result.stepResults.filter(s => s.status === 'completed').length}/${result.stepResults.length} steps completed.`; - status = result.status === 'completed' ? 'passed' : 'partial'; - } else { - const evaluation = await this.judge.evaluate(challenge, resultFiles, result); - score = evaluation.score; - feedback = evaluation.feedback; - status = evaluation.passed ? 'passed' : evaluation.score >= 40 ? 'partial' : 'failed'; - } - } else if (result.status === 'budget_exceeded') { - status = 'timeout'; - feedback = `Budget exceeded: ${result.errors.join('; ')}`; - } else { - status = 'failed'; - feedback = `Execution failed: ${result.errors.join('; ')}`; - } - - const attempt: ChallengeAttempt = { - personaId, - planId: task.id, - startedAt, - completedAt, - status, - score, - feedback, - filesModified: result.filesModified, - filesCreated: result.filesCreated, - errors: result.errors, - toolCallsUsed: result.totalToolCalls, - durationMs: result.totalDurationMs, - resultFiles, - }; - - // Phase 5: Record attempt on entity - challenge.recordAttempt(attempt); - - log.info(`Challenge "${challenge.name}" ${status}: score=${score}, duration=${result.totalDurationMs}ms`); - - return { - success: status === 'passed', - attempt, - taskResult: result as unknown as Record, - }; - - } catch (error) { - const completedAt = Date.now(); - const message = error instanceof Error ? error.message : String(error); - log.error(`Challenge "${challenge.name}" error: ${message}`); - - const attempt: ChallengeAttempt = { - personaId, - startedAt, - completedAt, - status: 'error', - score: 0, - feedback: `Runner error: ${message}`, - filesModified: [], - filesCreated: [], - errors: [message], - toolCallsUsed: 0, - durationMs: completedAt - startedAt, - }; - - challenge.recordAttempt(attempt); - - return { success: false, attempt }; - } - } - - /** - * Set up a fresh workspace with challenge files. - * Creates the workspace directory and writes all setup files. - */ - private async setupChallengeWorkspace( - challenge: CodingChallengeEntity, - personaId: UUID, - workspaceHandle: string, - ): Promise { - const jtagRoot = process.cwd(); - const challengeWorkspace = path.join( - jtagRoot, '.continuum', 'personas', personaId as string, - 'challenges', challenge.id as string, - ); - - // Create fresh workspace - if (fs.existsSync(challengeWorkspace)) { - fs.rmSync(challengeWorkspace, { recursive: true }); - } - fs.mkdirSync(challengeWorkspace, { recursive: true }); - - // Write setup files - for (const [filePath, content] of Object.entries(challenge.setupFiles)) { - const fullPath = path.join(challengeWorkspace, filePath); - const dir = path.dirname(fullPath); - if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); - } - fs.writeFileSync(fullPath, content, 'utf-8'); - } - - // Register workspace in Rust backend using unique handle (writable, no read roots) - await CodeDaemon.createWorkspace(workspaceHandle, challengeWorkspace); - - log.debug(`Challenge workspace set up at ${challengeWorkspace} with ${Object.keys(challenge.setupFiles).length} files`); - - return challengeWorkspace; - } - - /** - * Collect result files from workspace after execution. - * Reads all files that were part of the challenge setup, plus any new files. - */ - private async collectResultFiles( - workspaceDir: string, - challenge: CodingChallengeEntity, - ): Promise> { - const resultFiles: Record = {}; - - const collectDir = (dir: string, prefix: string = ''): void => { - if (!fs.existsSync(dir)) return; - const entries = fs.readdirSync(dir, { withFileTypes: true }); - for (const entry of entries) { - const relativePath = prefix ? `${prefix}/${entry.name}` : entry.name; - const fullPath = path.join(dir, entry.name); - if (entry.isDirectory()) { - collectDir(fullPath, relativePath); - } else if (entry.isFile()) { - try { - resultFiles[relativePath] = fs.readFileSync(fullPath, 'utf-8'); - } catch { - // Skip unreadable files - } - } - } - }; - - collectDir(workspaceDir); - return resultFiles; - } -} diff --git a/src/debug/jtag/system/code/server/CodingJudge.ts b/src/debug/jtag/system/code/server/CodingJudge.ts deleted file mode 100644 index e78549ff2..000000000 --- a/src/debug/jtag/system/code/server/CodingJudge.ts +++ /dev/null @@ -1,288 +0,0 @@ -/** - * CodingJudge - AI evaluation of coding challenge attempts - * - * Uses a reasoning-class model to evaluate challenge solutions against rubric criteria. - * Returns a score (0-100) and detailed feedback. - * - * Evaluation considers: - * - Correctness: Does the code do what was asked? - * - Completeness: Were all requirements met? - * - Code quality: Is the code clean and idiomatic? - * - Efficiency: Were resources (tool calls, time) used well? - */ - -import { Logger } from '../../core/logging/Logger'; -import { AIProviderDaemon } from '../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; -import type { CodingChallengeEntity } from '../../data/entities/CodingChallengeEntity'; -import type { CodingResult } from '../shared/CodingTypes'; - -const log = Logger.create('CodingJudge', 'code'); - -export interface JudgeEvaluation { - /** Score from 0 to 100 */ - score: number; - /** Whether the challenge is considered passed (score >= 70) */ - passed: boolean; - /** Detailed feedback */ - feedback: string; - /** Per-criterion scores */ - criteriaScores: Array<{ criterion: string; score: number; comment: string }>; - /** Strengths identified */ - strengths: string[]; - /** Weaknesses identified */ - weaknesses: string[]; -} - -/** Minimum score to pass a challenge */ -const PASS_THRESHOLD = 70; - -export class CodingJudge { - - /** - * Evaluate a coding challenge attempt. - * - * Sends the challenge spec, result files, and execution metrics to a - * reasoning model that scores the attempt against the rubric. - */ - async evaluate( - challenge: CodingChallengeEntity, - resultFiles: Record, - executionResult: CodingResult, - ): Promise { - log.info(`Judging challenge "${challenge.name}" β€” ${Object.keys(resultFiles).length} result files`); - - const prompt = this.buildJudgePrompt(challenge, resultFiles, executionResult); - - try { - const response = await AIProviderDaemon.generateText({ - messages: [{ role: 'user', content: prompt }], - systemPrompt: JUDGE_SYSTEM_PROMPT, - preferredProvider: 'anthropic', - model: 'claude-sonnet-4-5-20250514', - temperature: 0.2, - maxTokens: 2000, - }); - - return this.parseJudgeResponse(response.text, challenge); - - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - log.error(`Judge evaluation failed: ${message}`); - - // Fallback: simple heuristic scoring when LLM unavailable - return this.heuristicScore(challenge, resultFiles, executionResult); - } - } - - /** - * Build the evaluation prompt for the judge model. - */ - private buildJudgePrompt( - challenge: CodingChallengeEntity, - resultFiles: Record, - executionResult: CodingResult, - ): string { - const setupFilesStr = Object.entries(challenge.setupFiles) - .map(([path, content]) => `### ${path} (BEFORE)\n\`\`\`\n${content}\n\`\`\``) - .join('\n\n'); - - const resultFilesStr = Object.entries(resultFiles) - .map(([path, content]) => `### ${path} (AFTER)\n\`\`\`\n${content}\n\`\`\``) - .join('\n\n'); - - const expectedFilesStr = challenge.expectedFiles - ? Object.entries(challenge.expectedFiles) - .map(([path, content]) => `### ${path} (EXPECTED)\n\`\`\`\n${content}\n\`\`\``) - .join('\n\n') - : 'No expected files provided β€” evaluate based on description and criteria.'; - - const criteriaList = challenge.evaluationCriteria - .map((c, i) => `${i + 1}. ${c}`) - .join('\n'); - - return `## Challenge: ${challenge.name} -**Difficulty**: ${challenge.difficulty} -**Category**: ${challenge.category} - -## Task Description -${challenge.description} - -## Expected Outcome -${challenge.expectedOutcome} - -## Evaluation Criteria -${criteriaList} - -## Setup Files (Initial State) -${setupFilesStr} - -## Result Files (After Execution) -${resultFilesStr} - -## Expected Files (Reference Solution) -${expectedFilesStr} - -## Execution Metrics -- Status: ${executionResult.status} -- Steps completed: ${executionResult.stepResults.filter(s => s.status === 'completed').length}/${executionResult.stepResults.length} -- Tool calls used: ${executionResult.totalToolCalls} -- Duration: ${executionResult.totalDurationMs}ms -- Files modified: ${executionResult.filesModified.join(', ') || 'none'} -- Files created: ${executionResult.filesCreated.join(', ') || 'none'} -- Errors: ${executionResult.errors.join('; ') || 'none'} - -## Instructions -Evaluate this coding challenge attempt. Score each criterion from 0-100, then provide an overall score. Respond with valid JSON matching this schema: - -\`\`\`json -{ - "score": , - "feedback": "", - "criteriaScores": [ - { "criterion": "", "score": <0-100>, "comment": "" } - ], - "strengths": ["", ...], - "weaknesses": ["", ...] -} -\`\`\``; - } - - /** - * Parse the LLM judge response into a JudgeEvaluation. - */ - private parseJudgeResponse(text: string, challenge: CodingChallengeEntity): JudgeEvaluation { - try { - // Extract JSON from response (may be wrapped in markdown code block) - const jsonMatch = text.match(/\{[\s\S]*\}/); - if (!jsonMatch) { - throw new Error('No JSON found in judge response'); - } - - const parsed = JSON.parse(jsonMatch[0]); - const score = Math.max(0, Math.min(100, Math.round(parsed.score ?? 0))); - - return { - score, - passed: score >= PASS_THRESHOLD, - feedback: parsed.feedback ?? 'No feedback provided', - criteriaScores: Array.isArray(parsed.criteriaScores) ? parsed.criteriaScores : [], - strengths: Array.isArray(parsed.strengths) ? parsed.strengths : [], - weaknesses: Array.isArray(parsed.weaknesses) ? parsed.weaknesses : [], - }; - } catch (error) { - log.warn(`Failed to parse judge response: ${error instanceof Error ? error.message : String(error)}`); - return { - score: 0, - passed: false, - feedback: `Judge response parsing failed: ${text.slice(0, 200)}`, - criteriaScores: [], - strengths: [], - weaknesses: [], - }; - } - } - - /** - * Simple heuristic scoring when LLM judge is unavailable. - * Based on execution success, file presence, and basic content checks. - */ - private heuristicScore( - challenge: CodingChallengeEntity, - resultFiles: Record, - executionResult: CodingResult, - ): JudgeEvaluation { - let score = 0; - const strengths: string[] = []; - const weaknesses: string[] = []; - - // Base score from execution status - if (executionResult.status === 'completed') { - score += 30; - strengths.push('All plan steps completed'); - } else if (executionResult.status === 'partial') { - score += 15; - weaknesses.push('Only partial execution completed'); - } else { - weaknesses.push(`Execution ${executionResult.status}`); - } - - // File presence check (30 points) - if (challenge.expectedFiles) { - const expectedPaths = Object.keys(challenge.expectedFiles); - const foundPaths = expectedPaths.filter(p => resultFiles[p] !== undefined); - const fileScore = expectedPaths.length > 0 - ? Math.round((foundPaths.length / expectedPaths.length) * 30) - : 0; - score += fileScore; - if (foundPaths.length === expectedPaths.length) { - strengths.push('All expected files present'); - } else { - weaknesses.push(`Missing ${expectedPaths.length - foundPaths.length} expected files`); - } - } else { - // No expected files β€” award points if any files were created/modified - if (executionResult.filesCreated.length > 0 || executionResult.filesModified.length > 0) { - score += 20; - strengths.push('Files were created/modified'); - } - } - - // Content match check (30 points) - if (challenge.expectedFiles) { - let contentMatches = 0; - let totalChecks = 0; - for (const [filePath, expectedContent] of Object.entries(challenge.expectedFiles)) { - if (resultFiles[filePath]) { - totalChecks++; - const actual = resultFiles[filePath].trim(); - const expected = expectedContent.trim(); - if (actual === expected) { - contentMatches++; - } else if (actual.includes(expected.split('\n')[0])) { - contentMatches += 0.5; - } - } - } - if (totalChecks > 0) { - score += Math.round((contentMatches / totalChecks) * 30); - } - } - - // Efficiency bonus (10 points) - const toolEfficiency = challenge.toolCallLimit > 0 - ? 1 - (executionResult.totalToolCalls / challenge.toolCallLimit) - : 0; - if (toolEfficiency > 0.5) { - score += 10; - strengths.push('Efficient tool call usage'); - } else if (toolEfficiency > 0.2) { - score += 5; - } - - score = Math.min(100, Math.max(0, score)); - - return { - score, - passed: score >= PASS_THRESHOLD, - feedback: `Heuristic evaluation (LLM judge unavailable): score=${score}`, - criteriaScores: challenge.evaluationCriteria.map(c => ({ - criterion: c, - score: score, - comment: 'Heuristic scoring β€” LLM judge unavailable', - })), - strengths, - weaknesses, - }; - } -} - -const JUDGE_SYSTEM_PROMPT = `You are a coding challenge evaluator. You assess AI-generated code solutions against specific criteria. - -Be strict but fair: -- Score 90-100: Excellent β€” meets all criteria, clean code, efficient -- Score 70-89: Good β€” meets most criteria, minor issues -- Score 50-69: Partial β€” some criteria met, significant gaps -- Score 30-49: Poor β€” major issues, few criteria met -- Score 0-29: Failed β€” solution doesn't address the task - -Always respond with valid JSON matching the requested schema. Be specific in feedback.`; diff --git a/src/debug/jtag/system/code/server/PlanFormulator.ts b/src/debug/jtag/system/code/server/PlanFormulator.ts deleted file mode 100644 index a99b6d590..000000000 --- a/src/debug/jtag/system/code/server/PlanFormulator.ts +++ /dev/null @@ -1,357 +0,0 @@ -/** - * PlanFormulator - LLM-powered task decomposition for coding tasks - * - * Takes a CodingTask + codebase context and produces a CodingPlan (DAG of steps). - * Uses a reasoning-class model (via CodingModelSelector) to decompose the task - * into concrete code/* command invocations. - * - * The LLM receives: - * - Task description - * - Available code/* tools with parameter schemas - * - Codebase context (tree, relevant file contents) - * - Constraints (max tool calls, max duration) - * - * The LLM returns a JSON CodingPlan that the CodeAgentOrchestrator executes. - */ - -import type { CodingTask, CodingPlan, CodingStep, CodingAction, RiskLevel, SecurityTierLevel } from '../shared/CodingTypes'; -import { CodingModelSelector } from './CodingModelSelector'; -import { AIProviderDaemon } from '../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; -import type { TextGenerationRequest, ChatMessage } from '../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2'; -import { Logger } from '../../core/logging/Logger'; -import { riskToTier } from './SecurityTier'; - -const log = Logger.create('PlanFormulator', 'code'); - -/** - * Available code/* tools for the LLM to plan with. - * Each entry describes what the tool does and its parameters. - */ -const CODE_TOOL_SCHEMAS: readonly { name: string; description: string; params: string }[] = [ - { - name: 'code/tree', - description: 'List directory tree structure. Shows files and directories with sizes.', - params: 'path?: string, maxDepth?: number, includeHidden?: boolean', - }, - { - name: 'code/search', - description: 'Search for a regex pattern across workspace files.', - params: 'pattern: string, fileGlob?: string, maxResults?: number', - }, - { - name: 'code/read', - description: 'Read file contents. Can specify line range.', - params: 'filePath: string, startLine?: number, endLine?: number', - }, - { - name: 'code/write', - description: 'Create or overwrite a file. Records a ChangeNode for undo.', - params: 'filePath: string, content: string, description?: string', - }, - { - name: 'code/edit', - description: 'Edit a file. Flat params β€” choose ONE editType. search_replace: { editType: "search_replace", search, replace, replaceAll? }. line_range: { editType: "line_range", startLine, endLine, newContent }. insert_at: { editType: "insert_at", line, content }. append: { editType: "append", content }.', - params: 'filePath: string, editType: "search_replace"|"line_range"|"insert_at"|"append", search?: string, replace?: string, replaceAll?: boolean, startLine?: number, endLine?: number, newContent?: string, line?: number, content?: string, description?: string', - }, - { - name: 'code/diff', - description: 'Preview an edit as unified diff without applying it. Same params as code/edit.', - params: 'filePath: string, editType: "search_replace"|"line_range"|"insert_at"|"append", (same params as code/edit)', - }, - { - name: 'code/undo', - description: 'Undo a specific change or the last N changes.', - params: 'changeId?: string, count?: number', - }, - { - name: 'code/history', - description: 'View change history for a file or workspace.', - params: 'filePath?: string, limit?: number', - }, - { - name: 'code/verify', - description: 'Run TypeScript compilation check and optionally run tests. Use after editing files to verify changes compile correctly.', - params: 'typeCheck?: boolean, testFiles?: string[]', - }, - { - name: 'code/git', - description: 'Workspace-scoped git operations. Use after verifying changes to stage and commit them. Operations: status, diff, log, add, commit.', - params: 'operation: "status"|"diff"|"log"|"add"|"commit", paths?: string[], message?: string, staged?: boolean, count?: number', - }, -] as const; - -/** Valid actions the LLM can use in plan steps */ -const VALID_ACTIONS: ReadonlySet = new Set([ - 'discover', 'search', 'read', 'write', 'edit', 'diff', 'undo', 'verify', 'commit', 'report', -]); - -/** Map from action to the expected code/* command */ -const ACTION_TO_COMMAND: Record = { - discover: 'code/tree', - search: 'code/search', - read: 'code/read', - write: 'code/write', - edit: 'code/edit', - diff: 'code/diff', - undo: 'code/undo', - verify: 'code/verify', - commit: 'code/git', - report: 'code/history', -}; - -export class PlanFormulator { - private readonly modelSelector: CodingModelSelector; - - constructor(modelSelector: CodingModelSelector) { - this.modelSelector = modelSelector; - } - - /** - * Generate a CodingPlan for a task. - * - * @param task - The coding task to plan - * @param codebaseContext - Optional pre-fetched context (tree output, file contents) - * @returns A validated CodingPlan ready for execution - */ - async formulate(task: CodingTask, codebaseContext?: string): Promise { - const startTime = performance.now(); - log.info(`Formulating plan for task: ${task.description.slice(0, 80)}...`); - - const tier = this.modelSelector.select('planning'); - const messages = this.buildPlanningPrompt(task, codebaseContext); - - const request: TextGenerationRequest = { - messages, - model: tier.model, - temperature: tier.temperature, - maxTokens: tier.maxTokens, - preferredProvider: tier.provider, - purpose: 'coding-plan', - userId: task.personaId, - }; - - const response = await AIProviderDaemon.generateText(request); - - if (!response.text) { - throw new Error('PlanFormulator: LLM returned empty response'); - } - - const plan = this.parsePlanResponse(response.text, task, tier.provider, tier.model); - const durationMs = performance.now() - startTime; - - log.info(`Plan generated: ${plan.steps.length} steps, ${plan.estimatedToolCalls} tool calls (${Math.round(durationMs)}ms)`); - return plan; - } - - /** - * Build the prompt messages for plan generation. - */ - private buildPlanningPrompt(task: CodingTask, codebaseContext?: string): ChatMessage[] { - const toolDocs = CODE_TOOL_SCHEMAS - .map(t => `- **${t.name}**: ${t.description}\n Params: ${t.params}`) - .join('\n'); - - const maxToolCalls = task.maxToolCalls ?? 15; - const maxDurationSec = Math.round((task.maxDurationMs ?? 120000) / 1000); - - const systemPrompt = `You are a coding agent planner. Your job is to decompose a coding task into a concrete plan of steps. - -## Available Tools -${toolDocs} - -## Constraints -- Maximum ${maxToolCalls} tool calls total -- Maximum ${maxDurationSec} seconds execution time -- Always read files before editing them -- Always verify changes after editing β€” use code/verify for compilation checks, or code/read to verify content -- Prefer code/edit over code/write for existing files -- Use code/tree and code/search for discovery before making changes - -## Output Format -Respond with ONLY a JSON object (no markdown, no explanation): -{ - "summary": "Brief description of the approach", - "riskLevel": "low|medium|high|critical", - "riskReason": "Why this risk level was assigned", - "steps": [ - { - "stepNumber": 1, - "action": "discover|search|read|write|edit|diff|undo|verify|commit|report", - "description": "What this step does", - "targetFiles": ["path/to/file.ts"], - "toolCall": "code/tree", - "toolParams": { "path": "src/" }, - "dependsOn": [], - "verification": "How to verify success" - } - ] -} - -## Tool Param Examples -- code/edit append: { "filePath": "main.ts", "editType": "append", "content": "\\nexport function foo() {}" } -- code/edit search_replace: { "filePath": "main.ts", "editType": "search_replace", "search": "old text", "replace": "new text" } -- code/edit line_range: { "filePath": "main.ts", "editType": "line_range", "startLine": 5, "endLine": 10, "newContent": "replacement lines" } -- code/write: { "filePath": "new-file.ts", "content": "export const x = 1;" } -- code/read: { "filePath": "main.ts" } -- code/verify: { "typeCheck": true } -- code/verify with tests: { "typeCheck": true, "testFiles": ["tests/utils.test.ts"] } -- code/git status: { "operation": "status" } -- code/git add: { "operation": "add", "paths": ["."] } -- code/git commit: { "operation": "commit", "message": "Add feature X" } - -## CRITICAL: search_replace Rules -- The "search" string must be the EXACT, COMPLETE text from the file β€” never truncated, never abbreviated -- NEVER use "..." or ellipsis in search strings. The search is a literal text match -- For replacing large blocks of code (functions, classes), prefer code/write to rewrite the ENTIRE file - with the desired content, rather than trying to search_replace multi-line blocks -- For small, precise changes (renaming, adding an import line), search_replace works well -- When removing code and adding an import, use code/write to output the complete new file content - -## Risk Assessment Guidelines -- **low**: Read-only tasks, documentation, test-only changes, single-file edits -- **medium**: Multi-file edits, adding new functions, standard refactoring -- **high**: API/interface changes, security-sensitive code, cross-module refactoring -- **critical**: System configuration, build scripts, deployment, anything requiring shell execution - -## Architecture Awareness -If architecture documentation is provided in the codebase context, follow its conventions strictly: -- Use the project's established patterns (Commands.execute, Events, path aliases, etc.) -- Respect module structure (shared/browser/server separation) -- Follow the compression principle (one logical decision, one place β€” no duplication) -- Use strict typing β€” never use \`any\` or \`unknown\`, import correct types -- Follow naming conventions visible in existing code -- When creating new files, match the structure of similar existing files - -## Rules -1. Steps are numbered starting from 1 -2. dependsOn lists step numbers that must complete first (DAG) -3. Independent steps CAN have the same dependsOn (parallel execution) -4. Every write/edit MUST have a preceding read of the same file -5. action must be one of: discover, search, read, write, edit, diff, undo, verify, commit, report -6. toolCall must match a code/* command from the tools list -7. toolParams must match the command's parameter schema -8. Keep plans minimal β€” don't add unnecessary steps -9. For multi-file refactoring: use code/write to rewrite entire files rather than search_replace on large blocks -10. NEVER truncate or abbreviate text in search_replace "search" strings β€” they must be EXACT literal matches`; - - const messages: ChatMessage[] = [ - { role: 'system', content: systemPrompt }, - ]; - - if (codebaseContext) { - messages.push({ - role: 'user', - content: `## Codebase Context\n${codebaseContext}`, - }); - } - - if (task.relevantFiles && task.relevantFiles.length > 0) { - messages.push({ - role: 'user', - content: `## Relevant Files (hints)\n${task.relevantFiles.join('\n')}`, - }); - } - - messages.push({ - role: 'user', - content: `## Task\n${task.description}\n\nGenerate the execution plan as JSON.`, - }); - - return messages; - } - - /** - * Parse and validate the LLM's plan response. - */ - private parsePlanResponse( - responseText: string, - task: CodingTask, - provider: string, - model: string, - ): CodingPlan { - // Extract JSON from response (LLM may wrap in markdown code blocks) - const jsonMatch = responseText.match(/\{[\s\S]*\}/); - if (!jsonMatch) { - throw new Error('PlanFormulator: No JSON object found in LLM response'); - } - - let raw: unknown; - try { - raw = JSON.parse(jsonMatch[0]); - } catch (e) { - throw new Error(`PlanFormulator: Invalid JSON in LLM response: ${(e as Error).message}`); - } - - const parsed = raw as { summary?: string; steps?: unknown[]; riskLevel?: string; riskReason?: string }; - - if (!parsed.summary || typeof parsed.summary !== 'string') { - throw new Error('PlanFormulator: Plan missing "summary" field'); - } - - // Extract and validate risk assessment - const VALID_RISK_LEVELS: ReadonlySet = new Set(['low', 'medium', 'high', 'critical']); - const riskLevel: RiskLevel = VALID_RISK_LEVELS.has(parsed.riskLevel ?? '') - ? (parsed.riskLevel as RiskLevel) - : 'medium'; // Default to medium if LLM omits or gives invalid value - const riskReason = typeof parsed.riskReason === 'string' ? parsed.riskReason : 'No risk reason provided'; - const requiredTier: SecurityTierLevel = riskToTier(riskLevel); - - if (!Array.isArray(parsed.steps) || parsed.steps.length === 0) { - throw new Error('PlanFormulator: Plan has no steps'); - } - - const maxToolCalls = task.maxToolCalls ?? 15; - if (parsed.steps.length > maxToolCalls) { - throw new Error(`PlanFormulator: Plan has ${parsed.steps.length} steps, exceeds max ${maxToolCalls}`); - } - - // Validate each step - const steps: CodingStep[] = parsed.steps.map((rawStep, index) => { - const step = rawStep as Record; - const stepNum = (step.stepNumber as number) ?? (index + 1); - - // Validate action - const action = step.action as string; - if (!VALID_ACTIONS.has(action)) { - throw new Error(`PlanFormulator: Step ${stepNum} has invalid action "${action}"`); - } - - // Validate toolCall - const toolCall = (step.toolCall as string) ?? ACTION_TO_COMMAND[action as CodingAction]; - if (!toolCall.startsWith('code/')) { - throw new Error(`PlanFormulator: Step ${stepNum} toolCall "${toolCall}" is not a code/* command`); - } - - // Validate dependsOn references - const dependsOn = (step.dependsOn as number[]) ?? []; - for (const dep of dependsOn) { - if (dep < 1 || dep >= stepNum) { - throw new Error(`PlanFormulator: Step ${stepNum} depends on invalid step ${dep}`); - } - } - - return { - stepNumber: stepNum, - action: action as CodingAction, - description: (step.description as string) ?? `Step ${stepNum}`, - targetFiles: (step.targetFiles as string[]) ?? [], - toolCall, - toolParams: (step.toolParams as Record) ?? {}, - dependsOn, - verification: (step.verification as string) ?? '', - }; - }); - - return { - taskId: task.id, - steps, - summary: parsed.summary, - estimatedToolCalls: steps.length, - generatedBy: { provider, model }, - generatedAt: Date.now(), - riskLevel, - riskReason, - requiredTier, - }; - } -} diff --git a/src/debug/jtag/system/code/server/PlanGovernance.ts b/src/debug/jtag/system/code/server/PlanGovernance.ts deleted file mode 100644 index a75246468..000000000 --- a/src/debug/jtag/system/code/server/PlanGovernance.ts +++ /dev/null @@ -1,151 +0,0 @@ -/** - * PlanGovernance - Risk-based approval routing for coding plans - * - * Determines whether a plan needs team approval before execution, - * creates DecisionProposals for review, and handles governance callbacks. - * - * Approval rules: - * - Auto-approve: single-agent + riskLevel low/medium - * - Require approval: multi-agent OR riskLevel high/critical - * - Always require: system-tier operations - */ - -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import type { RiskLevel, SecurityTierLevel } from '../shared/CodingTypes'; -import { CodingPlanEntity, type CodingPlanStatus } from '../../data/entities/CodingPlanEntity'; -import { riskRequiresApproval } from './SecurityTier'; -import { Logger } from '../../core/logging/Logger'; - -const log = Logger.create('PlanGovernance', 'code'); - -// ──────────────────────────────────────────────────────────── -// Governance decision outcomes -// ──────────────────────────────────────────────────────────── - -export type GovernanceOutcome = - | 'approved' - | 'approved_with_changes' - | 'changes_requested' - | 'rejected'; - -export interface GovernanceDecision { - readonly proposalId: UUID; - readonly outcome: GovernanceOutcome; - readonly reasoning: string; - readonly suggestedChanges?: string; -} - -// ──────────────────────────────────────────────────────────── -// Implementation -// ──────────────────────────────────────────────────────────── - -export class PlanGovernance { - - /** - * Determine if a plan needs team approval before execution. - */ - shouldRequireApproval(plan: CodingPlanEntity): boolean { - // System tier always requires approval - if (plan.securityTier === 'system') { - return true; - } - - // Delegate to SecurityTier's risk-based logic - const isMultiAgent = plan.assignees.length > 1; - return riskRequiresApproval(plan.riskLevel, isMultiAgent); - } - - /** - * Create a DecisionProposal for plan review. - * Returns the proposal ID, or undefined if proposal creation failed. - */ - async proposePlan(plan: CodingPlanEntity): Promise { - try { - // Dynamic import to avoid circular dependency - const { DecisionPropose } = await import( - '../../../commands/collaboration/decision/propose/shared/DecisionProposeTypes' - ); - - const fileList = this.extractTargetFiles(plan); - const stepSummary = plan.steps - .map(s => ` ${s.stepNumber}. [${s.action}] ${s.description}`) - .join('\n'); - - const rationale = [ - `**Task:** ${plan.taskDescription}`, - `**Approach:** ${plan.summary}`, - `**Risk Level:** ${plan.riskLevel} (${plan.riskReason ?? 'No reason provided'})`, - `**Security Tier:** ${plan.securityTier}`, - `**Assignees:** ${plan.assignees.length} agent(s)`, - `**Steps (${plan.steps.length}):**\n${stepSummary}`, - fileList.length > 0 ? `**Target Files:**\n${fileList.map(f => ` - ${f}`).join('\n')}` : '', - ].filter(Boolean).join('\n\n'); - - const result = await DecisionPropose.execute({ - topic: `Coding Plan: ${plan.summary}`, - rationale, - options: [ - { label: 'Approve', description: 'Approve the plan for execution' }, - { label: 'Approve with Changes', description: 'Approve with suggested modifications' }, - { label: 'Request Changes', description: 'Send back for revision' }, - { label: 'Reject', description: 'Decline this plan' }, - ], - scope: 'all', - significanceLevel: this.riskToSignificance(plan.riskLevel), - proposerId: plan.leadId, - }); - - if (result.success && result.proposalId) { - log.info(`Plan proposed for governance: ${result.proposalId} (plan: ${plan.taskId})`); - return result.proposalId; - } - - log.warn(`Governance proposal creation returned success=false`); - return undefined; - } catch (error) { - log.warn(`Governance proposal failed: ${error instanceof Error ? error.message : String(error)}`); - return undefined; - } - } - - /** - * Handle a governance decision callback. - * Returns the CodingPlanStatus the plan should transition to. - */ - resolveDecision(decision: GovernanceDecision): CodingPlanStatus { - switch (decision.outcome) { - case 'approved': - return 'approved'; - case 'approved_with_changes': - return 'approved'; - case 'changes_requested': - return 'draft'; - case 'rejected': - return 'cancelled'; - } - } - - // ──────────────────────────────────────────────────────────── - // Private helpers - // ──────────────────────────────────────────────────────────── - - /** - * Extract unique target files from all plan steps. - */ - private extractTargetFiles(plan: CodingPlanEntity): string[] { - const files = new Set(); - for (const step of plan.steps) { - for (const file of step.targetFiles) { - files.add(file); - } - } - return Array.from(files).sort(); - } - - /** - * Map risk level to governance significance. - */ - private riskToSignificance(risk: RiskLevel): 'low' | 'medium' | 'high' | 'critical' { - return risk; // 1:1 mapping β€” risk levels align with significance levels - } -} diff --git a/src/debug/jtag/system/code/server/Workspace.ts b/src/debug/jtag/system/code/server/Workspace.ts new file mode 100644 index 000000000..061ac8091 --- /dev/null +++ b/src/debug/jtag/system/code/server/Workspace.ts @@ -0,0 +1,310 @@ +/** + * Workspace - Bound handle for all workspace-scoped code operations + * + * Instead of passing userId/handle to every CodeDaemon call, + * create a Workspace instance and use it directly: + * + * const ws = await Workspace.create({ personaId, mode: 'worktree', taskSlug: 'fix-auth' }); + * const tree = await ws.tree(); + * const file = await ws.read('src/auth.ts'); + * await ws.edit('src/auth.ts', { editType: 'search_replace', search: 'old', replace: 'new' }); + * const result = await ws.verify(true); + * if (!result.success) { // read errors, fix, verify again } + * await ws.gitAdd(['.']); + * await ws.gitCommit('Fix auth token validation'); + * await ws.destroy(); + */ + +import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import type { + WorkspaceEditMode, + WorkspaceWriteResult, + WorkspaceReadResult, + WorkspaceSearchResult, + WorkspaceTreeResult, + WorkspaceUndoResult, + WorkspaceHistoryResult, + WorkspaceGitStatusInfo, + WorkspaceShellExecuteResponse, + WorkspaceShellPollResponse, + WorkspaceShellSessionInfo, + WorkspaceShellWatchResponse, + WorkspaceSentinelRule, + WorkspaceClassifiedLine, +} from '../../../daemons/code-daemon/shared/CodeDaemonTypes'; +import { WorkspaceStrategy } from './WorkspaceStrategy'; +import type { WorkspaceMode, WorkspaceConfig } from './WorkspaceStrategy'; +import { CodeVerify, type CodeVerifyResult } from '../../../commands/code/verify/shared/CodeVerifyTypes'; + +export class Workspace { + + private constructor( + /** Handle string used to identify this workspace in the Rust backend */ + readonly handle: string, + /** Absolute path to the workspace directory on disk */ + readonly dir: string, + /** Whether this is a sandbox or git worktree workspace */ + readonly mode: WorkspaceMode, + /** Git branch name (worktree mode only) */ + readonly branch?: string, + ) {} + + /** + * Create and register a new workspace. + * Returns a bound handle that encapsulates all workspace operations. + */ + static async create(config: WorkspaceConfig): Promise { + const result = await WorkspaceStrategy.create(config); + return new Workspace(result.handle, result.workspaceDir, result.mode, result.branch); + } + + /** + * Create a Workspace from an already-initialized handle. + * Useful when resuming a workspace that was previously created. + */ + static fromExisting(handle: string, dir: string, mode: WorkspaceMode, branch?: string): Workspace { + return new Workspace(handle, dir, mode, branch); + } + + // ════════════════════════════════════════════════════════════ + // File Operations + // ════════════════════════════════════════════════════════════ + + /** Read a file from this workspace */ + async read(filePath: string, startLine?: number, endLine?: number): Promise { + return CodeDaemon.workspaceRead(this.handle, filePath, startLine, endLine); + } + + /** Write or create a file in this workspace */ + async write(filePath: string, content: string, description?: string): Promise { + return CodeDaemon.workspaceWrite(this.handle, filePath, content, description); + } + + /** Edit a file using one of four edit modes */ + async edit(filePath: string, editMode: WorkspaceEditMode, description?: string): Promise { + return CodeDaemon.workspaceEdit(this.handle, filePath, editMode, description); + } + + /** Delete a file from this workspace */ + async delete(filePath: string, description?: string): Promise { + return CodeDaemon.workspaceDelete(this.handle, filePath, description); + } + + /** Preview an edit as unified diff without applying */ + async diff(filePath: string, editMode: WorkspaceEditMode): Promise<{ success: boolean; unified: string }> { + return CodeDaemon.workspaceDiff(this.handle, filePath, editMode); + } + + // ════════════════════════════════════════════════════════════ + // Search & Discovery + // ════════════════════════════════════════════════════════════ + + /** Search for a regex pattern across workspace files */ + async search(pattern: string, fileGlob?: string, maxResults?: number): Promise { + return CodeDaemon.workspaceSearch(this.handle, pattern, fileGlob, maxResults); + } + + /** Get directory tree structure */ + async tree(path?: string, maxDepth?: number, includeHidden?: boolean): Promise { + return CodeDaemon.workspaceTree(this.handle, path, maxDepth, includeHidden); + } + + // ════════════════════════════════════════════════════════════ + // Change Tracking + // ════════════════════════════════════════════════════════════ + + /** Undo a specific change or the last N changes */ + async undo(changeId?: string, count?: number): Promise { + return CodeDaemon.workspaceUndo(this.handle, changeId, count); + } + + /** Get change history for a file or entire workspace */ + async history(filePath?: string, limit?: number): Promise { + return CodeDaemon.workspaceHistory(this.handle, filePath, limit); + } + + // ════════════════════════════════════════════════════════════ + // Verification + // ════════════════════════════════════════════════════════════ + + /** Run TypeScript compilation check and optionally tests */ + async verify(typeCheck?: boolean, testFiles?: string[]): Promise { + return CodeVerify.execute({ + userId: this.handle, + typeCheck, + testFiles, + }); + } + + // ════════════════════════════════════════════════════════════ + // Git Operations + // ════════════════════════════════════════════════════════════ + + /** Get git status for this workspace */ + async gitStatus(): Promise { + return CodeDaemon.workspaceGitStatus(this.handle); + } + + /** Get git diff (staged or unstaged) */ + async gitDiff(staged?: boolean): Promise<{ success: boolean; diff: string }> { + return CodeDaemon.workspaceGitDiff(this.handle, staged); + } + + /** Get git log (last N commits) */ + async gitLog(count?: number): Promise<{ success: boolean; log: string }> { + return CodeDaemon.workspaceGitLog(this.handle, count); + } + + /** Stage files for commit */ + async gitAdd(paths: string[]): Promise<{ staged: string[] }> { + return CodeDaemon.workspaceGitAdd(this.handle, paths); + } + + /** Create a git commit */ + async gitCommit(message: string): Promise<{ hash: string }> { + return CodeDaemon.workspaceGitCommit(this.handle, message); + } + + /** Push the workspace branch to remote */ + async gitPush(remote?: string, branch?: string): Promise<{ output: string }> { + return CodeDaemon.workspaceGitPush(this.handle, remote, branch); + } + + // ════════════════════════════════════════════════════════════ + // Shell Session (Rust-backed, persistent per workspace) + // ════════════════════════════════════════════════════════════ + + private _shellCreated = false; + + /** + * Ensure the Rust-side shell session exists for this workspace. + * Called automatically by shell methods β€” idempotent after first call. + */ + private async ensureShell(): Promise { + if (this._shellCreated) return; + await CodeDaemon.shellCreate(this.handle, this.dir); + this._shellCreated = true; + } + + /** + * Execute a shell command synchronously (blocks until completion). + * Use for quick commands: `git status`, `npm test`, `ls`. + * + * The shell session retains cwd and env across calls β€” just like + * a real terminal. First call auto-creates the session. + */ + async exec(cmd: string, timeoutMs?: number): Promise { + await this.ensureShell(); + return CodeDaemon.shellExecute(this.handle, cmd, { + timeoutMs: timeoutMs ?? 30000, + wait: true, + }); + } + + /** + * Execute a shell command asynchronously (returns handle immediately). + * Use for long-running commands: `cargo build`, `npm run build`. + * + * Returns an execution_id. Call shellPoll() to stream output, + * shellKill() to abort. + */ + async execAsync(cmd: string, timeoutMs?: number): Promise { + await this.ensureShell(); + return CodeDaemon.shellExecute(this.handle, cmd, { + timeoutMs, + wait: false, + }); + } + + /** Poll a running execution for new stdout/stderr since last poll */ + async shellPoll(executionId: string): Promise { + return CodeDaemon.shellPoll(this.handle, executionId); + } + + /** Kill a running execution */ + async shellKill(executionId: string): Promise { + return CodeDaemon.shellKill(this.handle, executionId); + } + + /** Change the shell session's working directory */ + async shellCd(path: string): Promise<{ cwd: string }> { + await this.ensureShell(); + return CodeDaemon.shellCd(this.handle, path); + } + + /** Get shell session info (cwd, env, running executions) */ + async shellStatus(): Promise { + await this.ensureShell(); + return CodeDaemon.shellStatus(this.handle); + } + + // ════════════════════════════════════════════════════════════ + // Shell Watch + Sentinel (Event-driven output streaming) + // ════════════════════════════════════════════════════════════ + + /** + * Configure sentinel filter rules on a running execution. + * Rules classify output lines and control which are emitted or suppressed during watch(). + * Patterns are compiled to regex on the Rust side for performance. + */ + async sentinel(executionId: string, rules: WorkspaceSentinelRule[]): Promise<{ applied: boolean; ruleCount: number }> { + return CodeDaemon.shellSentinel(this.handle, executionId, rules); + } + + /** + * Watch a running execution for new output. + * Blocks until output is available β€” no timeout, no polling. + * Returns classified lines filtered through sentinel rules. + * Call in a loop until `finished` is true. + */ + async watch(executionId: string): Promise { + await this.ensureShell(); + return CodeDaemon.shellWatch(this.handle, executionId); + } + + /** + * Execute a command and watch its output with optional sentinel filtering. + * Convenience composition: exec β†’ sentinel β†’ watch loop. + * + * @param cmd Command to execute + * @param rules Optional sentinel filter rules + * @param onLine Optional callback for each classified line + * @returns Final watch response (finished=true, has exit_code) + */ + async execWatch( + cmd: string, + rules?: WorkspaceSentinelRule[], + onLine?: (line: WorkspaceClassifiedLine) => void, + ): Promise { + const exec = await this.execAsync(cmd); + + if (rules?.length) { + await this.sentinel(exec.execution_id, rules); + } + + let response: WorkspaceShellWatchResponse; + do { + response = await this.watch(exec.execution_id); + if (onLine) { + for (const line of response.lines) { + onLine(line); + } + } + } while (!response.finished); + + return response; + } + + // ════════════════════════════════════════════════════════════ + // Lifecycle + // ════════════════════════════════════════════════════════════ + + /** Clean up this workspace (shell session + worktree removal + handle deregistration) */ + async destroy(options?: { force?: boolean; deleteBranch?: boolean }): Promise { + if (this._shellCreated) { + await CodeDaemon.shellDestroy(this.handle); + this._shellCreated = false; + } + await WorkspaceStrategy.cleanup(this.handle, options); + } +} diff --git a/src/debug/jtag/system/code/shared/CodingTypes.ts b/src/debug/jtag/system/code/shared/CodingTypes.ts index a643eed82..aa0c276b9 100644 --- a/src/debug/jtag/system/code/shared/CodingTypes.ts +++ b/src/debug/jtag/system/code/shared/CodingTypes.ts @@ -1,12 +1,11 @@ /** - * Coding Agent Types - Shared type definitions for the coding agent system + * Coding Types - Shared type definitions for the coding system * * Defines the data structures for: - * - CodingTask: What the agent needs to accomplish - * - CodingPlan: DAG of steps to accomplish the task - * - CodingStep: Individual operation in the plan - * - CodingResult: Outcome of executing a plan - * - CodingModelTier: Model selection by task complexity + * - Security & risk levels for workspace operations + * - Model selection by task complexity + * - Coding actions that map to code/* commands + * - Coding tasks that describe work to be done */ import type { UUID } from '../../core/types/CrossPlatformUUID'; @@ -16,14 +15,14 @@ import type { UUID } from '../../core/types/CrossPlatformUUID'; // ============================================================================ /** - * Risk level assessed by PlanFormulator for a coding plan. - * Determines security tier and whether governance approval is needed. + * Risk level for coding operations. + * Determines security tier and oversight requirements. */ export type RiskLevel = 'low' | 'medium' | 'high' | 'critical'; /** - * Security tier that governs which tools a plan can use. - * Assigned based on risk level; higher tiers require more oversight. + * Security tier that governs which tools are available. + * Higher tiers require more oversight. */ export type SecurityTierLevel = 'discovery' | 'read' | 'write' | 'system'; @@ -56,13 +55,33 @@ export interface CodingModelTier { readonly description: string; } +// ============================================================================ +// Coding Actions +// ============================================================================ + +/** + * Actions a coding operation can perform. + * Each maps to a code/* command. + */ +export type CodingAction = + | 'discover' // code/tree β€” explore structure + | 'search' // code/search β€” find patterns + | 'read' // code/read β€” read file contents + | 'write' // code/write β€” create/overwrite file + | 'edit' // code/edit β€” partial edit + | 'diff' // code/diff β€” preview changes + | 'undo' // code/undo β€” revert changes + | 'verify' // code/verify β€” build/test verification + | 'commit' // code/git β€” stage and commit changes + | 'report'; // Meta: summarize what was done + // ============================================================================ // Coding Task // ============================================================================ /** - * A coding task is the input to the coding agent system. - * It describes what needs to be done, who's doing it, and constraints. + * A coding task describes what needs to be done in a workspace. + * Used by the coding activity to drive agent work. */ export interface CodingTask { /** Unique task ID */ @@ -83,17 +102,12 @@ export interface CodingTask { /** Files already known to be relevant (hints for discovery) */ readonly relevantFiles?: string[]; - /** Maximum execution time in milliseconds (default: 120000) */ + /** Maximum execution time in milliseconds */ readonly maxDurationMs?: number; - /** Maximum number of tool calls allowed (default: 15) */ - readonly maxToolCalls?: number; - /** * Workspace handle β€” identifies which Rust workspace to use for code/* operations. * Defaults to personaId (general persona workspace). - * Challenges and other isolated contexts register their own handle via - * CodeDaemon.createWorkspace(handle, dir) and pass it here. */ readonly workspaceHandle?: string; @@ -104,252 +118,9 @@ export interface CodingTask { */ readonly workspaceMode?: 'sandbox' | 'worktree'; - /** Paths to sparse-checkout when using worktree mode (e.g., ["src/system/code/", "docs/"]) */ + /** Paths to sparse-checkout when using worktree mode */ readonly sparsePaths?: string[]; /** When the task was created */ readonly createdAt: number; } - -// ============================================================================ -// Coding Plan (DAG of Steps) -// ============================================================================ - -/** - * Actions a coding step can perform. - * Each maps to a code/* command or meta-operation. - */ -export type CodingAction = - | 'discover' // code/tree β€” explore structure - | 'search' // code/search β€” find patterns - | 'read' // code/read β€” read file contents - | 'write' // code/write β€” create/overwrite file - | 'edit' // code/edit β€” partial edit - | 'diff' // code/diff β€” preview changes - | 'undo' // code/undo β€” revert changes - | 'verify' // code/verify β€” build/test verification - | 'commit' // code/git β€” stage and commit changes - | 'report'; // Meta: summarize what was done - -/** - * A single step in a CodingPlan. - * Steps form a DAG via dependsOn β€” independent steps can execute in parallel. - */ -export interface CodingStep { - /** Step number (1-indexed, unique within plan) */ - readonly stepNumber: number; - - /** What this step does */ - readonly action: CodingAction; - - /** Human-readable description of what this step accomplishes */ - readonly description: string; - - /** Files this step will operate on */ - readonly targetFiles: string[]; - - /** Which code/* command to execute (e.g., 'code/read', 'code/edit') */ - readonly toolCall: string; - - /** Parameters for the tool call */ - readonly toolParams: Record; - - /** Steps that must complete before this one (DAG edges) */ - readonly dependsOn: number[]; - - /** How to verify this step succeeded */ - readonly verification: string; -} - -/** - * A coding plan is a DAG of CodingSteps produced by the PlanFormulator. - * The orchestrator executes steps respecting dependency ordering. - */ -export interface CodingPlan { - /** The task this plan addresses */ - readonly taskId: UUID; - - /** Ordered steps (topologically sorted) */ - readonly steps: CodingStep[]; - - /** High-level summary of the approach */ - readonly summary: string; - - /** Estimated total tool calls */ - readonly estimatedToolCalls: number; - - /** Which model generated this plan */ - readonly generatedBy: { - readonly provider: string; - readonly model: string; - }; - - /** When the plan was generated */ - readonly generatedAt: number; - - /** Risk level assessed by PlanFormulator */ - readonly riskLevel: RiskLevel; - - /** Why this risk level was assigned */ - readonly riskReason: string; - - /** Minimum security tier required for execution */ - readonly requiredTier: SecurityTierLevel; -} - -// ============================================================================ -// Step Execution Result -// ============================================================================ - -export type StepStatus = 'pending' | 'running' | 'completed' | 'failed' | 'skipped'; - -/** - * Result of executing a single CodingStep. - */ -export interface StepResult { - /** Which step */ - readonly stepNumber: number; - - /** Execution status */ - readonly status: StepStatus; - - /** Command output (if any) */ - readonly output?: unknown; - - /** Error message (if failed) */ - readonly error?: string; - - /** Execution time in milliseconds */ - readonly durationMs: number; - - /** Tool call used */ - readonly toolCall: string; -} - -// ============================================================================ -// Coding Result (Final Output) -// ============================================================================ - -export type CodingResultStatus = 'completed' | 'partial' | 'failed' | 'budget_exceeded' | 'pending_approval'; - -/** - * Final result of executing a coding task. - */ -export interface CodingResult { - /** The task that was executed */ - readonly taskId: UUID; - - /** Overall status */ - readonly status: CodingResultStatus; - - /** Summary of what was accomplished */ - readonly summary: string; - - /** Results for each step */ - readonly stepResults: StepResult[]; - - /** Files that were modified */ - readonly filesModified: string[]; - - /** Files that were created */ - readonly filesCreated: string[]; - - /** Total tool calls used */ - readonly totalToolCalls: number; - - /** Total execution time in milliseconds */ - readonly totalDurationMs: number; - - /** Change IDs from code/write and code/edit for potential undo */ - readonly changeIds: string[]; - - /** Errors encountered */ - readonly errors: string[]; - - /** Governance proposal ID (set when status is 'pending_approval') */ - readonly proposalId?: string; - - /** Plan metadata (risk level, tier, plan summary) */ - readonly planMetadata?: { - readonly riskLevel: RiskLevel; - readonly requiredTier: SecurityTierLevel; - readonly planSummary: string; - }; -} - -// ============================================================================ -// Execution Options (Phase 4C: Multi-Agent Coordination) -// ============================================================================ - -/** - * Options controlling how a coding plan is executed. - * Passed to CodeAgentOrchestrator.execute(). - */ -export interface ExecutionOptions { - /** Execute but don't write β€” report what would happen */ - readonly dryRun?: boolean; - - /** Override the security tier (defaults to plan's requiredTier) */ - readonly securityTier?: SecurityTierLevel; - - /** Enable multi-agent delegation for this execution */ - readonly delegationEnabled?: boolean; - - /** Run TypeScript verification after write/edit steps (default: true) */ - readonly autoVerify?: boolean; - - /** Max verifyβ†’re-plan iterations when verification fails (default: 2) */ - readonly maxVerifyIterations?: number; -} - -// ============================================================================ -// Agent Capability (Phase 4C: Multi-Agent Delegation) -// ============================================================================ - -/** - * Describes an AI persona's capabilities for coding task delegation. - * Used by CodeTaskDelegator to match tasks to agents. - */ -export interface AgentCapability { - /** Persona ID */ - readonly personaId: UUID; - - /** Persona display name */ - readonly name: string; - - /** Coding specialties (e.g., 'typescript', 'testing', 'code-review') */ - readonly specialties: string[]; - - /** Current workload fraction (0.0 = idle, 1.0 = fully loaded) */ - readonly currentLoad: number; - - /** Security tier this agent is authorized for */ - readonly securityTier: SecurityTierLevel; -} - -// ============================================================================ -// Delegation Result (Phase 4C: Multi-Agent Coordination) -// ============================================================================ - -/** - * Result of delegating a plan to multiple agents. - */ -export interface DelegationResult { - /** Parent plan ID */ - readonly parentPlanId: UUID; - - /** Sub-plan IDs created for each agent cluster */ - readonly subPlanIds: UUID[]; - - /** Files assigned to each sub-plan */ - readonly assignments: ReadonlyArray<{ - readonly subPlanId: UUID; - readonly agentId: UUID; - readonly agentName: string; - readonly files: string[]; - readonly stepNumbers: number[]; - }>; - - /** Files with conflicts (claimed by multiple clusters) */ - readonly conflicts: string[]; -} diff --git a/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts b/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts deleted file mode 100644 index bf4265a22..000000000 --- a/src/debug/jtag/system/coordination/server/CodeCoordinationStream.ts +++ /dev/null @@ -1,349 +0,0 @@ -/** - * Code Coordination Stream - File-level MUTEX for multi-agent coding - * - * Extends BaseCoordinationStream to coordinate coding agents: - * - File-level locking: multiple agents CAN work in parallel if they touch different files - * - Conflict detection: overlapping file claims are detected and resolved - * - Lock release: automatic on step completion or plan finalization - * - * RTOS analogy: - * - Each file is a MUTEX β€” only one agent can hold it - * - The coordination stream manages MUTEX acquisition/release - * - Agents broadcast their target files as "thoughts" - * - The decision grants non-overlapping claims, defers the rest - * - * Config differences from Chat: - * - maxResponders: 5 (more parallel coding workers) - * - intentionWindowMs: 3000ms (coding needs more coordination time) - */ - -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import { - BaseCoordinationStream, - type BaseThought, - type BaseDecision, - type BaseStream, -} from '../shared/BaseCoordinationStream'; - -// ──────────────────────────────────────────────────────────── -// Domain-specific types -// ──────────────────────────────────────────────────────────── - -/** - * Code-specific thought β€” a persona's claim to work on specific files. - */ -export interface CodeThought extends BaseThought { - /** Plan this thought relates to */ - planId: UUID; - - /** Files this agent intends to modify */ - targetFiles: string[]; - - /** Which plan steps this agent intends to execute */ - stepNumbers: number[]; -} - -/** - * Code-specific decision β€” file lock assignments and conflict report. - */ -export interface CodeDecision extends BaseDecision { - /** Plan this decision relates to */ - planId: UUID; - - /** File β†’ persona ID mapping of granted locks */ - fileLocks: Map; - - /** Files that were claimed by multiple agents (conflict detected) */ - conflicts: string[]; -} - -/** - * Code-specific stream state. - */ -export interface CodeStream extends BaseStream { - /** Plan being coordinated */ - planId: UUID; - - /** Current file locks: file path β†’ persona holding the lock */ - fileLocks: Map; -} - -// ──────────────────────────────────────────────────────────── -// Implementation -// ──────────────────────────────────────────────────────────── - -export class CodeCoordinationStream extends BaseCoordinationStream { - - /** Global file locks across all streams (prevents cross-plan conflicts) */ - private _globalFileLocks = new Map(); - - constructor() { - super({ - intentionWindowMs: 3000, // 3 seconds β€” coding needs more coordination time - maxResponders: 5, // Up to 5 parallel coding agents - enableLogging: true, - cleanupIntervalMs: 60000, // 1 minute β€” coding streams live longer - }); - } - - // ════════════════════════════════════════════════════════════ - // ABSTRACT METHOD IMPLEMENTATIONS - // ════════════════════════════════════════════════════════════ - - protected getDomainName(): string { - return 'Code'; - } - - protected createStream(eventId: string, contextId: UUID): CodeStream { - const maxResponders = this.getMaxResponders(); - - return { - eventId, - contextId, - phase: 'gathering', - thoughts: [], - considerations: new Map(), - startTime: Date.now(), - availableSlots: maxResponders, - claimedBy: new Set(), - - // Code-specific - planId: contextId, // contextId IS the planId for coding - fileLocks: new Map(), - }; - } - - protected convertDecision(baseDecision: BaseDecision, stream: CodeStream): CodeDecision { - // Collect all conflicts: files claimed by multiple personas - const fileClaims = new Map(); - for (const thought of stream.thoughts) { - if (thought.type === 'claiming') { - for (const file of thought.targetFiles) { - const existing = fileClaims.get(file) ?? []; - existing.push(thought.personaId); - fileClaims.set(file, existing); - } - } - } - - const conflicts: string[] = []; - for (const [file, claimants] of fileClaims) { - if (claimants.length > 1) { - conflicts.push(file); - } - } - - return { - ...baseDecision, - planId: stream.planId, - fileLocks: new Map(stream.fileLocks), - conflicts, - }; - } - - protected getEventLogContext(eventId: string): string { - return `plan ${eventId.slice(0, 8)}`; - } - - // ════════════════════════════════════════════════════════════ - // HOOK OVERRIDES - // ════════════════════════════════════════════════════════════ - - /** - * Validate a claim: check that the persona's target files are not already locked - * by another persona (either in this stream or globally). - */ - protected onClaim(stream: CodeStream, thought: CodeThought): boolean { - for (const file of thought.targetFiles) { - // Check global locks (cross-plan) - const globalHolder = this._globalFileLocks.get(file); - if (globalHolder && globalHolder !== thought.personaId) { - this.log(`Claim rejected: ${file} globally locked by ${globalHolder.slice(0, 8)}`); - return false; - } - - // Check stream-level locks (within same plan) - const streamHolder = stream.fileLocks.get(file); - if (streamHolder && streamHolder !== thought.personaId) { - this.log(`Claim rejected: ${file} locked by ${streamHolder.slice(0, 8)} in stream`); - return false; - } - } - - // Acquire locks for all target files - for (const file of thought.targetFiles) { - stream.fileLocks.set(file, thought.personaId); - this._globalFileLocks.set(file, thought.personaId); - } - - return true; - } - - /** - * After decision: log file lock summary. - */ - protected onDecisionMade(stream: CodeStream, decision: CodeDecision): void { - if (decision.conflicts.length > 0) { - this.log(`Conflicts detected: ${decision.conflicts.join(', ')}`); - } - this.log(`File locks: ${stream.fileLocks.size} files locked across ${decision.granted.length} agents`); - } - - /** - * Coding tasks are often single-agent β€” decide immediately if only one thought. - * For multi-agent, wait for the intention window. - */ - protected canDecideEarly(stream: CodeStream): boolean { - // If only one claimer and no one else is expected, decide immediately - if (stream.thoughts.length >= 1 && stream.claimedBy.size >= 1) { - // But wait if we might get more thoughts - const elapsed = Date.now() - stream.startTime; - if (elapsed > 1000) return true; // 1s grace period - } - return stream.thoughts.length >= 5; // Max parallel agents - } - - /** - * Coding streams use deterministic slot allocation (not probabilistic). - * All available agents get a slot (up to maxResponders). - */ - protected getMaxResponders(): number { - return this.config.maxResponders; // Deterministic: 5 - } - - /** - * Coding streams live longer β€” plans take time to execute. - */ - protected getStreamMaxAge(stream: CodeStream): number { - if (stream.phase === 'decided') return 30000; // 30s after decision - return 300000; // 5 min for gathering - } - - // ════════════════════════════════════════════════════════════ - // PUBLIC CODE-SPECIFIC API - // ════════════════════════════════════════════════════════════ - - /** - * Broadcast a coding thought for file-level coordination. - */ - async broadcastCodeThought( - planId: UUID, - thought: CodeThought, - ): Promise { - thought.planId = planId; - await this.broadcastThought(planId, planId, thought); - } - - /** - * Wait for a coding coordination decision. - */ - async waitForCodeDecision(planId: UUID, timeoutMs?: number): Promise { - return this.waitForDecision(planId, timeoutMs ?? 5000); - } - - /** - * Check if persona can work on specific files within a plan. - */ - async canWorkOnFiles(personaId: UUID, planId: UUID, files: string[]): Promise { - const stream = this.getStream(planId); - if (!stream) return true; // No coordination active β€” allow - - for (const file of files) { - const holder = stream.fileLocks.get(file); - if (holder && holder !== personaId) { - return false; - } - } - return true; - } - - /** - * Release file locks held by a persona (called after step/plan completion). - */ - releaseLocks(personaId: UUID, planId?: UUID): void { - // Release global locks - for (const [file, holder] of Array.from(this._globalFileLocks.entries())) { - if (holder === personaId) { - this._globalFileLocks.delete(file); - } - } - - // Release stream-level locks - if (planId) { - const stream = this.getStream(planId); - if (stream) { - for (const [file, holder] of Array.from(stream.fileLocks.entries())) { - if (holder === personaId) { - stream.fileLocks.delete(file); - } - } - } - } else { - // Release from all streams - for (const stream of this.streams.values()) { - for (const [file, holder] of Array.from(stream.fileLocks.entries())) { - if (holder === personaId) { - stream.fileLocks.delete(file); - } - } - } - } - - this.log(`Released locks for persona ${personaId.slice(0, 8)}`); - } - - /** - * Get all files currently locked and who holds them. - */ - get globalFileLocks(): ReadonlyMap { - return this._globalFileLocks; - } - - /** - * Check if a specific file is locked. - */ - isFileLocked(filePath: string): boolean { - return this._globalFileLocks.has(filePath); - } - - /** - * Get the persona holding a lock on a file (if any). - */ - lockHolder(filePath: string): UUID | undefined { - return this._globalFileLocks.get(filePath); - } - - /** - * Override shutdown to clear global locks. - */ - override shutdown(): void { - this._globalFileLocks.clear(); - super.shutdown(); - } -} - -// ════════════════════════════════════════════════════════════ -// SINGLETON PATTERN -// ════════════════════════════════════════════════════════════ - -let codeCoordinatorInstance: CodeCoordinationStream | null = null; - -/** - * Get global code coordinator instance. - */ -export function getCodeCoordinator(): CodeCoordinationStream { - if (!codeCoordinatorInstance) { - codeCoordinatorInstance = new CodeCoordinationStream(); - } - return codeCoordinatorInstance; -} - -/** - * Reset code coordinator (for testing). - */ -export function resetCodeCoordinator(): void { - if (codeCoordinatorInstance) { - codeCoordinatorInstance.shutdown(); - codeCoordinatorInstance = null; - } -} diff --git a/src/debug/jtag/system/data/constants/RoomConstants.ts b/src/debug/jtag/system/data/constants/RoomConstants.ts index 45b9aa289..3aa025cd7 100644 --- a/src/debug/jtag/system/data/constants/RoomConstants.ts +++ b/src/debug/jtag/system/data/constants/RoomConstants.ts @@ -15,7 +15,8 @@ export const ROOM_UNIQUE_IDS = { SETTINGS: 'settings', CANVAS: 'canvas', OUTREACH: 'outreach', - NEWSROOM: 'newsroom' + NEWSROOM: 'newsroom', + CODE: 'code' } as const; export type RoomUniqueId = typeof ROOM_UNIQUE_IDS[keyof typeof ROOM_UNIQUE_IDS]; diff --git a/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts b/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts deleted file mode 100644 index c163cb130..000000000 --- a/src/debug/jtag/system/data/entities/CodingChallengeEntity.ts +++ /dev/null @@ -1,276 +0,0 @@ -/** - * CodingChallengeEntity - Progressive coding challenges for AI training - * - * Defines challenge specifications and tracks attempt results. - * Challenges are progressive: beginner β†’ intermediate β†’ advanced β†’ expert. - * Each challenge has: - * - Setup files (initial codebase state) - * - Expected outcome description - * - Evaluation criteria (rubric for AI judge) - * - Resource limits (time, tool calls) - * - Attempt history with scores - * - * Used by CodingChallengeRunner to execute and CodingJudge to evaluate. - * Failed attempts feed into LoRA training data capture. - */ - -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import { - TextField, - NumberField, - JsonField, - EnumField, - CompositeIndex, -} from '../decorators/FieldDecorators'; -import { BaseEntity } from './BaseEntity'; -import { COLLECTIONS } from '../../shared/Constants'; - -// ──────────────────────────────────────────────────────────── -// Challenge difficulty -// ──────────────────────────────────────────────────────────── - -export type ChallengeDifficulty = 'beginner' | 'intermediate' | 'advanced' | 'expert'; - -// ──────────────────────────────────────────────────────────── -// Challenge category -// ──────────────────────────────────────────────────────────── - -export type ChallengeCategory = - | 'single-file' // Operations on one file - | 'multi-file' // Cross-file coordination - | 'refactoring' // Extract, rename, restructure - | 'bug-fix' // Find and fix defects - | 'feature' // Add new functionality - | 'architecture' // Large-scale structural changes - | 'discovery'; // Codebase exploration and analysis - -// ──────────────────────────────────────────────────────────── -// Challenge attempt result -// ──────────────────────────────────────────────────────────── - -export type AttemptStatus = 'passed' | 'failed' | 'partial' | 'timeout' | 'error'; - -export interface ChallengeAttempt { - /** Which AI attempted this */ - personaId: UUID; - /** CodingPlan that was executed */ - planId?: UUID; - /** When the attempt started */ - startedAt: number; - /** When the attempt finished */ - completedAt: number; - /** Outcome */ - status: AttemptStatus; - /** AI judge score (0-100) */ - score: number; - /** AI judge feedback */ - feedback: string; - /** Files modified during the attempt */ - filesModified: string[]; - /** Files created during the attempt */ - filesCreated: string[]; - /** Errors encountered */ - errors: string[]; - /** Tool calls consumed */ - toolCallsUsed: number; - /** Total duration in milliseconds */ - durationMs: number; - /** File contents after execution (for judge evaluation) */ - resultFiles?: Record; -} - -// ──────────────────────────────────────────────────────────── -// Entity -// ──────────────────────────────────────────────────────────── - -@CompositeIndex({ - name: 'idx_coding_challenges_difficulty', - fields: ['difficulty', 'category'], - direction: 'ASC', -}) -@CompositeIndex({ - name: 'idx_coding_challenges_order', - fields: ['sequenceNumber'], - direction: 'ASC', -}) -export class CodingChallengeEntity extends BaseEntity { - static readonly collection = COLLECTIONS.CODING_CHALLENGES; - - // ── Identity ────────────────────────────────────────────── - - /** Human-readable challenge name */ - @TextField({ index: true }) - name!: string; - - /** Challenge description β€” what the AI needs to accomplish */ - @TextField() - description!: string; - - /** Ordering for progressive difficulty */ - @NumberField() - sequenceNumber!: number; - - // ── Classification ──────────────────────────────────────── - - @EnumField() - difficulty!: ChallengeDifficulty; - - @EnumField() - category!: ChallengeCategory; - - // ── Challenge specification ─────────────────────────────── - - /** Initial file contents that define the challenge workspace */ - @JsonField() - setupFiles!: Record; - - /** What success looks like (natural language for AI judge) */ - @TextField() - expectedOutcome!: string; - - /** Rubric criteria for the AI judge to evaluate */ - @JsonField() - evaluationCriteria!: string[]; - - /** Optional: expected file contents after successful completion */ - @JsonField() - expectedFiles?: Record; - - // ── Resource limits ─────────────────────────────────────── - - /** Maximum execution time in milliseconds */ - @NumberField() - timeLimitMs!: number; - - /** Maximum tool calls allowed */ - @NumberField() - toolCallLimit!: number; - - // ── Attempt history ─────────────────────────────────────── - - /** All attempts made against this challenge */ - @JsonField() - attempts!: ChallengeAttempt[]; - - // ── Statistics ──────────────────────────────────────────── - - /** Number of times this challenge has been attempted */ - @NumberField() - totalAttempts!: number; - - /** Number of times this challenge has been passed */ - @NumberField() - totalPasses!: number; - - /** Highest score achieved */ - @NumberField() - highScore!: number; - - // ── Index signature ─────────────────────────────────────── - - [key: string]: unknown; - - // ── Constructor ─────────────────────────────────────────── - - constructor() { - super(); - - this.name = ''; - this.description = ''; - this.sequenceNumber = 0; - this.difficulty = 'beginner'; - this.category = 'single-file'; - this.setupFiles = {}; - this.expectedOutcome = ''; - this.evaluationCriteria = []; - this.timeLimitMs = 60_000; - this.toolCallLimit = 10; - this.attempts = []; - this.totalAttempts = 0; - this.totalPasses = 0; - this.highScore = 0; - } - - // ── BaseEntity implementation ───────────────────────────── - - get collection(): string { - return CodingChallengeEntity.collection; - } - - static override getPaginationConfig(): { - defaultSortField: string; - defaultSortDirection: 'asc' | 'desc'; - defaultPageSize: number; - cursorField: string; - } { - return { - defaultSortField: 'sequenceNumber', - defaultSortDirection: 'asc', - defaultPageSize: 20, - cursorField: 'sequenceNumber', - }; - } - - validate(): { success: boolean; error?: string } { - if (!this.name?.trim()) { - return { success: false, error: 'Challenge name is required' }; - } - if (!this.description?.trim()) { - return { success: false, error: 'Challenge description is required' }; - } - if (typeof this.sequenceNumber !== 'number' || this.sequenceNumber < 1) { - return { success: false, error: 'Challenge sequenceNumber must be a positive integer' }; - } - if (!this.expectedOutcome?.trim()) { - return { success: false, error: 'Challenge expectedOutcome is required' }; - } - if (!Array.isArray(this.evaluationCriteria) || this.evaluationCriteria.length === 0) { - return { success: false, error: 'Challenge must have at least one evaluation criterion' }; - } - if (Object.keys(this.setupFiles).length === 0) { - return { success: false, error: 'Challenge must have at least one setup file' }; - } - if (this.timeLimitMs < 5000) { - return { success: false, error: 'Challenge time limit must be at least 5 seconds' }; - } - if (this.toolCallLimit < 2) { - return { success: false, error: 'Challenge tool call limit must be at least 2' }; - } - - return { success: true }; - } - - // ── Convenience methods ─────────────────────────────────── - - /** Pass rate as a percentage (0-100) */ - get passRate(): number { - if (this.totalAttempts === 0) return 0; - return Math.round((this.totalPasses / this.totalAttempts) * 100); - } - - /** Average score across all attempts */ - get averageScore(): number { - if (this.attempts.length === 0) return 0; - const total = this.attempts.reduce((sum, a) => sum + a.score, 0); - return Math.round(total / this.attempts.length); - } - - /** Best attempt for a specific persona */ - bestAttemptFor(personaId: UUID): ChallengeAttempt | undefined { - return this.attempts - .filter(a => a.personaId === personaId) - .sort((a, b) => b.score - a.score)[0]; - } - - /** Record a new attempt and update statistics */ - recordAttempt(attempt: ChallengeAttempt): void { - this.attempts.push(attempt); - this.totalAttempts++; - if (attempt.status === 'passed') { - this.totalPasses++; - } - if (attempt.score > this.highScore) { - this.highScore = attempt.score; - } - } -} diff --git a/src/debug/jtag/system/data/entities/CodingPlanEntity.ts b/src/debug/jtag/system/data/entities/CodingPlanEntity.ts deleted file mode 100644 index 286b83b0f..000000000 --- a/src/debug/jtag/system/data/entities/CodingPlanEntity.ts +++ /dev/null @@ -1,340 +0,0 @@ -/** - * CodingPlanEntity - Persistent coding plan with hierarchical team coordination - * - * First-class entity for the coding agent system. Tracks the full lifecycle: - * - Draft: PlanFormulator generates initial plan - * - Proposed: Plan submitted for team review - * - Approved: Team accepted the plan (or auto-approved for single-agent) - * - Executing: CodeAgentOrchestrator running steps - * - Completed/Failed: Final outcome with file changes and errors - * - * Hierarchical: A lead creates a top-level plan, then delegates sub-plans - * to team members via parentPlanId. Each sub-plan is scoped to a file cluster. - * - * Team-visible: All assigned AIs can view and propose modifications. - * Governance: Plans can be proposed for review via DecisionProposal integration. - */ - -import type { UUID } from '../../core/types/CrossPlatformUUID'; -import { - TextField, - NumberField, - JsonField, - EnumField, - CompositeIndex, -} from '../decorators/FieldDecorators'; -import { BaseEntity } from './BaseEntity'; -import { COLLECTIONS } from '../../shared/Constants'; -import type { CodingAction, RiskLevel, SecurityTierLevel } from '../../code/shared/CodingTypes'; - -// ──────────────────────────────────────────────────────────── -// Plan status lifecycle -// ──────────────────────────────────────────────────────────── - -export type CodingPlanStatus = - | 'draft' // Generated by PlanFormulator, not yet reviewed - | 'proposed' // Submitted for team review (DecisionProposal) - | 'approved' // Team accepted (or auto-approved for solo tasks) - | 'executing' // CodeAgentOrchestrator actively running steps - | 'completed' // All steps succeeded - | 'partial' // Some steps completed, budget or dependencies prevented full completion - | 'failed' // Execution failed (plan formulation error, all steps failed, etc.) - | 'cancelled'; // Manually cancelled before or during execution - -// ──────────────────────────────────────────────────────────── -// Step snapshot (persisted version of CodingStep + execution result) -// ──────────────────────────────────────────────────────────── - -export interface CodingStepSnapshot { - stepNumber: number; - action: CodingAction; - description: string; - targetFiles: string[]; - toolCall: string; - toolParams: Record; - dependsOn: number[]; - verification: string; - - // Execution state (populated during/after execution) - status: 'pending' | 'executing' | 'completed' | 'failed' | 'skipped'; - assigneeId?: string; // Which AI is executing this step (for delegation) - startedAt?: number; - completedAt?: number; - durationMs?: number; - output?: unknown; - error?: string; - retryCount?: number; -} - -// ──────────────────────────────────────────────────────────── -// Plan generation metadata -// ──────────────────────────────────────────────────────────── - -export interface PlanGenerationInfo { - provider: string; // e.g. 'anthropic' - model: string; // e.g. 'claude-sonnet-4-5-20250929' - temperature: number; - durationMs: number; // How long plan generation took - inputTokens?: number; - outputTokens?: number; -} - -// ──────────────────────────────────────────────────────────── -// Entity -// ──────────────────────────────────────────────────────────── - -@CompositeIndex({ - name: 'idx_coding_plans_persona_status', - fields: ['createdById', 'status'], - direction: 'DESC', -}) -@CompositeIndex({ - name: 'idx_coding_plans_task', - fields: ['taskId'], - direction: 'DESC', -}) -@CompositeIndex({ - name: 'idx_coding_plans_parent', - fields: ['parentPlanId'], - direction: 'DESC', -}) -export class CodingPlanEntity extends BaseEntity { - static readonly collection = COLLECTIONS.CODING_PLANS; - - // ── Identity ────────────────────────────────────────────── - - /** The coding task this plan addresses */ - @TextField({ index: true }) - taskId!: UUID; - - /** Parent plan ID (null for top-level plans, set for delegated sub-plans) */ - @TextField({ nullable: true, index: true }) - parentPlanId?: UUID; - - /** AI that created/formulated this plan */ - @TextField({ index: true }) - createdById!: UUID; - - /** Lead AI coordinating this plan (may differ from creator for delegated sub-plans) */ - @TextField({ index: true }) - leadId!: UUID; - - // ── Plan content ────────────────────────────────────────── - - /** Brief summary of the plan's approach */ - @TextField() - summary!: string; - - /** Original task description that prompted this plan */ - @TextField() - taskDescription!: string; - - /** Step DAG β€” the concrete execution plan */ - @JsonField() - steps!: CodingStepSnapshot[]; - - /** Estimated total tool calls for execution */ - @NumberField() - estimatedToolCalls!: number; - - // ── Team ────────────────────────────────────────────────── - - /** AI persona IDs assigned to work on this plan */ - @JsonField() - assignees!: UUID[]; - - // ── Model info ──────────────────────────────────────────── - - /** How the plan was generated */ - @JsonField() - generatedBy!: PlanGenerationInfo; - - // ── Risk & security ────────────────────────────────────── - - /** Risk level assessed by PlanFormulator */ - @EnumField() - riskLevel!: RiskLevel; - - /** Why this risk level was assigned */ - @TextField({ nullable: true }) - riskReason?: string; - - /** Security tier governing which tools this plan can use */ - @EnumField() - securityTier!: SecurityTierLevel; - - // ── Status & lifecycle ──────────────────────────────────── - - @EnumField({ index: true }) - status!: CodingPlanStatus; - - /** When execution started (null if not yet executing) */ - @NumberField({ nullable: true }) - executionStartedAt?: number; - - /** When execution completed/failed (null if still running) */ - @NumberField({ nullable: true }) - executionCompletedAt?: number; - - // ── Execution results ───────────────────────────────────── - - /** Files modified during execution */ - @JsonField() - filesModified!: string[]; - - /** Files created during execution */ - @JsonField() - filesCreated!: string[]; - - /** Change IDs from code/write and code/edit operations (for undo) */ - @JsonField() - changeIds!: string[]; - - /** Errors encountered during execution */ - @JsonField() - errors!: string[]; - - /** Total tool calls consumed */ - @NumberField() - totalToolCalls!: number; - - /** Total execution duration in milliseconds */ - @NumberField() - totalDurationMs!: number; - - // ── Governance ──────────────────────────────────────────── - - /** DecisionProposal ID if plan was proposed for team review */ - @TextField({ nullable: true }) - proposalId?: UUID; - - // ── Index signature ─────────────────────────────────────── - - [key: string]: unknown; - - // ── Constructor ─────────────────────────────────────────── - - constructor() { - super(); - - this.taskId = '' as UUID; - this.createdById = '' as UUID; - this.leadId = '' as UUID; - this.summary = ''; - this.taskDescription = ''; - this.steps = []; - this.estimatedToolCalls = 0; - this.assignees = []; - this.generatedBy = { provider: '', model: '', temperature: 0, durationMs: 0 }; - this.riskLevel = 'low'; - this.securityTier = 'write'; - this.status = 'draft'; - this.filesModified = []; - this.filesCreated = []; - this.changeIds = []; - this.errors = []; - this.totalToolCalls = 0; - this.totalDurationMs = 0; - } - - // ── BaseEntity implementation ───────────────────────────── - - get collection(): string { - return CodingPlanEntity.collection; - } - - static override getPaginationConfig(): { - defaultSortField: string; - defaultSortDirection: 'asc' | 'desc'; - defaultPageSize: number; - cursorField: string; - } { - return { - defaultSortField: 'createdAt', - defaultSortDirection: 'desc', - defaultPageSize: 20, - cursorField: 'createdAt', - }; - } - - validate(): { success: boolean; error?: string } { - if (!this.taskId?.trim()) { - return { success: false, error: 'CodingPlan taskId is required' }; - } - if (!this.createdById?.trim()) { - return { success: false, error: 'CodingPlan createdById is required' }; - } - if (!this.leadId?.trim()) { - return { success: false, error: 'CodingPlan leadId is required' }; - } - if (!this.summary?.trim()) { - return { success: false, error: 'CodingPlan summary is required' }; - } - if (!this.taskDescription?.trim()) { - return { success: false, error: 'CodingPlan taskDescription is required' }; - } - if (!Array.isArray(this.steps)) { - return { success: false, error: 'CodingPlan steps must be an array' }; - } - if (this.steps.length === 0) { - return { success: false, error: 'CodingPlan must have at least one step' }; - } - if (!Array.isArray(this.assignees)) { - return { success: false, error: 'CodingPlan assignees must be an array' }; - } - if (this.assignees.length === 0) { - return { success: false, error: 'CodingPlan must have at least one assignee' }; - } - - const validStatuses: CodingPlanStatus[] = [ - 'draft', 'proposed', 'approved', 'executing', - 'completed', 'partial', 'failed', 'cancelled', - ]; - if (!validStatuses.includes(this.status)) { - return { success: false, error: `CodingPlan status must be one of: ${validStatuses.join(', ')}` }; - } - - // Validate step structure - for (const step of this.steps) { - if (typeof step.stepNumber !== 'number' || step.stepNumber < 1) { - return { success: false, error: `CodingPlan step has invalid stepNumber: ${step.stepNumber}` }; - } - if (!step.action) { - return { success: false, error: `CodingPlan step ${step.stepNumber} is missing action` }; - } - if (!step.toolCall?.startsWith('code/')) { - return { success: false, error: `CodingPlan step ${step.stepNumber} has invalid toolCall: ${step.toolCall}` }; - } - } - - return { success: true }; - } - - // ── Convenience methods ─────────────────────────────────── - - /** Whether this is a sub-plan delegated from a parent */ - get isDelegated(): boolean { - return !!this.parentPlanId; - } - - /** Number of steps completed */ - get stepsCompleted(): number { - return this.steps.filter(s => s.status === 'completed').length; - } - - /** Number of steps failed */ - get stepsFailed(): number { - return this.steps.filter(s => s.status === 'failed').length; - } - - /** Number of steps still pending or executing */ - get stepsRemaining(): number { - return this.steps.filter(s => s.status === 'pending' || s.status === 'executing').length; - } - - /** Progress as a fraction (0.0 - 1.0) */ - get progress(): number { - if (this.steps.length === 0) return 0; - return this.stepsCompleted / this.steps.length; - } -} diff --git a/src/debug/jtag/system/recipes/coding.json b/src/debug/jtag/system/recipes/coding.json new file mode 100644 index 000000000..7a54ebc25 --- /dev/null +++ b/src/debug/jtag/system/recipes/coding.json @@ -0,0 +1,95 @@ +{ + "uniqueId": "coding", + "name": "Collaborative Coding", + "displayName": "Code", + "description": "Collaborative software development β€” reading, writing, reviewing, and shipping code as a team", + "version": 1, + + "layout": { + "main": ["chat-widget"], + "right": null + }, + + "locked": ["layout.main"], + + "pipeline": [ + { + "command": "rag/build", + "params": { + "maxMessages": 30, + "includeParticipants": true, + "includeRoomStrategy": true + }, + "outputTo": "ragContext" + }, + { + "command": "ai/should-respond", + "params": { + "ragContext": "$ragContext", + "strategy": "coding" + }, + "outputTo": "decision" + }, + { + "command": "ai/generate", + "params": { + "ragContext": "$ragContext", + "temperature": 0.3 + }, + "condition": "decision.shouldRespond === true" + } + ], + + "ragTemplate": { + "messageHistory": { + "maxMessages": 30, + "orderBy": "chronological", + "includeTimestamps": true + }, + "participants": { + "includeRoles": true, + "includeExpertise": true, + "includeHistory": false + }, + "roomMetadata": true + }, + + "strategy": { + "conversationPattern": "collaborative", + "responseRules": [ + "This room is for DOING code work, not just discussing it. Use code/* tools.", + "Read before you write β€” always code/read a file before editing it", + "Verify your work β€” run code/verify after every edit to check compilation", + "Iterate on failures β€” when verify fails, read the errors, fix them, verify again", + "Show your work β€” share what you changed and why in the room", + "Review each other β€” use code/read and code/git diff to review teammates' changes", + "Propose before big changes β€” use collaboration/decision/propose for architectural decisions", + "Commit working code β€” only code/git commit after code/verify passes", + "Coordinate naturally β€” claim files you're working on, don't pile on the same code", + "Ask for help when stuck β€” share errors, ask teammates to look at your approach" + ], + "decisionCriteria": [ + "Is there a coding task I can help with?", + "Has someone asked for a code review?", + "Did someone share an error they're stuck on?", + "Is there a verification failure I can diagnose?", + "Should I propose an architectural approach before coding?" + ] + }, + + "tools": [ + { "name": "code/read", "description": "Read a file from your workspace", "enabledFor": ["ai"] }, + { "name": "code/write", "description": "Create or overwrite a file", "enabledFor": ["ai"] }, + { "name": "code/edit", "description": "Edit a file (search-replace, line-range, insert, append)", "enabledFor": ["ai"] }, + { "name": "code/search", "description": "Search for patterns across workspace files", "enabledFor": ["ai"] }, + { "name": "code/tree", "description": "View directory structure", "enabledFor": ["ai"] }, + { "name": "code/diff", "description": "Preview an edit as unified diff", "enabledFor": ["ai"] }, + { "name": "code/undo", "description": "Undo recent changes", "enabledFor": ["ai"] }, + { "name": "code/history", "description": "View change history", "enabledFor": ["ai"] }, + { "name": "code/verify", "description": "Run compilation check and tests", "enabledFor": ["ai"] }, + { "name": "code/git", "description": "Git operations (status, diff, log, add, commit)", "enabledFor": ["ai"] } + ], + + "isPublic": true, + "tags": ["coding", "development", "engineering", "collaboration"] +} diff --git a/src/debug/jtag/system/user/server/PersonaUser.ts b/src/debug/jtag/system/user/server/PersonaUser.ts index dbb0ed6fc..e0e5c7382 100644 --- a/src/debug/jtag/system/user/server/PersonaUser.ts +++ b/src/debug/jtag/system/user/server/PersonaUser.ts @@ -118,6 +118,7 @@ import { RustCognitionBridge, type PersonaUserForRustCognition } from './modules import { SystemPaths } from '../../core/config/SystemPaths'; import { UnifiedConsciousness } from './modules/consciousness/UnifiedConsciousness'; import { registerConsciousness, unregisterConsciousness } from '../../rag/sources/GlobalAwarenessSource'; +import { Workspace } from '../../code/server/Workspace'; import { DATA_COMMANDS } from '@commands/data/shared/DataCommandConstants'; import { DataOpen } from '../../../commands/data/open/shared/DataOpenTypes'; import type { CorpusMemory } from '../../../workers/continuum-core/bindings/CorpusMemory'; @@ -199,6 +200,9 @@ export class PersonaUser extends AIUser { // MEMORY LEAK FIX: Track event subscriptions for cleanup private _eventUnsubscribes: (() => void)[] = []; + // Workspace handle β€” lazy-created on first code task, retained for session lifetime + private _workspace: Workspace | null = null; + /** * Get unified consciousness for cross-context awareness * Public for RAG sources and cognitive modules @@ -306,6 +310,32 @@ export class PersonaUser extends AIUser { return this.prefrontal.planFormulator; } + // ════════════════════════════════════════════════════════════════════════════ + // Workspace β€” per-persona code workspace (lazy-created, session-scoped) + // ════════════════════════════════════════════════════════════════════════════ + + /** Get the current workspace handle (null if not yet created) */ + public get workspace(): Workspace | null { + return this._workspace; + } + + /** + * Ensure a workspace exists for this persona. + * Creates a sandbox workspace on first call, retains for session lifetime. + * Called automatically when persona receives a code-domain task. + */ + public async ensureWorkspace(): Promise { + if (this._workspace) return this._workspace; + + this.log.info(`πŸ”§ ${this.displayName}: Creating workspace (sandbox mode)`); + this._workspace = await Workspace.create({ + personaId: this.id, + mode: 'sandbox', + }); + this.log.info(`πŸ”§ ${this.displayName}: Workspace created β€” handle=${this._workspace.handle}, dir=${this._workspace.dir}`); + return this._workspace; + } + // BEING ARCHITECTURE: Delegate to body for toolExecutor private get toolExecutor(): PersonaToolExecutor { if (!this.motorCortex) throw new Error('Motor cortex not initialized'); @@ -1962,6 +1992,17 @@ export class PersonaUser extends AIUser { // Stop autonomous servicing loop await this.autonomousLoop.stopServicing(); + // Clean up workspace (shell session + worktree) + if (this._workspace) { + try { + await this._workspace.destroy(); + this.log.info(`πŸ”§ ${this.displayName}: Workspace destroyed`); + } catch (e) { + this.log.warn(`⚠️ ${this.displayName}: Workspace cleanup failed: ${e}`); + } + this._workspace = null; + } + // PHASE 6: Shutdown memory module (genome + RAG) await this.memory.shutdown(); diff --git a/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts b/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts index 30941155f..49976ac12 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts @@ -275,10 +275,16 @@ export class PersonaAutonomousLoop { /** * PHASE 5: Execute a task based on its type * - * Handles all task types: memory-consolidation, skill-audit, fine-tune-lora, resume-work, etc. + * Handles all task types: memory-consolidation, skill-audit, fine-tune-lora, resume-work, + * and code tasks (write-feature, review-code). * Delegates to PersonaTaskExecutor module for actual execution. */ private async executeTask(task: InboxTask): Promise { + // For code-domain tasks, ensure workspace exists before dispatching + if (task.domain === 'code') { + await this.personaUser.ensureWorkspace(); + } + // Delegate to task executor module await this.personaUser.taskExecutor.executeTask(task); } diff --git a/src/debug/jtag/system/user/server/modules/PersonaTaskExecutor.ts b/src/debug/jtag/system/user/server/modules/PersonaTaskExecutor.ts index 7e79b8ba7..8c86a1564 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaTaskExecutor.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaTaskExecutor.ts @@ -78,6 +78,11 @@ export class PersonaTaskExecutor { outcome = await this.executeFineTuneLora(task); break; + case 'write-feature': + case 'review-code': + outcome = await this.executeCodeTask(task); + break; + default: outcome = `Unknown task type: ${task.taskType}`; status = 'failed'; @@ -572,6 +577,25 @@ export class PersonaTaskExecutor { } } + /** + * Code task execution (write-feature, review-code) + * + * Infrastructure hook for code-domain tasks. The workspace is guaranteed to exist + * by the time this runs (PersonaAutonomousLoop.ensureWorkspace called beforehand). + * + * The actual coding agent loop (readβ†’reasonβ†’editβ†’verifyβ†’commit) is driven by the + * persona's tool execution pipeline with code/* tools β€” not by this method. + * This method logs the task and returns, allowing the recipe pipeline to handle execution. + */ + private async executeCodeTask(task: InboxTask): Promise { + this.log(`πŸ’» ${this.displayName}: Code task received β€” ${task.taskType}: ${task.description}`); + + const roomId = task.metadata?.roomId ?? task.contextId; + this.log(`πŸ’» ${this.displayName}: Code task for room=${roomId}, workspace ensured by caller`); + + return `Code task acknowledged: ${task.taskType} β€” ${task.description}`; + } + /** * Collect training examples from recent chat interactions * diff --git a/src/debug/jtag/system/user/server/modules/QueueItemTypes.ts b/src/debug/jtag/system/user/server/modules/QueueItemTypes.ts index a0aa0e93f..0ef1c5380 100644 --- a/src/debug/jtag/system/user/server/modules/QueueItemTypes.ts +++ b/src/debug/jtag/system/user/server/modules/QueueItemTypes.ts @@ -181,6 +181,29 @@ export function fromRustServiceItem(json: Record): QueueItem | return msg; } + if (itemType === 'code') { + // Map Rust CodeQueueItem β†’ TS InboxTask with domain='code' + const task: InboxTask = { + id: json.id as UUID, + type: 'task', + taskId: json.id as UUID, + assigneeId: json.persona_id as UUID ?? json.personaId as UUID, + createdBy: json.persona_id as UUID ?? json.personaId as UUID, + domain: 'code' as TaskDomain, + taskType: (json.is_review ?? json.isReview) ? 'review-code' as TaskType : 'write-feature' as TaskType, + contextId: json.room_id as UUID ?? json.roomId as UUID, + description: json.task_description as string ?? json.taskDescription as string ?? '', + priority: json.priority as number, + status: 'pending' as TaskStatus, + timestamp: json.timestamp as number, + enqueuedAt: json.timestamp as number, + metadata: { + roomId: json.room_id as UUID ?? json.roomId as UUID, + }, + }; + return task; + } + if (itemType === 'task') { const task: InboxTask = { id: json.id as UUID, diff --git a/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts b/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts deleted file mode 100644 index a42addafb..000000000 --- a/src/debug/jtag/tests/integration/coding-agent-workflow.test.ts +++ /dev/null @@ -1,412 +0,0 @@ -/** - * Coding Agent Workflow Integration Test (TDD) - * - * Tests the complete plan β†’ execute β†’ persist lifecycle: - * 1. Orchestrator receives a coding task - * 2. PlanFormulator generates a step DAG (mocked LLM) - * 3. Steps execute via code/* commands (mocked) - * 4. CodingPlanEntity is persisted with initial state - * 5. Step statuses are updated during execution - * 6. Plan is finalized with results - * - * This is a workflow test β€” it exercises the real orchestrator logic - * with controlled inputs, verifying the full lifecycle including - * persistence. If any step in the chain breaks, this test catches it. - */ - -import { describe, it, expect, beforeEach, vi } from 'vitest'; -import { CodeAgentOrchestrator } from '../../system/code/server/CodeAgentOrchestrator'; -import { CodingPlanEntity } from '../../system/data/entities/CodingPlanEntity'; -import type { CodingTask } from '../../system/code/shared/CodingTypes'; -import type { UUID } from '../../system/core/types/CrossPlatformUUID'; - -// ── Mocks ────────────────────────────────────────────────── - -const mockGenerateText = vi.fn(); -vi.mock('../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ - AIProviderDaemon: { - generateText: (...args: unknown[]) => mockGenerateText(...args), - }, -})); - -const mockExecute = vi.fn(); -vi.mock('../../system/core/shared/Commands', () => ({ - Commands: { - execute: (...args: unknown[]) => mockExecute(...args), - }, -})); - -vi.mock('../../system/core/logging/Logger', () => ({ - Logger: { - create: () => ({ - debug: () => {}, - info: () => {}, - warn: () => {}, - error: () => {}, - }), - }, -})); - -// Track DataDaemon persistence calls -const mockDataDaemonStore = vi.fn(); -const mockDataDaemonUpdate = vi.fn(); - -vi.mock('../../daemons/data-daemon/shared/DataDaemon', () => ({ - DataDaemon: { - store: (...args: unknown[]) => mockDataDaemonStore(...args), - update: (...args: unknown[]) => mockDataDaemonUpdate(...args), - }, -})); - -// ── Helpers ───────────────────────────────────────────────── - -function makeTask(overrides?: Partial): CodingTask { - return { - id: 'task-0001-0001-0001-task00000001' as UUID, - personaId: 'ai-00-0001-0001-0001-ai0000000001' as UUID, - description: 'Add a greet function to utils.ts', - taskType: 'generation', - maxToolCalls: 20, - maxDurationMs: 120000, - createdAt: Date.now(), - ...overrides, - }; -} - -/** 3-step plan: read β†’ edit β†’ verify */ -function mockThreeStepPlan() { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Read utils.ts, add greet function, verify', - steps: [ - { - stepNumber: 1, - action: 'read', - description: 'Read utils.ts', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'File content returned', - }, - { - stepNumber: 2, - action: 'edit', - description: 'Add greet function', - targetFiles: ['utils.ts'], - toolCall: 'code/edit', - toolParams: { - filePath: 'utils.ts', - editMode: { type: 'append', content: 'function greet() {}' }, - }, - dependsOn: [1], - verification: 'Edit applied', - }, - { - stepNumber: 3, - action: 'verify', - description: 'Verify changes', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [2], - verification: 'greet function present', - }, - ], - }), - }); -} - -/** Simulate successful code/* command responses */ -function mockSuccessfulCodeCommands() { - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: { name: '.', children: [] } }; - if (cmd === 'code/read') return { success: true, content: 'export function greet() {}' }; - if (cmd === 'code/edit') return { success: true, changeId: 'change-abc-001' }; - return { success: true }; - }); -} - -// ── Tests ─────────────────────────────────────────────────── - -describe('Coding Agent Workflow', () => { - let orchestrator: CodeAgentOrchestrator; - - beforeEach(() => { - mockGenerateText.mockReset(); - mockExecute.mockReset(); - mockDataDaemonStore.mockReset(); - mockDataDaemonUpdate.mockReset(); - - // DataDaemon.store returns the entity with an id assigned - mockDataDaemonStore.mockImplementation(async (_collection: string, entity: CodingPlanEntity) => { - entity.id = 'plan-persisted-id-0001' as UUID; - return entity; - }); - mockDataDaemonUpdate.mockResolvedValue({}); - - orchestrator = new CodeAgentOrchestrator(); - }); - - describe('happy path: plan β†’ execute β†’ persist', () => { - it('persists a CodingPlanEntity on successful execution', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - const result = await orchestrator.execute(makeTask()); - - // ── Execution succeeded ── - expect(result.status).toBe('completed'); - expect(result.stepResults).toHaveLength(3); - expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - - // ── Plan was persisted ── - expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); - const [collection, entity] = mockDataDaemonStore.mock.calls[0]; - expect(collection).toBe('coding_plans'); - expect(entity).toBeInstanceOf(CodingPlanEntity); - }); - - it('persisted plan has correct initial structure', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; - - expect(entity.taskId).toBe('task-0001-0001-0001-task00000001'); - expect(entity.createdById).toBe('ai-00-0001-0001-0001-ai0000000001'); - expect(entity.leadId).toBe('ai-00-0001-0001-0001-ai0000000001'); - expect(entity.summary).toBe('Read utils.ts, add greet function, verify'); - expect(entity.taskDescription).toBe('Add a greet function to utils.ts'); - expect(entity.status).toBe('executing'); - expect(entity.steps).toHaveLength(3); - expect(entity.assignees).toContain('ai-00-0001-0001-0001-ai0000000001'); - expect(entity.executionStartedAt).toBeGreaterThan(0); - }); - - it('step snapshots have correct structural properties', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; - - // Structural properties (immutable during execution) - expect(entity.steps).toHaveLength(3); - for (const step of entity.steps) { - expect(step.toolCall).toMatch(/^code\//); - expect(step.stepNumber).toBeGreaterThan(0); - expect(step.action).toBeTruthy(); - expect(step.description).toBeTruthy(); - expect(Array.isArray(step.dependsOn)).toBe(true); - } - - // Store is called before any update (ordering proof) - expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); - expect(mockDataDaemonUpdate).toHaveBeenCalled(); - }); - - it('updates step status during execution', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - // DataDaemon.update called for each step + finalization - // 3 step updates + 1 finalize = 4 calls - expect(mockDataDaemonUpdate).toHaveBeenCalledTimes(4); - - // Each step update includes the steps array - for (let i = 0; i < 3; i++) { - const updateCall = mockDataDaemonUpdate.mock.calls[i]; - expect(updateCall[0]).toBe('coding_plans'); // collection - expect(updateCall[1]).toBe('plan-persisted-id-0001'); // entity id - expect(updateCall[2]).toHaveProperty('steps'); - } - }); - - it('finalizes plan with execution results', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - // Last update call is finalization - const finalizeCall = mockDataDaemonUpdate.mock.calls[3]; - const finalizeData = finalizeCall[2]; - - expect(finalizeData.status).toBe('completed'); - expect(finalizeData.executionCompletedAt).toBeGreaterThan(0); - expect(finalizeData.filesModified).toContain('utils.ts'); - expect(finalizeData.changeIds).toContain('change-abc-001'); - expect(finalizeData.totalToolCalls).toBeGreaterThanOrEqual(4); - expect(finalizeData.totalDurationMs).toBeGreaterThan(0); - }); - - it('tracks changeIds from edit operations', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - const result = await orchestrator.execute(makeTask()); - - expect(result.changeIds).toContain('change-abc-001'); - expect(result.filesModified).toContain('utils.ts'); - }); - }); - - describe('partial completion: some steps fail', () => { - it('persists partial status when edit fails', async () => { - mockThreeStepPlan(); - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') return { success: false, error: 'Conflict' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('partial'); - expect(result.errors.length).toBeGreaterThan(0); - - // Plan was finalized as partial - const finalizeCall = mockDataDaemonUpdate.mock.calls.at(-1); - expect(finalizeCall?.[2].status).toBe('partial'); - }); - - it('skipped steps are recorded in persistence', async () => { - mockThreeStepPlan(); - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') return { success: false, error: 'Failed' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - // Step 3 (verify) depends on step 2 (edit) which failed β†’ skipped - const verifyStep = result.stepResults.find(r => r.stepNumber === 3); - expect(verifyStep?.status).toBe('skipped'); - }); - }); - - describe('plan formulation failure', () => { - it('persists failed status when LLM is unavailable', async () => { - mockGenerateText.mockRejectedValue(new Error('LLM unavailable')); - mockExecute.mockResolvedValue({ success: true }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('failed'); - expect(result.errors).toContain('LLM unavailable'); - - // No plan was created (failure happened before plan formulation) - // DataDaemon.store should NOT have been called - expect(mockDataDaemonStore).not.toHaveBeenCalled(); - }); - }); - - describe('persistence failure resilience', () => { - it('continues execution even if DataDaemon.store fails', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - mockDataDaemonStore.mockRejectedValue(new Error('DB unavailable')); - - const result = await orchestrator.execute(makeTask()); - - // Execution should still complete successfully - expect(result.status).toBe('completed'); - expect(result.stepResults).toHaveLength(3); - expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - }); - - it('continues execution even if DataDaemon.update fails', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - mockDataDaemonStore.mockImplementation(async (_c: string, entity: CodingPlanEntity) => { - entity.id = 'plan-id' as UUID; - return entity; - }); - mockDataDaemonUpdate.mockRejectedValue(new Error('DB write error')); - - const result = await orchestrator.execute(makeTask()); - - // Execution should still complete despite persistence failures - expect(result.status).toBe('completed'); - }); - }); - - describe('budget enforcement with persistence', () => { - it('persists budget_exceeded as partial status', async () => { - // Plan with 5 sequential steps - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Five reads', - steps: Array.from({ length: 5 }, (_, i) => ({ - stepNumber: i + 1, - action: 'read', - targetFiles: [`file${i}.ts`], - toolCall: 'code/read', - toolParams: { filePath: `file${i}.ts` }, - dependsOn: i > 0 ? [i] : [], - verification: 'ok', - })), - }), - }); - mockSuccessfulCodeCommands(); - - const result = await orchestrator.execute(makeTask({ maxToolCalls: 5 })); - - expect(['partial', 'budget_exceeded']).toContain(result.status); - - // Plan was finalized - if (mockDataDaemonUpdate.mock.calls.length > 0) { - const finalizeCall = mockDataDaemonUpdate.mock.calls.at(-1); - expect(['partial', 'completed']).toContain(finalizeCall?.[2].status); - } - }); - }); - - describe('plan entity structure integrity', () => { - it('step snapshots preserve dependency DAG', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; - - expect(entity.steps[0].dependsOn).toEqual([]); - expect(entity.steps[1].dependsOn).toEqual([1]); - expect(entity.steps[2].dependsOn).toEqual([2]); - }); - - it('step snapshots preserve tool params', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; - - expect(entity.steps[0].toolParams).toEqual({ filePath: 'utils.ts' }); - expect(entity.steps[1].toolParams).toHaveProperty('editMode'); - }); - - it('generatedBy includes model info', async () => { - mockThreeStepPlan(); - mockSuccessfulCodeCommands(); - - await orchestrator.execute(makeTask()); - - const entity: CodingPlanEntity = mockDataDaemonStore.mock.calls[0][1]; - - expect(entity.generatedBy.provider).toBeTruthy(); - expect(entity.generatedBy.model).toBeTruthy(); - }); - }); -}); diff --git a/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts b/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts deleted file mode 100644 index 742913d37..000000000 --- a/src/debug/jtag/tests/integration/sandbox-enforcement.test.ts +++ /dev/null @@ -1,302 +0,0 @@ -/** - * Sandbox Enforcement Integration Test - * - * Tests that the CodeAgentOrchestrator respects security tiers: - * 1. Plans include riskLevel from PlanFormulator - * 2. ToolAllowlistEnforcer blocks disallowed tool calls - * 3. Risk level flows through to persisted CodingPlanEntity - * 4. Discovery-tier plans can't write files - */ - -import { describe, it, expect, beforeEach, vi } from 'vitest'; -import { CodeAgentOrchestrator } from '../../system/code/server/CodeAgentOrchestrator'; -import type { CodingTask } from '../../system/code/shared/CodingTypes'; -import type { UUID } from '../../system/core/types/CrossPlatformUUID'; - -// ── Mocks ────────────────────────────────────────────────── - -const mockGenerateText = vi.fn(); -vi.mock('../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ - AIProviderDaemon: { - generateText: (...args: unknown[]) => mockGenerateText(...args), - }, -})); - -const mockExecute = vi.fn(); -vi.mock('../../system/core/shared/Commands', () => ({ - Commands: { - execute: (...args: unknown[]) => mockExecute(...args), - }, -})); - -vi.mock('../../system/core/logging/Logger', () => ({ - Logger: { - create: () => ({ - debug: () => {}, - info: () => {}, - warn: () => {}, - error: () => {}, - }), - }, -})); - -const mockDataDaemonStore = vi.fn(); -const mockDataDaemonUpdate = vi.fn(); -vi.mock('../../daemons/data-daemon/shared/DataDaemon', () => ({ - DataDaemon: { - store: (...args: unknown[]) => mockDataDaemonStore(...args), - update: (...args: unknown[]) => mockDataDaemonUpdate(...args), - }, -})); - -// ── Helpers ───────────────────────────────────────────────── - -function makeTask(overrides?: Partial): CodingTask { - return { - id: 'task-enforce-0001-0001-task00000001' as UUID, - personaId: 'ai-00-0001-0001-0001-ai0000000001' as UUID, - description: 'Test sandbox enforcement', - taskType: 'generation', - maxToolCalls: 20, - maxDurationMs: 120000, - createdAt: Date.now(), - ...overrides, - }; -} - -function mockSuccessfulCommands() { - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: { name: '.', children: [] } }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'change-001' }; - if (cmd === 'code/write') return { success: true, changeId: 'change-002' }; - if (cmd === 'development/exec') return { success: true, output: 'npm output' }; - return { success: true }; - }); -} - -// ── Tests ─────────────────────────────────────────────────── - -describe('Sandbox Enforcement', () => { - let orchestrator: CodeAgentOrchestrator; - - beforeEach(() => { - mockGenerateText.mockReset(); - mockExecute.mockReset(); - mockDataDaemonStore.mockReset(); - mockDataDaemonUpdate.mockReset(); - - mockDataDaemonStore.mockImplementation(async (_c: string, entity: any) => { - entity.id = 'plan-enforce-id' as UUID; - return entity; - }); - mockDataDaemonUpdate.mockResolvedValue({}); - - orchestrator = new CodeAgentOrchestrator(); - }); - - describe('riskLevel flows from plan to entity', () => { - it('low-risk plan persists riskLevel and securityTier', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Read a single file', - riskLevel: 'low', - riskReason: 'Read-only, no modifications', - steps: [{ - stepNumber: 1, - action: 'read', - description: 'Read utils.ts', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'File read', - }], - }), - }); - mockSuccessfulCommands(); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('completed'); - - // Verify entity was persisted with risk info - expect(mockDataDaemonStore).toHaveBeenCalledTimes(1); - const entity = mockDataDaemonStore.mock.calls[0][1]; - expect(entity.riskLevel).toBe('low'); - expect(entity.riskReason).toBe('Read-only, no modifications'); - expect(entity.securityTier).toBe('write'); // low β†’ write tier - }); - - it('critical-risk plan gets system tier', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Modify build system', - riskLevel: 'critical', - riskReason: 'Modifies build configuration and deployment scripts', - steps: [{ - stepNumber: 1, - action: 'read', - description: 'Read build config', - targetFiles: ['build.config.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'build.config.ts' }, - dependsOn: [], - verification: 'Config read', - }], - }), - }); - mockSuccessfulCommands(); - - await orchestrator.execute(makeTask()); - - const entity = mockDataDaemonStore.mock.calls[0][1]; - expect(entity.riskLevel).toBe('critical'); - expect(entity.securityTier).toBe('system'); // critical β†’ system tier - }); - }); - - describe('enforcer blocks disallowed tools', () => { - it('write-tier plan blocks code/delete steps', async () => { - // Plan with riskLevel=low (β†’ write tier) tries to use code/delete (explicitly denied) - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Delete old file', - riskLevel: 'low', - riskReason: 'Simple cleanup', - steps: [ - { - stepNumber: 1, - action: 'read', - description: 'Read old file', - targetFiles: ['old.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'old.ts' }, - dependsOn: [], - verification: 'File read', - }, - { - stepNumber: 2, - action: 'verify', - description: 'Delete old file', - targetFiles: ['old.ts'], - toolCall: 'code/delete', - toolParams: { filePath: 'old.ts' }, - dependsOn: [1], - verification: 'File deleted', - }, - ], - }), - }); - mockSuccessfulCommands(); - - const result = await orchestrator.execute(makeTask()); - - // Step 1 (read) should succeed, step 2 (code/delete) should fail (denied in write tier) - const readStep = result.stepResults.find(r => r.stepNumber === 1); - const deleteStep = result.stepResults.find(r => r.stepNumber === 2); - - expect(readStep?.status).toBe('completed'); - expect(deleteStep?.status).toBe('failed'); - expect(deleteStep?.error).toContain('denied'); - }); - - it('system-tier plan allows code/delete', async () => { - // Plan with riskLevel=critical (β†’ system tier) can use code/delete - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'System cleanup', - riskLevel: 'critical', - riskReason: 'Requires deletion capability', - steps: [ - { - stepNumber: 1, - action: 'verify', - description: 'Delete deprecated file', - targetFiles: ['deprecated.ts'], - toolCall: 'code/delete', - toolParams: { filePath: 'deprecated.ts' }, - dependsOn: [], - verification: 'File removed', - }, - ], - }), - }); - mockSuccessfulCommands(); - - const result = await orchestrator.execute(makeTask()); - - const deleteStep = result.stepResults.find(r => r.stepNumber === 1); - expect(deleteStep?.status).toBe('completed'); - }); - - it('write-tier plan allows code/write and code/edit', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Edit files', - riskLevel: 'medium', - riskReason: 'Standard file modifications', - steps: [ - { - stepNumber: 1, - action: 'read', - description: 'Read file', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'Read', - }, - { - stepNumber: 2, - action: 'edit', - description: 'Edit file', - targetFiles: ['utils.ts'], - toolCall: 'code/edit', - toolParams: { filePath: 'utils.ts', editMode: { type: 'append', content: 'new code' } }, - dependsOn: [1], - verification: 'Edited', - }, - ], - }), - }); - mockSuccessfulCommands(); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('completed'); - expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - }); - }); - - describe('default risk handling', () => { - it('plan without riskLevel defaults to medium/write tier', async () => { - // Old-style plan without risk fields - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Legacy plan', - steps: [{ - stepNumber: 1, - action: 'read', - description: 'Read file', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'Read', - }], - }), - }); - mockSuccessfulCommands(); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('completed'); - - // Entity should have default risk values - const entity = mockDataDaemonStore.mock.calls[0][1]; - expect(entity.riskLevel).toBe('medium'); - expect(entity.securityTier).toBe('write'); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts b/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts deleted file mode 100644 index 014070be3..000000000 --- a/src/debug/jtag/tests/unit/code/CodeAgentOrchestrator.test.ts +++ /dev/null @@ -1,492 +0,0 @@ -/** - * CodeAgentOrchestrator Unit Tests - * - * Tests the execution engine by mocking PlanFormulator and Commands.execute. - * Validates: - * - Step execution in dependency order - * - Budget enforcement (time and tool calls) - * - Retry logic on step failure - * - Result aggregation (filesModified, changeIds, errors) - * - Graceful degradation on partial completion - */ - -import { describe, it, expect, beforeEach, vi } from 'vitest'; -import { CodeAgentOrchestrator } from '../../../system/code/server/CodeAgentOrchestrator'; -import type { CodingTask } from '../../../system/code/shared/CodingTypes'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; - -// Mock AIProviderDaemon (used by PlanFormulator) -const mockGenerateText = vi.fn(); -vi.mock('../../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ - AIProviderDaemon: { - generateText: (...args: unknown[]) => mockGenerateText(...args), - }, -})); - -// Mock Commands.execute (used by orchestrator for code/* calls) -const mockExecute = vi.fn(); -vi.mock('../../../system/core/shared/Commands', () => ({ - Commands: { - execute: (...args: unknown[]) => mockExecute(...args), - }, -})); - -// Mock Logger -vi.mock('../../../system/core/logging/Logger', () => ({ - Logger: { - create: () => ({ - debug: () => {}, - info: () => {}, - warn: () => {}, - error: () => {}, - }), - }, -})); - -// Mock CodeDaemon.createWorkspace (workspace bootstrap) -vi.mock('../../../daemons/code-daemon/shared/CodeDaemon', () => ({ - CodeDaemon: { - createWorkspace: vi.fn().mockResolvedValue(undefined), - }, -})); - -// Mock fs for workspace directory creation + CLAUDE.md reading -vi.mock('fs', () => ({ - existsSync: vi.fn().mockReturnValue(true), - mkdirSync: vi.fn(), - readFileSync: vi.fn().mockReturnValue('# Project Conventions\nCompression principle applies.'), -})); - -function makeTask(overrides?: Partial): CodingTask { - return { - id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, - personaId: '11111111-2222-3333-4444-555555555555' as UUID, - description: 'Add a greet function to utils.ts', - taskType: 'generation', - maxToolCalls: 20, - maxDurationMs: 120000, - createdAt: Date.now(), - ...overrides, - }; -} - -/** Mock PlanFormulator returning a simple 3-step plan */ -function mockSimplePlan() { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Read, edit, verify', - steps: [ - { - stepNumber: 1, - action: 'read', - description: 'Read utils.ts', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'File read', - }, - { - stepNumber: 2, - action: 'edit', - description: 'Add greet function', - targetFiles: ['utils.ts'], - toolCall: 'code/edit', - toolParams: { filePath: 'utils.ts', editMode: { type: 'append', content: 'function greet() {}' } }, - dependsOn: [1], - verification: 'Edit applied', - }, - { - stepNumber: 3, - action: 'verify', - description: 'Verify changes', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [2], - verification: 'greet function present', - }, - ], - }), - }); -} - -describe('CodeAgentOrchestrator', () => { - let orchestrator: CodeAgentOrchestrator; - - beforeEach(() => { - mockGenerateText.mockReset(); - mockExecute.mockReset(); - orchestrator = new CodeAgentOrchestrator(); - }); - - describe('execute - happy path', () => { - it('executes all plan steps and returns completed', async () => { - mockSimplePlan(); - - // Use mockImplementation to handle discovery + architecture doc reads + plan steps - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('completed'); - expect(result.stepResults).toHaveLength(3); - expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - expect(result.totalToolCalls).toBeGreaterThanOrEqual(4); // 1 discovery + arch reads + 3 steps - }); - - it('tracks modified files from edit steps', async () => { - mockSimplePlan(); - - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'change-123' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.filesModified).toContain('utils.ts'); - expect(result.changeIds).toContain('change-123'); - }); - - it('includes execution timing', async () => { - mockSimplePlan(); - mockExecute.mockResolvedValue({ success: true }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.totalDurationMs).toBeGreaterThan(0); - for (const step of result.stepResults) { - expect(step.durationMs).toBeGreaterThanOrEqual(0); - } - }); - }); - - describe('budget enforcement', () => { - it('stops when max tool calls exceeded', async () => { - mockSimplePlan(); - - // Task with only 2 tool calls allowed (discovery uses 1, only 1 left for plan) - mockExecute.mockResolvedValue({ success: true }); - - const result = await orchestrator.execute(makeTask({ maxToolCalls: 3 })); - - // Should have stopped partway through - expect(result.totalToolCalls).toBeLessThanOrEqual(3); - const skipped = result.stepResults.filter(r => r.status === 'skipped'); - expect(skipped.length).toBeGreaterThan(0); - }); - - it('reports partial or budget_exceeded when budget runs out mid-execution', async () => { - // Plan with 5 steps (within maxToolCalls for formulation) - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Five reads', - steps: Array.from({ length: 5 }, (_, i) => ({ - stepNumber: i + 1, - action: 'read', - targetFiles: [`file${i}.ts`], - toolCall: 'code/read', - toolParams: { filePath: `file${i}.ts` }, - dependsOn: i > 0 ? [i] : [], - verification: 'ok', - })), - }), - }); - - mockExecute.mockResolvedValue({ success: true }); - - // 5 tool calls total: 1 for discovery leaves 4 for 5 plan steps = can't finish all - const result = await orchestrator.execute(makeTask({ maxToolCalls: 5 })); - - // Some steps completed, some skipped due to budget - expect(['partial', 'budget_exceeded']).toContain(result.status); - const skipped = result.stepResults.filter(r => r.status === 'skipped'); - expect(skipped.length).toBeGreaterThan(0); - }); - }); - - describe('step failure and retry', () => { - it('retries failed steps up to 3 times', async () => { - mockSimplePlan(); - - let callCount = 0; - mockExecute.mockImplementation(async (cmd: string) => { - callCount++; - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') { - // Fail first 2 times, succeed on 3rd - if (callCount <= 4) return { success: false, error: 'Conflict' }; - return { success: true, changeId: 'c1' }; - } - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - // Step 2 (edit) should have retried and eventually succeeded - const editStep = result.stepResults.find(r => r.toolCall === 'code/edit'); - expect(editStep?.status).toBe('completed'); - }); - - it('marks step as failed after max retries', async () => { - mockSimplePlan(); - - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') return { success: false, error: 'Always fails' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - const editStep = result.stepResults.find(r => r.toolCall === 'code/edit'); - expect(editStep?.status).toBe('failed'); - expect(editStep?.error).toContain('Always fails'); - }); - - it('skips dependent steps when dependency fails', async () => { - mockSimplePlan(); - - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') return { success: false, error: 'Edit failed' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - // Step 3 (verify) depends on step 2 (edit) which failed - const verifyStep = result.stepResults.find(r => r.stepNumber === 3); - expect(verifyStep?.status).toBe('skipped'); - expect(verifyStep?.error).toContain('Dependencies not met'); - }); - - it('returns partial status when some steps succeed', async () => { - mockSimplePlan(); - - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'data' }; - if (cmd === 'code/edit') return { success: false, error: 'Failed' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('partial'); - expect(result.errors.length).toBeGreaterThan(0); - }); - }); - - describe('error handling', () => { - it('handles plan formulation failure gracefully', async () => { - mockGenerateText.mockRejectedValue(new Error('LLM unavailable')); - mockExecute.mockResolvedValue({ success: true }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('failed'); - expect(result.errors).toContain('LLM unavailable'); - }); - - it('handles command execution exception', async () => { - mockSimplePlan(); - - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') throw new Error('Connection lost'); - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - // Step 1 (read) should fail with exception - const readStep = result.stepResults.find(r => r.stepNumber === 1); - expect(readStep?.status).toBe('failed'); - expect(readStep?.error).toContain('Connection lost'); - }); - }); - - describe('dryRun mode', () => { - it('executes read steps normally in dryRun', async () => { - mockSimplePlan(); - mockExecute - .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) - .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read - .mockResolvedValue({ success: true, content: 'data' }); // remaining reads - - const result = await orchestrator.execute(makeTask(), { dryRun: true }); - - // Step 1 (read) should execute normally - const readStep = result.stepResults.find(r => r.stepNumber === 1); - expect(readStep?.status).toBe('completed'); - }); - - it('mocks write/edit steps in dryRun', async () => { - mockSimplePlan(); - mockExecute - .mockResolvedValueOnce({ success: true, root: {} }) // code/tree (discovery) - .mockResolvedValueOnce({ success: true, content: 'old' }) // step 1: code/read - .mockResolvedValue({ success: true, content: 'data' }); // step 3: verify read - - const result = await orchestrator.execute(makeTask(), { dryRun: true }); - - // Step 2 (edit) should be mocked β€” completed but with dryRun flag - const editStep = result.stepResults.find(r => r.stepNumber === 2); - expect(editStep?.status).toBe('completed'); - - const output = editStep?.output as Record; - expect(output?.dryRun).toBe(true); - expect(output?.wouldModify).toEqual(['utils.ts']); - }); - - it('dryRun does not call Commands.execute for write steps', async () => { - mockSimplePlan(); - - const callLog: string[] = []; - mockExecute.mockImplementation(async (cmd: string) => { - callLog.push(cmd); - if (cmd === 'code/tree') return { success: true, root: {} }; - return { success: true, content: 'data' }; - }); - - await orchestrator.execute(makeTask(), { dryRun: true }); - - // code/edit should NOT appear in call log - expect(callLog).not.toContain('code/edit'); - // code/read and code/tree should appear - expect(callLog).toContain('code/tree'); - expect(callLog).toContain('code/read'); - }); - - it('dryRun completes all steps successfully', async () => { - mockSimplePlan(); - mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); - - const result = await orchestrator.execute(makeTask(), { dryRun: true }); - - expect(result.status).toBe('completed'); - expect(result.stepResults.every(r => r.status === 'completed')).toBe(true); - }); - - it('dryRun does not produce changeIds', async () => { - mockSimplePlan(); - mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); - - const result = await orchestrator.execute(makeTask(), { dryRun: true }); - - // No real writes happened, so no changeIds - expect(result.changeIds).toHaveLength(0); - }); - }); - - describe('verifyβ†’re-plan iteration loop', () => { - it('skips verification when autoVerify is false', async () => { - mockSimplePlan(); - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask(), { autoVerify: false }); - - expect(result.status).toBe('completed'); - // code/verify should NOT have been called - const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); - expect(calls).not.toContain('code/verify'); - }); - - it('skips verification in dryRun mode', async () => { - mockSimplePlan(); - mockExecute.mockResolvedValue({ success: true, content: 'data', root: {} }); - - const result = await orchestrator.execute(makeTask(), { dryRun: true }); - - // code/verify should NOT have been called - const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); - expect(calls).not.toContain('code/verify'); - }); - - it('runs verification after write steps and passes', async () => { - mockSimplePlan(); - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; - if (cmd === 'code/verify') return { success: true, typeCheck: { passed: true, errorCount: 0, errors: [] } }; - return { success: true }; - }); - - const result = await orchestrator.execute(makeTask()); - - expect(result.status).toBe('completed'); - expect(result.errors).toHaveLength(0); - const calls = mockExecute.mock.calls.map((c: unknown[]) => c[0]); - expect(calls).toContain('code/verify'); - }); - - it('records errors when verification fails and iterations exhausted', async () => { - mockSimplePlan(); - - // First call for planning, then always fail verification - let verifyCallCount = 0; - mockExecute.mockImplementation(async (cmd: string) => { - if (cmd === 'code/tree') return { success: true, root: {} }; - if (cmd === 'code/read') return { success: true, content: 'file content' }; - if (cmd === 'code/edit') return { success: true, changeId: 'c1' }; - if (cmd === 'code/verify') { - verifyCallCount++; - return { - success: false, - typeCheck: { - passed: false, - errorCount: 1, - errors: [{ file: 'utils.ts', line: 5, column: 1, code: 'TS2345', message: 'Type error' }], - }, - }; - } - return { success: true }; - }); - - // Allow re-plan β€” the LLM mock needs to return a fix plan too - mockGenerateText - .mockResolvedValueOnce({ - text: JSON.stringify({ - summary: 'Original plan', - steps: [ - { stepNumber: 1, action: 'read', targetFiles: ['utils.ts'], toolCall: 'code/read', toolParams: { filePath: 'utils.ts' }, dependsOn: [], verification: 'ok' }, - { stepNumber: 2, action: 'edit', targetFiles: ['utils.ts'], toolCall: 'code/edit', toolParams: { filePath: 'utils.ts', editType: 'append', content: 'x' }, dependsOn: [1], verification: 'ok' }, - ], - }), - }) - .mockResolvedValueOnce({ - text: JSON.stringify({ - summary: 'Fix type error', - steps: [ - { stepNumber: 1, action: 'edit', targetFiles: ['utils.ts'], toolCall: 'code/edit', toolParams: { filePath: 'utils.ts', editType: 'search_replace', search: 'x', replace: 'y' }, dependsOn: [], verification: 'ok' }, - ], - }), - }); - - const result = await orchestrator.execute(makeTask({ maxToolCalls: 30 }), { maxVerifyIterations: 2 }); - - // Should have verification errors recorded - expect(result.errors.some((e: string) => e.includes('TS2345'))).toBe(true); - // Should have called verify at least twice (initial + after fix) - expect(verifyCallCount).toBeGreaterThanOrEqual(2); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts b/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts deleted file mode 100644 index e138c7974..000000000 --- a/src/debug/jtag/tests/unit/code/CodeCoordinationStream.test.ts +++ /dev/null @@ -1,328 +0,0 @@ -/** - * CodeCoordinationStream Unit Tests - * - * Tests the file-level MUTEX coordination for multi-agent coding: - * - Stream creation and configuration - * - File lock acquisition and release - * - Conflict detection (overlapping file claims) - * - Multi-agent parallel coordination (non-overlapping files) - * - Global lock management - * - Singleton pattern - */ - -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; -import { - CodeCoordinationStream, - getCodeCoordinator, - resetCodeCoordinator, - type CodeThought, - type CodeDecision, - type CodeStream, -} from '../../../system/coordination/server/CodeCoordinationStream'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; - -// ── Helpers ────────────────────────────────────────────────── - -const PLAN_ID = '11111111-2222-3333-4444-555555555555' as UUID; -const AGENT_A = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; -const AGENT_B = 'bbbbbbbb-cccc-dddd-eeee-ffffffffffff' as UUID; -const AGENT_C = 'cccccccc-dddd-eeee-ffff-111111111111' as UUID; - -function makeThought( - personaId: UUID, - targetFiles: string[], - overrides?: Partial, -): CodeThought { - return { - personaId, - personaName: `Agent-${personaId.slice(0, 4)}`, - type: 'claiming', - confidence: 0.8, - reasoning: `Claiming files: ${targetFiles.join(', ')}`, - timestamp: Date.now(), - planId: PLAN_ID, - targetFiles, - stepNumbers: [1, 2], - ...overrides, - }; -} - -// ── Tests ──────────────────────────────────────────────────── - -describe('CodeCoordinationStream', () => { - let coordinator: CodeCoordinationStream; - - beforeEach(() => { - resetCodeCoordinator(); - coordinator = new CodeCoordinationStream(); - }); - - afterEach(() => { - coordinator.shutdown(); - }); - - describe('construction and configuration', () => { - it('creates with coding-specific config', () => { - // Verify it's a proper instance - expect(coordinator).toBeInstanceOf(CodeCoordinationStream); - }); - - it('starts with no global file locks', () => { - expect(coordinator.globalFileLocks.size).toBe(0); - }); - - it('starts with no active streams', () => { - expect(coordinator.getStreams().size).toBe(0); - }); - }); - - describe('file lock acquisition', () => { - it('single agent acquires locks on broadcast', async () => { - const thought = makeThought(AGENT_A, ['src/main.ts', 'src/utils.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - expect(coordinator.globalFileLocks.size).toBe(2); - expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); - expect(coordinator.lockHolder('src/utils.ts')).toBe(AGENT_A); - }); - - it('isFileLocked returns correct status', async () => { - expect(coordinator.isFileLocked('src/main.ts')).toBe(false); - - const thought = makeThought(AGENT_A, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - expect(coordinator.isFileLocked('src/main.ts')).toBe(true); - expect(coordinator.isFileLocked('src/other.ts')).toBe(false); - }); - - it('lockHolder returns undefined for unlocked files', () => { - expect(coordinator.lockHolder('src/nonexistent.ts')).toBeUndefined(); - }); - }); - - describe('conflict detection', () => { - it('rejects claim when files already locked by another agent', async () => { - // Agent A claims main.ts - const thoughtA = makeThought(AGENT_A, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - - // Agent B tries to claim main.ts β€” should be rejected - const thoughtB = makeThought(AGENT_B, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - - // main.ts should still be locked by Agent A - expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); - }); - - it('allows same agent to reclaim their own files', async () => { - const thought1 = makeThought(AGENT_A, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought1); - - const thought2 = makeThought(AGENT_A, ['src/main.ts', 'src/extra.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought2); - - expect(coordinator.lockHolder('src/main.ts')).toBe(AGENT_A); - expect(coordinator.lockHolder('src/extra.ts')).toBe(AGENT_A); - }); - - it('rejects claim when any file in the set conflicts', async () => { - const thoughtA = makeThought(AGENT_A, ['src/shared.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - - // Agent B claims unique.ts + shared.ts β€” shared.ts conflicts - const thoughtB = makeThought(AGENT_B, ['src/unique.ts', 'src/shared.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - - // shared.ts still locked by A, unique.ts NOT locked (whole claim rejected) - expect(coordinator.lockHolder('src/shared.ts')).toBe(AGENT_A); - expect(coordinator.isFileLocked('src/unique.ts')).toBe(false); - }); - }); - - describe('parallel non-overlapping agents', () => { - it('multiple agents acquire non-overlapping file locks', async () => { - const thoughtA = makeThought(AGENT_A, ['src/moduleA.ts']); - const thoughtB = makeThought(AGENT_B, ['src/moduleB.ts']); - const thoughtC = makeThought(AGENT_C, ['src/moduleC.ts']); - - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtC); - - expect(coordinator.globalFileLocks.size).toBe(3); - expect(coordinator.lockHolder('src/moduleA.ts')).toBe(AGENT_A); - expect(coordinator.lockHolder('src/moduleB.ts')).toBe(AGENT_B); - expect(coordinator.lockHolder('src/moduleC.ts')).toBe(AGENT_C); - }); - - it('canWorkOnFiles checks correctly for non-overlapping', async () => { - const thought = makeThought(AGENT_A, ['src/moduleA.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - const canB = await coordinator.canWorkOnFiles(AGENT_B, PLAN_ID, ['src/moduleB.ts']); - expect(canB).toBe(true); - - const canBConflict = await coordinator.canWorkOnFiles(AGENT_B, PLAN_ID, ['src/moduleA.ts']); - expect(canBConflict).toBe(false); - }); - - it('canWorkOnFiles returns true when no stream exists', async () => { - const can = await coordinator.canWorkOnFiles(AGENT_A, 'no-such-plan' as UUID, ['anything.ts']); - expect(can).toBe(true); - }); - }); - - describe('lock release', () => { - it('releases all locks for a persona', async () => { - const thought = makeThought(AGENT_A, ['src/a.ts', 'src/b.ts', 'src/c.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - expect(coordinator.globalFileLocks.size).toBe(3); - - coordinator.releaseLocks(AGENT_A); - - expect(coordinator.globalFileLocks.size).toBe(0); - expect(coordinator.isFileLocked('src/a.ts')).toBe(false); - }); - - it('releases only the specified persona locks', async () => { - const thoughtA = makeThought(AGENT_A, ['src/a.ts']); - const thoughtB = makeThought(AGENT_B, ['src/b.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - - coordinator.releaseLocks(AGENT_A); - - expect(coordinator.isFileLocked('src/a.ts')).toBe(false); - expect(coordinator.isFileLocked('src/b.ts')).toBe(true); - expect(coordinator.lockHolder('src/b.ts')).toBe(AGENT_B); - }); - - it('releases locks for a specific plan only', async () => { - const PLAN_2 = '22222222-3333-4444-5555-666666666666' as UUID; - const thoughtA1 = makeThought(AGENT_A, ['src/plan1.ts']); - const thoughtA2 = makeThought(AGENT_A, ['src/plan2.ts']); - - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA1); - await coordinator.broadcastCodeThought(PLAN_2, thoughtA2); - - // Release only for PLAN_ID stream β€” global locks for PLAN_2 remain - coordinator.releaseLocks(AGENT_A, PLAN_ID); - - // Stream-level locks for plan1 should be gone - const stream1 = coordinator.getStream(PLAN_ID); - if (stream1) { - expect(stream1.fileLocks.has('src/plan1.ts')).toBe(false); - } - }); - }); - - describe('deferring', () => { - it('defer releases claimed slot', async () => { - const claim = makeThought(AGENT_A, ['src/main.ts'], { type: 'claiming' }); - await coordinator.broadcastCodeThought(PLAN_ID, claim); - - const stream = coordinator.getStream(PLAN_ID); - expect(stream).toBeDefined(); - expect(stream!.claimedBy.has(AGENT_A)).toBe(true); - - const defer = makeThought(AGENT_A, ['src/main.ts'], { type: 'deferring' }); - await coordinator.broadcastCodeThought(PLAN_ID, defer); - - expect(stream!.claimedBy.has(AGENT_A)).toBe(false); - }); - }); - - describe('stream lifecycle', () => { - it('creates stream on first thought', async () => { - expect(coordinator.getStreams().size).toBe(0); - - const thought = makeThought(AGENT_A, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - expect(coordinator.getStreams().size).toBe(1); - const stream = coordinator.getStream(PLAN_ID); - expect(stream).toBeDefined(); - expect(stream!.planId).toBe(PLAN_ID); - }); - - it('stream accumulates thoughts from multiple agents', async () => { - const thoughtA = makeThought(AGENT_A, ['src/a.ts']); - const thoughtB = makeThought(AGENT_B, ['src/b.ts']); - - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - - const stream = coordinator.getStream(PLAN_ID); - expect(stream!.thoughts).toHaveLength(2); - expect(stream!.considerations.size).toBe(2); - }); - }); - - describe('decision making', () => { - it('waitForCodeDecision returns null for non-existent stream', async () => { - const decision = await coordinator.waitForCodeDecision('no-such-plan' as UUID, 100); - expect(decision).toBeNull(); - }); - - it('decision includes file locks and conflicts', async () => { - // Set up two agents claiming different files - const thoughtA = makeThought(AGENT_A, ['src/a.ts'], { confidence: 0.9 }); - const thoughtB = makeThought(AGENT_B, ['src/b.ts'], { confidence: 0.8 }); - - await coordinator.broadcastCodeThought(PLAN_ID, thoughtA); - await coordinator.broadcastCodeThought(PLAN_ID, thoughtB); - - // Wait for decision (with short timeout since canDecideEarly may trigger) - const decision = await coordinator.waitForCodeDecision(PLAN_ID, 4000); - if (decision) { - expect(decision.planId).toBe(PLAN_ID); - expect(decision.fileLocks).toBeDefined(); - expect(decision.conflicts).toBeDefined(); - } - }); - }); - - describe('singleton pattern', () => { - it('getCodeCoordinator returns same instance', () => { - const a = getCodeCoordinator(); - const b = getCodeCoordinator(); - expect(a).toBe(b); - }); - - it('resetCodeCoordinator creates fresh instance', () => { - const a = getCodeCoordinator(); - resetCodeCoordinator(); - const b = getCodeCoordinator(); - expect(a).not.toBe(b); - }); - - it('reset clears global file locks', async () => { - const coord = getCodeCoordinator(); - const thought = makeThought(AGENT_A, ['src/locked.ts']); - await coord.broadcastCodeThought(PLAN_ID, thought); - - expect(coord.globalFileLocks.size).toBe(1); - resetCodeCoordinator(); - - const fresh = getCodeCoordinator(); - expect(fresh.globalFileLocks.size).toBe(0); - }); - }); - - describe('shutdown', () => { - it('clears all state on shutdown', async () => { - const thought = makeThought(AGENT_A, ['src/main.ts']); - await coordinator.broadcastCodeThought(PLAN_ID, thought); - - expect(coordinator.globalFileLocks.size).toBe(1); - expect(coordinator.getStreams().size).toBe(1); - - coordinator.shutdown(); - - expect(coordinator.globalFileLocks.size).toBe(0); - expect(coordinator.getStreams().size).toBe(0); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts b/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts deleted file mode 100644 index 5e9cb4d69..000000000 --- a/src/debug/jtag/tests/unit/code/CodeTaskDelegator.test.ts +++ /dev/null @@ -1,530 +0,0 @@ -/** - * CodeTaskDelegator Unit Tests - * - * Tests plan decomposition and multi-agent assignment: - * - decompose: step DAG β†’ file clusters (union-find) - * - assign: clusters β†’ agents (load-balanced) - * - createSubPlans: assignments β†’ CodingPlanEntity sub-plans - * - consolidate: sub-plan results β†’ parent CodingResult - */ - -import { describe, it, expect } from 'vitest'; -import { CodeTaskDelegator, type FileCluster, type AgentAssignment } from '../../../system/code/server/CodeTaskDelegator'; -import { CodingPlanEntity, type CodingStepSnapshot } from '../../../system/data/entities/CodingPlanEntity'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; -import type { AgentCapability } from '../../../system/code/shared/CodingTypes'; - -// ── Helpers ────────────────────────────────────────────────── - -const TASK_ID = '11111111-2222-3333-4444-555555555555' as UUID; -const LEAD_ID = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; -const AGENT_A = 'aaaaaaaa-1111-2222-3333-444444444444' as UUID; -const AGENT_B = 'bbbbbbbb-1111-2222-3333-444444444444' as UUID; -const AGENT_C = 'cccccccc-1111-2222-3333-444444444444' as UUID; - -function makeStep( - stepNumber: number, - targetFiles: string[], - dependsOn: number[] = [], - action: string = 'edit', -): CodingStepSnapshot { - return { - stepNumber, - action: action as any, - description: `Step ${stepNumber}: ${action} ${targetFiles.join(', ')}`, - targetFiles, - toolCall: `code/${action}`, - toolParams: {}, - dependsOn, - verification: 'Verify step', - status: 'pending', - }; -} - -function makePlan(steps: CodingStepSnapshot[]): CodingPlanEntity { - const plan = new CodingPlanEntity(); - plan.taskId = TASK_ID; - plan.createdById = LEAD_ID; - plan.leadId = LEAD_ID; - plan.summary = 'Test plan for delegation'; - plan.taskDescription = 'Multi-file refactoring task'; - plan.steps = steps; - plan.estimatedToolCalls = steps.length; - plan.assignees = [LEAD_ID]; - plan.generatedBy = { provider: 'test', model: 'test-model', temperature: 0, durationMs: 0 }; - plan.riskLevel = 'medium'; - plan.securityTier = 'write'; - plan.status = 'approved'; - return plan; -} - -function makeAgent(id: UUID, name: string, load: number = 0): AgentCapability { - return { - personaId: id, - name, - specialties: ['typescript'], - currentLoad: load, - securityTier: 'write', - }; -} - -// ── Tests ──────────────────────────────────────────────────── - -describe('CodeTaskDelegator', () => { - const delegator = new CodeTaskDelegator(); - - describe('decompose', () => { - it('empty plan produces no clusters', () => { - const plan = makePlan([]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(0); - }); - - it('single step produces one cluster', () => { - const plan = makePlan([ - makeStep(1, ['src/main.ts']), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - expect(clusters[0].stepNumbers).toEqual([1]); - expect(clusters[0].files).toEqual(['src/main.ts']); - }); - - it('independent files produce separate clusters', () => { - const plan = makePlan([ - makeStep(1, ['src/moduleA.ts']), - makeStep(2, ['src/moduleB.ts']), - makeStep(3, ['src/moduleC.ts']), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(3); - - const allFiles = clusters.flatMap(c => c.files); - expect(allFiles).toContain('src/moduleA.ts'); - expect(allFiles).toContain('src/moduleB.ts'); - expect(allFiles).toContain('src/moduleC.ts'); - }); - - it('shared file merges steps into one cluster', () => { - const plan = makePlan([ - makeStep(1, ['src/shared.ts', 'src/a.ts']), - makeStep(2, ['src/shared.ts', 'src/b.ts']), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - expect(clusters[0].stepNumbers).toContain(1); - expect(clusters[0].stepNumbers).toContain(2); - expect(clusters[0].files).toContain('src/shared.ts'); - expect(clusters[0].files).toContain('src/a.ts'); - expect(clusters[0].files).toContain('src/b.ts'); - }); - - it('dependencies merge steps into one cluster', () => { - const plan = makePlan([ - makeStep(1, ['src/a.ts']), - makeStep(2, ['src/b.ts'], [1]), // depends on step 1 - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - expect(clusters[0].stepNumbers).toContain(1); - expect(clusters[0].stepNumbers).toContain(2); - }); - - it('transitive file sharing merges all into one cluster', () => { - // A shares file with B, B shares file with C β†’ all in one cluster - const plan = makePlan([ - makeStep(1, ['src/a.ts', 'src/shared-ab.ts']), - makeStep(2, ['src/b.ts', 'src/shared-ab.ts', 'src/shared-bc.ts']), - makeStep(3, ['src/c.ts', 'src/shared-bc.ts']), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - }); - - it('mixed independent and dependent steps', () => { - const plan = makePlan([ - // Cluster 1: steps 1, 2 share moduleA.ts - makeStep(1, ['src/moduleA.ts'], []), - makeStep(2, ['src/moduleA.ts'], [1]), - // Cluster 2: step 3 is independent - makeStep(3, ['src/moduleB.ts'], []), - // Cluster 3: steps 4, 5 share moduleC.ts - makeStep(4, ['src/moduleC.ts'], []), - makeStep(5, ['src/moduleC.ts', 'src/moduleC-test.ts'], [4]), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(3); - }); - - it('external dependencies are tracked', () => { - // Step 2 depends on step 1, but they touch different files - // If we force them into different clusters (no shared files, no deps), - // they'd be separate. But dependsOn forces merge. - // Test external deps by having step 3 depend on step 1 from a different cluster - const plan = makePlan([ - makeStep(1, ['src/a.ts']), - makeStep(2, ['src/a.ts'], [1]), // Same cluster as 1 - makeStep(3, ['src/b.ts']), // Different cluster - ]); - const clusters = delegator.decompose(plan); - // Steps 1 and 2 in one cluster (shared file + dependency) - // Step 3 in separate cluster (no shared files, no deps) - expect(clusters).toHaveLength(2); - - const clusterB = clusters.find(c => c.files.includes('src/b.ts')); - expect(clusterB).toBeDefined(); - expect(clusterB!.externalDeps).toEqual([]); // No external deps - }); - - it('steps are sorted within clusters', () => { - const plan = makePlan([ - makeStep(3, ['src/shared.ts']), - makeStep(1, ['src/shared.ts']), - makeStep(2, ['src/shared.ts']), - ]); - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - expect(clusters[0].stepNumbers).toEqual([1, 2, 3]); - }); - }); - - describe('assign', () => { - it('empty clusters produces empty assignments', () => { - const agents = [makeAgent(AGENT_A, 'Agent A')]; - const assignments = delegator.assign([], agents, makePlan([])); - expect(assignments).toHaveLength(0); - }); - - it('empty agents produces empty assignments', () => { - const clusters: FileCluster[] = [{ - index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [], - }]; - const assignments = delegator.assign(clusters, [], makePlan([])); - expect(assignments).toHaveLength(0); - }); - - it('single cluster assigned to single agent', () => { - const clusters: FileCluster[] = [{ - index: 0, stepNumbers: [1, 2], files: ['src/main.ts'], externalDeps: [], - }]; - const agents = [makeAgent(AGENT_A, 'Agent A')]; - const assignments = delegator.assign(clusters, agents, makePlan([])); - - expect(assignments).toHaveLength(1); - expect(assignments[0].agentId).toBe(AGENT_A); - expect(assignments[0].totalSteps).toBe(2); - expect(assignments[0].files).toContain('src/main.ts'); - }); - - it('distributes clusters across agents evenly', () => { - const clusters: FileCluster[] = [ - { index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [] }, - { index: 1, stepNumbers: [2], files: ['b.ts'], externalDeps: [] }, - { index: 2, stepNumbers: [3], files: ['c.ts'], externalDeps: [] }, - ]; - const agents = [ - makeAgent(AGENT_A, 'Agent A', 0.1), - makeAgent(AGENT_B, 'Agent B', 0.2), - makeAgent(AGENT_C, 'Agent C', 0.3), - ]; - const assignments = delegator.assign(clusters, agents, makePlan([])); - - expect(assignments).toHaveLength(3); - // Each agent gets one cluster (evenly distributed) - for (const a of assignments) { - expect(a.totalSteps).toBe(1); - } - }); - - it('prefers least-loaded agents', () => { - const clusters: FileCluster[] = [ - { index: 0, stepNumbers: [1, 2, 3], files: ['big.ts'], externalDeps: [] }, - ]; - const agents = [ - makeAgent(AGENT_A, 'Agent A', 0.8), // Heavily loaded - makeAgent(AGENT_B, 'Agent B', 0.1), // Least loaded - ]; - const assignments = delegator.assign(clusters, agents, makePlan([])); - - expect(assignments).toHaveLength(1); - expect(assignments[0].agentId).toBe(AGENT_B); // Least loaded gets it - }); - - it('handles more clusters than agents', () => { - const clusters: FileCluster[] = [ - { index: 0, stepNumbers: [1], files: ['a.ts'], externalDeps: [] }, - { index: 1, stepNumbers: [2], files: ['b.ts'], externalDeps: [] }, - { index: 2, stepNumbers: [3], files: ['c.ts'], externalDeps: [] }, - { index: 3, stepNumbers: [4], files: ['d.ts'], externalDeps: [] }, - ]; - const agents = [ - makeAgent(AGENT_A, 'Agent A'), - makeAgent(AGENT_B, 'Agent B'), - ]; - const assignments = delegator.assign(clusters, agents, makePlan([])); - - // 4 clusters, 2 agents β†’ each gets 2 - expect(assignments).toHaveLength(2); - const totalSteps = assignments.reduce((sum, a) => sum + a.totalSteps, 0); - expect(totalSteps).toBe(4); - }); - }); - - describe('createSubPlans', () => { - it('creates sub-plans from assignments', () => { - const plan = makePlan([ - makeStep(1, ['src/a.ts']), - makeStep(2, ['src/b.ts']), - ]); - - const assignments: AgentAssignment[] = [ - { - agentId: AGENT_A, - agentName: 'Agent A', - clusters: [{ index: 0, stepNumbers: [1], files: ['src/a.ts'], externalDeps: [] }], - totalSteps: 1, - files: ['src/a.ts'], - }, - { - agentId: AGENT_B, - agentName: 'Agent B', - clusters: [{ index: 1, stepNumbers: [2], files: ['src/b.ts'], externalDeps: [] }], - totalSteps: 1, - files: ['src/b.ts'], - }, - ]; - - const subPlans = delegator.createSubPlans(plan, assignments); - expect(subPlans).toHaveLength(2); - - // Sub-plan for Agent A - const subA = subPlans.find(s => s.leadId === AGENT_A); - expect(subA).toBeDefined(); - expect(subA!.steps).toHaveLength(1); - expect(subA!.steps[0].stepNumber).toBe(1); - expect(subA!.assignees).toEqual([AGENT_A]); - expect(subA!.status).toBe('approved'); - - // Sub-plan for Agent B - const subB = subPlans.find(s => s.leadId === AGENT_B); - expect(subB).toBeDefined(); - expect(subB!.steps).toHaveLength(1); - expect(subB!.steps[0].stepNumber).toBe(2); - }); - - it('sub-plans inherit parent metadata', () => { - const plan = makePlan([makeStep(1, ['src/a.ts'])]); - plan.riskLevel = 'high'; - plan.securityTier = 'write'; - - const assignments: AgentAssignment[] = [{ - agentId: AGENT_A, agentName: 'Agent A', - clusters: [{ index: 0, stepNumbers: [1], files: ['src/a.ts'], externalDeps: [] }], - totalSteps: 1, files: ['src/a.ts'], - }]; - - const subPlans = delegator.createSubPlans(plan, assignments); - expect(subPlans[0].taskId).toBe(plan.taskId); - expect(subPlans[0].riskLevel).toBe('high'); - expect(subPlans[0].securityTier).toBe('write'); - expect(subPlans[0].taskDescription).toBe(plan.taskDescription); - }); - - it('sub-plans filter dependsOn to only internal steps', () => { - const plan = makePlan([ - makeStep(1, ['src/a.ts']), - makeStep(2, ['src/a.ts'], [1]), // Depends on step 1 - makeStep(3, ['src/b.ts'], [1]), // Depends on step 1 (external dep) - ]); - - // Steps 1 and 2 go to Agent A (shared file), step 3 to Agent B - const assignments: AgentAssignment[] = [ - { - agentId: AGENT_A, agentName: 'Agent A', - clusters: [{ index: 0, stepNumbers: [1, 2], files: ['src/a.ts'], externalDeps: [] }], - totalSteps: 2, files: ['src/a.ts'], - }, - { - agentId: AGENT_B, agentName: 'Agent B', - clusters: [{ index: 1, stepNumbers: [3], files: ['src/b.ts'], externalDeps: [1] }], - totalSteps: 1, files: ['src/b.ts'], - }, - ]; - - const subPlans = delegator.createSubPlans(plan, assignments); - const subB = subPlans.find(s => s.leadId === AGENT_B)!; - - // Step 3's dependency on step 1 should be filtered out (step 1 is not in this sub-plan) - expect(subB.steps[0].dependsOn).toEqual([]); - }); - }); - - describe('consolidate', () => { - it('all completed β†’ completed', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['a.ts'])]); - sub1.status = 'completed'; - sub1.filesModified = ['a.ts']; - sub1.totalToolCalls = 3; - sub1.totalDurationMs = 1000; - sub1.steps[0].status = 'completed'; - - const sub2 = makePlan([makeStep(2, ['b.ts'])]); - sub2.status = 'completed'; - sub2.filesModified = ['b.ts']; - sub2.totalToolCalls = 2; - sub2.totalDurationMs = 800; - sub2.steps[0].status = 'completed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - expect(result.status).toBe('completed'); - expect(result.filesModified).toContain('a.ts'); - expect(result.filesModified).toContain('b.ts'); - expect(result.totalToolCalls).toBe(5); - // Duration is max (parallel), not sum - expect(result.totalDurationMs).toBe(1000); - }); - - it('some completed β†’ partial', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['a.ts'])]); - sub1.status = 'completed'; - sub1.steps[0].status = 'completed'; - - const sub2 = makePlan([makeStep(2, ['b.ts'])]); - sub2.status = 'failed'; - sub2.errors = ['Compilation failed']; - sub2.steps[0].status = 'failed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - expect(result.status).toBe('partial'); - expect(result.errors).toContain('Compilation failed'); - }); - - it('all failed β†’ failed', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['a.ts'])]); - sub1.status = 'failed'; - sub1.steps[0].status = 'failed'; - - const sub2 = makePlan([makeStep(2, ['b.ts'])]); - sub2.status = 'failed'; - sub2.steps[0].status = 'failed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - expect(result.status).toBe('failed'); - }); - - it('detects file conflicts across sub-plans', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['shared.ts'])]); - sub1.status = 'completed'; - sub1.filesModified = ['shared.ts']; - sub1.steps[0].status = 'completed'; - - const sub2 = makePlan([makeStep(2, ['shared.ts'])]); - sub2.status = 'completed'; - sub2.filesModified = ['shared.ts']; - sub2.steps[0].status = 'completed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - expect(result.errors.some(e => e.includes('conflict'))).toBe(true); - expect(result.errors.some(e => e.includes('shared.ts'))).toBe(true); - }); - - it('aggregates change IDs from all sub-plans', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['a.ts'])]); - sub1.status = 'completed'; - sub1.changeIds = ['change-1', 'change-2']; - sub1.steps[0].status = 'completed'; - - const sub2 = makePlan([makeStep(2, ['b.ts'])]); - sub2.status = 'completed'; - sub2.changeIds = ['change-3']; - sub2.steps[0].status = 'completed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - expect(result.changeIds).toEqual(['change-1', 'change-2', 'change-3']); - }); - - it('deduplicates modified files', () => { - const plan = makePlan([]); - const sub1 = makePlan([makeStep(1, ['shared.ts'])]); - sub1.status = 'completed'; - sub1.filesModified = ['shared.ts']; - sub1.steps[0].status = 'completed'; - - const sub2 = makePlan([makeStep(2, ['shared.ts'])]); - sub2.status = 'completed'; - sub2.filesModified = ['shared.ts']; - sub2.steps[0].status = 'completed'; - - const result = delegator.consolidate(plan, [sub1, sub2]); - // Set-based dedup: shared.ts appears once - expect(result.filesModified.filter(f => f === 'shared.ts')).toHaveLength(1); - }); - - it('empty sub-plans β†’ failed', () => { - const plan = makePlan([]); - const result = delegator.consolidate(plan, []); - expect(result.status).toBe('failed'); - }); - }); - - describe('full pipeline: decompose β†’ assign β†’ createSubPlans', () => { - it('end-to-end with 3 independent file groups', () => { - const plan = makePlan([ - // Group A: src/auth/* - makeStep(1, ['src/auth/login.ts'], [], 'read'), - makeStep(2, ['src/auth/login.ts'], [1], 'edit'), - // Group B: src/api/* - makeStep(3, ['src/api/routes.ts'], [], 'read'), - makeStep(4, ['src/api/routes.ts'], [3], 'edit'), - // Group C: src/utils/* - makeStep(5, ['src/utils/helpers.ts'], [], 'read'), - makeStep(6, ['src/utils/helpers.ts'], [5], 'edit'), - ]); - - const agents = [ - makeAgent(AGENT_A, 'Auth Specialist', 0.1), - makeAgent(AGENT_B, 'API Specialist', 0.2), - makeAgent(AGENT_C, 'Utils Specialist', 0.3), - ]; - - // Step 1: Decompose - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(3); - - // Step 2: Assign - const assignments = delegator.assign(clusters, agents, plan); - expect(assignments).toHaveLength(3); - - // Step 3: Create sub-plans - const subPlans = delegator.createSubPlans(plan, assignments); - expect(subPlans).toHaveLength(3); - - // Each sub-plan has exactly 2 steps - for (const sub of subPlans) { - expect(sub.steps).toHaveLength(2); - expect(sub.status).toBe('approved'); - } - - // All 6 steps are accounted for - const allSteps = subPlans.flatMap(s => s.steps.map(st => st.stepNumber)); - expect(allSteps.sort()).toEqual([1, 2, 3, 4, 5, 6]); - }); - - it('single monolithic plan stays as one cluster', () => { - const plan = makePlan([ - makeStep(1, ['src/index.ts']), - makeStep(2, ['src/index.ts', 'src/types.ts'], [1]), - makeStep(3, ['src/types.ts', 'src/index.ts'], [2]), - ]); - - const clusters = delegator.decompose(plan); - expect(clusters).toHaveLength(1); - expect(clusters[0].stepNumbers).toEqual([1, 2, 3]); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts b/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts deleted file mode 100644 index b337da3f2..000000000 --- a/src/debug/jtag/tests/unit/code/CodingPlanEntity.test.ts +++ /dev/null @@ -1,349 +0,0 @@ -/** - * CodingPlanEntity Unit Tests - * - * Tests the persistent coding plan entity: - * - Construction and default values - * - Validation (required fields, step structure, status enum) - * - Computed properties (progress, stepsCompleted, isDelegated) - * - Hierarchical plan relationships - * - Collection and pagination config - */ - -import { describe, it, expect } from 'vitest'; -import { - CodingPlanEntity, - type CodingStepSnapshot, - type CodingPlanStatus, -} from '../../../system/data/entities/CodingPlanEntity'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; - -function makeStep(overrides?: Partial): CodingStepSnapshot { - return { - stepNumber: 1, - action: 'read', - description: 'Read file', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'File content returned', - status: 'pending', - ...overrides, - }; -} - -function makePlan(overrides?: Partial): CodingPlanEntity { - const plan = new CodingPlanEntity(); - plan.taskId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; - plan.createdById = '11111111-2222-3333-4444-555555555555' as UUID; - plan.leadId = '11111111-2222-3333-4444-555555555555' as UUID; - plan.summary = 'Read, edit, verify'; - plan.taskDescription = 'Add greet function to utils.ts'; - plan.steps = [ - makeStep({ stepNumber: 1, action: 'read' }), - makeStep({ stepNumber: 2, action: 'edit', toolCall: 'code/edit', dependsOn: [1] }), - makeStep({ stepNumber: 3, action: 'verify', dependsOn: [2] }), - ]; - plan.estimatedToolCalls = 3; - plan.assignees = ['11111111-2222-3333-4444-555555555555' as UUID]; - plan.generatedBy = { provider: 'anthropic', model: 'claude-sonnet', temperature: 0.3, durationMs: 500 }; - plan.status = 'draft'; - - // Apply overrides - if (overrides) { - for (const [key, value] of Object.entries(overrides)) { - (plan as Record)[key] = value; - } - } - - return plan; -} - -describe('CodingPlanEntity', () => { - describe('construction and defaults', () => { - it('creates with default values', () => { - const plan = new CodingPlanEntity(); - - expect(plan.taskId).toBe(''); - expect(plan.createdById).toBe(''); - expect(plan.leadId).toBe(''); - expect(plan.summary).toBe(''); - expect(plan.taskDescription).toBe(''); - expect(plan.steps).toEqual([]); - expect(plan.estimatedToolCalls).toBe(0); - expect(plan.assignees).toEqual([]); - expect(plan.status).toBe('draft'); - expect(plan.filesModified).toEqual([]); - expect(plan.filesCreated).toEqual([]); - expect(plan.changeIds).toEqual([]); - expect(plan.errors).toEqual([]); - expect(plan.totalToolCalls).toBe(0); - expect(plan.totalDurationMs).toBe(0); - }); - - it('has correct collection name', () => { - const plan = new CodingPlanEntity(); - expect(plan.collection).toBe('coding_plans'); - expect(CodingPlanEntity.collection).toBe('coding_plans'); - }); - - it('has pagination config with newest first', () => { - const config = CodingPlanEntity.getPaginationConfig(); - expect(config.defaultSortField).toBe('createdAt'); - expect(config.defaultSortDirection).toBe('desc'); - expect(config.defaultPageSize).toBe(20); - }); - }); - - describe('validation', () => { - it('validates a complete plan', () => { - const plan = makePlan(); - const result = plan.validate(); - expect(result.success).toBe(true); - }); - - it('rejects missing taskId', () => { - const plan = makePlan({ taskId: '' as UUID }); - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('taskId'); - }); - - it('rejects missing createdById', () => { - const plan = makePlan({ createdById: '' as UUID }); - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('createdById'); - }); - - it('rejects missing leadId', () => { - const plan = makePlan({ leadId: '' as UUID }); - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('leadId'); - }); - - it('rejects missing summary', () => { - const plan = makePlan({ summary: '' }); - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('summary'); - }); - - it('rejects missing taskDescription', () => { - const plan = makePlan({ taskDescription: ' ' }); - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('taskDescription'); - }); - - it('rejects empty steps array', () => { - const plan = makePlan(); - plan.steps = []; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('at least one step'); - }); - - it('rejects empty assignees', () => { - const plan = makePlan(); - plan.assignees = []; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('at least one assignee'); - }); - - it('rejects invalid status', () => { - const plan = makePlan(); - plan.status = 'bogus' as CodingPlanStatus; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('status'); - }); - - it('validates all valid statuses', () => { - const validStatuses: CodingPlanStatus[] = [ - 'draft', 'proposed', 'approved', 'executing', - 'completed', 'partial', 'failed', 'cancelled', - ]; - - for (const status of validStatuses) { - const plan = makePlan({ status }); - const result = plan.validate(); - expect(result.success).toBe(true); - } - }); - - it('rejects step with invalid stepNumber', () => { - const plan = makePlan(); - plan.steps = [makeStep({ stepNumber: 0 })]; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('stepNumber'); - }); - - it('rejects step with missing action', () => { - const plan = makePlan(); - plan.steps = [makeStep({ action: '' as any })]; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('action'); - }); - - it('rejects step with non-code toolCall', () => { - const plan = makePlan(); - plan.steps = [makeStep({ toolCall: 'data/list' })]; - const result = plan.validate(); - expect(result.success).toBe(false); - expect(result.error).toContain('toolCall'); - }); - }); - - describe('computed properties', () => { - it('reports progress correctly', () => { - const plan = makePlan(); - expect(plan.progress).toBe(0); // All pending - - plan.steps[0].status = 'completed'; - expect(plan.progress).toBeCloseTo(1 / 3); - - plan.steps[1].status = 'completed'; - expect(plan.progress).toBeCloseTo(2 / 3); - - plan.steps[2].status = 'completed'; - expect(plan.progress).toBe(1); - }); - - it('counts completed steps', () => { - const plan = makePlan(); - expect(plan.stepsCompleted).toBe(0); - - plan.steps[0].status = 'completed'; - plan.steps[1].status = 'failed'; - plan.steps[2].status = 'skipped'; - expect(plan.stepsCompleted).toBe(1); - }); - - it('counts failed steps', () => { - const plan = makePlan(); - plan.steps[0].status = 'completed'; - plan.steps[1].status = 'failed'; - plan.steps[2].status = 'failed'; - expect(plan.stepsFailed).toBe(2); - }); - - it('counts remaining steps', () => { - const plan = makePlan(); - expect(plan.stepsRemaining).toBe(3); // All pending - - plan.steps[0].status = 'completed'; - plan.steps[1].status = 'executing'; - expect(plan.stepsRemaining).toBe(2); // 1 pending + 1 executing - }); - - it('progress is 0 for empty steps', () => { - const plan = new CodingPlanEntity(); - expect(plan.progress).toBe(0); - }); - }); - - describe('hierarchical structure', () => { - it('top-level plan has no parent', () => { - const plan = makePlan(); - expect(plan.parentPlanId).toBeUndefined(); - expect(plan.isDelegated).toBe(false); - }); - - it('sub-plan references parent', () => { - const plan = makePlan(); - plan.parentPlanId = 'parent-plan-id-1234' as UUID; - expect(plan.isDelegated).toBe(true); - }); - - it('sub-plan can have different lead than creator', () => { - const plan = makePlan(); - plan.createdById = 'lead-ai' as UUID; - plan.leadId = 'lead-ai' as UUID; - plan.assignees = ['specialist-ai' as UUID]; - // Sub-plan created by lead, assigned to specialist - expect(plan.assignees).not.toContain(plan.leadId); - }); - }); - - describe('execution tracking', () => { - it('tracks file modifications', () => { - const plan = makePlan({ status: 'completed' }); - plan.filesModified = ['src/utils.ts', 'src/index.ts']; - plan.filesCreated = ['src/greet.ts']; - plan.changeIds = ['change-001', 'change-002']; - - expect(plan.filesModified).toHaveLength(2); - expect(plan.filesCreated).toContain('src/greet.ts'); - expect(plan.changeIds).toContain('change-001'); - }); - - it('tracks errors', () => { - const plan = makePlan({ status: 'partial' }); - plan.errors = ['Step 2 (edit): Conflict', 'Step 3 (verify): Dependencies not met']; - expect(plan.errors).toHaveLength(2); - }); - - it('tracks execution timing', () => { - const plan = makePlan({ status: 'completed' }); - plan.executionStartedAt = 1000; - plan.executionCompletedAt = 5000; - plan.totalDurationMs = 4000; - plan.totalToolCalls = 5; - - expect(plan.executionStartedAt).toBe(1000); - expect(plan.executionCompletedAt).toBe(5000); - expect(plan.totalDurationMs).toBe(4000); - expect(plan.totalToolCalls).toBe(5); - }); - }); - - describe('risk and security', () => { - it('defaults riskLevel to low', () => { - const plan = new CodingPlanEntity(); - expect(plan.riskLevel).toBe('low'); - }); - - it('defaults securityTier to write', () => { - const plan = new CodingPlanEntity(); - expect(plan.securityTier).toBe('write'); - }); - - it('stores risk assessment data', () => { - const plan = makePlan(); - plan.riskLevel = 'high'; - plan.riskReason = 'Modifies API interfaces'; - plan.securityTier = 'write'; - - expect(plan.riskLevel).toBe('high'); - expect(plan.riskReason).toBe('Modifies API interfaces'); - expect(plan.securityTier).toBe('write'); - }); - - it('critical risk with system tier', () => { - const plan = makePlan(); - plan.riskLevel = 'critical'; - plan.securityTier = 'system'; - - expect(plan.riskLevel).toBe('critical'); - expect(plan.securityTier).toBe('system'); - }); - }); - - describe('governance', () => { - it('tracks proposal reference', () => { - const plan = makePlan({ status: 'proposed' }); - plan.proposalId = 'proposal-abc-123' as UUID; - expect(plan.proposalId).toBe('proposal-abc-123'); - }); - - it('plan without proposal has no proposalId', () => { - const plan = makePlan(); - expect(plan.proposalId).toBeUndefined(); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts b/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts deleted file mode 100644 index ffe2d2a72..000000000 --- a/src/debug/jtag/tests/unit/code/PlanFormulator.test.ts +++ /dev/null @@ -1,397 +0,0 @@ -/** - * PlanFormulator Unit Tests - * - * Tests LLM plan generation by mocking AIProviderDaemon. - * Validates: - * - Prompt construction (system prompt, tool schemas, constraints) - * - JSON plan parsing from LLM responses - * - Plan validation (actions, dependencies, step numbers) - * - Error handling for invalid LLM output - */ - -import { describe, it, expect, beforeEach, vi } from 'vitest'; -import { PlanFormulator } from '../../../system/code/server/PlanFormulator'; -import { CodingModelSelector } from '../../../system/code/server/CodingModelSelector'; -import type { CodingTask } from '../../../system/code/shared/CodingTypes'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; - -// Mock AIProviderDaemon -const mockGenerateText = vi.fn(); -vi.mock('../../../daemons/ai-provider-daemon/shared/AIProviderDaemon', () => ({ - AIProviderDaemon: { - generateText: (...args: unknown[]) => mockGenerateText(...args), - }, -})); - -// Mock Logger -vi.mock('../../../system/core/logging/Logger', () => ({ - Logger: { - create: () => ({ - debug: () => {}, - info: () => {}, - warn: () => {}, - error: () => {}, - }), - }, -})); - -function makeTask(overrides?: Partial): CodingTask { - return { - id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID, - personaId: '11111111-2222-3333-4444-555555555555' as UUID, - description: 'Add a greet function to utils.ts', - taskType: 'generation', - maxToolCalls: 15, - maxDurationMs: 120000, - createdAt: Date.now(), - ...overrides, - }; -} - -/** Helper: mock LLM returning a valid plan JSON */ -function mockValidPlan() { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Read utils.ts, add greet function, verify', - steps: [ - { - stepNumber: 1, - action: 'read', - description: 'Read current utils.ts contents', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [], - verification: 'File contents returned', - }, - { - stepNumber: 2, - action: 'edit', - description: 'Add greet function to utils.ts', - targetFiles: ['utils.ts'], - toolCall: 'code/edit', - toolParams: { - filePath: 'utils.ts', - editMode: { type: 'append', content: '\nexport function greet(name: string): string {\n return `Hello, ${name}!`;\n}\n' }, - description: 'Add greet function', - }, - dependsOn: [1], - verification: 'Edit applied successfully', - }, - { - stepNumber: 3, - action: 'verify', - description: 'Read back to verify greet function added', - targetFiles: ['utils.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'utils.ts' }, - dependsOn: [2], - verification: 'greet function present in file', - }, - ], - }), - usage: { inputTokens: 500, outputTokens: 200 }, - }); -} - -describe('PlanFormulator', () => { - let formulator: PlanFormulator; - - beforeEach(() => { - mockGenerateText.mockReset(); - const selector = new CodingModelSelector(new Set(['anthropic', 'deepseek', 'groq'])); - formulator = new PlanFormulator(selector); - }); - - describe('formulate', () => { - it('generates a valid plan from LLM response', async () => { - mockValidPlan(); - - const plan = await formulator.formulate(makeTask()); - - expect(plan.taskId).toBe('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'); - expect(plan.summary).toBe('Read utils.ts, add greet function, verify'); - expect(plan.steps).toHaveLength(3); - expect(plan.estimatedToolCalls).toBe(3); - expect(plan.generatedBy.provider).toBe('anthropic'); - expect(plan.generatedAt).toBeGreaterThan(0); - }); - - it('preserves step structure from LLM', async () => { - mockValidPlan(); - - const plan = await formulator.formulate(makeTask()); - const step1 = plan.steps[0]; - - expect(step1.stepNumber).toBe(1); - expect(step1.action).toBe('read'); - expect(step1.toolCall).toBe('code/read'); - expect(step1.targetFiles).toEqual(['utils.ts']); - expect(step1.dependsOn).toEqual([]); - }); - - it('validates dependency ordering', async () => { - mockValidPlan(); - - const plan = await formulator.formulate(makeTask()); - - expect(plan.steps[1].dependsOn).toEqual([1]); // edit depends on read - expect(plan.steps[2].dependsOn).toEqual([2]); // verify depends on edit - }); - - it('passes task description to LLM', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask({ description: 'Refactor auth module' })); - - expect(mockGenerateText).toHaveBeenCalledTimes(1); - const request = mockGenerateText.mock.calls[0][0]; - const userMessage = request.messages.find((m: any) => m.role === 'user' && m.content.includes('Refactor auth module')); - expect(userMessage).toBeDefined(); - }); - - it('includes tool schemas in system prompt', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask()); - - const request = mockGenerateText.mock.calls[0][0]; - const systemMsg = request.messages.find((m: any) => m.role === 'system'); - expect(systemMsg.content).toContain('code/tree'); - expect(systemMsg.content).toContain('code/read'); - expect(systemMsg.content).toContain('code/write'); - expect(systemMsg.content).toContain('code/edit'); - expect(systemMsg.content).toContain('code/search'); - }); - - it('includes constraints in system prompt', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask({ maxToolCalls: 10, maxDurationMs: 60000 })); - - const request = mockGenerateText.mock.calls[0][0]; - const systemMsg = request.messages.find((m: any) => m.role === 'system'); - expect(systemMsg.content).toContain('10'); // max tool calls - expect(systemMsg.content).toContain('60'); // 60 seconds - }); - - it('includes codebase context when provided', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask(), '## Workspace Tree\nsrc/\n utils.ts (200 bytes)'); - - const request = mockGenerateText.mock.calls[0][0]; - const contextMsg = request.messages.find((m: any) => m.content?.includes('Workspace Tree')); - expect(contextMsg).toBeDefined(); - }); - - it('includes relevant files when specified', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask({ relevantFiles: ['src/utils.ts', 'src/auth.ts'] })); - - const request = mockGenerateText.mock.calls[0][0]; - const filesMsg = request.messages.find((m: any) => m.content?.includes('src/utils.ts')); - expect(filesMsg).toBeDefined(); - }); - }); - - describe('error handling', () => { - it('throws on empty LLM response', async () => { - mockGenerateText.mockResolvedValue({ text: '' }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('empty response'); - }); - - it('throws on non-JSON response', async () => { - mockGenerateText.mockResolvedValue({ text: 'I think we should...' }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('No JSON object'); - }); - - it('throws on missing summary', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ steps: [{ stepNumber: 1, action: 'read' }] }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('missing "summary"'); - }); - - it('throws on empty steps array', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ summary: 'Do stuff', steps: [] }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('no steps'); - }); - - it('throws on too many steps', async () => { - const manySteps = Array.from({ length: 20 }, (_, i) => ({ - stepNumber: i + 1, - action: 'read', - toolCall: 'code/read', - toolParams: {}, - dependsOn: [], - })); - - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ summary: 'Too many', steps: manySteps }), - }); - - await expect(formulator.formulate(makeTask({ maxToolCalls: 15 }))).rejects.toThrow('exceeds max'); - }); - - it('throws on invalid action', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Bad action', - steps: [{ stepNumber: 1, action: 'hack', toolCall: 'code/read', dependsOn: [] }], - }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid action'); - }); - - it('throws on invalid toolCall', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Bad tool', - steps: [{ stepNumber: 1, action: 'read', toolCall: 'rm -rf', dependsOn: [] }], - }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('not a code/* command'); - }); - - it('throws on forward dependency reference', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Bad deps', - steps: [ - { stepNumber: 1, action: 'read', toolCall: 'code/read', dependsOn: [2] }, - { stepNumber: 2, action: 'read', toolCall: 'code/read', dependsOn: [] }, - ], - }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid step'); - }); - - it('throws on self-dependency reference', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Self dep', - steps: [ - { stepNumber: 1, action: 'read', toolCall: 'code/read', dependsOn: [1] }, - ], - }), - }); - - await expect(formulator.formulate(makeTask())).rejects.toThrow('invalid step'); - }); - - it('extracts JSON from markdown code blocks', async () => { - const planJson = JSON.stringify({ - summary: 'Wrapped in markdown', - steps: [{ - stepNumber: 1, - action: 'read', - toolCall: 'code/read', - toolParams: { filePath: 'test.ts' }, - dependsOn: [], - }], - }); - - mockGenerateText.mockResolvedValue({ - text: `Here's the plan:\n\`\`\`json\n${planJson}\n\`\`\``, - }); - - const plan = await formulator.formulate(makeTask()); - expect(plan.summary).toBe('Wrapped in markdown'); - expect(plan.steps).toHaveLength(1); - }); - }); - - describe('risk assessment', () => { - it('parses riskLevel from LLM response', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Low risk read-only task', - riskLevel: 'low', - riskReason: 'Read-only operation, no file modifications', - steps: [{ - stepNumber: 1, - action: 'read', - toolCall: 'code/read', - toolParams: { filePath: 'test.ts' }, - dependsOn: [], - }], - }), - }); - - const plan = await formulator.formulate(makeTask()); - expect(plan.riskLevel).toBe('low'); - expect(plan.riskReason).toBe('Read-only operation, no file modifications'); - expect(plan.requiredTier).toBe('write'); // low β†’ write tier - }); - - it('defaults riskLevel to medium when omitted', async () => { - mockValidPlan(); // doesn't include riskLevel - - const plan = await formulator.formulate(makeTask()); - expect(plan.riskLevel).toBe('medium'); - expect(plan.requiredTier).toBe('write'); - }); - - it('defaults riskLevel to medium for invalid values', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Bad risk', - riskLevel: 'extreme', - steps: [{ - stepNumber: 1, - action: 'read', - toolCall: 'code/read', - toolParams: {}, - dependsOn: [], - }], - }), - }); - - const plan = await formulator.formulate(makeTask()); - expect(plan.riskLevel).toBe('medium'); - }); - - it('critical risk maps to system tier', async () => { - mockGenerateText.mockResolvedValue({ - text: JSON.stringify({ - summary: 'Critical system change', - riskLevel: 'critical', - riskReason: 'Modifies build configuration', - steps: [{ - stepNumber: 1, - action: 'edit', - toolCall: 'code/edit', - toolParams: { filePath: 'build.config.ts' }, - dependsOn: [], - }], - }), - }); - - const plan = await formulator.formulate(makeTask()); - expect(plan.riskLevel).toBe('critical'); - expect(plan.requiredTier).toBe('system'); - }); - - it('includes risk assessment guidelines in prompt', async () => { - mockValidPlan(); - - await formulator.formulate(makeTask()); - - const request = mockGenerateText.mock.calls[0][0]; - const systemMsg = request.messages.find((m: any) => m.role === 'system'); - expect(systemMsg.content).toContain('riskLevel'); - expect(systemMsg.content).toContain('Risk Assessment Guidelines'); - }); - }); -}); diff --git a/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts b/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts deleted file mode 100644 index d835d9004..000000000 --- a/src/debug/jtag/tests/unit/code/PlanGovernance.test.ts +++ /dev/null @@ -1,174 +0,0 @@ -/** - * PlanGovernance Unit Tests - * - * Tests risk-based approval routing: - * - shouldRequireApproval: risk level + multi-agent logic - * - resolveDecision: governance outcome β†’ plan status mapping - * - proposePlan: governance proposal creation (integration tested separately) - */ - -import { describe, it, expect } from 'vitest'; -import { PlanGovernance, type GovernanceDecision, type GovernanceOutcome } from '../../../system/code/server/PlanGovernance'; -import { CodingPlanEntity } from '../../../system/data/entities/CodingPlanEntity'; -import type { UUID } from '../../../system/core/types/CrossPlatformUUID'; -import type { RiskLevel, SecurityTierLevel } from '../../../system/code/shared/CodingTypes'; - -// ── Helpers ────────────────────────────────────────────────── - -const PERSONA_A = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' as UUID; -const PERSONA_B = 'bbbbbbbb-cccc-dddd-eeee-ffffffffffff' as UUID; -const TASK_ID = '11111111-2222-3333-4444-555555555555' as UUID; - -function makePlan(overrides?: { - riskLevel?: RiskLevel; - securityTier?: SecurityTierLevel; - assignees?: UUID[]; -}): CodingPlanEntity { - const plan = new CodingPlanEntity(); - plan.taskId = TASK_ID; - plan.createdById = PERSONA_A; - plan.leadId = PERSONA_A; - plan.summary = 'Test plan'; - plan.taskDescription = 'Test task description'; - plan.assignees = overrides?.assignees ?? [PERSONA_A]; - plan.riskLevel = overrides?.riskLevel ?? 'low'; - plan.securityTier = overrides?.securityTier ?? 'write'; - plan.generatedBy = { provider: 'test', model: 'test-model', temperature: 0, durationMs: 0 }; - plan.steps = [{ - stepNumber: 1, - action: 'read', - description: 'Read main.ts', - targetFiles: ['src/main.ts'], - toolCall: 'code/read', - toolParams: { filePath: 'src/main.ts' }, - dependsOn: [], - verification: 'File content returned', - status: 'pending', - }]; - return plan; -} - -function makeDecision(outcome: GovernanceOutcome): GovernanceDecision { - return { - proposalId: '99999999-8888-7777-6666-555555555555' as UUID, - outcome, - reasoning: `Decision: ${outcome}`, - }; -} - -// ── Tests ──────────────────────────────────────────────────── - -describe('PlanGovernance', () => { - const governance = new PlanGovernance(); - - describe('shouldRequireApproval', () => { - describe('single-agent plans', () => { - it('low risk β†’ no approval required', () => { - const plan = makePlan({ riskLevel: 'low' }); - expect(governance.shouldRequireApproval(plan)).toBe(false); - }); - - it('medium risk β†’ no approval required', () => { - const plan = makePlan({ riskLevel: 'medium' }); - expect(governance.shouldRequireApproval(plan)).toBe(false); - }); - - it('high risk β†’ approval required', () => { - const plan = makePlan({ riskLevel: 'high' }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - - it('critical risk β†’ approval required', () => { - const plan = makePlan({ riskLevel: 'critical' }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - }); - - describe('multi-agent plans', () => { - it('low risk + multi-agent β†’ approval required', () => { - const plan = makePlan({ riskLevel: 'low', assignees: [PERSONA_A, PERSONA_B] }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - - it('medium risk + multi-agent β†’ approval required', () => { - const plan = makePlan({ riskLevel: 'medium', assignees: [PERSONA_A, PERSONA_B] }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - - it('high risk + multi-agent β†’ approval required', () => { - const plan = makePlan({ riskLevel: 'high', assignees: [PERSONA_A, PERSONA_B] }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - }); - - describe('system tier', () => { - it('system tier always requires approval regardless of risk', () => { - const plan = makePlan({ riskLevel: 'low', securityTier: 'system' }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - - it('system tier + single agent still requires approval', () => { - const plan = makePlan({ riskLevel: 'low', securityTier: 'system', assignees: [PERSONA_A] }); - expect(governance.shouldRequireApproval(plan)).toBe(true); - }); - }); - }); - - describe('resolveDecision', () => { - it('approved β†’ approved', () => { - const result = governance.resolveDecision(makeDecision('approved')); - expect(result).toBe('approved'); - }); - - it('approved_with_changes β†’ approved', () => { - const result = governance.resolveDecision(makeDecision('approved_with_changes')); - expect(result).toBe('approved'); - }); - - it('changes_requested β†’ draft', () => { - const result = governance.resolveDecision(makeDecision('changes_requested')); - expect(result).toBe('draft'); - }); - - it('rejected β†’ cancelled', () => { - const result = governance.resolveDecision(makeDecision('rejected')); - expect(result).toBe('cancelled'); - }); - }); - - describe('all outcomes map to valid plan statuses', () => { - const outcomes: GovernanceOutcome[] = ['approved', 'approved_with_changes', 'changes_requested', 'rejected']; - const validStatuses = ['draft', 'proposed', 'approved', 'executing', 'completed', 'partial', 'failed', 'cancelled']; - - for (const outcome of outcomes) { - it(`${outcome} maps to a valid CodingPlanStatus`, () => { - const result = governance.resolveDecision(makeDecision(outcome)); - expect(validStatuses).toContain(result); - }); - } - }); - - describe('approval matrix (exhaustive)', () => { - const riskLevels: RiskLevel[] = ['low', 'medium', 'high', 'critical']; - const tiers: SecurityTierLevel[] = ['discovery', 'read', 'write', 'system']; - - for (const risk of riskLevels) { - for (const tier of tiers) { - for (const multiAgent of [false, true]) { - it(`risk=${risk}, tier=${tier}, multiAgent=${multiAgent}`, () => { - const assignees = multiAgent ? [PERSONA_A, PERSONA_B] : [PERSONA_A]; - const plan = makePlan({ riskLevel: risk, securityTier: tier, assignees }); - const result = governance.shouldRequireApproval(plan); - expect(typeof result).toBe('boolean'); - - // Verify specific cases - if (tier === 'system') expect(result).toBe(true); - if (multiAgent) expect(result).toBe(true); - if (risk === 'high' || risk === 'critical') expect(result).toBe(true); - if (risk === 'low' && tier !== 'system' && !multiAgent) expect(result).toBe(false); - }); - } - } - } - }); -}); diff --git a/src/debug/jtag/tests/unit/code/Workspace.test.ts b/src/debug/jtag/tests/unit/code/Workspace.test.ts new file mode 100644 index 000000000..5458caa7e --- /dev/null +++ b/src/debug/jtag/tests/unit/code/Workspace.test.ts @@ -0,0 +1,644 @@ +/** + * Workspace Unit Tests + * + * Tests that the Workspace class: + * - Creates via WorkspaceStrategy and returns a bound handle + * - Delegates all operations to CodeDaemon with the retained handle + * - Provides fromExisting() for resuming previously created workspaces + * - Cleans up via WorkspaceStrategy.cleanup() + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { Workspace } from '../../../system/code/server/Workspace'; +import { WorkspaceStrategy } from '../../../system/code/server/WorkspaceStrategy'; +import { CodeDaemon } from '../../../daemons/code-daemon/shared/CodeDaemon'; +import { CodeVerify } from '../../../commands/code/verify/shared/CodeVerifyTypes'; + +// ── Mock dependencies ────────────────────────────────────── + +vi.mock('../../../system/code/server/WorkspaceStrategy', () => ({ + WorkspaceStrategy: { + create: vi.fn(), + cleanup: vi.fn(), + }, +})); + +vi.mock('../../../daemons/code-daemon/shared/CodeDaemon', () => ({ + CodeDaemon: { + workspaceRead: vi.fn(), + workspaceWrite: vi.fn(), + workspaceEdit: vi.fn(), + workspaceDelete: vi.fn(), + workspaceDiff: vi.fn(), + workspaceSearch: vi.fn(), + workspaceTree: vi.fn(), + workspaceUndo: vi.fn(), + workspaceHistory: vi.fn(), + workspaceGitStatus: vi.fn(), + workspaceGitDiff: vi.fn(), + workspaceGitLog: vi.fn(), + workspaceGitAdd: vi.fn(), + workspaceGitCommit: vi.fn(), + workspaceGitPush: vi.fn(), + // Shell session methods + shellCreate: vi.fn(), + shellExecute: vi.fn(), + shellPoll: vi.fn(), + shellKill: vi.fn(), + shellCd: vi.fn(), + shellStatus: vi.fn(), + shellDestroy: vi.fn(), + // Shell watch + sentinel + shellWatch: vi.fn(), + shellSentinel: vi.fn(), + }, +})); + +vi.mock('../../../commands/code/verify/shared/CodeVerifyTypes', () => ({ + CodeVerify: { + execute: vi.fn(), + }, +})); + +// ── Helpers ──────────────────────────────────────────────── + +const PERSONA_ID = 'test-persona-abc'; +const WORKSPACE_DIR = '/tmp/workspace/test'; +const HANDLE = `worktree-${PERSONA_ID}-fix-auth`; +const BRANCH = 'ai/fix-auth'; + +function mockWorkspaceCreate() { + vi.mocked(WorkspaceStrategy.create).mockResolvedValue({ + handle: HANDLE, + workspaceDir: WORKSPACE_DIR, + mode: 'worktree', + branch: BRANCH, + }); +} + +// ── Tests ────────────────────────────────────────────────── + +describe('Workspace', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('creation', () => { + it('creates via WorkspaceStrategy and exposes handle, dir, mode, branch', async () => { + mockWorkspaceCreate(); + + const ws = await Workspace.create({ + personaId: PERSONA_ID, + mode: 'worktree', + taskSlug: 'fix-auth', + sparsePaths: ['src/'], + }); + + expect(WorkspaceStrategy.create).toHaveBeenCalledWith({ + personaId: PERSONA_ID, + mode: 'worktree', + taskSlug: 'fix-auth', + sparsePaths: ['src/'], + }); + + expect(ws.handle).toBe(HANDLE); + expect(ws.dir).toBe(WORKSPACE_DIR); + expect(ws.mode).toBe('worktree'); + expect(ws.branch).toBe(BRANCH); + }); + + it('creates sandbox workspace without branch', async () => { + vi.mocked(WorkspaceStrategy.create).mockResolvedValue({ + handle: PERSONA_ID, + workspaceDir: '/tmp/sandbox', + mode: 'sandbox', + }); + + const ws = await Workspace.create({ personaId: PERSONA_ID, mode: 'sandbox' }); + + expect(ws.handle).toBe(PERSONA_ID); + expect(ws.mode).toBe('sandbox'); + expect(ws.branch).toBeUndefined(); + }); + + it('fromExisting creates without calling WorkspaceStrategy', () => { + const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + + expect(ws.handle).toBe(HANDLE); + expect(ws.dir).toBe(WORKSPACE_DIR); + expect(ws.mode).toBe('worktree'); + expect(ws.branch).toBe(BRANCH); + expect(WorkspaceStrategy.create).not.toHaveBeenCalled(); + }); + }); + + describe('file operations', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + }); + + it('read delegates to CodeDaemon.workspaceRead with handle', async () => { + const mockResult = { content: 'file contents', lineCount: 10, filePath: 'src/auth.ts' }; + vi.mocked(CodeDaemon.workspaceRead).mockResolvedValue(mockResult as any); + + const result = await ws.read('src/auth.ts', 1, 10); + + expect(CodeDaemon.workspaceRead).toHaveBeenCalledWith(HANDLE, 'src/auth.ts', 1, 10); + expect(result).toBe(mockResult); + }); + + it('write delegates to CodeDaemon.workspaceWrite with handle', async () => { + const mockResult = { changeId: 'ch-1', filePath: 'new.ts' }; + vi.mocked(CodeDaemon.workspaceWrite).mockResolvedValue(mockResult as any); + + const result = await ws.write('new.ts', 'content', 'Created new file'); + + expect(CodeDaemon.workspaceWrite).toHaveBeenCalledWith(HANDLE, 'new.ts', 'content', 'Created new file'); + expect(result).toBe(mockResult); + }); + + it('edit delegates to CodeDaemon.workspaceEdit with handle', async () => { + const editMode = { editType: 'search_replace' as const, search: 'old', replace: 'new' }; + vi.mocked(CodeDaemon.workspaceEdit).mockResolvedValue({ changeId: 'ch-2' } as any); + + await ws.edit('src/auth.ts', editMode as any, 'Fix token check'); + + expect(CodeDaemon.workspaceEdit).toHaveBeenCalledWith(HANDLE, 'src/auth.ts', editMode, 'Fix token check'); + }); + + it('delete delegates to CodeDaemon.workspaceDelete with handle', async () => { + vi.mocked(CodeDaemon.workspaceDelete).mockResolvedValue({ changeId: 'ch-3' } as any); + + await ws.delete('old-file.ts', 'Removed unused file'); + + expect(CodeDaemon.workspaceDelete).toHaveBeenCalledWith(HANDLE, 'old-file.ts', 'Removed unused file'); + }); + + it('diff delegates to CodeDaemon.workspaceDiff with handle', async () => { + const editMode = { editType: 'search_replace' as const, search: 'a', replace: 'b' }; + vi.mocked(CodeDaemon.workspaceDiff).mockResolvedValue({ success: true, unified: '--- a\n+++ b' }); + + const result = await ws.diff('file.ts', editMode as any); + + expect(CodeDaemon.workspaceDiff).toHaveBeenCalledWith(HANDLE, 'file.ts', editMode); + expect(result.unified).toContain('---'); + }); + }); + + describe('search and discovery', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + }); + + it('search delegates to CodeDaemon.workspaceSearch with handle', async () => { + vi.mocked(CodeDaemon.workspaceSearch).mockResolvedValue({ matches: [], totalMatches: 0 } as any); + + await ws.search('TODO', '*.ts', 50); + + expect(CodeDaemon.workspaceSearch).toHaveBeenCalledWith(HANDLE, 'TODO', '*.ts', 50); + }); + + it('tree delegates to CodeDaemon.workspaceTree with handle', async () => { + vi.mocked(CodeDaemon.workspaceTree).mockResolvedValue({ root: { name: '.' } } as any); + + await ws.tree('src/', 3, false); + + expect(CodeDaemon.workspaceTree).toHaveBeenCalledWith(HANDLE, 'src/', 3, false); + }); + }); + + describe('change tracking', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + }); + + it('undo delegates to CodeDaemon.workspaceUndo with handle', async () => { + vi.mocked(CodeDaemon.workspaceUndo).mockResolvedValue({ undone: 1 } as any); + + await ws.undo('ch-1'); + + expect(CodeDaemon.workspaceUndo).toHaveBeenCalledWith(HANDLE, 'ch-1', undefined); + }); + + it('history delegates to CodeDaemon.workspaceHistory with handle', async () => { + vi.mocked(CodeDaemon.workspaceHistory).mockResolvedValue({ changes: [] } as any); + + await ws.history('src/auth.ts', 5); + + expect(CodeDaemon.workspaceHistory).toHaveBeenCalledWith(HANDLE, 'src/auth.ts', 5); + }); + }); + + describe('verification', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + }); + + it('verify delegates to CodeVerify.execute with handle as userId', async () => { + vi.mocked(CodeVerify.execute).mockResolvedValue({ success: true } as any); + + await ws.verify(true, ['tests/auth.test.ts']); + + expect(CodeVerify.execute).toHaveBeenCalledWith({ + userId: HANDLE, + typeCheck: true, + testFiles: ['tests/auth.test.ts'], + }); + }); + }); + + describe('git operations', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + }); + + it('gitStatus delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitStatus).mockResolvedValue({ branch: BRANCH } as any); + await ws.gitStatus(); + expect(CodeDaemon.workspaceGitStatus).toHaveBeenCalledWith(HANDLE); + }); + + it('gitDiff delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitDiff).mockResolvedValue({ success: true, diff: '' }); + await ws.gitDiff(true); + expect(CodeDaemon.workspaceGitDiff).toHaveBeenCalledWith(HANDLE, true); + }); + + it('gitLog delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitLog).mockResolvedValue({ success: true, log: '' }); + await ws.gitLog(10); + expect(CodeDaemon.workspaceGitLog).toHaveBeenCalledWith(HANDLE, 10); + }); + + it('gitAdd delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitAdd).mockResolvedValue({ staged: ['.'] }); + await ws.gitAdd(['.']); + expect(CodeDaemon.workspaceGitAdd).toHaveBeenCalledWith(HANDLE, ['.']); + }); + + it('gitCommit delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitCommit).mockResolvedValue({ hash: 'abc123' }); + const result = await ws.gitCommit('Fix auth'); + expect(CodeDaemon.workspaceGitCommit).toHaveBeenCalledWith(HANDLE, 'Fix auth'); + expect(result.hash).toBe('abc123'); + }); + + it('gitPush delegates with handle', async () => { + vi.mocked(CodeDaemon.workspaceGitPush).mockResolvedValue({ output: 'pushed' }); + await ws.gitPush('origin', BRANCH); + expect(CodeDaemon.workspaceGitPush).toHaveBeenCalledWith(HANDLE, 'origin', BRANCH); + }); + }); + + describe('shell session', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + vi.mocked(CodeDaemon.shellCreate).mockResolvedValue({ + session_id: 'sess-1', + persona_id: HANDLE, + cwd: WORKSPACE_DIR, + active_executions: 0, + total_executions: 0, + } as any); + }); + + it('exec auto-creates shell session on first call', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-1', + status: 'completed', + stdout: 'ok', + stderr: null, + exit_code: 0, + } as any); + + await ws.exec('echo hello'); + + expect(CodeDaemon.shellCreate).toHaveBeenCalledWith(HANDLE, WORKSPACE_DIR); + expect(CodeDaemon.shellExecute).toHaveBeenCalledWith(HANDLE, 'echo hello', { + timeoutMs: 30000, + wait: true, + }); + }); + + it('exec only creates shell session once', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-1', status: 'completed', + } as any); + + await ws.exec('echo 1'); + await ws.exec('echo 2'); + + expect(CodeDaemon.shellCreate).toHaveBeenCalledTimes(1); + expect(CodeDaemon.shellExecute).toHaveBeenCalledTimes(2); + }); + + it('exec passes custom timeout', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-1', status: 'completed', + } as any); + + await ws.exec('cargo build', 120000); + + expect(CodeDaemon.shellExecute).toHaveBeenCalledWith(HANDLE, 'cargo build', { + timeoutMs: 120000, + wait: true, + }); + }); + + it('execAsync returns handle immediately (wait=false)', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-long', + status: 'running', + stdout: null, + stderr: null, + exit_code: null, + } as any); + + const result = await ws.execAsync('npm run build'); + + expect(CodeDaemon.shellExecute).toHaveBeenCalledWith(HANDLE, 'npm run build', { + timeoutMs: undefined, + wait: false, + }); + expect(result.execution_id).toBe('exec-long'); + expect(result.status).toBe('running'); + }); + + it('shellPoll delegates to CodeDaemon.shellPoll with handle', async () => { + vi.mocked(CodeDaemon.shellPoll).mockResolvedValue({ + execution_id: 'exec-1', + status: 'running', + new_stdout: ['line 1', 'line 2'], + new_stderr: [], + exit_code: null, + finished: false, + } as any); + + const result = await ws.shellPoll('exec-1'); + + expect(CodeDaemon.shellPoll).toHaveBeenCalledWith(HANDLE, 'exec-1'); + expect(result.new_stdout).toEqual(['line 1', 'line 2']); + expect(result.finished).toBe(false); + }); + + it('shellKill delegates to CodeDaemon.shellKill with handle', async () => { + vi.mocked(CodeDaemon.shellKill).mockResolvedValue(); + + await ws.shellKill('exec-1'); + + expect(CodeDaemon.shellKill).toHaveBeenCalledWith(HANDLE, 'exec-1'); + }); + + it('shellCd auto-creates session and delegates', async () => { + vi.mocked(CodeDaemon.shellCd).mockResolvedValue({ cwd: '/tmp/workspace/test/src' }); + + const result = await ws.shellCd('src'); + + expect(CodeDaemon.shellCreate).toHaveBeenCalledWith(HANDLE, WORKSPACE_DIR); + expect(CodeDaemon.shellCd).toHaveBeenCalledWith(HANDLE, 'src'); + expect(result.cwd).toBe('/tmp/workspace/test/src'); + }); + + it('shellStatus auto-creates session and delegates', async () => { + vi.mocked(CodeDaemon.shellStatus).mockResolvedValue({ + session_id: 'sess-1', + persona_id: HANDLE, + cwd: WORKSPACE_DIR, + active_executions: 0, + total_executions: 3, + } as any); + + const result = await ws.shellStatus(); + + expect(CodeDaemon.shellCreate).toHaveBeenCalledWith(HANDLE, WORKSPACE_DIR); + expect(CodeDaemon.shellStatus).toHaveBeenCalledWith(HANDLE); + expect(result.total_executions).toBe(3); + }); + }); + + describe('shell watch + sentinel', () => { + let ws: Workspace; + + beforeEach(() => { + ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + vi.mocked(CodeDaemon.shellCreate).mockResolvedValue({ + session_id: 'sess-1', + persona_id: HANDLE, + cwd: WORKSPACE_DIR, + active_executions: 0, + total_executions: 0, + } as any); + }); + + it('sentinel delegates to CodeDaemon.shellSentinel with handle', async () => { + vi.mocked(CodeDaemon.shellSentinel).mockResolvedValue({ applied: true, ruleCount: 2 }); + + const rules = [ + { pattern: '^error', classification: 'Error' as const, action: 'Emit' as const }, + { pattern: '.*', classification: 'Verbose' as const, action: 'Suppress' as const }, + ]; + + const result = await ws.sentinel('exec-1', rules); + + expect(CodeDaemon.shellSentinel).toHaveBeenCalledWith(HANDLE, 'exec-1', rules); + expect(result.applied).toBe(true); + expect(result.ruleCount).toBe(2); + }); + + it('watch auto-creates shell and delegates to CodeDaemon.shellWatch', async () => { + const watchResponse = { + execution_id: 'exec-1', + lines: [ + { text: 'Compiling...', classification: 'Info', line_number: 0, stream: 'stdout', timestamp: Date.now() }, + ], + finished: false, + exit_code: undefined, + }; + vi.mocked(CodeDaemon.shellWatch).mockResolvedValue(watchResponse as any); + + const result = await ws.watch('exec-1'); + + expect(CodeDaemon.shellCreate).toHaveBeenCalledWith(HANDLE, WORKSPACE_DIR); + expect(CodeDaemon.shellWatch).toHaveBeenCalledWith(HANDLE, 'exec-1'); + expect(result.lines).toHaveLength(1); + expect(result.lines[0].text).toBe('Compiling...'); + expect(result.finished).toBe(false); + }); + + it('execWatch composes exec β†’ sentinel β†’ watch loop', async () => { + // Mock execAsync + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-build', + status: 'running', + stdout: null, + stderr: null, + exit_code: null, + } as any); + + // Mock sentinel + vi.mocked(CodeDaemon.shellSentinel).mockResolvedValue({ applied: true, ruleCount: 1 }); + + // Mock watch β€” first call returns output, second returns finished + vi.mocked(CodeDaemon.shellWatch) + .mockResolvedValueOnce({ + execution_id: 'exec-build', + lines: [ + { text: 'Building...', classification: 'Info', line_number: 0, stream: 'stdout', timestamp: Date.now() }, + ], + finished: false, + } as any) + .mockResolvedValueOnce({ + execution_id: 'exec-build', + lines: [ + { text: 'Done', classification: 'Success', line_number: 1, stream: 'stdout', timestamp: Date.now() }, + ], + finished: true, + exit_code: 0, + } as any); + + const rules = [ + { pattern: '.*', classification: 'Info' as const, action: 'Emit' as const }, + ]; + const collectedLines: any[] = []; + + const result = await ws.execWatch('cargo build', rules, (line) => { + collectedLines.push(line); + }); + + // Verify composition: exec β†’ sentinel β†’ watch loop + expect(CodeDaemon.shellExecute).toHaveBeenCalledWith(HANDLE, 'cargo build', { + timeoutMs: undefined, + wait: false, + }); + expect(CodeDaemon.shellSentinel).toHaveBeenCalledWith(HANDLE, 'exec-build', rules); + expect(CodeDaemon.shellWatch).toHaveBeenCalledTimes(2); + + // Verify all lines were collected + expect(collectedLines).toHaveLength(2); + expect(collectedLines[0].text).toBe('Building...'); + expect(collectedLines[1].text).toBe('Done'); + + // Verify final response + expect(result.finished).toBe(true); + expect(result.exit_code).toBe(0); + }); + + it('execWatch works without sentinel rules', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-quick', + status: 'running', + } as any); + + vi.mocked(CodeDaemon.shellWatch).mockResolvedValueOnce({ + execution_id: 'exec-quick', + lines: [], + finished: true, + exit_code: 0, + } as any); + + const result = await ws.execWatch('echo hello'); + + // No sentinel should be called + expect(CodeDaemon.shellSentinel).not.toHaveBeenCalled(); + expect(result.finished).toBe(true); + }); + + it('execWatch works without onLine callback', async () => { + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ + execution_id: 'exec-silent', + status: 'running', + } as any); + + vi.mocked(CodeDaemon.shellWatch).mockResolvedValueOnce({ + execution_id: 'exec-silent', + lines: [ + { text: 'output', classification: 'Info', line_number: 0, stream: 'stdout', timestamp: Date.now() }, + ], + finished: true, + exit_code: 0, + } as any); + + // Should not throw even without onLine callback + const result = await ws.execWatch('echo hello'); + expect(result.finished).toBe(true); + }); + }); + + describe('lifecycle', () => { + it('destroy delegates to WorkspaceStrategy.cleanup', async () => { + vi.mocked(WorkspaceStrategy.cleanup).mockResolvedValue(); + + const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + await ws.destroy({ force: true, deleteBranch: true }); + + expect(WorkspaceStrategy.cleanup).toHaveBeenCalledWith(HANDLE, { + force: true, + deleteBranch: true, + }); + }); + + it('destroy cleans up shell session if one was created', async () => { + vi.mocked(CodeDaemon.shellCreate).mockResolvedValue({} as any); + vi.mocked(CodeDaemon.shellExecute).mockResolvedValue({ execution_id: 'e1' } as any); + vi.mocked(CodeDaemon.shellDestroy).mockResolvedValue(); + vi.mocked(WorkspaceStrategy.cleanup).mockResolvedValue(); + + const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + // Trigger shell creation + await ws.exec('echo hi'); + // Now destroy + await ws.destroy(); + + expect(CodeDaemon.shellDestroy).toHaveBeenCalledWith(HANDLE); + expect(WorkspaceStrategy.cleanup).toHaveBeenCalledWith(HANDLE, undefined); + }); + + it('destroy skips shell cleanup if no shell was created', async () => { + vi.mocked(WorkspaceStrategy.cleanup).mockResolvedValue(); + + const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + await ws.destroy(); + + expect(CodeDaemon.shellDestroy).not.toHaveBeenCalled(); + expect(WorkspaceStrategy.cleanup).toHaveBeenCalledWith(HANDLE, undefined); + }); + }); + + describe('handle consistency', () => { + it('every operation uses the same handle β€” no handle drift', async () => { + const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); + + // Call several operations + vi.mocked(CodeDaemon.workspaceRead).mockResolvedValue({} as any); + vi.mocked(CodeDaemon.workspaceWrite).mockResolvedValue({} as any); + vi.mocked(CodeDaemon.workspaceSearch).mockResolvedValue({} as any); + vi.mocked(CodeDaemon.workspaceGitAdd).mockResolvedValue({ staged: [] }); + vi.mocked(CodeDaemon.workspaceGitCommit).mockResolvedValue({ hash: '' }); + + await ws.read('a.ts'); + await ws.write('b.ts', 'content'); + await ws.search('pattern'); + await ws.gitAdd(['.']); + await ws.gitCommit('msg'); + + // Every call should have used the exact same handle + expect(vi.mocked(CodeDaemon.workspaceRead).mock.calls[0][0]).toBe(HANDLE); + expect(vi.mocked(CodeDaemon.workspaceWrite).mock.calls[0][0]).toBe(HANDLE); + expect(vi.mocked(CodeDaemon.workspaceSearch).mock.calls[0][0]).toBe(HANDLE); + expect(vi.mocked(CodeDaemon.workspaceGitAdd).mock.calls[0][0]).toBe(HANDLE); + expect(vi.mocked(CodeDaemon.workspaceGitCommit).mock.calls[0][0]).toBe(HANDLE); + }); + }); +}); diff --git a/src/debug/jtag/tests/unit/code/SkillEntity.test.ts b/src/debug/jtag/tests/unit/skill/SkillEntity.test.ts similarity index 100% rename from src/debug/jtag/tests/unit/code/SkillEntity.test.ts rename to src/debug/jtag/tests/unit/skill/SkillEntity.test.ts diff --git a/src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts b/src/debug/jtag/tests/unit/skill/SkillLifecycle.test.ts similarity index 100% rename from src/debug/jtag/tests/unit/code/SkillLifecycle.test.ts rename to src/debug/jtag/tests/unit/skill/SkillLifecycle.test.ts diff --git a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts index 055748025..8f1bc6d0e 100644 --- a/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts +++ b/src/debug/jtag/workers/continuum-core/bindings/RustCoreIPC.ts @@ -38,6 +38,12 @@ import type { ChangeNode, HistoryResult, GitStatusInfo, + // Shell session types + ShellExecuteResponse, + ShellPollResponse, + ShellSessionInfo, + ShellWatchResponse, + SentinelRule, } from '../../../shared/generated'; // Memory subsystem types (Hippocampus in Rust β€” corpus-based, no SQL) @@ -1087,6 +1093,170 @@ export class RustCoreIPCClient extends EventEmitter { return response.result as { output: string }; } + // ── Shell Session Methods ────────────────────────────────────── + + /** + * Create a shell session for a workspace. + */ + async shellCreate(personaId: string, workspaceRoot: string): Promise { + const response = await this.request({ + command: 'code/shell-create', + persona_id: personaId, + workspace_root: workspaceRoot, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to create shell session'); + } + + return response.result as ShellSessionInfo; + } + + /** + * Execute a command in a shell session. + * + * Two modes: + * - `wait: false` (default) β€” returns immediately with execution handle. Poll for output. + * - `wait: true` β€” blocks until completion, returns full stdout/stderr. + */ + async shellExecute( + personaId: string, + cmd: string, + options?: { timeoutMs?: number; wait?: boolean }, + ): Promise { + const response = await this.request({ + command: 'code/shell-execute', + persona_id: personaId, + cmd, + timeout_ms: options?.timeoutMs ?? null, + wait: options?.wait ?? false, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to execute command'); + } + + return response.result as ShellExecuteResponse; + } + + /** + * Poll an execution for new output since last poll. + * Call repeatedly until `finished` is true. + */ + async shellPoll(personaId: string, executionId: string): Promise { + const response = await this.request({ + command: 'code/shell-poll', + persona_id: personaId, + execution_id: executionId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to poll execution'); + } + + return response.result as ShellPollResponse; + } + + /** + * Kill a running execution. + */ + async shellKill(personaId: string, executionId: string): Promise { + const response = await this.request({ + command: 'code/shell-kill', + persona_id: personaId, + execution_id: executionId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to kill execution'); + } + } + + /** + * Change shell session working directory. + */ + async shellCd(personaId: string, path: string): Promise<{ cwd: string }> { + const response = await this.request({ + command: 'code/shell-cd', + persona_id: personaId, + path, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to change directory'); + } + + return response.result as { cwd: string }; + } + + /** + * Get shell session status/info. + */ + async shellStatus(personaId: string): Promise { + const response = await this.request({ + command: 'code/shell-status', + persona_id: personaId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to get shell status'); + } + + return response.result as ShellSessionInfo; + } + + /** + * Destroy a shell session (kills all running executions). + */ + async shellDestroy(personaId: string): Promise { + const response = await this.request({ + command: 'code/shell-destroy', + persona_id: personaId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to destroy shell session'); + } + } + + /** + * Watch a shell execution for new output. + * Blocks until output is available β€” no timeout, no polling. + * Returns classified output lines filtered through sentinel rules. + */ + async shellWatch(personaId: string, executionId: string): Promise { + const response = await this.request({ + command: 'code/shell-watch', + persona_id: personaId, + execution_id: executionId, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to watch execution'); + } + + return response.result as ShellWatchResponse; + } + + /** + * Configure sentinel filter rules on a shell execution. + * Rules classify output lines and control which are emitted or suppressed during watch. + */ + async shellSentinel(personaId: string, executionId: string, rules: SentinelRule[]): Promise<{ applied: boolean; ruleCount: number }> { + const response = await this.request({ + command: 'code/shell-sentinel', + persona_id: personaId, + execution_id: executionId, + rules, + }); + + if (!response.success) { + throw new Error(response.error || 'Failed to set sentinel rules'); + } + + return response.result as { applied: boolean; ruleCount: number }; + } + /** * Disconnect from server */ diff --git a/src/debug/jtag/workers/continuum-core/src/code/mod.rs b/src/debug/jtag/workers/continuum-core/src/code/mod.rs index c8184aa96..0f1dc2194 100644 --- a/src/debug/jtag/workers/continuum-core/src/code/mod.rs +++ b/src/debug/jtag/workers/continuum-core/src/code/mod.rs @@ -1,7 +1,9 @@ -//! Code module β€” file operations, change tracking, and code intelligence. +//! Code module β€” file operations, change tracking, shell sessions, and code intelligence. //! //! Provides the Rust foundation for the coding agent system: //! - `types` β€” Shared wire types for IPC (ChangeNode, FileDiff, EditMode, etc.) +//! - `shell_types` β€” Wire types for shell session IPC +//! - `shell_session` β€” Persistent shell sessions per workspace (handle + poll) //! - `diff_engine` β€” Unified diff computation using the `similar` crate //! - `change_graph` β€” Per-workspace DAG of file operations with undo/redo //! - `path_security` β€” Workspace-scoped path validation and traversal guard @@ -11,6 +13,8 @@ //! - `git_bridge` β€” Git status, diff, and branch operations pub mod types; +pub mod shell_types; +pub mod shell_session; pub mod diff_engine; pub mod change_graph; pub mod path_security; @@ -25,3 +29,4 @@ pub use change_graph::ChangeGraph; pub use diff_engine::{compute_diff, compute_bidirectional_diff}; pub use path_security::PathSecurity; pub use file_engine::FileEngine; +pub use shell_session::{ShellSession, watch_execution}; diff --git a/src/debug/jtag/workers/continuum-core/src/code/shell_session.rs b/src/debug/jtag/workers/continuum-core/src/code/shell_session.rs new file mode 100644 index 000000000..47410cb36 --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/shell_session.rs @@ -0,0 +1,1082 @@ +//! ShellSession β€” Persistent shell session per workspace. +//! +//! Provides a handle-based shell execution model: +//! 1. Create session (bound to workspace directory) +//! 2. Execute command β†’ get execution handle immediately +//! 3. Poll execution handle β†’ get new stdout/stderr chunks +//! 4. Or: execute with wait=true β†’ block until complete +//! 5. Kill execution if needed +//! 6. Destroy session on cleanup +//! +//! Supports BOTH quick commands (wait=true β†’ immediate result) and +//! long-running commands (poll repeatedly β†’ streaming output). +//! +//! Each command runs in its own process for isolation. The session +//! maintains working directory and environment across executions. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use regex::Regex; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command as TokioCommand; +use tokio::sync::Notify; +use uuid::Uuid; + +use super::shell_types::{ + ClassifiedLine, OutputClassification, SentinelAction, SentinelRule, + ShellExecuteResponse, ShellExecutionStatus, ShellHistoryEntry, ShellPollResponse, + ShellSessionInfo, ShellWatchResponse, +}; +use crate::log_info; + +// ============================================================================ +// Execution State (shared between tokio task and IPC handler) +// ============================================================================ + +/// Mutable state for a running or completed execution. +/// +/// Written by the background tokio task (stdout/stderr lines, status). +/// Read by the IPC poll handler (cursor-based output retrieval) and watch handler. +pub struct ExecutionState { + pub id: String, + pub command: String, + pub status: ShellExecutionStatus, + pub stdout_lines: Vec, + pub stderr_lines: Vec, + pub exit_code: Option, + pub pid: Option, + pub started_at: u64, + pub finished_at: Option, + /// Cursor: index of next stdout line to return on poll/watch. + stdout_cursor: usize, + /// Cursor: index of next stderr line to return on poll/watch. + stderr_cursor: usize, + /// Notified whenever new output lines arrive or execution finishes. + /// Used by `watch()` to block without polling. + pub output_notify: Arc, + /// Compiled sentinel filter rules (empty = pass all lines through as Info). + pub sentinel: CompiledSentinel, +} + +impl std::fmt::Debug for ExecutionState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExecutionState") + .field("id", &self.id) + .field("command", &self.command) + .field("status", &self.status) + .field("stdout_lines", &self.stdout_lines.len()) + .field("stderr_lines", &self.stderr_lines.len()) + .field("exit_code", &self.exit_code) + .field("pid", &self.pid) + .field("sentinel_rules", &self.sentinel.len()) + .finish() + } +} + +// ============================================================================ +// Compiled Sentinel β€” pre-compiled regex rules for output classification +// ============================================================================ + +/// Pre-compiled sentinel rules for efficient per-line classification. +/// +/// Regex patterns are compiled once when `set_sentinel()` is called, +/// then applied to every output line without re-compilation. +pub struct CompiledSentinel { + rules: Vec<(Regex, OutputClassification, SentinelAction)>, +} + +impl CompiledSentinel { + /// Create an empty sentinel (passes all lines through as Info). + pub fn empty() -> Self { + Self { rules: Vec::new() } + } + + /// Compile sentinel rules from wire format. Fails on invalid regex. + pub fn compile(rules: &[SentinelRule]) -> Result { + let mut compiled = Vec::with_capacity(rules.len()); + for rule in rules { + let regex = Regex::new(&rule.pattern) + .map_err(|e| format!("Invalid regex '{}': {}", rule.pattern, e))?; + compiled.push((regex, rule.classification.clone(), rule.action.clone())); + } + Ok(Self { rules: compiled }) + } + + /// Number of active rules. + pub fn len(&self) -> usize { + self.rules.len() + } + + /// Classify a single output line. Returns None if the line should be suppressed. + pub fn classify(&self, text: &str, stream: &str, line_num: u64) -> Option { + let ts = now(); + + if self.rules.is_empty() { + // No sentinel configured β€” pass everything through as Info + return Some(ClassifiedLine { + text: text.to_string(), + classification: OutputClassification::Info, + line_number: line_num, + stream: stream.to_string(), + timestamp: ts, + }); + } + + // First matching rule wins + for (regex, classification, action) in &self.rules { + if regex.is_match(text) { + return match action { + SentinelAction::Emit => Some(ClassifiedLine { + text: text.to_string(), + classification: classification.clone(), + line_number: line_num, + stream: stream.to_string(), + timestamp: ts, + }), + SentinelAction::Suppress => None, + }; + } + } + + // No rule matched β€” emit as Verbose + Some(ClassifiedLine { + text: text.to_string(), + classification: OutputClassification::Verbose, + line_number: line_num, + stream: stream.to_string(), + timestamp: ts, + }) + } +} + +// ============================================================================ +// Shell Session +// ============================================================================ + +/// A persistent shell session bound to a workspace. +/// +/// Maintains working directory and environment across command executions. +/// Each command runs in its own isolated process (bash -c "..."). +pub struct ShellSession { + id: String, + persona_id: String, + workspace_root: PathBuf, + cwd: PathBuf, + env: HashMap, + executions: HashMap>>, + history: Vec, + total_executions: u32, +} + +impl ShellSession { + /// Create a new shell session bound to a workspace directory. + /// + /// The workspace_root is canonicalized to resolve symlinks (required + /// for reliable path containment checks on macOS where /var β†’ /private/var). + pub fn new(session_id: &str, persona_id: &str, workspace_root: &Path) -> Result { + let canonical_root = workspace_root.canonicalize().map_err(|e| { + format!( + "Invalid workspace root '{}': {}", + workspace_root.display(), + e + ) + })?; + + let cwd = canonical_root.clone(); + Ok(Self { + id: session_id.to_string(), + persona_id: persona_id.to_string(), + workspace_root: canonical_root, + cwd, + env: HashMap::new(), + executions: HashMap::new(), + history: Vec::new(), + total_executions: 0, + }) + } + + pub fn id(&self) -> &str { + &self.id + } + pub fn persona_id(&self) -> &str { + &self.persona_id + } + pub fn cwd(&self) -> &Path { + &self.cwd + } + pub fn workspace_root(&self) -> &Path { + &self.workspace_root + } + + /// Set an environment variable for future commands. + pub fn set_env(&mut self, key: String, value: String) { + self.env.insert(key, value); + } + + /// Change working directory. Validates the path stays within workspace. + pub fn cd(&mut self, path: &str) -> Result { + let new_cwd = if Path::new(path).is_absolute() { + PathBuf::from(path) + } else { + self.cwd.join(path) + }; + + let canonical = new_cwd + .canonicalize() + .map_err(|e| format!("Cannot cd to '{}': {}", path, e))?; + + if !canonical.starts_with(&self.workspace_root) { + return Err(format!( + "Cannot cd to '{}': outside workspace boundary '{}'", + path, + self.workspace_root.display() + )); + } + + if !canonical.is_dir() { + return Err(format!("Cannot cd to '{}': not a directory", path)); + } + + self.cwd = canonical.clone(); + Ok(canonical.display().to_string()) + } + + /// Get session info snapshot. + pub fn info(&self) -> ShellSessionInfo { + let active = self + .executions + .values() + .filter(|e| { + e.lock() + .map(|s| s.status == ShellExecutionStatus::Running) + .unwrap_or(false) + }) + .count() as u32; + + ShellSessionInfo { + session_id: self.id.clone(), + persona_id: self.persona_id.clone(), + cwd: self.cwd.display().to_string(), + workspace_root: self.workspace_root.display().to_string(), + active_executions: active, + total_executions: self.total_executions, + } + } + + /// Start a command execution. Returns the execution ID immediately. + /// + /// The command runs asynchronously in a tokio task. Use `poll()` to + /// retrieve output, or pass `wait=true` to `execute_and_wait()`. + pub fn execute( + &mut self, + command: &str, + timeout_ms: Option, + rt_handle: &tokio::runtime::Handle, + ) -> Result { + let execution_id = Uuid::new_v4().to_string(); + let now_ms = now(); + + let notify = Arc::new(Notify::new()); + let state = Arc::new(Mutex::new(ExecutionState { + id: execution_id.clone(), + command: command.to_string(), + status: ShellExecutionStatus::Running, + stdout_lines: Vec::new(), + stderr_lines: Vec::new(), + exit_code: None, + pid: None, + started_at: now_ms, + finished_at: None, + stdout_cursor: 0, + stderr_cursor: 0, + output_notify: notify, + sentinel: CompiledSentinel::empty(), + })); + + self.executions + .insert(execution_id.clone(), state.clone()); + self.total_executions += 1; + + // Spawn the process in a tokio task + let cwd = self.cwd.clone(); + let env = self.env.clone(); + let cmd_str = command.to_string(); + + rt_handle.spawn(async move { + run_shell_command(state, &cmd_str, &cwd, &env, timeout_ms).await; + }); + + log_info!( + "code", + "shell", + "Execution {} started: {}", + &execution_id[..8], + command + ); + Ok(execution_id) + } + + /// Execute a command and block until completion. Returns the full result. + /// + /// For quick commands (git status, ls, etc.) where you want the result + /// immediately rather than polling. + pub fn execute_and_wait( + &mut self, + command: &str, + timeout_ms: Option, + rt_handle: &tokio::runtime::Handle, + ) -> Result { + let execution_id = self.execute(command, timeout_ms, rt_handle)?; + + // Block this thread until the execution finishes + let state_arc = self + .executions + .get(&execution_id) + .ok_or_else(|| "Execution vanished".to_string())? + .clone(); + + // Poll until complete (on the current IPC thread) + loop { + { + let s = state_arc + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))?; + if s.status != ShellExecutionStatus::Running { + return Ok(ShellExecuteResponse { + execution_id: s.id.clone(), + status: s.status.clone(), + stdout: Some(s.stdout_lines.join("\n")), + stderr: Some(s.stderr_lines.join("\n")), + exit_code: s.exit_code, + }); + } + } + // Yield briefly to let the tokio task progress + std::thread::sleep(Duration::from_millis(10)); + } + } + + /// Poll an execution for new output since the last poll. + /// + /// Returns new stdout/stderr lines and current status. Call repeatedly + /// until `finished` is true. Cursor advances automatically β€” each line + /// is returned exactly once across polls. + pub fn poll(&self, execution_id: &str) -> Result { + let state_arc = self + .executions + .get(execution_id) + .ok_or_else(|| format!("No execution '{}'", execution_id))?; + + let mut state = state_arc + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))?; + + let new_stdout: Vec = state.stdout_lines[state.stdout_cursor..].to_vec(); + let new_stderr: Vec = state.stderr_lines[state.stderr_cursor..].to_vec(); + state.stdout_cursor = state.stdout_lines.len(); + state.stderr_cursor = state.stderr_lines.len(); + + let finished = state.status != ShellExecutionStatus::Running; + + Ok(ShellPollResponse { + execution_id: execution_id.to_string(), + status: state.status.clone(), + new_stdout, + new_stderr, + exit_code: state.exit_code, + finished, + }) + } + + /// Kill a running execution. + /// + /// Sets the kill flag; the background task detects it and terminates + /// the child process. No-op if already finished. + pub fn kill(&self, execution_id: &str) -> Result<(), String> { + let state_arc = self + .executions + .get(execution_id) + .ok_or_else(|| format!("No execution '{}'", execution_id))?; + + let mut state = state_arc + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))?; + + if state.status != ShellExecutionStatus::Running { + return Ok(()); // Already done + } + + // Signal kill β€” the tokio task will detect this and kill the child + state.status = ShellExecutionStatus::Killed; + state.finished_at = Some(now()); + + // Also send SIGKILL via the stored PID for immediate effect + if let Some(pid) = state.pid { + kill_process(pid); + } + + log_info!( + "code", + "shell", + "Killed execution {}: {}", + &execution_id[..8.min(execution_id.len())], + state.command + ); + Ok(()) + } + + /// Get history of completed executions. + pub fn history(&self) -> &[ShellHistoryEntry] { + &self.history + } + + /// Garbage-collect completed executions, moving them to history. + /// Call periodically to prevent unbounded memory growth. + pub fn gc(&mut self) { + let completed_ids: Vec = self + .executions + .iter() + .filter_map(|(id, state)| { + let s = state.lock().ok()?; + if s.status != ShellExecutionStatus::Running { + Some(id.clone()) + } else { + None + } + }) + .collect(); + + for id in completed_ids { + if let Some(state_arc) = self.executions.remove(&id) { + if let Ok(state) = state_arc.lock() { + self.history.push(ShellHistoryEntry { + execution_id: state.id.clone(), + command: state.command.clone(), + exit_code: state.exit_code, + started_at: state.started_at, + finished_at: state.finished_at, + }); + } + } + } + } + + /// Kill all running executions and clear state. + pub fn destroy(&mut self) { + for (_, state_arc) in self.executions.iter() { + if let Ok(mut state) = state_arc.lock() { + if state.status == ShellExecutionStatus::Running { + state.status = ShellExecutionStatus::Killed; + state.finished_at = Some(now()); + if let Some(pid) = state.pid { + kill_process(pid); + } + } + } + } + self.executions.clear(); + } + + // ════════════════════════════════════════════════════════════ + // Watch + Sentinel + // ════════════════════════════════════════════════════════════ + + /// Get execution state arc and notify handle for async watch. + /// + /// Returns clones that can be used after the DashMap lock is released. + /// The caller MUST release any DashMap locks before awaiting on the Notify. + pub fn get_watch_handles( + &self, + execution_id: &str, + ) -> Result<(Arc>, Arc), String> { + let exec_state = self + .executions + .get(execution_id) + .ok_or_else(|| format!("No execution '{}'", execution_id))? + .clone(); + let notify = exec_state + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))? + .output_notify + .clone(); + Ok((exec_state, notify)) + } + + /// Configure sentinel filter rules on an execution. + /// + /// Rules are compiled to regexes immediately. Returns the count of rules applied. + /// Pass an empty slice to clear sentinel (reverts to pass-all-as-Info). + pub fn set_sentinel( + &self, + execution_id: &str, + rules: &[SentinelRule], + ) -> Result { + let exec_state = self + .executions + .get(execution_id) + .ok_or_else(|| format!("No execution '{}'", execution_id))?; + + let compiled = CompiledSentinel::compile(rules)?; + let count = compiled.len(); + + let mut state = exec_state + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))?; + state.sentinel = compiled; + Ok(count) + } +} + +/// Watch an execution for new output β€” blocks until output is available. +/// +/// This is a free async function (not a method on ShellSession) because it must +/// be called AFTER releasing the DashMap lock. The caller extracts the handles +/// via `get_watch_handles()`, drops the DashMap ref, then calls this. +/// +/// Uses `tokio::sync::Notify` β€” blocks without polling or timeouts. +/// Like `read()` on a Unix pipe: returns when data arrives. +pub async fn watch_execution( + execution_id: &str, + exec_state: Arc>, + notify: Arc, +) -> Result { + loop { + // Check for new data under the lock + { + let mut state = exec_state + .lock() + .map_err(|e| format!("Lock poisoned: {}", e))?; + + let has_new_stdout = state.stdout_cursor < state.stdout_lines.len(); + let has_new_stderr = state.stderr_cursor < state.stderr_lines.len(); + let is_finished = state.status != ShellExecutionStatus::Running; + + if has_new_stdout || has_new_stderr || is_finished { + let lines = collect_and_classify(&mut state); + return Ok(ShellWatchResponse { + execution_id: execution_id.to_string(), + lines, + finished: is_finished, + exit_code: state.exit_code, + }); + } + } + // Lock released β€” safe to await + // notify_one() stores a permit if nobody is waiting, so we won't + // miss notifications between the lock release and this await. + notify.notified().await; + } +} + +/// Collect new output lines since the cursors and classify them through sentinel rules. +fn collect_and_classify(state: &mut ExecutionState) -> Vec { + let mut lines = Vec::new(); + + // Collect stdout since cursor + for i in state.stdout_cursor..state.stdout_lines.len() { + if let Some(classified) = state.sentinel.classify(&state.stdout_lines[i], "stdout", i as u64) { + lines.push(classified); + } + } + state.stdout_cursor = state.stdout_lines.len(); + + // Collect stderr since cursor + for i in state.stderr_cursor..state.stderr_lines.len() { + if let Some(classified) = state.sentinel.classify(&state.stderr_lines[i], "stderr", i as u64) { + lines.push(classified); + } + } + state.stderr_cursor = state.stderr_lines.len(); + + lines +} + +// ============================================================================ +// Background Command Execution +// ============================================================================ + +/// Run a shell command asynchronously, streaming output into shared state. +/// +/// This function runs in a tokio task. It: +/// 1. Spawns `bash -c "command"` with the session's cwd and env +/// 2. Reads stdout/stderr line-by-line into the shared ExecutionState +/// 3. Handles timeouts by killing the process +/// 4. Detects kill requests by checking the status flag +async fn run_shell_command( + state: Arc>, + command: &str, + cwd: &Path, + env: &HashMap, + timeout_ms: Option, +) { + // Build the command + let mut cmd = TokioCommand::new("bash"); + cmd.arg("-c") + .arg(command) + .current_dir(cwd) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + // Don't inherit stdin β€” non-interactive + .stdin(std::process::Stdio::null()); + + // Apply session environment variables + for (k, v) in env { + cmd.env(k, v); + } + + // Spawn the child process + let mut child = match cmd.spawn() { + Ok(c) => c, + Err(e) => { + if let Ok(mut s) = state.lock() { + s.status = ShellExecutionStatus::Failed; + s.stderr_lines + .push(format!("Failed to spawn bash: {}", e)); + s.finished_at = Some(now()); + s.output_notify.notify_one(); + } + return; + } + }; + + // Store PID for external kill capability + if let Some(pid) = child.id() { + if let Ok(mut s) = state.lock() { + s.pid = Some(pid); + } + } + + // Take stdout/stderr handles + let stdout = child.stdout.take().expect("stdout piped"); + let stderr = child.stderr.take().expect("stderr piped"); + + // Spawn line readers (notify watchers on each new line) + let state_out = state.clone(); + let stdout_task = tokio::spawn(async move { + let reader = BufReader::new(stdout); + let mut lines = reader.lines(); + while let Ok(Some(line)) = lines.next_line().await { + if let Ok(mut s) = state_out.lock() { + // If killed, stop reading + if s.status == ShellExecutionStatus::Killed { + break; + } + s.stdout_lines.push(line); + s.output_notify.notify_one(); + } + } + }); + + let state_err = state.clone(); + let stderr_task = tokio::spawn(async move { + let reader = BufReader::new(stderr); + let mut lines = reader.lines(); + while let Ok(Some(line)) = lines.next_line().await { + if let Ok(mut s) = state_err.lock() { + if s.status == ShellExecutionStatus::Killed { + break; + } + s.stderr_lines.push(line); + s.output_notify.notify_one(); + } + } + }); + + // Wait for process completion (with optional timeout and kill detection) + let state_wait = state.clone(); + let exit_status = if let Some(timeout) = timeout_ms { + tokio::select! { + // Branch 1: Process completes + result = child.wait() => { + match result { + Ok(status) => Some(status), + Err(e) => { + if let Ok(mut s) = state_wait.lock() { + s.stderr_lines.push(format!("Process wait error: {}", e)); + } + None + } + } + } + // Branch 2: Timeout fires + _ = tokio::time::sleep(Duration::from_millis(timeout)) => { + // Check if already killed + let already_done = state_wait.lock() + .map(|s| s.status != ShellExecutionStatus::Running) + .unwrap_or(false); + + if !already_done { + let _ = child.kill().await; + if let Ok(mut s) = state_wait.lock() { + if s.status == ShellExecutionStatus::Running { + s.status = ShellExecutionStatus::TimedOut; + s.stderr_lines.push(format!("Timed out after {}ms", timeout)); + s.finished_at = Some(now()); + s.output_notify.notify_one(); + } + } + } + None + } + } + } else { + // No timeout β€” wait indefinitely, but check for kill + let state_for_error = state.clone(); + let state_for_kill = state.clone(); + tokio::select! { + result = child.wait() => { + match result { + Ok(status) => Some(status), + Err(e) => { + if let Ok(mut s) = state_for_error.lock() { + s.stderr_lines.push(format!("Process wait error: {}", e)); + } + None + } + } + } + // Check kill flag periodically + _ = poll_kill_flag(state_for_kill) => { + let _ = child.kill().await; + None + } + } + }; + + // Wait for output readers to drain + let _ = stdout_task.await; + let _ = stderr_task.await; + + // Update final state (if not already set by timeout/kill) + if let Some(status) = exit_status { + if let Ok(mut s) = state.lock() { + if s.status == ShellExecutionStatus::Running { + s.exit_code = status.code(); + s.status = if status.success() { + ShellExecutionStatus::Completed + } else { + ShellExecutionStatus::Failed + }; + s.finished_at = Some(now()); + // Wake any blocked watch() calls to deliver final status + s.output_notify.notify_one(); + + log_info!( + "code", + "shell", + "Execution {} finished: exit={} cmd={}", + &s.id[..8], + s.exit_code.unwrap_or(-1), + &s.command + ); + } + } + } +} + +/// Poll the kill flag on the execution state. Returns when kill is requested. +async fn poll_kill_flag(state: Arc>) { + loop { + { + if let Ok(s) = state.lock() { + if s.status != ShellExecutionStatus::Running { + return; + } + } + } + tokio::time::sleep(Duration::from_millis(100)).await; + } +} + +/// Kill a process by PID (best-effort, Unix only). +fn kill_process(pid: u32) { + // Use kill command β€” works on macOS and Linux, no extra deps + let _ = std::process::Command::new("kill") + .args(["-9", &pid.to_string()]) + .output(); +} + +fn now() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_workspace() -> (tempfile::TempDir, tokio::runtime::Runtime) { + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write(dir.path().join("src/main.ts"), "console.log('hello');").unwrap(); + let rt = tokio::runtime::Runtime::new().unwrap(); + (dir, rt) + } + + #[test] + fn test_session_creation() { + let (dir, _rt) = setup_workspace(); + let session = ShellSession::new("test-session", "persona-1", dir.path()).unwrap(); + + assert_eq!(session.id(), "test-session"); + assert_eq!(session.persona_id(), "persona-1"); + // cwd and workspace_root are canonicalized (macOS: /var β†’ /private/var) + let canonical = dir.path().canonicalize().unwrap(); + assert_eq!(session.cwd(), canonical); + assert_eq!(session.workspace_root(), canonical); + } + + #[test] + fn test_cd_within_workspace() { + let (dir, _rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session.cd("src"); + assert!(result.is_ok()); + assert!(session.cwd().ends_with("src")); + } + + #[test] + fn test_cd_outside_workspace_blocked() { + let (dir, _rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session.cd(".."); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("outside workspace")); + } + + #[test] + fn test_cd_nonexistent_blocked() { + let (dir, _rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session.cd("nonexistent"); + assert!(result.is_err()); + } + + #[test] + fn test_execute_quick_command() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session.execute_and_wait("echo hello", Some(5000), rt.handle()); + assert!(result.is_ok()); + + let response = result.unwrap(); + assert_eq!(response.status, ShellExecutionStatus::Completed); + assert_eq!(response.exit_code, Some(0)); + assert!(response.stdout.unwrap().contains("hello")); + } + + #[test] + fn test_execute_failing_command() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = + session.execute_and_wait("exit 42", Some(5000), rt.handle()); + assert!(result.is_ok()); + + let response = result.unwrap(); + assert_eq!(response.status, ShellExecutionStatus::Failed); + assert_eq!(response.exit_code, Some(42)); + } + + #[test] + fn test_execute_with_cwd() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // cd into src, then run pwd + session.cd("src").unwrap(); + let result = session.execute_and_wait("pwd", Some(5000), rt.handle()); + assert!(result.is_ok()); + + let response = result.unwrap(); + let stdout = response.stdout.unwrap(); + assert!(stdout.contains("src"), "pwd should show src dir: {}", stdout); + } + + #[test] + fn test_execute_with_env() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + session.set_env("MY_VAR".to_string(), "hello_world".to_string()); + let result = session.execute_and_wait("echo $MY_VAR", Some(5000), rt.handle()); + assert!(result.is_ok()); + + let response = result.unwrap(); + assert!(response.stdout.unwrap().contains("hello_world")); + } + + #[test] + fn test_poll_pattern() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Execute asynchronously + let exec_id = session + .execute("echo line1; echo line2; echo line3", Some(5000), rt.handle()) + .unwrap(); + + // Poll until finished + let mut all_stdout = Vec::new(); + loop { + std::thread::sleep(Duration::from_millis(50)); + let poll = session.poll(&exec_id).unwrap(); + all_stdout.extend(poll.new_stdout); + if poll.finished { + assert_eq!(poll.exit_code, Some(0)); + break; + } + } + + assert_eq!(all_stdout, vec!["line1", "line2", "line3"]); + } + + #[test] + fn test_timeout() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Command that sleeps longer than timeout + let result = + session.execute_and_wait("sleep 30", Some(500), rt.handle()); + assert!(result.is_ok()); + + let response = result.unwrap(); + assert_eq!(response.status, ShellExecutionStatus::TimedOut); + } + + #[test] + fn test_kill_execution() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Start a long-running command + let exec_id = session + .execute("sleep 60", None, rt.handle()) + .unwrap(); + + // Give it a moment to start + std::thread::sleep(Duration::from_millis(200)); + + // Kill it + session.kill(&exec_id).unwrap(); + + // Poll should show killed + std::thread::sleep(Duration::from_millis(200)); + let poll = session.poll(&exec_id).unwrap(); + assert!(poll.finished); + assert_eq!(poll.status, ShellExecutionStatus::Killed); + } + + #[test] + fn test_session_info() { + let (dir, _rt) = setup_workspace(); + let session = ShellSession::new("test-session", "persona-1", dir.path()).unwrap(); + + let info = session.info(); + assert_eq!(info.session_id, "test-session"); + assert_eq!(info.persona_id, "persona-1"); + assert_eq!(info.active_executions, 0); + assert_eq!(info.total_executions, 0); + } + + #[test] + fn test_gc_moves_to_history() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Run a command to completion + let _result = session.execute_and_wait("echo done", Some(5000), rt.handle()); + + assert!(session.history().is_empty()); + + // GC should move it to history + session.gc(); + + assert_eq!(session.history().len(), 1); + assert_eq!(session.history()[0].command, "echo done"); + assert_eq!(session.history()[0].exit_code, Some(0)); + } + + #[test] + fn test_destroy_kills_running() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Start long-running command + let _exec_id = session + .execute("sleep 60", None, rt.handle()) + .unwrap(); + + std::thread::sleep(Duration::from_millis(200)); + + // Destroy should kill it + session.destroy(); + assert!(session.executions.is_empty()); + } + + #[test] + fn test_multiple_executions() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + // Run multiple sequential commands + let r1 = session + .execute_and_wait("echo first", Some(5000), rt.handle()) + .unwrap(); + let r2 = session + .execute_and_wait("echo second", Some(5000), rt.handle()) + .unwrap(); + let r3 = session + .execute_and_wait("echo third", Some(5000), rt.handle()) + .unwrap(); + + assert_eq!(r1.status, ShellExecutionStatus::Completed); + assert_eq!(r2.status, ShellExecutionStatus::Completed); + assert_eq!(r3.status, ShellExecutionStatus::Completed); + assert!(r1.stdout.unwrap().contains("first")); + assert!(r2.stdout.unwrap().contains("second")); + assert!(r3.stdout.unwrap().contains("third")); + } + + #[test] + fn test_command_reads_workspace_files() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session + .execute_and_wait("cat src/main.ts", Some(5000), rt.handle()) + .unwrap(); + + assert_eq!(result.status, ShellExecutionStatus::Completed); + assert!(result.stdout.unwrap().contains("console.log")); + } + + #[test] + fn test_stderr_capture() { + let (dir, rt) = setup_workspace(); + let mut session = ShellSession::new("test", "p1", dir.path()).unwrap(); + + let result = session + .execute_and_wait("echo error_msg >&2", Some(5000), rt.handle()) + .unwrap(); + + assert_eq!(result.status, ShellExecutionStatus::Completed); + assert!(result.stderr.unwrap().contains("error_msg")); + } +} diff --git a/src/debug/jtag/workers/continuum-core/src/code/shell_types.rs b/src/debug/jtag/workers/continuum-core/src/code/shell_types.rs new file mode 100644 index 000000000..9f68a3c6a --- /dev/null +++ b/src/debug/jtag/workers/continuum-core/src/code/shell_types.rs @@ -0,0 +1,161 @@ +//! Shell wire types β€” IPC protocol for shell session management. +//! +//! TypeScript types generated via ts-rs. +//! Re-generate: `cargo test --package continuum-core export_bindings` + +use serde::{Deserialize, Serialize}; +use ts_rs::TS; + +/// Status of a shell command execution. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, TS)] +#[serde(rename_all = "snake_case")] +#[ts(export, export_to = "../../../shared/generated/code/ShellExecutionStatus.ts")] +pub enum ShellExecutionStatus { + Running, + Completed, + Failed, + TimedOut, + Killed, +} + +/// Response from `code/shell-execute`. +/// +/// Always returns immediately with the execution handle. +/// If `wait: true` was specified, also includes the completed result. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ShellExecuteResponse.ts")] +pub struct ShellExecuteResponse { + pub execution_id: String, + pub status: ShellExecutionStatus, + /// Full stdout (only present when `wait: true` and execution completed). + #[ts(optional)] + pub stdout: Option, + /// Full stderr (only present when `wait: true` and execution completed). + #[ts(optional)] + pub stderr: Option, + /// Exit code (only present when execution completed). + #[ts(optional)] + pub exit_code: Option, +} + +/// Response from `code/shell-poll`. +/// +/// Returns new output since the last poll (cursor-based). +/// Call repeatedly until `finished` is true. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ShellPollResponse.ts")] +pub struct ShellPollResponse { + pub execution_id: String, + pub status: ShellExecutionStatus, + /// New stdout lines since last poll. + pub new_stdout: Vec, + /// New stderr lines since last poll. + pub new_stderr: Vec, + /// Exit code (present when finished). + #[ts(optional)] + pub exit_code: Option, + /// True when the execution is no longer running. + pub finished: bool, +} + +/// Response from `code/shell-status` β€” session metadata. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ShellSessionInfo.ts")] +pub struct ShellSessionInfo { + pub session_id: String, + pub persona_id: String, + pub cwd: String, + pub workspace_root: String, + pub active_executions: u32, + pub total_executions: u32, +} + +/// A history entry for a completed execution. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ShellHistoryEntry.ts")] +pub struct ShellHistoryEntry { + pub execution_id: String, + pub command: String, + #[ts(optional)] + pub exit_code: Option, + #[ts(type = "number")] + pub started_at: u64, + #[ts(optional, type = "number")] + pub finished_at: Option, +} + +// ============================================================================ +// Sentinel Types β€” Output classification and filtering +// ============================================================================ + +/// Classification level for a line of shell output. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/OutputClassification.ts")] +pub enum OutputClassification { + Error, + Warning, + Info, + Success, + Verbose, +} + +/// What to do with a line that matches a sentinel rule. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/SentinelAction.ts")] +pub enum SentinelAction { + /// Include the line in watch results. + Emit, + /// Filter the line out silently. + Suppress, +} + +/// A sentinel filter rule: regex pattern β†’ classification + action. +/// +/// Wire type for IPC. Patterns are compiled to `regex::Regex` on the Rust side +/// when `set_sentinel()` is called. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/SentinelRule.ts")] +pub struct SentinelRule { + /// Regex pattern to match against each output line. + pub pattern: String, + /// Classification to assign when this rule matches. + pub classification: OutputClassification, + /// Whether to include or suppress the matched line. + pub action: SentinelAction, +} + +/// A single line of classified shell output. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ClassifiedLine.ts")] +pub struct ClassifiedLine { + /// The raw text content of the line. + pub text: String, + /// Classification assigned by sentinel rules. + pub classification: OutputClassification, + /// Line number within the stream (0-indexed from execution start). + #[ts(type = "number")] + pub line_number: u64, + /// Which stream this line came from: "stdout" or "stderr". + pub stream: String, + /// Unix timestamp in milliseconds when the line was classified. + #[ts(type = "number")] + pub timestamp: u64, +} + +/// Response from `code/shell-watch`. +/// +/// Returns classified output lines since the last watch call. +/// Blocks until output is available (no timeout, no polling). +/// Call in a loop until `finished` is true. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/code/ShellWatchResponse.ts")] +pub struct ShellWatchResponse { + pub execution_id: String, + /// Classified output lines (filtered through sentinel rules). + pub lines: Vec, + /// True when the execution is no longer running. + pub finished: bool, + /// Exit code (present when finished). + #[ts(optional)] + pub exit_code: Option, +} diff --git a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs index a9586631a..73b797f86 100644 --- a/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs +++ b/src/debug/jtag/workers/continuum-core/src/ipc/mod.rs @@ -12,7 +12,7 @@ use crate::voice::{UtteranceEvent, VoiceParticipant}; use crate::persona::{PersonaInbox, PersonaCognitionEngine, InboxMessage, SenderType, Modality, ChannelRegistry, ChannelEnqueueRequest, ActivityDomain, PersonaState}; use crate::rag::RagEngine; use crate::logging::TimingGuard; -use crate::code::{self, FileEngine, PathSecurity}; +use crate::code::{self, FileEngine, PathSecurity, ShellSession}; use ts_rs::TS; use crate::{log_debug, log_info, log_error}; use serde::{Deserialize, Serialize}; @@ -421,6 +421,81 @@ enum Request { branch: String, }, + // ── Shell Session Commands ────────────────────────────────────── + + /// Create a shell session for a workspace. + #[serde(rename = "code/shell-create")] + CodeShellCreate { + persona_id: String, + /// Workspace root directory (must match file engine workspace). + workspace_root: String, + }, + + /// Execute a command in a shell session. + /// Returns immediately with execution_id (handle). + /// If `wait` is true, blocks until completion and returns full result. + #[serde(rename = "code/shell-execute")] + CodeShellExecute { + persona_id: String, + /// The shell command to execute (named `cmd` to avoid serde tag conflict with `command`). + cmd: String, + #[serde(default)] + timeout_ms: Option, + /// If true, block until completion and return full result. + #[serde(default)] + wait: bool, + }, + + /// Poll an execution for new output since last poll. + #[serde(rename = "code/shell-poll")] + CodeShellPoll { + persona_id: String, + execution_id: String, + }, + + /// Kill a running execution. + #[serde(rename = "code/shell-kill")] + CodeShellKill { + persona_id: String, + execution_id: String, + }, + + /// Change the shell session's working directory. + #[serde(rename = "code/shell-cd")] + CodeShellCd { + persona_id: String, + path: String, + }, + + /// Get shell session status/info. + #[serde(rename = "code/shell-status")] + CodeShellStatus { + persona_id: String, + }, + + /// Watch an execution for new output. Blocks until output is available + /// (no timeout, no polling). Returns classified lines via sentinel rules. + #[serde(rename = "code/shell-watch")] + CodeShellWatch { + persona_id: String, + execution_id: String, + }, + + /// Configure sentinel filter rules on an execution. + /// Rules classify output lines and control which are emitted or suppressed. + #[serde(rename = "code/shell-sentinel")] + CodeShellSentinel { + persona_id: String, + execution_id: String, + rules: Vec, + }, + + /// Destroy a shell session (kills all running executions). + #[serde(rename = "code/shell-destroy")] + CodeShellDestroy { + persona_id: String, + }, + #[serde(rename = "health-check")] HealthCheck, @@ -492,6 +567,8 @@ struct ServerState { memory_manager: Arc, /// Per-persona file engines β€” workspace-scoped file operations with change tracking. file_engines: Arc>, + /// Per-persona shell sessions β€” persistent bash per workspace with handle+poll. + shell_sessions: Arc>, } impl ServerState { @@ -511,6 +588,7 @@ impl ServerState { rt_handle, memory_manager, file_engines: Arc::new(DashMap::new()), + shell_sessions: Arc::new(DashMap::new()), } } @@ -1654,6 +1732,200 @@ impl ServerState { } } + // ── Shell Session Handlers ────────────────────────────────── + + Request::CodeShellCreate { persona_id, workspace_root } => { + let _timer = TimingGuard::new("ipc", "code_shell_create"); + + let root = std::path::Path::new(&workspace_root); + match ShellSession::new(&persona_id, &persona_id, root) { + Ok(session) => { + let info = session.info(); + self.shell_sessions.insert(persona_id.clone(), session); + log_info!("ipc", "shell", "Created shell session for {} at {}", persona_id, workspace_root); + HandleResult::Json(Response::success( + serde_json::to_value(&info).unwrap_or_default() + )) + } + Err(e) => HandleResult::Json(Response::error( + format!("Failed to create shell session: {}", e) + )), + } + } + + Request::CodeShellExecute { persona_id, cmd, timeout_ms, wait } => { + let _timer = TimingGuard::new("ipc", "code_shell_execute"); + + let mut session = match self.shell_sessions.get_mut(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + if wait { + // Blocking mode: wait for completion, return full result + match session.execute_and_wait(&cmd, timeout_ms, &self.rt_handle) { + Ok(result) => HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(e)), + } + } else { + // Handle mode: return immediately with execution_id + match session.execute(&cmd, timeout_ms, &self.rt_handle) { + Ok(execution_id) => { + let response = code::shell_types::ShellExecuteResponse { + execution_id, + status: code::shell_types::ShellExecutionStatus::Running, + stdout: None, + stderr: None, + exit_code: None, + }; + HandleResult::Json(Response::success( + serde_json::to_value(&response).unwrap_or_default() + )) + } + Err(e) => HandleResult::Json(Response::error(e)), + } + } + } + + Request::CodeShellPoll { persona_id, execution_id } => { + let _timer = TimingGuard::new("ipc", "code_shell_poll"); + + let session = match self.shell_sessions.get(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + match session.poll(&execution_id) { + Ok(result) => HandleResult::Json(Response::success( + serde_json::to_value(&result).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeShellKill { persona_id, execution_id } => { + let _timer = TimingGuard::new("ipc", "code_shell_kill"); + + let session = match self.shell_sessions.get(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + match session.kill(&execution_id) { + Ok(()) => HandleResult::Json(Response::success(serde_json::json!({ + "killed": true + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeShellCd { persona_id, path } => { + let _timer = TimingGuard::new("ipc", "code_shell_cd"); + + let mut session = match self.shell_sessions.get_mut(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + match session.cd(&path) { + Ok(new_cwd) => HandleResult::Json(Response::success(serde_json::json!({ + "cwd": new_cwd + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeShellStatus { persona_id } => { + let _timer = TimingGuard::new("ipc", "code_shell_status"); + + let session = match self.shell_sessions.get(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + let info = session.info(); + HandleResult::Json(Response::success( + serde_json::to_value(&info).unwrap_or_default() + )) + } + + Request::CodeShellWatch { persona_id, execution_id } => { + let _timer = TimingGuard::new("ipc", "code_shell_watch"); + + // Extract watch handles THEN release the DashMap lock before blocking. + let handles = { + let session = match self.shell_sessions.get(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + session.get_watch_handles(&execution_id) + // DashMap Ref dropped here + }; + + match handles { + Err(e) => HandleResult::Json(Response::error(e)), + Ok((exec_state, notify)) => { + // Block this IPC thread until output is available. + // Safe: IPC runs on std threads, not inside the tokio runtime. + match self.rt_handle.block_on( + code::shell_session::watch_execution(&execution_id, exec_state, notify) + ) { + Ok(response) => HandleResult::Json(Response::success( + serde_json::to_value(&response).unwrap_or_default() + )), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + } + } + + Request::CodeShellSentinel { persona_id, execution_id, rules } => { + let _timer = TimingGuard::new("ipc", "code_shell_sentinel"); + + let session = match self.shell_sessions.get(&persona_id) { + Some(s) => s, + None => return HandleResult::Json(Response::error( + format!("No shell session for persona {}", persona_id) + )), + }; + + match session.set_sentinel(&execution_id, &rules) { + Ok(count) => HandleResult::Json(Response::success(serde_json::json!({ + "applied": true, + "ruleCount": count + }))), + Err(e) => HandleResult::Json(Response::error(e)), + } + } + + Request::CodeShellDestroy { persona_id } => { + let _timer = TimingGuard::new("ipc", "code_shell_destroy"); + + if let Some(mut session) = self.shell_sessions.get_mut(&persona_id) { + session.destroy(); + } + self.shell_sessions.remove(&persona_id); + + log_info!("ipc", "shell", "Destroyed shell session for {}", persona_id); + HandleResult::Json(Response::success(serde_json::json!({ + "destroyed": true + }))) + } + Request::HealthCheck => { HandleResult::Json(Response::success(serde_json::json!({ "healthy": true }))) } diff --git a/src/debug/jtag/workers/continuum-core/src/persona/channel_items.rs b/src/debug/jtag/workers/continuum-core/src/persona/channel_items.rs index e439e238a..1b86aefb4 100644 --- a/src/debug/jtag/workers/continuum-core/src/persona/channel_items.rs +++ b/src/debug/jtag/workers/continuum-core/src/persona/channel_items.rs @@ -338,6 +338,71 @@ impl TaskQueueItem { } } +//============================================================================= +// CODE QUEUE ITEM +//============================================================================= + +/// Code: workspace-scoped coding tasks. Not urgent, never kicked, slow aging. +/// Consolidates multiple requests for the same workspace. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CodeQueueItem { + pub id: Uuid, + pub room_id: Uuid, + pub persona_id: Uuid, + pub task_description: String, + pub workspace_handle: String, + pub priority: f32, + pub is_review: bool, + pub timestamp: u64, + pub enqueued_at: u64, +} + +impl QueueItemBehavior for CodeQueueItem { + fn item_type(&self) -> &'static str { "code" } + fn domain(&self) -> ActivityDomain { ActivityDomain::Code } + fn id(&self) -> Uuid { self.id } + fn timestamp(&self) -> u64 { self.timestamp } + fn base_priority(&self) -> f32 { self.priority } + + // Slow aging β€” coding tasks are long-lived, 60s to reach max boost + fn aging_boost_ms(&self) -> f32 { 60_000.0 } + + // Not urgent β€” coding is not real-time + fn is_urgent(&self) -> bool { false } + + // Never kicked β€” don't drop active coding work + fn can_be_kicked(&self) -> bool { false } + fn kick_resistance(&self, _now_ms: u64, _enqueued_at_ms: u64) -> f32 { f32::INFINITY } + + // Consolidate multiple requests for the same workspace + fn should_consolidate_with(&self, other: &dyn QueueItemBehavior) -> bool { + if other.item_type() != "code" { + return false; + } + if let Some(other_code) = other.as_any().downcast_ref::() { + other_code.workspace_handle == self.workspace_handle + } else { + false + } + } + + fn as_any(&self) -> &dyn Any { self } + + fn to_json(&self) -> serde_json::Value { + serde_json::json!({ + "type": "code", + "id": self.id.to_string(), + "roomId": self.room_id.to_string(), + "personaId": self.persona_id.to_string(), + "taskDescription": self.task_description, + "workspaceHandle": self.workspace_handle, + "priority": self.priority, + "isReview": self.is_review, + "timestamp": self.timestamp, + }) + } +} + //============================================================================= // IPC REQUEST TYPES β€” For receiving items from TypeScript //============================================================================= @@ -392,6 +457,18 @@ pub enum ChannelEnqueueRequest { depends_on: Vec, blocked_by: Vec, }, + #[serde(rename = "code")] + Code { + id: String, + room_id: String, + persona_id: String, + task_description: String, + workspace_handle: String, + priority: f32, + is_review: bool, + #[ts(type = "number")] + timestamp: u64, + }, } impl ChannelEnqueueRequest { @@ -435,6 +512,22 @@ impl ChannelEnqueueRequest { consolidated_context: Vec::new(), })) } + ChannelEnqueueRequest::Code { + id, room_id, persona_id, task_description, + workspace_handle, priority, is_review, timestamp, + } => { + Ok(Box::new(CodeQueueItem { + id: parse_uuid(id, "id")?, + room_id: parse_uuid(room_id, "room_id")?, + persona_id: parse_uuid(persona_id, "persona_id")?, + task_description: task_description.clone(), + workspace_handle: workspace_handle.clone(), + priority: *priority, + is_review: *is_review, + timestamp: *timestamp, + enqueued_at: now, + })) + } ChannelEnqueueRequest::Task { id, task_id, assignee_id, created_by, task_domain, task_type, context_id, description, priority, status, diff --git a/src/debug/jtag/workers/continuum-core/src/persona/channel_registry.rs b/src/debug/jtag/workers/continuum-core/src/persona/channel_registry.rs index 131e06b24..ee02ceba1 100644 --- a/src/debug/jtag/workers/continuum-core/src/persona/channel_registry.rs +++ b/src/debug/jtag/workers/continuum-core/src/persona/channel_registry.rs @@ -39,6 +39,11 @@ impl ChannelRegistry { max_size: 500, name: "CHAT".into(), })); + registry.register(ChannelQueue::new(ChannelQueueConfig { + domain: ActivityDomain::Code, + max_size: 100, + name: "CODE".into(), + })); registry.register(ChannelQueue::new(ChannelQueueConfig { domain: ActivityDomain::Background, max_size: 200, @@ -229,6 +234,7 @@ fn domain_name(domain: ActivityDomain) -> &'static str { match domain { ActivityDomain::Audio => "AUDIO", ActivityDomain::Chat => "CHAT", + ActivityDomain::Code => "CODE", ActivityDomain::Background => "BACKGROUND", } } @@ -287,6 +293,7 @@ mod tests { let registry = ChannelRegistry::new(); assert!(registry.get(ActivityDomain::Audio).is_some()); assert!(registry.get(ActivityDomain::Chat).is_some()); + assert!(registry.get(ActivityDomain::Code).is_some()); assert!(registry.get(ActivityDomain::Background).is_some()); } @@ -343,7 +350,7 @@ mod tests { assert_eq!(status.total_size, 2); assert!(status.has_urgent_work); assert!(status.has_work); - assert_eq!(status.channels.len(), 3); // All domains reported + assert_eq!(status.channels.len(), 4); // All domains reported } #[test] diff --git a/src/debug/jtag/workers/continuum-core/src/persona/channel_types.rs b/src/debug/jtag/workers/continuum-core/src/persona/channel_types.rs index ccdf0157c..80ec5ca08 100644 --- a/src/debug/jtag/workers/continuum-core/src/persona/channel_types.rs +++ b/src/debug/jtag/workers/continuum-core/src/persona/channel_types.rs @@ -26,13 +26,10 @@ pub enum ActivityDomain { Audio, /// Chat messages: per-room consolidation, mention urgency, RTOS aging Chat, + /// Code: workspace-scoped coding tasks, not urgent, never kicked, slow aging + Code, /// Background tasks: dependency-aware, overdue urgency Background, - // Future domains: - // RealtimeGame, - // Code, - // Music, - // RobotControl, } /// All currently registered domains in priority order (highest first). @@ -40,6 +37,7 @@ pub enum ActivityDomain { pub const DOMAIN_PRIORITY_ORDER: &[ActivityDomain] = &[ ActivityDomain::Audio, ActivityDomain::Chat, + ActivityDomain::Code, ActivityDomain::Background, ]; @@ -229,6 +227,7 @@ mod tests { fn test_domain_priority_order() { assert_eq!(DOMAIN_PRIORITY_ORDER[0], ActivityDomain::Audio); assert_eq!(DOMAIN_PRIORITY_ORDER[1], ActivityDomain::Chat); - assert_eq!(DOMAIN_PRIORITY_ORDER[2], ActivityDomain::Background); + assert_eq!(DOMAIN_PRIORITY_ORDER[2], ActivityDomain::Code); + assert_eq!(DOMAIN_PRIORITY_ORDER[3], ActivityDomain::Background); } } From 3f131acb39d695177e0aad73341a29f0ae2f5f17 Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 2 Feb 2026 13:16:19 -0600 Subject: [PATCH 13/14] Phase 7+8: Multi-workspace, DataDaemon.read() API cleanup, recipe tools in system prompt DataDaemon.read() now returns T|null (consistent with store/update), eliminating the .data.data unwrapping pattern across ~30 call sites. Recipe tools and strategy rules flow from JSON through ChatRAGBuilder into RAGContext and are injected into PersonaResponseGenerator system prompt as activity context for the LLM. PersonaUser supports per-room workspaces via Map with room-aware mode selection (worktree for code rooms). RecipeToolDeclaration type added to RecipeTypes, making the tools field in recipe JSONs visible to TypeScript. --- .../inspect/server/RAGInspectServerCommand.ts | 5 +- .../server/ThoughtStreamServerCommand.ts | 17 ++--- .../delete/server/DataDeleteServerCommand.ts | 5 +- .../data/read/server/DataReadServerCommand.ts | 10 +-- .../server/SessionGetUserServerCommand.ts | 15 ++-- .../server/SkillActivateServerCommand.ts | 10 +-- .../server/SkillGenerateServerCommand.ts | 5 +- .../server/SkillValidateServerCommand.ts | 5 +- .../daemons/data-daemon/shared/DataDaemon.ts | 12 ++- .../server/SessionDaemonServer.ts | 23 +++--- .../server/SessionStateHelper.ts | 5 +- .../server/TrainingDaemonServer.ts | 3 +- .../user-daemon/server/UserDaemonServer.ts | 16 ++-- .../system/rag/builders/ChatRAGBuilder.ts | 76 +++++++++---------- .../system/rag/builders/CodebaseRAGBuilder.ts | 6 +- src/debug/jtag/system/rag/shared/RAGTypes.ts | 4 + .../rag/sources/PersonaIdentitySource.ts | 7 +- .../rag/sources/SocialMediaRAGSource.ts | 6 +- .../unit/ChatRAGBuilder.learningMode.test.ts | 76 ++++++------------- .../jtag/system/recipes/shared/RecipeTypes.ts | 19 +++++ .../jtag/system/user/server/CallerDetector.ts | 12 +-- .../jtag/system/user/server/PersonaUser.ts | 59 +++++++++----- .../server/modules/PersonaAutonomousLoop.ts | 25 +++++- .../server/modules/PersonaMessageEvaluator.ts | 7 +- .../modules/PersonaResponseGenerator.ts | 33 ++++++++ .../modules/cognitive/memory/PersonaMemory.ts | 12 ++- src/debug/jtag/system/user/shared/BaseUser.ts | 6 +- .../user/storage/server/SQLiteStateBackend.ts | 2 +- .../jtag/tests/unit/code/Workspace.test.ts | 57 ++++++++++++++ 29 files changed, 312 insertions(+), 226 deletions(-) diff --git a/src/debug/jtag/commands/ai/rag/inspect/server/RAGInspectServerCommand.ts b/src/debug/jtag/commands/ai/rag/inspect/server/RAGInspectServerCommand.ts index 0a83686b8..f31d8b84e 100644 --- a/src/debug/jtag/commands/ai/rag/inspect/server/RAGInspectServerCommand.ts +++ b/src/debug/jtag/commands/ai/rag/inspect/server/RAGInspectServerCommand.ts @@ -101,9 +101,8 @@ export class RAGInspectServerCommand extends RAGInspectCommand { if (params.triggerMessageId) { try { // Load the trigger message - const msgResult = await DataDaemon.read(ChatMessageEntity.collection, params.triggerMessageId); - if (msgResult.success && msgResult.data) { - const msg = msgResult.data.data; + const msg = await DataDaemon.read(ChatMessageEntity.collection, params.triggerMessageId); + if (msg) { // Get actual decision from ThoughtStream const coordinator = getThoughtStreamCoordinator(); diff --git a/src/debug/jtag/commands/ai/thoughtstream/server/ThoughtStreamServerCommand.ts b/src/debug/jtag/commands/ai/thoughtstream/server/ThoughtStreamServerCommand.ts index b23bea4e2..e0884eed2 100644 --- a/src/debug/jtag/commands/ai/thoughtstream/server/ThoughtStreamServerCommand.ts +++ b/src/debug/jtag/commands/ai/thoughtstream/server/ThoughtStreamServerCommand.ts @@ -74,16 +74,14 @@ export class ThoughtStreamServerCommand extends ThoughtStreamCommand { try { // Query data daemon for the message - const result = await DataDaemon.read( + const msg = await DataDaemon.read( COLLECTIONS.CHAT_MESSAGES, stream.messageId ); - if (result.success && result.data) { - const msg = result.data as any; - // Try different possible structures for message data - messageSender = msg.senderName || msg.data?.senderName || 'Unknown'; - messageContent = msg.content?.text || msg.data?.content?.text || msg.text || ''; + if (msg) { + messageSender = msg.senderName || 'Unknown'; + messageContent = msg.content?.text ?? ''; } } catch (error) { console.warn(`⚠️ Could not load message ${stream.messageId}:`, error); @@ -585,14 +583,13 @@ export class ThoughtStreamServerCommand extends ThoughtStreamCommand { private async getPersonaName(personaId: string, params: ThoughtStreamParams): Promise { try { - const result = await DataDaemon.read( + const user = await DataDaemon.read( COLLECTIONS.USERS, personaId ); - if (result.success && result.data) { - const userData = result.data as any; - return userData.displayName || userData.name || personaId.slice(0, 8); + if (user) { + return user.displayName || personaId.slice(0, 8); } return personaId.slice(0, 8); } catch { diff --git a/src/debug/jtag/commands/data/delete/server/DataDeleteServerCommand.ts b/src/debug/jtag/commands/data/delete/server/DataDeleteServerCommand.ts index 153581bc8..4453ac056 100644 --- a/src/debug/jtag/commands/data/delete/server/DataDeleteServerCommand.ts +++ b/src/debug/jtag/commands/data/delete/server/DataDeleteServerCommand.ts @@ -10,8 +10,7 @@ import type { ICommandDaemon } from '../../../../daemons/command-daemon/shared/C import type { DataDeleteParams, DataDeleteResult } from '../shared/DataDeleteTypes'; import { createDataDeleteResultFromParams } from '../shared/DataDeleteTypes'; import { DataDaemon } from '../../../../daemons/data-daemon/shared/DataDaemon'; -// import { BaseEntity } from '../../../../system/data/entities/BaseEntity'; -// import { Events } from '../../../../system/core/server/shared/Events'; +import type { BaseEntity } from '@system/data/entities/BaseEntity'; export class DataDeleteServerCommand extends CommandBase { @@ -24,7 +23,7 @@ export class DataDeleteServerCommand extends CommandBase { try { // Use DataDaemon for consistent storage access - const result = await DataDaemon.read(params.collection, params.id); + const entity = await DataDaemon.read(params.collection, params.id); - if (result.success && result.data) { + if (entity) { // Extract media if this is a chat message with attachments let media: MediaItem[] = []; - let cleanedData = result.data.data; + let cleanedData: BaseEntity = entity; - if (params.collection === 'chat_messages' && result.data.data) { - const messageData = result.data.data as ChatMessageEntity; + if (params.collection === 'chat_messages') { + const messageData = entity as ChatMessageEntity; if (messageData.content?.media && Array.isArray(messageData.content.media)) { // Extract media to top level media = messageData.content.media; diff --git a/src/debug/jtag/commands/session/get-user/server/SessionGetUserServerCommand.ts b/src/debug/jtag/commands/session/get-user/server/SessionGetUserServerCommand.ts index fb26558dd..6aa47e5ce 100644 --- a/src/debug/jtag/commands/session/get-user/server/SessionGetUserServerCommand.ts +++ b/src/debug/jtag/commands/session/get-user/server/SessionGetUserServerCommand.ts @@ -29,20 +29,18 @@ export class SessionGetUserServerCommand extends CommandBase(COLLECTIONS.USERS, getUserParams.userId); + const user = await DataDaemon.read(COLLECTIONS.USERS, getUserParams.userId); - if (!userResult.success || !userResult.data) { + if (!user) { return transformPayload(getUserParams, { success: false, error: `User not found: ${getUserParams.userId}` }); } - const user = userResult.data.data as UserEntity; - return transformPayload(getUserParams, { success: true, - user: user + user }); } @@ -91,18 +89,15 @@ export class SessionGetUserServerCommand extends CommandBase(COLLECTIONS.USERS, userId); + const user = await DataDaemon.read(COLLECTIONS.USERS, userId); - if (!userResult.success || !userResult.data) { + if (!user) { return transformPayload(getUserParams, { success: false, error: `User not found: ${userId}` }); } - // Extract user entity from DataRecord - const user = userResult.data.data as UserEntity; - return transformPayload(getUserParams, { success: true, user: user diff --git a/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts b/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts index 17f8c81d5..81df724db 100644 --- a/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts +++ b/src/debug/jtag/commands/skill/activate/server/SkillActivateServerCommand.ts @@ -29,11 +29,10 @@ export class SkillActivateServerCommand extends CommandBase(COLLECTIONS.SKILLS, skillId as UUID); - if (!readResult.success || !readResult.data) { + const skill = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!skill) { throw new ValidationError('skillId', `Skill not found: ${skillId}`); } - const skill = readResult.data.data as SkillEntity; if (skill.status !== 'validated') { throw new ValidationError('skillId', @@ -47,9 +46,8 @@ export class SkillActivateServerCommand extends CommandBase; + const proposal = await DataDaemon.read(COLLECTIONS.DECISION_PROPOSALS, skill.proposalId); + if (proposal) { if (proposal.status !== 'approved' && proposal.status !== 'concluded') { throw new ValidationError('skillId', `Team skill '${skill.name}' has not been approved yet (proposal status: ${proposal.status}).`); diff --git a/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts b/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts index c6b3904a6..cd70a3d39 100644 --- a/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts +++ b/src/debug/jtag/commands/skill/generate/server/SkillGenerateServerCommand.ts @@ -33,11 +33,10 @@ export class SkillGenerateServerCommand extends CommandBase(COLLECTIONS.SKILLS, skillId as UUID); - if (!readResult.success || !readResult.data) { + const skill = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!skill) { throw new ValidationError('skillId', `Skill not found: ${skillId}`); } - const skill = readResult.data.data as SkillEntity; // Verify lifecycle state: personal skills can skip approval, team skills need 'approved' const canGenerate = diff --git a/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts b/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts index 78af94c54..c0317c914 100644 --- a/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts +++ b/src/debug/jtag/commands/skill/validate/server/SkillValidateServerCommand.ts @@ -32,11 +32,10 @@ export class SkillValidateServerCommand extends CommandBase(COLLECTIONS.SKILLS, skillId as UUID); - if (!readResult.success || !readResult.data) { + const skill = await DataDaemon.read(COLLECTIONS.SKILLS, skillId as UUID); + if (!skill) { throw new ValidationError('skillId', `Skill not found: ${skillId}`); } - const skill = readResult.data.data as SkillEntity; if (skill.status !== 'generated') { throw new ValidationError('skillId', diff --git a/src/debug/jtag/daemons/data-daemon/shared/DataDaemon.ts b/src/debug/jtag/daemons/data-daemon/shared/DataDaemon.ts index 6a1be3505..a39674210 100644 --- a/src/debug/jtag/daemons/data-daemon/shared/DataDaemon.ts +++ b/src/debug/jtag/daemons/data-daemon/shared/DataDaemon.ts @@ -1042,15 +1042,21 @@ export class DataDaemon { /** * Read single record by ID with automatic context injection - CLEAN INTERFACE * + * Returns the entity directly (unwrapped), or null if not found. + * Consistent with store() and update() which also return T directly. + * * @example - * const user = await DataDaemon.read('users', userId); + * const user = await DataDaemon.read(COLLECTIONS.USERS, userId); + * if (user) { console.log(user.displayName); } */ - static async read(collection: string, id: UUID): Promise>> { + static async read(collection: string, id: UUID): Promise { if (!DataDaemon.sharedInstance || !DataDaemon.context) { throw new Error('DataDaemon not initialized - system must call DataDaemon.initialize() first'); } - return await DataDaemon.sharedInstance.read(collection, id, DataDaemon.context); + const result = await DataDaemon.sharedInstance.read(collection, id, DataDaemon.context); + if (!result.success || !result.data) return null; + return result.data.data; } /** diff --git a/src/debug/jtag/daemons/session-daemon/server/SessionDaemonServer.ts b/src/debug/jtag/daemons/session-daemon/server/SessionDaemonServer.ts index 2518ab0b6..591ca2768 100644 --- a/src/debug/jtag/daemons/session-daemon/server/SessionDaemonServer.ts +++ b/src/debug/jtag/daemons/session-daemon/server/SessionDaemonServer.ts @@ -425,28 +425,23 @@ export class SessionDaemonServer extends SessionDaemon { } // Load UserEntity from database - const userResult = await DataDaemon.read(COLLECTIONS.USERS, userId); - if (!userResult.success || !userResult.data) { + const userEntity = await DataDaemon.read(COLLECTIONS.USERS, userId); + if (!userEntity) { throw new Error(`User ${userId} not found in database`); } - // DataRecord has { id, collection, data, metadata } - // Ensure id is present in the data (Rust adapter may not include it in data.data) - const userEntity = userResult.data.data as UserEntity; - if (!userEntity.id) { - (userEntity as any).id = userResult.data.id; - } - // Load UserStateEntity from database - const stateResult = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); - if (!stateResult.success || !stateResult.data) { + const userState = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); + if (!userState) { throw new Error(`UserState for ${userId} not found in database`); } - // Ensure id is present in the state data - const userState = stateResult.data.data as UserStateEntity; + // Ensure IDs are present (Rust adapter may not include them) + if (!userEntity.id) { + (userEntity as any).id = userId; + } if (!userState.id) { - (userState as any).id = stateResult.data.id; + (userState as any).id = userId; } // Create appropriate User subclass based on type diff --git a/src/debug/jtag/daemons/session-daemon/server/SessionStateHelper.ts b/src/debug/jtag/daemons/session-daemon/server/SessionStateHelper.ts index dd6063cc2..7bb45ad26 100644 --- a/src/debug/jtag/daemons/session-daemon/server/SessionStateHelper.ts +++ b/src/debug/jtag/daemons/session-daemon/server/SessionStateHelper.ts @@ -27,15 +27,14 @@ export class SessionStateHelper { */ static async getUserState(userId: UUID): Promise { try { - const stateResult = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); + const userStateData = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); - if (!stateResult.success || !stateResult.data) { + if (!userStateData) { this.log.warn(`UserState not found for userId: ${userId}`); return null; } // Hydrate UserStateEntity to get instance methods - const userStateData = stateResult.data.data; const userState = Object.assign(new UserStateEntity(), userStateData); return userState; diff --git a/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts b/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts index 94d8f69ff..188db8362 100644 --- a/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts +++ b/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts @@ -289,8 +289,7 @@ export class TrainingDaemonServer extends TrainingDaemon { */ private async fetchUser(userId: UUID): Promise { try { - const result = await DataDaemon.read(COLLECTIONS.USERS, userId); - return result.success && result.data ? result.data.data : null; + return await DataDaemon.read(COLLECTIONS.USERS, userId); } catch (error) { this.log.error(`❌ TrainingDaemon: Failed to fetch user ${userId}:`, error); return null; diff --git a/src/debug/jtag/daemons/user-daemon/server/UserDaemonServer.ts b/src/debug/jtag/daemons/user-daemon/server/UserDaemonServer.ts index b8eeb29bf..7a8e36b2c 100644 --- a/src/debug/jtag/daemons/user-daemon/server/UserDaemonServer.ts +++ b/src/debug/jtag/daemons/user-daemon/server/UserDaemonServer.ts @@ -288,14 +288,12 @@ export class UserDaemonServer extends UserDaemon { private async createPersonaClient(userEntity: UserEntity): Promise { try { // Load UserStateEntity (must exist - created by user/create command) - const userStateResult = await DataDaemon.read(COLLECTIONS.USER_STATES, userEntity.id); + const userState = await DataDaemon.read(COLLECTIONS.USER_STATES, userEntity.id); - if (!userStateResult.success || !userStateResult.data) { + if (!userState) { throw new Error(`UserStateEntity not found for persona ${userEntity.displayName} (${userEntity.id}) - user must be created via user/create command`); } - const userState: UserStateEntity = userStateResult.data.data; - // Initialize SQLite storage backend const dbPath = `.continuum/personas/${userEntity.id}/state.sqlite`; const storage = new SQLiteStateBackend(dbPath); @@ -334,9 +332,9 @@ export class UserDaemonServer extends UserDaemon { protected async ensureUserHasState(userId: UUID): Promise { try { // Check if UserState exists - const result = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); + const existingState = await DataDaemon.read(COLLECTIONS.USER_STATES, userId); - if (result.success && result.data) { + if (existingState) { return true; // UserState exists } @@ -355,14 +353,12 @@ export class UserDaemonServer extends UserDaemon { private async createUserState(userId: UUID): Promise { try { // Load user entity to get type - const userResult = await DataDaemon.read(COLLECTIONS.USERS, userId); - if (!userResult.success || !userResult.data) { + const user = await DataDaemon.read(COLLECTIONS.USERS, userId); + if (!user) { this.log.error(`❌ UserDaemon: User ${userId} not found`); return false; } - const user: UserEntity = userResult.data.data; - // Create UserState with type-specific defaults const userState = new UserStateEntity(); userState.id = userId; diff --git a/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts b/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts index 46ad6feff..5f468f37f 100644 --- a/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts +++ b/src/debug/jtag/system/rag/builders/ChatRAGBuilder.ts @@ -21,6 +21,7 @@ import type { PersonaMemory, RecipeStrategy } from '../shared/RAGTypes'; +import type { RecipeToolDeclaration } from '../../recipes/shared/RecipeTypes'; import type { UUID } from '../../core/types/CrossPlatformUUID'; import { DataDaemon } from '../../../daemons/data-daemon/shared/DataDaemon'; import { ChatMessageEntity } from '../../data/entities/ChatMessageEntity'; @@ -163,6 +164,7 @@ export class ChatRAGBuilder extends RAGBuilder { let artifacts: RAGArtifact[]; let privateMemories: PersonaMemory[]; let recipeStrategy: RecipeStrategy | undefined; + let recipeTools: RecipeToolDeclaration[] | undefined; let learningConfig: { learningMode?: 'fine-tuning' | 'inference-only'; genomeId?: UUID; participantRole?: string } | undefined; let widgetContext: string | null; let globalAwareness: string | null; @@ -215,13 +217,14 @@ export class ChatRAGBuilder extends RAGBuilder { codeToolGuidance = extracted.codeToolGuidance; // Still load these via legacy methods (not yet extracted to sources) - const [extractedArtifacts, extractedRecipeStrategy, extractedLearningConfig] = await Promise.all([ + const [extractedArtifacts, extractedRecipeContext, extractedLearningConfig] = await Promise.all([ includeArtifacts ? this.extractArtifacts(contextId, maxMessages) : Promise.resolve([]), - this.loadRecipeStrategy(contextId), + this.loadRecipeContext(contextId), this.loadLearningConfig(contextId, personaId) ]); artifacts = extractedArtifacts; - recipeStrategy = extractedRecipeStrategy; + recipeStrategy = extractedRecipeContext?.strategy; + recipeTools = extractedRecipeContext?.tools; learningConfig = extractedLearningConfig; this.log(`πŸ”§ ChatRAGBuilder: Composed from ${composition.sections.length} sources in ${composition.totalLoadTimeMs.toFixed(1)}ms`); @@ -235,7 +238,7 @@ export class ChatRAGBuilder extends RAGBuilder { loadedConversationHistory, loadedArtifacts, loadedPrivateMemories, - loadedRecipeStrategy, + loadedRecipeContext, loadedLearningConfig, loadedWidgetContext ] = await Promise.all([ @@ -258,8 +261,8 @@ export class ChatRAGBuilder extends RAGBuilder { options?.currentMessage?.content // ← Semantic query: use current message for relevant memory recall ) : Promise.resolve([]), - // 5. Load room's recipe strategy (conversation governance rules) - this.loadRecipeStrategy(contextId), + // 5. Load room's recipe context (strategy + tool highlights) + this.loadRecipeContext(contextId), // 6. Load learning configuration (Phase 2: Per-participant learning mode) this.loadLearningConfig(contextId, personaId), @@ -272,7 +275,8 @@ export class ChatRAGBuilder extends RAGBuilder { conversationHistory = loadedConversationHistory; artifacts = loadedArtifacts; privateMemories = loadedPrivateMemories; - recipeStrategy = loadedRecipeStrategy; + recipeStrategy = loadedRecipeContext?.strategy; + recipeTools = loadedRecipeContext?.tools; learningConfig = loadedLearningConfig; widgetContext = loadedWidgetContext; globalAwareness = null; // Legacy path doesn't use GlobalAwarenessSource @@ -353,6 +357,7 @@ export class ChatRAGBuilder extends RAGBuilder { personaId, identity: finalIdentity, recipeStrategy, + recipeTools, conversationHistory: finalConversationHistory, artifacts: processedArtifacts, privateMemories, @@ -460,9 +465,9 @@ export class ChatRAGBuilder extends RAGBuilder { */ private async loadPersonaIdentity(personaId: UUID, roomId: UUID, options?: RAGBuildOptions): Promise { try { - const result = await DataDaemon.read(UserEntity.collection, personaId); + const user = await DataDaemon.read(UserEntity.collection, personaId); - if (!result.success || !result.data) { + if (!user) { this.log(`⚠️ ChatRAGBuilder: Could not load persona ${personaId}, using defaults`); return { name: 'AI Assistant', @@ -470,10 +475,6 @@ export class ChatRAGBuilder extends RAGBuilder { }; } - // DataDaemon.read returns DataRecord, access .data for entity - const userRecord = result.data; - const user = userRecord.data; - return { name: user.displayName, bio: user.profile?.bio, @@ -992,13 +993,13 @@ LIMITS: */ private async loadRoomName(roomId: UUID): Promise { try { - const roomResult = await DataDaemon.read(RoomEntity.collection, roomId); - if (!roomResult.success || !roomResult.data) { + const room = await DataDaemon.read(RoomEntity.collection, roomId); + if (!room) { this.log(`⚠️ ChatRAGBuilder: Could not load room ${roomId} for name lookup`); return null; } - return roomResult.data.data.name; + return room.name; } catch (error) { this.log(`❌ ChatRAGBuilder: Error loading room name:`, error); return null; @@ -1011,27 +1012,26 @@ LIMITS: private async loadRoomMembers(roomId: UUID): Promise { try { // 1. Load room entity - const roomResult = await DataDaemon.read(RoomEntity.collection, roomId); - if (!roomResult.success || !roomResult.data) { + const room = await DataDaemon.read(RoomEntity.collection, roomId); + if (!room) { this.log(`⚠️ ChatRAGBuilder: Could not load room ${roomId}`); return []; } - const room = roomResult.data.data; if (!room.members || room.members.length === 0) { return []; } // 2. Load user entities for each member to get display names (PARALLELIZED) - const memberResults = await Promise.all( + const members = await Promise.all( room.members.map(member => DataDaemon.read(UserEntity.collection, member.userId) ) ); - const memberNames = memberResults - .filter(result => result.success && result.data) - .map(result => result.data!.data.displayName); + const memberNames = members + .filter((user): user is UserEntity => user !== null) + .map(user => user.displayName); return memberNames; } catch (error) { @@ -1041,19 +1041,18 @@ LIMITS: } /** - * Load recipe strategy from room's recipeId + * Load recipe context (strategy + tools) from room's recipeId */ - private async loadRecipeStrategy(roomId: UUID): Promise { + private async loadRecipeContext(roomId: UUID): Promise<{ strategy?: RecipeStrategy; tools?: RecipeToolDeclaration[] } | undefined> { try { // 1. Load room to get recipeId - const roomResult = await DataDaemon.read(RoomEntity.collection, roomId); + const room = await DataDaemon.read(RoomEntity.collection, roomId); - if (!roomResult.success || !roomResult.data) { - this.log(`⚠️ ChatRAGBuilder: Could not load room ${roomId}, no recipe strategy`); + if (!room) { + this.log(`⚠️ ChatRAGBuilder: Could not load room ${roomId}, no recipe context`); return undefined; } - const room = roomResult.data.data; const recipeId = room.recipeId; if (!recipeId) { @@ -1065,15 +1064,18 @@ LIMITS: const recipeLoader = RecipeLoader.getInstance(); const recipe = await recipeLoader.loadRecipe(recipeId); - if (!recipe || !recipe.strategy) { - this.log(`⚠️ ChatRAGBuilder: Could not load recipe ${recipeId}, no strategy`); + if (!recipe) { + this.log(`⚠️ ChatRAGBuilder: Could not load recipe ${recipeId}`); return undefined; } - this.log(`βœ… ChatRAGBuilder: Loaded recipe strategy "${recipe.displayName}" (${recipeId})`); - return recipe.strategy; + this.log(`βœ… ChatRAGBuilder: Loaded recipe context "${recipe.displayName}" (${recipeId}) β€” strategy=${!!recipe.strategy}, tools=${recipe.tools?.length ?? 0}`); + return { + strategy: recipe.strategy, + tools: recipe.tools, + }; } catch (error) { - this.log(`❌ ChatRAGBuilder: Error loading recipe strategy:`, error); + this.log(`❌ ChatRAGBuilder: Error loading recipe context:`, error); return undefined; } } @@ -1088,14 +1090,12 @@ LIMITS: ): Promise<{ learningMode?: 'fine-tuning' | 'inference-only'; genomeId?: UUID; participantRole?: string } | undefined> { try { // 1. Load room entity - const roomResult = await DataDaemon.read(RoomEntity.collection, roomId); - if (!roomResult.success || !roomResult.data) { + const room = await DataDaemon.read(RoomEntity.collection, roomId); + if (!room) { this.log(`⚠️ ChatRAGBuilder: Could not load room ${roomId} for learning config`); return undefined; } - const room = roomResult.data.data; - // 2. Find this persona's membership const member = room.members.find(m => m.userId === personaId); if (!member) { diff --git a/src/debug/jtag/system/rag/builders/CodebaseRAGBuilder.ts b/src/debug/jtag/system/rag/builders/CodebaseRAGBuilder.ts index 0fa395c65..fac7c1aeb 100644 --- a/src/debug/jtag/system/rag/builders/CodebaseRAGBuilder.ts +++ b/src/debug/jtag/system/rag/builders/CodebaseRAGBuilder.ts @@ -101,9 +101,9 @@ export class CodebaseRAGBuilder extends RAGBuilder { */ private async loadPersonaIdentity(personaId: UUID): Promise { try { - const result = await DataDaemon.read(UserEntity.collection, personaId); + const user = await DataDaemon.read(UserEntity.collection, personaId); - if (!result.success || !result.data) { + if (!user) { console.warn(`⚠️ CodebaseRAGBuilder: Could not load persona ${personaId}, using defaults`); return { name: 'Code Expert', @@ -111,8 +111,6 @@ export class CodebaseRAGBuilder extends RAGBuilder { }; } - const user = result.data.data; - return { name: user.displayName, bio: user.profile?.bio, diff --git a/src/debug/jtag/system/rag/shared/RAGTypes.ts b/src/debug/jtag/system/rag/shared/RAGTypes.ts index 05db90da8..dd4371c6a 100644 --- a/src/debug/jtag/system/rag/shared/RAGTypes.ts +++ b/src/debug/jtag/system/rag/shared/RAGTypes.ts @@ -11,6 +11,7 @@ */ import type { UUID } from '../../core/types/CrossPlatformUUID'; +import type { RecipeToolDeclaration } from '../../recipes/shared/RecipeTypes'; /** * Domain types that can provide RAG context @@ -123,6 +124,9 @@ export interface RAGContext { // Conversation governance rules (from recipe) recipeStrategy?: RecipeStrategy; + // Recipe-highlighted tools (context for LLM, NOT a filter) + recipeTools?: RecipeToolDeclaration[]; + // Conversation history (public context) conversationHistory: LLMMessage[]; diff --git a/src/debug/jtag/system/rag/sources/PersonaIdentitySource.ts b/src/debug/jtag/system/rag/sources/PersonaIdentitySource.ts index 4dc16a129..2239786b7 100644 --- a/src/debug/jtag/system/rag/sources/PersonaIdentitySource.ts +++ b/src/debug/jtag/system/rag/sources/PersonaIdentitySource.ts @@ -31,16 +31,13 @@ export class PersonaIdentitySource implements RAGSource { const startTime = performance.now(); try { - const result = await DataDaemon.read(UserEntity.collection, context.personaId); + const user = await DataDaemon.read(UserEntity.collection, context.personaId); - if (!result.success || !result.data) { + if (!user) { log.warn(`Could not load persona ${context.personaId}, using defaults`); return this.defaultSection(startTime); } - const userRecord = result.data; - const user = userRecord.data; - const identity: PersonaIdentity = { name: user.displayName, bio: user.profile?.bio, diff --git a/src/debug/jtag/system/rag/sources/SocialMediaRAGSource.ts b/src/debug/jtag/system/rag/sources/SocialMediaRAGSource.ts index 6918174b1..c911f5769 100644 --- a/src/debug/jtag/system/rag/sources/SocialMediaRAGSource.ts +++ b/src/debug/jtag/system/rag/sources/SocialMediaRAGSource.ts @@ -233,18 +233,18 @@ export class SocialMediaRAGSource implements RAGSource { } // Look up persona's uniqueId via DataDaemon - const userResult = await SocialMediaRAGSource.withTimeout( + const user = await SocialMediaRAGSource.withTimeout( DataDaemon.read(UserEntity.collection, personaId), SocialMediaRAGSource.API_TIMEOUT_MS, 'DataDaemon.read' ); - if (!userResult.success || !userResult.data) { + if (!user) { log.debug(`No user found for persona ${personaId.slice(0, 8)} β€” caching null`); SocialMediaRAGSource._credentialCache.set(personaId, null); return undefined; } - const personaUniqueId = userResult.data.data.uniqueId; + const personaUniqueId = user.uniqueId; log.debug(`Resolving credentials for ${personaUniqueId} (${personaId.slice(0, 8)})`); // Try each registered platform diff --git a/src/debug/jtag/system/rag/test/unit/ChatRAGBuilder.learningMode.test.ts b/src/debug/jtag/system/rag/test/unit/ChatRAGBuilder.learningMode.test.ts index dfc0a6b6d..3ab88c057 100644 --- a/src/debug/jtag/system/rag/test/unit/ChatRAGBuilder.learningMode.test.ts +++ b/src/debug/jtag/system/rag/test/unit/ChatRAGBuilder.learningMode.test.ts @@ -36,10 +36,7 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { describe('loadLearningConfig', () => { it('should return undefined when room does not exist', async () => { // Mock DataDaemon to return no room - vi.mocked(DataDaemon.read).mockResolvedValueOnce({ - success: false, - data: undefined - } as never); + vi.mocked(DataDaemon.read).mockResolvedValueOnce(null as never); const context = await ragBuilder.buildContext(testRoomId, testPersonaId); @@ -64,10 +61,8 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -107,13 +102,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -157,13 +148,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -205,13 +192,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -256,13 +239,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -307,13 +286,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -366,13 +341,9 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }; vi.mocked(DataDaemon.read).mockImplementation(async (collection, id) => { - if (collection === 'rooms') { - return { success: true, data: { data: mockRoom } } as never; - } - if (collection === 'users' && id === testPersonaId) { - return { success: true, data: { data: mockUser } } as never; - } - return { success: false } as never; + if (collection === 'rooms') return mockRoom as never; + if (collection === 'users' && id === testPersonaId) return mockUser as never; + return null as never; }); vi.mocked(DataDaemon.query).mockResolvedValue({ @@ -411,10 +382,7 @@ describe('ChatRAGBuilder - Learning Mode (Phase 2)', () => { }); it('should handle malformed room data', async () => { - vi.mocked(DataDaemon.read).mockResolvedValue({ - success: true, - data: { data: null } - } as never); + vi.mocked(DataDaemon.read).mockResolvedValue(null as never); vi.mocked(DataDaemon.query).mockResolvedValue({ success: true, diff --git a/src/debug/jtag/system/recipes/shared/RecipeTypes.ts b/src/debug/jtag/system/recipes/shared/RecipeTypes.ts index b08a5b5dd..6485bad11 100644 --- a/src/debug/jtag/system/recipes/shared/RecipeTypes.ts +++ b/src/debug/jtag/system/recipes/shared/RecipeTypes.ts @@ -76,6 +76,19 @@ export interface RecipeStrategy { decisionCriteria: string[]; // What LLM should consider } +/** + * Tool declaration in a recipe β€” HIGHLIGHTS, not filters. + * + * These tell the LLM "these tools are especially relevant for this activity." + * They do NOT restrict access β€” all 225+ tools remain available. + * The recipe just provides context about what's useful. + */ +export interface RecipeToolDeclaration { + name: string; + description: string; + enabledFor: ('ai' | 'human')[]; +} + /** * Recipe input parameter definition * Recipe = function definition, Activity = function call with arguments @@ -155,6 +168,9 @@ export interface RecipeEntity { */ locked?: string[]; + // Tool highlights β€” which tools are especially relevant for this activity + tools?: RecipeToolDeclaration[]; + // Sharing isPublic: boolean; createdBy: UUID; @@ -212,6 +228,9 @@ export interface RecipeDefinition { ragTemplate: RAGTemplate; strategy: RecipeStrategy; + // Tool highlights β€” which tools are especially relevant for this activity + tools?: RecipeToolDeclaration[]; + // UI composition (optional - defaults handled by layout system) layout?: ActivityUILayout; diff --git a/src/debug/jtag/system/user/server/CallerDetector.ts b/src/debug/jtag/system/user/server/CallerDetector.ts index fa5cd0d36..d32db8f44 100644 --- a/src/debug/jtag/system/user/server/CallerDetector.ts +++ b/src/debug/jtag/system/user/server/CallerDetector.ts @@ -37,15 +37,13 @@ export async function detectCallerType(context: JTAGContext, userId: UUID): Prom // 2. Look up user by userId try { - const userResult = await DataDaemon.read(COLLECTIONS.USERS, userId); + const user = await DataDaemon.read(COLLECTIONS.USERS, userId); - if (!userResult.success || !userResult.data) { + if (!user) { console.warn(`CallerDetector: User not found for userId=${userId}, defaulting to 'script'`); return 'script'; } - const user = userResult.data.data; - // 3. Map UserEntity.type to CallerType switch (user.type) { case 'persona': @@ -79,15 +77,13 @@ export async function detectCallerType(context: JTAGContext, userId: UUID): Prom */ export async function getCallerCapabilities(userId: UUID): Promise { try { - const userResult = await DataDaemon.read(COLLECTIONS.USERS, userId); + const user = await DataDaemon.read(COLLECTIONS.USERS, userId); - if (!userResult.success || !userResult.data) { + if (!user) { console.warn(`CallerDetector: User not found for userId=${userId}, returning default capabilities`); return getDefaultCapabilities(); } - const user = userResult.data.data; - // Build capabilities from user configuration const capabilities: CallerCapabilities = {}; diff --git a/src/debug/jtag/system/user/server/PersonaUser.ts b/src/debug/jtag/system/user/server/PersonaUser.ts index e0e5c7382..e6d16411b 100644 --- a/src/debug/jtag/system/user/server/PersonaUser.ts +++ b/src/debug/jtag/system/user/server/PersonaUser.ts @@ -200,8 +200,9 @@ export class PersonaUser extends AIUser { // MEMORY LEAK FIX: Track event subscriptions for cleanup private _eventUnsubscribes: (() => void)[] = []; - // Workspace handle β€” lazy-created on first code task, retained for session lifetime - private _workspace: Workspace | null = null; + // Workspace handles β€” lazy-created per context key, retained for session lifetime + // Keyed by context (e.g., room uniqueId) so personas can have per-room workspaces + private _workspaces: Map = new Map(); /** * Get unified consciousness for cross-context awareness @@ -314,26 +315,42 @@ export class PersonaUser extends AIUser { // Workspace β€” per-persona code workspace (lazy-created, session-scoped) // ════════════════════════════════════════════════════════════════════════════ - /** Get the current workspace handle (null if not yet created) */ - public get workspace(): Workspace | null { - return this._workspace; + /** Get a workspace by context key (null if not yet created for that context) */ + public getWorkspace(contextKey: string = 'default'): Workspace | null { + return this._workspaces.get(contextKey) ?? null; } /** - * Ensure a workspace exists for this persona. - * Creates a sandbox workspace on first call, retains for session lifetime. + * Ensure a workspace exists for this persona in the given context. + * Creates on first call per context key, retains for session lifetime. * Called automatically when persona receives a code-domain task. + * + * @param options.contextKey Room uniqueId or other scope key (default: 'default') + * @param options.mode 'sandbox' for isolated, 'worktree' for real git branches + * @param options.taskSlug Used for branch naming in worktree mode + * @param options.sparsePaths Sparse checkout paths for worktree mode */ - public async ensureWorkspace(): Promise { - if (this._workspace) return this._workspace; - - this.log.info(`πŸ”§ ${this.displayName}: Creating workspace (sandbox mode)`); - this._workspace = await Workspace.create({ + public async ensureWorkspace(options?: { + contextKey?: string; + mode?: 'sandbox' | 'worktree'; + taskSlug?: string; + sparsePaths?: string[]; + }): Promise { + const key = options?.contextKey ?? 'default'; + const existing = this._workspaces.get(key); + if (existing) return existing; + + const mode = options?.mode ?? 'sandbox'; + this.log.info(`${this.displayName}: Creating workspace (${mode} mode, context=${key})`); + const ws = await Workspace.create({ personaId: this.id, - mode: 'sandbox', + mode, + taskSlug: options?.taskSlug ?? key, + sparsePaths: options?.sparsePaths, }); - this.log.info(`πŸ”§ ${this.displayName}: Workspace created β€” handle=${this._workspace.handle}, dir=${this._workspace.dir}`); - return this._workspace; + this._workspaces.set(key, ws); + this.log.info(`${this.displayName}: Workspace created β€” handle=${ws.handle}, dir=${ws.dir}, mode=${mode}`); + return ws; } // BEING ARCHITECTURE: Delegate to body for toolExecutor @@ -1992,16 +2009,16 @@ export class PersonaUser extends AIUser { // Stop autonomous servicing loop await this.autonomousLoop.stopServicing(); - // Clean up workspace (shell session + worktree) - if (this._workspace) { + // Clean up all workspaces (shell sessions + worktrees) + for (const [key, ws] of this._workspaces) { try { - await this._workspace.destroy(); - this.log.info(`πŸ”§ ${this.displayName}: Workspace destroyed`); + await ws.destroy(); + this.log.info(`${this.displayName}: Workspace destroyed (context=${key})`); } catch (e) { - this.log.warn(`⚠️ ${this.displayName}: Workspace cleanup failed: ${e}`); + this.log.warn(`${this.displayName}: Workspace cleanup failed (context=${key}): ${e}`); } - this._workspace = null; } + this._workspaces.clear(); // PHASE 6: Shutdown memory module (genome + RAG) await this.memory.shutdown(); diff --git a/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts b/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts index 49976ac12..103647f22 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaAutonomousLoop.ts @@ -18,6 +18,7 @@ import type { UUID } from '../../../core/types/CrossPlatformUUID'; import { DataDaemon } from '../../../../daemons/data-daemon/shared/DataDaemon'; import { COLLECTIONS } from '../../../shared/Constants'; import type { TaskEntity } from '../../../data/entities/TaskEntity'; +import { RoomEntity } from '../../../data/entities/RoomEntity'; import { taskEntityToInboxTask, inboxMessageToProcessable, type InboxTask, type QueueItem } from './QueueItemTypes'; import type { FastPathDecision } from './central-nervous-system/CNSTypes'; @@ -280,15 +281,35 @@ export class PersonaAutonomousLoop { * Delegates to PersonaTaskExecutor module for actual execution. */ private async executeTask(task: InboxTask): Promise { - // For code-domain tasks, ensure workspace exists before dispatching + // For code-domain tasks, ensure workspace exists with room-aware mode if (task.domain === 'code') { - await this.personaUser.ensureWorkspace(); + const roomId = task.metadata?.roomId ?? task.contextId; + const roomSlug = await this.resolveRoomSlug(roomId); + await this.personaUser.ensureWorkspace({ + contextKey: roomSlug, + mode: 'worktree', + taskSlug: roomSlug, + }); } // Delegate to task executor module await this.personaUser.taskExecutor.executeTask(task); } + /** + * Resolve a room UUID to its uniqueId slug for workspace naming. + * Falls back to truncated UUID if room lookup fails. + */ + private async resolveRoomSlug(roomId: UUID): Promise { + try { + const room = await DataDaemon.read(COLLECTIONS.ROOMS, roomId); + if (room?.uniqueId) return room.uniqueId; + } catch { + // Room lookup failed β€” use truncated UUID + } + return roomId.slice(0, 8); + } + /** * Stop autonomous servicing loops and cleanup */ diff --git a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts index 130532d07..810a57ff4 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaMessageEvaluator.ts @@ -932,15 +932,14 @@ export class PersonaMessageEvaluator { try { // Query the sender's UserEntity to check their type using DataDaemon directly - const result = await DataDaemon.read(COLLECTIONS.USERS, senderId); + const sender = await DataDaemon.read(COLLECTIONS.USERS, senderId); - if (!result.success || !result.data) { + if (!sender) { this.log(`⚠️ PersonaUser ${this.personaUser.displayName}: Could not read sender ${senderId}, BLOCKING response`); return false; // Fail CLOSED - don't respond if database fails (prevents loops) } - const senderType = result.data.data.type; - return senderType === 'human'; + return sender.type === 'human'; } catch (error: any) { this.log(`❌ PersonaUser ${this.personaUser.displayName}: Error checking sender type, BLOCKING response:`, error); diff --git a/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts b/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts index 45a2ab542..d7869ecae 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaResponseGenerator.ts @@ -597,6 +597,39 @@ export class PersonaResponseGenerator { this.log(`πŸ”§ ${this.personaName}: Injected ${availableTools.length} available tools into context`); } + // Inject recipe activity context (strategy rules + highlighted tools) + // Recipe tools are HIGHLIGHTS, not filters β€” they tell the LLM what's most relevant + if (fullRAGContext.recipeStrategy || fullRAGContext.recipeTools) { + let activitySection = '\n\n=== ACTIVITY CONTEXT ==='; + + if (fullRAGContext.recipeStrategy) { + const strategy = fullRAGContext.recipeStrategy; + activitySection += `\nActivity pattern: ${strategy.conversationPattern}`; + + if (strategy.responseRules.length > 0) { + activitySection += '\n\nRules for this activity:\n' + + strategy.responseRules.map(rule => `- ${rule}`).join('\n'); + } + + if (strategy.decisionCriteria.length > 0) { + activitySection += '\n\nWhen deciding whether to respond, consider:\n' + + strategy.decisionCriteria.map(c => `- ${c}`).join('\n'); + } + } + + if (fullRAGContext.recipeTools && fullRAGContext.recipeTools.length > 0) { + activitySection += '\n\nTools especially relevant to this activity:\n' + + fullRAGContext.recipeTools + .filter(t => t.enabledFor.includes('ai')) + .map(t => `- ${t.name}: ${t.description}`) + .join('\n'); + } + + activitySection += '\n================================'; + systemPrompt += activitySection; + this.log(`πŸ“‹ ${this.personaName}: Injected activity context (strategy + ${fullRAGContext.recipeTools?.length ?? 0} tool highlights)`); + } + // Inject governance guidance for democratic participation const governanceSection = `\n\n=== DEMOCRATIC GOVERNANCE === You are a citizen of a self-governing AI society. You can participate in collective decisions: diff --git a/src/debug/jtag/system/user/server/modules/cognitive/memory/PersonaMemory.ts b/src/debug/jtag/system/user/server/modules/cognitive/memory/PersonaMemory.ts index bde9486a2..ce7537d4a 100644 --- a/src/debug/jtag/system/user/server/modules/cognitive/memory/PersonaMemory.ts +++ b/src/debug/jtag/system/user/server/modules/cognitive/memory/PersonaMemory.ts @@ -86,7 +86,7 @@ export class PersonaMemory { // Check if record exists const existing = await DataDaemon.read(PERSONA_RAG_CONTEXTS_COLLECTION, recordId); - if (existing.success && existing.data) { + if (existing) { // Update existing record (DataDaemon handles updatedAt) await DataDaemon.update(PERSONA_RAG_CONTEXTS_COLLECTION, recordId, record as any); } else { @@ -108,16 +108,14 @@ export class PersonaMemory { const recordId = `rag-${this.personaId}-${roomId}`; try { - const result = await DataDaemon.read(PERSONA_RAG_CONTEXTS_COLLECTION, recordId); + const entity = await DataDaemon.read(PERSONA_RAG_CONTEXTS_COLLECTION, recordId); - if (!result.success || !result.data) { + if (!entity) { return null; } - // Parse the stored JSON context from the data.data.contextJson field - // DataRecord structure: { id, collection, data: { ...entityFields }, ... } - const entityData = result.data.data as any; - const storedContext = entityData?.contextJson; + // Parse the stored JSON context from the entity's contextJson field + const storedContext = (entity as Record).contextJson as string | undefined; if (typeof storedContext === 'string') { return JSON.parse(storedContext) as PersonaRAGContext; diff --git a/src/debug/jtag/system/user/shared/BaseUser.ts b/src/debug/jtag/system/user/shared/BaseUser.ts index 0d680b8d8..fed0f571d 100644 --- a/src/debug/jtag/system/user/shared/BaseUser.ts +++ b/src/debug/jtag/system/user/shared/BaseUser.ts @@ -391,14 +391,12 @@ export abstract class BaseUser { displayName: string ): Promise { // Read current room - const roomResult = await DataDaemon.read(COLLECTIONS.ROOMS, roomId); - if (!roomResult.success || !roomResult.data) { + const room = await DataDaemon.read(COLLECTIONS.ROOMS, roomId); + if (!room) { console.warn(`⚠️ ${this.name}.create: Room ${roomId} not found`); return; } - const room = roomResult.data.data; - // Check if already a member if (room.members.some((m: { userId: UUID }) => m.userId === userId)) { console.log(`ℹ️ ${this.name}.create: ${displayName} already member of room ${room.name}`); diff --git a/src/debug/jtag/system/user/storage/server/SQLiteStateBackend.ts b/src/debug/jtag/system/user/storage/server/SQLiteStateBackend.ts index 04d136eb0..043dbb7fc 100644 --- a/src/debug/jtag/system/user/storage/server/SQLiteStateBackend.ts +++ b/src/debug/jtag/system/user/storage/server/SQLiteStateBackend.ts @@ -46,7 +46,7 @@ export class SQLiteStateBackend implements IUserStateStorage { // Use DataDaemon static interface (avoids JTAGClient recursion during initialization) const existing = await DataDaemon.read(UserStateEntity.collection, state.id); - if (existing.success && existing.data) { + if (existing) { // Update existing state await DataDaemon.update(UserStateEntity.collection, state.id, state); } else { diff --git a/src/debug/jtag/tests/unit/code/Workspace.test.ts b/src/debug/jtag/tests/unit/code/Workspace.test.ts index 5458caa7e..2313effc9 100644 --- a/src/debug/jtag/tests/unit/code/Workspace.test.ts +++ b/src/debug/jtag/tests/unit/code/Workspace.test.ts @@ -616,6 +616,63 @@ describe('Workspace', () => { }); }); + describe('multi-workspace isolation', () => { + it('two workspaces from different create calls have independent handles', async () => { + vi.mocked(WorkspaceStrategy.create) + .mockResolvedValueOnce({ + handle: 'worktree-persona-room-a', + workspaceDir: '/tmp/workspace/room-a', + mode: 'worktree', + branch: 'ai/helper/room-a', + }) + .mockResolvedValueOnce({ + handle: 'worktree-persona-room-b', + workspaceDir: '/tmp/workspace/room-b', + mode: 'worktree', + branch: 'ai/helper/room-b', + }); + + const wsA = await Workspace.create({ personaId: PERSONA_ID, mode: 'worktree', taskSlug: 'room-a' }); + const wsB = await Workspace.create({ personaId: PERSONA_ID, mode: 'worktree', taskSlug: 'room-b' }); + + expect(wsA.handle).toBe('worktree-persona-room-a'); + expect(wsB.handle).toBe('worktree-persona-room-b'); + expect(wsA.handle).not.toBe(wsB.handle); + expect(wsA.dir).not.toBe(wsB.dir); + expect(wsA.branch).not.toBe(wsB.branch); + }); + + it('operations on workspace A do not affect workspace B', async () => { + const wsA = Workspace.fromExisting('handle-a', '/tmp/ws-a', 'worktree', 'branch-a'); + const wsB = Workspace.fromExisting('handle-b', '/tmp/ws-b', 'worktree', 'branch-b'); + + vi.mocked(CodeDaemon.workspaceRead).mockResolvedValue({} as any); + vi.mocked(CodeDaemon.workspaceWrite).mockResolvedValue({} as any); + + await wsA.read('file.ts'); + await wsB.write('other.ts', 'content'); + + expect(vi.mocked(CodeDaemon.workspaceRead).mock.calls[0][0]).toBe('handle-a'); + expect(vi.mocked(CodeDaemon.workspaceWrite).mock.calls[0][0]).toBe('handle-b'); + }); + + it('destroying one workspace does not affect another', async () => { + vi.mocked(WorkspaceStrategy.cleanup).mockResolvedValue(); + + const wsA = Workspace.fromExisting('handle-a', '/tmp/ws-a', 'worktree', 'branch-a'); + const wsB = Workspace.fromExisting('handle-b', '/tmp/ws-b', 'worktree', 'branch-b'); + + await wsA.destroy(); + + // wsB should still be usable + vi.mocked(CodeDaemon.workspaceRead).mockResolvedValue({} as any); + await wsB.read('file.ts'); + + expect(WorkspaceStrategy.cleanup).toHaveBeenCalledWith('handle-a', undefined); + expect(CodeDaemon.workspaceRead).toHaveBeenCalledWith('handle-b', 'file.ts', undefined, undefined); + }); + }); + describe('handle consistency', () => { it('every operation uses the same handle β€” no handle drift', async () => { const ws = Workspace.fromExisting(HANDLE, WORKSPACE_DIR, 'worktree', BRANCH); From 8337b5c17fae1e355ea5c7199529a31ea722657d Mon Sep 17 00:00:00 2001 From: Joel Date: Mon, 2 Feb 2026 14:42:52 -0600 Subject: [PATCH 14/14] Close training circuit: wire PersonaTrainingManager, TrainingDaemon, batch-micro-tune to GenomeJobCreate; fix VoiceService STT + GeminiLive cancel --- .../GenomeBatchMicroTuneServerCommand.ts | 110 +++-- .../server/TrainingDaemonServer.ts | 27 +- src/debug/jtag/generated-command-schemas.json | 2 +- src/debug/jtag/package-lock.json | 4 +- src/debug/jtag/package.json | 2 +- src/debug/jtag/shared/version.ts | 2 +- .../server/modules/PersonaTrainingManager.ts | 210 +++++++-- .../jtag/system/voice/server/VoiceService.ts | 20 +- .../server/adapters/GeminiLiveAdapter.ts | 19 +- .../unit/training/TrainingCircuit.test.ts | 432 ++++++++++++++++++ 10 files changed, 728 insertions(+), 100 deletions(-) create mode 100644 src/debug/jtag/tests/unit/training/TrainingCircuit.test.ts diff --git a/src/debug/jtag/commands/genome/batch-micro-tune/server/GenomeBatchMicroTuneServerCommand.ts b/src/debug/jtag/commands/genome/batch-micro-tune/server/GenomeBatchMicroTuneServerCommand.ts index 4a57c6da3..61c5b3738 100644 --- a/src/debug/jtag/commands/genome/batch-micro-tune/server/GenomeBatchMicroTuneServerCommand.ts +++ b/src/debug/jtag/commands/genome/batch-micro-tune/server/GenomeBatchMicroTuneServerCommand.ts @@ -1,8 +1,9 @@ /** - * GenomeBatchMicroTuneServerCommand - Lightweight in-recipe LoRA updates + * GenomeBatchMicroTuneServerCommand - Trigger LoRA micro-tuning from accumulated examples * - * Performs fast micro-tuning using accumulated training examples. - * Updates soft weights in RAM for immediate effect, not persisted yet. + * Accesses the PersonaUser's TrainingDataAccumulator, checks if enough examples + * have accumulated for the requested domain, and triggers training via + * PersonaTrainingManager. Supports forceUpdate to bypass threshold check. */ import { CommandBase } from '../../../../daemons/command-daemon/shared/CommandBase'; @@ -13,6 +14,8 @@ import type { GenomeBatchMicroTuneParams, GenomeBatchMicroTuneResult } from '../shared/GenomeBatchMicroTuneTypes'; +import { UserDaemonServer } from '@daemons/user-daemon/server/UserDaemonServer'; +import { PersonaUser } from '@system/user/server/PersonaUser'; export class GenomeBatchMicroTuneServerCommand extends CommandBase< GenomeBatchMicroTuneParams, @@ -24,78 +27,91 @@ export class GenomeBatchMicroTuneServerCommand extends CommandBase< async execute(params: JTAGPayload): Promise { const tuneParams = params as GenomeBatchMicroTuneParams; + const domain = tuneParams.domain; + const forceUpdate = tuneParams.forceUpdate ?? false; - console.log('🧬 GENOME MICRO-TUNE: Starting lightweight training'); - console.log(` Domain: ${tuneParams.domain}`); - console.log(` Role: ${tuneParams.roleId ?? 'all'}`); + console.log(`🧬 GENOME MICRO-TUNE: domain=${domain}, force=${forceUpdate}`); try { - // TODO: Access PersonaUser's TrainingDataAccumulator - // Check if batch threshold reached (unless forceUpdate) - // Get training examples and filter by quality - // Perform fast micro-tuning (soft weight update in RAM) - // This is placeholder implementation + // 1. Get UserDaemon singleton + const userDaemon = UserDaemonServer.getInstance(); + if (!userDaemon) { + return transformPayload(params, { + success: false, + error: 'UserDaemon not initialized', + }); + } - const startTime = Date.now(); + // 2. Get PersonaUser instance + const personaId = tuneParams.personaId ?? tuneParams.userId; + if (!personaId) { + return transformPayload(params, { + success: false, + error: 'No personaId or userId provided', + }); + } - // Placeholder: Check if ready for training - const batchThreshold = 10; - const bufferSize = 5; // Placeholder - const qualityThreshold = tuneParams.qualityThreshold ?? 0.7; + const baseUser = userDaemon.getPersonaUser(personaId); + if (!baseUser || !(baseUser instanceof PersonaUser)) { + return transformPayload(params, { + success: false, + error: `PersonaUser not found: ${personaId}`, + }); + } + + const personaUser = baseUser as PersonaUser; + const accumulator = personaUser.trainingAccumulator; - if (!tuneParams.forceUpdate && bufferSize < batchThreshold) { - console.log(`⏳ Buffer not ready (${bufferSize}/${batchThreshold}), skipping micro-tune`); + // 3. Check buffer readiness + const bufferSize = accumulator.getBufferSize(domain); + const batchThreshold = accumulator.getBatchThreshold(domain); + + if (!forceUpdate && !accumulator.shouldMicroTune(domain)) { + console.log(`⏳ GENOME MICRO-TUNE: Buffer not ready (${bufferSize}/${batchThreshold})`); return transformPayload(params, { success: true, training: { - domain: tuneParams.domain, - loraAdapter: tuneParams.loraAdapter ?? `${tuneParams.domain}-base`, + domain, + loraAdapter: tuneParams.loraAdapter ?? `${domain}-base`, examplesUsed: 0, examplesFiltered: 0, - updateType: 'none' - } + updateType: 'none', + }, }); } - // Placeholder: Get examples and filter by quality - const totalExamples = bufferSize; - const filteredExamples = Math.floor(totalExamples * 0.8); // 80% pass quality threshold - const examplesUsed = Math.min(filteredExamples, tuneParams.maxExamples ?? 50); - - // Placeholder: Perform micro-tuning - // In real implementation: - // - Load current LoRA adapter soft weights - // - Run lightweight fine-tuning step (gradient descent on batch) - // - Update soft weights in RAM (don't save to disk yet) - console.log(`πŸ”§ Micro-tuning with ${examplesUsed} examples...`); - - // Simulate training time (real would be 100-500ms) + // 4. Trigger training via PersonaTrainingManager + // forceDomain bypasses the threshold check for the specified domain + const startTime = Date.now(); + await personaUser.trainingManager.checkTrainingReadiness(forceUpdate ? domain : undefined); const trainingTime = Date.now() - startTime; - console.log(`βœ… GENOME MICRO-TUNE: Completed in ${trainingTime}ms`); + // 5. Get post-training stats (buffer should be consumed now) + const postBufferSize = accumulator.getBufferSize(domain); + const examplesUsed = bufferSize - postBufferSize; + + console.log(`βœ… GENOME MICRO-TUNE: ${examplesUsed} examples consumed in ${trainingTime}ms`); return transformPayload(params, { success: true, training: { - domain: tuneParams.domain, - loraAdapter: tuneParams.loraAdapter ?? `${tuneParams.domain}-base`, + domain, + loraAdapter: tuneParams.loraAdapter ?? `${domain}-base`, examplesUsed, - examplesFiltered: totalExamples - filteredExamples, - updateType: 'soft', - improvementEstimate: 0.05, // 5% improvement placeholder + examplesFiltered: 0, + updateType: examplesUsed > 0 ? 'soft' : 'none', metrics: { trainingTime, - averageQuality: 0.82, - diversityScore: 0.75 - } - } + averageQuality: 0, // Quality scoring is Phase 12 + diversityScore: 0, + }, + }, }); - } catch (error) { console.error('❌ GENOME MICRO-TUNE: Error:', error); return transformPayload(params, { success: false, - error: error instanceof Error ? error.message : String(error) + error: error instanceof Error ? error.message : String(error), }); } } diff --git a/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts b/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts index 188db8362..516c0fc49 100644 --- a/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts +++ b/src/debug/jtag/daemons/training-daemon/server/TrainingDaemonServer.ts @@ -305,7 +305,8 @@ export class TrainingDaemonServer extends TrainingDaemon { } /** - * Check if we've reached auto fine-tune threshold + * Check if we've reached auto fine-tune threshold. + * Emits 'training:dataset-ready' event when threshold is crossed. */ private async checkAutoFineTuneThreshold(): Promise { try { @@ -315,15 +316,23 @@ export class TrainingDaemonServer extends TrainingDaemon { limit: 1 // Just need count }); - if (queryResult.success && queryResult.metadata?.totalCount) { - const count = queryResult.metadata.totalCount; + if (!queryResult.success || !queryResult.metadata?.totalCount) return; - if (count >= this.config.autoFineTuneThreshold && count % this.config.autoFineTuneThreshold === 0) { - this.log.info(`πŸš€ TrainingDaemon: Auto fine-tune threshold reached (${count} examples)`); - this.log.info('πŸš€ TrainingDaemon: TODO: Trigger fine-tuning (Phase 2 implementation)'); - // Future: Trigger genome/batch-micro-tune command - } - } + const count = queryResult.metadata.totalCount; + + // Only trigger at exact threshold multiples (50, 100, 150, ...) + if (count < this.config.autoFineTuneThreshold) return; + if (count % this.config.autoFineTuneThreshold !== 0) return; + + this.log.info(`πŸš€ TrainingDaemon: Auto fine-tune threshold reached (${count} examples)`); + + // Emit event for TrainingOrchestrator or other listeners to pick up + await Events.emit('training:dataset-ready', { + exampleCount: count, + source: 'auto-threshold', + trigger: 'training-daemon', + timestamp: Date.now(), + }); } catch (error) { this.log.error('❌ TrainingDaemon: Failed to check auto fine-tune threshold:', error); } diff --git a/src/debug/jtag/generated-command-schemas.json b/src/debug/jtag/generated-command-schemas.json index febbf304f..5f4e33c79 100644 --- a/src/debug/jtag/generated-command-schemas.json +++ b/src/debug/jtag/generated-command-schemas.json @@ -1,5 +1,5 @@ { - "generated": "2026-02-02T17:26:41.508Z", + "generated": "2026-02-02T20:34:52.424Z", "version": "1.0.0", "commands": [ { diff --git a/src/debug/jtag/package-lock.json b/src/debug/jtag/package-lock.json index d371a2685..c5872462a 100644 --- a/src/debug/jtag/package-lock.json +++ b/src/debug/jtag/package-lock.json @@ -1,12 +1,12 @@ { "name": "@continuum/jtag", - "version": "1.0.7533", + "version": "1.0.7536", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@continuum/jtag", - "version": "1.0.7533", + "version": "1.0.7536", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/src/debug/jtag/package.json b/src/debug/jtag/package.json index 4777095c5..3bb9b2f18 100644 --- a/src/debug/jtag/package.json +++ b/src/debug/jtag/package.json @@ -1,6 +1,6 @@ { "name": "@continuum/jtag", - "version": "1.0.7533", + "version": "1.0.7536", "description": "Global CLI debugging system for any Node.js project. Install once globally, use anywhere: npm install -g @continuum/jtag", "config": { "active_example": "widget-ui", diff --git a/src/debug/jtag/shared/version.ts b/src/debug/jtag/shared/version.ts index 92353370f..2c28fd5df 100644 --- a/src/debug/jtag/shared/version.ts +++ b/src/debug/jtag/shared/version.ts @@ -3,5 +3,5 @@ * DO NOT EDIT MANUALLY */ -export const VERSION = '1.0.7533'; +export const VERSION = '1.0.7536'; export const PACKAGE_NAME = '@continuum/jtag'; diff --git a/src/debug/jtag/system/user/server/modules/PersonaTrainingManager.ts b/src/debug/jtag/system/user/server/modules/PersonaTrainingManager.ts index df43ceafa..60d41156d 100644 --- a/src/debug/jtag/system/user/server/modules/PersonaTrainingManager.ts +++ b/src/debug/jtag/system/user/server/modules/PersonaTrainingManager.ts @@ -1,18 +1,31 @@ /** * PersonaTrainingManager - Handles continuous learning for PersonaUser * - * REFACTORING: Extracted from PersonaUser.ts (lines 1918-2004) - * Pure function extraction - no behavioral changes + * Monitors training data accumulation and triggers LoRA fine-tuning + * when thresholds are reached. Wires into the genome/job-create command + * for real training execution via provider-specific adapters. */ +import * as fs from 'fs'; +import * as path from 'path'; import type { UUID } from '../../../core/types/CrossPlatformUUID'; import { Events } from '../../../core/shared/Events'; -import type { TrainingDataAccumulator } from './TrainingDataAccumulator'; +import type { TrainingDataAccumulator, TrainingExample as AccumulatorExample } from './TrainingDataAccumulator'; import type { UserStateEntity } from '../../../data/entities/UserStateEntity'; +import { TrainingDatasetBuilder } from '../../../genome/fine-tuning/server/TrainingDatasetBuilder'; +import { GenomeJobCreate } from '../../../../commands/genome/job-create/shared/GenomeJobCreateTypes'; +import { + TrainingMethod, + TrainOnInputs, + LRSchedulerType, +} from '../../../../daemons/data-daemon/shared/entities/FineTuningTypes'; +import type { TrainingDataset, TrainingExample } from '../../../genome/fine-tuning/shared/FineTuningTypes'; +import type { TraitType } from '../../../genome/entities/GenomeLayerEntity'; import { AI_LEARNING_EVENTS, type AITrainingStartedEventData, - type AITrainingCompleteEventData + type AITrainingCompleteEventData, + type AITrainingErrorEventData } from '../../../events/shared/AILearningEvents'; /** @@ -48,7 +61,7 @@ export class PersonaTrainingManager { * This enables continuous learning: PersonaUsers improve through recipe execution * without manual intervention. */ - async checkTrainingReadiness(): Promise { + async checkTrainingReadiness(forceDomain?: string): Promise { try { const domains = this.trainingAccumulator.getDomains(); @@ -57,7 +70,8 @@ export class PersonaTrainingManager { } for (const domain of domains) { - if (this.trainingAccumulator.shouldMicroTune(domain)) { + const isForced = domain === forceDomain; + if (isForced || this.trainingAccumulator.shouldMicroTune(domain)) { const bufferSize = this.trainingAccumulator.getBufferSize(domain); const threshold = this.trainingAccumulator.getBatchThreshold(domain); @@ -93,47 +107,177 @@ export class PersonaTrainingManager { // Consume training data from buffer const examples = await this.trainingAccumulator.consumeTrainingData(domain); + if (examples.length === 0) { + this.log(`πŸ“Š No examples after consumption for ${domain}, skipping`); + state.learningState.isLearning = false; + await this.saveState(); + continue; + } this.log(`πŸ“Š Consumed ${examples.length} examples for ${domain} training`); - // TODO Phase 7.5.1: Trigger genome/train command - // For now, just log that we would train - this.log(`πŸš€ Would train ${domain} adapter with ${examples.length} examples`); + // Convert accumulator examples to fine-tuning format + const ftExamples = this.convertAccumulatorExamples(examples); + + // Execute real training via genome/job-create + await this.executeTraining(domain as TraitType, ftExamples, provider); - // Clear learning state + // Clear learning state after training submitted state.learningState.isLearning = false; state.learningState.domain = undefined; state.learningState.provider = undefined; state.learningState.startedAt = undefined; state.learningState.exampleCount = undefined; state.learningState.estimatedCompletion = undefined; - await this.saveState(); // Persist state to database - - // Simulate training completion for UI feedback - const trainingCompleteData: AITrainingCompleteEventData = { - personaId: this.personaId, - personaName: this.displayName ?? 'AI Assistant', - domain, - provider, - examplesProcessed: examples.length, - trainingTime: examples.length * 25, - finalLoss: 0.5, - timestamp: Date.now() - }; - await Events.emit(AI_LEARNING_EVENTS.TRAINING_COMPLETE, trainingCompleteData); - - // Future implementation: - // await Commands.execute('genome/train', { - // personaId: this.personaId, - // provider: 'unsloth', - // domain, - // trainingExamples: examples, - // dryRun: false - // }); + await this.saveState(); } } } catch (error) { this.log(`❌ Error checking training readiness: ${error}`); } } + + /** + * Convert accumulator-format examples (input/output) to fine-tuning format (messages[]). + * The accumulator stores raw I/O pairs; the training pipeline expects chat completion format. + */ + private convertAccumulatorExamples(accExamples: AccumulatorExample[]): TrainingExample[] { + return accExamples.map(ex => ({ + messages: [ + { role: 'user' as const, content: ex.input }, + { role: 'assistant' as const, content: ex.output }, + ], + metadata: { + timestamp: ex.timestamp.getTime(), + confidence: ex.feedback?.rating, + }, + })); + } + + /** + * Execute real LoRA fine-tuning via genome/job-create. + * + * Flow: examples β†’ JSONL file on disk β†’ genome/job-create β†’ provider adapter β†’ training job + */ + private async executeTraining( + traitType: TraitType, + examples: TrainingExample[], + provider: string, + ): Promise { + try { + // Build dataset from accumulated examples + const dataset: TrainingDataset = { + examples, + metadata: { + personaId: this.personaId, + personaName: this.displayName ?? 'AI Assistant', + traitType, + createdAt: Date.now(), + source: 'conversations', + totalExamples: examples.length, + }, + }; + + // Validate dataset quality before training + const validation = TrainingDatasetBuilder.validateDataset(dataset); + if (!validation.valid) { + this.log(`❌ Dataset validation failed: ${validation.errors.join(', ')}`); + await Events.emit(AI_LEARNING_EVENTS.TRAINING_ERROR, { + personaId: this.personaId, + personaName: this.displayName ?? 'AI Assistant', + domain: traitType, + error: `Dataset validation failed: ${validation.errors.join(', ')}`, + phase: 'preparation', + timestamp: Date.now(), + } satisfies AITrainingErrorEventData); + return; + } + + if (validation.warnings.length > 0) { + this.log(`⚠️ Dataset warnings: ${validation.warnings.join(', ')}`); + } + + // Export to JSONL and write to disk + const jsonlContent = TrainingDatasetBuilder.exportToJSONL(dataset); + const jsonlPath = await this.writeTrainingFile(traitType, jsonlContent); + + this.log(`πŸ“ Training data written to ${jsonlPath} (${examples.length} examples)`); + + // Create fine-tuning job via the working command + const result = await GenomeJobCreate.execute({ + personaId: this.personaId, + provider, + trainingFileId: jsonlPath, + configuration: { + model: { baseModel: 'llama3.2' }, + datasets: { trainingFileId: jsonlPath }, + method: { + type: TrainingMethod.LORA, + loraConfig: { rank: 16, alpha: 32, dropout: 0, trainableModules: 'all-linear' }, + }, + schedule: { + epochs: 3, + batchSize: 4, + sequenceLength: 2048, + gradientAccumulation: 1, + checkpoints: 1, + evaluations: 1, + trainOnInputs: TrainOnInputs.DISABLED, + }, + optimizer: { + learningRate: 0.0001, + scheduler: { type: LRSchedulerType.COSINE, minLRRatio: 0, warmupRatio: 0.1 }, + weightDecay: 0, + maxGradientNorm: 1, + }, + optimizations: { enabled: [] }, + output: {}, + metadata: {}, + }, + }); + + if (result.success && result.job) { + this.log(`πŸš€ Training job created: ${result.job.jobId} (provider: ${provider})`); + // TRAINING_STARTED already emitted above; completion will be + // emitted by the training job when it finishes asynchronously + } else { + this.log(`❌ Training job creation failed: ${result.error}`); + await Events.emit(AI_LEARNING_EVENTS.TRAINING_ERROR, { + personaId: this.personaId, + personaName: this.displayName ?? 'AI Assistant', + domain: traitType, + error: result.error ?? 'Unknown error creating training job', + phase: 'preparation', + timestamp: Date.now(), + } satisfies AITrainingErrorEventData); + } + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + this.log(`❌ Training execution failed: ${errorMsg}`); + await Events.emit(AI_LEARNING_EVENTS.TRAINING_ERROR, { + personaId: this.personaId, + personaName: this.displayName ?? 'AI Assistant', + domain: traitType, + error: errorMsg, + phase: 'preparation', + timestamp: Date.now(), + } satisfies AITrainingErrorEventData); + } + } + + /** + * Write JSONL training data to disk. + * Returns the file path for genome/job-create. + */ + private async writeTrainingFile(traitType: TraitType, jsonlContent: string): Promise { + const trainingDir = path.resolve('.continuum', 'training', 'auto', this.personaId); + await fs.promises.mkdir(trainingDir, { recursive: true }); + + const timestamp = Date.now(); + const filename = `${traitType}-${timestamp}.jsonl`; + const filePath = path.join(trainingDir, filename); + + await fs.promises.writeFile(filePath, jsonlContent, 'utf-8'); + return filePath; + } } diff --git a/src/debug/jtag/system/voice/server/VoiceService.ts b/src/debug/jtag/system/voice/server/VoiceService.ts index efd40fc29..37e89a102 100644 --- a/src/debug/jtag/system/voice/server/VoiceService.ts +++ b/src/debug/jtag/system/voice/server/VoiceService.ts @@ -10,6 +10,7 @@ import type { VoiceConfig, TTSAdapter } from '../shared/VoiceConfig'; import { DEFAULT_VOICE_CONFIG } from '../shared/VoiceConfig'; import { AUDIO_SAMPLE_RATE } from '../../../shared/AudioConstants'; import { VoiceSynthesize } from '../../../commands/voice/synthesize/shared/VoiceSynthesizeTypes'; +import { VoiceTranscribe } from '../../../commands/voice/transcribe/shared/VoiceTranscribeTypes'; export interface SynthesizeSpeechRequest { text: string; userId?: string; // For per-user preferences @@ -171,11 +172,24 @@ export class VoiceService { } /** - * Transcribe audio to text (future - not implemented yet) + * Transcribe audio to text via voice/transcribe command (Rust Whisper STT) */ async transcribeAudio(audioSamples: Int16Array, sampleRate: number): Promise { - // TODO: Implement STT via voice/transcribe command - throw new Error('Not implemented yet'); + // Convert Int16Array to base64 for the command + const buffer = Buffer.from(audioSamples.buffer, audioSamples.byteOffset, audioSamples.byteLength); + const audio = buffer.toString('base64'); + + const result = await VoiceTranscribe.execute({ + audio, + format: 'pcm16', + language: 'auto', + }); + + if (!result.success) { + throw new Error(result.error?.message ?? 'Transcription failed'); + } + + return result.text; } } diff --git a/src/debug/jtag/system/voice/server/adapters/GeminiLiveAdapter.ts b/src/debug/jtag/system/voice/server/adapters/GeminiLiveAdapter.ts index a37199803..d78380555 100644 --- a/src/debug/jtag/system/voice/server/adapters/GeminiLiveAdapter.ts +++ b/src/debug/jtag/system/voice/server/adapters/GeminiLiveAdapter.ts @@ -213,11 +213,24 @@ export class GeminiLiveAdapter implements IAudioNativeAdapter { /** * Cancel the current response + * + * Gemini Live API does not have an explicit response.cancel event. + * Interruption is handled server-side via VAD (when new audio input arrives, + * the server interrupts the current response). We clear local state and + * send an empty realtimeInput to signal the client wants to interrupt. */ cancelResponse(): void { - // Send interrupt/cancel message if supported - // Gemini may use a different mechanism - console.log('πŸ”Š Gemini Live: Cancel not yet implemented'); + if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { + return; + } + + // Send empty realtimeInput to signal interruption intent + // The server's VAD will handle the actual interruption + this.ws.send(JSON.stringify({ + clientContent: { + turnComplete: true, + }, + })); } /** diff --git a/src/debug/jtag/tests/unit/training/TrainingCircuit.test.ts b/src/debug/jtag/tests/unit/training/TrainingCircuit.test.ts new file mode 100644 index 000000000..dc7840add --- /dev/null +++ b/src/debug/jtag/tests/unit/training/TrainingCircuit.test.ts @@ -0,0 +1,432 @@ +/** + * Training Circuit Unit Tests + * + * Verifies the three training paths are wired end-to-end: + * 1. PersonaTrainingManager: accumulator β†’ convert β†’ JSONL β†’ GenomeJobCreate + * 2. TrainingDaemonServer: threshold β†’ Events.emit('training:dataset-ready') + * 3. GenomeBatchMicroTuneServerCommand: PersonaUser β†’ accumulator β†’ PersonaTrainingManager + * + * Also tests the type conversion from accumulator format (input/output) + * to fine-tuning format (messages[]). + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { PersonaTrainingManager } from '../../../system/user/server/modules/PersonaTrainingManager'; +import { TrainingDataAccumulator } from '../../../system/user/server/modules/TrainingDataAccumulator'; +import type { InteractionCapture } from '../../../system/user/server/modules/TrainingDataAccumulator'; +import { Events } from '../../../system/core/shared/Events'; +import { GenomeJobCreate } from '../../../commands/genome/job-create/shared/GenomeJobCreateTypes'; +import { TrainingDatasetBuilder } from '../../../system/genome/fine-tuning/server/TrainingDatasetBuilder'; + +// Mock dependencies that PersonaTrainingManager uses +vi.mock('../../../system/core/shared/Events', () => ({ + Events: { + emit: vi.fn().mockResolvedValue(undefined), + subscribe: vi.fn(), + }, +})); + +vi.mock('../../../commands/genome/job-create/shared/GenomeJobCreateTypes', () => ({ + GenomeJobCreate: { + execute: vi.fn().mockResolvedValue({ + success: true, + job: { + jobId: 'test-job-123', + providerJobId: 'prov-job-456', + provider: 'peft', + status: 'queued', + baseModel: 'llama3.2', + trainingFileId: '/tmp/test.jsonl', + createdAt: Date.now(), + configurationSummary: { + method: 'lora', + epochs: 3, + batchSize: 4, + learningRate: 0.0001, + sequenceLength: 2048, + }, + }, + }), + }, +})); + +vi.mock('../../../system/genome/fine-tuning/server/TrainingDatasetBuilder', () => ({ + TrainingDatasetBuilder: { + validateDataset: vi.fn().mockReturnValue({ valid: true, warnings: [], errors: [] }), + exportToJSONL: vi.fn().mockReturnValue('{"messages":[{"role":"user","content":"hello"}]}\n'), + }, +})); + +vi.mock('../../../daemons/data-daemon/shared/entities/FineTuningTypes', () => ({ + TrainingMethod: { FULL: 'full', LORA: 'lora', QLORA: 'qlora' }, + TrainOnInputs: { AUTO: 'auto', ENABLED: 'enabled', DISABLED: 'disabled' }, + LRSchedulerType: { COSINE: 'cosine', LINEAR: 'linear', CONSTANT: 'constant' }, +})); + +vi.mock('fs', () => ({ + default: { + promises: { + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), + }, + }, + promises: { + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), + }, +})); + +// ── Helpers ──────────────────────────────────────────────── + +const PERSONA_ID = 'test-persona-training'; +const PERSONA_NAME = 'Test Trainer'; + +/** + * MIN_BATCH_SIZE in TrainingDataAccumulator is 10, so thresholds below 10 + * get clamped. Use 10 as the minimum meaningful threshold for tests. + */ +const MIN_THRESHOLD = 10; + +function createAccumulator(batchThreshold = MIN_THRESHOLD): TrainingDataAccumulator { + const accumulator = new TrainingDataAccumulator(PERSONA_ID, PERSONA_NAME, () => {}); + accumulator.setBatchThreshold('conversation', batchThreshold); + return accumulator; +} + +function createManager(accumulator: TrainingDataAccumulator): PersonaTrainingManager { + const mockState = { + learningState: { isLearning: false }, + }; + return new PersonaTrainingManager( + PERSONA_ID, + PERSONA_NAME, + accumulator, + () => mockState as any, + async () => ({ success: true }), + () => {}, // silent logger + ); +} + +async function fillAccumulator( + accumulator: TrainingDataAccumulator, + domain: string, + count: number, +): Promise { + const ids: string[] = []; + for (let i = 0; i < count; i++) { + const capture: InteractionCapture = { + roleId: 'student', + domain, + input: `Question ${i}: What is concept ${i}?`, + output: `Answer ${i}: Concept ${i} is an important idea in the domain.`, + }; + ids.push(await accumulator.captureInteraction(capture)); + } + return ids; +} + +// ── Tests ────────────────────────────────────────────────── + +describe('Training Circuit', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('PersonaTrainingManager: type conversion', () => { + it('converts accumulator examples (input/output) to fine-tuning format (messages[])', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + + // Trigger training (buffer at threshold) + await manager.checkTrainingReadiness(); + + // Verify GenomeJobCreate was called + expect(GenomeJobCreate.execute).toHaveBeenCalled(); + + // Verify TrainingDatasetBuilder.validateDataset was called with converted dataset + expect(TrainingDatasetBuilder.validateDataset).toHaveBeenCalled(); + expect(TrainingDatasetBuilder.exportToJSONL).toHaveBeenCalled(); + + // The dataset passed to validateDataset should have messages[] format + const validateCall = vi.mocked(TrainingDatasetBuilder.validateDataset).mock.calls[0][0]; + expect(validateCall.examples).toHaveLength(MIN_THRESHOLD); + expect(validateCall.examples[0].messages).toBeDefined(); + expect(validateCall.examples[0].messages).toHaveLength(2); + expect(validateCall.examples[0].messages[0].role).toBe('user'); + expect(validateCall.examples[0].messages[0].content).toContain('Question 0'); + expect(validateCall.examples[0].messages[1].role).toBe('assistant'); + expect(validateCall.examples[0].messages[1].content).toContain('Answer 0'); + }); + + it('preserves feedback rating as confidence in metadata', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + // Fill to threshold, then attach feedback to last one + const ids = await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + await accumulator.captureFeedback({ + interactionId: ids[0], + source: 'human', + rating: 0.95, + }); + + await manager.checkTrainingReadiness(); + + const validateCall = vi.mocked(TrainingDatasetBuilder.validateDataset).mock.calls[0][0]; + expect(validateCall.examples[0].metadata?.confidence).toBe(0.95); + }); + }); + + describe('PersonaTrainingManager: training trigger', () => { + it('does not trigger when buffer below threshold', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD - 3); + + await manager.checkTrainingReadiness(); + + expect(GenomeJobCreate.execute).not.toHaveBeenCalled(); + }); + + it('triggers when buffer reaches threshold', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + + await manager.checkTrainingReadiness(); + + expect(GenomeJobCreate.execute).toHaveBeenCalledTimes(1); + }); + + it('forceDomain bypasses threshold check', async () => { + const accumulator = createAccumulator(1000); // Clamps to MAX_BATCH_SIZE but well above fill count + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', 2); + + await manager.checkTrainingReadiness('conversation'); // Force this domain + + expect(GenomeJobCreate.execute).toHaveBeenCalledTimes(1); + }); + + it('forceDomain does not affect other domains', async () => { + const accumulator = createAccumulator(1000); + accumulator.setBatchThreshold('code', 1000); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', 2); + await fillAccumulator(accumulator, 'code', 2); + + // Force 'conversation' only + await manager.checkTrainingReadiness('conversation'); + + // Only conversation should trigger, not code + expect(GenomeJobCreate.execute).toHaveBeenCalledTimes(1); + }); + + it('consumes buffer after training (buffer is empty after)', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD + 3); + expect(accumulator.getBufferSize('conversation')).toBe(MIN_THRESHOLD + 3); + + await manager.checkTrainingReadiness(); + + expect(accumulator.getBufferSize('conversation')).toBe(0); + }); + + it('emits TRAINING_STARTED event', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + await manager.checkTrainingReadiness(); + + const emitCalls = vi.mocked(Events.emit).mock.calls; + const startedCall = emitCalls.find( + call => call[0] === 'ai:learning:training-started', + ); + expect(startedCall).toBeDefined(); + expect(startedCall![1]).toMatchObject({ + personaId: PERSONA_ID, + domain: 'conversation', + exampleCount: MIN_THRESHOLD, + }); + }); + + it('writes JSONL file to disk before training', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + await manager.checkTrainingReadiness(); + + const fs = await import('fs'); + expect(fs.promises.mkdir).toHaveBeenCalled(); + expect(fs.promises.writeFile).toHaveBeenCalled(); + + // Verify the file path includes personaId + const writeCalls = vi.mocked(fs.promises.writeFile).mock.calls; + const filePath = writeCalls[0][0] as string; + expect(filePath).toContain(PERSONA_ID); + expect(filePath).toContain('.jsonl'); + }); + + it('emits TRAINING_ERROR when validation fails', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + + // Make validation fail + vi.mocked(TrainingDatasetBuilder.validateDataset).mockReturnValueOnce({ + valid: false, + warnings: [], + errors: ['Too few examples'], + }); + + await manager.checkTrainingReadiness(); + + const emitCalls = vi.mocked(Events.emit).mock.calls; + const errorCall = emitCalls.find( + call => call[0] === 'ai:learning:training-error', + ); + expect(errorCall).toBeDefined(); + expect(errorCall![1]).toMatchObject({ + personaId: PERSONA_ID, + phase: 'preparation', + }); + + // GenomeJobCreate should NOT have been called + expect(GenomeJobCreate.execute).not.toHaveBeenCalled(); + }); + + it('emits TRAINING_ERROR when GenomeJobCreate fails', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + + vi.mocked(GenomeJobCreate.execute).mockResolvedValueOnce({ + success: false, + error: 'Provider unavailable', + } as any); + + await manager.checkTrainingReadiness(); + + const emitCalls = vi.mocked(Events.emit).mock.calls; + const errorCall = emitCalls.find( + call => call[0] === 'ai:learning:training-error', + ); + expect(errorCall).toBeDefined(); + expect((errorCall![1] as any).error).toContain('Provider unavailable'); + }); + }); + + describe('TrainingDataAccumulator: domain isolation', () => { + it('different domains accumulate independently', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + // 'code' uses default threshold (50) + + await fillAccumulator(accumulator, 'conversation', 5); + await fillAccumulator(accumulator, 'code', 12); + + expect(accumulator.getBufferSize('conversation')).toBe(5); + expect(accumulator.getBufferSize('code')).toBe(12); + expect(accumulator.shouldMicroTune('conversation')).toBe(false); // 5 < 10 + expect(accumulator.shouldMicroTune('code')).toBe(false); // 12 < 50 (default) + }); + + it('consuming one domain does not affect others', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + + await fillAccumulator(accumulator, 'conversation', 15); + await fillAccumulator(accumulator, 'code', 15); + + const consumed = await accumulator.consumeTrainingData('conversation'); + expect(consumed).toHaveLength(15); + expect(accumulator.getBufferSize('conversation')).toBe(0); + expect(accumulator.getBufferSize('code')).toBe(15); + }); + + it('getStats returns all domains with correct thresholds', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + // 'code' gets default threshold (50) + + await fillAccumulator(accumulator, 'conversation', 3); + await fillAccumulator(accumulator, 'code', 7); + + const stats = accumulator.getStats(); + expect(stats['conversation']).toEqual({ count: 3, threshold: MIN_THRESHOLD, ready: false }); + expect(stats['code']).toEqual({ count: 7, threshold: 50, ready: false }); // Default threshold + }); + + it('getDomains only returns non-empty domains', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + + await fillAccumulator(accumulator, 'conversation', 2); + await accumulator.consumeTrainingData('conversation'); + + const domains = accumulator.getDomains(); + expect(domains).not.toContain('conversation'); + }); + }); + + describe('PersonaTrainingManager: multi-domain training', () => { + it('trains all domains that are at threshold in single call', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + accumulator.setBatchThreshold('code', MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + await fillAccumulator(accumulator, 'code', MIN_THRESHOLD + 3); + + await manager.checkTrainingReadiness(); + + // Both domains should trigger + expect(GenomeJobCreate.execute).toHaveBeenCalledTimes(2); + }); + + it('skips domains below threshold while training ready ones', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + accumulator.setBatchThreshold('code', 100); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); // At threshold + await fillAccumulator(accumulator, 'code', 5); // Below code threshold (100) + + await manager.checkTrainingReadiness(); + + // Only conversation should trigger + expect(GenomeJobCreate.execute).toHaveBeenCalledTimes(1); + }); + }); + + describe('GenomeJobCreate integration', () => { + it('passes correct configuration to GenomeJobCreate', async () => { + const accumulator = createAccumulator(MIN_THRESHOLD); + const manager = createManager(accumulator); + + await fillAccumulator(accumulator, 'conversation', MIN_THRESHOLD); + await manager.checkTrainingReadiness(); + + const call = vi.mocked(GenomeJobCreate.execute).mock.calls[0][0]; + + expect(call.personaId).toBe(PERSONA_ID); + expect(call.provider).toBe('unsloth'); + expect(call.trainingFileId).toBeDefined(); + expect(call.configuration).toBeDefined(); + expect(call.configuration.model.baseModel).toBe('llama3.2'); + expect(call.configuration.method.type).toBe('lora'); + expect(call.configuration.method.loraConfig).toMatchObject({ rank: 16, alpha: 32 }); + expect(call.configuration.schedule.epochs).toBe(3); + expect(call.configuration.schedule.batchSize).toBe(4); + expect(call.configuration.optimizer.learningRate).toBe(0.0001); + }); + }); +});