diff --git a/.specflow/orchestration-state.json b/.specflow/orchestration-state.json index b5e268e..0cd0842 100644 --- a/.specflow/orchestration-state.json +++ b/.specflow/orchestration-state.json @@ -5,17 +5,18 @@ "name": "specflow", "path": "/Users/ppatterson/dev/specflow" }, - "last_updated": "2026-01-20T06:34:58.277Z", + "last_updated": "2026-01-22T18:14:43.458Z", "orchestration": { "phase": { "number": null, "name": null, "branch": null, - "status": "not_started" + "status": "not_started", + "userGateStatus": "confirmed" }, "next_phase": { - "number": "1055", - "name": "Smart Batching & Orchestration" + "number": "1056", + "name": "JSONL Watcher (Push Updates)" }, "step": { "current": "design", @@ -23,12 +24,12 @@ "status": "not_started" }, "implement": null, - "steps": {}, "progress": { "tasks_completed": 0, "tasks_total": 0, "percentage": 0 - } + }, + "steps": {} }, "health": { "status": "ready", @@ -290,6 +291,15 @@ "completed_at": "2026-01-20T06:34:58.276Z", "tasks_completed": 0, "tasks_total": 0 + }, + { + "type": "phase_completed", + "phase_number": "1055", + "phase_name": "Smart Batching & Orchestration", + "branch": "1055-smart-batching-orchestration", + "completed_at": "2026-01-22T18:14:43.457Z", + "tasks_completed": 0, + "tasks_total": 0 } ] } diff --git a/.specify/archive/1055-smart-batching-orchestration/checklists/implementation.md b/.specify/archive/1055-smart-batching-orchestration/checklists/implementation.md new file mode 100644 index 0000000..374d3bc --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/checklists/implementation.md @@ -0,0 +1,90 @@ +# Implementation Checklist: Smart Batching & Orchestration + +**Purpose**: Implementation guidance and quality verification during development +**Created**: 2026-01-21 +**Feature**: [spec.md](../spec.md) + +## Claude Helper Implementation + +- [ ] I-001 claudeHelper() accepts typed ClaudeHelperOptions with Zod schema +- [ ] I-002 Result is validated against provided schema before 
returning +- [ ] I-003 Session management supports: new session, resume (--resume), fork (--fork-session) +- [ ] I-004 Model selection supports sonnet, haiku, opus with fallback option +- [ ] I-005 Tool restrictions via --tools and --disallowedTools flags work correctly +- [ ] I-006 Budget enforcement stops execution when limit exceeded +- [ ] I-007 Timeout handling kills process and returns error +- [ ] I-008 Decision calls use read-only tools (Read, Grep, Glob only) + +## Batch Parser Implementation + +- [ ] I-010 Parser correctly identifies `##` section headers in tasks.md +- [ ] I-011 Each section with incomplete tasks becomes one batch +- [ ] I-012 Completed tasks are excluded from batches +- [ ] I-013 Fallback to fixed-size batches (default 15) when no sections found +- [ ] I-014 BatchPlan includes section names, task IDs, and counts + +## Orchestration Service Implementation + +- [ ] I-020 State machine has all phases: design, analyze, implement, verify, merge +- [ ] I-021 Dual confirmation waits for BOTH state update AND process completion +- [ ] I-022 State is persisted to {project}/.specflow/workflows/orchestration-{id}.json +- [ ] I-023 Decision log captures all transitions with timestamps and reasons +- [ ] I-024 Integration with specflow status --json parses output correctly +- [ ] I-025 Single orchestration per project enforced (rejects concurrent) +- [ ] I-026 Skip flags (skipDesign, skipAnalyze) correctly bypass steps + +## Auto-Healing Implementation + +- [ ] I-030 Failure context captures: stderr, attempted tasks, completed tasks, failed tasks +- [ ] I-031 Healer prompt includes error details and remaining task IDs +- [ ] I-032 Healer only attempts remaining tasks in current batch +- [ ] I-033 Max heal attempts per batch is enforced (default 1) +- [ ] I-034 Healer success marks batch as "healed" and continues +- [ ] I-035 Healer failure stops orchestration with full context for user + +## API Routes Implementation + +- [ ] I-040 POST 
/api/workflow/orchestrate validates project exists +- [ ] I-041 POST /api/workflow/orchestrate checks for existing orchestration +- [ ] I-042 Response includes orchestrationId and detected batch info +- [ ] I-043 GET /api/workflow/orchestrate/status returns full state +- [ ] I-044 POST /api/workflow/orchestrate/cancel terminates process and updates state +- [ ] I-045 POST /api/workflow/orchestrate/resume only works on paused orchestrations +- [ ] I-046 POST /api/workflow/orchestrate/merge only works when status is "waiting_merge" + +## UI Components Implementation + +- [ ] I-050 Configuration modal shows detected batch count in header +- [ ] I-051 Core options section always visible with correct defaults +- [ ] I-052 Advanced options collapsed by default, expandable +- [ ] I-053 Budget limits section validates numeric input +- [ ] I-054 PhaseProgressBar highlights current phase correctly +- [ ] I-055 BatchProgress shows section name, task counts, percentage +- [ ] I-056 DecisionLogPanel is collapsible and scrollable +- [ ] I-057 OrchestrationControls shows Pause/Cancel during active run +- [ ] I-058 MergeReadyPanel shows when status is "waiting_merge" +- [ ] I-059 OrchestrationBadge uses a different color than workflow badges + +## Integration Implementation + +- [ ] I-060 CompletePhaseButton is primary (prominent styling, icon, subtitle) +- [ ] I-061 Secondary buttons (Orchestrate, Merge, Review, Memory) remain accessible +- [ ] I-062 Action buttons replaced by OrchestrationProgress when active +- [ ] I-063 Project card menu has "Complete Phase" first and highlighted +- [ ] I-064 "Run Workflow" reorganized as secondary flyout +- [ ] I-065 Reconciliation detects in-progress orchestrations on startup +- [ ] I-066 Reconciliation resumes or marks as failed based on process health + +## Code Quality + +- [ ] I-070 All new code uses TypeScript strict mode +- [ ] I-071 All external data validated with Zod schemas +- [ ] I-072 Error messages include context and next steps
(Principle V) +- [ ] I-073 State stored in .specflow/ not .specify/ (Principle VIII) +- [ ] I-074 No direct edits to state files - use specflow CLI (Principle III) + +## Notes + +- Check items off as completed: `[x]` +- Reference task IDs from tasks.md when applicable +- Flag blockers immediately diff --git a/.specify/archive/1055-smart-batching-orchestration/checklists/verification.md b/.specify/archive/1055-smart-batching-orchestration/checklists/verification.md new file mode 100644 index 0000000..20dcd5d --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/checklists/verification.md @@ -0,0 +1,103 @@ +# Verification Checklist: Smart Batching & Orchestration + +**Purpose**: Post-implementation verification before USER GATE +**Created**: 2026-01-21 +**Feature**: [spec.md](../spec.md) + +## USER GATE Items (from Phase File) + +These items MUST be verified before phase can be considered complete: + +- [ ] V-001 Project detail: "Complete Phase" button is prominent, styled differently +- [ ] V-002 Project detail: Secondary buttons (Orchestrate, Merge, Review, Memory) still work +- [ ] V-003 Project card: "Complete Phase" is first menu item (highlighted) +- [ ] V-004 Project card: "Run Workflow" flyout contains Orchestrate, Merge, Review, Memory +- [ ] V-005 Configuration modal appears when clicking "Complete Phase" (both locations) +- [ ] V-006 Modal shows detected batch count and current phase status +- [ ] V-007 Start orchestration, see batches auto-detected from tasks.md sections +- [ ] V-008 State machine transitions: design → analyze → implement → verify +- [ ] V-009 Batches execute sequentially without user input +- [ ] V-010 Skip options work (skipDesign, skipAnalyze) +- [ ] V-011 Introduce a failure, see auto-heal attempt (uses Claude Helper) +- [ ] V-012 If heal succeeds, execution continues +- [ ] V-013 Progress UI replaces action buttons during orchestration +- [ ] V-014 Auto-merge works when enabled +- [ ] V-015 Pauses at merge-ready when 
auto-merge disabled +- [ ] V-016 Additional context appears in Claude's output +- [ ] V-017 Budget limits respected (orchestration stops if exceeded) +- [ ] V-018 Decision log shows Claude Helper calls and reasoning + +## UI Design Verification + +- [ ] V-UI1 UI implementation matches ui-design.md mockups +- [ ] V-UI2 All components from Component Inventory are implemented +- [ ] V-UI3 All interactions from Interactions table work as specified +- [ ] V-UI4 Design constraints from ui-design.md are respected +- [ ] V-UI5 Accessibility considerations from ui-design.md are addressed + +## Functional Verification + +### Configuration Modal + +- [ ] V-020 Core options have correct defaults (all off except auto-heal on) +- [ ] V-021 Advanced options expand/collapse with animation +- [ ] V-022 Budget limits accept valid numeric input only +- [ ] V-023 Start button disabled until valid configuration +- [ ] V-024 Warning shown if no sections detected in tasks.md + +### Progress Display + +- [ ] V-030 Phase progress bar shows correct phase as current +- [ ] V-031 Batch progress updates as tasks complete +- [ ] V-032 Decision log shows chronological entries +- [ ] V-033 Elapsed time updates in real-time +- [ ] V-034 Estimated remaining time calculated reasonably + +### State Management + +- [ ] V-040 Orchestration state persists across dashboard refresh +- [ ] V-041 Dashboard restart resumes in-progress orchestration +- [ ] V-042 Cancelled orchestration stops and preserves state +- [ ] V-043 Paused orchestration can be resumed +- [ ] V-044 Second orchestration attempt shows error message + +### Error Handling + +- [ ] V-050 Batch failure triggers auto-heal when enabled +- [ ] V-051 Heal failure stops orchestration with full context +- [ ] V-052 Budget exceeded stops gracefully with notification +- [ ] V-053 Stale process detected and marked appropriately +- [ ] V-054 Network/API errors show helpful messages + +## Integration Verification + +- [ ] V-060 Existing workflow 
buttons still work when no orchestration is active +- [ ] V-061 Project card badges update correctly +- [ ] V-062 Orchestration works with projects that have USER GATE +- [ ] V-063 Orchestration works with projects without USER GATE +- [ ] V-064 Works with tasks.md having no ## sections (fallback batching) + +## Success Criteria Verification + +From spec.md: + +- [ ] V-SC1 User can complete 50-task phase with one click and one config +- [ ] V-SC2 Batches execute sequentially with progress visible +- [ ] V-SC3 Auto-healing recovers from common batch failures +- [ ] V-SC4 Orchestration survives dashboard restart +- [ ] V-SC5 Decision log provides clear debugging information +- [ ] V-SC6 Budget limits prevent runaway costs + +## Test Coverage Verification + +- [ ] V-070 claude-helper.test.ts covers schema validation, errors +- [ ] V-071 orchestration-service.test.ts covers all state transitions +- [ ] V-072 batch-parser.test.ts covers various tasks.md formats +- [ ] V-073 All tests pass: `pnpm test` + +## Notes + +- Check items off as completed: `[x]` +- Document any findings or issues inline +- All USER GATE items (V-001 through V-018) require manual testing +- Coordinate with user for USER GATE verification diff --git a/.specify/archive/1055-smart-batching-orchestration/discovery.md b/.specify/archive/1055-smart-batching-orchestration/discovery.md new file mode 100644 index 0000000..643bd47 --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/discovery.md @@ -0,0 +1,221 @@ +# Discovery: Smart Batching & Orchestration + +**Phase**: `1055-smart-batching-orchestration` +**Created**: 2026-01-21 +**Status**: Complete + +## Phase Context + +**Source**: ROADMAP Phase 1055, PDR `workflow-dashboard-orchestration.md` +**Goal**: Enable autonomous workflow execution with smart batching, configurable behavior, and auto-healing for large task lists that exceed context windows.
+ +--- + +## Codebase Examination + +### Related Implementations + +| Location | Description | Relevance | +|----------|-------------|-----------| +| `packages/dashboard/src/lib/services/workflow-service.ts` | Core workflow execution service | Foundation for orchestration - handles skill execution, state persistence | +| `packages/dashboard/src/lib/services/process-health.ts` | Process lifecycle and health monitoring | Provides staleness detection, PID tracking for batch monitoring | +| `packages/dashboard/src/app/api/workflow/start/route.ts` | API route for starting workflows | Entry point pattern to extend for orchestration | +| `packages/dashboard/src/components/projects/action-button.tsx` | Project card action buttons | Where "Complete Phase" button will be added | +| `packages/dashboard/src/hooks/use-workflow-actions.ts` | Workflow action mutations hook | Pattern for orchestration control actions | +| `packages/cli/src/lib/tasks.ts` | Tasks.md parser | Used for batch detection from `##` sections | +| `packages/shared/src/schemas/` | Zod validation schemas | Pattern for OrchestrationExecution schema | + +### Existing Patterns & Conventions + +- **Detached Process Spawning**: Workflows spawn Claude CLI as detached processes with PIDs tracked in `{project}/.specflow/workflows/{sessionId}/process.pid`. Orchestration will use the same pattern for batch executions. + +- **Dual-Storage State**: Pre-sessionId state in `pending-{id}.json`, moves to `{sessionId}/metadata.json` after CLI starts. Orchestration will add `orchestration-{id}.json` for aggregate state. + +- **Polling-Based Status**: 3-second polling interval via hooks/API, proven reliable. No SSE needed. + +- **Structured Output**: `--disallowedTools "AskUserQuestion"` forces Claude to use structured_output for questions. Same pattern for Claude Helper decisions. + +- **Skill Prompt Injection**: User context appended to skill prompts via buildInitialPrompt(). Same mechanism for batch constraints. 
+ +- **WorkflowExecution Schema**: Full execution state tracked with status, answers, logs, cost. Extend with OrchestrationExecution for multi-batch tracking. + +### Integration Points + +- **Project Registry**: All workflows validate against `~/.specflow/registry.json`. Orchestrations will be project-scoped. + +- **Workflow Service**: `workflowService.start()` spawns skills. Orchestration state machine calls this for each step/batch. + +- **Process Reconciliation**: `ensureReconciliation()` on startup checks process health. Extend for orchestration resume. + +- **Project Detail UI**: Workflow actions area will transform to show orchestration progress when active. + +- **Specflow CLI**: `specflow status --json` provides phase/task/health context. State machine depends on this output. + +### Constraints Discovered + +- **Single Orchestration Per Project**: Cannot run concurrent orchestrations on same project - would conflict on tasks.md state. + +- **Dual Confirmation Timing**: Must wait for BOTH orchestration state update AND process completion before making decisions to prevent race conditions. + +- **Budget Limits**: Claude Helper calls need cost caps to prevent runaway spending on decisions/healing. + +- **Tool Restrictions**: Claude Helper for decisions should be read-only (no Edit/Write) to prevent unintended modifications. + +--- + +## Requirements Sources + +### From ROADMAP/Phase File + +Phase 1055 defined in ROADMAP.md: +- Smart Batching & Orchestration +- **USER GATE**: Auto-batch tasks, state machine, auto-healing + +### From Phase File (.specify/phases/1055-smart-batching.md) + +Comprehensive 10-section specification including: +1. Orchestration Configuration Modal - upfront user preferences +2. Programmatic Batch Detection - `##` sections as batch boundaries +3. Dashboard Orchestration State Machine - design → analyze → implement → verify flow +4. Sequential Batch Execution - one batch at a time with tracking +5. 
Auto-Healing on Failure - spawn healer Claude for failed batches +6. Orchestration Progress Display - phase bar, batch progress, status indicators +7. Orchestration State Structure - JSON schema for tracking +8. UI Integration Points - "Complete Phase" as primary action +9. API Design - new orchestration routes +10. Claude Helper Utility - typed interactions for decisions/healing + +### From PDR (workflow-dashboard-orchestration.md) + +Key principles: +- Build on POC, don't reinvent +- Minimal user interaction (configure upfront, then autonomous) +- Dashboard as orchestrator (hybrid: state machine + Claude fallback) +- Do NOT modify existing /flow.* skills + +### From Memory Documents + +- **Constitution**: + - Principle III (CLI Over Direct Edits) - Use `specflow` commands for state + - Principle VII (Three-Line Output Rule) - Progress UI should prioritize critical info + - Principle VIII (Repo Knowledge vs Operational State) - Orchestration state goes in `.specflow/` + +- **Tech Stack**: + - TypeScript/ESM for all new code + - Zod for validation schemas + - Next.js API routes pattern + - shadcn/ui components + +--- + +## Scope Clarification + +### Questions Asked + +The phase file (1055-smart-batching.md) was updated 2026-01-21 with extremely detailed specifications resolving all major design questions: + +#### Question 1: Batch Failure Detection + +**Context**: Need reliable detection of incomplete batches + +**Decision (from phase file)**: Use A + C approach +- Parse task completion from tasks.md (source of truth) +- AND require Claude to output structured completion status +- Check orchestration state `step.current` for skill-signaled completion + +#### Question 2: Healing Prompt Scope + +**Decision (from phase file)**: Current batch only +- Healer continues remaining tasks in current batch +- Once batch complete (or healer fails), proceed normally + +#### Question 3: Cross-batch State + +**Decision (from phase file)**: Out of scope +- If batch 2 breaks batch 
1's work, healer tries once, then stops for user + +#### Question 4: Concurrent Orchestrations + +**Decision (from phase file)**: No - one per project +- Single active orchestration per project +- Error shown if attempting second + +#### Question 5: Resume After Dashboard Restart + +**Decision (from phase file)**: Yes, auto-resume +- State persisted to `{project}/.specflow/workflows/orchestration-{id}.json` +- Reconciler detects and resumes in-progress orchestrations + +#### Question 6: Decision Timing + +**Decision (from phase file)**: Wait for dual confirmation +- Don't make decisions on state change alone +- Wait for BOTH: state update AND process completion + +--- + +### Confirmed Understanding + +**What the user wants to achieve**: +Autonomous phase completion from the dashboard. User clicks "Complete Phase", configures preferences once, and the system handles everything: design, analyze, implement (in batches), verify, and optionally merge - with auto-healing on failures and minimal interruption. 
+ +**How it relates to existing code**: +- Builds on workflow-service.ts execution patterns +- Extends WorkflowExecution schema with OrchestrationExecution +- Adds new API routes at `/api/workflow/orchestrate/*` +- Transforms project detail UI when orchestration active +- Uses existing tasks.ts parser for batch detection + +**Key constraints and requirements**: +- Single orchestration per project +- Dual confirmation before state transitions +- Budget limits for Claude Helper calls +- Read-only tools for decision calls +- Preserve existing /flow.* skills unchanged + +**Technical approach (from phase file)**: +- Configuration modal upfront (Core Options + Advanced Options + Budget) +- State machine with fallback to Claude Helper for unclear states +- Batch execution via skill input injection (no skill modifications) +- Auto-healing spawns continuation Claude with error context +- Progress UI replaces action buttons during orchestration + +**User confirmed**: Phase file serves as confirmed requirements + +--- + +## Recommendations for SPECIFY + +### Should Include in Spec + +- Configuration modal with all options from phase file Section 0 +- Claude Helper utility (Section 10) - foundational for decisions/healing +- State machine logic (Section 2) +- Batch detection from tasks.md sections (Section 1) +- Sequential batch execution (Section 3) +- Auto-healing mechanism (Section 4) +- Progress UI components (Section 5) +- New API routes (Section 9) +- OrchestrationExecution schema (Section 7) +- UI changes for "Complete Phase" button (Section 8) + +### Should Exclude from Spec (Non-Goals) + +- Branch strategy selection (future) +- Test/dry-run mode (future) +- Notification level customization (future) +- Time-based constraints (future) +- Modifying existing /flow.* skills +- SSE/WebSocket for real-time (polling sufficient) +- Individual task selection UI (programmatic only) + +### Potential Risks + +- **Race conditions**: State updates before process completion - 
mitigated by dual confirmation pattern +- **Infinite loops in healing**: Mitigated by single heal attempt per batch +- **Budget runaway**: Mitigated by configurable limits per batch/total/healing +- **Context window limits**: Mitigated by batching based on tasks.md sections + +### Questions to Address in CLARIFY + +None - phase file is comprehensive and includes resolved design decisions. diff --git a/.specify/archive/1055-smart-batching-orchestration/plan.md b/.specify/archive/1055-smart-batching-orchestration/plan.md new file mode 100644 index 0000000..032eaf7 --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/plan.md @@ -0,0 +1,341 @@ +# Implementation Plan: Smart Batching & Orchestration + +**Branch**: `1055-smart-batching-orchestration` | **Date**: 2026-01-21 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `specs/1055-smart-batching-orchestration/spec.md` + +## Summary + +Implement autonomous phase completion with smart batching, configurable behavior, and auto-healing. The system enables users to click "Complete Phase", configure preferences once, and have the dashboard orchestrate the entire design → analyze → implement → verify → merge workflow with minimal intervention. 
+ +Key technical components: +- **Claude Helper Utility**: Foundational service for typed Claude interactions (decisions, verification, healing) +- **Orchestration State Machine**: Manages phase transitions with dual confirmation pattern +- **Batch Detection**: Parses tasks.md `##` sections as batch boundaries +- **Configuration Modal**: Upfront user preferences before autonomous execution +- **Progress UI**: Replaces action buttons during active orchestration + +## Technical Context + +**Language/Version**: TypeScript 5.7+ (ESM, strict mode) +**Primary Dependencies**: Next.js 16.x, React 19.x, Commander.js 12.x, Zod 3.x, shadcn/ui +**Storage**: File-based JSON (`{project}/.specflow/workflows/orchestration-{id}.json`) +**Testing**: Vitest 2.x with memfs for filesystem mocking +**Target Platform**: Node.js 18+, macOS/Linux +**Project Type**: Monorepo (packages/dashboard, packages/cli, packages/shared) +**Performance Goals**: Polling at 3s intervals, budget tracking per batch +**Constraints**: Single orchestration per project, dual confirmation before transitions +**Scale/Scope**: Support 50+ task phases, 4-hour orchestrations + +## Constitution Check + +| Principle | Status | Notes | +|-----------|--------|-------| +| III. CLI Over Direct Edits | ✅ Pass | Uses `specflow status --json`, `specflow state set` | +| VII. Three-Line Output Rule | ✅ Pass | Progress UI prioritizes critical info | +| VIII. Repo Knowledge vs Operational State | ✅ Pass | Orchestration state in `.specflow/`, not `.specify/` | +| IIa. TypeScript for CLI Packages | ✅ Pass | All new code in TypeScript | +| V. Helpful Error Messages | ✅ Pass | Error states include context and next steps | + +No violations requiring justification. 
+ +## Project Structure + +### Documentation (this feature) + +```text +specs/1055-smart-batching-orchestration/ +├── discovery.md # Codebase findings and decisions +├── spec.md # Feature specification +├── requirements.md # Requirements quality checklist +├── ui-design.md # Visual mockups and rationale +├── plan.md # This file +├── tasks.md # Task breakdown +└── checklists/ + ├── implementation.md + └── verification.md +``` + +### Source Code Changes + +```text +packages/dashboard/ +├── src/ +│ ├── app/ +│ │ └── api/ +│ │ └── workflow/ +│ │ └── orchestrate/ # NEW: Orchestration API routes +│ │ ├── route.ts # POST /api/workflow/orchestrate +│ │ ├── status/ +│ │ │ └── route.ts # GET /api/workflow/orchestrate/status +│ │ ├── list/ +│ │ │ └── route.ts # GET /api/workflow/orchestrate/list +│ │ ├── cancel/ +│ │ │ └── route.ts # POST /api/workflow/orchestrate/cancel +│ │ ├── resume/ +│ │ │ └── route.ts # POST /api/workflow/orchestrate/resume +│ │ └── merge/ +│ │ └── route.ts # POST /api/workflow/orchestrate/merge +│ │ +│ ├── components/ +│ │ └── orchestration/ # NEW: Orchestration UI components +│ │ ├── start-orchestration-modal.tsx +│ │ ├── orchestration-config-form.tsx +│ │ ├── orchestration-progress.tsx +│ │ ├── phase-progress-bar.tsx +│ │ ├── batch-progress.tsx +│ │ ├── decision-log-panel.tsx +│ │ ├── orchestration-controls.tsx +│ │ ├── merge-ready-panel.tsx +│ │ └── orchestration-badge.tsx +│ │ +│ ├── lib/ +│ │ └── services/ +│ │ ├── claude-helper.ts # NEW: Claude Helper utility +│ │ ├── orchestration-service.ts # NEW: Orchestration state machine +│ │ ├── batch-parser.ts # NEW: Batch detection from tasks.md +│ │ ├── auto-healing-service.ts # NEW: Auto-healing on failure +│ │ └── workflow-service.ts # MODIFY: Add orchestration hooks +│ │ +│ └── hooks/ +│ └── use-orchestration.ts # NEW: Orchestration state hook +│ +└── __tests__/ + └── orchestration/ # NEW: Orchestration tests + ├── claude-helper.test.ts + ├── orchestration-service.test.ts + ├── batch-parser.test.ts + 
└── auto-healing-service.test.ts + +packages/shared/ +└── src/ + └── schemas/ + ├── orchestration-execution.ts # NEW: OrchestrationExecution schema + └── orchestration-config.ts # NEW: OrchestrationConfig schema +``` + +**Structure Decision**: Extends existing monorepo structure. New orchestration components in dedicated directory. Services follow established pattern from workflow-service.ts. + +## Implementation Phases + +### Phase 1: Foundation (Claude Helper + Schemas) + +**Goal**: Establish foundational utilities needed by all other components. + +1. **Zod Schemas** (`packages/shared/`) + - `OrchestrationConfigSchema` - modal configuration + - `OrchestrationExecutionSchema` - full state tracking + - `BatchItemSchema` - per-batch tracking + - `ClaudeHelperOptionsSchema` - helper configuration + - `ClaudeHelperResultSchema` - helper response + +2. **Claude Helper Utility** (`claude-helper.ts`) + - Typed function with Zod schema validation + - Session management (new, resume, fork) + - Model selection with fallback + - Tool restrictions (read-only for decisions) + - Budget enforcement + - Error handling (timeout, validation failures) + +### Phase 2: Core Services (State Machine + Batch Detection) + +**Goal**: Implement orchestration logic independent of UI. + +1. **Batch Parser** (`batch-parser.ts`) + - Parse tasks.md for `##` sections + - Identify incomplete tasks per section + - Fall back to fixed-size batches + - Return batch plan with task IDs + +2. **Orchestration Service** (`orchestration-service.ts`) + - State machine implementation + - Dual confirmation pattern (state + process) + - Step transitions (design → analyze → implement → verify) + - State persistence to JSON + - Decision logging + - Integration with `specflow status --json` + +3. 
**Auto-Healing Service** (`auto-healing-service.ts`) + - Capture failure context (stderr, tasks) + - Build healer prompt + - Spawn healer via Claude Helper + - Handle success/failure outcomes + - Limit heal attempts per batch + +### Phase 3: API Routes + +**Goal**: Expose orchestration functionality via REST API. + +1. **POST /api/workflow/orchestrate** - Start orchestration + - Validate project exists + - Check no existing orchestration + - Parse batch plan + - Create orchestration record + - Start first step + +2. **GET /api/workflow/orchestrate/status** - Get status + - Return current orchestration state + - Include progress, batches, decision log + +3. **GET /api/workflow/orchestrate/list** - List orchestrations + - Return all orchestrations for project + - Include history (completed/failed) + +4. **POST /api/workflow/orchestrate/cancel** - Cancel + - Stop current execution + - Update state to cancelled + - Preserve state for debugging + +5. **POST /api/workflow/orchestrate/resume** - Resume + - Resume from paused state + - Continue from next step/batch + +6. **POST /api/workflow/orchestrate/merge** - Trigger merge + - Only when status is "waiting_merge" + - Start /flow.merge via workflow service + +### Phase 4: UI Components + +**Goal**: Build configuration modal and progress display. + +1. **Configuration Modal** (`start-orchestration-modal.tsx`) + - Core options section + - Advanced options (collapsible) + - Budget limits section + - Batch count display + - Start button with validation + +2. **Progress Components** + - `phase-progress-bar.tsx` - Design→Analyze→Implement→Verify→Merge + - `batch-progress.tsx` - Current batch, task counts, progress bar + - `decision-log-panel.tsx` - Collapsible log of decisions + - `orchestration-controls.tsx` - Pause/Cancel buttons + +3. 
**State Components** + - `merge-ready-panel.tsx` - When paused at merge + - `orchestration-badge.tsx` - For project cards + +### Phase 5: Integration + +**Goal**: Wire everything together in the dashboard. + +1. **Project Detail Integration** + - Add "Complete Phase" primary button + - Transform to progress when active + - Integrate with existing workflow actions + +2. **Project Card Integration** + - Add "Complete Phase" to menu (first, highlighted) + - Reorganize "Run Workflow" as secondary + - Show orchestration badge + +3. **Hook Integration** (`use-orchestration.ts`) + - Poll orchestration status + - Handle state transitions + - Trigger notifications + +4. **Reconciliation** + - Detect in-progress orchestrations on startup + - Resume or mark as failed + +## Data Flow + +``` +User clicks "Complete Phase" + │ + ▼ +┌──────────────────────┐ +│ StartOrchestrationModal │ +│ - Show config options │ +│ - Display batch count │ +└──────────┬──────────────┘ + │ user clicks Start + ▼ +POST /api/workflow/orchestrate + │ + ▼ +┌──────────────────────┐ +│ OrchestrationService │ +│ - Create state record │ +│ - Detect batches │ +│ - Start first step │ +└──────────┬──────────────┘ + │ + ▼ +┌──────────────────────┐ +│ WorkflowService.start│ ← Existing service +│ - Spawn Claude CLI │ +│ - Return execution ID │ +└──────────┬──────────────┘ + │ + ▼ +┌──────────────────────┐ +│ Polling Loop │ +│ - Check specflow status│ +│ - Check process health │ +│ - Wait for dual confirm│ +└──────────┬──────────────┘ + │ step complete + ▼ +┌──────────────────────┐ +│ OrchestrationService │ +│ - Update state │ +│ - Log decision │ +│ - Start next step │ +└──────────┬──────────────┘ + │ + (repeat) + │ + ▼ +┌──────────────────────┐ +│ Complete/Merge Ready │ +└──────────────────────┘ +``` + +## Error Handling + +| Error | Detection | Recovery | +|-------|-----------|----------| +| Batch failure | Exit code != 0, incomplete tasks | Auto-heal (if enabled) | +| Heal failure | Healer exits with error |
Stop, notify user with context | +| Budget exceeded | Cost tracking > limit | Stop current batch, notify | +| Process stale | No session file update > 5min | Mark stale, user intervention | +| State corruption | JSON parse failure | Rebuild from artifacts | +| Concurrent attempt | Existing orchestration check | Reject with error message | +| Dashboard restart | Reconciliation on startup | Resume or mark failed | + +## Testing Strategy + +1. **Unit Tests** (with memfs) + - Batch parser: various tasks.md formats + - State machine: all transitions + - Claude Helper: schema validation, error handling + +2. **Integration Tests** + - Full orchestration flow (mocked Claude) + - API routes with test fixtures + - Reconciliation scenarios + +3. **Manual Testing** (per USER GATE) + - Start orchestration, observe batches + - Introduce failure, observe healing + - Dashboard restart, observe resume + - Budget limits, observe stop + +## Dependencies + +- **Phase 1048**: Workflow Foundation (workflow-service.ts) - Complete +- **Phase 1050**: Workflow UI (skill picker, status badges) - Complete +- **Phase 1051**: Questions & Notifications (question handling) - Complete +- **Phase 1052**: Session Viewer (JSONL parsing) - Complete + +All dependencies are complete. This phase builds on established patterns. 
+ +## Risk Mitigation + +| Risk | Mitigation | +|------|------------| +| Race conditions | Dual confirmation pattern (state + process) | +| Infinite heal loops | Max heal attempts per batch (default 1) | +| Cost runaway | Budget limits per batch/total/healing | +| Long orchestrations | State persistence, resume on restart | +| Context window limits | Batch-based execution | diff --git a/.specify/archive/1055-smart-batching-orchestration/requirements.md b/.specify/archive/1055-smart-batching-orchestration/requirements.md new file mode 100644 index 0000000..7581e06 --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/requirements.md @@ -0,0 +1,62 @@ +# Requirements Quality Checklist: Smart Batching & Orchestration + +**Purpose**: Verify requirements are complete, clear, and testable before implementation +**Created**: 2026-01-21 +**Feature**: [spec.md](spec.md) + +## Requirement Completeness + +- [x] R-001 All user stories have acceptance scenarios +- [x] R-002 Edge cases are documented +- [x] R-003 Error handling scenarios defined (heal failures, budget exceeded, concurrent attempts) +- [x] R-004 Success criteria are measurable +- [x] R-005 Non-goals are explicitly stated +- [x] R-006 Dependencies on previous phases identified (1048, 1050, 1051, 1052) + +## Requirement Clarity + +- [x] R-010 Functional requirements use MUST/SHOULD language +- [x] R-011 No ambiguous terms ("quickly", "easily", "user-friendly") +- [x] R-012 Technical constraints are specific (single orchestration per project, dual confirmation) +- [x] R-013 UI requirements reference mockups in ui-design.md +- [x] R-014 API routes have clear endpoints and methods + +## Scenario Coverage + +- [x] R-020 Happy path: Full orchestration from design to merge +- [x] R-021 Skip paths: skipDesign, skipAnalyze configurations +- [x] R-022 Failure path: Batch failure with auto-healing +- [x] R-023 Failure path: Healer fails, orchestration stops +- [x] R-024 Resume path: Dashboard restart during 
orchestration +- [x] R-025 Cancel path: User cancels mid-orchestration +- [x] R-026 Concurrent attempt: Second orchestration rejected + +## Edge Case Coverage + +- [x] R-030 No sections in tasks.md (fallback batching) +- [x] R-031 USER GATE phase (pauses at verify) +- [x] R-032 Budget exceeded mid-batch +- [x] R-033 Stale process detection +- [x] R-034 Empty batch (all tasks already complete) + +## Data Model Clarity + +- [x] R-040 OrchestrationExecution schema defined in phase file +- [x] R-041 OrchestrationConfig options enumerated +- [x] R-042 BatchItem tracking fields specified +- [x] R-043 ClaudeHelper interfaces documented +- [x] R-044 State file locations documented + +## Integration Points + +- [x] R-050 Workflow service integration pattern defined +- [x] R-051 Process health integration defined +- [x] R-052 Specflow CLI dependency documented (`specflow status --json`) +- [x] R-053 Project registry dependency documented +- [x] R-054 Session JSONL integration for context + +## Notes + +- Phase 1055 phase file (.specify/phases/1055-smart-batching.md) is exceptionally detailed +- PDR (workflow-dashboard-orchestration.md) provides architecture context +- All design decisions pre-resolved in phase file "Design Decisions (Resolved)" section diff --git a/.specify/archive/1055-smart-batching-orchestration/spec.md b/.specify/archive/1055-smart-batching-orchestration/spec.md new file mode 100644 index 0000000..4f7671b --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/spec.md @@ -0,0 +1,247 @@ +# Feature Specification: Smart Batching & Orchestration + +**Feature Branch**: `1055-smart-batching-orchestration` +**Created**: 2026-01-21 +**Status**: Final +**Input**: Phase 1055 from ROADMAP, PDR workflow-dashboard-orchestration.md + +--- + +## User Scenarios & Testing + +### User Story 1 - Complete Phase with One Click (Priority: P1) + +A developer working on a SpecFlow project wants to complete an entire phase without manual intervention. 
They click "Complete Phase", configure their preferences once, and walk away while the system handles design, implement (in batches), and verify steps autonomously. + +**Why this priority**: Core value proposition - autonomous phase completion is the north-star goal of this feature. + +**Independent Test**: Start orchestration on a project with existing tasks.md, watch it progress through implement batches and complete without user interaction. + +**Acceptance Scenarios**: + +1. **Given** a project with phase 1055 open and tasks.md with 4 `##` sections, **When** user clicks "Complete Phase" and starts orchestration, **Then** system detects 4 batches and shows "Detected 4 batches from tasks.md" + +2. **Given** orchestration is configured with skipDesign=false, **When** orchestration starts on a project without spec.md, **Then** system runs /flow.design before implement + +3. **Given** orchestration is running implement batch 2 of 4, **When** batch completes successfully, **Then** system automatically starts batch 3 without user intervention + +4. **Given** all tasks are complete, **When** implement phase finishes, **Then** system automatically runs /flow.verify + +--- + +### User Story 2 - Configuration Modal (Priority: P1) + +A developer wants to customize orchestration behavior before starting. They see a configuration modal with core options (auto-merge, skip design, skip analyze, additional context) and advanced options (auto-heal settings, batch size fallback). + +**Why this priority**: Essential for user control and trust - users must configure behavior before autonomous execution. + +**Independent Test**: Open configuration modal, adjust settings, verify they persist into orchestration execution. + +**Acceptance Scenarios**: + +1. 
**Given** user clicks "Complete Phase" button, **When** modal opens, **Then** modal displays Core Options section with auto-merge toggle (default: off), skip design toggle (default: off), skip analyze toggle (default: off), and additional context textarea + +2. **Given** user expands Advanced Options section, **When** viewing options, **Then** modal shows auto-heal toggle (default: on), max heal attempts (default: 1), batch size fallback (default: 15), pause between batches toggle (default: off) + +3. **Given** user enters "Focus on performance" in additional context, **When** orchestration runs /flow.implement, **Then** that context appears in Claude's skill prompt + +4. **Given** user sets skipDesign=true and project has no spec.md, **When** orchestration starts, **Then** system skips design and goes directly to analyze (or to implement if skipAnalyze is also set) + +--- + +### User Story 3 - Auto-Healing on Failure (Priority: P2) + +When a batch fails during implementation, the system should automatically attempt to fix the issue and continue, rather than requiring manual intervention. + +**Why this priority**: Critical for autonomous operation - failures are common and should self-heal when possible. + +**Independent Test**: Introduce a failure in a batch, observe healer Claude spawn and attempt recovery. + +**Acceptance Scenarios**: + +1. **Given** batch 2 fails with error "file not found", **When** auto-heal is enabled, **Then** system spawns healer Claude with error context and remaining task IDs + +2. **Given** healer Claude fixes the issue and completes remaining tasks, **When** healing succeeds, **Then** system marks batch as "healed" and continues to batch 3 + +3. **Given** healer Claude fails to fix the issue, **When** healing fails, **Then** system stops orchestration, marks batch as "failed", and notifies user with full context + +4. 
**Given** maxHealAttempts=1 and first heal attempt failed, **When** considering retry, **Then** system does NOT attempt second heal (prevents infinite loops) + +--- + +### User Story 4 - Orchestration Progress Display (Priority: P2) + +While orchestration runs, user wants clear visibility into current phase, batch progress, and overall status without needing to check CLI output. + +**Why this priority**: Visibility builds trust - users need to know what's happening during autonomous execution. + +**Independent Test**: Start orchestration, observe progress UI updating as batches complete. + +**Acceptance Scenarios**: + +1. **Given** orchestration is in implement phase, **When** viewing project detail, **Then** progress bar shows "Design --●-- Analyze --●-- Implement --○-- Verify --○-- Merge" with Implement highlighted + +2. **Given** implement is running batch 2 of 4 (Core Components), **When** viewing progress, **Then** displays "Implementing batch 2 of 4: Core Components" and "Tasks: 12/35 complete" + +3. **Given** auto-healing is in progress, **When** viewing status, **Then** shows healing indicator with message "Auto-healing batch 2..." + +4. **Given** orchestration completes verify step, **When** auto-merge is disabled, **Then** status shows "Merge ready" and waits for user action + +--- + +### User Story 5 - UI Entry Points (Priority: P2) + +Developer can start orchestration from multiple locations: project detail page and project card menu. + +**Why this priority**: Accessibility - users should find the primary action easily from wherever they are. + +**Independent Test**: Start orchestration from project card, verify same modal and behavior as project detail. + +**Acceptance Scenarios**: + +1. **Given** viewing project detail page, **When** looking at workflow actions area, **Then** "Complete Phase" is the primary prominent button (larger, gradient/accent color, icon) + +2. 
**Given** project card in project list, **When** opening actions menu, **Then** "Complete Phase" is first menu item (highlighted) + +3. **Given** orchestration is already running for project, **When** clicking "Complete Phase" again, **Then** system shows error message "Orchestration already in progress" with option to cancel the existing orchestration + +4. **Given** orchestration is active, **When** viewing project detail, **Then** action buttons are replaced with progress display and Cancel/Pause controls + +--- + +### User Story 6 - State Persistence and Resume (Priority: P3) + +If dashboard restarts while orchestration is running, the system should detect and resume the orchestration from where it left off. + +**Why this priority**: Reliability - orchestrations can take hours and must survive dashboard restarts. + +**Independent Test**: Start orchestration, restart dashboard, verify it resumes automatically. + +**Acceptance Scenarios**: + +1. **Given** orchestration is in implement batch 2, **When** dashboard process restarts, **Then** reconciler detects in-progress orchestration and resumes from batch 2 + +2. **Given** orchestration state is saved to `{project}/.specflow/workflows/orchestration-{id}.json`, **When** dashboard starts, **Then** state is loaded and orchestration continues + +3. **Given** orchestration process died unexpectedly, **When** reconciler checks health, **Then** reconciler marks orchestration as failed if process is dead + +--- + +### Edge Cases + +- What happens when tasks.md has no `##` sections? Falls back to fixed-size batches (default 15 tasks per batch) +- What happens when user cancels mid-batch? Batch is marked cancelled, no further batches run, state preserved for potential resume +- How does system handle API rate limits during batch execution? Claude CLI handles internally; dashboard monitors for stale status +- What happens when project has USER GATE? 
Orchestration pauses at verify, notifies user, waits for manual /flow.merge +- What happens when another orchestration is already running? Returns error "Orchestration already in progress" with cancel option + +--- + +## Requirements + +### Functional Requirements + +**Configuration:** +- **FR-001**: System MUST display configuration modal when "Complete Phase" is clicked +- **FR-002**: Modal MUST include Core Options: auto-merge toggle, skip design toggle, skip analyze toggle, additional context textarea +- **FR-003**: Modal MUST include Advanced Options (collapsed): auto-heal toggle, max heal attempts, batch size fallback, pause between batches +- **FR-004**: Modal MUST show detected batch count before starting +- **FR-005**: Modal MUST show warning if no sections detected in tasks.md + +**Batch Detection:** +- **FR-010**: System MUST parse tasks.md to detect batches from `##` section headers +- **FR-011**: Each `##` section with incomplete tasks becomes one batch +- **FR-012**: System MUST fall back to fixed-size batches (configurable, default 15) if no sections found +- **FR-013**: Batch detection MUST respect task completion status (skip completed tasks) + +**State Machine:** +- **FR-020**: System MUST implement state machine with phases: design → analyze → implement → verify → merge +- **FR-021**: State machine MUST check `specflow status --json` between each step +- **FR-022**: System MUST wait for dual confirmation (state update AND process completion) before transitioning +- **FR-023**: System MUST persist state to `{project}/.specflow/workflows/orchestration-{id}.json` +- **FR-024**: System MUST support single orchestration per project (reject concurrent) + +**Batch Execution:** +- **FR-030**: System MUST execute batches sequentially (one at a time) +- **FR-031**: Batch execution MUST use skill input injection to constrain tasks (no skill modification) +- **FR-032**: System MUST track per-batch: status, started/completed timestamps, task IDs, heal 
attempts +- **FR-033**: System MUST link batch to its workflow execution ID + +**Auto-Healing:** +- **FR-040**: On batch failure, system MUST spawn healer Claude if auto-heal enabled +- **FR-041**: Healer prompt MUST include: error details, batch section, attempted tasks, completed tasks, failed tasks +- **FR-042**: Healer MUST only attempt remaining tasks in current batch +- **FR-043**: System MUST limit heal attempts per batch (configurable, default 1) +- **FR-044**: If healer fails, system MUST stop and notify user with full context + +**Claude Helper Utility:** +- **FR-050**: System MUST provide typed claudeHelper() function for decisions and healing +- **FR-051**: Claude Helper MUST support: sessionId resume, schema validation (Zod), tool restrictions +- **FR-052**: Claude Helper MUST support model selection (sonnet, haiku, opus) with fallback +- **FR-053**: Claude Helper MUST enforce budget limits (per call, total) +- **FR-054**: Decision calls MUST restrict tools to read-only (Read, Grep, Glob) + +**Progress Display:** +- **FR-060**: System MUST show phase progress bar (Design → Analyze → Implement → Verify → Merge) +- **FR-061**: System MUST show batch progress during implement (batch N of M, task counts) +- **FR-062**: System MUST show status indicators: Running, Paused, Healing, Waiting, Complete, Merge Ready +- **FR-063**: System MUST show timing information (elapsed, estimated remaining) +- **FR-064**: System MUST maintain decision log for debugging + +**UI Integration:** +- **FR-070**: "Complete Phase" MUST be primary action (prominent styling, icon) +- **FR-071**: Secondary buttons (Orchestrate, Merge, Review, Memory) MUST remain available +- **FR-072**: Progress UI MUST replace action buttons during active orchestration +- **FR-073**: Project card menu MUST include "Complete Phase" as first highlighted item + +**API Routes:** +- **FR-080**: POST `/api/workflow/orchestrate` - Start orchestration with config +- **FR-081**: GET 
`/api/workflow/orchestrate/status` - Get orchestration status +- **FR-082**: GET `/api/workflow/orchestrate/list` - List orchestrations for project +- **FR-083**: POST `/api/workflow/orchestrate/cancel` - Cancel orchestration +- **FR-084**: POST `/api/workflow/orchestrate/resume` - Resume paused orchestration +- **FR-085**: POST `/api/workflow/orchestrate/merge` - Trigger merge when paused + +### Key Entities + +- **OrchestrationExecution**: Tracks overall orchestration state including config, current phase, batches, linked executions, decision log +- **OrchestrationConfig**: User configuration from modal (auto-merge, skip flags, heal settings, budgets) +- **BatchItem**: Individual batch tracking (section name, task IDs, status, timing, heal attempts) +- **ClaudeHelperOptions**: Configuration for Claude Helper calls (schema, tools, budget, model) +- **ClaudeHelperResult**: Response from Claude Helper (parsed result, session ID, cost, timing) + +--- + +## Success Criteria + +### Measurable Outcomes + +- **SC-001**: User can complete a 50-task phase by clicking one button and configuring preferences once +- **SC-002**: Batches execute sequentially with progress visible at each step +- **SC-003**: Auto-healing successfully recovers from batch failures caused by: missing files, syntax errors, test failures, and dependency issues (at least 70% success rate for these failure types) +- **SC-004**: Orchestration survives dashboard restart and resumes from correct position +- **SC-005**: Decision log provides clear debugging information for all state transitions +- **SC-006**: Budget limits prevent runaway costs (default $5/batch, $50/total, $2/heal) + +--- + +## Non-Goals + +- **NG-001**: Branch strategy selection in modal (future consideration) +- **NG-002**: Test/dry-run mode for orchestration (future consideration) +- **NG-003**: Notification level customization (future consideration) +- **NG-004**: Time-based constraints (stop after N hours) (future consideration) +- 
**NG-005**: Modifying existing /flow.* skills (dashboard orchestrates, skills unchanged) +- **NG-006**: SSE/WebSocket for real-time updates (polling is sufficient) +- **NG-007**: UI for selecting individual tasks (programmatic batching only) +- **NG-008**: Concurrent orchestrations on same project + +--- + +## Visual Design Reference + +See [ui-design.md](ui-design.md) for: +- Configuration modal layout +- Progress display components +- Button hierarchy and styling +- Project card menu changes diff --git a/.specify/archive/1055-smart-batching-orchestration/tasks.md b/.specify/archive/1055-smart-batching-orchestration/tasks.md new file mode 100644 index 0000000..ef7659b --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/tasks.md @@ -0,0 +1,222 @@ +# Tasks: Smart Batching & Orchestration + +## Progress Dashboard + +> Last updated: 2026-01-21 | Run `specflow status` to refresh + +| Phase | Status | Progress | +|-------|--------|----------| +| Foundation | PENDING | 0/10 | +| Core Services | PENDING | 0/12 | +| API Routes | PENDING | 0/12 | +| UI Components | PENDING | 0/15 | +| Integration | PENDING | 0/8 | +| Polish | PENDING | 0/4 | + +**Overall**: 0/61 (0%) | **Current**: None + +--- + +**Input**: Design documents from `/specs/1055-smart-batching-orchestration/` +**Prerequisites**: plan.md, spec.md, ui-design.md + +## Format: `[ID] [P?] 
[Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[US#]**: Which user story this task belongs to + +--- + +## Phase 1: Foundation (Schemas + Claude Helper) + +**Purpose**: Establish foundational utilities needed by all other components + +### Zod Schemas + +- [x] T001 [P] Create OrchestrationConfigSchema in packages/shared/src/schemas/orchestration-config.ts +- [x] T002 [P] Create OrchestrationExecutionSchema in packages/shared/src/schemas/orchestration-execution.ts +- [x] T003 [P] Create BatchItemSchema in packages/shared/src/schemas/batch-item.ts +- [x] T004 [P] Create ClaudeHelperOptionsSchema and ClaudeHelperResultSchema in packages/shared/src/schemas/claude-helper.ts +- [x] T005 Export all orchestration schemas from packages/shared/src/schemas/index.ts + +### Claude Helper Utility + +- [x] T006 [US1] Create claude-helper.ts base structure in packages/dashboard/src/lib/services/claude-helper.ts +- [x] T007 [US1] Implement session management (new, resume, fork) in claude-helper.ts +- [x] T008 [US1] Implement model selection with fallback in claude-helper.ts +- [x] T009 [US1] Implement tool restrictions and budget enforcement in claude-helper.ts +- [x] T010 [US1] Add error handling (timeout, validation, budget exceeded) in claude-helper.ts + +**Checkpoint**: Foundation ready - Claude Helper can make typed API calls to Claude CLI + +--- + +## Phase 2: Core Services (State Machine + Batch Detection) + +**Purpose**: Implement orchestration logic independent of UI + +### Batch Parser + +- [x] T011 [P] [US1] Create batch-parser.ts in packages/dashboard/src/lib/services/batch-parser.ts +- [x] T012 [US1] Implement parseBatchesFromTasksMd() to detect ## sections +- [x] T013 [US1] Implement fallback to fixed-size batches when no sections +- [x] T014 [US1] Return BatchPlan with task IDs, section names, counts + +### Orchestration Service + +- [x] T015 [US1] Create orchestration-service.ts in 
packages/dashboard/src/lib/services/orchestration-service.ts +- [x] T016 [US1] Implement state machine transitions (design→analyze→implement→verify→merge) +- [x] T017 [US1] Implement dual confirmation pattern (state + process completion) +- [x] T018 [US1] Implement state persistence to {project}/.specflow/workflows/orchestration-{id}.json +- [x] T019 [US1] Implement decision logging with timestamps +- [x] T020 [US1] Integrate with specflow status --json for state checking + +### Auto-Healing Service + +- [x] T021 [US3] Create auto-healing-service.ts in packages/dashboard/src/lib/services/auto-healing-service.ts +- [x] T022 [US3] Implement captureFailureContext() to gather error details, stderr, failed tasks + +**Checkpoint**: Core services can orchestrate batches and handle failures + +--- + +## Phase 3: API Routes + +**Purpose**: Expose orchestration functionality via REST API + +### Start Orchestration + +- [x] T023 [US1] Create POST /api/workflow/orchestrate route in packages/dashboard/src/app/api/workflow/orchestrate/route.ts +- [x] T024 [US1] Validate project exists and no existing orchestration +- [x] T025 [US1] Parse batch plan and create orchestration record +- [x] T026 [US1] Start first step via orchestration service + +### Status and List + +- [x] T027 [P] [US4] Create GET /api/workflow/orchestrate/status route in packages/dashboard/src/app/api/workflow/orchestrate/status/route.ts +- [x] T028 [P] [US4] Create GET /api/workflow/orchestrate/list route in packages/dashboard/src/app/api/workflow/orchestrate/list/route.ts + +### Control Routes + +- [x] T029 [P] [US5] Create POST /api/workflow/orchestrate/cancel route in packages/dashboard/src/app/api/workflow/orchestrate/cancel/route.ts +- [x] T030 [P] [US6] Create POST /api/workflow/orchestrate/resume route in packages/dashboard/src/app/api/workflow/orchestrate/resume/route.ts +- [x] T031 [US1] Create POST /api/workflow/orchestrate/merge route in 
packages/dashboard/src/app/api/workflow/orchestrate/merge/route.ts + +### Auto-Healing Integration + +- [x] T032 [US3] Implement buildHealerPrompt() with error context, remaining tasks +- [x] T033 [US3] Implement spawnHealer() via Claude Helper with fork session +- [x] T034 [US3] Handle healer success/failure outcomes and update batch status + +**Checkpoint**: API routes fully functional, can control orchestration via REST + +--- + +## Phase 4: UI Components + +**Purpose**: Build configuration modal and progress display + +### Configuration Modal + +- [x] T035 [US2] Create StartOrchestrationModal component in packages/dashboard/src/components/orchestration/start-orchestration-modal.tsx +- [x] T036 [US2] Create OrchestrationConfigForm with core options in packages/dashboard/src/components/orchestration/orchestration-config-form.tsx +- [x] T037 [US2] Add advanced options section (collapsible) to OrchestrationConfigForm +- [x] T038 [US2] Add budget limits section to OrchestrationConfigForm +- [x] T039 [US2] Display detected batch count in modal header +- [x] T040 [US2] Add validation and Start Orchestration button + +### Progress Components + +- [x] T041 [P] [US4] Create PhaseProgressBar component in packages/dashboard/src/components/orchestration/phase-progress-bar.tsx +- [x] T042 [P] [US4] Create BatchProgress component in packages/dashboard/src/components/orchestration/batch-progress.tsx +- [x] T043 [P] [US4] Create DecisionLogPanel component (collapsible) in packages/dashboard/src/components/orchestration/decision-log-panel.tsx +- [x] T044 [US4] Create OrchestrationProgress parent component in packages/dashboard/src/components/orchestration/orchestration-progress.tsx + +### Control and State Components + +- [x] T045 [P] [US4] Create OrchestrationControls (Pause/Cancel) in packages/dashboard/src/components/orchestration/orchestration-controls.tsx +- [x] T046 [P] [US4] Create MergeReadyPanel in packages/dashboard/src/components/orchestration/merge-ready-panel.tsx +- 
[x] T047 [P] [US5] Create OrchestrationBadge for project cards in packages/dashboard/src/components/orchestration/orchestration-badge.tsx + +### Orchestration Hook + +- [x] T048 [US4] Create useOrchestration hook in packages/dashboard/src/hooks/use-orchestration.ts +- [x] T049 [US4] Implement polling for orchestration status in useOrchestration + +**Checkpoint**: All UI components built and styled per ui-design.md + +--- + +## Phase 5: Integration + +**Purpose**: Wire everything together in the dashboard + +### Project Detail Integration + +- [x] T050 [US5] Add CompletePhaseButton as primary action in project detail workflow area +- [x] T051 [US5] Implement transform from buttons to OrchestrationProgress when active +- [x] T052 [US5] Wire StartOrchestrationModal open from CompletePhaseButton click + +### Project Card Integration + +- [x] T053 [US5] Add "Complete Phase" as first highlighted item in project card actions menu +- [x] T054 [US5] Reorganize "Run Workflow" as secondary flyout with Orchestrate, Merge, Review, Memory +- [x] T055 [US5] Add OrchestrationBadge to project cards when orchestration active + +### Reconciliation + +- [x] T056 [US6] Add orchestration detection to reconciliation on dashboard startup +- [x] T057 [US6] Implement resume or mark-as-failed logic for in-progress orchestrations + +**Checkpoint**: Full integration complete, end-to-end flow works + +--- + +## Phase 6: Polish & Testing + +**Purpose**: Quality improvements and test coverage + +- [x] T058 [P] Create claude-helper.test.ts with mocked Claude CLI in packages/dashboard/__tests__/orchestration/ +- [x] T059 [P] Create orchestration-service.test.ts with state machine transitions in packages/dashboard/__tests__/orchestration/ +- [x] T060 [P] Create batch-parser.test.ts with various tasks.md formats in packages/dashboard/__tests__/orchestration/ +- [x] T061 Verify USER GATE checklist items from spec.md verification gate + +**Checkpoint**: All tests passing, ready for USER GATE 
verification + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Foundation)**: No dependencies - schemas and Claude Helper first +- **Phase 2 (Core Services)**: Depends on Phase 1 (uses schemas, Claude Helper) +- **Phase 3 (API Routes)**: Depends on Phase 2 (uses orchestration service) +- **Phase 4 (UI Components)**: Depends on Phase 1 (uses schemas); can run in parallel with Phase 3 +- **Phase 5 (Integration)**: Depends on Phase 3 + Phase 4 +- **Phase 6 (Polish)**: Depends on all above + +### Within Each Phase + +- Tasks marked [P] can run in parallel +- Otherwise, execute in listed order + +### Recommended Execution + +1. T001-T004 (schemas) in parallel, then T005 (schema index export) +2. T006-T010 (Claude Helper) sequentially +3. T011-T014 (batch parser) → T015-T020 (orchestration service) → T021-T022 (auto-healing) +4. T023-T034 (API routes) sequentially +5. T035-T049 (UI) - modal first (T035-T040), then progress (T041-T049) +6. T050-T057 (integration) sequentially +7. T058-T060 (tests) in parallel, then T061 (USER GATE verification) + +--- + +## Notes + +- [P] tasks = different files, no dependencies +- All paths relative to repository root +- Commit after each logical group of tasks +- Test each phase before moving to next +- Run `specflow mark T###` to mark tasks complete diff --git a/.specify/archive/1055-smart-batching-orchestration/ui-design.md b/.specify/archive/1055-smart-batching-orchestration/ui-design.md new file mode 100644 index 0000000..95151b0 --- /dev/null +++ b/.specify/archive/1055-smart-batching-orchestration/ui-design.md @@ -0,0 +1,318 @@ +# UI/UX Design: Smart Batching & Orchestration + +**Phase**: 1055 +**Created**: 2026-01-21 +**Status**: Final + +--- + +## Current State (Before) + +### Project Detail Workflow Actions + +Currently, the project detail page has a workflow actions area with several buttons: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Workflow Actions │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ 
┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │Orchestrate│ │ Merge │ │ Review │ │ Memory │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +All buttons are equally styled, no clear primary action. Users must know which skill to run. + +### Project Card Actions Menu + +``` +┌─────────────────────────┐ +│ ▷ Start Workflow → │──┬─ Design +├─────────────────────────┤ ├─ Analyze +│ 🔧 Maintenance │ ├─ Implement +│ Status │ ├─ Orchestrate +│ Validate │ ├─ Verify +└─────────────────────────┘ └─ Merge +``` + +"Start Workflow" shows all skills equally, requiring user to know which to run. + +--- + +## Proposed Design (After) + +### Project Detail Workflow Actions + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ◈ Complete Phase → │ +│ Automatically execute all steps to complete phase │ +└─────────────────────────────────────────────────────────────┘ + + ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ + │Orchestrate│ │ Merge │ │ Review │ │ Memory │ + └──────────┘ └──────────┘ └──────────┘ └──────────┘ +``` + +**"Complete Phase"** is the primary action: +- Larger, more prominent than secondary buttons +- Gradient or accent color background (purple/blue) +- Icon: stacked layers (◈) suggesting multiple phases +- Subtitle explaining what it does +- Arrow (→) indicating it opens modal + +Secondary buttons remain for manual skill execution. 
+ +### Configuration Modal + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Complete Phase [×] │ +├──────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 1055: Smart Batching & Orchestration │ +│ Detected 4 batches from tasks.md │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ CORE OPTIONS │ +│ │ +│ [○] Auto-merge on completion │ +│ Automatically run /flow.merge after verify succeeds │ +│ │ +│ [○] Skip design │ +│ Skip /flow.design if specs already exist │ +│ │ +│ [○] Skip analyze │ +│ Skip /flow.analyze step │ +│ │ +│ Additional context: │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ │ │ +│ │ (optional text injected into all skill prompts) │ │ +│ │ │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ ▶ ADVANCED OPTIONS │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ [ Start Orchestration ] │ +│ │ +└──────────────────────────────────────────────────────────────────┘ +``` + +**Advanced Options (collapsed by default):** + +``` +│ ▼ ADVANCED OPTIONS │ +│ │ +│ [●] Auto-heal enabled │ +│ Attempt automatic recovery on batch failure │ +│ │ +│ Max heal attempts: [ 1 ▼] │ +│ Retry limit per batch (prevents infinite loops) │ +│ │ +│ Batch size fallback: [ 15 ▼] │ +│ Task count per batch if no ## sections found │ +│ │ +│ [○] Pause between batches │ +│ Require user confirmation between implement batches │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ BUDGET LIMITS │ +│ │ +│ Max per batch: $[ 5.00 ] │ +│ Max total: $[ 50.00 ] │ +│ Healing budget: $[ 2.00 ] │ +│ Decision budget: $[ 0.50 ] │ +``` + +### Progress Display (During Orchestration) + +When orchestration is active, workflow actions area transforms: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Orchestration 
Progress │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ Design ──●── Analyze ──●── Implement ──○── Verify ──○── Merge │ +│ ▲ current │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ Implementing batch 2 of 4: Core Components │ +│ │ +│ ████████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 12/35 tasks (34%) │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ ▼ Decision Log │ │ +│ │ 10:30:15 Checked status: hasSpec=true, tasks=12/35 │ │ +│ │ 10:30:12 Starting batch 2: Core Components (T008-T015)│ │ +│ │ 10:26:43 Batch 1 completed in 4m 32s │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Time elapsed: 8m 15s │ +│ Estimated remaining: ~12m │ +│ │ +│ ────────────────────────────────────────────────────────────── │ +│ │ +│ [ Pause ] [ Cancel ] │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Status Variations:** + +Healing status: +``` +│ 🔧 Auto-healing batch 2... │ +│ │ +│ Fixing: File not found error in T009 │ +│ Heal attempt: 1 of 1 │ +``` + +Waiting for input: +``` +│ ❓ Waiting for input │ +│ │ +│ Claude has questions that need your response. │ +│ [ Answer Questions ] │ +``` + +Merge ready (paused): +``` +│ ⏹️ Merge Ready │ +│ │ +│ All tasks complete. Phase verified and ready to merge. │ +│ │ +│ [ Run Merge ] [ View Diff ] │ +``` + +### Project Card Actions Menu + +``` +┌─────────────────────────────┐ +│ ◈ Complete Phase → │ ← PRIMARY (highlighted, gradient bg) +├─────────────────────────────┤ +│ ▷ Run Workflow → │──┬─ Orchestrate +├─────────────────────────────┤ ├─ Merge +│ 🔧 Maintenance │ ├─ Review +│ Status │ └─ Memory +│ Validate │ +├─────────────────────────────┤ +│ ⚙ Advanced │ +│ Sync State │ +└─────────────────────────────┘ +``` + +"Complete Phase" is first and highlighted. "Run Workflow" contains direct skill access as secondary option. 
+ +### Status Badges on Project Cards + +``` +┌────────────────────────────────────────┐ +│ My Project ◈ ● ● │ ← ◈ = orchestration, ● = workflow +├────────────────────────────────────────┤ +│ Phase: 1055 - Smart Batching │ +│ │ +│ Completing phase (batch 2/4) [▓▓░░] │ ← Orchestration-specific badge +│ │ +└────────────────────────────────────────┘ +``` + +Orchestration badge shows: +- "Completing phase (batch N/M)" during implement +- "Phase: Waiting for merge" when paused +- Different color than regular workflow badges + +--- + +## Rationale + +- **Why primary "Complete Phase" button?** The northstar goal is autonomous phase completion. Users should immediately see the main action that achieves this. Secondary buttons remain for power users who need direct skill access. + +- **Why configuration modal?** Upfront configuration enables truly autonomous execution. Users set preferences once and don't need to intervene during the run. This builds trust and control. + +- **Why collapsed advanced options?** Most users won't need to change defaults. Keeping advanced options hidden reduces cognitive load while making them accessible when needed. + +- **Why progress replaces buttons?** During active orchestration, the primary actions are Pause/Cancel, not starting new workflows. Replacing buttons with progress provides clear visual state. + +- **User flow:** + 1. Click "Complete Phase" + 2. Review detected batches and configure options + 3. Click "Start Orchestration" + 4. Watch progress (optional - can walk away) + 5. Return when notified of completion or questions + 6. 
Click "Run Merge" if auto-merge disabled + +- **Accessibility considerations:** + - All toggles have descriptive labels + - Progress bar has text percentage for screen readers + - Status changes announced to screen readers + - Keyboard navigation for modal and all controls + +--- + +## Component Inventory + +| Component | Type | Purpose | Notes | +|-----------|------|---------|-------| +| CompletePhaseButton | Button | Primary action to start orchestration | Prominent styling, icon | +| StartOrchestrationModal | Modal | Configuration before starting | Contains options sections | +| OrchestrationConfigForm | Form | Core + Advanced options | Toggles, inputs, textarea | +| BudgetLimitsSection | Form section | Cost caps configuration | Currency inputs | +| OrchestrationProgress | Panel | Shows current orchestration state | Replaces action buttons | +| PhaseProgressBar | Progress | Visual step indicator | Design→Analyze→Implement→Verify→Merge | +| BatchProgress | Progress | Current batch progress | Section name, task counts, bar | +| DecisionLogPanel | Collapsible | Shows state machine decisions | Timestamps, messages | +| OrchestrationControls | Button group | Pause/Cancel during run | Context-aware visibility | +| MergeReadyPanel | Panel | Shown when paused at merge | Run Merge, View Diff buttons | +| OrchestrationBadge | Badge | Project card status | Different from workflow badge | +| ProjectCardMenu | Menu | Updated action menu | Complete Phase first | + +--- + +## Interactions + +| Action | Trigger | Result | +|--------|---------|--------| +| Open config modal | Click "Complete Phase" | Modal opens with detected batches | +| Toggle option | Click toggle | Value updates, no API call yet | +| Start orchestration | Click "Start Orchestration" in modal | Modal closes, progress shows, API called | +| Expand advanced | Click "Advanced Options" header | Section expands with animation | +| Cancel orchestration | Click "Cancel" | Confirmation dialog, then cancels | +| Pause 
orchestration | Click "Pause" | Pauses after current batch completes | +| Resume orchestration | Click "Resume" (on paused) | Continues from next batch | +| Run merge | Click "Run Merge" (merge ready) | Starts /flow.merge | +| View decision log | Click log header | Expands/collapses log panel | +| Open from card | Click "Complete Phase" in card menu | Same modal as project detail | +| Answer questions | Click "Answer Questions" | Opens question drawer | + +--- + +## Design Constraints + +- Must use existing shadcn/ui components (Button, Dialog, Toggle, Input, Progress) +- Must follow existing dark mode theming +- Must not break existing secondary workflow buttons +- Progress polling at 3s interval (no SSE) +- Must handle long orchestrations (hours) gracefully +- Must survive dashboard hot reload + +--- + +## Open Questions + +All questions resolved in phase file: +- [x] Button hierarchy decided: Complete Phase primary, others secondary +- [x] Modal structure decided: Core + Advanced (collapsed) +- [x] Progress location decided: Replaces action buttons +- [x] Badge design decided: Different color than workflow badges diff --git a/.specify/history/HISTORY.md b/.specify/history/HISTORY.md index 6eda19c..fdfa96c 100644 --- a/.specify/history/HISTORY.md +++ b/.specify/history/HISTORY.md @@ -4,6 +4,835 @@ --- +## 1055 - Smart Batching & Orchestration + +**Completed**: 2026-01-22 + +> **Architecture Context**: See [PDR: Workflow Dashboard Orchestration](../../memory/pdrs/workflow-dashboard-orchestration.md) for holistic architecture, design decisions, and how this phase fits into the larger vision. + +### 1055 - Smart Batching & Orchestration + +**Goal**: Autonomous workflow execution with smart batching, configurable behavior, and auto-healing. + +**Context**: Large task lists (50+) exceed context windows. This phase adds intelligent batching using existing tasks.md sections, a state machine for orchestration, user configuration modal, and auto-healing when batches fail. 
+ +**Key Principles:** +- **Programmatic batching** - No UI for selecting individual tasks, automatic batch detection +- **Configurable autonomy** - User sets preferences before starting, then minimal interaction +- **Auto-healing** - Spawn fixer Claude on failure, configurable retry before stopping +- **Clear flow** - design → analyze → implement → verify → (pause for merge OR auto-merge) + +--- + +**Scope:** + +### 0. Orchestration Configuration Modal + +When the user clicks "Complete Phase", display a configuration modal before execution begins. + +**Purpose**: Collect user preferences once upfront to enable truly autonomous execution. + +#### Core Options (always visible) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| Auto-merge on completion | toggle | off | Automatically run /flow.merge after verify succeeds | +| Additional context | textarea | empty | Free-form text injected into all skill prompts | +| Skip design | toggle | off | Skip /flow.design if specs already exist | +| Skip analyze | toggle | off | Skip /flow.analyze step | + +#### Advanced Options (collapsed section) + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| Auto-heal enabled | toggle | on | Attempt automatic recovery on batch failure | +| Max heal attempts | number | 1 | Retry limit per batch (prevents infinite loops) | +| Batch size fallback | number | 15 | Task count per batch if no `##` sections found | +| Pause between batches | toggle | off | Require user confirmation between implement batches | + +#### Future Considerations (not in scope for this phase) +- Branch strategy selection (create new, use current, auto-name) +- Test/dry-run mode +- Notification level customization +- Time-based constraints (stop after N hours) + +**Modal UI Notes:** +- "Start Orchestration" button at bottom +- Show detected batch count before starting: "Detected 4 batches from tasks.md" +- Warning if no sections found: "No 
sections detected, will use 15-task batches" +- Pre-flight check: Show current phase status (hasSpecs, taskCount, etc.) + +--- + +### 1. Programmatic Batch Detection + +Parse existing task sections from tasks.md: +- Use markdown headers (`## Section Name`) as batch boundaries +- Each `##` section becomes one batch +- Fall back to fixed-size batches (~15 tasks) if no sections +- Respect task dependencies within sections + +Example tasks.md structure recognized: +```markdown +## Progress Dashboard +Total: 0/25 | Blocked: 0 + +## Setup +- [ ] T001 Create project structure +- [ ] T002 Configure build system + +## Core Components +- [ ] T003 Implement base service +- [ ] T004 Add API routes + +## Integration +- [ ] T005 Wire up endpoints +``` + +### 2. Dashboard Orchestration State Machine + +**Corrected Flow**: design → analyze → implement → verify → merge + +``` +[Start with Config] + │ + ▼ +┌──────────────────┐ +│ Check Status │◄─────────────────────────────────────┐ +│ specflow status │ │ +└────────┬─────────┘ │ + │ │ + ▼ │ + ┌─────────────┐ ┌───────────────────┐ │ + │Need Design? │─Yes─►│ /flow.design │──────────────┤ + │(skip if set)│ └───────────────────┘ │ + └──────┬──────┘ │ + │No │ + ▼ │ + ┌─────────────┐ ┌───────────────────┐ │ + │Need Analyze?│─Yes─►│ /flow.analyze │──────────────┤ + │(skip if set)│ └───────────────────┘ │ + └──────┬──────┘ │ + │No │ + ▼ │ + ┌─────────────┐ ┌───────────────────┐ │ + │Tasks Left? │─Yes─►│ /flow.implement │──┬───────────┤ + └──────┬──────┘ │ (batch N of M) │ │ │ + │No └─────────┬─────────┘ │ │ + │ │ │ │ + │ ┌──────▼──────┐ │ │ + │ │Batch Failed?│─No──┘ │ + │ └──────┬──────┘ │ + │ │Yes │ + │ ┌──────▼──────┐ │ + │ │Auto-Heal? │─No─►[Stop+Notify] + │ └──────┬──────┘ │ + │ │Yes │ + │ ┌──────▼──────┐ │ + │ │Spawn Healer │─────────────────┘ + │ └─────────────┘ + ▼ + ┌─────────────┐ ┌───────────────────┐ + │Need Verify? 
│─Yes─►│ /flow.verify │──────────────┘ + └──────┬──────┘ └───────────────────┘ + │No + ▼ + ┌─────────────┐ ┌───────────────────┐ + │Auto-merge? │─Yes─►│ /flow.merge │──►[Complete] + └──────┬──────┘ └───────────────────┘ + │No + ▼ + ┌─────────────┐ + │Pause: Merge │ ← User must manually trigger merge + │Ready │ + └─────────────┘ +``` + +**State Machine Logic:** + +- Between each step: `specflow status --json` to determine next action +- Configuration stored in orchestration execution record +- State persisted in `{project}/.specflow/workflows/orchestration-{id}.json` + +**Transition Rules:** + +| Condition | Action | +|-----------|--------| +| `hasSpec: false` AND `!config.skipDesign` | Run /flow.design | +| Post-design AND `!config.skipAnalyze` | Run /flow.analyze | +| `tasksComplete < tasksTotal` | Run /flow.implement (next incomplete batch) | +| `tasksComplete == tasksTotal` | Run /flow.verify | +| Verify complete AND `config.autoMerge` | Run /flow.merge | +| Verify complete AND `!config.autoMerge` | Pause, notify user "Ready to merge" | + +**Fallback Behavior:** +- If state unclear after 3 status checks → spawn Claude to analyze and decide +- Log decision rationale for debugging + +**Critical: Decision Timing** + +The state machine must wait for BOTH conditions before making decisions: + +1. **Orchestration state update** - `step.current` changes (e.g., implement → verify) +2. **Process completion** - Workflow execution status is terminal (completed/failed) + +Why: The skill may update orchestration state BEFORE it finishes all cleanup work. Making decisions based only on state changes can cause race conditions. + +**Decision Algorithm:** +``` +On state change detected: + 1. Check workflow execution status + 2. If status == 'running' or 'waiting_for_input': + → Wait, don't make decision yet + 3. If status == 'completed' or 'failed': + → Read final orchestration state + → Parse tasks.md for completion status + → Make state machine decision + 4. 
Poll every 3s until process exits +``` + +**Data Sources for Decisions:** + +| Source | What It Tells Us | How to Check | +|--------|-----------------|--------------| +| Orchestration state | Current step, status | `specflow status --json` | +| Workflow execution | Process status, exit code | `/api/workflow/status` | +| Session JSONL | Detailed execution log | Parse `~/.claude/projects/{hash}/{session}.jsonl` | +| tasks.md | Task completion status | `specflow status --json` (includes progress) | + +**Completion Detection (implements Q1: A+C):** +- **Primary**: Check `step.current == "verify"` in orchestration state (set by implement skill on completion) +- **Secondary**: Parse tasks.md to verify all batch tasks are marked complete +- **Fallback**: If process exited but state unclear, spawn Claude to assess + +### 3. Sequential Batch Execution + +**Mechanism**: Use existing context injection (no skill modifications needed). + +The workflow service already supports appending user context to skill prompts. For batched implement: + +```typescript +// Orchestrator builds skill input with batch context +let skillInput = `/flow.implement Execute only the "${batch.section}" section (${batch.taskIds.join(', ')}). Do NOT work on tasks from other sections.`; + +// Plus additional user context from config +if (config.additionalContext) { + skillInput += `\n\n${config.additionalContext}`; +} +``` + +This becomes the "# User Context" section in the final prompt: + +```markdown +# Skill Instructions +[/flow.implement content] + +# User Context +Execute only the "Core Components" section (T008, T009, T010, T011). +Do NOT work on tasks from other sections. + +Focus on performance, avoid N+1 queries. [← from config.additionalContext] +``` + +**Execution Flow:** + +1. Parse tasks.md to identify batches (sections with incomplete tasks) +2. 
For each batch: + - Build skill input with batch constraint + - Call workflow service `start()` with skill input + - Wait for completion (dual confirmation: state + process) + - Verify batch tasks are complete in tasks.md + - If incomplete + failure detected → trigger auto-heal +3. After all batches: proceed to verify step + +**Tracking per batch:** +- Batch index (1 of N) +- Section name +- Task IDs in batch +- Started at +- Completed at +- Status (pending, running, completed, failed, healed) +- Tasks completed count (pre/post) + +### 4. Auto-Healing on Failure + +When a batch fails: + +1. **Capture error details**: + - stderr output + - Session transcript (last N messages) + - Tasks attempted vs completed + - Specific error messages + +2. **Spawn healer Claude**: + ``` + The following implement batch failed: + - Batch: "## Core Components" + - Error: [error details] + - Tasks attempted: T005-T012 + - Tasks completed: T005-T008 + - Tasks failed: T009 (file not found) + + Analyze the failure and fix the issue, then continue + with remaining tasks in this batch. + ``` + +3. **Healer outcome**: + - If healer succeeds → mark batch complete, continue to next batch + - If healer fails → stop execution, notify user with full context + - Only one heal attempt per batch (prevent infinite loops) + +### 5. 
Orchestration Progress Display + +UI components showing current orchestration state: + +**Phase Progress Bar:** +``` +Design ──●── Analyze ──●── Implement ──○── Verify ──○── Merge + ▲ current +``` + +**Batch Progress (during implement):** +- "Implementing batch 2 of 4: Core Components" +- "Tasks: 12/35 complete" +- Visual progress bar within current batch + +**Status Indicators:** +- 🔄 Running - Active execution +- ⏸️ Paused - Waiting between batches (if configured) +- 🔧 Healing - Auto-heal in progress +- ❓ Waiting - Needs user input (question) +- ✅ Phase complete - Ready for next phase +- ⏹️ Merge ready - Paused waiting for merge approval + +**Timing Information:** +- Time elapsed for current phase/batch +- Estimated remaining (based on batch completion rate) + +**Orchestration Log Panel:** +- Collapsible log showing state machine decisions +- "Checked status: hasSpec=true, tasksComplete=12/35" +- "Starting batch 2: Core Components (T008-T015)" +- "Batch 1 completed in 4m 32s" + +--- + +### 6. 
Additional Context Injection + +The "Additional context" from the configuration modal gets injected into skill prompts: + +``` +[Standard skill prompt for /flow.implement] + +--- +ADDITIONAL CONTEXT FROM USER: +{config.additionalContext} +--- + +[Rest of prompt] +``` + +**Use Cases:** +- "Focus on performance, avoid N+1 queries" +- "Use the existing AuthService for all auth operations" +- "The API should follow REST conventions strictly" +- "Skip writing tests for now, I'll add them later" + +--- + +**Deliverables:** + +| Deliverable | Location | Description | +|-------------|----------|-------------| +| **Claude Helper Utility** | `claude-helper.ts` | Core utility for decisions + continuation | +| Configuration Modal | `StartOrchestrationModal.tsx` | Pre-flight config UI | +| Orchestration Config Schema | `packages/shared/src/schemas/` | Zod schema for config | +| Batch Parser | `orchestration-service.ts` | Extract batches (or use Claude Helper) | +| State Machine | `orchestration-state-machine.ts` | Decision logic, uses Claude Helper for fallback | +| Auto-Healing Service | `auto-healing-service.ts` | Uses Claude Helper for healing | +| Progress Component | `OrchestrationProgress.tsx` | Phase/batch/task progress UI | +| Orchestration API | `POST /api/workflow/orchestrate` | Start orchestration with config | +| Orchestration Status API | `GET /api/workflow/orchestrate/status` | Get orchestration-specific status | +| Tests | `__tests__/orchestration/` | State machine, Claude Helper mocks, healing | + +**Dependencies:** +- Phase 1054 complete (project details redesign) +- Uses existing: workflow-service.ts, tasks.ts parser, process management + +**Verification Gate: USER** +- [ ] Project detail: "Complete Phase" button is prominent, styled differently +- [ ] Project detail: Secondary buttons (Orchestrate, Merge, Review, Memory) still work +- [ ] Project card: "Complete Phase" is first menu item (highlighted) +- [ ] Project card: "Run Workflow" flyout contains 
Orchestrate, Merge, Review, Memory +- [ ] Configuration modal appears when clicking "Complete Phase" (both locations) +- [ ] Modal shows detected batch count and current phase status +- [ ] Start orchestration, see batches auto-detected from tasks.md sections +- [ ] State machine transitions: design → analyze → implement → verify +- [ ] Batches execute sequentially without user input +- [ ] Skip options work (skipDesign, skipAnalyze) +- [ ] Introduce a failure, see auto-heal attempt (uses Claude Helper) +- [ ] If heal succeeds, execution continues +- [ ] Progress UI replaces action buttons during orchestration +- [ ] Auto-merge works when enabled +- [ ] Pauses at merge-ready when auto-merge disabled +- [ ] Additional context appears in Claude's output +- [ ] Budget limits respected (orchestration stops if exceeded) +- [ ] Decision log shows Claude Helper calls and reasoning + +**Estimated Complexity**: High + +--- + +### 7. Orchestration State Structure + +**File location**: `{project}/.specflow/workflows/orchestration-{id}.json` + +Separate from individual workflow executions - this tracks the overall orchestration. 
+ +```typescript +interface OrchestrationExecution { + id: string; // UUID + projectId: string; // Registry key + status: 'running' | 'paused' | 'waiting_merge' | 'completed' | 'failed' | 'cancelled'; + + // User configuration (from modal) + config: { + autoMerge: boolean; + additionalContext: string; + skipDesign: boolean; + skipAnalyze: boolean; + autoHealEnabled: boolean; + maxHealAttempts: number; + batchSizeFallback: number; + pauseBetweenBatches: boolean; + }; + + // Current position in flow + currentPhase: 'design' | 'analyze' | 'implement' | 'verify' | 'merge' | 'complete'; + + // Batch tracking (during implement phase) + batches: { + total: number; + current: number; // 0-indexed + items: Array<{ + index: number; + section: string; + taskIds: string[]; + status: 'pending' | 'running' | 'completed' | 'failed' | 'healed'; + startedAt?: string; + completedAt?: string; + healAttempts: number; + workflowExecutionId?: string; // Link to workflow execution for this batch + }>; + }; + + // Linked workflow executions + executions: { + design?: string; // Workflow execution IDs + analyze?: string; + implement: string[]; // One per batch + verify?: string; + merge?: string; + healers: string[]; // Auto-heal execution IDs + }; + + // Timing + startedAt: string; + updatedAt: string; + completedAt?: string; + + // Decision log for debugging + decisionLog: Array<{ + timestamp: string; + decision: string; + reason: string; + data?: unknown; + }>; +} +``` + +--- + +### 8. 
UI Integration Points + +**Workflow Actions Layout:** + +``` +┌─────────────────────────────────────────────────────────┐ +│ ◈ Complete Phase → │ ← PRIMARY (highlighted) +│ Automatically execute all steps to complete phase │ +└─────────────────────────────────────────────────────────┘ + + ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ + │Orchestrate│ │ Merge │ │ Review │ │ Memory │ ← SECONDARY (existing) + └──────────┘ └──────────┘ └──────────┘ └──────────┘ +``` + +**Button Hierarchy:** + +| Button | Action | Description | +|--------|--------|-------------| +| **Complete Phase** | Opens config modal → smart orchestration | NEW - autonomous batching, auto-healing | +| Orchestrate | Runs `/flow.orchestrate` directly | Existing skill (for manual control/testing) | +| Merge | Runs `/flow.merge` directly | Existing skill | +| Review | Runs `/flow.review` directly | Existing skill | +| Memory | Runs `/flow.memory` directly | Existing skill | + +**"Complete Phase" Button Styling:** +- Larger, more prominent than secondary buttons +- Gradient or accent color background (purple/blue as in mockup) +- Icon: stacked layers (◈) suggesting multiple phases +- Subtitle: "Automatically execute all steps to complete phase" +- Arrow indicator (→) suggesting it opens modal + +**Secondary Buttons Styling:** +- Uniform size, row layout +- Subtle background, icon + label +- Direct action (no modal, just skill picker confirmation) + +**Project Card Actions Menu:** + +``` +┌─────────────────────────────┐ +│ ◈ Complete Phase → │ ← PRIMARY (highlighted, opens modal) +├─────────────────────────────┤ +│ ▷ Run Workflow → │──┬─ Orchestrate +├─────────────────────────────┤ ├─ Merge +│ 🔧 Maintenance │ ├─ Review +│ Status │ └─ Memory +│ Validate │ +├─────────────────────────────┤ +│ ⚙ Advanced │ +│ Sync State │ +└─────────────────────────────┘ +``` + +**Menu Changes:** +- "Start Workflow" renamed to "Run Workflow" (secondary action) +- "Complete Phase" added as first item (primary, 
highlighted) +- "Run Workflow" flyout contains: Orchestrate, Merge, Review, Memory +- Removes individual workflow steps (Design, Analyze, etc.) from flyout - those are now part of "Complete Phase" + +**Entry Points for Complete Phase:** + +| Location | Trigger | Notes | +|----------|---------|-------| +| Project detail | Click "Complete Phase" button | Primary entry | +| Project card | Actions menu → "Complete Phase" | Opens same config modal | +| Command palette | Cmd+K → "Complete Phase for [project]" | Keyboard users | + +**Progress Display Location**: +- When "Complete Phase" is active, the entire workflow actions area transforms: + - Hide the action buttons + - Show orchestration progress (Section 5) + - Show "Cancel" and "Pause" controls +- When complete/cancelled, buttons reappear + +**Status in Project List**: +- Card shows orchestration status badge when active +- "Completing phase (batch 2/4)" or "Phase: Waiting for merge" +- Different badge color than regular workflow runs + +**Coexistence with Existing Workflows:** +- "Complete Phase" is the new smart orchestration (this phase) +- Secondary buttons remain for manual skill execution +- Allows testing new orchestration while keeping manual fallback +- Eventually, secondary buttons could be collapsed/hidden once orchestration is stable + +--- + +### 9. 
API Design + +**New Routes:** + +| Route | Method | Purpose | +|-------|--------|---------| +| `/api/workflow/orchestrate` | POST | Start orchestration with config | +| `/api/workflow/orchestrate/status` | GET | Get orchestration status by ID | +| `/api/workflow/orchestrate/list` | GET | List orchestrations for project | +| `/api/workflow/orchestrate/cancel` | POST | Cancel active orchestration | +| `/api/workflow/orchestrate/resume` | POST | Resume paused orchestration | +| `/api/workflow/orchestrate/merge` | POST | Trigger merge (when paused at merge-ready) | + +**POST /api/workflow/orchestrate Request:** +```typescript +{ + projectId: string; + config: OrchestrationConfig; +} +``` + +**Response:** +```typescript +{ + orchestrationId: string; + status: string; + batches: { total: number; detected: string[] }; // Show user what was detected +} +``` + +--- + +### 10. Claude Helper Utility + +A foundational utility for intelligent decision-making and session continuation. + +**Purpose**: Provide typed, structured interactions with Claude for orchestration decisions, verification, and healing - without hardcoding every edge case. 
+ +#### Dual-Mode Operation + +| Mode | When to Use | Session Behavior | +|------|-------------|------------------| +| **Decision** | Quick questions, verification, batch planning | New session (optionally not persisted) | +| **Continuation** | Healing, resuming after questions | Resume existing session | + +#### TypeScript Interface + +```typescript +interface ClaudeHelperOptions<T> { + // Session handling (one of these patterns) + sessionId?: string; // Resume existing session + forkSession?: boolean; // Branch session (don't pollute original) + noSessionPersistence?: boolean; // Don't save session (quick decisions) + + // Core (required) + message: string; // What to send to Claude + schema: z.ZodSchema<T>; // Expected response structure (Zod) + projectPath: string; // Working directory for Claude + + // Model selection + model?: 'sonnet' | 'haiku' | 'opus'; // Default: sonnet + fallbackModel?: 'sonnet' | 'haiku'; // Auto-fallback if primary overloaded + + // Tool control + tools?: string[]; // Restrict to specific tools only + disallowedTools?: string[]; // Block specific tools (default: ['AskUserQuestion']) + + // Guardrails + maxTurns?: number; // Limit agentic turns (default: 10) + maxBudgetUsd?: number; // Cost cap for this call + timeout?: number; // Process timeout in ms (default: 120000) + + // Prompt customization + appendSystemPrompt?: string; // Add to default system prompt +} + +interface ClaudeHelperResult<T> { + result: T; // Parsed, validated response + sessionId: string; // For potential follow-up + cost: number; // USD spent + turns: number; // Agentic turns used + duration: number; // Time in ms +} + +async function claudeHelper<T>( + options: ClaudeHelperOptions<T> +): Promise<ClaudeHelperResult<T>>; +``` + +#### CLI Flag Mapping + +| Option | CLI Flag | Notes | +|--------|----------|-------| +| `sessionId` | `--resume {id}` | Resume existing session | +| `forkSession` | `--fork-session` | Branch without polluting original | +| `noSessionPersistence` | `--no-session-persistence` 
| Don't save to disk | +| `schema` | `--json-schema "{...}"` | Zod schema converted to JSON Schema | +| `model` | `--model sonnet` | Model alias | +| `fallbackModel` | `--fallback-model sonnet` | Auto-fallback | +| `tools` | `--tools "Read,Grep,Glob"` | Restrict available tools | +| `disallowedTools` | `--disallowedTools "AskUserQuestion"` | Block tools | +| `maxTurns` | `--max-turns 10` | Limit iterations | +| `maxBudgetUsd` | `--max-budget-usd 2.00` | Cost cap | +| `appendSystemPrompt` | `--append-system-prompt "..."` | Add context | + +Always includes: `-p --output-format json --dangerously-skip-permissions` + +#### Use Case Examples + +**1. Quick Decision (stateless)** +```typescript +const NextStepSchema = z.object({ + action: z.enum(['run_design', 'run_analyze', 'run_implement', 'run_verify', 'wait', 'stop']), + reason: z.string(), + context: z.record(z.unknown()).optional(), +}); + +const { result } = await claudeHelper({ + message: `Given this orchestration state, what should happen next? + State: ${JSON.stringify(state)}`, + schema: NextStepSchema, + model: 'haiku', // Fast for simple decisions + noSessionPersistence: true, + maxTurns: 1, + projectPath, +}); +``` + +**2. Smart Batch Detection** +```typescript +const BatchPlanSchema = z.object({ + batches: z.array(z.object({ + name: z.string(), + taskIds: z.array(z.string()), + rationale: z.string(), + estimatedComplexity: z.enum(['low', 'medium', 'high']), + dependencies: z.array(z.string()).optional(), + })), + warnings: z.array(z.string()).optional(), +}); + +const { result } = await claudeHelper({ + message: `Group these tasks into logical implementation batches. + Consider dependencies, logical groupings, and ~10-15 tasks per batch. + + Tasks: + ${tasksContent}`, + schema: BatchPlanSchema, + model: 'sonnet', + tools: ['Read', 'Grep'], // Can read files to understand dependencies + maxTurns: 3, + maxBudgetUsd: 0.50, + projectPath, +}); +``` + +**3. 
Verification (read-only)** +```typescript +const VerificationSchema = z.object({ + completed: z.boolean(), + tasksVerified: z.array(z.string()), + failures: z.array(z.object({ + taskId: z.string(), + reason: z.string(), + evidence: z.string(), + })).optional(), + confidence: z.enum(['high', 'medium', 'low']), +}); + +const { result } = await claudeHelper({ + message: `Verify that batch "${batch.section}" completed successfully. + Expected tasks: ${batch.taskIds.join(', ')} + + Check: + 1. tasks.md shows these tasks as complete + 2. Referenced files exist and contain expected code + 3. Tests pass (if applicable)`, + schema: VerificationSchema, + model: 'sonnet', + tools: ['Read', 'Grep', 'Glob', 'Bash(npm test:*)', 'Bash(cat:*)'], // Read-only + tests + maxTurns: 5, + maxBudgetUsd: 1.00, + projectPath, +}); +``` + +**4. Healing with Session Fork** +```typescript +const HealingSchema = z.object({ + status: z.enum(['fixed', 'partial', 'failed']), + tasksCompleted: z.array(z.string()), + tasksRemaining: z.array(z.string()), + fixApplied: z.string().optional(), + blockerReason: z.string().optional(), +}); + +const { result } = await claudeHelper({ + sessionId: failedExecution.sessionId, + forkSession: true, // Don't pollute original if this fails too + message: `The batch failed with this error: + ${stderr} + + Fix the issue and complete remaining tasks: ${remainingTasks.join(', ')}`, + schema: HealingSchema, + maxTurns: 15, + maxBudgetUsd: 2.00, + projectPath, +}); +``` + +**5. Healing with Full Continuation** +```typescript +// When we're confident and want to continue the original session +const { result, sessionId } = await claudeHelper({ + sessionId: failedExecution.sessionId, + // No fork - continue the actual session + message: `You encountered an error. Here's stderr: + ${stderr} + + The original session has full context of what you were doing. 
+ Fix the issue and complete the remaining tasks in this batch.`, + schema: HealingSchema, + maxTurns: 20, + maxBudgetUsd: 3.00, + projectPath, +}); +// sessionId is same as input - session continues +``` + +#### Budget Configuration (Modal Additions) + +Add to orchestration config modal (Advanced Options): + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| Max budget per batch | currency | $5.00 | Cost cap per implement batch | +| Max budget total | currency | $50.00 | Total orchestration cost cap | +| Healing budget | currency | $2.00 | Max spend per auto-heal attempt | +| Decision budget | currency | $0.50 | Max spend per decision call | + +#### Implementation Notes + +**File location**: `packages/dashboard/src/lib/services/claude-helper.ts` + +**Error Handling**: +- Schema validation failure → return structured error, don't throw +- Budget exceeded → stop gracefully, return partial result +- Timeout → kill process, return timeout error +- Invalid session ID → fall back to new session with warning + +**Logging**: +- Log all decisions to orchestration `decisionLog` +- Include: prompt summary, model used, cost, result summary + +**Testing**: +- Mock utility for unit tests +- Integration tests with real Claude for critical paths + +--- + +### Design Decisions (Resolved) + +1. **Batch failure detection**: ✅ **Use A + C** + - Parse task completion from tasks.md after each batch (source of truth) + - AND require Claude to output structured completion status (belt-and-suspenders) + - Check orchestration state `step.current` for skill-signaled completion + +2. **Healing prompt scope**: ✅ **Current batch only** + - Healer continues remaining tasks in the current batch + - Once batch complete (or healer fails), proceed normally to next batch + +3. **Cross-batch state**: ✅ **Out of scope** + - If batch 2 breaks batch 1's work, healer tries once, then stops for user + - User can manually fix and resume + +4. 
**Concurrent orchestrations**: ✅ **No - one per project** + - Single active orchestration per project + - Attempting to start a second shows error: "Orchestration already in progress" + - Can cancel existing to start new + +5. **Resume after dashboard restart**: ✅ **Yes, auto-resume** + - Orchestration state persisted to `{project}/.specflow/workflows/orchestration-{id}.json` + - On startup, reconciler detects in-progress orchestrations + - Resumes from last known state + +6. **Decision timing**: ✅ **Wait for dual confirmation** + - Don't make decisions on state change alone + - Wait for BOTH: state update AND process completion + - Prevents race conditions from state updates mid-execution + +--- + ## 1054 - Project Details Redesign **Completed**: 2026-01-20 diff --git a/.specify/memory/security-checklist.md b/.specify/memory/security-checklist.md new file mode 100644 index 0000000..3fa93d1 --- /dev/null +++ b/.specify/memory/security-checklist.md @@ -0,0 +1,129 @@ +# Security Checklist + +> Security patterns, input validation, and data protection guidelines for SpecFlow projects. + +**Last Updated**: 2026-01-21 +**Constitution Alignment**: Principle V (Helpful Errors), Principle VI (Safe Operations) + +--- + +## Overview + +This checklist defines security standards that `/flow.verify` checks during memory compliance verification (Step 5, Agent 5). All implementations should follow these patterns. 
+ +--- + +## Input Validation + +| Check | Requirement | Example | +|-------|-------------|---------| +| User input boundaries | Validate all user inputs at system boundaries | CLI args, API params, form fields | +| Path traversal | Prevent directory traversal attacks | Reject paths containing `..` | +| Command injection | Sanitize inputs used in shell commands | Quote variables, avoid `eval` | +| Type coercion | Validate types explicitly | Use Zod schemas for validation | + +**Pattern**: +```typescript +// Good: Validate at boundary +const input = z.string().min(1).max(100).parse(userInput); + +// Bad: Trust user input +const query = `SELECT * FROM users WHERE name = '${userInput}'`; +``` + +--- + +## Error Handling + +| Check | Requirement | Example | +|-------|-------------|---------| +| No sensitive data | Error messages must not expose secrets | No API keys, passwords, paths | +| Safe stack traces | Production errors hide implementation details | Generic message + error code | +| Fail secure | On error, default to safe/denied state | Auth failure = access denied | + +**Pattern**: +```typescript +// Good: Generic error with code +throw new SpecflowError('Operation failed', 'E_OPERATION_FAILED'); + +// Bad: Exposes internals +throw new Error(`Database error: ${dbError.message} at ${dbError.stack}`); +``` + +--- + +## Authentication & Authorization + +| Check | Requirement | Example | +|-------|-------------|---------| +| Auth on sensitive ops | Protected operations require authentication | File writes, config changes | +| Principle of least privilege | Request minimum necessary permissions | Read-only when possible | +| Token handling | Never log or expose auth tokens | Mask in debug output | + +--- + +## Data Protection + +| Check | Requirement | Example | +|-------|-------------|---------| +| No secrets in code | Credentials in environment variables | `process.env.API_KEY` | +| No secrets in commits | Use `.gitignore` for sensitive files | `.env`, 
`credentials.json` | +| Secure storage | Use Keychain/secure storage for credentials | Not localStorage/UserDefaults | +| Encryption at rest | Sensitive data encrypted when stored | Use platform secure storage | + +**Pattern**: +```bash +# Good: Environment variable +API_KEY=$SPECFLOW_API_KEY + +# Bad: Hardcoded secret +API_KEY="sk-1234567890abcdef" +``` + +--- + +## File System Operations + +| Check | Requirement | Example | +|-------|-------------|---------| +| Path validation | Resolve and validate paths before use | `path.resolve()` then check | +| Sandbox enforcement | Operations stay within project directory | Reject absolute paths outside | +| Safe file permissions | Create files with restrictive permissions | 0600 for secrets, 0644 for config | + +**Pattern**: +```typescript +// Good: Validate path is within project +const resolved = path.resolve(projectRoot, userPath); +if (resolved !== projectRoot && !resolved.startsWith(projectRoot + path.sep)) { + throw new Error('Path outside project directory'); +} +``` + +--- + +## Verification Commands + +```bash +# Check for hardcoded secrets +grep -r "password\|secret\|api_key\|token" --include="*.ts" src/ + +# Check for unsafe eval usage +grep -r "eval\|Function(" --include="*.ts" src/ + +# Verify .gitignore includes sensitive patterns +cat .gitignore | grep -E "\.env|credentials|secret" +``` + +--- + +## Checklist Items for /flow.verify + +When verifying security compliance, check: + +- [ ] SEC-001: No hardcoded credentials in source code +- [ ] SEC-002: Environment variables used for sensitive config +- [ ] SEC-003: User inputs validated at system boundaries +- [ ] SEC-004: Error messages don't expose sensitive information +- [ ] SEC-005: File operations stay within project sandbox +- [ ] SEC-006: Auth checks on sensitive operations +- [ ] SEC-007: .gitignore excludes sensitive files diff --git a/.specify/phases/1055-smart-batching.md b/.specify/phases/1055-smart-batching.md deleted file mode 100644 index 28d80e7..0000000 ---
a/.specify/phases/1055-smart-batching.md +++ /dev/null @@ -1,142 +0,0 @@ ---- -phase: 1055 -name: smart-batching-orchestration -status: not_started -created: 2026-01-18 -pdr: workflow-dashboard-orchestration.md ---- - -> **Architecture Context**: See [PDR: Workflow Dashboard Orchestration](../../memory/pdrs/workflow-dashboard-orchestration.md) for holistic architecture, design decisions, and how this phase fits into the larger vision. - -### 1055 - Smart Batching & Orchestration - -**Goal**: Autonomous implement execution with smart batching and auto-healing. - -**Context**: Large task lists (50+) exceed context windows. This phase adds intelligent batching using existing tasks.md sections, a state machine for orchestration, and auto-healing when batches fail. - -**Key Principles:** -- **Programmatic batching** - No UI for selecting tasks, fully automatic -- **Minimal user interaction** - User only intervenes for questions and true blockers -- **Auto-healing** - Spawn fixer Claude on failure, retry once before stopping - ---- - -**Scope:** - -### 1. Programmatic Batch Detection - -Parse existing task sections from tasks.md: -- Use markdown headers (`## Section Name`) as batch boundaries -- Each `##` section becomes one batch -- Fall back to fixed-size batches (~15 tasks) if no sections -- Respect task dependencies within sections - -Example tasks.md structure recognized: -```markdown -## Progress Dashboard -Total: 0/25 | Blocked: 0 - -## Setup -- [ ] T001 Create project structure -- [ ] T002 Configure build system - -## Core Components -- [ ] T003 Implement base service -- [ ] T004 Add API routes - -## Integration -- [ ] T005 Wire up endpoints -``` - -### 2. Dashboard Orchestration State Machine - -``` -[Start] → Check Status → Design needed? → /flow.design - → Tasks incomplete? → /flow.implement (batch N) - → All tasks done? → /flow.verify - → Verified? 
→ /flow.merge (approval required) - → [Complete] -``` - -- Between each step: `specflow status --json` to determine next action -- State persisted in workflow execution record -- Transitions based on simple rules: - - `hasSpecs: false` → run design - - `tasksComplete < tasksTotal` → run implement (next batch) - - `tasksComplete == tasksTotal` → run verify - - `verificationComplete: true` → offer merge -- Fallback: Spawn Claude to analyze when state unclear - -### 3. Sequential Batch Execution - -- Run each task section as a separate /flow.implement invocation -- Modified prompt tells Claude which tasks to work on: - ``` - Execute the following tasks from the "Core Components" section: - T003, T004, T005 - - Do NOT work on tasks from other sections. - ``` -- Wait for completion before starting next batch -- Track: current batch index, batch status, tasks completed per batch - -### 4. Auto-Healing on Failure - -When a batch fails: - -1. **Capture error details**: - - stderr output - - Session transcript (last N messages) - - Tasks attempted vs completed - - Specific error messages - -2. **Spawn healer Claude**: - ``` - The following implement batch failed: - - Batch: "## Core Components" - - Error: [error details] - - Tasks attempted: T005-T012 - - Tasks completed: T005-T008 - - Tasks failed: T009 (file not found) - - Analyze the failure and fix the issue, then continue - with remaining tasks in this batch. - ``` - -3. **Healer outcome**: - - If healer succeeds → mark batch complete, continue to next batch - - If healer fails → stop execution, notify user with full context - - Only one heal attempt per batch (prevent infinite loops) - -### 5. Orchestration Progress Display - -UI components showing: -- Current phase indicator: `Design → Implement → Verify → Merge` -- Current batch: "Implementing batch 2 of 4: Core Components" -- Tasks completed: "12/35 tasks complete" -- Healing status: "Auto-healing batch 2..." 
(when active) -- Time elapsed per batch - ---- - -**Deliverables:** -- Batch parser in `workflow-service.ts` (uses existing tasks.ts) -- `OrchestrationStateMachine.ts` - State machine logic -- `AutoHealingService.ts` - Failure detection and healing prompts -- `OrchestrationProgress.tsx` - Progress display component -- API route: POST `/api/workflow/orchestrate` - Start full orchestration -- Tests for batch parsing and state machine transitions - -**Dependencies:** -- Phase 1050 (workflow execution infrastructure) -- Can run in parallel with 1051 (Questions) - -**Verification Gate: USER** -- [ ] Start orchestrate, see batches auto-detected from tasks.md sections -- [ ] Batches execute sequentially without user input -- [ ] Introduce a failure (e.g., missing file), see auto-heal attempt -- [ ] If heal succeeds, execution continues -- [ ] Progress shows batch status clearly -- [ ] State machine transitions correctly (design→implement→verify) - -**Estimated Complexity**: High diff --git a/.specify/phases/1056-jsonl-watcher.md b/.specify/phases/1056-jsonl-watcher.md new file mode 100644 index 0000000..9a8457c --- /dev/null +++ b/.specify/phases/1056-jsonl-watcher.md @@ -0,0 +1,117 @@ +--- +phase: 1056 +name: jsonl-watcher +status: not_started +created: 2026-01-22 +updated: 2026-01-22 +--- + +### 1056 - JSONL File Watcher (Push-Based Updates) + +**Goal**: Replace polling with push-based updates for session content, providing near-instant UI updates when JSONL files change. + +**Context**: Currently, the dashboard polls session files every 3 seconds. When Claude outputs messages or asks questions, there's up to 3 seconds of delay before the UI updates. This is especially problematic for questions where users need to respond promptly. File watching with Server-Sent Events (SSE) would provide instant updates. + +--- + +**Scope:** + +### 1. 
Server-Side File Watcher + +Implement file watching on the Next.js server: +- Watch active session JSONL files using `fs.watch` or `chokidar` +- Detect changes and parse new content +- Track which sessions are being watched (cleanup on disconnect) +- Handle file rotation/truncation gracefully + +### 2. SSE Endpoint + +New API route for streaming session updates: +- `GET /api/session/stream?sessionId=xxx&projectPath=yyy` +- Returns Server-Sent Events stream +- Events: `message`, `question`, `tool_call`, `session_end`, `error` +- Heartbeat every 30s to detect stale connections +- Automatic cleanup when client disconnects + +### 3. Client Hook Updates + +Update `useSessionMessages` (or create new `useSessionStream`): +- Prefer SSE when available, fall back to polling +- Reconnect on connection loss with exponential backoff +- Merge streamed updates with existing state +- Handle out-of-order events gracefully + +### 4. Question Detection Enhancement + +Improve question detection for instant display: +- Parse `AskUserQuestion` tool calls from JSONL in real-time +- Emit `question` SSE event immediately when detected +- Update `DecisionToast` visibility without waiting for workflow status poll + +--- + +**Technical Notes:** + +Architecture: +``` +┌─────────────────┐ fs.watch ┌─────────────────┐ +│ JSONL file │ ───────────────▶ │ Server (Next) │ +│ changes │ │ detects change │ +└─────────────────┘ └────────┬────────┘ + │ SSE push + ▼ + ┌─────────────────┐ + │ Client UI │ + │ updates │ + └─────────────────┘ +``` + +SSE Event Format: +```typescript +interface SessionSSEEvent { + type: 'message' | 'question' | 'tool_call' | 'session_end' | 'error' | 'heartbeat'; + data: SessionMessage | Question | ToolCallInfo | null; + timestamp: string; +} +``` + +Considerations: +- File watcher limits on macOS (256 default, can be increased) +- Cleanup watchers for inactive sessions (5 min timeout) +- Rate limiting to prevent overwhelming clients (debounce 100ms) +- Graceful degradation to polling if
SSE fails + +--- + +**UI Components:** +- No new visual components - improves responsiveness of existing UI + +**API Routes:** +- GET `/api/session/stream` - SSE endpoint for session updates + +**Hooks:** +- `useSessionStream.ts` - New hook for SSE-based session updates +- Update `useSessionMessages.ts` - Integrate SSE or keep as fallback + +**Services:** +- `session-watcher.ts` - Server-side file watcher manager +- `sse-manager.ts` - SSE connection management + +--- + +**Dependencies:** +- Phase 1055 (Smart Batching) - Stable orchestration foundation + +**Verification Gate: USER** +- [ ] Session messages appear within 500ms of Claude output +- [ ] Questions appear instantly (no 3s delay) +- [ ] Connection recovers gracefully after network interruption +- [ ] No memory leaks from file watchers +- [ ] Fallback to polling works when SSE unavailable + +**Estimated Complexity**: Medium + +**Risk Notes:** +- File watcher resource limits on systems with many concurrent sessions +- SSE connection limits in browsers (6 per domain in HTTP/1.1) +- Edge cases with rapid file changes (debouncing needed) diff --git a/.specify/templates/checklist-template.md b/.specify/templates/checklist-template.md index 086a594..643e6f0 100644 --- a/.specify/templates/checklist-template.md +++ b/.specify/templates/checklist-template.md @@ -11,6 +11,18 @@ description: 'Checklist template for verification' **Note**: This checklist is generated by the `/flow.design` command based on feature context and requirements. 
+## Checklist ID Prefixes (Standardized) + +| Prefix | Type | Example | Used In | +|--------|------|---------|---------| +| `V-###` | Verification item | V-001, V-002 | checklists/verification.md | +| `I-###` | Implementation guidance | I-001, I-002 | checklists/implementation.md | +| `T###` | Task (in tasks.md) | T001, T002 | tasks.md | +| `D-###` | Deferred item | D-001 | BACKLOG.md | +| `C-###` | Custom/other | C-001 | Custom checklists | + +**Mark items with**: `specflow mark V-001` or `specflow mark I-001` +