Select all that apply.
+ )}
- {/* Option buttons */}
+ {/* Option buttons - fixed at bottom */}
{!showCustomInput && (
- <>
+
)}
- {/* Custom input */}
+ {/* Custom input - fixed at bottom */}
{showCustomInput && (
-
+
)}
+ {/* FR-004: Step Override - Go Back to Previous Step */}
+ {hasOrchestration && currentStep && onGoBackToStep && (
+
+ )}
+
{/* Touched Files */}
diff --git a/packages/dashboard/src/components/orchestration/complete-phase-button.tsx b/packages/dashboard/src/components/orchestration/complete-phase-button.tsx
index b816c05..7a42191 100644
--- a/packages/dashboard/src/components/orchestration/complete-phase-button.tsx
+++ b/packages/dashboard/src/components/orchestration/complete-phase-button.tsx
@@ -198,6 +198,8 @@ export const CompletePhaseButton = React.forwardRef
void;
/** Callback for resume action */
@@ -47,6 +49,7 @@ export interface OrchestrationControlsProps {
export function OrchestrationControls({
isPaused,
+ isRunnerStalled = false,
onPause,
onResume,
onCancel,
@@ -81,13 +84,13 @@ export function OrchestrationControls({
{/* Pause/Resume Button */}
- {isPaused ? (
+ {isPaused || isRunnerStalled ? (
diff --git a/packages/dashboard/src/hooks/use-orchestration.ts b/packages/dashboard/src/hooks/use-orchestration.ts
index d4ead29..a201260 100644
--- a/packages/dashboard/src/hooks/use-orchestration.ts
+++ b/packages/dashboard/src/hooks/use-orchestration.ts
@@ -9,7 +9,8 @@
*/
import { useState, useCallback, useEffect, useRef } from 'react';
-import type { OrchestrationExecution, OrchestrationConfig } from '@specflow/shared';
+import type { OrchestrationConfig } from '@specflow/shared';
+import type { OrchestrationExecution } from '@/lib/services/orchestration-types';
import type { BatchPlanInfo } from '@/components/orchestration/start-orchestration-modal';
import type { RecoveryOption } from '@/components/orchestration/recovery-panel';
import { useUnifiedData } from '@/contexts/unified-data-context';
@@ -75,6 +76,12 @@ export interface UseOrchestrationReturn {
fetchBatchPlan: () => Promise
;
/** Refresh status */
refresh: () => Promise;
+ /** Go back to a previous step (FR-004) */
+ goBackToStep: (step: string) => Promise;
+ /** Whether going back to step is in progress */
+ isGoingBackToStep: boolean;
+ /** Whether the runner is stalled (status is running but runner process is dead) */
+ isRunnerStalled: boolean;
}
// =============================================================================
@@ -98,11 +105,13 @@ export function useOrchestration({
const [isWaitingForInput, setIsWaitingForInput] = useState(false);
const [isRecovering, setIsRecovering] = useState(false);
const [recoveryAction, setRecoveryAction] = useState(null);
+ const [isGoingBackToStep, setIsGoingBackToStep] = useState(false);
+ const [isRunnerStalled, setIsRunnerStalled] = useState(false);
const lastStatusRef = useRef(null);
// SSE data for event-driven refresh (T028: replaces polling)
- const { workflows, states } = useUnifiedData();
+ const { workflows, states, connectionStatus } = useUnifiedData();
// Use refs for callbacks to avoid recreating fetchStatus on every render
const onStatusChangeRef = useRef(onStatusChange);
@@ -142,6 +151,16 @@ export function useOrchestration({
// Check if workflow is waiting for input (FR-072)
setIsWaitingForInput(data.workflow?.status === 'waiting_for_input');
+ // Check if runner is stalled (running status but runner process is dead
+ // AND no active workflow — if a workflow is running, things are progressing fine)
+ const hasActiveWorkflow = data.workflow?.status === 'running' ||
+ data.workflow?.status === 'waiting_for_input';
+ setIsRunnerStalled(
+ newOrchestration?.status === 'running' &&
+ data.runnerActive === false &&
+ !hasActiveWorkflow
+ );
+
// Handle status change callbacks
if (newOrchestration) {
const newStatus = newOrchestration.status;
@@ -191,6 +210,9 @@ export function useOrchestration({
}
}, []);
+ // Ref to track active session polling so it can be cleaned up
+ const sessionPollAbortRef = useRef(null);
+
// Start orchestration
const start = useCallback(
async (config: OrchestrationConfig) => {
@@ -224,39 +246,47 @@ export function useOrchestration({
// Initial refresh to get orchestration state
await refresh();
- // Poll for sessionId - it becomes available after CLI spawns and returns first output
- // This can take 30+ seconds for complex workflows. Poll for up to 90 seconds.
- // IMPORTANT: We await this to keep isLoading=true until session is found
- const maxAttempts = 90;
- const pollInterval = 1000;
-
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
- await new Promise(resolve => setTimeout(resolve, pollInterval));
-
- try {
- const statusResponse = await fetch(
- `/api/workflow/orchestrate/status?projectId=${encodeURIComponent(projectId)}`
- );
- if (statusResponse.ok) {
- const statusData = await statusResponse.json();
- if (statusData.workflow?.sessionId) {
- setActiveSessionId(statusData.workflow.sessionId);
- setOrchestration(statusData.orchestration);
- setIsLoading(false);
- return; // Found sessionId, stop polling
- }
- // Also update orchestration state during polling so UI shows progress
- if (statusData.orchestration) {
- setOrchestration(statusData.orchestration);
+ // Return immediately so the caller (modal) can close.
+ // Poll for sessionId in the background — SSE events will also
+ // update state, but polling provides a reliable fallback.
+ setIsLoading(false);
+
+ // Abort any previous session poll
+ sessionPollAbortRef.current?.abort();
+ const abortController = new AbortController();
+ sessionPollAbortRef.current = abortController;
+
+ void (async () => { // fire-and-forget background poll for the workflow sessionId; cancelled through abortController
+ const isConnected = connectionStatus === 'connected';
+ const maxAttempts = isConnected ? 12 : 20; // 12 x 2s = ~24s when SSE is connected, 20 x 3s = ~60s otherwise
+ const pollInterval = isConnected ? 2000 : 3000;
+
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ if (abortController.signal.aborted) return;
+ await new Promise(resolve => setTimeout(resolve, pollInterval));
+ if (abortController.signal.aborted) return;
+
+ try {
+ const statusResponse = await fetch(
+ `/api/workflow/orchestrate/status?projectId=${encodeURIComponent(projectId)}`, { signal: abortController.signal }
+ );
+ if (statusResponse.ok) {
+ const statusData = await statusResponse.json();
+ if (statusData.workflow?.sessionId) {
+ setActiveSessionId(statusData.workflow.sessionId);
+ setOrchestration(statusData.orchestration);
+ return; // Found sessionId, stop polling
+ }
+ // Also update orchestration state during polling so UI shows progress
+ if (statusData.orchestration) {
+ setOrchestration(statusData.orchestration);
+ }
}
+ } catch {
+ // Continue polling on error (an aborted request also lands here; the aborted check at the top of the loop then exits)
+ }
- } catch {
- // Continue polling on error
}
- }
-
- // Polling timed out without finding session - still set loading false
- setIsLoading(false);
+ })();
} catch (err) {
const message = err instanceof Error ? err.message : 'Unknown error';
setError(message);
@@ -264,7 +294,7 @@ export function useOrchestration({
setIsLoading(false);
}
},
- [projectId, refresh]
+ [projectId, refresh, connectionStatus]
);
// Pause orchestration
@@ -399,6 +429,32 @@ export function useOrchestration({
}
}, [orchestration, projectId, refresh]);
+ // Go back to a previous step (FR-004): POSTs { projectId, id, step } to the go-back endpoint, then refreshes status
+ const goBackToStep = useCallback(async (step: string) => {
+ if (!orchestration) return; // no loaded orchestration, so there is no id to send
+
+ setIsGoingBackToStep(true);
+ try {
+ const response = await fetch('/api/workflow/orchestrate/go-back', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ projectId, id: orchestration.id, step }),
+ });
+
+ if (!response.ok) {
+ const data = await response.json().catch(() => ({})); // error body may not be JSON; fall back to the generic message
+ throw new Error(data.error || 'Failed to go back to step');
+ }
+
+ await refresh();
+ } catch (err) {
+ const message = err instanceof Error ? err.message : 'Unknown error';
+ setError(message); // failures are reported through hook state rather than rethrown to the caller
+ } finally {
+ setIsGoingBackToStep(false);
+ }
+ }, [orchestration, projectId, refresh]);
+
// T028: Event-driven refresh via SSE instead of polling
// When workflow or state SSE events come in, refresh orchestration status
// This replaces the previous setInterval polling
@@ -445,12 +501,15 @@ export function useOrchestration({
isWaitingForInput,
isRecovering,
recoveryAction,
+ isGoingBackToStep,
+ isRunnerStalled,
start,
pause,
resume,
cancel,
triggerMerge,
recover,
+ goBackToStep,
fetchBatchPlan,
refresh,
};
diff --git a/packages/dashboard/src/hooks/use-workflow-actions.ts b/packages/dashboard/src/hooks/use-workflow-actions.ts
index 6bf8174..9a4a9b2 100644
--- a/packages/dashboard/src/hooks/use-workflow-actions.ts
+++ b/packages/dashboard/src/hooks/use-workflow-actions.ts
@@ -24,19 +24,27 @@ import {
requestNotificationPermission,
hasRequestedPermission,
} from '@/lib/notifications';
+import { toastWarning } from '@/lib/toast-helpers';
interface StartWorkflowOptions {
/** Optional session ID to resume an existing session */
resumeSessionId?: string;
}
+interface SubmitAnswersOptions { // identifies the waiting workflow to answer; at least one of the two IDs must be set
+ /** Execution ID (preferred): when present it is sent as `id` and sessionId is not used */
+ executionId?: string;
+ /** Session ID fallback: used only when executionId is absent, sent together with the projectId */
+ sessionId?: string;
+}
+
interface UseWorkflowActionsResult {
/** Start a new workflow with the given skill */
start: (skill: string, options?: StartWorkflowOptions) => Promise;
/** Cancel the current workflow */
cancel: (executionId?: string, sessionId?: string) => Promise;
- /** Submit answers to a waiting workflow */
- submitAnswers: (executionId: string, answers: Record) => Promise;
+ /** Submit answers to a waiting workflow. Can use executionId or sessionId for lookup. */
+ submitAnswers: (options: SubmitAnswersOptions, answers: Record) => Promise;
/** True while a workflow action is in progress */
isSubmitting: boolean;
/** Error from last action */
@@ -76,6 +84,9 @@ async function cancelWorkflowApi(
executionId?: string,
sessionId?: string
): Promise {
+ if (!executionId && !sessionId) {
+ return;
+ }
const params = new URLSearchParams();
if (executionId) params.set('id', executionId);
if (sessionId) params.set('sessionId', sessionId);
@@ -85,26 +96,45 @@ async function cancelWorkflowApi(
method: 'POST',
});
+ const data = await res.json().catch(() => ({}));
+
if (!res.ok) {
- const data = await res.json().catch(() => ({}));
// Not found is okay - workflow already cancelled/completed
if (!data.error?.includes('not found')) {
throw new Error(data.error || `Failed to cancel workflow: ${res.status}`);
}
+ return;
+ }
+
+ if (data.warning) {
+ toastWarning('Cancellation warning', data.warning);
}
}
/**
* Submit answers to a waiting workflow
+ * Supports either executionId (preferred) or sessionId+projectId lookup
*/
async function submitAnswersApi(
- executionId: string,
+ projectId: string,
+ options: SubmitAnswersOptions,
answers: Record
): Promise {
+ const body: Record = { answers };
+
+ if (options.executionId) {
+ body.id = options.executionId;
+ } else if (options.sessionId) {
+ body.sessionId = options.sessionId;
+ body.projectId = projectId;
+ } else {
+ throw new Error('Either executionId or sessionId must be provided');
+ }
+
const res = await fetch('/api/workflow/answer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ id: executionId, answers }),
+ body: JSON.stringify(body),
});
if (!res.ok) {
@@ -170,12 +200,16 @@ export function useWorkflowActions(projectId: string | null): UseWorkflowActions
);
const submitAnswers = useCallback(
- async (executionId: string, answers: Record) => {
+ async (options: SubmitAnswersOptions, answers: Record) => {
+ if (!projectId) {
+ throw new Error('No project selected');
+ }
+
setIsSubmitting(true);
setError(null);
try {
- await submitAnswersApi(executionId, answers);
+ await submitAnswersApi(projectId, options, answers);
} catch (err) {
const e = err instanceof Error ? err : new Error('Unknown error');
setError(e);
@@ -184,7 +218,7 @@ export function useWorkflowActions(projectId: string | null): UseWorkflowActions
setIsSubmitting(false);
}
},
- []
+ [projectId]
);
return {
diff --git a/packages/dashboard/src/lib/services/orchestration-decisions.ts b/packages/dashboard/src/lib/services/orchestration-decisions.ts
index 7fdda6f..9aff8c9 100644
--- a/packages/dashboard/src/lib/services/orchestration-decisions.ts
+++ b/packages/dashboard/src/lib/services/orchestration-decisions.ts
@@ -1,662 +1,291 @@
/**
* Orchestration Decision Logic - Pure Functions
*
- * This module contains pure decision-making functions extracted from orchestration-runner.ts
- * for better testability and separation of concerns.
- *
- * Key principles:
- * - All functions are pure (no I/O, no side effects)
- * - State is passed in, decisions are returned
- * - Trusts step.status from state file (FR-001)
- * - Complete decision matrix with no ambiguous cases (FR-002)
+ * Simplified decision matrix that trusts CLI state as the source of truth.
+ * The runner supplies the current step/status, dashboard config, batch tracking,
+ * and a snapshot of any active workflow.
*/
import type {
- OrchestrationExecution,
+ BatchTracking,
+ OrchestrationConfig,
OrchestrationPhase,
- OrchestrationState,
StepStatus,
- BatchItem,
} from '@specflow/shared';
-import { STEP_INDEX_MAP } from '@specflow/shared';
// =============================================================================
// Types
// =============================================================================
-/**
- * Decision actions that the runner can execute
- */
export type DecisionAction =
- | 'wait' // Continue polling, nothing to do
- | 'wait_with_backoff' // Wait with exponential backoff (lookup failure)
- | 'wait_user_gate' // Wait for USER_GATE confirmation
- | 'wait_merge' // Wait for user to trigger merge
- | 'transition' // Transition to next step
- | 'spawn' // Spawn workflow for current step
- | 'spawn_batch' // Spawn workflow for current batch
- | 'advance_batch' // Move to next batch
- | 'initialize_batches' // Initialize batch tracking
- | 'force_step_complete' // Force step.status to complete (all batches done)
- | 'heal_batch' // Attempt to heal failed batch
- | 'pause' // Pause orchestration (pauseBetweenBatches)
- | 'complete' // Orchestration complete
- | 'recover_stale' // Recover from stale workflow
- | 'recover_failed' // Recover from failed step/workflow
- | 'needs_attention' // Needs user intervention
- | 'fail'; // Terminal failure
-
-/**
- * Result of the decision function
- */
-export interface DecisionResult {
+ | 'idle'
+ | 'wait'
+ | 'spawn'
+ | 'transition'
+ | 'wait_merge'
+ | 'initialize_batches'
+ | 'advance_batch'
+ | 'heal_batch'
+ | 'needs_attention';
+
+export interface Decision {
action: DecisionAction;
reason: string;
- /** Skill to spawn (for spawn/spawn_batch actions) */
- skill?: string;
- /** Next step to transition to */
nextStep?: string;
- /** Next step index */
- nextIndex?: number;
- /** Batch context for implement phase */
- batchContext?: string;
- /** Batch index for batch operations */
+ skill?: string;
batchIndex?: number;
- /** Error message for failure cases */
- errorMessage?: string;
- /** Recovery options for needs_attention */
- recoveryOptions?: Array<'retry' | 'skip' | 'abort'>;
- /** Failed workflow ID for recovery context */
- failedWorkflowId?: string;
- /** Backoff time in ms */
- backoffMs?: number;
- /** Workflow ID for stale recovery */
- workflowId?: string;
+ context?: string;
+ pauseAfterAdvance?: boolean;
}
-/**
- * Workflow state passed to decision functions
- * Simplified interface to avoid coupling to workflow service
- * NOTE: 'detached' and 'stale' are intermediate health states that
- * can occur during workflow execution monitoring
- */
export interface WorkflowState {
id: string;
- status: 'running' | 'waiting_for_input' | 'completed' | 'failed' | 'cancelled' | 'detached' | 'stale';
- error?: string;
- lastActivityAt?: string;
+ status: 'running' | 'waiting_for_input' | 'completed' | 'failed' | 'cancelled';
}
-/**
- * Input for makeDecision - all state needed to make a decision
- */
export interface DecisionInput {
- /** Current orchestration step from state file */
+ active: boolean;
step: {
- current: string | null;
- index: number | null;
+ current: OrchestrationPhase;
status: StepStatus | null;
};
- /** Phase info from state file */
- phase: {
- hasUserGate?: boolean;
- userGateStatus?: 'pending' | 'confirmed' | 'skipped';
- };
- /** Orchestration execution state */
- execution: OrchestrationExecution;
- /** Current workflow state (if any) */
+ config: OrchestrationConfig;
+ batches: BatchTracking;
workflow: WorkflowState | null;
- /** Last file change time (for staleness detection) */
- lastFileChangeTime?: number;
- /** Lookup failures count (for backoff) */
- lookupFailures?: number;
- /** Current timestamp (for duration checks) */
- currentTime?: number;
}
// =============================================================================
-// Constants
+// Helpers
// =============================================================================
-/** Stale threshold - 10 minutes with no activity */
-export const STALE_THRESHOLD_MS = 10 * 60 * 1000;
-
-/** Maximum orchestration duration - 4 hours */
-export const MAX_ORCHESTRATION_DURATION_MS = 4 * 60 * 60 * 1000;
-
-/** Step order for transitions */
-const STEP_ORDER: readonly string[] = ['design', 'analyze', 'implement', 'verify', 'merge'] as const;
-
-// =============================================================================
-// Helper Functions (Pure)
-// =============================================================================
-
-/**
- * Get the skill command for a given step
- */
-export function getSkillForStep(step: string): string {
- const skillMap: Record = {
- design: 'flow.design',
- analyze: 'flow.analyze',
- implement: 'flow.implement',
- verify: 'flow.verify',
- merge: 'flow.merge',
- };
- return skillMap[step] || 'flow.implement';
-}
-
-/**
- * Get the next step in the orchestration flow
- * Returns null if current step is the last one (merge)
- */
-export function getNextStep(current: string): string | null {
- const currentIndex = STEP_ORDER.indexOf(current);
- if (currentIndex === -1 || currentIndex >= STEP_ORDER.length - 1) {
- return null;
- }
- return STEP_ORDER[currentIndex + 1];
-}
+const ACTIVE_WORKFLOW_STATUSES = new Set([
+ 'running',
+ 'waiting_for_input',
+]);
-/**
- * Calculate exponential backoff for lookup failures
- */
-export function calculateExponentialBackoff(failures: number): number {
- const baseMs = 1000;
- const maxMs = 30000;
- const backoff = Math.min(baseMs * Math.pow(2, failures), maxMs);
- return backoff;
+function hasActiveWorkflow(workflow: WorkflowState | null): boolean { // true only when a workflow exists and its status is in ACTIVE_WORKFLOW_STATUSES
+ return workflow ? ACTIVE_WORKFLOW_STATUSES.has(workflow.status) : false;
}
-/**
- * Check if all batches are complete (completed or healed)
- */
-export function areAllBatchesComplete(batches: OrchestrationExecution['batches']): boolean {
+export function areAllBatchesComplete(batches: BatchTracking): boolean { // false for an empty batch list; otherwise true only when every batch is done
if (batches.items.length === 0) return false;
return batches.items.every(
- (b) => b.status === 'completed' || b.status === 'healed'
+ (batch) => batch.status === 'completed' || batch.status === 'healed' // a healed batch counts as done
);
}
-/**
- * Get the current batch from execution state
- */
-export function getCurrentBatch(execution: OrchestrationExecution): BatchItem | undefined {
- return execution.batches.items[execution.batches.current];
+function buildBatchContext( // builds the instruction text that restricts a run to one batch section
+ batch: BatchTracking['items'][number],
+ additionalContext?: string
+): string {
+ const scope = `Execute only the "${batch.section}" section (${batch.taskIds.join(', ')}). Do NOT work on tasks from other sections.`;
+ return additionalContext ? [scope, additionalContext].join('\n\n') : scope; // extra context, when non-empty, follows after a blank line
}
// =============================================================================
-// Batch Handling (Pure) - FR-003
+// Decision Matrix
// =============================================================================
-/**
- * Handle implement phase batching decisions
- *
- * This is the batch state machine from FR-003:
- * - No batches → initialize_batches
- * - Pending batch + no workflow → spawn_batch
- * - Running batch + workflow running → let staleness check handle
- * - Completed batch + pauseBetweenBatches → pause
- * - Completed batch + continue → advance_batch
- * - Failed batch + heal attempts remaining → heal_batch
- * - Failed batch + no attempts → recover_failed
- * - All batches complete + step not complete → force_step_complete
- *
- * Returns null if no batch-specific decision needed (defer to main matrix)
- */
-export function handleImplementBatching(
- step: DecisionInput['step'],
- execution: OrchestrationExecution,
- workflow: WorkflowState | null
-): DecisionResult | null {
- const { batches, config } = execution;
-
- // No batches yet - need to initialize (G2.1)
- if (batches.total === 0) {
- return {
- action: 'initialize_batches',
- reason: 'No batches populated',
- };
+export function getNextAction(input: DecisionInput): Decision { // entry point of the decision matrix: maps one state snapshot to the next action
+ if (!input.active) {
+ return { action: 'idle', reason: 'No active orchestration' };
}
- const currentBatch = batches.items[batches.current];
- const allBatchesComplete = areAllBatchesComplete(batches);
+ const stepStatus: StepStatus = input.step.status ?? 'not_started'; // a null status is treated as 'not_started'
- // All batches done (G2.10) → check if step.status needs updating
- if (allBatchesComplete) {
- // Trust sub-command to set step.status=complete
- // But if it didn't, force it (G2.11)
- if (step.status !== 'complete') {
- return {
- action: 'force_step_complete',
- reason: 'All batches complete but step.status not updated',
- };
- }
- return null; // Let normal decision matrix handle transition
+ if (hasActiveWorkflow(input.workflow) && stepStatus !== 'complete' && stepStatus !== 'failed') { // active workflow on an unfinished step: keep waiting
+ return { action: 'wait', reason: 'Workflow running' };
}
- // Current batch running with active workflow (G2.5) → defer to staleness check
- if (currentBatch?.status === 'running' && workflow?.status === 'running') {
- return null; // Let normal staleness check handle this
+ switch (input.step.current) { // dispatch to the handler for the current step
+ case 'design':
+ return handleSimpleStep('design', 'analyze', stepStatus, input.workflow);
+ case 'analyze':
+ return handleSimpleStep('analyze', 'implement', stepStatus, input.workflow);
+ case 'implement':
+ return handleImplement(stepStatus, input.batches, input.config, input.workflow);
+ case 'verify':
+ return handleVerify(stepStatus, input.config, input.workflow);
+ case 'merge':
+ return handleMerge(stepStatus, input.workflow);
+ default: // any step name outside the five handled above
+ return { action: 'needs_attention', reason: `Unknown step: ${input.step.current}` };
}
+}
- // Current batch running but workflow completed → mark batch complete and advance (G2.5b)
- if (currentBatch?.status === 'running' && workflow?.status === 'completed') {
- // Check pauseBetweenBatches config (G2.6)
- if (config.pauseBetweenBatches) {
- return {
- action: 'advance_batch',
- batchIndex: batches.current,
- reason: 'Batch workflow complete, pauseBetweenBatches enabled - completing and pausing',
- };
- }
-
- const nextBatchIndex = batches.current + 1;
- if (nextBatchIndex < batches.total) {
- return {
- action: 'advance_batch',
- batchIndex: batches.current,
- reason: `Batch ${batches.current} workflow complete, advancing to batch ${nextBatchIndex}`,
- };
- }
+function handleSimpleStep( // decides the action for a step with no batch handling (called for design and analyze)
+ current: OrchestrationPhase, // step being evaluated
+ next: OrchestrationPhase, // step to transition to once `current` is complete
+ stepStatus: StepStatus,
+ workflow: WorkflowState | null
+): Decision {
+ if (workflow?.status === 'failed') { // checked first, so a failed workflow wins even when stepStatus is 'complete'
+ return { action: 'needs_attention', reason: `${current} workflow failed` };
+ }
- // All batches done, but step not marked complete yet
+ if (stepStatus === 'complete') {
return {
- action: 'force_step_complete',
- reason: 'All batches completed (last batch workflow done)',
+ action: 'transition',
+ nextStep: next,
+ skill: `flow.${next}`, // skill name is derived from the next step's name
+ reason: `${current} complete`,
};
}
- // Current batch completed or healed → advance to next batch (G2.7, G2.8)
- if (currentBatch?.status === 'completed' || currentBatch?.status === 'healed') {
- // Check pauseBetweenBatches config (G2.6)
- if (config.pauseBetweenBatches) {
- return {
- action: 'pause',
- reason: 'Batch complete, pauseBetweenBatches enabled',
- };
- }
-
- const nextBatchIndex = batches.current + 1;
- if (nextBatchIndex < batches.total) {
- return {
- action: 'advance_batch',
- batchIndex: nextBatchIndex,
- reason: `Batch ${batches.current} complete, advancing to batch ${nextBatchIndex}`,
- };
- }
+ if (stepStatus === 'failed') {
+ return { action: 'needs_attention', reason: `${current} failed` };
}
- // Current batch pending + no workflow (G2.4) → spawn batch
- if (currentBatch?.status === 'pending' && !workflow) {
- const batchContext = `Execute tasks ${currentBatch.taskIds.join(', ')} in section "${currentBatch.section}"`;
- return {
- action: 'spawn_batch',
- skill: 'flow.implement',
- batchContext: config.additionalContext
- ? `${batchContext}\n\n${config.additionalContext}`
- : batchContext,
- reason: `Starting batch ${batches.current + 1}/${batches.total}: ${currentBatch.section}`,
- };
- }
-
- // Current batch failed (G2.9) → try healing
- if (currentBatch?.status === 'failed') {
- if (config.autoHealEnabled && currentBatch.healAttempts < config.maxHealAttempts) {
- return {
- action: 'heal_batch',
- batchIndex: batches.current,
- reason: 'Batch failed, attempting heal',
- };
- }
- return {
- action: 'recover_failed',
- reason: `Batch ${batches.current} failed after ${currentBatch.healAttempts} heal attempts`,
- errorMessage: `Batch ${batches.current} failed`,
- };
+ if (!hasActiveWorkflow(workflow)) { // NOTE(review): also true for 'completed' and 'cancelled' workflows, so those spawn again — confirm intended
+ return { action: 'spawn', skill: `flow.${current}`, reason: `Start ${current}` };
}
- return null; // No batch-specific decision, use normal matrix
+ return { action: 'wait', reason: `${current} in progress` }; // reached only when a workflow is active and the step is neither complete nor failed
}
-// =============================================================================
-// Main Decision Function (Pure) - FR-001, FR-002
-// =============================================================================
-
-/**
- * Make a decision about what to do next
- *
- * This is the complete decision matrix from FR-002. Every possible state
- * combination has an explicit action - no ambiguous cases.
- *
- * Key principle (FR-001): Trust step.status from state file. Sub-commands
- * set step.status=complete when done. We don't check for artifacts.
- *
- * @param input - All state needed to make a decision
- * @returns Decision result with action and reason
- */
-export function makeDecision(input: DecisionInput): DecisionResult {
- const { step, phase, execution, workflow, lastFileChangeTime, lookupFailures, currentTime } = input;
- const { config, batches } = execution;
- const currentStep = step.current || 'design';
-
- // ═══════════════════════════════════════════════════════════════════
- // PRE-DECISION GATES (G1.1, G1.2)
- // ═══════════════════════════════════════════════════════════════════
-
- // G1.1: Budget gate - fail if budget exceeded
- if (execution.totalCostUsd >= config.budget.maxTotal) {
+function handleImplement(
+ stepStatus: StepStatus,
+ batches: BatchTracking,
+ config: OrchestrationConfig,
+ workflow: WorkflowState | null
+): Decision {
+ if (stepStatus === 'complete' || areAllBatchesComplete(batches)) {
return {
- action: 'fail',
- reason: `Budget exceeded: $${execution.totalCostUsd.toFixed(2)} >= $${config.budget.maxTotal}`,
- errorMessage: 'Budget limit exceeded',
+ action: 'transition',
+ nextStep: 'verify',
+ skill: 'flow.verify',
+ reason: stepStatus === 'complete' ? 'Implement complete' : 'All batches complete',
};
}
- // G1.2: Duration gate - needs_attention if running too long (4 hours)
- if (currentTime !== undefined) {
- const startTime = new Date(execution.startedAt).getTime();
- const duration = currentTime - startTime;
- if (duration > MAX_ORCHESTRATION_DURATION_MS) {
- return {
- action: 'needs_attention',
- reason: `Orchestration running too long: ${Math.round(duration / (60 * 60 * 1000))} hours`,
- errorMessage: 'Orchestration duration exceeded 4 hours',
- recoveryOptions: ['retry', 'abort'],
- };
- }
+ if (stepStatus === 'failed') {
+ return { action: 'needs_attention', reason: 'Implement failed' };
}
- // ═══════════════════════════════════════════════════════════════════
- // IMPLEMENT PHASE: BATCH HANDLING (checked first) - FR-003
- // ═══════════════════════════════════════════════════════════════════
- if (currentStep === 'implement') {
- const batchDecision = handleImplementBatching(step, execution, workflow);
- if (batchDecision) return batchDecision;
+ if (batches.total === 0) {
+ return { action: 'initialize_batches', reason: 'No batches initialized' };
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW IS RUNNING (G1.4, G1.5)
- // ═══════════════════════════════════════════════════════════════════
- if (workflow?.status === 'running') {
- // Check for stale workflow (G1.5)
- // Use the workflow's lastActivityAt, NOT project file changes
- // A workflow is stale if it's been running but hasn't had any activity
- if (workflow.lastActivityAt) {
- const workflowActivityTime = new Date(workflow.lastActivityAt).getTime();
- const staleDuration = Date.now() - workflowActivityTime;
- if (staleDuration > STALE_THRESHOLD_MS) {
- return {
- action: 'recover_stale',
- reason: `No activity for ${Math.round(staleDuration / 60000)} minutes`,
- workflowId: workflow.id,
- };
- }
- }
-
- // Active workflow (G1.4)
- return {
- action: 'wait',
- reason: 'Workflow running',
- };
+ const currentBatch = batches.items[batches.current];
+ if (!currentBatch) {
+ return { action: 'needs_attention', reason: 'Missing current batch' };
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW NEEDS INPUT (G1.6, G1.7)
- // ═══════════════════════════════════════════════════════════════════
- if (workflow?.status === 'waiting_for_input') {
+ if (workflow?.status === 'failed') {
+ if (config.autoHealEnabled && currentBatch.healAttempts < config.maxHealAttempts) {
+ return {
+ action: 'heal_batch',
+ batchIndex: batches.current,
+ reason: 'Batch workflow failed, attempting heal',
+ };
+ }
return {
- action: 'wait',
- reason: 'Waiting for user input',
+ action: 'needs_attention',
+ reason: `Batch ${batches.current + 1} failed after ${currentBatch.healAttempts} attempts`,
};
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW DETACHED OR STALE - Intermediate Health States
- // These are monitoring states that indicate the workflow might be stuck
- // We treat 'stale' as needing recovery and 'detached' as waiting
- // ═══════════════════════════════════════════════════════════════════
- if (workflow?.status === 'stale') {
- console.log(`[orchestration-decisions] DEBUG: Workflow ${workflow.id} is stale`);
+ if (currentBatch.status === 'running' && workflow?.status === 'completed') {
+ const hasNextBatch = batches.current < batches.total - 1;
return {
- action: 'recover_stale',
- reason: `Workflow ${workflow.id} appears stale - no recent activity`,
- workflowId: workflow.id,
+ action: 'advance_batch',
+ batchIndex: batches.current,
+ pauseAfterAdvance: config.pauseBetweenBatches && hasNextBatch,
+ reason: `Batch ${batches.current + 1} workflow completed`,
};
}
- if (workflow?.status === 'detached') {
- // Detached means process was orphaned but might still be running
- // Wait a bit and let the health checker determine final state
- console.log(`[orchestration-decisions] DEBUG: Workflow ${workflow.id} is detached, waiting`);
+ if (currentBatch.status === 'completed' || currentBatch.status === 'healed') {
+ const hasNextBatch = batches.current < batches.total - 1;
return {
- action: 'wait',
- reason: `Workflow ${workflow.id} detached, waiting for health check`,
+ action: 'advance_batch',
+ batchIndex: batches.current,
+ pauseAfterAdvance: config.pauseBetweenBatches && hasNextBatch,
+ reason: `Batch ${batches.current + 1} complete`,
};
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW FAILED OR CANCELLED
- // ═══════════════════════════════════════════════════════════════════
- if (workflow?.status === 'failed' || workflow?.status === 'cancelled') {
- // If cancelled by user, don't auto-heal
- if (workflow.status === 'cancelled') {
+ if (currentBatch.status === 'failed') {
+ if (config.autoHealEnabled && currentBatch.healAttempts < config.maxHealAttempts) {
return {
- action: 'needs_attention',
- reason: 'Workflow was cancelled by user',
- errorMessage: 'Workflow cancelled',
- recoveryOptions: ['retry', 'skip', 'abort'],
- failedWorkflowId: workflow.id,
+ action: 'heal_batch',
+ batchIndex: batches.current,
+ reason: 'Batch failed, attempting heal',
};
}
-
- // If failed in implement phase, try auto-healing first (G2.9)
- if (currentStep === 'implement' && config.autoHealEnabled) {
- const currentBatch = batches.items[batches.current];
- if (currentBatch && currentBatch.healAttempts < config.maxHealAttempts) {
- return {
- action: 'heal_batch',
- reason: `Workflow failed, attempting heal (attempt ${currentBatch.healAttempts + 1}/${config.maxHealAttempts})`,
- batchIndex: batches.current,
- };
- }
- }
-
- // Otherwise, needs user attention
return {
action: 'needs_attention',
- reason: `Workflow ${workflow.status}: ${workflow.error || 'Unknown error'}`,
- errorMessage: workflow.error,
- recoveryOptions: ['retry', 'skip', 'abort'],
- failedWorkflowId: workflow.id,
+ reason: `Batch ${batches.current + 1} failed after ${currentBatch.healAttempts} attempts`,
};
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW ID EXISTS BUT LOOKUP FAILS (G1.3)
- // ═══════════════════════════════════════════════════════════════════
- const storedWorkflowId = getStoredWorkflowId(execution, currentStep);
- if (storedWorkflowId && !workflow) {
+ if (currentBatch.status === 'running' && !hasActiveWorkflow(workflow)) {
return {
- action: 'wait_with_backoff',
- reason: `Workflow ${storedWorkflowId} lookup failed, waiting...`,
- backoffMs: calculateExponentialBackoff(lookupFailures || 0),
+ action: 'needs_attention',
+ reason: 'Batch marked running but no workflow is active',
};
}
- // ═══════════════════════════════════════════════════════════════════
- // WORKFLOW COMPLETED - INFER STEP COMPLETION (G1.7)
- // For non-implement phases, workflow completion means step is done.
- // Implement phase uses batch logic instead (handled separately).
- // ═══════════════════════════════════════════════════════════════════
- console.log(`[orchestration-decisions] DEBUG: workflow=${workflow?.id ?? 'none'}, status=${workflow?.status ?? 'none'}, currentStep=${currentStep}`);
- if (workflow?.status === 'completed' && currentStep !== 'implement') {
- console.log(`[orchestration-decisions] DEBUG: Workflow completed for ${currentStep}, transitioning...`);
- const nextStep = getNextStep(currentStep);
-
- // All steps done - after merge completes
- if (nextStep === null) {
- return {
- action: 'complete',
- reason: 'All steps finished (workflow completed)',
- };
- }
-
- // Verify complete → check USER_GATE before merge
- if (currentStep === 'verify' && nextStep === 'merge') {
- if (phase.hasUserGate && phase.userGateStatus !== 'confirmed') {
- return {
- action: 'wait_user_gate',
- reason: 'USER_GATE requires confirmation',
- };
- }
- if (!config.autoMerge) {
- return {
- action: 'wait_merge',
- reason: 'Verify workflow complete, waiting for user to trigger merge',
- };
- }
- return {
- action: 'transition',
- nextStep: 'merge',
- nextIndex: STEP_INDEX_MAP.verify + 1,
- skill: getSkillForStep('merge'),
- reason: 'Verify workflow complete, auto-merge enabled',
- };
- }
-
- // Normal step transition when workflow completes
+ if (currentBatch.status === 'pending' && !hasActiveWorkflow(workflow)) {
return {
- action: 'transition',
- nextStep,
- nextIndex: STEP_INDEX_MAP[nextStep as keyof typeof STEP_INDEX_MAP],
- skill: getSkillForStep(nextStep),
- reason: `${currentStep} workflow complete, advancing to ${nextStep}`,
+ action: 'spawn',
+ skill: 'flow.implement',
+ batchIndex: batches.current,
+ context: buildBatchContext(currentBatch, config.additionalContext),
+ reason: `Start batch ${batches.current + 1}/${batches.total}: ${currentBatch.section}`,
};
}
- // ═══════════════════════════════════════════════════════════════════
- // STEP IS COMPLETE - DETERMINE NEXT ACTION (G1.8 - G1.12)
- // ═══════════════════════════════════════════════════════════════════
- if (step.status === 'complete') {
- const nextStep = getNextStep(currentStep);
+ return { action: 'wait', reason: 'Batch in progress' };
+}
- // All steps done - after merge completes (G1.11)
- if (nextStep === null) {
- return {
- action: 'complete',
- reason: 'All steps finished',
- };
- }
+function handleVerify(
+ stepStatus: StepStatus,
+ config: OrchestrationConfig,
+ workflow: WorkflowState | null
+): Decision {
+ if (workflow?.status === 'failed') {
+ return { action: 'needs_attention', reason: 'Verify workflow failed' };
+ }
- // Verify complete → check USER_GATE before merge (G1.8)
- if (currentStep === 'verify' && nextStep === 'merge') {
- // USER_GATE requires explicit confirmation
- if (phase.hasUserGate && phase.userGateStatus !== 'confirmed') {
- return {
- action: 'wait_user_gate',
- reason: 'USER_GATE requires confirmation',
- };
- }
- // autoMerge disabled → wait for user to trigger (G1.9)
- if (!config.autoMerge) {
- return {
- action: 'wait_merge',
- reason: 'Auto-merge disabled, waiting for user',
- };
- }
- // autoMerge enabled → transition to merge step (G1.10)
+ if (stepStatus === 'complete') {
+ if (config.autoMerge) {
return {
action: 'transition',
nextStep: 'merge',
- nextIndex: STEP_INDEX_MAP.verify + 1, // merge is after verify
- skill: getSkillForStep('merge'),
- reason: 'Verify complete, auto-merge enabled',
+ skill: 'flow.merge',
+ reason: 'Verify complete, auto-merge',
};
}
-
- // Normal step transition (G1.12)
- return {
- action: 'transition',
- nextStep,
- nextIndex: STEP_INDEX_MAP[nextStep as keyof typeof STEP_INDEX_MAP],
- skill: getSkillForStep(nextStep),
- reason: `${currentStep} complete, advancing to ${nextStep}`,
- };
+ return { action: 'wait_merge', reason: 'Verify complete, waiting for user' };
}
- // ═══════════════════════════════════════════════════════════════════
- // STEP FAILED OR BLOCKED (G1.13, G1.14)
- // ═══════════════════════════════════════════════════════════════════
- if (step.status === 'failed' || step.status === 'blocked') {
- return {
- action: 'recover_failed',
- reason: `Step ${currentStep} is ${step.status}`,
- };
+ if (stepStatus === 'failed') {
+ return { action: 'needs_attention', reason: 'Verify failed' };
}
- // ═══════════════════════════════════════════════════════════════════
- // STEP IN PROGRESS BUT NO WORKFLOW (G1.15)
- // ═══════════════════════════════════════════════════════════════════
- if (step.status === 'in_progress' && !workflow) {
- return {
- action: 'spawn',
- skill: getSkillForStep(currentStep),
- reason: `Step ${currentStep} in_progress but no active workflow`,
- };
+ if (!hasActiveWorkflow(workflow)) {
+ return { action: 'spawn', skill: 'flow.verify', reason: 'Start verify' };
}
- // ═══════════════════════════════════════════════════════════════════
- // STEP NOT STARTED - SPAWN WORKFLOW (G1.16, G1.17)
- // ═══════════════════════════════════════════════════════════════════
- if (step.status === 'not_started' || step.status === null || step.status === undefined) {
- // Initialize batches when entering implement (G1.17)
- if (currentStep === 'implement' && batches.total === 0) {
- return {
- action: 'initialize_batches',
- reason: 'Entering implement, need to populate batches',
- };
- }
- return {
- action: 'spawn',
- skill: getSkillForStep(currentStep),
- reason: `Step ${currentStep} not started, spawning workflow`,
- };
- }
-
- // ═══════════════════════════════════════════════════════════════════
- // UNKNOWN STATUS - SHOULD NOT HAPPEN (G1.18)
- // ═══════════════════════════════════════════════════════════════════
- console.error(`[orchestration-decisions] Unknown step.status: ${step.status}`);
- return {
- action: 'needs_attention',
- reason: `Unknown status: ${step.status}`,
- errorMessage: `Unexpected step status: ${step.status}`,
- recoveryOptions: ['retry', 'abort'],
- };
+ return { action: 'wait', reason: 'Verify in progress' };
}
-// =============================================================================
-// Internal Helpers
-// =============================================================================
+function handleMerge(
+ stepStatus: StepStatus,
+ workflow: WorkflowState | null
+): Decision {
+ if (workflow?.status === 'failed') {
+ return { action: 'needs_attention', reason: 'Merge workflow failed' };
+ }
-/**
- * Get the stored workflow ID for a given step from execution state
- */
-function getStoredWorkflowId(execution: OrchestrationExecution, step: string): string | undefined {
- const { executions, batches } = execution;
+ if (stepStatus === 'complete') {
+ return { action: 'transition', nextStep: 'complete', reason: 'Merge complete' };
+ }
- switch (step) {
- case 'design':
- return executions.design;
- case 'analyze':
- return executions.analyze;
- case 'implement':
- return batches.items[batches.current]?.workflowExecutionId;
- case 'verify':
- return executions.verify;
- case 'merge':
- return executions.merge;
- default:
- return undefined;
+ if (!hasActiveWorkflow(workflow)) {
+ return { action: 'wait', reason: 'Awaiting merge trigger' };
}
+
+ return { action: 'wait', reason: 'Merge in progress' };
}
diff --git a/packages/dashboard/src/lib/services/orchestration-runner.ts b/packages/dashboard/src/lib/services/orchestration-runner.ts
index fd75aa7..1cb5496 100644
--- a/packages/dashboard/src/lib/services/orchestration-runner.ts
+++ b/packages/dashboard/src/lib/services/orchestration-runner.ts
@@ -10,30 +10,21 @@
* - Background polling for workflow completion
* - State machine decision logic
* - Sequential batch execution
- * - Auto-healing on failure
- * - Budget enforcement
+ * - Auto-heal on workflow completion
+ * - Decision logging
* - Decision logging
- * - Claude fallback analyzer (after 3 unclear state checks)
*/
-import { join } from 'path';
-import { existsSync, readFileSync, readdirSync, writeFileSync, unlinkSync, type Dirent } from 'fs';
-import { z } from 'zod';
-import { orchestrationService, getNextPhase, isPhaseComplete } from './orchestration-service';
+import { join, basename } from 'path';
+import { existsSync, readFileSync, readdirSync, writeFileSync, unlinkSync } from 'fs';
+import { orchestrationService, readDashboardState, writeDashboardState, readOrchestrationStep } from './orchestration-service';
import { workflowService, type WorkflowExecution } from './workflow-service';
import { attemptHeal, getHealingSummary } from './auto-healing-service';
-import { quickDecision } from './claude-helper';
-import { parseBatchesFromProject, verifyBatchTaskCompletion, getTotalIncompleteTasks } from './batch-parser';
-import { isClaudeHelperError, type OrchestrationExecution, type OrchestrationPhase, type SSEEvent } from '@specflow/shared';
-// G2 Compliance: Import pure decision functions from orchestration-decisions module
-import {
- makeDecision as makeDecisionPure,
- type DecisionInput,
- type DecisionResult as PureDecisionResult,
- type WorkflowState,
- getSkillForStep,
- STALE_THRESHOLD_MS,
-} from './orchestration-decisions';
+import { parseBatchesFromProject } from './batch-parser';
+import { type OrchestrationPhase, type SSEEvent, type StepStatus } from '@specflow/shared';
+import type { OrchestrationExecution } from './orchestration-types';
+import { getNextAction, type DecisionInput, type Decision, type WorkflowState } from './orchestration-decisions';
+import { getSpecflowEnv } from '@/lib/specflow-env';
// =============================================================================
// Types
@@ -45,36 +36,15 @@ interface RunnerContext {
orchestrationId: string;
pollingInterval: number;
maxPollingAttempts: number;
- consecutiveUnclearChecks: number;
+ /** Short repo name for log readability (e.g., "arrs-mcp-server") */
+ repoName: string;
}
-/**
- * Dependency injection interface for testing (T120/G12.4)
- * Allows injecting mock services without vi.mock
- */
-export interface OrchestrationDeps {
- orchestrationService: typeof orchestrationService;
- workflowService: typeof workflowService;
- getNextPhase: typeof getNextPhase;
- isPhaseComplete: typeof isPhaseComplete;
- attemptHeal?: typeof attemptHeal;
- quickDecision?: typeof quickDecision;
- parseBatchesFromProject?: typeof parseBatchesFromProject;
+/** Log prefix with repo name for readability */
+function runnerLog(ctx: RunnerContext | { repoName: string }): string {
+ return `[orchestration-runner][${ctx.repoName}]`;
}
-/**
- * Default dependencies using module imports
- */
-const defaultDeps: OrchestrationDeps = {
- orchestrationService,
- workflowService,
- getNextPhase,
- isPhaseComplete,
- attemptHeal,
- quickDecision,
- parseBatchesFromProject,
-};
-
// =============================================================================
// Spawn Intent Pattern (G5.3-G5.7)
// =============================================================================
@@ -176,13 +146,13 @@ async function spawnWorkflowWithIntent(
// G5.4: Check for existing spawn intent
if (hasSpawnIntent(ctx.projectPath, ctx.orchestrationId)) {
- console.log(`[orchestration-runner] Spawn intent already exists for orchestration ${ctx.orchestrationId}, skipping spawn`);
+ console.log(`${runnerLog(ctx)} Spawn intent already exists for orchestration ${ctx.orchestrationId}, skipping spawn`);
return null;
}
// G5.5: Check if there's already an active workflow
if (workflowService.hasActiveWorkflow(ctx.projectId, ctx.orchestrationId)) {
- console.log(`[orchestration-runner] Workflow already active for orchestration ${ctx.orchestrationId}, skipping spawn`);
+ console.log(`${runnerLog(ctx)} Workflow already active for orchestration ${ctx.orchestrationId}, skipping spawn`);
return null;
}
@@ -200,9 +170,18 @@ async function spawnWorkflowWithIntent(
);
// Link workflow to orchestration for backwards compatibility
- orchestrationService.linkWorkflowExecution(ctx.projectPath, ctx.orchestrationId, workflow.id);
+ await orchestrationService.linkWorkflowExecution(ctx.projectPath, ctx.orchestrationId, workflow.id);
+
+ // FR-003: Update dashboard lastWorkflow state for auto-heal tracking
+ await writeDashboardState(ctx.projectPath, {
+ lastWorkflow: {
+ id: workflow.id,
+ skill: skill,
+ status: 'running',
+ },
+ });
- console.log(`[orchestration-runner] Spawned workflow ${workflow.id} for ${skill} (linked to orchestration ${ctx.orchestrationId})`);
+ console.log(`${runnerLog(ctx)} Spawned workflow ${workflow.id} for ${skill} (linked to orchestration ${ctx.orchestrationId})`);
return workflow;
} finally {
@@ -267,6 +246,141 @@ function clearRunnerState(projectPath: string, orchestrationId: string): void {
}
}
+// =============================================================================
+// Auto-Heal Logic (FR-003) - Trust Sub-Commands
+// =============================================================================
+
+/**
+ * Map a skill name (with or without a leading slash) to its step name; 'unknown' if unmapped
+ */
+function getExpectedStepForSkill(skill: string): string {
+  const map: Record<string, string> = {
+    'flow.design': 'design',
+    'flow.analyze': 'analyze',
+    'flow.implement': 'implement',
+    'flow.verify': 'verify',
+    'flow.merge': 'merge',
+    '/flow.design': 'design',
+    '/flow.analyze': 'analyze',
+    '/flow.implement': 'implement',
+    '/flow.verify': 'verify',
+    '/flow.merge': 'merge',
+  };
+  return (Object.prototype.hasOwnProperty.call(map, skill) && map[skill]) || 'unknown'; // own keys only, never inherited ones like 'constructor'
+}
+
+/**
+ * Auto-heal state after workflow completes (FR-003)
+ *
+ * When a workflow ends, check if state matches expectations and fix if needed.
+ * This allows sub-commands to update step.status, with dashboard as backup.
+ *
+ * Rules:
+ * - Workflow completed: If step.status != complete, set it to complete
+ * - Workflow failed: If step.status != failed, set it to failed
+ *
+ * If the workflow's expected step doesn't match the current step,
+ * the step status is left untouched to avoid forcing state changes.
+ *
+ * @param projectPath - Project path, used as cwd for the `specflow` CLI call
+ * @param completedSkill - The skill that just completed (e.g., 'flow.design')
+ * @param workflowStatus - How the workflow ended
+ * @returns true only if step.status was rewritten; false if nothing needed healing or the CLI call failed
+ */
+export async function autoHealAfterWorkflow(
+  projectPath: string,
+  completedSkill: string,
+  workflowStatus: 'completed' | 'failed'
+): Promise<boolean> {
+  const expectedStep = getExpectedStepForSkill(completedSkill);
+
+  // Read the dashboard state for this project
+  const dashboardState = readDashboardState(projectPath);
+
+  // If no active orchestration, nothing to heal
+  if (!dashboardState?.active) {
+    console.log('[auto-heal] No active orchestration, skipping heal');
+    return false;
+  }
+
+  // Read CLI state to get step info
+  const stepState = readOrchestrationStep(projectPath);
+  const currentStep = stepState?.current;
+  const stepStatus = stepState?.status;
+
+  console.log(`[auto-heal] Workflow ${completedSkill} ${workflowStatus}`);
+  console.log(`[auto-heal] Expected step: ${expectedStep}`);
+  console.log(`[auto-heal] Current step: ${currentStep}, status: ${stepStatus}`);
+
+  // Workflow completed successfully
+  if (workflowStatus === 'completed') {
+    if (dashboardState.lastWorkflow) {
+      await writeDashboardState(projectPath, {
+        lastWorkflow: {
+          id: dashboardState.lastWorkflow.id || 'unknown',
+          skill: completedSkill,
+          status: 'completed',
+        },
+      });
+    }
+
+    // Heal only when the finished skill belongs to the current step and the status lags behind
+    if (currentStep === expectedStep && stepStatus !== 'complete') {
+      console.log(`[auto-heal] Setting ${expectedStep}.status = complete`);
+      try {
+        const { execSync } = await import('child_process');
+        execSync(`specflow state set orchestration.step.status=complete`, {
+          cwd: projectPath,
+          encoding: 'utf-8',
+          timeout: 30000,
+          env: getSpecflowEnv(),
+        });
+
+        console.log(`[auto-heal] Successfully healed step.status to complete`);
+        return true;
+      } catch (error) {
+        console.error(`[auto-heal] Failed to heal state: ${error}`);
+        return false;
+      }
+    }
+  }
+
+  // Workflow failed - mark step as failed if not already
+  if (workflowStatus === 'failed') {
+    if (dashboardState.lastWorkflow) {
+      await writeDashboardState(projectPath, {
+        lastWorkflow: {
+          id: dashboardState.lastWorkflow.id || 'unknown',
+          skill: completedSkill,
+          status: 'failed',
+        },
+      });
+    }
+
+    if (currentStep === expectedStep && stepStatus !== 'failed') {
+      console.log(`[auto-heal] Setting ${expectedStep}.status = failed`);
+      try {
+        const { execSync } = await import('child_process');
+        execSync(`specflow state set orchestration.step.status=failed`, {
+          cwd: projectPath,
+          encoding: 'utf-8',
+          timeout: 30000,
+          env: getSpecflowEnv(),
+        });
+
+        console.log(`[auto-heal] Successfully healed step.status to failed`);
+        return true;
+      } catch (error) {
+        console.error(`[auto-heal] Failed to heal state: ${error}`);
+        return false;
+      }
+    }
+  }
+
+  console.log('[auto-heal] No healing needed');
+  return false;
+}
+
/**
* Check if a runner process is still alive by PID
*/
@@ -282,11 +396,14 @@ function isProcessAlive(pid: number): boolean {
/**
* Reconcile runners on dashboard startup (G5.10)
- * Detects orphaned runner state files where the process is no longer running
+ * Detects orphaned runner state files where the process is no longer running.
+ * Returns IDs of orchestrations that had runner state files cleaned up
+ * (i.e., were previously managed by this dashboard instance).
*/
-export function reconcileRunners(projectPath: string): void {
+export function reconcileRunners(projectPath: string): Set<string> {
+ const cleanedUpIds = new Set<string>();
const workflowsDir = join(projectPath, '.specflow', 'workflows');
- if (!existsSync(workflowsDir)) return;
+ if (!existsSync(workflowsDir)) return cleanedUpIds;
try {
const files = readdirSync(workflowsDir);
@@ -298,17 +415,20 @@ export function reconcileRunners(projectPath: string): void {
const content = readFileSync(filePath, 'utf-8');
const state = JSON.parse(content) as RunnerState;
- if (!isProcessAlive(state.pid)) {
- // Process is dead but state file exists - orphaned runner
- console.log(`[orchestration-runner] Detected orphaned runner for ${state.orchestrationId} (PID ${state.pid} is dead), cleaning up`);
+ if (state.pid !== process.pid) {
+ // PID doesn't match current server — runner is from a previous instance.
+ // Don't use isProcessAlive() because PIDs can be reused by unrelated processes.
+ console.log(`[orchestration-runner] Detected orphaned runner for ${state.orchestrationId} (PID ${state.pid} vs current ${process.pid}), cleaning up`);
unlinkSync(filePath);
+ cleanedUpIds.add(state.orchestrationId);
// Also clear from in-memory map if present
activeRunners.delete(state.orchestrationId);
} else {
- // Process is alive - mark as active in memory
- console.log(`[orchestration-runner] Runner for ${state.orchestrationId} is still active (PID ${state.pid})`);
- activeRunners.set(state.orchestrationId, true);
+ // PID matches current process — runner is ours (shouldn't happen on fresh startup)
+ console.log(`[orchestration-runner] Runner for ${state.orchestrationId} belongs to current process (PID ${state.pid})`);
+ runnerGeneration++;
+ activeRunners.set(state.orchestrationId, runnerGeneration);
}
} catch {
// Corrupted file, remove it
@@ -323,220 +443,8 @@ export function reconcileRunners(projectPath: string): void {
} catch (error) {
console.error(`[orchestration-runner] Failed to reconcile runners: ${error}`);
}
-}
-
-// =============================================================================
-// Claude State Analyzer (Fallback)
-// =============================================================================
-
-/**
- * Schema for Claude state analysis decision
- * Used when state is unclear after 3 consecutive checks
- */
-const StateAnalyzerDecisionSchema = z.object({
- action: z.enum(['run_design', 'run_analyze', 'run_implement', 'run_verify', 'run_merge', 'wait', 'stop', 'fail']),
- reason: z.string().describe('Explanation for this decision'),
- confidence: z.enum(['high', 'medium', 'low']).describe('How confident are you in this decision?'),
- suggestedSkill: z.string().optional().describe('If action requires running a skill, which one?'),
-});
-
-type StateAnalyzerDecision = z.infer<typeof StateAnalyzerDecisionSchema>;
-
-/**
- * Maximum consecutive "unclear" checks before spawning Claude analyzer
- */
-const MAX_UNCLEAR_CHECKS_BEFORE_CLAUDE = 3;
-
-/**
- * Spawn Claude to analyze state and make a decision
- * Called when state is unclear after MAX_UNCLEAR_CHECKS_BEFORE_CLAUDE consecutive waits
- */
-async function analyzeStateWithClaude(
- ctx: RunnerContext,
- orchestration: OrchestrationExecution,
- workflow: WorkflowExecution | undefined,
- specflowStatus: SpecflowStatus | null
-): Promise<DecisionResult> {
- console.log(`[orchestration-runner] State unclear after ${ctx.consecutiveUnclearChecks} checks, spawning Claude analyzer`);
-
- const prompt = `You are analyzing orchestration state to determine the next action.
-
-## Current Orchestration State
-- **Phase**: ${orchestration.currentPhase}
-- **Status**: ${orchestration.status}
-- **Batch Progress**: ${orchestration.batches.current + 1}/${orchestration.batches.total} batches
-- **Current Batch Status**: ${orchestration.batches.items[orchestration.batches.current]?.status ?? 'N/A'}
-- **Config**: autoMerge=${orchestration.config.autoMerge}, skipDesign=${orchestration.config.skipDesign}, skipAnalyze=${orchestration.config.skipAnalyze}
-
-## Current Workflow
-- **Workflow ID**: ${workflow?.id ?? 'None'}
-- **Workflow Status**: ${workflow?.status ?? 'None'}
-- **Workflow Skill**: ${workflow?.skill ?? 'None'}
-
-## Specflow Status
-\`\`\`json
-${JSON.stringify(specflowStatus, null, 2)}
-\`\`\`
-
-## Decision History (last 5)
-${orchestration.decisionLog.slice(-5).map((d) => `- ${d.decision}: ${d.reason}`).join('\n')}
-
-## Problem
-The orchestration has been in "continue/wait" state for ${ctx.consecutiveUnclearChecks} consecutive checks.
-This may indicate a stuck state or unclear completion status.
-
-## Your Task
-Analyze the state and determine what should happen next:
-- **run_design**: Run /flow.design
-- **run_analyze**: Run /flow.analyze
-- **run_implement**: Run /flow.implement
-- **run_verify**: Run /flow.verify
-- **run_merge**: Run /flow.merge
-- **wait**: Continue waiting (only if you're confident the workflow will complete)
-- **stop**: Pause and notify user (ambiguous state needing human review)
-- **fail**: Mark as failed (unrecoverable state)
-
-Provide a clear reason for your decision.`;
-
- try {
- const response = await quickDecision(
- prompt,
- StateAnalyzerDecisionSchema,
- ctx.projectPath,
- {
- maxBudgetUsd: orchestration.config.budget.decisionBudget,
- maxTurns: 3, // Allow a few turns to read files if needed
- tools: ['Read', 'Grep', 'Glob'], // Read-only tools
- }
- );
-
- if (isClaudeHelperError(response)) {
- console.error(`[orchestration-runner] Claude analyzer failed: ${response.errorMessage}`);
- return {
- action: 'fail',
- reason: `Claude analyzer failed after ${ctx.consecutiveUnclearChecks} unclear checks: ${response.errorMessage}`,
- errorMessage: 'State analysis failed - manual intervention required',
- };
- }
-
- const decision = response.result;
-
- // Track cost
- if (response.cost > 0) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, response.cost);
- }
-
- // Log Claude decision
- console.log(`[orchestration-runner] Claude analyzer decision: ${decision.action} (${decision.confidence}) - ${decision.reason}`);
-
- // Map Claude decision to DecisionResult
- return mapClaudeDecision(decision);
- } catch (error) {
- console.error(`[orchestration-runner] Error in Claude analyzer: ${error}`);
- return {
- action: 'fail',
- reason: `Claude analyzer error after ${ctx.consecutiveUnclearChecks} unclear checks: ${error instanceof Error ? error.message : 'Unknown error'}`,
- errorMessage: 'State analysis error - manual intervention required',
- };
- }
-}
-/**
- * Map Claude analyzer decision to runner DecisionResult
- */
-function mapClaudeDecision(decision: StateAnalyzerDecision): DecisionResult {
- switch (decision.action) {
- case 'run_design':
- return {
- action: 'spawn_workflow',
- reason: `[Claude analyzer] ${decision.reason}`,
- skill: 'flow.design',
- };
- case 'run_analyze':
- return {
- action: 'spawn_workflow',
- reason: `[Claude analyzer] ${decision.reason}`,
- skill: 'flow.analyze',
- };
- case 'run_implement':
- return {
- action: 'spawn_workflow',
- reason: `[Claude analyzer] ${decision.reason}`,
- skill: decision.suggestedSkill || 'flow.implement',
- };
- case 'run_verify':
- return {
- action: 'spawn_workflow',
- reason: `[Claude analyzer] ${decision.reason}`,
- skill: 'flow.verify',
- };
- case 'run_merge':
- return {
- action: 'spawn_workflow',
- reason: `[Claude analyzer] ${decision.reason}`,
- skill: 'flow.merge',
- };
- case 'wait':
- return {
- action: 'continue',
- reason: `[Claude analyzer] ${decision.reason}`,
- };
- case 'stop':
- return {
- action: 'wait_merge', // Use wait_merge to pause - user must manually resume
- reason: `[Claude analyzer - PAUSED] ${decision.reason}`,
- };
- case 'fail':
- return {
- action: 'fail',
- reason: `[Claude analyzer] ${decision.reason}`,
- errorMessage: decision.reason,
- };
- default:
- return {
- action: 'continue',
- reason: `[Claude analyzer] Unknown action: ${decision.action}`,
- };
- }
-}
-
-interface DecisionResult {
- action:
- // Legacy actions (kept for compatibility)
- | 'continue'
- | 'spawn_workflow'
- | 'spawn_batch'
- | 'heal'
- | 'wait_merge'
- | 'needs_attention'
- | 'complete'
- | 'fail'
- // G2 Compliance: New actions from pure decision module
- | 'transition'
- | 'advance_batch'
- | 'initialize_batches'
- | 'force_step_complete'
- | 'pause'
- | 'recover_stale'
- | 'recover_failed'
- | 'wait_with_backoff'
- | 'wait_user_gate';
- reason: string;
- skill?: string;
- batchContext?: string;
- errorMessage?: string;
- /** Recovery options when action is 'needs_attention' */
- recoveryOptions?: Array<'retry' | 'skip' | 'abort'>;
- /** Failed workflow ID for recovery context */
- failedWorkflowId?: string;
- /** Next step for transition action */
- nextStep?: string;
- /** Batch index for batch actions */
- batchIndex?: number;
- /** Workflow ID for stale recovery */
- workflowId?: string;
- /** Backoff time for wait_with_backoff */
- backoffMs?: number;
+ return cleanedUpIds;
}
// =============================================================================
@@ -561,601 +469,6 @@ function getProjectPath(projectId: string): string | null {
}
}
-// =============================================================================
-// Specflow Status Integration (Direct File Access - No Subprocess)
-// =============================================================================
-
-interface SpecflowStatus {
- phase?: {
- number?: number;
- name?: string;
- hasUserGate?: boolean;
- userGateStatus?: 'pending' | 'confirmed' | 'skipped';
- };
- context?: {
- hasSpec?: boolean;
- hasPlan?: boolean;
- hasTasks?: boolean;
- featureDir?: string;
- };
- progress?: {
- tasksTotal?: number;
- tasksComplete?: number;
- percentage?: number;
- };
- orchestration?: {
- step?: {
- current?: string;
- index?: number;
- status?: string;
- };
- };
-}
-
-/**
- * Task counts from parsing tasks.md directly
- */
-interface TaskCounts {
- total: number;
- completed: number;
- blocked: number;
- deferred: number;
- percentage: number;
-}
-
-/**
- * Get task counts by parsing tasks.md directly (no subprocess)
- *
- * @param tasksPath - Path to tasks.md file
- * @returns Task counts or null if file doesn't exist
- */
-function getTaskCounts(tasksPath: string): TaskCounts | null {
- if (!existsSync(tasksPath)) {
- return null;
- }
-
- try {
- const content = readFileSync(tasksPath, 'utf-8');
- const lines = content.split('\n');
-
- let total = 0;
- let completed = 0;
- let blocked = 0;
- let deferred = 0;
-
- for (const line of lines) {
- const trimmed = line.trim();
-
- // Match task lines: - [x] T###, - [ ] T###, etc.
- const taskMatch = trimmed.match(/^-\s*\[[xX ~\-bB]\]\s*T\d{3}/);
- if (!taskMatch) continue;
-
- total++;
-
- // Determine status from checkbox
- if (trimmed.startsWith('- [x]') || trimmed.startsWith('- [X]')) {
- completed++;
- } else if (trimmed.startsWith('- [b]') || trimmed.startsWith('- [B]')) {
- blocked++;
- } else if (trimmed.startsWith('- [~]') || trimmed.startsWith('- [-]')) {
- deferred++;
- }
- // else it's '- [ ]' which is todo (not counted separately)
- }
-
- return {
- total,
- completed,
- blocked,
- deferred,
- percentage: total > 0 ? Math.round((completed / total) * 100) : 0,
- };
- } catch {
- return null;
- }
-}
-
-/**
- * Check if design artifacts exist in a feature directory (no subprocess)
- *
- * @param featureDir - Path to the feature directory (specs/NNNN-name/)
- * @returns Object indicating which artifacts exist
- */
-function checkArtifactExistence(featureDir: string): { hasSpec: boolean; hasPlan: boolean; hasTasks: boolean } {
- return {
- hasSpec: existsSync(join(featureDir, 'spec.md')),
- hasPlan: existsSync(join(featureDir, 'plan.md')),
- hasTasks: existsSync(join(featureDir, 'tasks.md')),
- };
-}
-
-/**
- * Find the active feature directory in a project
- * Looks for specs/NNNN-name/ directories and returns the highest numbered one
- *
- * @param projectPath - Root path of the project
- * @returns Feature directory path or null if none found
- */
-function findActiveFeatureDir(projectPath: string): string | null {
- const specsDir = join(projectPath, 'specs');
- if (!existsSync(specsDir)) {
- return null;
- }
-
- try {
- const entries = readdirSync(specsDir, { withFileTypes: true }) as Dirent[];
-
- // Find directories matching NNNN-* pattern
- const featureDirs = entries
- .filter((e) => e.isDirectory() && /^\d{4}-/.test(e.name))
- .map((e) => e.name)
- .sort()
- .reverse();
-
- if (featureDirs.length === 0) {
- return null;
- }
-
- return join(specsDir, featureDirs[0]);
- } catch {
- return null;
- }
-}
-
-/**
- * Get specflow status by reading files directly (no subprocess)
- * Replaces the previous getSpecflowStatus that called `specflow status --json`
- *
- * @param projectPath - Root path of the project
- * @returns Status object compatible with previous interface
- */
-function getSpecflowStatus(projectPath: string): SpecflowStatus | null {
- try {
- // Find active feature directory
- const featureDir = findActiveFeatureDir(projectPath);
- if (!featureDir) {
- return {
- context: {
- hasSpec: false,
- hasPlan: false,
- hasTasks: false,
- },
- progress: {
- tasksTotal: 0,
- tasksComplete: 0,
- percentage: 0,
- },
- };
- }
-
- // Check which artifacts exist
- const artifacts = checkArtifactExistence(featureDir);
-
- // Get task counts if tasks.md exists
- const tasksPath = join(featureDir, 'tasks.md');
- const taskCounts = artifacts.hasTasks ? getTaskCounts(tasksPath) : null;
-
- // Extract phase info from directory name (e.g., "1056-jsonl-watcher" -> 1056)
- const dirName = featureDir.split('/').pop() || '';
- const phaseMatch = dirName.match(/^(\d+)-(.+)/);
-
- // Read orchestration state from state file
- let orchestrationState: SpecflowStatus['orchestration'] = undefined;
- let phaseGateInfo: Pick, 'hasUserGate' | 'userGateStatus'> = {};
- try {
- // Try .specflow first (v3), then .specify (v2)
- let statePath = join(projectPath, '.specflow', 'orchestration-state.json');
- if (!existsSync(statePath)) {
- statePath = join(projectPath, '.specify', 'orchestration-state.json');
- }
- if (existsSync(statePath)) {
- const stateContent = readFileSync(statePath, 'utf-8');
- const state = JSON.parse(stateContent);
- if (state?.orchestration?.step) {
- orchestrationState = {
- step: {
- current: state.orchestration.step.current,
- index: state.orchestration.step.index,
- status: state.orchestration.step.status,
- },
- };
- }
- // Extract phase gate info from state file
- if (state?.orchestration?.phase) {
- phaseGateInfo = {
- hasUserGate: state.orchestration.phase.hasUserGate,
- userGateStatus: state.orchestration.phase.userGateStatus,
- };
- }
- }
- } catch {
- // Ignore errors reading state file
- }
-
- return {
- phase: phaseMatch ? {
- number: parseInt(phaseMatch[1], 10),
- name: phaseMatch[2].replace(/-/g, ' '),
- ...phaseGateInfo,
- } : phaseGateInfo.hasUserGate !== undefined ? phaseGateInfo : undefined,
- context: {
- hasSpec: artifacts.hasSpec,
- hasPlan: artifacts.hasPlan,
- hasTasks: artifacts.hasTasks,
- featureDir,
- },
- progress: taskCounts ? {
- tasksTotal: taskCounts.total,
- tasksComplete: taskCounts.completed,
- percentage: taskCounts.percentage,
- } : {
- tasksTotal: 0,
- tasksComplete: 0,
- percentage: 0,
- },
- orchestration: orchestrationState,
- };
- } catch {
- return null;
- }
-}
-
-// =============================================================================
-// Staleness Detection
-// =============================================================================
-
-/**
- * Get the last file change time for the project
- * Used for staleness detection (G1.5)
- */
-function getLastFileChangeTime(projectPath: string): number {
- try {
- // Check common directories for recent changes
- const dirsToCheck = [
- join(projectPath, 'src'),
- join(projectPath, 'specs'),
- join(projectPath, '.specflow'),
- ];
-
- let latestTime = 0;
- for (const dir of dirsToCheck) {
- if (existsSync(dir)) {
- const stat = require('fs').statSync(dir);
- if (stat.mtimeMs > latestTime) {
- latestTime = stat.mtimeMs;
- }
- }
- }
- return latestTime || Date.now();
- } catch {
- return Date.now();
- }
-}
-
-// =============================================================================
-// State Machine Decision Logic
-// =============================================================================
-
-/**
- * Map orchestration phase to skill command
- */
-function getSkillForPhase(phase: OrchestrationPhase): string {
- switch (phase) {
- case 'design':
- return 'flow.design';
- case 'analyze':
- return 'flow.analyze';
- case 'implement':
- return 'flow.implement';
- case 'verify':
- return 'flow.verify';
- case 'merge':
- return 'flow.merge';
- default:
- return 'flow.implement';
- }
-}
-
-// =============================================================================
-// G2 Compliance: Adapter for Pure Decision Functions
-// =============================================================================
-
-/**
- * Convert runner context to DecisionInput for the pure makeDecision function
- * This adapter bridges the old runner patterns with the new pure decision module
- */
-function createDecisionInput(
- orchestration: OrchestrationExecution,
- workflow: WorkflowExecution | undefined,
- specflowStatus: SpecflowStatus | null,
- lastFileChangeTime?: number
-): DecisionInput {
- // Convert workflow to WorkflowState (simplified interface)
- const workflowState: WorkflowState | null = workflow ? {
- id: workflow.id,
- status: workflow.status as WorkflowState['status'],
- error: workflow.error,
- lastActivityAt: workflow.updatedAt,
- } : null;
-
- // Extract step info from specflow status and orchestration
- // IMPORTANT: The state file tracks the PROJECT's current step, which may differ from
- // the orchestration's currentPhase (e.g., when skipping to merge).
- // We only trust step.status if it's for the SAME step as the orchestration's currentPhase.
- const stateFileStep = specflowStatus?.orchestration?.step?.current;
- const rawStatus = specflowStatus?.orchestration?.step?.status;
- const validStatuses = ['not_started', 'pending', 'in_progress', 'complete', 'failed', 'blocked', 'skipped'] as const;
-
- // Only use the state file's status if it matches the orchestration's current phase
- // Otherwise, the step hasn't been started in this orchestration
- const stepStatus = (stateFileStep === orchestration.currentPhase && rawStatus && validStatuses.includes(rawStatus as typeof validStatuses[number]))
- ? (rawStatus as typeof validStatuses[number])
- : 'not_started';
-
- const stepCurrent = orchestration.currentPhase;
- const stepIndex = specflowStatus?.orchestration?.step?.index ?? 0;
-
- return {
- step: {
- current: stepCurrent,
- index: stepIndex,
- status: stepStatus,
- },
- phase: {
- hasUserGate: specflowStatus?.phase?.hasUserGate,
- userGateStatus: specflowStatus?.phase?.userGateStatus,
- },
- execution: orchestration,
- workflow: workflowState,
- lastFileChangeTime,
- lookupFailures: 0,
- currentTime: Date.now(),
- };
-}
-
-/**
- * Adapt pure DecisionResult to the legacy action names where needed
- * The executeDecision function will be updated to handle all new action types
- */
-function adaptDecisionResult(result: PureDecisionResult): DecisionResult {
- // Map new action names to ensure compatibility
- const actionMap: Record = {
- 'wait': 'continue', // wait → continue (legacy)
- 'spawn': 'spawn_workflow', // spawn → spawn_workflow (legacy)
- 'heal_batch': 'heal', // heal_batch → heal (legacy)
- };
-
- const action = actionMap[result.action] ?? result.action;
-
- return {
- action: action as DecisionResult['action'],
- reason: result.reason,
- skill: result.skill,
- batchContext: result.batchContext,
- errorMessage: result.errorMessage,
- recoveryOptions: result.recoveryOptions,
- failedWorkflowId: result.failedWorkflowId,
- // For transition actions, extract the skill
- ...(result.action === 'transition' && result.skill ? { skill: result.skill } : {}),
- };
-}
-
-/**
- * Make a decision using the pure decision module (G2 compliant)
- * Falls back to legacy makeDecision if pure module fails
- */
-function makeDecisionWithAdapter(
- orchestration: OrchestrationExecution,
- workflow: WorkflowExecution | undefined,
- specflowStatus: SpecflowStatus | null,
- lastFileChangeTime?: number
-): DecisionResult {
- // Create input for pure decision function
- const input = createDecisionInput(orchestration, workflow, specflowStatus, lastFileChangeTime);
-
- // Get decision from pure function
- const pureResult = makeDecisionPure(input);
-
- // Adapt to legacy format
- return adaptDecisionResult(pureResult);
-}
-
-/**
- * Make a decision about what to do next
- * @deprecated Use makeDecisionWithAdapter instead - this is kept for reference during transition
- */
-function makeDecision(
- orchestration: OrchestrationExecution,
- workflow: WorkflowExecution | undefined,
- specflowStatus: SpecflowStatus | null
-): DecisionResult {
- const { currentPhase, config, batches } = orchestration;
-
- // Check budget first
- if (orchestration.totalCostUsd >= config.budget.maxTotal) {
- return {
- action: 'fail',
- reason: `Budget exceeded: $${orchestration.totalCostUsd.toFixed(2)} >= $${config.budget.maxTotal}`,
- errorMessage: 'Budget limit exceeded',
- };
- }
-
- // Check if workflow is still running
- if (workflow && ['running', 'waiting_for_input'].includes(workflow.status)) {
- return {
- action: 'continue',
- reason: `Workflow ${workflow.id} still ${workflow.status}`,
- };
- }
-
- // Check if workflow failed or was cancelled
- if (workflow && ['failed', 'cancelled'].includes(workflow.status)) {
- // If cancelled by user, don't auto-heal, go to needs_attention
- if (workflow.status === 'cancelled') {
- return {
- action: 'needs_attention',
- reason: `Workflow was cancelled by user`,
- errorMessage: 'Workflow cancelled',
- recoveryOptions: ['retry', 'skip', 'abort'],
- failedWorkflowId: workflow.id,
- };
- }
-
- // If failed in implement phase, try auto-healing first
- if (currentPhase === 'implement' && config.autoHealEnabled) {
- const currentBatch = batches.items[batches.current];
- if (currentBatch && currentBatch.healAttempts < config.maxHealAttempts) {
- return {
- action: 'heal',
- reason: `Workflow failed, attempting heal (attempt ${currentBatch.healAttempts + 1}/${config.maxHealAttempts})`,
- };
- }
- }
-
- // Instead of immediately failing, go to needs_attention for user decision
- return {
- action: 'needs_attention',
- reason: `Workflow failed: ${workflow.error}`,
- errorMessage: workflow.error,
- recoveryOptions: ['retry', 'skip', 'abort'],
- failedWorkflowId: workflow.id,
- };
- }
-
- // Check if current phase is complete
- const phaseComplete = isPhaseComplete(specflowStatus, currentPhase);
-
- // Handle implement phase batches
- if (currentPhase === 'implement') {
- // ROBUST CHECK: Must have batches AND all must be completed/healed
- const completedCount = batches.items.filter(
- (b) => b.status === 'completed' || b.status === 'healed'
- ).length;
- const allBatchesComplete = batches.items.length > 0 && completedCount === batches.items.length;
-
- // DEBUG: Log batch state when checking completion
- console.log(`[orchestration-runner] Implement batch check: ${completedCount}/${batches.items.length} complete, current=${batches.current}, allComplete=${allBatchesComplete}`);
-
- if (allBatchesComplete) {
- // All batches done, move to verify
- const nextPhase = getNextPhase(currentPhase, config);
- console.log(`[orchestration-runner] ALL BATCHES COMPLETE - transitioning to ${nextPhase}`);
- if (nextPhase === 'merge' && !config.autoMerge) {
- return {
- action: 'wait_merge',
- reason: 'All batches complete, waiting for user to trigger merge',
- };
- }
- return {
- action: 'spawn_workflow',
- reason: `All batches complete, transitioning to ${nextPhase}`,
- skill: nextPhase ? getSkillForPhase(nextPhase) : undefined,
- };
- }
-
- // Check if current batch is done
- const currentBatch = batches.items[batches.current];
- if (currentBatch?.status === 'running' && workflow?.status === 'completed') {
- // Mark batch complete and check for more
- return {
- action: 'spawn_batch',
- reason: `Batch ${batches.current + 1} complete, starting next batch`,
- };
- }
-
- if (currentBatch?.status === 'pending') {
- // Start this batch
- const batchContext = `Execute only the "${currentBatch.section}" section (${currentBatch.taskIds.join(', ')}). Do NOT work on tasks from other sections.`;
- const fullContext = config.additionalContext
- ? `${batchContext}\n\n${config.additionalContext}`
- : batchContext;
-
- return {
- action: 'spawn_workflow',
- reason: `Starting batch ${batches.current + 1}/${batches.total}: ${currentBatch.section}`,
- skill: `flow.implement ${fullContext}`,
- batchContext: fullContext,
- };
- }
- }
-
- // For non-implement phases, check if complete and transition
- // CRITICAL: Skip this for implement phase - batch logic above handles transitions
- // CRITICAL: For design phase, require BOTH workflow completion AND artifacts exist
- // This prevents auto-advancing when workflow completes without producing required artifacts
- const workflowComplete = workflow?.status === 'completed';
- // Analyze and verify don't produce artifacts - workflow completion is enough
- const canAdvance = (currentPhase === 'analyze' || currentPhase === 'verify')
- ? workflowComplete // No artifacts, workflow completion is enough
- : (phaseComplete && workflowComplete); // Other phases need artifacts AND workflow done
-
- if (currentPhase !== 'implement' && canAdvance) {
- const nextPhase = getNextPhase(currentPhase, config);
-
- if (!nextPhase || nextPhase === 'complete') {
- return {
- action: 'complete',
- reason: 'All phases complete',
- };
- }
-
- if (nextPhase === 'merge' && !config.autoMerge) {
- return {
- action: 'wait_merge',
- reason: 'Verify complete, waiting for user to trigger merge',
- };
- }
-
- return {
- action: 'spawn_workflow',
- reason: `Phase ${currentPhase} complete, transitioning to ${nextPhase}`,
- skill: getSkillForPhase(nextPhase),
- };
- }
-
- // If no workflow exists for current phase, check if we should spawn one
- // GUARD: Don't re-spawn if we already have a workflow ID for this phase
- // This prevents spawning duplicate workflows when the lookup fails
- if (!workflow) {
- // Check if we already have a workflow ID for this phase
- let existingWorkflowId: string | undefined;
- if (currentPhase === 'implement') {
- const implExecutions = orchestration.executions.implement;
- existingWorkflowId = implExecutions?.length ? implExecutions[implExecutions.length - 1] : undefined;
- } else if (currentPhase === 'design') {
- existingWorkflowId = orchestration.executions.design;
- } else if (currentPhase === 'analyze') {
- existingWorkflowId = orchestration.executions.analyze;
- } else if (currentPhase === 'verify') {
- existingWorkflowId = orchestration.executions.verify;
- } else if (currentPhase === 'merge') {
- existingWorkflowId = orchestration.executions.merge;
- }
- if (existingWorkflowId && typeof existingWorkflowId === 'string') {
- // We have a workflow ID but couldn't find it - something is wrong
- // Don't spawn another, wait for manual intervention or the workflow to reappear
- console.log(`[orchestration-runner] WARNING: Workflow ${existingWorkflowId} for ${currentPhase} not found in lookup, but ID exists in state. Waiting...`);
- return {
- action: 'continue',
- reason: `Workflow ${existingWorkflowId} lookup failed, waiting for it to complete or reappear`,
- };
- }
-
- // Truly no workflow exists - spawn one (first time for this phase)
- return {
- action: 'spawn_workflow',
- reason: `No workflow found for ${currentPhase} phase, spawning one`,
- skill: getSkillForPhase(currentPhase),
- };
- }
-
- // Default: continue waiting
- return {
- action: 'continue',
- reason: 'Waiting for current workflow to complete',
- };
-}
-
// =============================================================================
// Event-Driven Orchestration (T025-T026, G5.11-G5.13)
// =============================================================================
@@ -1219,23 +532,8 @@ function subscribeToFileEvents(
}
// Wake up runner on relevant events
- switch (event.type) {
- case 'tasks':
- // Task file changed - might have new completions
- console.log(`[orchestration-runner] Tasks event for ${projectId}, waking runner`);
- wakeUp(orchestrationId);
- break;
- case 'workflow':
- // Workflow index changed - workflow might have completed
- console.log(`[orchestration-runner] Workflow event for ${projectId}, waking runner`);
- wakeUp(orchestrationId);
- break;
- case 'state':
- // Orchestration state changed - might need to react
- console.log(`[orchestration-runner] State event for ${projectId}, waking runner`);
- wakeUp(orchestrationId);
- break;
- // Ignore: registry, phases, heartbeat, session events
+ if (event.type === 'tasks' || event.type === 'workflow' || event.type === 'state') {
+ wakeUp(orchestrationId);
}
});
@@ -1296,6 +594,44 @@ function eventDrivenSleep(ms: number, orchestrationId: string): Promise {
});
}
+// =============================================================================
+// Decision Input Normalization
+// =============================================================================
+
+// Phase names accepted as a step's `current` value; normalizeStepCurrent substitutes its fallback for anything else.
+const VALID_PHASES: OrchestrationPhase[] = ['design', 'analyze', 'implement', 'verify', 'merge'];
+// Step statuses accepted as-is; normalizeStepStatus maps anything else to 'not_started'.
+const VALID_STEP_STATUSES: StepStatus[] = [
+ 'not_started',
+ 'pending',
+ 'in_progress',
+ 'complete',
+ 'failed',
+ 'blocked',
+ 'skipped',
+];
+
+/**
+ * Coerce an unvalidated step name into a known orchestration phase.
+ *
+ * @param current - Candidate value of unknown type; it is only trusted if it appears in VALID_PHASES
+ * @param fallback - Phase returned when `current` is not one of VALID_PHASES
+ * @returns `current` when it is a member of VALID_PHASES, otherwise `fallback`
+ */
+function normalizeStepCurrent(
+ current: unknown,
+ fallback: OrchestrationPhase
+): OrchestrationPhase {
+ return VALID_PHASES.includes(current as OrchestrationPhase)
+ ? (current as OrchestrationPhase)
+ : fallback;
+}
+
+/**
+ * Coerce an unvalidated step status into a known StepStatus.
+ *
+ * @param status - Candidate value of unknown type
+ * @returns `status` when it is a member of VALID_STEP_STATUSES, otherwise 'not_started'
+ */
+function normalizeStepStatus(status: unknown): StepStatus {
+ return VALID_STEP_STATUSES.includes(status as StepStatus)
+ ? (status as StepStatus)
+ : 'not_started';
+}
+
+/**
+ * Reduce a workflow execution to the minimal WorkflowState shape (only `id` and `status` are carried over).
+ *
+ * Note: a workflow whose status is not in the allowed list yields null, which is
+ * the same result as passing no workflow at all.
+ *
+ * @param workflow - Workflow execution, or undefined when none is available
+ * @returns `{ id, status }` when the workflow exists and its status is one of
+ * running / waiting_for_input / completed / failed / cancelled; otherwise null
+ */
+function toWorkflowState(workflow: WorkflowExecution | undefined): WorkflowState | null {
+ if (!workflow) return null;
+ const allowed = ['running', 'waiting_for_input', 'completed', 'failed', 'cancelled'] as const;
+ return allowed.includes(workflow.status as typeof allowed[number])
+ ? { id: workflow.id, status: workflow.status as WorkflowState['status'] }
+ : null;
+}
+
// =============================================================================
// Orchestration Runner
// =============================================================================
@@ -1303,7 +639,8 @@ function eventDrivenSleep(ms: number, orchestrationId: string): Promise {
/**
* Active runners tracked by orchestration ID
*/
-const activeRunners = new Map();
+// Maps orchestration ID -> generation number of the runner that currently owns it.
+const activeRunners = new Map();
+// Incremented each time a runner starts; a running loop compares its own generation
+// against the map entry to detect that a newer runner has superseded it.
+let runnerGeneration = 0;
/**
* Run the orchestration state machine loop
@@ -1315,14 +652,12 @@ const activeRunners = new Map();
* @param orchestrationId - Orchestration execution ID
* @param pollingInterval - Interval between state checks (ms)
* @param maxPollingAttempts - Maximum polling iterations before stopping
- * @param deps - Optional dependency injection for testing (T120/G12.4)
*/
export async function runOrchestration(
projectId: string,
orchestrationId: string,
- pollingInterval: number = 3000,
- maxPollingAttempts: number = 1000,
- deps: OrchestrationDeps = defaultDeps
+ pollingInterval: number = 5000,
+ maxPollingAttempts: number = 500
): Promise {
const projectPath = getProjectPath(projectId);
if (!projectPath) {
@@ -1330,18 +665,22 @@ export async function runOrchestration(
return;
}
- // Prevent duplicate runners
- if (activeRunners.get(orchestrationId)) {
+ // Prevent duplicate runners (unless force-restarted via stopRunner + runOrchestration)
+ if (activeRunners.has(orchestrationId)) {
console.log(`[orchestration-runner] Runner already active for ${orchestrationId}`);
return;
}
- activeRunners.set(orchestrationId, true);
+ runnerGeneration++;
+ const myGeneration = runnerGeneration;
+ activeRunners.set(orchestrationId, myGeneration);
// G5.8: Persist runner state to file for cross-process detection
persistRunnerState(projectPath, orchestrationId);
- console.log(`[orchestration-runner] Starting event-driven runner for ${orchestrationId}`);
+ const repoName = basename(projectPath);
+
+ console.log(`[orchestration-runner][${repoName}] Starting event-driven runner for ${orchestrationId}`);
const ctx: RunnerContext = {
projectId,
@@ -1349,7 +688,7 @@ export async function runOrchestration(
orchestrationId,
pollingInterval,
maxPollingAttempts,
- consecutiveUnclearChecks: 0,
+ repoName,
};
// T025: Subscribe to file events for event-driven wake-up
@@ -1358,110 +697,106 @@ export async function runOrchestration(
eventCleanup = subscribeToFileEvents(orchestrationId, projectId, () => {
// Wake-up callback is set by eventDrivenSleep
});
- console.log(`[orchestration-runner] Subscribed to file events for ${projectId}`);
+ console.log(`${runnerLog(ctx)} Subscribed to file events for ${projectId}`);
} catch (error) {
- console.log(`[orchestration-runner] Event subscription not available, using polling fallback: ${error}`);
+ console.log(`${runnerLog(ctx)} Event subscription not available, using polling fallback: ${error}`);
}
let attempts = 0;
+ let lastLoggedStatus: string | null = null;
try {
// T026: Event-driven loop - wake on file events OR timeout
while (attempts < maxPollingAttempts) {
attempts++;
+ // Check if this runner has been superseded (force-restarted via Resume)
+ if (activeRunners.get(orchestrationId) !== myGeneration) {
+ console.log(`${runnerLog(ctx)} Runner ${orchestrationId} superseded by newer runner, exiting`);
+ return; // Return early — `finally` still runs on return, so its runner-state cleanup must be skipped for a superseded runner (new runner owns it now)
+ }
+
// Load current orchestration state
const orchestration = orchestrationService.get(projectPath, orchestrationId);
if (!orchestration) {
- console.error(`[orchestration-runner] Orchestration not found: ${orchestrationId}`);
+ console.error(`${runnerLog(ctx)} Orchestration not found: ${orchestrationId}`);
break;
}
// Check for terminal states
if (['completed', 'failed', 'cancelled'].includes(orchestration.status)) {
- console.log(`[orchestration-runner] Orchestration ${orchestrationId} reached terminal state: ${orchestration.status}`);
+ console.log(`${runnerLog(ctx)} Orchestration ${orchestrationId} reached terminal state: ${orchestration.status}`);
break;
}
// Check for paused/waiting states - use longer wait, still event-driven
- if (orchestration.status === 'needs_attention') {
- console.log(`[orchestration-runner] Orchestration ${orchestrationId} needs attention, waiting for user action...`);
+ // Only log once per state to avoid repeating on every poll cycle
+ if (['needs_attention', 'paused', 'waiting_merge'].includes(orchestration.status)) {
+ if (lastLoggedStatus !== orchestration.status) {
+ lastLoggedStatus = orchestration.status;
+ console.log(`${runnerLog(ctx)} Status: ${orchestration.status}, waiting...`);
+ }
await eventDrivenSleep(ctx.pollingInterval * 2, orchestrationId);
continue;
}
+ lastLoggedStatus = null;
- if (orchestration.status === 'paused') {
- console.log(`[orchestration-runner] Orchestration ${orchestrationId} is paused, waiting...`);
- await eventDrivenSleep(ctx.pollingInterval * 2, orchestrationId);
- continue;
- }
+ const dashboardState = readDashboardState(projectPath);
- if (orchestration.status === 'waiting_merge') {
- console.log(`[orchestration-runner] Orchestration ${orchestrationId} waiting for merge trigger`);
- await eventDrivenSleep(ctx.pollingInterval * 2, orchestrationId);
- continue;
+ if (!dashboardState?.active) {
+ console.log(`${runnerLog(ctx)} No active dashboard state found, stopping runner`);
+ break;
}
- // Get the current workflow (if any)
- // First try the stored workflow ID, then fallback to querying by orchestrationId
- // This provides resilience if the stored ID is stale/wrong
- const currentWorkflowId = getCurrentWorkflowId(orchestration);
- let workflow = currentWorkflowId
- ? workflowService.get(currentWorkflowId, projectId)
- : undefined;
+ const initialStepState = readOrchestrationStep(projectPath);
+ const stepCurrent = normalizeStepCurrent(initialStepState?.current, orchestration.currentPhase);
- // Fallback: if stored ID didn't find a workflow, check for any active workflows
- // linked to this orchestration (handles race conditions and cancelled workflows)
- if (!workflow || !['running', 'waiting_for_input'].includes(workflow.status)) {
- const activeWorkflows = workflowService.findActiveByOrchestration(projectId, orchestrationId);
- if (activeWorkflows.length > 0) {
- workflow = activeWorkflows[0];
- console.log(`[orchestration-runner] Found active workflow via orchestration link: ${workflow.id}`);
- }
- }
-
- // Get specflow status (now direct file access, no subprocess - T021-T024)
- const specflowStatus = getSpecflowStatus(projectPath);
+ const expectedSkill = `flow.${stepCurrent}`;
+ const lastSkill = (dashboardState.lastWorkflow?.skill || '').replace(/^\//, '');
+ const matchesStep = !lastSkill || lastSkill === expectedSkill;
+ const workflowId = dashboardState.lastWorkflow?.id && matchesStep
+ ? dashboardState.lastWorkflow.id
+ : undefined;
- // Get last file change time for staleness detection
- const lastFileChangeTime = getLastFileChangeTime(projectPath);
+ const workflow = workflowId ? workflowService.get(workflowId, projectId) : undefined;
- // DEBUG: Log state before decision
- console.log(`[orchestration-runner] DEBUG: Making decision for ${orchestrationId}`);
- console.log(`[orchestration-runner] DEBUG: currentPhase=${orchestration.currentPhase}`);
- console.log(`[orchestration-runner] DEBUG: workflow.id=${workflow?.id ?? 'none'}, workflow.status=${workflow?.status ?? 'none'}`);
- console.log(`[orchestration-runner] DEBUG: specflowStatus.step=${specflowStatus?.orchestration?.step?.current ?? 'none'}, stepStatus=${specflowStatus?.orchestration?.step?.status ?? 'none'}`);
+ // Auto-heal when a running workflow completes or fails
+ if (dashboardState.lastWorkflow?.status === 'running' &&
+ workflow &&
+ ['completed', 'failed', 'cancelled'].includes(workflow.status)) {
+ console.log(`${runnerLog(ctx)} Workflow status changed: running → ${workflow.status}`);
+ const healStatus = workflow.status === 'completed' ? 'completed' : 'failed';
+ await autoHealAfterWorkflow(projectPath, dashboardState.lastWorkflow.skill, healStatus);
+ }
- // Make decision using the G2-compliant pure decision module
- let decision = makeDecisionWithAdapter(orchestration, workflow, specflowStatus, lastFileChangeTime);
+ const refreshedStepState = readOrchestrationStep(projectPath);
+ const decisionInput: DecisionInput = {
+ active: Boolean(dashboardState.active),
+ step: {
+ current: normalizeStepCurrent(refreshedStepState?.current, stepCurrent),
+ status: normalizeStepStatus(refreshedStepState?.status),
+ },
+ config: orchestration.config,
+ batches: orchestration.batches,
+ workflow: toWorkflowState(workflow),
+ };
- // Track consecutive "continue" (unclear/waiting) decisions
- // Only count as "unclear" if NO workflow is actively running
- if (decision.action === 'continue') {
- // If workflow is actively running, this is a CLEAR state - we know what's happening
- // Don't count these as "unclear" checks that would trigger Claude analyzer
- if (workflow && ['running', 'waiting_for_input'].includes(workflow.status)) {
- ctx.consecutiveUnclearChecks = 0; // Reset - state is clear, just waiting
- } else {
- // No workflow running but we're not spawning one - this IS unclear
- ctx.consecutiveUnclearChecks++;
- }
+ const decision = getNextAction(decisionInput);
- // After MAX_UNCLEAR_CHECKS_BEFORE_CLAUDE consecutive TRULY unclear waits, spawn Claude analyzer
- if (ctx.consecutiveUnclearChecks >= MAX_UNCLEAR_CHECKS_BEFORE_CLAUDE) {
- decision = await analyzeStateWithClaude(ctx, orchestration, workflow, specflowStatus);
- ctx.consecutiveUnclearChecks = 0; // Reset counter after Claude analysis
- }
- } else {
- // Reset counter on any non-continue decision
- ctx.consecutiveUnclearChecks = 0;
+ if (decision.action === 'idle') {
+ console.log(`${runnerLog(ctx)} No active orchestration, exiting runner loop`);
+ break;
}
- // Log decision
- console.log(`[orchestration-runner] DEBUG: DECISION: action=${decision.action}, skill=${decision.skill ?? 'none'}, reason=${decision.reason}`);
- logDecision(ctx, orchestration, decision);
+ if (decision.action !== 'wait') {
+ await orchestrationService.logDecision(
+ ctx.projectPath,
+ ctx.orchestrationId,
+ decision.action,
+ decision.reason
+ );
+ }
- // Execute decision
await executeDecision(ctx, orchestration, decision, workflow);
// T026: Event-driven wait - wakes on file events OR timeout
@@ -1470,12 +805,12 @@ export async function runOrchestration(
}
if (attempts >= maxPollingAttempts) {
- console.error(`[orchestration-runner] Max polling attempts reached for ${orchestrationId}`);
- orchestrationService.fail(projectPath, orchestrationId, 'Max polling attempts exceeded');
+ console.error(`${runnerLog(ctx)} Max polling attempts reached for ${orchestrationId}`);
+ await orchestrationService.fail(projectPath, orchestrationId, 'Max polling attempts exceeded');
}
} catch (error) {
- console.error(`[orchestration-runner] Error in runner: ${error}`);
- orchestrationService.fail(
+ console.error(`${runnerLog(ctx)} Error in runner: ${error}`);
+ await orchestrationService.fail(
projectPath,
orchestrationId,
error instanceof Error ? error.message : 'Unknown error in orchestration runner'
@@ -1484,186 +819,122 @@ export async function runOrchestration(
// Cleanup event subscription
if (eventCleanup) {
eventCleanup();
- console.log(`[orchestration-runner] Unsubscribed from file events for ${projectId}`);
+ console.log(`${runnerLog(ctx)} Unsubscribed from file events for ${projectId}`);
}
- // G5.9: Clear runner state file when exiting
- clearRunnerState(projectPath, orchestrationId);
-
- activeRunners.delete(orchestrationId);
- console.log(`[orchestration-runner] Runner stopped for ${orchestrationId}`);
- }
-}
-
-/**
- * Get the current workflow execution ID from orchestration state
- */
-function getCurrentWorkflowId(orchestration: OrchestrationExecution): string | undefined {
- const { currentPhase, batches, executions } = orchestration;
-
- switch (currentPhase) {
- case 'design':
- return executions.design;
- case 'analyze':
- return executions.analyze;
- case 'implement':
- const currentBatch = batches.items[batches.current];
- return currentBatch?.workflowExecutionId;
- case 'verify':
- return executions.verify;
- case 'merge':
- return executions.merge;
- default:
- return undefined;
+ // Only clean up runner state if this runner is still the active one.
+ // If superseded by a newer runner (force-restart), the new runner owns cleanup.
+ if (activeRunners.get(orchestrationId) === myGeneration) {
+ clearRunnerState(projectPath, orchestrationId);
+ activeRunners.delete(orchestrationId);
+ console.log(`${runnerLog(ctx)} Runner stopped for ${orchestrationId}`);
+ } else {
+ console.log(`${runnerLog(ctx)} Superseded runner exiting for ${orchestrationId}`);
+ }
}
}
-/**
- * Log a decision to the orchestration state
- */
-function logDecision(
- ctx: RunnerContext,
- orchestration: OrchestrationExecution,
- decision: DecisionResult
-): void {
- // Add to orchestration decision log
- orchestration.decisionLog.push({
- timestamp: new Date().toISOString(),
- decision: decision.action,
- reason: decision.reason,
- data: {
- currentPhase: orchestration.currentPhase,
- batchIndex: orchestration.batches.current,
- skill: decision.skill,
- },
- });
-
- // Console log for debugging
- console.log(
- `[orchestration-runner] Decision: ${decision.action} - ${decision.reason}`
- );
-}
-
/**
* Execute a decision
*/
async function executeDecision(
ctx: RunnerContext,
orchestration: OrchestrationExecution,
- decision: DecisionResult,
+ decision: Decision,
currentWorkflow: WorkflowExecution | undefined
): Promise {
switch (decision.action) {
- case 'continue':
- // Nothing to do, just wait
+ case 'idle':
+ case 'wait':
break;
- case 'spawn_workflow': {
+ case 'spawn': {
if (!decision.skill) {
- console.error('[orchestration-runner] No skill specified for spawn_workflow');
+ console.error(`${runnerLog(ctx)} No skill specified for spawn action`);
return;
}
- // Transition to next phase if needed
- const nextPhase = getNextPhaseFromSkill(decision.skill);
-
- // GUARD: Never transition OUT of implement phase while batches are incomplete
- // This prevents Claude analyzer or other decisions from prematurely jumping to verify/merge
- const completedBatchCount = orchestration.batches.items.filter(
- (b) => b.status === 'completed' || b.status === 'healed'
- ).length;
- const allBatchesComplete = orchestration.batches.items.length > 0 &&
- completedBatchCount === orchestration.batches.items.length;
-
- if (orchestration.currentPhase === 'implement' && nextPhase !== 'implement') {
- console.log(`[orchestration-runner] GUARD CHECK: implement→${nextPhase}, batches=${completedBatchCount}/${orchestration.batches.items.length}, allComplete=${allBatchesComplete}`);
- if (!allBatchesComplete) {
- console.log(`[orchestration-runner] BLOCKED: Cannot transition from implement to ${nextPhase} - batches incomplete`);
- return;
- }
- }
-
- if (nextPhase && nextPhase !== orchestration.currentPhase) {
- // Before transitioning to implement, ensure batches are populated
- // This handles the case when phase was opened during this orchestration
- if (nextPhase === 'implement' && orchestration.batches.total === 0) {
- const batchPlan = parseBatchesFromProject(ctx.projectPath, orchestration.config.batchSizeFallback);
- if (batchPlan && batchPlan.totalIncomplete > 0) {
- orchestrationService.updateBatches(ctx.projectPath, ctx.orchestrationId, batchPlan);
- console.log(`[orchestration-runner] Populated batches: ${batchPlan.batches.length} batches, ${batchPlan.totalIncomplete} tasks`);
- } else {
- console.error('[orchestration-runner] No tasks found after design phase');
- orchestrationService.fail(ctx.projectPath, ctx.orchestrationId, 'No tasks found after design phase completed');
- return;
- }
- }
-
- orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
- }
-
- // Use spawn intent pattern (G5.3-G5.7) to prevent race conditions
- const workflow = await spawnWorkflowWithIntent(ctx, decision.skill);
+ const workflow = await spawnWorkflowWithIntent(ctx, decision.skill, decision.context);
if (!workflow) {
- // Spawn was skipped (intent exists or workflow already active)
return;
}
- // Track cost from previous workflow
if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
+ await orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
}
break;
}
- case 'spawn_batch': {
- // DO NOT call completeBatch here - the batch hasn't been executed yet!
- // spawn_batch is triggered when batch.status === 'pending' && no workflow
- // We spawn a workflow for the CURRENT batch, not advance to next.
+ case 'transition': {
+ await orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
- // Track cost from previous workflow (if any - for healing scenarios)
if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
+ await orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
}
- // Get the current batch (which is pending)
- const currentBatch = orchestration.batches.items[orchestration.batches.current];
- if (!currentBatch || currentBatch.status !== 'pending') {
- console.error(`[orchestration-runner] spawn_batch called but current batch is not pending: ${currentBatch?.status}`);
- break;
+ if (decision.skill) {
+ await spawnWorkflowWithIntent(ctx, decision.skill, decision.context);
+ } else {
+ await writeDashboardState(ctx.projectPath, { lastWorkflow: null });
}
- // Check for pause between batches (only applies after first batch)
- if (orchestration.batches.current > 0 && orchestration.config.pauseBetweenBatches) {
- orchestrationService.pause(ctx.projectPath, ctx.orchestrationId);
- console.log(`[orchestration-runner] Paused between batches (configured)`);
- break;
+ console.log(`${runnerLog(ctx)} Transitioned to ${decision.nextStep ?? 'next phase'}`);
+ break;
+ }
+
+ case 'wait_merge': {
+ if (currentWorkflow?.costUsd) {
+ await orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
}
- // Build batch context for the CURRENT batch
- const batchContext = `Execute only the "${currentBatch.section}" section (${currentBatch.taskIds.join(', ')}). Do NOT work on tasks from other sections.`;
- const fullContext = orchestration.config.additionalContext
- ? `${batchContext}\n\n${orchestration.config.additionalContext}`
- : batchContext;
+ await orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
+ console.log(`${runnerLog(ctx)} Waiting for user to trigger merge`);
+ break;
+ }
- // Use spawn intent pattern (G5.3-G5.7) to prevent race conditions
- const workflow = await spawnWorkflowWithIntent(ctx, 'flow.implement', fullContext);
- if (workflow) {
- console.log(`[orchestration-runner] Spawned batch ${orchestration.batches.current + 1}/${orchestration.batches.total}: "${currentBatch.section}" (linked to orchestration ${ctx.orchestrationId})`);
+ case 'initialize_batches': {
+ const batchPlan = parseBatchesFromProject(ctx.projectPath, orchestration.config.batchSizeFallback);
+ if (batchPlan && batchPlan.totalIncomplete > 0) {
+ await orchestrationService.updateBatches(ctx.projectPath, ctx.orchestrationId, batchPlan);
+ console.log(`${runnerLog(ctx)} Initialized batches: ${batchPlan.batches.length} batches, ${batchPlan.totalIncomplete} tasks`);
+ } else {
+ console.error(`${runnerLog(ctx)} No tasks found to create batches`);
+ await orchestrationService.setNeedsAttention(
+ ctx.projectPath,
+ ctx.orchestrationId,
+ 'No tasks found to create batches',
+ ['retry', 'abort']
+ );
}
break;
}
- case 'heal': {
- const batch = orchestration.batches.items[orchestration.batches.current];
+ case 'advance_batch': {
+ await orchestrationService.completeBatch(ctx.projectPath, ctx.orchestrationId);
+
+ if (currentWorkflow?.costUsd) {
+ await orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
+ }
+
+ if (decision.pauseAfterAdvance) {
+ await orchestrationService.pause(ctx.projectPath, ctx.orchestrationId);
+ console.log(`${runnerLog(ctx)} Paused between batches`);
+ } else {
+ console.log(`${runnerLog(ctx)} Batch complete, advancing to next batch`);
+ }
+ break;
+ }
+
+ case 'heal_batch': {
+ const batchIndex = decision.batchIndex ?? orchestration.batches.current;
+ const batch = orchestration.batches.items[batchIndex];
if (!batch) {
- console.error('[orchestration-runner] No current batch to heal');
+ console.error(`${runnerLog(ctx)} No batch found to heal`);
return;
}
- // Increment heal attempt
- orchestrationService.incrementHealAttempt(ctx.projectPath, ctx.orchestrationId);
+ await orchestrationService.incrementHealAttempt(ctx.projectPath, ctx.orchestrationId);
- // Attempt healing
const healResult = await attemptHeal(
ctx.projectPath,
batch.workflowExecutionId || '',
@@ -1673,24 +944,21 @@ async function executeDecision(
orchestration.config.budget.healingBudget
);
- // Track healing cost
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, healResult.cost);
+ await orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, healResult.cost);
- console.log(`[orchestration-runner] Heal result: ${getHealingSummary(healResult)}`);
+ console.log(`${runnerLog(ctx)} Heal result: ${getHealingSummary(healResult)}`);
if (healResult.success && healResult.result?.status === 'fixed') {
- // Healing successful - mark batch as healed and continue
- orchestrationService.healBatch(
+ await orchestrationService.healBatch(
ctx.projectPath,
ctx.orchestrationId,
healResult.sessionId || ''
);
- orchestrationService.completeBatch(ctx.projectPath, ctx.orchestrationId);
+ await orchestrationService.completeBatch(ctx.projectPath, ctx.orchestrationId);
} else {
- // Healing failed
const canRetry = orchestrationService.canHealBatch(ctx.projectPath, ctx.orchestrationId);
if (!canRetry) {
- orchestrationService.fail(
+ await orchestrationService.fail(
ctx.projectPath,
ctx.orchestrationId,
`Batch healing failed after max attempts: ${healResult.errorMessage || 'Unknown error'}`
@@ -1700,257 +968,23 @@ async function executeDecision(
break;
}
- case 'wait_merge': {
- // Track cost from verify workflow
- if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
- }
-
- // Transition to merge phase but in waiting status
- orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
- console.log(`[orchestration-runner] Waiting for user to trigger merge`);
- break;
- }
-
- case 'complete': {
- // Track final cost
- if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
- }
-
- // Mark complete
- const finalOrchestration = orchestrationService.get(ctx.projectPath, ctx.orchestrationId);
- if (finalOrchestration) {
- finalOrchestration.status = 'completed';
- finalOrchestration.completedAt = new Date().toISOString();
- finalOrchestration.decisionLog.push({
- timestamp: new Date().toISOString(),
- decision: 'complete',
- reason: 'All phases completed successfully',
- });
- }
- console.log(`[orchestration-runner] Orchestration complete!`);
- break;
- }
-
case 'needs_attention': {
- // Set orchestration to needs_attention instead of failing
- // This allows the user to decide what to do (retry, skip, abort)
- orchestrationService.setNeedsAttention(
+ await orchestrationService.setNeedsAttention(
ctx.projectPath,
ctx.orchestrationId,
- decision.errorMessage || 'Unknown issue',
- decision.recoveryOptions || ['retry', 'abort'],
- decision.failedWorkflowId
+ decision.reason,
+ ['retry', 'skip', 'abort']
);
- console.log(`[orchestration-runner] Orchestration needs attention: ${decision.errorMessage}`);
+ console.log(`${runnerLog(ctx)} Orchestration needs attention: ${decision.reason}`);
break;
}
- case 'fail': {
- orchestrationService.fail(ctx.projectPath, ctx.orchestrationId, decision.errorMessage || 'Unknown error');
- console.error(`[orchestration-runner] Orchestration failed: ${decision.errorMessage}`);
- break;
- }
-
- // =========================================================================
- // G2 Compliance: New action types from pure decision module
- // =========================================================================
-
- case 'transition': {
- // Transition to next step (G2.3)
- if (!decision.skill) {
- console.error('[orchestration-runner] No skill specified for transition');
- return;
- }
- orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
- const workflow = await spawnWorkflowWithIntent(ctx, decision.skill);
- if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
- }
- console.log(`[orchestration-runner] Transitioned to ${decision.nextStep}`);
- break;
- }
-
- case 'advance_batch': {
- // Move to next batch (G2.7, G2.8) - but first verify tasks were actually completed
- const currentBatch = orchestration.batches.items[orchestration.batches.current];
- if (currentBatch) {
- // Verify which tasks are actually complete in tasks.md
- const { completedTasks, incompleteTasks } = verifyBatchTaskCompletion(
- ctx.projectPath,
- currentBatch.taskIds
- );
-
- console.log(`[orchestration-runner] Batch ${orchestration.batches.current + 1} verification: ${completedTasks.length}/${currentBatch.taskIds.length} tasks complete`);
-
- if (incompleteTasks.length > 0) {
- // Tasks still incomplete - re-spawn the batch workflow to continue
- console.log(`[orchestration-runner] Batch has ${incompleteTasks.length} incomplete tasks, re-spawning workflow`);
- orchestrationService.logDecision(
- ctx.projectPath,
- ctx.orchestrationId,
- 'batch_incomplete',
- `Batch ${orchestration.batches.current + 1} still has ${incompleteTasks.length} incomplete tasks: ${incompleteTasks.join(', ')}`
- );
-
- // Re-spawn the batch workflow to continue working on incomplete tasks
- const batchContext = `Continue working on incomplete tasks in batch "${currentBatch.section}": ${incompleteTasks.join(', ')}`;
- const workflow = await spawnWorkflowWithIntent(
- ctx,
- 'flow.implement',
- orchestration.config.additionalContext
- ? `${batchContext}\n\n${orchestration.config.additionalContext}`
- : batchContext
- );
-
- if (workflow) {
- orchestrationService.linkWorkflowExecution(ctx.projectPath, ctx.orchestrationId, workflow.id);
- }
-
- // Don't advance - stay on current batch
- break;
- }
- }
-
- // All tasks in batch are complete - advance to next batch
- orchestrationService.completeBatch(ctx.projectPath, ctx.orchestrationId);
- if (currentWorkflow?.costUsd) {
- orchestrationService.addCost(ctx.projectPath, ctx.orchestrationId, currentWorkflow.costUsd);
- }
- console.log(`[orchestration-runner] Batch complete, advancing to batch ${decision.batchIndex}`);
- break;
- }
-
- case 'initialize_batches': {
- // Initialize batch tracking (G2.1)
- const batchPlan = parseBatchesFromProject(ctx.projectPath, orchestration.config.batchSizeFallback);
- if (batchPlan && batchPlan.totalIncomplete > 0) {
- orchestrationService.updateBatches(ctx.projectPath, ctx.orchestrationId, batchPlan);
- console.log(`[orchestration-runner] Initialized batches: ${batchPlan.batches.length} batches, ${batchPlan.totalIncomplete} tasks`);
- } else {
- console.error('[orchestration-runner] No tasks found to create batches');
- orchestrationService.setNeedsAttention(
- ctx.projectPath,
- ctx.orchestrationId,
- 'No tasks found to create batches',
- ['retry', 'abort']
- );
- }
- break;
- }
-
- case 'force_step_complete': {
- // Force step.status to complete when all batches done (G2.2)
- // First verify all tasks are actually complete in tasks.md
- const totalIncomplete = getTotalIncompleteTasks(ctx.projectPath);
-
- if (totalIncomplete !== null && totalIncomplete > 0) {
- // Tasks still incomplete - don't transition, re-initialize batches
- console.log(`[orchestration-runner] Still ${totalIncomplete} incomplete tasks, re-initializing batches`);
- orchestrationService.logDecision(
- ctx.projectPath,
- ctx.orchestrationId,
- 'tasks_incomplete',
- `Cannot mark implement complete: ${totalIncomplete} tasks still incomplete`
- );
-
- // Re-parse and update batches with remaining incomplete tasks
- const batchPlan = parseBatchesFromProject(ctx.projectPath, orchestration.config.batchSizeFallback);
- if (batchPlan && batchPlan.totalIncomplete > 0) {
- orchestrationService.updateBatches(ctx.projectPath, ctx.orchestrationId, batchPlan);
- console.log(`[orchestration-runner] Re-initialized batches: ${batchPlan.batches.length} batches, ${batchPlan.totalIncomplete} tasks`);
- }
- break;
- }
-
- // All tasks complete - transition to next phase
- orchestrationService.transitionToNextPhase(ctx.projectPath, ctx.orchestrationId);
- console.log(`[orchestration-runner] All tasks complete, transitioning to next phase`);
- break;
- }
-
- case 'pause': {
- // Pause orchestration (G2.6)
- orchestrationService.pause(ctx.projectPath, ctx.orchestrationId);
- console.log(`[orchestration-runner] Paused: ${decision.reason}`);
- break;
- }
-
- case 'recover_stale': {
- // Recover from stale workflow (G1.5, G3.7-G3.10)
- console.log(`[orchestration-runner] Workflow appears stale: ${decision.reason}`);
- orchestrationService.setNeedsAttention(
- ctx.projectPath,
- ctx.orchestrationId,
- `Workflow stale: ${decision.reason}`,
- ['retry', 'skip', 'abort'],
- decision.workflowId
- );
- break;
- }
-
- case 'recover_failed': {
- // Recover from failed step/workflow (G1.13, G1.14, G2.10, G3.11-G3.16)
- console.log(`[orchestration-runner] Step/batch failed: ${decision.reason}`);
- orchestrationService.setNeedsAttention(
- ctx.projectPath,
- ctx.orchestrationId,
- decision.errorMessage || decision.reason,
- decision.recoveryOptions || ['retry', 'skip', 'abort'],
- decision.failedWorkflowId
- );
- break;
- }
-
- case 'wait_with_backoff': {
- // Wait with exponential backoff (G1.7)
- console.log(`[orchestration-runner] Waiting with backoff: ${decision.reason}`);
- // The backoff is handled by the main loop, not here
- break;
- }
-
- case 'wait_user_gate': {
- // Wait for USER_GATE confirmation (G1.8)
- console.log(`[orchestration-runner] Waiting for USER_GATE confirmation`);
- // Update orchestration status to indicate waiting for user gate
- const orchToUpdate = orchestrationService.get(ctx.projectPath, ctx.orchestrationId);
- if (orchToUpdate) {
- orchToUpdate.status = 'waiting_user_gate' as OrchestrationExecution['status'];
- }
- break;
- }
-
- default: {
- // Unknown action - log error but don't crash
- console.error(`[orchestration-runner] Unknown decision action: ${decision.action}`);
+ default:
+ console.error(`${runnerLog(ctx)} Unknown decision action: ${decision.action}`);
break;
- }
}
}
-/**
- * Get phase from skill name
- */
-function getNextPhaseFromSkill(skill: string): OrchestrationPhase | null {
- const skillName = skill.split(' ')[0].replace('flow.', '');
- const phaseMap: Record = {
- design: 'design',
- analyze: 'analyze',
- implement: 'implement',
- verify: 'verify',
- merge: 'merge',
- };
- return phaseMap[skillName] || null;
-}
-
-/**
- * Sleep helper
- */
-function sleep(ms: number): Promise {
- return new Promise((resolve) => setTimeout(resolve, ms));
-}
-
// =============================================================================
// Resume/Merge Trigger Helpers
// =============================================================================
@@ -1967,7 +1001,7 @@ export async function resumeOrchestration(
if (!projectPath) return;
// Resume via orchestration service
- orchestrationService.resume(projectPath, orchestrationId);
+ await orchestrationService.resume(projectPath, orchestrationId);
// Restart the runner
runOrchestration(projectId, orchestrationId).catch(console.error);
@@ -2002,11 +1036,18 @@ export async function triggerMerge(
writeSpawnIntent(projectPath, orchestrationId, 'flow.merge');
// Update status via orchestration service
- orchestrationService.triggerMerge(projectPath, orchestrationId);
+ await orchestrationService.triggerMerge(projectPath, orchestrationId);
// Spawn merge workflow
const workflow = await workflowService.start(projectId, 'flow.merge', undefined, undefined, orchestrationId);
- orchestrationService.linkWorkflowExecution(projectPath, orchestrationId, workflow.id);
+ await orchestrationService.linkWorkflowExecution(projectPath, orchestrationId, workflow.id);
+ await writeDashboardState(projectPath, {
+ lastWorkflow: {
+ id: workflow.id,
+ skill: 'flow.merge',
+ status: 'running',
+ },
+ });
// Restart the runner to handle merge completion
runOrchestration(projectId, orchestrationId).catch(console.error);
@@ -2020,7 +1061,7 @@ export async function triggerMerge(
* Check if a runner is active for an orchestration
*/
export function isRunnerActive(orchestrationId: string): boolean {
- return activeRunners.get(orchestrationId) === true;
+ // Presence check rather than `=== true`: the runner's cleanup path compares
+ // the stored value with `myGeneration`, so entries are presumably generation
+ // tokens and no longer a literal `true`.
+ return activeRunners.has(orchestrationId);
}
/**
diff --git a/packages/dashboard/src/lib/services/orchestration-service.ts b/packages/dashboard/src/lib/services/orchestration-service.ts
index f79f356..0ab5cad 100644
--- a/packages/dashboard/src/lib/services/orchestration-service.ts
+++ b/packages/dashboard/src/lib/services/orchestration-service.ts
@@ -12,217 +12,355 @@
* - Integration with specflow status --json
*/
-import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, renameSync, unlinkSync } from 'fs';
+import { existsSync, readFileSync } from 'fs';
import { join } from 'path';
import { execSync } from 'child_process';
import { randomUUID } from 'crypto';
import { readPidFile, isPidAlive, killProcess, cleanupPidFile } from './process-spawner';
import {
- type OrchestrationExecution,
type OrchestrationConfig,
type OrchestrationPhase,
type OrchestrationStatus,
+ type DashboardOrchestrationStatus,
type BatchTracking,
type BatchPlan,
- type DecisionLogEntry,
- OrchestrationExecutionSchema,
- createOrchestrationExecution,
+ type DashboardState,
+ type OrchestrationState,
+ type StepStatus,
+ OrchestrationStateSchema,
+ DashboardStateSchema,
+ STEP_INDEX_MAP,
} from '@specflow/shared';
-import { parseBatchesFromProject, createBatchTracking } from './batch-parser';
-// =============================================================================
-// Constants
-// =============================================================================
+/**
+ * Type for orchestration step info from CLI state file
+ *
+ * This is the shape returned by readOrchestrationStep, which hands back the
+ * `orchestration.step` object of the state file. Every field is optional and
+ * may also be null, so consumers have to cope with missing values.
+ */
+export interface OrchestrationStepInfo {
+ current?: string | null;
+ index?: number | null;
+ status?: StepStatus | null;
+}
+import { createBatchTracking } from './batch-parser';
+import type { OrchestrationExecution } from './orchestration-types';
+import { getSpecflowEnv } from '@/lib/specflow-env';
-const ORCHESTRATION_FILE_PREFIX = 'orchestration-';
// =============================================================================
-// State Persistence (FR-023)
+// CLI State File Helpers (FR-001 - Single Source of Truth)
// =============================================================================
/**
- * Get the orchestration directory for a project
+ * Get the CLI state file path for a project
+ *
+ * Resolution order: the `.specflow` (v3) file when it exists, otherwise the
+ * `.specify` (v2) file when it exists. When neither exists the v3 path is
+ * returned anyway, so the result may point at a file that is not there yet.
+ *
+ * @param projectPath - Directory under which `.specflow` / `.specify` are looked up
+ * @returns Path to `orchestration-state.json`; existence is not guaranteed
*/
-function getOrchestrationDir(projectPath: string): string {
- const dir = join(projectPath, '.specflow', 'workflows');
- mkdirSync(dir, { recursive: true });
- return dir;
+function getCliStateFilePath(projectPath: string): string {
+ // Try .specflow first (v3), then .specify (v2)
+ const v3Path = join(projectPath, '.specflow', 'orchestration-state.json');
+ const v2Path = join(projectPath, '.specify', 'orchestration-state.json');
+ return existsSync(v3Path) ? v3Path : existsSync(v2Path) ? v2Path : v3Path;
}
/**
- * Get the file path for an orchestration
+ * Read the full CLI state file
+ * Uses safeParse to handle schema mismatches gracefully
+ *
+ * Outcomes:
+ * - state file does not exist: returns null
+ * - file parses and matches OrchestrationStateSchema: returns the validated data
+ * - file parses as JSON but fails the schema: returns the raw parsed value
+ *   behind a type assertion, so the result is NOT guaranteed to match
+ *   OrchestrationState and callers must tolerate missing fields
+ * - reading or JSON parsing throws: logs a warning and returns null
+ *
+ * @param projectPath - Passed to getCliStateFilePath to locate the state file
+ * @returns The state object (validated or raw), or null as described above
*/
-function getOrchestrationPath(projectPath: string, id: string): string {
- return join(getOrchestrationDir(projectPath), `${ORCHESTRATION_FILE_PREFIX}${id}.json`);
+function readCliState(projectPath: string): OrchestrationState | null {
+ const statePath = getCliStateFilePath(projectPath);
+ if (!existsSync(statePath)) {
+ return null;
+ }
+ try {
+ const content = readFileSync(statePath, 'utf-8');
+ const parsed = JSON.parse(content);
+ const result = OrchestrationStateSchema.safeParse(parsed);
+ if (result.success) {
+ return result.data;
+ }
+ // Return the raw parsed data with type assertion for graceful degradation
+ // The dashboard state extraction will handle any missing fields
+ return parsed as OrchestrationState;
+ } catch (error) {
+ console.warn('[orchestration-service] Failed to read CLI state:', error);
+ return null;
+ }
}
/**
- * Save orchestration state to file (atomic write - G5.1, G5.2)
- *
- * Uses write-to-temp + atomic rename pattern to prevent partial writes
- * from corrupting state during crashes or concurrent access.
+ * Read dashboard state from CLI state file
+ * Returns the orchestration.dashboard section or null if not present
+ * Uses safeParse for graceful handling of partial/incomplete state
+ *
+ * When the dashboard section matches DashboardStateSchema the validated data
+ * is returned as-is. Otherwise a best-effort object is assembled from the raw
+ * section:
+ * - `active` keeps id/startedAt/status/config from the file when they are
+ *   truthy and falls back to 'unknown', the current time, 'running' and a
+ *   built-in default config respectively; it is null when the file has no
+ *   truthy `active` entry
+ * - `batches`, `cost` and `decisionLog` are reset to empty defaults in this
+ *   fallback path, regardless of what the file contains
+ * - `lastWorkflow` and `recoveryContext` are copied through unvalidated
+ *
+ * @param projectPath - Project whose CLI state file is read
+ * @returns Dashboard state, or null when the state file or its dashboard
+ *          section is missing, or when building the fallback throws (a
+ *          warning is logged in that case)
*/
-function saveOrchestration(projectPath: string, execution: OrchestrationExecution): void {
- const filePath = getOrchestrationPath(projectPath, execution.id);
- const tempPath = `${filePath}.tmp`;
-
- execution.updatedAt = new Date().toISOString();
- const content = JSON.stringify(execution, null, 2);
-
- // G5.1: Write to temp file first
- writeFileSync(tempPath, content);
-
- // G5.2: Atomic rename (POSIX guarantees atomicity on same filesystem)
+export function readDashboardState(projectPath: string): DashboardState | null {
+ const state = readCliState(projectPath);
+ if (!state?.orchestration?.dashboard) {
+ return null;
+ }
try {
- renameSync(tempPath, filePath);
- } catch (error) {
- // Clean up temp file if rename fails
- try {
- unlinkSync(tempPath);
- } catch {
- // Ignore cleanup errors
+ const result = DashboardStateSchema.safeParse(state.orchestration.dashboard);
+ if (result.success) {
+ return result.data;
}
- throw error;
+ // Extract what we can from the raw data for graceful degradation
+ const raw = state.orchestration.dashboard as Record<string, unknown>;
+ const active = raw.active as Record<string, unknown> | null;
+
+ // Build active object with defaults for missing required fields
+ type ActiveType = NonNullable<DashboardState['active']>;
+ const defaultConfig: ActiveType['config'] = {
+ autoMerge: false,
+ additionalContext: '',
+ skipDesign: false,
+ skipAnalyze: false,
+ skipImplement: false,
+ skipVerify: false,
+ autoHealEnabled: true,
+ maxHealAttempts: 3,
+ pauseBetweenBatches: false,
+ batchSizeFallback: 5,
+ budget: { maxPerBatch: 10.0, maxTotal: 50.0, healingBudget: 1.0, decisionBudget: 0.5 },
+ };
+
+ return {
+ active: active ? {
+ id: (active.id as string) || 'unknown',
+ startedAt: (active.startedAt as string) || new Date().toISOString(),
+ status: ((active.status as string) || 'running') as ActiveType['status'],
+ config: (active.config as ActiveType['config']) || defaultConfig,
+ } : null,
+ batches: { total: 0, current: 0, items: [] },
+ cost: { total: 0, perBatch: [] },
+ decisionLog: [],
+ lastWorkflow: (raw.lastWorkflow as DashboardState['lastWorkflow']) || null,
+ recoveryContext: raw.recoveryContext as DashboardState['recoveryContext'],
+ };
+ } catch (error) {
+ console.warn('[orchestration-service] Invalid dashboard state:', error);
+ return null;
}
}
/**
- * Sync current phase to orchestration-state.json for UI consistency
- * This keeps the state file in sync with the orchestration execution
+ * Read orchestration step info from CLI state file
+ * Returns the orchestration.step object or null if not present
+ *
+ * null is also the result whenever readCliState yields null, i.e. when the
+ * state file is missing or could not be read. The step object is passed
+ * through as found; no per-field validation happens here.
+ *
+ * @param projectPath - Project whose CLI state file is read
+ * @returns The `orchestration.step` object, or null
*/
-function syncPhaseToStateFile(projectPath: string, phase: OrchestrationPhase): void {
- try {
- // Try .specflow first (v3), then .specify (v2)
- let statePath = join(projectPath, '.specflow', 'orchestration-state.json');
- if (!existsSync(statePath)) {
- statePath = join(projectPath, '.specify', 'orchestration-state.json');
- }
- if (!existsSync(statePath)) {
- return; // No state file to update
+export function readOrchestrationStep(
+ projectPath: string
+): OrchestrationStepInfo | null {
+ const state = readCliState(projectPath);
+ return state?.orchestration?.step ?? null;
+}
+
+/**
+ * Write dashboard state to CLI state file
+ * Uses specflow state set for atomic, validated writes
+ *
+ * Each provided field of `updates` becomes one or more `key=value` arguments,
+ * and all of them are passed to a single `specflow state set` invocation run
+ * with `projectPath` as the working directory. Fields left `undefined` are not
+ * written; when nothing is provided no command is run.
+ *
+ * JSON payloads and free-text values are double-quoted and escaped for the
+ * shell so that each one reaches the CLI as a single, intact argument.
+ *
+ * @param projectPath - Project root used as the working directory for the CLI
+ * @param updates - Subset of dashboard state fields to write
+ * @throws Re-throws any error raised by the CLI invocation after logging it
+ */
+export async function writeDashboardState(
+ projectPath: string,
+ updates: Partial<DashboardState>
+): Promise<void> {
+ const commands: string[] = [];
+
+ // Double-quote a value for the shell, escaping the characters that remain
+ // special inside double quotes (backslash, double quote, dollar sign, backtick).
+ // Assumes the POSIX-style shell that execSync spawns by default on Unix.
+ const quoteForShell = (value: string): string => {
+ const escaped = value.replace(/[\\"$`]/g, '\\$&');
+ return `"${escaped}"`;
+ };
+
+ // Build specflow state set commands for each field
+ if (updates.active !== undefined) {
+ if (updates.active === null) {
+ commands.push('orchestration.dashboard.active=null');
+ } else {
+ if (updates.active.id) commands.push(`orchestration.dashboard.active.id=${updates.active.id}`);
+ if (updates.active.startedAt) commands.push(`orchestration.dashboard.active.startedAt=${updates.active.startedAt}`);
+ if (updates.active.status) commands.push(`orchestration.dashboard.active.status=${updates.active.status}`);
+ // Config is a complex object - serialize to JSON
+ if (updates.active.config) {
+ const configJson = quoteForShell(JSON.stringify(updates.active.config));
+ commands.push(`orchestration.dashboard.active.config=${configJson}`);
+ }
}
+ }
- const content = readFileSync(statePath, 'utf-8');
- const state = JSON.parse(content);
-
- // Update step.current to match orchestration phase
- if (state.orchestration) {
- state.orchestration.step = state.orchestration.step || {};
- state.orchestration.step.current = phase;
- state.orchestration.step.status = 'in_progress';
- state.last_updated = new Date().toISOString();
+ if (updates.batches !== undefined) {
+ commands.push(`orchestration.dashboard.batches.total=${updates.batches.total}`);
+ commands.push(`orchestration.dashboard.batches.current=${updates.batches.current}`);
+ // Items array needs special handling - serialize to JSON
+ const itemsJson = quoteForShell(JSON.stringify(updates.batches.items));
+ commands.push(`orchestration.dashboard.batches.items=${itemsJson}`);
+ }
+
+ if (updates.cost !== undefined) {
+ commands.push(`orchestration.dashboard.cost.total=${updates.cost.total}`);
+ // Escaped like every other JSON payload; unescaped quotes would be removed by the shell
+ const perBatchJson = quoteForShell(JSON.stringify(updates.cost.perBatch));
+ commands.push(`orchestration.dashboard.cost.perBatch=${perBatchJson}`);
+ }
+
+ if (updates.lastWorkflow !== undefined) {
+ if (updates.lastWorkflow === null) {
+ commands.push('orchestration.dashboard.lastWorkflow=null');
+ } else {
+ commands.push(`orchestration.dashboard.lastWorkflow.id=${updates.lastWorkflow.id}`);
+ commands.push(`orchestration.dashboard.lastWorkflow.skill=${updates.lastWorkflow.skill}`);
+ commands.push(`orchestration.dashboard.lastWorkflow.status=${updates.lastWorkflow.status}`);
}
+ }
- writeFileSync(statePath, JSON.stringify(state, null, 2));
- } catch {
- // Non-critical: log but don't fail orchestration
- console.warn('[orchestration-service] Failed to sync phase to state file');
+ if (updates.decisionLog !== undefined) {
+ const logJson = quoteForShell(JSON.stringify(updates.decisionLog));
+ commands.push(`orchestration.dashboard.decisionLog=${logJson}`);
}
-}
-/**
- * Load orchestration state from file
- */
-function loadOrchestration(projectPath: string, id: string): OrchestrationExecution | null {
- const filePath = getOrchestrationPath(projectPath, id);
- if (!existsSync(filePath)) {
- return null;
+ if (updates.recoveryContext !== undefined) {
+ if (!updates.recoveryContext) {
+ // Clear recovery context by setting it to null
+ commands.push('orchestration.dashboard.recoveryContext=null');
+ } else {
+ // issue is free text: quote it so spaces do not split it into several arguments
+ commands.push(`orchestration.dashboard.recoveryContext.issue=${quoteForShell(String(updates.recoveryContext.issue))}`);
+ const optionsJson = quoteForShell(JSON.stringify(updates.recoveryContext.options));
+ commands.push(`orchestration.dashboard.recoveryContext.options=${optionsJson}`);
+ if (updates.recoveryContext.failedWorkflowId) {
+ commands.push(`orchestration.dashboard.recoveryContext.failedWorkflowId=${updates.recoveryContext.failedWorkflowId}`);
+ }
+ }
}
+
+ if (commands.length === 0) {
+ return; // Nothing to update
+ }
+
+ // Execute specflow state set with all updates
+ const fullCommand = `specflow state set ${commands.join(' ')}`;
try {
- const content = readFileSync(filePath, 'utf-8');
- return OrchestrationExecutionSchema.parse(JSON.parse(content));
- } catch {
- return null;
+ execSync(fullCommand, {
+ cwd: projectPath,
+ encoding: 'utf-8',
+ timeout: 30000,
+ env: getSpecflowEnv(),
+ });
+ } catch (error) {
+ console.error('[orchestration-service] Failed to write dashboard state:', error);
+ throw error;
}
}
/**
- * List all orchestrations for a project
+ * Helper to add a decision log entry via CLI state
+ *
+ * Reads the current dashboard state, appends one entry stamped with the
+ * current time, and writes the complete log array back through
+ * writeDashboardState (the log is rewritten in full on every call).
+ *
+ * @param projectPath - Project path handed to readDashboardState and writeDashboardState
+ * @param action - Action identifier stored on the new entry
+ * @param reason - Explanation stored on the new entry
*/
-function listOrchestrations(projectPath: string): OrchestrationExecution[] {
- const dir = getOrchestrationDir(projectPath);
- const orchestrations: OrchestrationExecution[] = [];
+export async function logDashboardDecision(
+ projectPath: string,
+ action: string,
+ reason: string
+): Promise {
+ const state = readDashboardState(projectPath);
+ const currentLog = state?.decisionLog || []; // no state or no log yet: start from an empty log
+ const newEntry = {
+ timestamp: new Date().toISOString(),
+ action,
+ reason,
+ };
+ await writeDashboardState(projectPath, {
+ decisionLog: [...currentLog, newEntry],
+ });
+}
- try {
- const files = readdirSync(dir).filter(
- (f) => f.startsWith(ORCHESTRATION_FILE_PREFIX) && f.endsWith('.json')
- );
+// =============================================================================
+// Dashboard State Helpers
+// =============================================================================
- for (const file of files) {
- try {
- const content = readFileSync(join(dir, file), 'utf-8');
- const execution = OrchestrationExecutionSchema.parse(JSON.parse(content));
- orchestrations.push(execution);
- } catch {
- // Skip invalid files
- }
- }
- } catch {
- // Directory doesn't exist
- }
+/**
+ * Read the dashboard state and return it only when it holds an active orchestration.
+ *
+ * When `orchestrationId` is supplied, the active orchestration must carry that
+ * id; otherwise null is returned, exactly as when nothing is active.
+ */
+function getActiveDashboardState(
+ projectPath: string,
+ orchestrationId?: string
+): DashboardState | null {
+ const dashboard = readDashboardState(projectPath);
+ if (dashboard?.active) {
+ const wantsSpecificId = Boolean(orchestrationId);
+ if (!wantsSpecificId || dashboard.active.id === orchestrationId) {
+ return dashboard;
+ }
+ }
+ return null;
+}
- // Sort by updatedAt descending
- return orchestrations.sort(
- (a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()
- );
+/**
+ * Persist a full dashboard snapshot by forwarding each of its sections
+ * (active, batches, cost, decisionLog, lastWorkflow, recoveryContext) to
+ * writeDashboardState.
+ *
+ * NOTE(review): the return type's type argument appears to have been stripped
+ * in this extract (presumably Promise<void>) — confirm against the real file.
+ */
+async function persistDashboardState(
+ projectPath: string,
+ state: DashboardState
+): Promise {
+ await writeDashboardState(projectPath, {
+ active: state.active,
+ batches: state.batches,
+ cost: state.cost,
+ decisionLog: state.decisionLog,
+ lastWorkflow: state.lastWorkflow,
+ recoveryContext: state.recoveryContext,
+ });
}
+// =============================================================================
+// Orchestration Flow Helpers
+// =============================================================================
+
/**
- * Staleness threshold for waiting_merge orchestrations
- * If an orchestration has been waiting for merge for longer than this, consider it stale
+ * Get the starting phase based on config skip settings
*/
-const WAITING_MERGE_STALE_MS = 2 * 60 * 60 * 1000; // 2 hours
+function getStartingPhase(config: OrchestrationConfig): OrchestrationPhase {
+ // Phases in execution order, each paired with the config flag that skips it.
+ const candidates: Array<[boolean | undefined, OrchestrationPhase]> = [
+ [config.skipDesign, 'design'],
+ [config.skipAnalyze, 'analyze'],
+ [config.skipImplement, 'implement'],
+ [config.skipVerify, 'verify'],
+ ];
+ // The first phase that is not skipped wins; if all four are skipped, start at merge.
+ const firstEnabled = candidates.find(([skip]) => !skip);
+ return firstEnabled ? firstEnabled[1] : 'merge';
+}
+
/**
- * Check if an orchestration is stale based on its status and age
+ * Sync current phase to orchestration state via `specflow state set`
+ * Uses the CLI as the single source of truth (avoids direct JSON writes)
+ *
+ * Writes orchestration.step.current, .status and .index in one CLI call.
+ * Phases with no entry in STEP_INDEX_MAP are ignored. Errors from the CLI
+ * call are caught and reported with console.warn only.
+ *
+ * @param projectPath - Working directory for the CLI invocation
+ * @param phase - Phase written to orchestration.step.current
+ * @param status - Value written to orchestration.step.status (default 'in_progress')
*/
-function isOrchestrationStale(orchestration: OrchestrationExecution): boolean {
- // Only apply staleness check to waiting_merge status
- // running/paused should always be considered active regardless of age
- if (orchestration.status !== 'waiting_merge') {
- return false;
- }
+function syncPhaseToStateFile(
+ projectPath: string,
+ phase: OrchestrationPhase,
+ status: 'in_progress' | 'not_started' | 'complete' = 'in_progress'
+): void {
+ try {
+ // Only sync phases that map to workflow steps
+ const stepIndex = STEP_INDEX_MAP[phase as keyof typeof STEP_INDEX_MAP];
+ if (stepIndex === undefined) {
+ return;
+ }
+
+ const commandParts = [
+ `orchestration.step.current=${phase}`,
+ `orchestration.step.status=${status}`,
+ `orchestration.step.index=${stepIndex}`,
+ ];
- // Check if waiting_merge has been stale for too long
- const updatedAt = new Date(orchestration.updatedAt).getTime();
- const age = Date.now() - updatedAt;
- return age > WAITING_MERGE_STALE_MS;
+ execSync(`specflow state set ${commandParts.join(' ')}`, {
+ cwd: projectPath,
+ encoding: 'utf-8',
+ timeout: 10000,
+ env: getSpecflowEnv(),
+ });
+ } catch {
+ // Non-critical: log but don't fail orchestration
+ console.warn('[orchestration-service] Failed to sync phase to state file');
+ }
}
/**
- * Find active orchestration for a project (FR-024)
- * Returns the first orchestration in 'running' or 'paused' status
- * Excludes stale waiting_merge orchestrations (older than 2 hours)
+ * Ensure CLI step aligns with orchestration status (e.g., waiting_merge -> merge step).
+ *
+ * Only the waiting_merge status is handled; every other status returns
+ * immediately. For waiting_merge the CLI step is rewritten to
+ * merge / not_started when any of current, status or index differs.
+ *
+ * @param projectPath - Project path used to read and, if needed, update CLI state
+ * @param status - Orchestration status to reconcile against; may be undefined
*/
-function findActiveOrchestration(projectPath: string): OrchestrationExecution | null {
- const orchestrations = listOrchestrations(projectPath);
- return orchestrations.find((o) =>
- ['running', 'paused', 'waiting_merge'].includes(o.status) &&
- !isOrchestrationStale(o)
- ) || null;
+function ensureStepMatchesStatus(
+ projectPath: string,
+ status: OrchestrationStatus | undefined
+): void {
+ if (status !== 'waiting_merge') return;
+
+ const cliState = readCliState(projectPath);
+ const step = cliState?.orchestration?.step;
+ const expectedIndex = STEP_INDEX_MAP.merge;
+
+ if (
+ step?.current !== 'merge' ||
+ step?.status !== 'not_started' ||
+ step?.index !== expectedIndex
+ ) {
+ syncPhaseToStateFile(projectPath, 'merge', 'not_started');
+ }
}
// =============================================================================
// Decision Logging (FR-064)
// =============================================================================
-/**
- * Add entry to decision log
- */
-function logDecision(
- execution: OrchestrationExecution,
- decision: string,
- reason: string,
- data?: Record
-): void {
- const entry: DecisionLogEntry = {
- timestamp: new Date().toISOString(),
- decision,
- reason,
- data,
- };
- execution.decisionLog.push(entry);
-}
-
// =============================================================================
// Specflow Status Integration (FR-021, T020)
// =============================================================================
@@ -261,6 +399,7 @@ function getSpecflowStatus(projectPath: string): SpecflowStatus | null {
cwd: projectPath,
encoding: 'utf-8',
timeout: 30000,
+ env: getSpecflowEnv(),
});
return JSON.parse(result);
} catch {
@@ -278,43 +417,39 @@ function getSpecflowStatus(projectPath: string): SpecflowStatus | null {
export function isPhaseComplete(status: SpecflowStatus | null, phase: OrchestrationPhase): boolean {
if (!status) return false;
+ // FR-001: Trust step.status as single source of truth
+ // Sub-commands set step.status=complete when they finish
+ // No artifact checks needed - we trust the state file
+ // Both values are undefined when the status carries no orchestration step.
+ const currentStep = status.orchestration?.step?.current;
+ const stepStatus = status.orchestration?.step?.status;
+
switch (phase) {
case 'design':
- // Design is complete when plan.md and tasks.md exist
- return status.context?.hasPlan === true && status.context?.hasTasks === true;
+ // Design complete when a later step is recorded OR design is marked complete.
+ // A missing step (nothing recorded yet) is not "past design"; the other
+ // cases below likewise report false when no step is recorded.
+ return currentStep != null &&
+ (currentStep !== 'design' || stepStatus === 'complete');
case 'analyze':
- // Analyze doesn't produce artifacts - check orchestration state
- // step.current must have moved past analyze (to 'implement' or later)
- // OR step.status is 'complete' when current step is analyze
- const analyzeStepComplete =
- status.orchestration?.step?.current === 'implement' ||
- status.orchestration?.step?.current === 'verify' ||
- (status.orchestration?.step?.current === 'analyze' &&
- status.orchestration?.step?.status === 'complete');
- return analyzeStepComplete ?? false;
+ // Analyze complete when step moved past analyze OR status is complete
+ return currentStep === 'implement' ||
+ currentStep === 'verify' ||
+ currentStep === 'merge' ||
+ (currentStep === 'analyze' && stepStatus === 'complete');
case 'implement':
- // All tasks complete
- return (
- status.progress?.tasksComplete === status.progress?.tasksTotal &&
- (status.progress?.tasksTotal ?? 0) > 0
- );
+ // Implement complete when step moved past implement OR status is complete
+ return currentStep === 'verify' ||
+ currentStep === 'merge' ||
+ (currentStep === 'implement' && stepStatus === 'complete');
case 'verify':
- // Verify is complete when step.current has moved past verify (to merge)
- // OR when step.status is 'complete' with current step as verify
- const verifyStepComplete =
- status.orchestration?.step?.current === 'merge' ||
- (status.orchestration?.step?.current === 'verify' &&
- status.orchestration?.step?.status === 'complete');
- return verifyStepComplete ?? false;
+ // Verify complete when step moved past verify OR status is complete
+ return currentStep === 'merge' ||
+ (currentStep === 'verify' && stepStatus === 'complete');
case 'merge':
- // Merge is complete when orchestration marks it so
- return status.orchestration?.step?.status === 'complete' &&
- (status.orchestration?.step?.current === 'merge' ||
- status.orchestration?.step?.current === undefined);
+ // Merge is complete when step.status is complete at merge step
+ return currentStep === 'merge' && stepStatus === 'complete';
case 'complete':
return true;
@@ -411,29 +546,26 @@ class OrchestrationService {
* @param batchPlan - Pre-parsed batch plan (null when phase needs opening first)
*/
async start(
- projectId: string,
+ // _projectId is not read in this method; the returned execution takes its
+ // projectId from CLI state inside convertDashboardStateToExecution
+ _projectId: string,
projectPath: string,
config: OrchestrationConfig,
batchPlan: BatchPlan | null = null
): Promise {
// Check for existing active orchestration (FR-024)
- const existing = findActiveOrchestration(projectPath);
- if (existing) {
+ // Terminal states (completed, failed, cancelled) don't block new orchestrations
+ const existing = getActiveDashboardState(projectPath);
+ const terminalStatuses = ['completed', 'failed', 'cancelled'];
+ if (existing?.active && !terminalStatuses.includes(existing.active.status)) {
throw new Error(
- `Orchestration already in progress: ${existing.id}. Cancel it first or wait for completion.`
+ `Orchestration already in progress: ${existing.active.id}. Cancel it first or wait for completion.`
);
}
// Create batch tracking from plan, or empty tracking if phase needs opening
let batches: BatchTracking;
- let taskCount = 0;
- let usedFallback = false;
-
if (batchPlan) {
// Normal case: phase is open and we have tasks
batches = createBatchTracking(batchPlan);
- taskCount = batchPlan.totalIncomplete;
- usedFallback = batchPlan.usedFallback;
} else {
// Phase needs opening: start with empty batches
// Batches will be populated after design completes
@@ -444,29 +576,50 @@ class OrchestrationService {
};
}
- // Create execution
const id = randomUUID();
- const execution = createOrchestrationExecution(id, projectId, config, batches);
-
- // Log initial decision
- logDecision(
- execution,
- 'start',
- batchPlan ? 'User initiated orchestration' : 'User initiated orchestration (phase will be opened first)',
- {
+ const startedAt = new Date().toISOString();
+ const startingPhase = getStartingPhase(config);
+
+ // Initial snapshot: status 'running', the batch list, zero cost and a single 'start' log entry
+ const dashboardState: DashboardState = {
+ active: {
+ id,
+ startedAt,
+ status: 'running',
config,
- batchCount: batches.total,
- taskCount,
- usedFallback,
- phaseNeedsOpen: !batchPlan,
- }
- );
+ },
+ batches: {
+ total: batches.total,
+ current: batches.current,
+ // Copy tracking items into the dashboard item shape (workflowExecutionId is stored as workflowId)
+ items: batches.items.map((b) => ({
+ section: b.section,
+ taskIds: b.taskIds,
+ status: b.status,
+ workflowId: b.workflowExecutionId,
+ healAttempts: b.healAttempts,
+ })),
+ },
+ cost: {
+ total: 0,
+ perBatch: [],
+ },
+ decisionLog: [{
+ timestamp: new Date().toISOString(),
+ action: 'start',
+ reason: batchPlan ? 'User initiated orchestration' : 'User initiated orchestration (phase will be opened first)',
+ }],
+ lastWorkflow: null,
+ recoveryContext: undefined,
+ };
- // Save initial state
- saveOrchestration(projectPath, execution);
+ await persistDashboardState(projectPath, dashboardState);
// Sync initial phase to state file for UI consistency
- syncPhaseToStateFile(projectPath, execution.currentPhase);
+ syncPhaseToStateFile(projectPath, startingPhase);
+
+ // Build the execution returned to the caller from the snapshot just persisted
+ const execution = this.convertDashboardStateToExecution(projectPath, dashboardState);
+ if (!execution) {
+ throw new Error('Failed to initialize orchestration state');
+ }
return execution;
}
@@ -475,497 +628,962 @@ class OrchestrationService {
* Update batches after design phase completes
* Called by runner when transitioning from design/analyze to implement
*/
- updateBatches(
+ async updateBatches(
projectPath: string,
orchestrationId: string,
batchPlan: BatchPlan
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- // Only update if batches are empty (phase was opened during this orchestration)
- if (execution.batches.total === 0) {
- const batches = createBatchTracking(batchPlan);
- execution.batches = batches;
-
- logDecision(execution, 'update_batches', 'Batches populated after design phase', {
- batchCount: batches.total,
- taskCount: batchPlan.totalIncomplete,
- usedFallback: batchPlan.usedFallback,
- });
+ ): Promise {
+ // Returns null when no active orchestration with this id exists
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
- saveOrchestration(projectPath, execution);
+ // Batches already populated: return the current state without writing anything
+ if (dashboardState.batches.total !== 0) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
}
- return execution;
+ const batches = createBatchTracking(batchPlan);
+ // Replace the empty batch list and record the change in the decision log
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches: {
+ total: batches.total,
+ current: batches.current,
+ items: batches.items.map((b) => ({
+ section: b.section,
+ taskIds: b.taskIds,
+ status: b.status,
+ workflowId: b.workflowExecutionId,
+ healAttempts: b.healAttempts,
+ })),
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'update_batches',
+ reason: 'Batches populated after design phase',
+ },
+ ],
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
- * Get orchestration by ID
+ * Get orchestration by ID from CLI dashboard state
+ * Returns null when the dashboard state has no active orchestration or the
+ * active orchestration's id differs from `id`.
*/
get(projectPath: string, id: string): OrchestrationExecution | null {
- return loadOrchestration(projectPath, id);
+ const dashboardState = getActiveDashboardState(projectPath, id);
+ if (!dashboardState) return null;
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
}
/**
- * Get active orchestration for a project
+ * Get active orchestration for a project from CLI dashboard state
+ * Returns null when the dashboard state has no `active` entry. Before
+ * converting, the CLI step is re-aligned through ensureStepMatchesStatus,
+ * which may write to CLI state when the status is waiting_merge.
*/
getActive(projectPath: string): OrchestrationExecution | null {
- return findActiveOrchestration(projectPath);
+ const dashboardState = readDashboardState(projectPath);
+ if (!dashboardState?.active) return null;
+ ensureStepMatchesStatus(projectPath, dashboardState.active.status);
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
+ }
+
+ /**
+ * Convert CLI dashboard state to OrchestrationExecution format
+ * Used during migration period for backwards compatibility
+ *
+ * Values not held in dashboard state are derived here: projectId and the
+ * current phase are read from CLI state, and updatedAt is set to the time of
+ * the call. Returns null when the dashboard state has no `active` entry.
+ */
+ private convertDashboardStateToExecution(
+ projectPath: string,
+ dashboardState: DashboardState
+ ): OrchestrationExecution | null {
+ if (!dashboardState.active) return null;
+
+ // Read project ID from registry
+ const cliState = readCliState(projectPath);
+ const projectId = cliState?.project?.id || 'unknown';
+
+ // Map dashboard status to orchestration status
+ const statusMap: Record = {
+ 'running': 'running',
+ 'paused': 'paused',
+ 'waiting_merge': 'waiting_merge',
+ 'needs_attention': 'needs_attention',
+ 'completed': 'completed',
+ 'failed': 'failed',
+ 'cancelled': 'cancelled',
+ };
+
+ // Get current phase from CLI state step
+ const step = cliState?.orchestration?.step;
+ const phaseMap: Record = {
+ 'design': 'design',
+ 'analyze': 'analyze',
+ 'implement': 'implement',
+ 'verify': 'verify',
+ 'merge': 'merge',
+ 'complete': 'complete',
+ };
+ // Falls back to 'design' when the CLI step is missing or is not a known phase
+ let currentPhase: OrchestrationPhase = step?.current && phaseMap[step.current]
+ ? phaseMap[step.current]
+ : 'design';
+
+ // waiting_merge always reports the merge phase; otherwise, when no CLI step is
+ // recorded, the phase is inferred from the last workflow's skill name
+ if (dashboardState.active.status === 'waiting_merge') {
+ currentPhase = 'merge';
+ } else if (!step?.current && dashboardState.lastWorkflow?.skill) {
+ const skillPhase = dashboardState.lastWorkflow.skill.replace(/^\/?flow\./, ''); // strips a leading "flow." or "/flow."
+ if (phaseMap[skillPhase]) {
+ currentPhase = phaseMap[skillPhase];
+ }
+ }
+
+ const executions: OrchestrationExecution['executions'] = {
+ implement: [],
+ healers: [],
+ };
+
+ // Implement executions: the distinct, non-empty workflow ids recorded on batches
+ const batchWorkflowIds = (dashboardState.batches?.items || [])
+ .map((b) => b.workflowId)
+ .filter((id): id is string => typeof id === 'string' && id.length > 0);
+ if (batchWorkflowIds.length > 0) {
+ executions.implement = Array.from(new Set(batchWorkflowIds));
+ }
+
+ // Attribute the most recent workflow to the slot of the current phase
+ const lastWorkflowId = dashboardState.lastWorkflow?.id;
+ if (lastWorkflowId) {
+ switch (currentPhase) {
+ case 'design':
+ executions.design = lastWorkflowId;
+ break;
+ case 'analyze':
+ executions.analyze = lastWorkflowId;
+ break;
+ case 'implement':
+ if (!executions.implement.includes(lastWorkflowId)) {
+ executions.implement = [lastWorkflowId, ...executions.implement];
+ }
+ break;
+ case 'verify':
+ executions.verify = lastWorkflowId;
+ break;
+ case 'merge':
+ executions.merge = lastWorkflowId;
+ break;
+ }
+ }
+
+ return {
+ id: dashboardState.active.id,
+ projectId,
+ status: statusMap[dashboardState.active.status] || 'running', // unmapped statuses are reported as 'running'
+ config: dashboardState.active.config,
+ currentPhase,
+ batches: {
+ total: dashboardState.batches?.total || 0,
+ current: dashboardState.batches?.current || 0,
+ items: (dashboardState.batches?.items || []).map((b, i) => ({
+ index: i,
+ section: b.section,
+ taskIds: b.taskIds,
+ status: b.status,
+ healAttempts: b.healAttempts || 0,
+ workflowExecutionId: b.workflowId,
+ })),
+ },
+ executions,
+ startedAt: dashboardState.active.startedAt,
+ updatedAt: new Date().toISOString(), // always the time of this conversion
+ decisionLog: (dashboardState.decisionLog || []).map((d) => ({
+ timestamp: d.timestamp,
+ decision: d.action,
+ reason: d.reason,
+ })),
+ totalCostUsd: dashboardState.cost?.total || 0,
+ recoveryContext: dashboardState.recoveryContext,
+ };
}
/**
* List all orchestrations for a project
+ * Built from getActive(), so the result holds at most one element: the
+ * orchestration currently present in CLI dashboard state.
*/
list(projectPath: string): OrchestrationExecution[] {
- return listOrchestrations(projectPath);
+ const active = this.getActive(projectPath);
+ return active ? [active] : [];
}
/**
* Update orchestration with workflow execution ID
+ * Returns null when no active orchestration with `orchestrationId` exists.
*/
- linkWorkflowExecution(
+ async linkWorkflowExecution(
projectPath: string,
orchestrationId: string,
workflowExecutionId: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- const phase = execution.currentPhase;
-
- // Link to appropriate execution slot
- switch (phase) {
- case 'design':
- execution.executions.design = workflowExecutionId;
- break;
- case 'analyze':
- execution.executions.analyze = workflowExecutionId;
- break;
- case 'implement':
- execution.executions.implement.push(workflowExecutionId);
- // Also link to current batch
- const currentBatch = execution.batches.items[execution.batches.current];
- if (currentBatch) {
- currentBatch.workflowExecutionId = workflowExecutionId;
- currentBatch.status = 'running';
- currentBatch.startedAt = new Date().toISOString();
- }
- break;
- case 'verify':
- execution.executions.verify = workflowExecutionId;
- break;
- case 'merge':
- execution.executions.merge = workflowExecutionId;
- break;
+ ): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ // Phase comes from the CLI step; 'design' is assumed when no step is recorded
+ const cliState = readCliState(projectPath);
+ const phase = cliState?.orchestration?.step?.current || 'design';
+
+ // During implement, attach the workflow to the current batch and mark that batch running
+ // NOTE(review): for other phases workflowExecutionId is not stored by this method —
+ // confirm that lastWorkflow is written elsewhere
+ let batches = dashboardState.batches;
+ if (phase === 'implement' && batches.items.length > 0) {
+ const items = [...batches.items];
+ const currentIndex = batches.current;
+ const currentBatch = items[currentIndex];
+ if (currentBatch) {
+ items[currentIndex] = {
+ ...currentBatch,
+ workflowId: workflowExecutionId,
+ status: 'running',
+ };
+ }
+ batches = {
+ ...batches,
+ items,
+ };
}
- logDecision(execution, 'link_execution', `Linked workflow execution for ${phase}`, {
- workflowExecutionId,
- phase,
- });
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches,
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'link_execution',
+ reason: `Linked workflow execution for ${phase}`,
+ },
+ ],
+ };
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Transition to next phase (FR-020, FR-022)
* Called after dual confirmation (state + process completion)
+ * The current phase is read from the CLI step, falling back to the configured
+ * starting phase. The dashboard state is persisted first and the CLI step is
+ * updated afterwards through syncPhaseToStateFile.
*/
- transitionToNextPhase(
+ async transitionToNextPhase(
projectPath: string,
orchestrationId: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ ): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
- const currentPhase = execution.currentPhase;
- const nextPhase = getNextPhase(currentPhase, execution.config);
+ const cliState = readCliState(projectPath);
+ const currentPhase = (cliState?.orchestration?.step?.current ||
+ getStartingPhase(dashboardState.active.config)) as OrchestrationPhase;
+ const nextPhase = getNextPhase(currentPhase, dashboardState.active.config);
if (!nextPhase) {
- // No more phases - complete
- execution.status = 'completed';
- execution.completedAt = new Date().toISOString();
- logDecision(execution, 'complete', 'All phases finished');
- saveOrchestration(projectPath, execution);
- return execution;
+ // No further phase: mark the orchestration completed and the current step complete
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'completed',
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'complete',
+ reason: 'All phases finished',
+ },
+ ],
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ syncPhaseToStateFile(projectPath, currentPhase, 'complete');
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
- // Handle merge phase with auto-merge disabled
- if (nextPhase === 'merge' && !execution.config.autoMerge) {
- execution.currentPhase = nextPhase;
- execution.status = 'waiting_merge';
- logDecision(execution, 'waiting_merge', 'Auto-merge disabled, waiting for user');
- saveOrchestration(projectPath, execution);
- // Sync to state file for UI consistency
- syncPhaseToStateFile(projectPath, nextPhase);
- return execution;
+ // Auto-merge disabled: stop at waiting_merge with the merge step marked not_started
+ if (nextPhase === 'merge' && !dashboardState.active.config.autoMerge) {
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'waiting_merge',
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'waiting_merge',
+ reason: 'Auto-merge disabled, waiting for user',
+ },
+ ],
+ };
+ await persistDashboardState(projectPath, nextState);
+ syncPhaseToStateFile(projectPath, nextPhase, 'not_started');
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
- // Transition to next phase
- execution.currentPhase = nextPhase;
- logDecision(execution, 'transition', `Moving from ${currentPhase} to ${nextPhase}`);
- saveOrchestration(projectPath, execution);
+ // Regular transition: log it, then advance the CLI step (status defaults to in_progress)
+ const nextState: DashboardState = {
+ ...dashboardState,
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'transition',
+ reason: `Moving from ${currentPhase} to ${nextPhase}`,
+ },
+ ],
+ };
- // Sync to state file for UI consistency (project list, sidebar)
+ await persistDashboardState(projectPath, nextState);
syncPhaseToStateFile(projectPath, nextPhase);
-
- return execution;
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Mark current batch as complete and move to next
+ * Returns null when no active orchestration with `orchestrationId` exists, and
+ * the unchanged execution when there is no batch at the current index.
+ * When the completed batch is the last one, the current index is left as is
+ * and an 'all_batches_complete' entry is logged instead of 'next_batch'.
*/
- completeBatch(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- const currentBatch = execution.batches.items[execution.batches.current];
- if (!currentBatch) return execution;
+ async completeBatch(projectPath: string, orchestrationId: string): Promise<OrchestrationExecution | null> {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ const batches = dashboardState.batches;
+ const currentBatch = batches.items[batches.current];
+ if (!currentBatch) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
+ }
- // Mark batch complete
- currentBatch.status = 'completed';
- currentBatch.completedAt = new Date().toISOString();
+ // Work on a copy of the items so the state that was read is not mutated
+ const items = [...batches.items];
+ items[batches.current] = {
+ ...currentBatch,
+ status: 'completed',
+ };
- logDecision(execution, 'batch_complete', `Batch ${execution.batches.current + 1} completed`, {
- section: currentBatch.section,
- taskIds: currentBatch.taskIds,
+ const decisionLog = [...(dashboardState.decisionLog || [])];
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'batch_complete',
+ reason: `Batch ${batches.current + 1} completed`,
});
- // Check if more batches
- if (execution.batches.current < execution.batches.total - 1) {
- // Move to next batch
- execution.batches.current++;
- const nextBatch = execution.batches.items[execution.batches.current];
- logDecision(execution, 'next_batch', `Starting batch ${execution.batches.current + 1}`, {
- section: nextBatch.section,
- taskCount: nextBatch.taskIds.length,
+ let nextCurrent = batches.current;
+ if (batches.current < batches.total - 1) {
+ nextCurrent = batches.current + 1;
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'next_batch',
+ reason: `Starting batch ${nextCurrent + 1}`,
});
} else {
- // All batches done - ready for verify
- logDecision(execution, 'all_batches_complete', 'All implement batches finished');
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'all_batches_complete',
+ reason: 'All implement batches finished',
+ });
}
- saveOrchestration(projectPath, execution);
- return execution;
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches: {
+ ...batches,
+ current: nextCurrent,
+ items,
+ },
+ decisionLog,
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Mark current batch as failed
+ * Returns null when no active orchestration with `orchestrationId` exists, and
+ * the unchanged execution when there is no batch at the current index.
+ * NOTE(review): `errorMessage` is not read by the new implementation, so the
+ * error text is no longer recorded in the decision log — confirm this is intended.
*/
- failBatch(
+ async failBatch(
projectPath: string,
orchestrationId: string,
errorMessage: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- const currentBatch = execution.batches.items[execution.batches.current];
- if (!currentBatch) return execution;
+ ): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ const batches = dashboardState.batches;
+ const currentBatch = batches.items[batches.current];
+ if (!currentBatch) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
+ }
- currentBatch.status = 'failed';
- currentBatch.completedAt = new Date().toISOString();
+ // Copy the items and replace only the current batch with a failed copy
+ const items = [...batches.items];
+ items[batches.current] = {
+ ...currentBatch,
+ status: 'failed',
+ };
- logDecision(execution, 'batch_failed', `Batch ${execution.batches.current + 1} failed`, {
- section: currentBatch.section,
- error: errorMessage,
- });
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches: {
+ ...batches,
+ items,
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'batch_failed',
+ reason: `Batch ${batches.current + 1} failed`,
+ },
+ ],
+ };
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Mark batch as healed after successful auto-heal
+ * Returns null when no active orchestration with `orchestrationId` exists, and
+ * the unchanged execution when there is no batch at the current index.
+ * NOTE(review): `healerExecutionId` is not read by the new implementation, so
+ * the healer execution is no longer recorded — confirm this is intended.
*/
- healBatch(
+ async healBatch(
projectPath: string,
orchestrationId: string,
healerExecutionId: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- const currentBatch = execution.batches.items[execution.batches.current];
- if (!currentBatch) return execution;
+ ): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ const batches = dashboardState.batches;
+ const currentBatch = batches.items[batches.current];
+ if (!currentBatch) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
+ }
- currentBatch.status = 'healed';
- currentBatch.healerExecutionId = healerExecutionId;
- currentBatch.completedAt = new Date().toISOString();
- execution.executions.healers.push(healerExecutionId);
+ // Copy the items and replace only the current batch with a healed copy
+ const items = [...batches.items];
+ items[batches.current] = {
+ ...currentBatch,
+ status: 'healed',
+ };
- logDecision(execution, 'batch_healed', `Batch ${execution.batches.current + 1} healed`, {
- section: currentBatch.section,
- healerExecutionId,
- healAttempts: currentBatch.healAttempts,
- });
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches: {
+ ...batches,
+ items,
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'batch_healed',
+ reason: `Batch ${batches.current + 1} healed`,
+ },
+ ],
+ };
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Increment heal attempt count for current batch
+ * Returns null when no active orchestration with `orchestrationId` exists, and
+ * the unchanged execution when there is no batch at the current index.
+ * No decision log entry is added for the increment.
*/
- incrementHealAttempt(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ async incrementHealAttempt(projectPath: string, orchestrationId: string): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ const batches = dashboardState.batches;
+ const currentBatch = batches.items[batches.current];
+ if (!currentBatch) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
+ }
- const currentBatch = execution.batches.items[execution.batches.current];
- if (!currentBatch) return execution;
+ const items = [...batches.items];
+ items[batches.current] = {
+ ...currentBatch,
+ healAttempts: (currentBatch.healAttempts || 0) + 1, // a missing count is treated as 0
+ };
- currentBatch.healAttempts++;
- saveOrchestration(projectPath, execution);
- return execution;
+ const nextState: DashboardState = {
+ ...dashboardState,
+ batches: {
+ ...batches,
+ items,
+ },
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Check if batch can be healed (FR-043)
+ * True only when an active orchestration with this id exists, auto-heal is
+ * enabled in its config, a batch exists at the current index, and that batch
+ * has used fewer heal attempts than config.maxHealAttempts.
*/
canHealBatch(projectPath: string, orchestrationId: string): boolean {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return false;
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return false;
- if (!execution.config.autoHealEnabled) return false;
+ if (!dashboardState.active.config.autoHealEnabled) return false;
- const currentBatch = execution.batches.items[execution.batches.current];
+ const currentBatch = dashboardState.batches.items[dashboardState.batches.current];
if (!currentBatch) return false;
- return currentBatch.healAttempts < execution.config.maxHealAttempts;
+ return (currentBatch.healAttempts || 0) < dashboardState.active.config.maxHealAttempts;
}
/**
* Pause orchestration and stop the current workflow process
* Note: Claude doesn't support true pause - we kill the process and resume from current state
+ * Returns null unless an active orchestration with this id is currently 'running'.
+ * Each process that is killed is recorded as a 'process_killed' log entry,
+ * followed by a final 'pause' entry.
*/
- pause(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution || execution.status !== 'running') return null;
+ async pause(projectPath: string, orchestrationId: string): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active || dashboardState.active.status !== 'running') return null;
// Kill the current workflow process
- const currentWorkflowId = this.getCurrentWorkflowId(execution);
+ const currentWorkflowId = this.getCurrentWorkflowId(projectPath, dashboardState);
+ const decisionLog = [...(dashboardState.decisionLog || [])];
if (currentWorkflowId) {
const workflowDir = join(projectPath, '.specflow', 'workflows', currentWorkflowId);
const pids = readPidFile(workflowDir);
if (pids) {
if (pids.claudePid && isPidAlive(pids.claudePid)) {
killProcess(pids.claudePid, false);
- logDecision(execution, 'process_killed', `Paused: killed Claude process ${pids.claudePid}`);
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'process_killed',
+ reason: `Paused: killed Claude process ${pids.claudePid}`,
+ });
}
if (pids.bashPid && isPidAlive(pids.bashPid)) {
killProcess(pids.bashPid, false);
- logDecision(execution, 'process_killed', `Paused: killed bash process ${pids.bashPid}`);
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'process_killed',
+ reason: `Paused: killed bash process ${pids.bashPid}`,
+ });
}
cleanupPidFile(workflowDir);
}
}
- execution.status = 'paused';
- logDecision(execution, 'pause', 'User requested pause');
- saveOrchestration(projectPath, execution);
- return execution;
+ // Persist the paused status together with the log entries collected above
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'paused',
+ },
+ decisionLog: [
+ ...decisionLog,
+ {
+ timestamp: new Date().toISOString(),
+ action: 'pause',
+ reason: 'User requested pause',
+ },
+ ],
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Resume paused orchestration
*/
- resume(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution || execution.status !== 'paused') return null;
+ async resume(projectPath: string, orchestrationId: string): Promise<OrchestrationExecution | null> {
+ // Only a paused orchestration can be resumed; every other case resolves to null.
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active || dashboardState.active.status !== 'paused') return null;
+
+ // Flip the status back to running and append a resume entry to the decision log.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'running',
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'resume',
+ reason: 'User requested resume',
+ },
+ ],
+ };
- execution.status = 'running';
- logDecision(execution, 'resume', 'User requested resume');
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
+ }
+
+  /**
+   * Go back to a previous step (FR-004 - UI Step Override)
+   *
+   * This allows the UI to let users click a step to go back to it.
+   * Sets step.current to the target step and step.status to not_started,
+   * and leaves the orchestration in the `running` status.
+   *
+   * Going back to design, analyze or implement also clears the batch plan
+   * and the recorded cost; going back to verify keeps both.
+   *
+   * @param projectPath - Project path for CLI commands
+   * @param orchestrationId - Active orchestration ID
+   * @param targetStep - The step to go back to (design, analyze, implement, verify)
+   * @returns Updated orchestration execution, or null when the step is invalid,
+   *          no active orchestration is found, or the state update fails
+   */
+  async goBackToStep(
+    projectPath: string,
+    orchestrationId: string,
+    targetStep: string
+  ): Promise<OrchestrationExecution | null> {
+    // targetStep is interpolated into a shell command below, so it must match this allow-list.
+    const validSteps = ['design', 'analyze', 'implement', 'verify'];
+    if (!validSteps.includes(targetStep)) {
+      console.error(`[orchestration-service] Invalid target step: ${targetStep}`);
+      return null;
+    }
+
+    const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+    if (!dashboardState?.active) return null;
+
+    const shouldResetBatches = ['design', 'analyze', 'implement'].includes(targetStep);
+    const resetBatches: DashboardState['batches'] = shouldResetBatches
+      ? { total: 0, current: 0, items: [] }
+      : dashboardState.batches;
+    const resetCost: DashboardState['cost'] = shouldResetBatches
+      ? { total: 0, perBatch: [] }
+      : dashboardState.cost;
+
+    // Collect log entries on a copy so the loaded state object is not mutated.
+    const decisionLog = [...(dashboardState.decisionLog || [])];
+
+    // Stop the workflow process of the step being abandoned (only relevant while running).
+    // Kills are recorded in the decision log, matching pause() and cancel().
+    if (dashboardState.active.status === 'running') {
+      const currentWorkflowId = this.getCurrentWorkflowId(projectPath, dashboardState);
+      if (currentWorkflowId) {
+        const workflowDir = join(projectPath, '.specflow', 'workflows', currentWorkflowId);
+        const pids = readPidFile(workflowDir);
+        if (pids) {
+          if (pids.claudePid && isPidAlive(pids.claudePid)) {
+            killProcess(pids.claudePid, false);
+            decisionLog.push({
+              timestamp: new Date().toISOString(),
+              action: 'process_killed',
+              reason: `Go back: killed Claude process ${pids.claudePid}`,
+            });
+          }
+          if (pids.bashPid && isPidAlive(pids.bashPid)) {
+            killProcess(pids.bashPid, false);
+            decisionLog.push({
+              timestamp: new Date().toISOString(),
+              action: 'process_killed',
+              reason: `Go back: killed bash process ${pids.bashPid}`,
+            });
+          }
+          cleanupPidFile(workflowDir);
+        }
+      }
+    }
+
+    // Update CLI state via specflow state set
+    try {
+      const stepIndex = validSteps.indexOf(targetStep);
+      execSync(
+        `specflow state set orchestration.step.current=${targetStep} orchestration.step.status=not_started orchestration.step.index=${stepIndex}`,
+        {
+          cwd: projectPath,
+          encoding: 'utf-8',
+          timeout: 30000,
+          env: getSpecflowEnv(),
+        }
+      );
+
+      // Update dashboard state
+      await writeDashboardState(projectPath, {
+        lastWorkflow: null, // Clear last workflow when going back
+        batches: resetBatches,
+        cost: resetCost,
+      });
+
+      const nextState: DashboardState = {
+        ...dashboardState,
+        active: {
+          ...dashboardState.active,
+          status: 'running',
+        },
+        batches: resetBatches,
+        cost: resetCost,
+        lastWorkflow: null,
+        decisionLog: [
+          ...decisionLog,
+          {
+            timestamp: new Date().toISOString(),
+            action: 'go_back_to_step',
+            reason: shouldResetBatches
+              ? `User navigated back to ${targetStep} step (reset batches)`
+              : `User navigated back to ${targetStep} step`,
+          },
+        ],
+      };
+
+      await persistDashboardState(projectPath, nextState);
+
+      console.log(`[orchestration-service] Went back to step: ${targetStep}`);
+      return this.convertDashboardStateToExecution(projectPath, nextState);
+    } catch (error) {
+      // Any failure of the CLI call or of the state writes is reported as null.
+      console.error(`[orchestration-service] Failed to go back to step: ${error}`);
+      return null;
+    }
   }
/**
* Trigger merge (for waiting_merge status)
*/
- triggerMerge(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution || execution.status !== 'waiting_merge') return null;
+ async triggerMerge(projectPath: string, orchestrationId: string): Promise<OrchestrationExecution | null> {
+ // Merge can only be triggered from waiting_merge; every other case resolves to null.
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active || dashboardState.active.status !== 'waiting_merge') return null;
+
+ // Set the status to running and append a merge_triggered entry to the decision log.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'running',
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'merge_triggered',
+ reason: 'User triggered merge',
+ },
+ ],
+ };
- execution.status = 'running';
- logDecision(execution, 'merge_triggered', 'User triggered merge');
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ // The state file is synced to merge/in_progress only after the dashboard state is persisted.
+ syncPhaseToStateFile(projectPath, 'merge', 'in_progress');
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Cancel orchestration and kill any running workflow process
*/
- cancel(projectPath: string, orchestrationId: string): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ async cancel(projectPath: string, orchestrationId: string): Promise<OrchestrationExecution | null> {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
- if (!['running', 'paused', 'waiting_merge', 'needs_attention'].includes(execution.status)) {
- return execution; // Already in terminal state
+ // Any status outside this list is not cancellable: return the current state unchanged.
+ if (!['running', 'paused', 'waiting_merge', 'needs_attention'].includes(dashboardState.active.status)) {
+ return this.convertDashboardStateToExecution(projectPath, dashboardState);
}
// Kill the current workflow process if one is running
- const currentWorkflowId = this.getCurrentWorkflowId(execution);
+ const currentWorkflowId = this.getCurrentWorkflowId(projectPath, dashboardState);
+ // Collect log entries on a copy so the loaded state object is not mutated.
+ const decisionLog = [...(dashboardState.decisionLog || [])];
if (currentWorkflowId) {
const workflowDir = join(projectPath, '.specflow', 'workflows', currentWorkflowId);
const pids = readPidFile(workflowDir);
if (pids) {
if (pids.claudePid && isPidAlive(pids.claudePid)) {
killProcess(pids.claudePid, false);
- logDecision(execution, 'process_killed', `Killed Claude process ${pids.claudePid}`);
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'process_killed',
+ reason: `Killed Claude process ${pids.claudePid}`,
+ });
}
if (pids.bashPid && isPidAlive(pids.bashPid)) {
killProcess(pids.bashPid, false);
- logDecision(execution, 'process_killed', `Killed bash process ${pids.bashPid}`);
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'process_killed',
+ reason: `Killed bash process ${pids.bashPid}`,
+ });
}
cleanupPidFile(workflowDir);
}
}
- execution.status = 'cancelled';
- logDecision(execution, 'cancel', 'User cancelled orchestration');
- saveOrchestration(projectPath, execution);
- return execution;
+ // Build the cancelled state immutably: new status plus the kill entries and the cancel entry.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'cancelled',
+ },
+ decisionLog: [
+ ...decisionLog,
+ {
+ timestamp: new Date().toISOString(),
+ action: 'cancel',
+ reason: 'User cancelled orchestration',
+ },
+ ],
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Get the current workflow execution ID from orchestration state
*/
- private getCurrentWorkflowId(execution: OrchestrationExecution): string | undefined {
- const { currentPhase, batches, executions } = execution;
-
- switch (currentPhase) {
- case 'design':
- return executions.design;
- case 'analyze':
- return executions.analyze;
- case 'implement':
- const currentBatch = batches.items[batches.current];
- return currentBatch?.workflowExecutionId;
- case 'verify':
- return executions.verify;
- case 'merge':
- return executions.merge;
- default:
- return undefined;
+ private getCurrentWorkflowId(
+ projectPath: string,
+ dashboardState: DashboardState
+ ): string | undefined {
+ // The current step is read from the CLI state, not from the dashboard state.
+ const cliState = readCliState(projectPath);
+ const currentStep = cliState?.orchestration?.step?.current;
+
+ // During implement, prefer the current batch's workflow id and fall back to lastWorkflow.
+ if (currentStep === 'implement') {
+ const batch = dashboardState.batches.items[dashboardState.batches.current];
+ return batch?.workflowId || dashboardState.lastWorkflow?.id;
}
+
+ // For every other step (or when the step is unknown) lastWorkflow is the only candidate.
+ return dashboardState.lastWorkflow?.id;
}
/**
* Mark orchestration as failed
*/
- fail(
+ async fail(
projectPath: string,
orchestrationId: string,
errorMessage: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ ): Promise<OrchestrationExecution | null> {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
+
+ // NOTE(review): errorMessage is only recorded as the reason of the 'fail' log entry here;
+ // the removed code also stored it on execution.errorMessage - confirm it is still surfaced.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'failed',
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'fail',
+ reason: errorMessage,
+ },
+ ],
+ };
- execution.status = 'failed';
- execution.errorMessage = errorMessage;
- logDecision(execution, 'fail', errorMessage);
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Set orchestration to needs_attention status (recoverable error)
* Allows user to decide: retry, skip, or abort
*/
- setNeedsAttention(
+ async setNeedsAttention(
projectPath: string,
orchestrationId: string,
issue: string,
options: Array<'retry' | 'skip' | 'abort'>,
failedWorkflowId?: string
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
-
- execution.status = 'needs_attention';
- execution.recoveryContext = {
- issue,
- options,
- failedWorkflowId,
+ ): Promise<OrchestrationExecution | null> {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
+
+ // Store the issue, the offered options and the failed workflow id as the recovery context,
+ // and log the issue as the reason for entering needs_attention.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status: 'needs_attention',
+ },
+ recoveryContext: {
+ issue,
+ options,
+ failedWorkflowId,
+ },
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: 'needs_attention',
+ reason: issue,
+ },
+ ],
};
- logDecision(execution, 'needs_attention', issue);
- saveOrchestration(projectPath, execution);
- return execution;
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Handle recovery action from user (retry, skip, abort)
*/
- handleRecovery(
+ async handleRecovery(
projectPath: string,
orchestrationId: string,
action: 'retry' | 'skip' | 'abort'
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
- if (execution.status !== 'needs_attention') return null;
-
- switch (action) {
- case 'retry':
- // Resume running - runner will respawn the workflow
- execution.status = 'running';
- execution.recoveryContext = undefined;
- logDecision(execution, 'recovery_retry', 'User chose to retry');
- break;
-
- case 'skip': {
- // Skip to next phase - mark current as done and move on
- execution.status = 'running';
- execution.recoveryContext = undefined;
- logDecision(execution, 'recovery_skip', 'User chose to skip current phase');
- // Actually transition to the next phase
- const nextPhase = getNextPhase(execution.currentPhase, execution.config);
- if (nextPhase) {
- execution.currentPhase = nextPhase;
- logDecision(execution, 'transition', `Skipped to ${nextPhase}`);
- }
- break;
+ ): Promise<OrchestrationExecution | null> {
+ // Recovery actions only apply to an orchestration that is in needs_attention.
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
+ if (dashboardState.active.status !== 'needs_attention') return null;
+
+ // Collect log entries on a copy; status keeps its current value unless an action changes it.
+ const decisionLog = [...(dashboardState.decisionLog || [])];
+ let status: DashboardOrchestrationStatus = dashboardState.active.status;
+
+ if (action === 'retry') {
+ status = 'running';
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'recovery_retry',
+ reason: 'User chose to retry',
+ });
+ }
+
+ if (action === 'skip') {
+ status = 'running';
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'recovery_skip',
+ reason: 'User chose to skip current phase',
+ });
+
+ // The phase being skipped comes from CLI state, falling back to the configured starting phase.
+ const cliState = readCliState(projectPath);
+ const currentPhase = (cliState?.orchestration?.step?.current ||
+ getStartingPhase(dashboardState.active.config)) as OrchestrationPhase;
+ const nextPhase = getNextPhase(currentPhase, dashboardState.active.config);
+ if (nextPhase) {
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'transition',
+ reason: `Skipped to ${nextPhase}`,
+ });
+ syncPhaseToStateFile(projectPath, nextPhase);
}
+ }
- case 'abort':
- // User chose to abort - mark as cancelled
- execution.status = 'cancelled';
- execution.recoveryContext = undefined;
- logDecision(execution, 'recovery_abort', 'User chose to abort');
- break;
+ if (action === 'abort') {
+ status = 'cancelled';
+ decisionLog.push({
+ timestamp: new Date().toISOString(),
+ action: 'recovery_abort',
+ reason: 'User chose to abort',
+ });
}
- saveOrchestration(projectPath, execution);
- return execution;
+ // Every action clears the recovery context and persists the new status and log.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ active: {
+ ...dashboardState.active,
+ status,
+ },
+ recoveryContext: undefined,
+ decisionLog,
+ };
+
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Update total cost
*/
- addCost(
+ async addCost(
projectPath: string,
orchestrationId: string,
costUsd: number
- ): OrchestrationExecution | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ ): Promise<OrchestrationExecution | null> {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
+
+ // Add costUsd to the running total (a missing total counts as 0); other cost fields are kept.
+ const nextState: DashboardState = {
+ ...dashboardState,
+ cost: {
+ ...dashboardState.cost,
+ total: (dashboardState.cost?.total || 0) + costUsd,
+ },
+ };
- execution.totalCostUsd += costUsd;
- saveOrchestration(projectPath, execution);
- return execution;
+ await persistDashboardState(projectPath, nextState);
+ return this.convertDashboardStateToExecution(projectPath, nextState);
}
/**
* Check if budget exceeded (FR-053)
*/
isBudgetExceeded(projectPath: string, orchestrationId: string): boolean {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return false;
+ // Without an active orchestration there is no budget to compare against.
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return false;
- const budget = execution.config.budget;
- return execution.totalCostUsd >= budget.maxTotal;
+ const budget = dashboardState.active.config.budget;
+ // A missing cost total counts as 0; reaching maxTotal exactly already counts as exceeded.
+ const total = dashboardState.cost?.total || 0;
+ return total >= budget.maxTotal;
}
/**
@@ -973,41 +1591,47 @@ class OrchestrationService {
* Called when external CLI session activity is detected
*/
touchActivity(projectPath: string, orchestrationId: string): void {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return;
-
- // saveOrchestration already updates updatedAt, so just save
- saveOrchestration(projectPath, execution);
+ // The state is still looked up, but nothing is written in either branch.
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return;
+ // No-op: CLI state is the source of truth and does not track updatedAt.
}
/**
* Get the skill to run for the current phase
*/
getCurrentSkill(projectPath: string, orchestrationId: string): string | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return null;
- return getPhaseSkill(execution.currentPhase);
+ // The phase comes from CLI state; if it is missing, the configured starting phase is used.
+ const cliState = readCliState(projectPath);
+ const phase = (cliState?.orchestration?.step?.current ||
+ getStartingPhase(dashboardState.active.config)) as OrchestrationPhase;
+
+ return getPhaseSkill(phase);
}
/**
* Check if current step is complete using specflow status
*/
isCurrentStepComplete(projectPath: string, orchestrationId: string): boolean {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return false;
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState?.active) return false;
+
+ // The phase comes from CLI state; if it is missing, the configured starting phase is used.
+ const cliState = readCliState(projectPath);
+ const phase = (cliState?.orchestration?.step?.current ||
+ getStartingPhase(dashboardState.active.config)) as OrchestrationPhase;
- return isStepComplete(projectPath, execution.currentPhase);
+ return isStepComplete(projectPath, phase);
}
/**
* Check if all batches are complete
*/
areAllBatchesComplete(projectPath: string, orchestrationId: string): boolean {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return false;
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return false;
- return execution.batches.items.every(
+ // A batch counts as done when its status is 'completed' or 'healed'.
+ // NOTE(review): every() is true for an empty items array, so a state with no
+ // batches reports "all complete" - confirm callers expect that.
+ return dashboardState.batches.items.every(
(b) => b.status === 'completed' || b.status === 'healed'
);
}
@@ -1022,15 +1646,15 @@ class OrchestrationService {
taskIds: string[];
status: string;
} | null {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return null;
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return null;
- const batch = execution.batches.items[execution.batches.current];
+ const batch = dashboardState.batches.items[dashboardState.batches.current];
if (!batch) return null;
return {
- index: execution.batches.current,
- total: execution.batches.total,
+ index: dashboardState.batches.current,
+ total: dashboardState.batches.total,
section: batch.section,
taskIds: batch.taskIds,
status: batch.status,
@@ -1045,13 +1669,24 @@ class OrchestrationService {
orchestrationId: string,
decision: string,
reason: string,
- data?: Record
- ): void {
- const execution = loadOrchestration(projectPath, orchestrationId);
- if (!execution) return;
+ _data?: Record
+ ): Promise {
+ const dashboardState = getActiveDashboardState(projectPath, orchestrationId);
+ if (!dashboardState) return Promise.resolve();
+
+ const nextState: DashboardState = {
+ ...dashboardState,
+ decisionLog: [
+ ...(dashboardState.decisionLog || []),
+ {
+ timestamp: new Date().toISOString(),
+ action: decision,
+ reason,
+ },
+ ],
+ };
- logDecision(execution, decision, reason, data);
- saveOrchestration(projectPath, execution);
+ return persistDashboardState(projectPath, nextState);
}
}
diff --git a/packages/dashboard/src/lib/services/orchestration-types.ts b/packages/dashboard/src/lib/services/orchestration-types.ts
index 11b3408..eb03112 100644
--- a/packages/dashboard/src/lib/services/orchestration-types.ts
+++ b/packages/dashboard/src/lib/services/orchestration-types.ts
@@ -12,12 +12,67 @@
*/
import type {
- OrchestrationExecution,
OrchestrationState,
WorkflowExecution,
BatchPlan,
+ OrchestrationConfig,
+ OrchestrationStatus,
+ OrchestrationPhase,
+ DecisionLogEntry,
+ BatchTracking,
} from '@specflow/shared';
+// =============================================================================
+// OrchestrationExecution Type (Legacy Compatibility)
+// =============================================================================
+
+/**
+ * Legacy OrchestrationExecution type - kept for dashboard compatibility
+ * This was previously in @specflow/shared/schemas/orchestration-execution.ts
+ * Now defined locally as we transition to CLI state as single source of truth
+ */
+export interface OrchestrationExecution {
+ /** Unique identifier */
+ id: string;
+ /** Project ID from registry */
+ projectId: string;
+ /** Current status */
+ status: OrchestrationStatus;
+ /** Configuration options */
+ config: OrchestrationConfig;
+ /** Current phase */
+ currentPhase: OrchestrationPhase;
+ /** Batch tracking */
+ batches: BatchTracking;
+ /** Linked workflow execution IDs */
+ executions: {
+ design?: string;
+ analyze?: string;
+ implement: string[];
+ verify?: string;
+ merge?: string;
+ healers?: string[];
+ };
+ /** ISO timestamp when started */
+ startedAt: string;
+ /** ISO timestamp of last update */
+ updatedAt: string;
+ /** ISO timestamp when completed/failed */
+ completedAt?: string;
+ /** Decision log for debugging */
+ decisionLog: DecisionLogEntry[];
+ /** Total cost in USD */
+ totalCostUsd: number;
+ /** Error message if failed */
+ errorMessage?: string;
+ /** Recovery context for needs_attention state */
+ recoveryContext?: {
+ issue: string;
+ options: Array<'retry' | 'skip' | 'abort'>;
+ failedWorkflowId?: string;
+ };
+}
+
// =============================================================================
// Clock Interface (NFR-003 - Testability)
// =============================================================================
@@ -163,42 +218,42 @@ export interface OrchestrationIO {
/**
* Update orchestration state
*/
- update(projectPath: string, orchestrationId: string, updates: Partial): void;
+ update(projectPath: string, orchestrationId: string, updates: Partial): Promise;
/**
* Transition to next phase
*/
- transitionToNextPhase(projectPath: string, orchestrationId: string): void;
+ transitionToNextPhase(projectPath: string, orchestrationId: string): Promise;
/**
* Link workflow execution to orchestration
*/
- linkWorkflowExecution(projectPath: string, orchestrationId: string, workflowId: string): void;
+ linkWorkflowExecution(projectPath: string, orchestrationId: string, workflowId: string): Promise;
/**
* Add cost to orchestration
*/
- addCost(projectPath: string, orchestrationId: string, cost: number): void;
+ addCost(projectPath: string, orchestrationId: string, cost: number): Promise;
/**
* Update batch tracking
*/
- updateBatches(projectPath: string, orchestrationId: string, batchPlan: BatchPlan): void;
+ updateBatches(projectPath: string, orchestrationId: string, batchPlan: BatchPlan): Promise;
/**
* Complete current batch
*/
- completeBatch(projectPath: string, orchestrationId: string): void;
+ completeBatch(projectPath: string, orchestrationId: string): Promise;
/**
* Mark batch as healed
*/
- healBatch(projectPath: string, orchestrationId: string, healerSessionId: string): void;
+ healBatch(projectPath: string, orchestrationId: string, healerSessionId: string): Promise;
/**
* Increment heal attempt counter
*/
- incrementHealAttempt(projectPath: string, orchestrationId: string): void;
+ incrementHealAttempt(projectPath: string, orchestrationId: string): Promise;
/**
* Check if batch can be healed (has remaining attempts)
@@ -214,27 +269,27 @@ export interface OrchestrationIO {
issue: string,
options: Array<'retry' | 'skip' | 'abort'>,
failedWorkflowId?: string
- ): void;
+ ): Promise;
/**
* Pause orchestration
*/
- pause(projectPath: string, orchestrationId: string): void;
+ pause(projectPath: string, orchestrationId: string): Promise;
/**
* Resume orchestration from paused state
*/
- resume(projectPath: string, orchestrationId: string): void;
+ resume(projectPath: string, orchestrationId: string): Promise;
/**
* Trigger merge phase
*/
- triggerMerge(projectPath: string, orchestrationId: string): void;
+ triggerMerge(projectPath: string, orchestrationId: string): Promise;
/**
* Mark orchestration as failed
*/
- fail(projectPath: string, orchestrationId: string, errorMessage: string): void;
+ fail(projectPath: string, orchestrationId: string, errorMessage: string): Promise;
}
// =============================================================================
diff --git a/packages/dashboard/src/lib/services/orchestration-validation.ts b/packages/dashboard/src/lib/services/orchestration-validation.ts
index 50e22b8..5d04501 100644
--- a/packages/dashboard/src/lib/services/orchestration-validation.ts
+++ b/packages/dashboard/src/lib/services/orchestration-validation.ts
@@ -14,8 +14,9 @@
* - Cross-file consistency
*/
-import type { OrchestrationExecution, OrchestrationState, StepStatus } from '@specflow/shared';
+import type { OrchestrationState, StepStatus } from '@specflow/shared';
import { STEP_INDEX_MAP } from '@specflow/shared';
+import type { OrchestrationExecution } from './orchestration-types';
// =============================================================================
// Types
diff --git a/packages/dashboard/src/lib/services/process-health.ts b/packages/dashboard/src/lib/services/process-health.ts
index 1137e61..ad337b0 100644
--- a/packages/dashboard/src/lib/services/process-health.ts
+++ b/packages/dashboard/src/lib/services/process-health.ts
@@ -6,12 +6,34 @@
* - Session file staleness (when was output last written?)
*/
-import { existsSync, statSync } from 'fs';
+import { existsSync, statSync, openSync, readSync, closeSync } from 'fs';
import { join } from 'path';
import type { WorkflowExecution } from './workflow-service';
import { isPidAlive, readPidFile } from './process-spawner';
import { getProjectSessionDir } from '@/lib/project-hash';
+/**
+ * Read only the tail of a file efficiently (without loading the entire file).
+ *
+ * @param filePath - Path of the file to read
+ * @param bytes - Maximum number of bytes to read from the end of the file
+ * @returns The last `bytes` bytes of the file decoded as UTF-8 (the whole file
+ *          when it is smaller), or an empty string if the file cannot be read
+ */
+export function readFileTail(filePath: string, bytes: number = 10000): string {
+  try {
+    const fileSize = statSync(filePath).size;
+    const readSize = Math.min(bytes, fileSize);
+    const position = Math.max(0, fileSize - readSize);
+
+    const fd = openSync(filePath, 'r');
+    try {
+      const buffer = Buffer.alloc(readSize);
+      // readSync may return fewer bytes than requested (e.g. the file shrank after
+      // statSync); decode only what was actually read so no NUL padding leaks out.
+      const bytesRead = readSync(fd, buffer, 0, readSize, position);
+      return buffer.toString('utf-8', 0, bytesRead);
+    } finally {
+      // Always release the descriptor, even when allocation or the read throws.
+      closeSync(fd);
+    }
+  } catch {
+    // Best effort: a missing or unreadable file is reported as an empty tail.
+    return '';
+  }
+}
+
/**
* Staleness threshold - if session file hasn't been updated in this time,
* consider the process potentially stuck
@@ -180,15 +202,81 @@ export function getHealthStatusMessage(health: ProcessHealthResult): string {
}
/**
- * Check if a session ended gracefully
+ * Session status as determined from file content analysis.
+ * This is the SINGLE SOURCE OF TRUTH for session status.
+ */
+export type SessionFileStatus =
+ | 'completed' // Session ended (has end marker or assistant finished responding)
+ | 'waiting_for_input' // AskUserQuestion pending
+ | 'running' // Active, no end markers
+ | 'stale'; // No activity for 5+ minutes, no end markers
+
+/**
+ * Determine session status from file content.
+ * THIS IS THE SINGLE SOURCE OF TRUTH FOR SESSION STATUS.
*
- * Reads the last portion of the session JSONL to detect if the session
- * completed normally vs terminated unexpectedly.
+ * All other code should use this function rather than implementing
+ * their own status detection logic.
*
- * Detection methods:
- * 1. Stop hook feedback meta message (most reliable)
- * 2. Result type message from Claude CLI
- * 3. Final assistant message without pending tool calls
+ * @param tail - Last ~10KB of session JSONL file
+ * @param ageMs - Milliseconds since file was last modified
+ * @returns Session status
+ */
+export function getSessionStatus(tail: string, ageMs: number): SessionFileStatus {
+  // Freshness is the fallback verdict whenever the content gives no stronger signal.
+  const isFresh = ageMs <= STALENESS_THRESHOLD_MS;
+  if (!tail) {
+    return isFresh ? 'running' : 'stale';
+  }
+
+  // Check for definitive end markers (plain substring matches anywhere in the tail)
+  const hasStopHook = tail.includes('"isMeta":true') && tail.includes('Stop hook feedback:');
+  const hasResult = tail.includes('"type":"result"');
+  const hasTurnDuration = tail.includes('"subtype":"turn_duration"');
+  const hasSummary = tail.includes('"type":"summary"');
+  if (hasStopHook || hasResult || hasTurnDuration || hasSummary) {
+    return 'completed';
+  }
+
+  // Check for AskUserQuestion pending (only valid if not stale)
+  const needsInput = tail.includes('"status":"needs_input"');
+  if (needsInput && isFresh) {
+    return 'waiting_for_input';
+  }
+
+  // Check if the last entry is a final assistant text response (session idle, turn complete).
+  // A message that also carries a tool_use block still has a tool call pending, so it
+  // must not be treated as the end of the turn.
+  let lastMessageIsFinalText = false;
+  try {
+    const lines = tail.split('\n').filter((l) => l.trim());
+    const lastLine = lines[lines.length - 1];
+    if (lastLine !== undefined) {
+      const lastMsg = JSON.parse(lastLine);
+      const content = lastMsg.type === 'assistant' ? lastMsg.message?.content : undefined;
+      if (Array.isArray(content)) {
+        const blockTypes = content.map((block: { type?: string } | null) => block?.type);
+        lastMessageIsFinalText = blockTypes.includes('text') && !blockTypes.includes('tool_use');
+      } else if (typeof content === 'string' && content.length > 0) {
+        lastMessageIsFinalText = true;
+      }
+    }
+  } catch {
+    // Last line is not parseable JSON (or not an object): fall through to the staleness check
+  }
+
+  if (lastMessageIsFinalText) {
+    return 'completed';
+  }
+
+  // No end markers - check staleness
+  return isFresh ? 'running' : 'stale';
+}
+
+/**
+ * Check if a session ended gracefully.
+ * Uses getSessionStatus as the single source of truth.
*/
export function didSessionEndGracefully(
projectPath: string,
@@ -202,56 +290,12 @@ export function didSessionEndGracefully(
try {
if (!existsSync(sessionFile)) return false;
- const { readFileSync } = require('fs');
- const content = readFileSync(sessionFile, 'utf-8');
+ const stats = statSync(sessionFile);
+ const ageMs = Date.now() - stats.mtime.getTime();
+ const tail = readFileTail(sessionFile, 10000);
+ const status = getSessionStatus(tail, ageMs);
- // Check the last portion of the file
- const lastChunk = content.slice(-10000); // Last 10KB for better coverage
-
- // Method 1: Stop hook feedback (most reliable indicator of graceful end)
- if (lastChunk.includes('"isMeta":true') && lastChunk.includes('Stop hook feedback:')) {
- return true;
- }
-
- // Method 2: Result type message from Claude CLI output
- if (lastChunk.includes('"type":"result"')) {
- return true;
- }
-
- // Method 3: Check if the last non-empty entry is an assistant message
- // without tool_use blocks (indicates natural completion)
- const lines = lastChunk.trim().split('\n').filter((l: string) => l.trim());
- if (lines.length > 0) {
- // Check last few lines for a final assistant message
- for (let i = lines.length - 1; i >= Math.max(0, lines.length - 5); i--) {
- try {
- const entry = JSON.parse(lines[i]);
- // Skip meta messages
- if (entry.isMeta) continue;
- // If we find an assistant message, check if it has tool calls
- if (entry.type === 'assistant' || entry.message?.role === 'assistant') {
- const msgContent = entry.message?.content || entry.content;
- // If it's a text-only response (no tool_use), likely completed
- if (msgContent && typeof msgContent === 'string') {
- return true;
- }
- // If content is array, check for tool_use blocks
- if (Array.isArray(msgContent)) {
- const hasToolUse = msgContent.some((c: { type?: string }) => c.type === 'tool_use');
- // No pending tool calls = likely completed
- if (!hasToolUse) {
- return true;
- }
- }
- break; // Only check the last assistant message
- }
- } catch {
- // Skip invalid JSON lines
- }
- }
- }
-
- return false;
+ return status === 'completed' || status === 'waiting_for_input';
} catch {
return false;
}
diff --git a/packages/dashboard/src/lib/services/process-reconciler.ts b/packages/dashboard/src/lib/services/process-reconciler.ts
index 6289bd9..d9abf0d 100644
--- a/packages/dashboard/src/lib/services/process-reconciler.ts
+++ b/packages/dashboard/src/lib/services/process-reconciler.ts
@@ -21,13 +21,8 @@ import {
import {
checkProcessHealth,
ORPHAN_GRACE_PERIOD_MS,
- type ProcessHealthResult,
} from './process-health';
import { WorkflowExecutionSchema, type WorkflowExecution } from './workflow-service';
-import {
- OrchestrationExecutionSchema,
- type OrchestrationExecution,
-} from '@specflow/shared';
// Track reconciliation state
let reconciliationDone = false;
@@ -127,71 +122,6 @@ function loadProjectWorkflows(projectPath: string): WorkflowExecution[] {
return executions;
}
-/**
- * Load all orchestration executions for a project (T056)
- */
-function loadProjectOrchestrations(projectPath: string): OrchestrationExecution[] {
- const workflowDir = join(projectPath, '.specflow', 'workflows');
- const executions: OrchestrationExecution[] = [];
-
- if (!existsSync(workflowDir)) {
- return [];
- }
-
- try {
- const files = readdirSync(workflowDir).filter(
- (f) => f.startsWith('orchestration-') && f.endsWith('.json')
- );
-
- for (const file of files) {
- try {
- const content = readFileSync(join(workflowDir, file), 'utf-8');
- executions.push(OrchestrationExecutionSchema.parse(JSON.parse(content)));
- } catch {
- // Skip invalid files
- }
- }
- } catch {
- // Directory doesn't exist or can't be read
- }
-
- return executions;
-}
-
-/**
- * Get the current linked workflow execution ID for an orchestration
- */
-function getCurrentLinkedWorkflowId(orchestration: OrchestrationExecution): string | undefined {
- const { executions, currentPhase, batches } = orchestration;
-
- switch (currentPhase) {
- case 'design':
- return executions.design;
- case 'analyze':
- return executions.analyze;
- case 'implement':
- // Get the current batch's workflow execution
- const currentBatch = batches.items[batches.current];
- return currentBatch?.workflowExecutionId;
- case 'verify':
- return executions.verify;
- case 'merge':
- return executions.merge;
- default:
- return undefined;
- }
-}
-
-/**
- * Save an orchestration execution
- */
-function saveOrchestration(execution: OrchestrationExecution, projectPath: string): void {
- const workflowDir = join(projectPath, '.specflow', 'workflows');
- mkdirSync(workflowDir, { recursive: true });
- const filePath = join(workflowDir, `orchestration-${execution.id}.json`);
- writeFileSync(filePath, JSON.stringify(execution, null, 2));
-}
-
/**
* Save a workflow execution
*/
@@ -210,6 +140,48 @@ function saveWorkflow(execution: WorkflowExecution, projectPath: string): void {
}
}
+/**
+ * Regenerate index.json from the per-execution metadata (the source of truth),
+ * so the index cannot keep stale "running" entries after reconciliation.
+ *
+ * Keeps one entry per session (the most recently updated execution), ordered
+ * newest first and capped at 50 entries.
+ */
+function rebuildWorkflowIndex(projectPath: string): void {
+  const dir = join(projectPath, '.specflow', 'workflows');
+  mkdirSync(dir, { recursive: true });
+
+  // Pick the latest execution for every session ID; executions without a
+  // session ID cannot be indexed and are skipped.
+  const latestBySession = new Map();
+  for (const candidate of loadProjectWorkflows(projectPath)) {
+    if (!candidate.sessionId) continue;
+    const current = latestBySession.get(candidate.sessionId);
+    const isNewer =
+      !current ||
+      new Date(candidate.updatedAt).getTime() > new Date(current.updatedAt).getTime();
+    if (isNewer) {
+      latestBySession.set(candidate.sessionId, candidate);
+    }
+  }
+
+  const entries = Array.from(latestBySession.values()).map((workflow) => ({
+    sessionId: workflow.sessionId as string,
+    executionId: workflow.id,
+    skill: workflow.skill,
+    status: workflow.status,
+    startedAt: workflow.startedAt,
+    updatedAt: workflow.updatedAt,
+    costUsd: workflow.costUsd,
+  }));
+  entries.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
+  const sessions = entries.slice(0, 50);
+
+  writeFileSync(join(dir, 'index.json'), JSON.stringify({ sessions }, null, 2));
+}
+
/**
* Collect all tracked PIDs from active workflows
*/
@@ -352,62 +324,8 @@ export async function reconcileWorkflows(): Promise {
}
}
- // Phase 1b: Check orchestration health (T056, T057)
- const orchestrations = loadProjectOrchestrations(project.path);
- for (const orchestration of orchestrations) {
- // Only check active orchestrations
- if (!['running', 'paused', 'waiting_merge'].includes(orchestration.status)) {
- continue;
- }
-
- result.orchestrationsChecked++;
- let updated = false;
-
- // Check if linked workflow executions are still alive
- const currentWorkflowId = getCurrentLinkedWorkflowId(orchestration);
- if (currentWorkflowId) {
- // Find the workflow execution
- const workflows = loadProjectWorkflows(project.path);
- const linkedWorkflow = workflows.find(
- (w) => w.id === currentWorkflowId || w.sessionId === currentWorkflowId
- );
-
- if (linkedWorkflow) {
- // If workflow is failed/cancelled, orchestration should reflect that
- if (linkedWorkflow.status === 'failed' || linkedWorkflow.status === 'cancelled') {
- orchestration.status = 'failed';
- orchestration.errorMessage = `Linked workflow ${linkedWorkflow.status}: ${linkedWorkflow.error || 'Unknown error'}`;
- orchestration.updatedAt = new Date().toISOString();
- orchestration.decisionLog.push({
- timestamp: new Date().toISOString(),
- decision: 'reconcile_failed',
- reason: `Workflow ${linkedWorkflow.status} detected on startup`,
- });
- updated = true;
- }
- }
- }
-
- // If orchestration has been running for too long without updates, mark as failed
- const lastUpdateAge = Date.now() - new Date(orchestration.updatedAt).getTime();
- const MAX_ORCHESTRATION_AGE_MS = 4 * 60 * 60 * 1000; // 4 hours
- if (orchestration.status === 'running' && lastUpdateAge > MAX_ORCHESTRATION_AGE_MS) {
- orchestration.status = 'failed';
- orchestration.errorMessage = 'Orchestration stale (no updates in 4+ hours)';
- orchestration.updatedAt = new Date().toISOString();
- orchestration.decisionLog.push({
- timestamp: new Date().toISOString(),
- decision: 'reconcile_stale',
- reason: 'No updates in 4+ hours, marking as failed',
- });
- updated = true;
- }
-
- if (updated) {
- saveOrchestration(orchestration, project.path);
- result.orchestrationsUpdated++;
- }
- }
+ // Rebuild workflow index from metadata to avoid stale running entries
+ rebuildWorkflowIndex(project.path);
} catch (err) {
result.errors.push(
`Error checking project ${project.id}: ${err instanceof Error ? err.message : String(err)}`
diff --git a/packages/dashboard/src/lib/services/runtime-state.ts b/packages/dashboard/src/lib/services/runtime-state.ts
new file mode 100644
index 0000000..e08432f
--- /dev/null
+++ b/packages/dashboard/src/lib/services/runtime-state.ts
@@ -0,0 +1,127 @@
+import type { WorkflowData, WorkflowIndexEntry } from '@specflow/shared';
+import type { WorkflowExecution } from './workflow-service';
+import { workflowService } from './workflow-service';
+import {
+ checkProcessHealth,
+ getSessionStatus,
+ readFileTail,
+ getSessionFileMtime,
+} from './process-health';
+import { getProjectSessionDir } from '@/lib/project-hash';
+import { join } from 'path';
+import { discoverCliSessions } from './workflow-discovery';
+
+const ACTIVE_STATUSES: WorkflowIndexEntry['status'][] = ['running', 'waiting_for_input'];
+
+/**
+ * Work out the status to report for a tracked execution.
+ *
+ * The session file (via getSessionStatus) decides the answer whenever it exists;
+ * process health is consulted only when the file says running/stale. Without a
+ * session ID or a session file the persisted status is returned unchanged.
+ */
+function deriveExecutionStatus(
+  execution: WorkflowExecution,
+  projectPath: string
+): WorkflowIndexEntry['status'] {
+  const persisted = execution.status as WorkflowIndexEntry['status'];
+  if (!execution.sessionId) return persisted;
+
+  const sessionFile = join(getProjectSessionDir(projectPath), `${execution.sessionId}.jsonl`);
+  const mtime = getSessionFileMtime(projectPath, execution.sessionId);
+
+  // No session file - fall back to persisted status
+  if (!mtime) return persisted;
+
+  const ageMs = Date.now() - mtime.getTime();
+  const fileStatus = getSessionStatus(readFileTail(sessionFile, 10000), ageMs);
+
+  switch (fileStatus) {
+    case 'completed':
+    case 'waiting_for_input':
+      // Conclusive file-based statuses take precedence over everything else
+      return fileStatus;
+    default:
+      break;
+  }
+
+  // Running/stale: tracked sessions also have PID info, so a dead process
+  // without a completion marker in the file is reported as failed.
+  const { healthStatus } = checkProcessHealth(execution, projectPath);
+  return healthStatus === 'dead' ? 'failed' : fileStatus;
+}
+
+/**
+ * Convert a tracked execution into an index entry with a freshly derived status.
+ * Executions that have no session ID yield null.
+ */
+function toWorkflowIndexEntry(
+  execution: WorkflowExecution,
+  projectPath: string
+): WorkflowIndexEntry | null {
+  return execution.sessionId
+    ? {
+        sessionId: execution.sessionId,
+        executionId: execution.id,
+        skill: execution.skill,
+        status: deriveExecutionStatus(execution, projectPath),
+        startedAt: execution.startedAt,
+        updatedAt: execution.updatedAt,
+        costUsd: execution.costUsd,
+      }
+    : null;
+}
+
+/**
+ * Build the workflow payload for a project: dashboard-tracked executions merged
+ * with CLI sessions discovered on disk that the dashboard is not tracking.
+ *
+ * @param projectId - Key passed to workflowService.list()
+ * @param projectPath - Project path used for session-file lookups and CLI discovery
+ * @returns currentExecution is the most recently updated session whose status is
+ *          running or waiting_for_input (searched before truncation, so it may
+ *          not appear in `sessions`); sessions holds the 10 most recently
+ *          updated entries.
+ */
+export async function buildWorkflowData(
+  projectId: string,
+  projectPath: string
+): Promise<WorkflowData> {
+  const executions = workflowService.list(projectId);
+  const trackedSessions = executions
+    .map((execution) => toWorkflowIndexEntry(execution, projectPath))
+    .filter((entry): entry is WorkflowIndexEntry => Boolean(entry));
+
+  // Tracked IDs are excluded from discovery so a session is never listed twice
+  const trackedSessionIds = new Set(trackedSessions.map((s) => s.sessionId));
+  const cliSessions = discoverCliSessions(projectPath, trackedSessionIds, 10);
+
+  const allSessions = [...trackedSessions, ...cliSessions];
+  allSessions.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
+
+  const currentExecution = allSessions.find((s) => ACTIVE_STATUSES.includes(s.status)) ?? null;
+
+  return {
+    currentExecution,
+    sessions: allSessions.slice(0, 10),
+  };
+}
+
+/**
+ * Fast version of buildWorkflowData that skips expensive CLI session discovery.
+ * Used for initial SSE connection to minimize latency.
+ * Full session discovery happens on subsequent file change events.
+ *
+ * @param projectId - Key passed to workflowService.list()
+ * @param projectPath - Project path used for session-file lookups
+ * @returns Same shape as buildWorkflowData, built from tracked sessions only
+ */
+export async function buildWorkflowDataFast(
+  projectId: string,
+  projectPath: string
+): Promise<WorkflowData> {
+  const executions = workflowService.list(projectId);
+  const trackedSessions = executions
+    .map((execution) => toWorkflowIndexEntry(execution, projectPath))
+    .filter((entry): entry is WorkflowIndexEntry => Boolean(entry));
+
+  // Skip discoverCliSessions() - this is the expensive operation
+  // CLI sessions will be discovered on subsequent file change events
+
+  trackedSessions.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
+
+  const currentExecution = trackedSessions.find((s) => ACTIVE_STATUSES.includes(s.status)) ?? null;
+
+  return {
+    currentExecution,
+    sessions: trackedSessions.slice(0, 10),
+  };
+}
diff --git a/packages/dashboard/src/lib/services/workflow-discovery.ts b/packages/dashboard/src/lib/services/workflow-discovery.ts
new file mode 100644
index 0000000..551ec83
--- /dev/null
+++ b/packages/dashboard/src/lib/services/workflow-discovery.ts
@@ -0,0 +1,162 @@
+import path from 'path';
+import { existsSync, readdirSync, statSync, openSync, readSync, closeSync } from 'fs';
+import { v4 as uuidv4 } from 'uuid';
+import { getProjectSessionDir } from '@/lib/project-hash';
+import { isCommandInjection } from '@/lib/session-parser';
+import { getSessionStatus, readFileTail } from './process-health';
+import type { WorkflowIndexEntry } from '@specflow/shared';
+
+/**
+ * Best-effort skill detection from the head of a session JSONL file.
+ * Returns 'CLI Session' when nothing matches or the file cannot be read.
+ */
+function detectCliSessionSkill(fullPath: string): string {
+  try {
+    // Read enough to get past system messages to user prompt
+    // Skill prompts can be large, so read generously
+    const buffer = Buffer.alloc(32768);
+    const fd = openSync(fullPath, 'r');
+    let bytesRead = 0;
+    try {
+      bytesRead = readSync(fd, buffer, 0, buffer.length, 0);
+    } finally {
+      // Always release the descriptor, even when the read throws
+      closeSync(fd);
+    }
+
+    const lines = buffer.toString('utf-8', 0, bytesRead).split('\n').slice(0, 20);
+    for (const line of lines) {
+      if (!line.trim()) continue;
+      try {
+        const msg = JSON.parse(line);
+        // Check for explicit skill field
+        if (msg.skill) {
+          return msg.skill;
+        }
+
+        // Only check user messages for skill detection — assistant messages
+        // may reference other skills (e.g., "after /flow.design completed")
+        if (msg.type !== 'user') continue;
+
+        // Extract text from message content (string or array format)
+        let textContent = '';
+        const msgContent = msg.message?.content;
+        if (typeof msgContent === 'string') {
+          textContent = msgContent;
+        } else if (Array.isArray(msgContent)) {
+          textContent = msgContent
+            .filter((b: { type: string }) => b.type === 'text')
+            .map((b: { text: string }) => b.text)
+            .join('\n');
+        }
+        if (!textContent) continue;
+
+        // Use isCommandInjection for robust skill detection — it has
+        // content-specific patterns (e.g., [IMPL] → flow.implement)
+        // that work even when skill prompts reference other skills
+        const commandInfo = isCommandInjection(textContent);
+        if (commandInfo.isCommand && commandInfo.commandName) {
+          return commandInfo.commandName;
+        }
+        // Fallback: explicit header (e.g., "# flow.analyze")
+        const headerMatch = textContent.match(/^# \/?flow\.(\w+)/m);
+        if (headerMatch) {
+          return `flow.${headerMatch[1]}`;
+        }
+      } catch {
+        // Invalid JSON line, continue
+      }
+    }
+  } catch {
+    // Could not read file content, use default skill
+  }
+  return 'CLI Session';
+}
+
+/**
+ * Discover CLI sessions from Claude projects directory.
+ * Scans ~/.claude/projects/{hash}/ for .jsonl files and creates WorkflowIndexEntry objects.
+ * These are sessions started from CLI that weren't tracked by the dashboard.
+ *
+ * Never throws: an unreadable directory yields [], and a file that cannot be
+ * stat'ed or processed is skipped.
+ *
+ * @param projectPath - Absolute path to the project
+ * @param trackedSessionIds - Set of session IDs already tracked by dashboard (to avoid duplicates)
+ * @param limit - Maximum number of sessions to return (default 50)
+ * @returns Entries ordered by file mtime, newest first
+ */
+export function discoverCliSessions(
+  projectPath: string,
+  trackedSessionIds: Set<string>,
+  limit: number = 50
+): WorkflowIndexEntry[] {
+  const sessionDir = getProjectSessionDir(projectPath);
+
+  if (!existsSync(sessionDir)) {
+    return [];
+  }
+
+  try {
+    const files = readdirSync(sessionDir);
+    const jsonlFiles = files.filter(f => f.endsWith('.jsonl'));
+
+    // Phase 1: Get file stats quickly and filter to candidates
+    interface SessionCandidate {
+      sessionId: string;
+      fullPath: string;
+      stats: { mtime: Date; birthtime: Date };
+    }
+    const candidates: SessionCandidate[] = [];
+
+    for (const file of jsonlFiles) {
+      // Strip only the trailing extension (replace() would hit the first match)
+      const sessionId = file.slice(0, -'.jsonl'.length);
+
+      // Skip if already tracked by dashboard
+      if (trackedSessionIds.has(sessionId)) {
+        continue;
+      }
+
+      const fullPath = path.join(sessionDir, file);
+      try {
+        const stats = statSync(fullPath);
+        candidates.push({ sessionId, fullPath, stats });
+      } catch {
+        // Could not stat file, skip
+      }
+    }
+
+    // Phase 2: Sort by mtime and limit BEFORE doing expensive content reads
+    candidates.sort((a, b) => b.stats.mtime.getTime() - a.stats.mtime.getTime());
+    const topCandidates = candidates.slice(0, limit);
+
+    // Phase 3: Process only the top candidates (expensive operations)
+    const entries: WorkflowIndexEntry[] = [];
+
+    for (const { sessionId, fullPath, stats } of topCandidates) {
+      try {
+        const skill = detectCliSessionSkill(fullPath);
+        const ageMs = Date.now() - stats.mtime.getTime();
+
+        // Read tail and get status from single source of truth
+        let tail = '';
+        try {
+          tail = readFileTail(fullPath, 10000);
+        } catch {
+          // Ignore tail read failures
+        }
+
+        // Use centralized status detection (process-health.ts is the single source of truth)
+        const status = getSessionStatus(tail, ageMs);
+
+        entries.push({
+          sessionId,
+          executionId: uuidv4(), // Generate placeholder ID for CLI sessions
+          skill,
+          status,
+          startedAt: stats.birthtime.toISOString(),
+          updatedAt: stats.mtime.toISOString(),
+          costUsd: 0, // Unknown for CLI sessions
+        });
+      } catch {
+        // Could not process file, skip
+      }
+    }
+
+    // Already sorted by mtime in Phase 2, just return entries
+    return entries;
+  } catch {
+    return [];
+  }
+}
diff --git a/packages/dashboard/src/lib/services/workflow-service.ts b/packages/dashboard/src/lib/services/workflow-service.ts
index cba408a..0a8fd8a 100644
--- a/packages/dashboard/src/lib/services/workflow-service.ts
+++ b/packages/dashboard/src/lib/services/workflow-service.ts
@@ -130,9 +130,14 @@ export type StartWorkflowRequest = z.infer;
* Answer workflow request
*/
export const AnswerWorkflowRequestSchema = z.object({
- id: z.string().uuid(), // Execution ID is always UUID
+ id: z.string().uuid().optional(), // Execution ID (preferred)
+ sessionId: z.string().optional(), // Alternative: lookup by session ID
+ projectId: z.string().optional(), // Required with sessionId
answers: z.record(z.string(), z.string()),
-});
+}).refine(
+ data => data.id || (data.sessionId && data.projectId),
+ { message: 'Either id or both sessionId and projectId must be provided' }
+);
export type AnswerWorkflowRequest = z.infer;
@@ -178,54 +183,6 @@ export type WorkflowIndex = z.infer;
// This is a dashboard tracking timeout, not the actual CLI timeout
const DEFAULT_TIMEOUT_MS = 4 * 60 * 60 * 1000; // 4 hours
-/**
- * JSON Schema for workflow structured output (sent to Claude CLI)
- */
-const WORKFLOW_JSON_SCHEMA = {
- type: 'object',
- properties: {
- status: {
- type: 'string',
- enum: ['completed', 'needs_input', 'error'],
- },
- phase: { type: 'string' },
- message: { type: 'string' },
- questions: {
- type: 'array',
- items: {
- type: 'object',
- properties: {
- question: { type: 'string' },
- header: { type: 'string' },
- options: {
- type: 'array',
- items: {
- type: 'object',
- properties: {
- label: { type: 'string' },
- description: { type: 'string' },
- },
- },
- },
- multiSelect: { type: 'boolean' },
- },
- required: ['question'],
- },
- },
- artifacts: {
- type: 'array',
- items: {
- type: 'object',
- properties: {
- path: { type: 'string' },
- action: { type: 'string' },
- },
- },
- },
- },
- required: ['status'],
-};
-
// =============================================================================
// State Persistence - Project-Local Storage (Phase 1053)
// =============================================================================
@@ -587,14 +544,9 @@ function buildInitialPrompt(skillInput: string): { prompt: string; skillName: st
let prompt = `# CLI Mode Instructions
You are running in non-interactive CLI mode. IMPORTANT:
-1. You CANNOT use AskUserQuestion tool - it is disabled
-2. When you need user input, output questions in the JSON structured_output
-3. Set status to "needs_input" and include a questions array
-4. Use the SAME format as AskUserQuestion tool input:
- - question: The question text
- - header: Short label (max 12 chars)
- - options: Array of {label, description} choices
- - multiSelect: true if multiple selections allowed
+1. When you need user input, use the AskUserQuestion tool with a questions array
+2. Prefer asking all required questions in a single AskUserQuestion call
+3. After asking, wait for the user response before continuing
# Skill Instructions
@@ -619,22 +571,23 @@ ${context}`;
/**
* Build the resume prompt with user answers
*/
-function buildResumePrompt(answers: Record): string {
- const answerText = Object.entries(answers)
- .map(([key, value]) => `- ${key}: ${value}`)
- .join('\n');
+function formatAnswerList(answers: Record): string {
+ const entries = Object.entries(answers);
+ if (entries.length === 0) {
+ return '- (no answers provided)';
+ }
+ return entries.map(([question, answer]) => `- ${question}: ${answer}`).join('\n');
+}
- return `# User Answers
+function buildResumePrompt(answers: Record): string {
+ const answerText = formatAnswerList(answers);
-The user has answered the questions:
+ return `# Answers to your questions
${answerText}
-Continue the workflow using these answers. Remember:
-- You CANNOT use AskUserQuestion tool - it is disabled
-- If you need more input, set status to "needs_input" with questions array
-- If the workflow is complete, set status to "completed"
-- Use the structured_output JSON format`;
+Continue the workflow using these answers.
+If you need more input, ask via AskUserQuestion.`;
}
/**
@@ -909,12 +862,48 @@ class WorkflowService {
execution.updatedAt = new Date().toISOString();
execution.logs.push(`[HEALTH] Process recovered - session file updated`);
saveExecution(execution, projectPath);
+ } else if (health.healthStatus === 'unknown') {
+ if (didSessionEndGracefully(projectPath, execution.sessionId)) {
+ execution.status = 'completed';
+ execution.completedAt = new Date().toISOString();
+ execution.updatedAt = new Date().toISOString();
+ execution.logs.push(`[HEALTH] Session completed gracefully (no PID)`);
+ saveExecution(execution, projectPath);
+ this.updateSessionStatus(execution.sessionId, projectPath, 'completed');
+ } else if (health.isStale && execution.status !== 'stale') {
+ execution.status = 'stale';
+ execution.error = getHealthStatusMessage({
+ ...health,
+ healthStatus: 'stale',
+ });
+ execution.updatedAt = new Date().toISOString();
+ execution.logs.push(`[HEALTH] ${execution.error}`);
+ saveExecution(execution, projectPath);
+ }
}
}
return execution;
}
+  /**
+   * Resolve an execution from its session ID.
+   * Finds the matching entry in the project's workflow index and delegates to
+   * get() with that entry's execution ID.
+   * @param sessionId - Session ID to look up
+   * @param projectId - Project registry key
+   * @returns The execution, or undefined when the project path or the index
+   *          entry cannot be found
+   */
+  getBySession(sessionId: string, projectId: string): WorkflowExecution | undefined {
+    const projectPath = getProjectPath(projectId);
+    if (!projectPath) return undefined;
+
+    const { sessions } = loadWorkflowIndex(projectPath);
+    const match = sessions.find((entry) => entry.sessionId === sessionId);
+
+    return match ? this.get(match.executionId, projectId) : undefined;
+  }
+
/**
* List executions for a project
* @param projectId - Registry key for the project
@@ -1045,14 +1034,33 @@ class WorkflowService {
}
const index = loadWorkflowIndex(projectPath);
- const sessionIdx = index.sessions.findIndex(s => s.sessionId === sessionId);
+ let sessionIdx = index.sessions.findIndex(s => s.sessionId === sessionId);
+ const now = new Date().toISOString();
+ // If session not in index, add it (handles discovered CLI sessions)
if (sessionIdx < 0) {
- return false;
+ const newEntry: WorkflowIndexEntry = {
+ sessionId,
+ executionId: randomUUID(),
+ skill: 'CLI Session',
+ status: finalStatus,
+ startedAt: now,
+ updatedAt: now,
+ costUsd: 0,
+ };
+ index.sessions.unshift(newEntry);
+ saveWorkflowIndex(projectPath, index);
+ return true;
}
const session = index.sessions[sessionIdx];
+ // If already in terminal state, return true for idempotency
+ // (calling cancel on already-cancelled session should succeed)
+ if (['completed', 'cancelled', 'failed'].includes(session.status)) {
+ return true;
+ }
+
// Only update if in an active state (includes detached/stale - session may still be running)
if (!['running', 'waiting_for_input', 'detached', 'stale'].includes(session.status)) {
return false;
@@ -1060,7 +1068,7 @@ class WorkflowService {
// Update the index entry
session.status = finalStatus;
- session.updatedAt = new Date().toISOString();
+ session.updatedAt = now;
saveWorkflowIndex(projectPath, index);
// Also try to update the metadata file if it exists
@@ -1072,13 +1080,13 @@ class WorkflowService {
const execution = WorkflowExecutionSchema.parse(JSON.parse(content));
execution.status = finalStatus;
if (finalStatus === 'cancelled') {
- execution.cancelledAt = new Date().toISOString();
+ execution.cancelledAt = now;
execution.logs.push('[CANCELLED] Session cancelled by user (tracking recovered)');
} else {
- execution.completedAt = new Date().toISOString();
+ execution.completedAt = now;
execution.logs.push('[COMPLETED] Session completed (detected from messages)');
}
- execution.updatedAt = new Date().toISOString();
+ execution.updatedAt = now;
writeFileSync(metadataPath, JSON.stringify(execution, null, 2));
} catch {
// Ignore errors updating metadata
@@ -1088,6 +1096,88 @@ class WorkflowService {
return true;
}
+ /**
+ * Mark a workflow as waiting for input based on AskUserQuestion detection.
+ * This keeps the dashboard state consistent even when structured output isn't used.
+ *
+ * The index entry is updated and saved first. The execution record is then
+ * updated from <workflowDir>/<sessionId>/metadata.json when that file exists
+ * and parses, otherwise from loadExecution(session.executionId). On the
+ * execution record this sets status to 'waiting_for_input', clears `error`,
+ * sets output.status to 'needs_input' and appends a '[WAITING]' log line.
+ *
+ * @param sessionId - Session ID to look up in the workflow index
+ * @param projectId - Project key resolved through getProjectPath()
+ * @param questions - Detected questions; missing `options` become [] and a
+ *   missing option `description` becomes ''. When omitted, any questions
+ *   already stored on execution.output are kept.
+ * @returns false when the project path cannot be resolved, the session is not
+ *   in the index, or the session is already 'completed' / 'cancelled';
+ *   true otherwise - including when the index was updated but no execution
+ *   record could be loaded.
+ *
+ * NOTE(review): 'failed' is not part of the terminal-state guard, so a failed
+ * session can be moved back to 'waiting_for_input' - confirm this is intended.
+ */
+ markWaitingForInput(
+ sessionId: string,
+ projectId: string,
+ questions?: Array<{
+ question: string;
+ header?: string;
+ options?: Array<{ label: string; description?: string }>;
+ multiSelect?: boolean;
+ }>
+ ): boolean {
+ const projectPath = getProjectPath(projectId);
+ if (!projectPath) {
+ return false;
+ }
+
+ const index = loadWorkflowIndex(projectPath);
+ const session = index.sessions.find(s => s.sessionId === sessionId);
+ if (!session) {
+ return false;
+ }
+
+ if (['completed', 'cancelled'].includes(session.status)) {
+ return false;
+ }
+
+ const now = new Date().toISOString();
+ session.status = 'waiting_for_input';
+ session.updatedAt = now;
+ saveWorkflowIndex(projectPath, index);
+
+ const normalizedQuestions = questions?.map((q) => ({
+ question: q.question,
+ header: q.header,
+ options: (q.options || []).map((opt) => ({
+ label: opt.label,
+ description: opt.description ?? '',
+ })),
+ multiSelect: q.multiSelect,
+ }));
+
+ const workflowDir = getProjectWorkflowDir(projectPath);
+ const metadataPath = join(workflowDir, sessionId, 'metadata.json');
+
+ const updateExecution = (execution: WorkflowExecution): void => {
+ execution.status = 'waiting_for_input';
+ execution.updatedAt = now;
+ execution.error = undefined;
+ execution.output = {
+ ...(execution.output || {}),
+ status: 'needs_input',
+ questions: normalizedQuestions ?? execution.output?.questions,
+ };
+ execution.logs.push('[WAITING] Questions detected via AskUserQuestion');
+ saveExecution(execution, projectPath);
+ };
+
+ if (existsSync(metadataPath)) {
+ try {
+ const content = readFileSync(metadataPath, 'utf-8');
+ const execution = WorkflowExecutionSchema.parse(JSON.parse(content));
+ updateExecution(execution);
+ return true;
+ } catch {
+ // Metadata unreadable or failed schema validation - fall through to execution lookup
+ }
+ }
+
+ if (session.executionId) {
+ const execution = loadExecution(session.executionId, projectPath);
+ if (execution) {
+ updateExecution(execution);
+ return true;
+ }
+ }
+
+ return true;
+ }
+
/**
* Update session status in workflow index (internal helper)
*/
@@ -1159,15 +1249,12 @@ ${claudePath} -p --output-format json "Say hello" < /dev/null > "${outputFile}"
const promptFile = join(workflowDir, 'resume-prompt.txt');
writeFileSync(promptFile, resumePrompt);
- const schemaFile = join(workflowDir, 'schema.json');
- writeFileSync(schemaFile, JSON.stringify(WORKFLOW_JSON_SCHEMA));
-
execution.logs.push(`[RESUME] Session: ${effectiveSessionId}`);
execution.logs.push(`[INFO] Resume prompt (${resumePrompt.length} chars)`);
scriptContent = `#!/bin/bash
cd "${projectPath}"
-${claudePath} -p --output-format json --resume "${effectiveSessionId}" --dangerously-skip-permissions --disallowedTools "AskUserQuestion" --json-schema "$(cat ${schemaFile})" < "${promptFile}" > "${outputFile}" 2>&1
+${claudePath} -p --output-format json --resume "${effectiveSessionId}" --dangerously-skip-permissions < "${promptFile}" > "${outputFile}" 2>&1
`;
} else {
// Initial run (FR-005)
@@ -1188,12 +1275,9 @@ ${claudePath} -p --output-format json --resume "${effectiveSessionId}" --dangero
execution.logs.push(`[INFO] Skill: ${promptResult.skillName}`);
execution.logs.push(`[INFO] Initial prompt (${promptResult.prompt.length} chars)`);
- const schemaFile = join(workflowDir, 'schema.json');
- writeFileSync(schemaFile, JSON.stringify(WORKFLOW_JSON_SCHEMA));
-
scriptContent = `#!/bin/bash
cd "${projectPath}"
-${claudePath} -p --output-format json --dangerously-skip-permissions --disallowedTools "AskUserQuestion" --json-schema "$(cat ${schemaFile})" < "${promptFile}" > "${outputFile}" 2>&1
+${claudePath} -p --output-format json --dangerously-skip-permissions < "${promptFile}" > "${outputFile}" 2>&1
`;
}
diff --git a/packages/dashboard/src/lib/session-parser.ts b/packages/dashboard/src/lib/session-parser.ts
index d645ee5..e231ba0 100644
--- a/packages/dashboard/src/lib/session-parser.ts
+++ b/packages/dashboard/src/lib/session-parser.ts
@@ -27,6 +27,16 @@ export interface WorkflowOutput {
questions?: QuestionInfo[];
}
+/**
+ * Local CLI command data (e.g., /clear, /help)
+ *
+ * Produced by parseLocalCommand() from the XML-like elements in a message:
+ * - command: text of the command-name element, leading "/" removed, trimmed
+ * - message: text of the command-message element
+ * - args:    text of the command-args element
+ * - stdout:  text of the local-command-stdout element
+ * Optional fields are undefined when the element is missing or blank.
+ */
+export interface LocalCommandData {
+ command: string;
+ message?: string;
+ args?: string;
+ stdout?: string;
+}
+
/**
* Session message from Claude JSONL files.
* Only user and assistant messages are displayed; tool calls are parsed for metrics.
@@ -47,6 +57,8 @@ export interface SessionMessage {
questions?: QuestionInfo[];
/** Agent tasks launched from this message */
agentTasks?: AgentTaskInfo[];
+ /** Local CLI command data (e.g., /clear, /help) */
+ localCommand?: LocalCommandData;
}
/**
@@ -289,16 +301,26 @@ function extractToolCallInfos(content: unknown): {
if (Array.isArray(questionItems)) {
for (const q of questionItems) {
if (typeof q === 'object' && q !== null && typeof q.question === 'string') {
+ const multiSelectValue = typeof q.multiSelect === 'boolean'
+ ? q.multiSelect
+ : typeof (q as { multiselect?: unknown }).multiselect === 'boolean'
+ ? (q as { multiselect?: boolean }).multiselect
+ : false;
const questionInfo: QuestionInfo = {
question: q.question,
header: typeof q.header === 'string' ? q.header : undefined,
options: [],
- multiSelect: typeof q.multiSelect === 'boolean' ? q.multiSelect : false,
+ multiSelect: multiSelectValue ?? false,
};
// Extract options
if (Array.isArray(q.options)) {
for (const opt of q.options) {
- if (typeof opt === 'object' && opt !== null && typeof opt.label === 'string') {
+ if (typeof opt === 'string') {
+ questionInfo.options.push({
+ label: opt,
+ description: undefined,
+ });
+ } else if (typeof opt === 'object' && opt !== null && typeof opt.label === 'string') {
questionInfo.options.push({
label: opt.label,
description: typeof opt.description === 'string' ? opt.description : undefined,
@@ -392,6 +414,37 @@ function extractToolCallInfos(content: unknown): {
return { toolCalls, todos, questions, workflowOutput, agentTasks };
}
+/**
+ * Parse the XML-like format from local CLI commands.
+ * Returns null if content is not a local command.
+ */
+export function parseLocalCommand(content: string): LocalCommandData | null {
+ // Check for the caveat marker
+ if (!content.includes('')) {
+ return null;
+ }
+
+ // Extract command name (strip leading /)
+ const commandMatch = content.match(/\/?([^<]+)<\/command-name>/);
+ if (!commandMatch) {
+ return null;
+ }
+
+ const command = commandMatch[1].trim();
+
+ // Extract optional fields
+ const messageMatch = content.match(/([^<]*)<\/command-message>/);
+ const argsMatch = content.match(/([^<]*)<\/command-args>/);
+ const stdoutMatch = content.match(/([^<]*)<\/local-command-stdout>/);
+
+ return {
+ command,
+ message: messageMatch?.[1]?.trim() || undefined,
+ args: argsMatch?.[1]?.trim() || undefined,
+ stdout: stdoutMatch?.[1]?.trim() || undefined,
+ };
+}
+
/**
* Detect if a message content is a command injection (workflow command).
*/
@@ -405,7 +458,7 @@ export function isCommandInjection(content: string): {
/^\*\*NEVER edit tasks\.md directly\*\*/,
/\$ARGUMENTS/,
/## Execution/,
- /\[IMPL\] INITIALIZE/,
+ /\[(IMPL|DESIGN|VERIFY|MERGE|ANALYZE|ORCH|REVIEW)\]/,
/## Memory Protocol/,
/## Phase Lifecycle/,
/# @\w+ Agent/,
@@ -426,14 +479,17 @@ export function isCommandInjection(content: string): {
// Extract command name from content
// Order matters - more specific patterns first
const namePatterns = [
- // Most specific: explicit command header or description line
- { pattern: /^# \/flow\.(\w+)/m, prefix: 'flow.' },
- { pattern: /^description:\s*.*flow\.(\w+)/im, prefix: 'flow.' },
- // Phase-specific patterns
- { pattern: /\[IMPL\]/i, prefix: '', name: 'flow.implement' },
- { pattern: /\[MERGE\]/i, prefix: '', name: 'flow.merge' },
- { pattern: /\[VERIFY\]/i, prefix: '', name: 'flow.verify' },
- { pattern: /\[DESIGN\]/i, prefix: '', name: 'flow.design' },
+ // Most specific: explicit command header (with or without /)
+ { pattern: /^# \/?flow\.(\w+)/m, prefix: 'flow.' },
+ { pattern: /^description:\s*.*?flow\.(\w+)/im, prefix: 'flow.' },
+ // Phase-specific patterns (each skill has unique [TAG] markers)
+ { pattern: /\[IMPL\]/, prefix: '', name: 'flow.implement' },
+ { pattern: /\[MERGE\]/, prefix: '', name: 'flow.merge' },
+ { pattern: /\[VERIFY\]/, prefix: '', name: 'flow.verify' },
+ { pattern: /\[DESIGN\]/, prefix: '', name: 'flow.design' },
+ { pattern: /\[ANALYZE\]/, prefix: '', name: 'flow.analyze' },
+ { pattern: /\[ORCH\]/, prefix: '', name: 'flow.orchestrate' },
+ { pattern: /\[REVIEW\]/, prefix: '', name: 'flow.review' },
{ pattern: /## Design Phase/i, prefix: '', name: 'flow.design' },
{ pattern: /## Verify Phase/i, prefix: '', name: 'flow.verify' },
{ pattern: /## Memory Protocol/i, prefix: '', name: 'flow.memory' },
@@ -454,7 +510,7 @@ export function isCommandInjection(content: string): {
}
}
- return { isCommand: true, commandName: 'Command' };
+ return { isCommand: true, commandName: 'Workflow' };
}
/**
@@ -487,6 +543,18 @@ export function parseSessionLine(line: string): ParseResult {
}
}
+ // Detect CLI result messages (session completed normally)
+ if (data.type === 'result') {
+ return {
+ message: {
+ role: 'system',
+ content: 'Session Ended',
+ timestamp: data.timestamp,
+ isSessionEnd: true,
+ },
+ };
+ }
+
// User and assistant messages are in data.message.content
if (data.type === 'user' || data.type === 'assistant') {
const messageContent = data.message?.content;
@@ -527,9 +595,10 @@ export function parseSessionLine(line: string): ParseResult {
// Also extract any tool calls for metrics
const toolCallMetrics = extractToolCallMetrics(messageContent);
- // Check if user message is a command injection
+ // Check if user message is a command injection or local command
const isUser = data.type === 'user';
const commandInfo = isUser ? isCommandInjection(textContent) : null;
+ const localCommandData = isUser ? parseLocalCommand(textContent) : null;
return {
message: {
@@ -541,6 +610,7 @@ export function parseSessionLine(line: string): ParseResult {
commandName: commandInfo?.commandName ?? undefined,
questions: questions.length > 0 ? questions : undefined,
agentTasks: agentTasks.length > 0 ? agentTasks : undefined,
+ localCommand: localCommandData ?? undefined,
},
toolCall: toolCallMetrics.length > 0 ? toolCallMetrics[0] : undefined,
toolCalls: detailedToolCalls.length > 0 ? detailedToolCalls : undefined,
diff --git a/packages/dashboard/src/lib/specflow-env.ts b/packages/dashboard/src/lib/specflow-env.ts
new file mode 100644
index 0000000..46f9cec
--- /dev/null
+++ b/packages/dashboard/src/lib/specflow-env.ts
@@ -0,0 +1,22 @@
+import type { ProcessEnvOptions } from 'child_process';
+
+/**
+ * Ensure specflow CLI is on PATH for server-side exec calls.
+ */
+export function getSpecflowEnv(): ProcessEnvOptions['env'] {
+ const homeDir = process.env.HOME || '/Users/ppatterson';
+ const existingPath = process.env.PATH || '';
+ const prefix = [
+ `${homeDir}/.claude/specflow-system/bin`,
+ `${homeDir}/.local/bin`,
+ '/usr/local/bin',
+ '/usr/bin',
+ '/bin',
+ ].join(':');
+
+ return {
+ ...process.env,
+ HOME: homeDir,
+ PATH: `${prefix}:${existingPath}`,
+ };
+}
diff --git a/packages/dashboard/src/lib/watcher.ts b/packages/dashboard/src/lib/watcher.ts
index 87a1ff3..b8dc85d 100644
--- a/packages/dashboard/src/lib/watcher.ts
+++ b/packages/dashboard/src/lib/watcher.ts
@@ -5,20 +5,16 @@ import path from 'path';
import {
RegistrySchema,
OrchestrationStateSchema,
- WorkflowIndexSchema,
type Registry,
type OrchestrationState,
type SSEEvent,
type TasksData,
- type WorkflowIndex,
- type WorkflowIndexEntry,
type WorkflowData,
type PhasesData,
type SessionContent,
type SessionQuestion,
} from '@specflow/shared';
-import { readdirSync, statSync, existsSync, readFileSync } from 'fs';
-import { v4 as uuidv4 } from 'uuid';
+import { existsSync, readFileSync } from 'fs';
import { parseTasks, type ParseTasksOptions } from './task-parser';
import { parseRoadmapToPhasesData } from './roadmap-parser';
import {
@@ -27,8 +23,10 @@ import {
migrateStateFiles,
} from './state-paths';
import { getProjectSessionDir, getClaudeProjectsDir } from './project-hash';
-import { reconcileRunners } from './services/orchestration-runner';
-import { orchestrationService } from './services/orchestration-service';
+import { reconcileRunners, runOrchestration, isRunnerActive } from './services/orchestration-runner';
+import { orchestrationService, readDashboardState } from './services/orchestration-service';
+import { workflowService } from './services/workflow-service';
+import { buildWorkflowData, buildWorkflowDataFast } from './services/runtime-state';
// Debounce delay in milliseconds
const DEBOUNCE_MS = 200;
@@ -56,6 +54,9 @@ const phasesCache: Map = new Map(); // projectId -> JSON string
// Cache session content to detect actual changes
const sessionCache: Map = new Map(); // sessionId -> JSON string
+// Cache questions to detect actual changes and avoid duplicate session:question events
+const questionCache: Map = new Map(); // sessionId -> JSON string of questions
+
// Session debounce (faster for real-time feel)
const SESSION_DEBOUNCE_MS = 100;
@@ -83,6 +84,7 @@ export function broadcast(event: SSEEvent): void {
/**
* Broadcast a session:question event for workflow-mode questions
* Called by workflow-service when structured_output has questions
+ * Uses questionCache to deduplicate - won't broadcast same questions twice
*/
export function broadcastWorkflowQuestions(
sessionId: string,
@@ -96,6 +98,15 @@ export function broadcastWorkflowQuestions(
): void {
if (!questions || questions.length === 0) return;
+ // Check if these questions were already broadcast (deduplication)
+ const questionsFingerprint = JSON.stringify(questions.map(q => ({ q: q.question, h: q.header })));
+ const cachedQuestions = questionCache.get(sessionId) ?? '';
+ if (questionsFingerprint === cachedQuestions) {
+ // Same questions already broadcast, skip
+ return;
+ }
+ questionCache.set(sessionId, questionsFingerprint);
+
const mappedQuestions = questions.map((q) => ({
question: q.question,
header: q.header,
@@ -281,178 +292,15 @@ async function handleTasksChange(projectId: string, tasksPath: string): Promise<
});
}
-/**
- * Read and parse workflow index file for a project
- */
-async function readWorkflowIndex(indexPath: string): Promise {
- try {
- const content = await fs.readFile(indexPath, 'utf-8');
- const parsed = WorkflowIndexSchema.parse(JSON.parse(content));
- return parsed;
- } catch {
- // File doesn't exist or is invalid - return empty
- return { sessions: [] };
- }
-}
-
-/**
- * Build WorkflowData from index
- * Finds current active execution and includes all sessions
- */
-function buildWorkflowData(index: WorkflowIndex): WorkflowData {
- // Find current active execution (running or waiting_for_input)
- const activeStates = ['running', 'waiting_for_input', 'detached', 'stale'];
- const currentExecution = index.sessions.find(s => activeStates.includes(s.status)) ?? null;
-
- return {
- currentExecution,
- sessions: index.sessions,
- };
-}
-
-/**
- * Discover CLI sessions from Claude projects directory.
- * Scans ~/.claude/projects/{hash}/ for .jsonl files and creates WorkflowIndexEntry objects.
- * These are sessions started from CLI that weren't tracked by the dashboard.
- *
- * @param projectPath - Absolute path to the project
- * @param trackedSessionIds - Set of session IDs already tracked by dashboard (to avoid duplicates)
- * @param limit - Maximum number of sessions to return (default 50)
- */
-function discoverCliSessions(
- projectPath: string,
- trackedSessionIds: Set,
- limit: number = 50
-): WorkflowIndexEntry[] {
- const sessionDir = getProjectSessionDir(projectPath);
-
- if (!existsSync(sessionDir)) {
- return [];
- }
-
- try {
- const files = readdirSync(sessionDir);
- const jsonlFiles = files.filter(f => f.endsWith('.jsonl'));
-
- // Get file stats and create entries
- const entries: WorkflowIndexEntry[] = [];
-
- for (const file of jsonlFiles) {
- const sessionId = file.replace('.jsonl', '');
-
- // Skip if already tracked by dashboard
- if (trackedSessionIds.has(sessionId)) {
- continue;
- }
-
- const fullPath = path.join(sessionDir, file);
- try {
- const stats = statSync(fullPath);
-
- // Try to extract skill from first line of JSONL (lazy - only read if needed)
- let skill = 'CLI Session';
- try {
- // Read just the first few KB to find skill info
- const fd = require('fs').openSync(fullPath, 'r');
- const buffer = Buffer.alloc(4096);
- require('fs').readSync(fd, buffer, 0, 4096, 0);
- require('fs').closeSync(fd);
-
- const firstLines = buffer.toString('utf-8').split('\n').slice(0, 5);
- for (const line of firstLines) {
- if (!line.trim()) continue;
- try {
- const msg = JSON.parse(line);
- // Look for skill in various places
- if (msg.skill) {
- skill = msg.skill;
- break;
- }
- if (msg.message?.content && typeof msg.message.content === 'string') {
- // Check for /flow.* commands in first user message
- const flowMatch = msg.message.content.match(/\/flow\.(\w+)/);
- if (flowMatch) {
- skill = `flow.${flowMatch[1]}`;
- break;
- }
- }
- } catch {
- // Invalid JSON line, continue
- }
- }
- } catch {
- // Could not read file content, use default skill
- }
-
- // Determine status based on file age
- const fileAgeMs = Date.now() - stats.mtime.getTime();
- const isRecent = fileAgeMs < 30 * 60 * 1000; // 30 minutes
- const status: WorkflowIndexEntry['status'] = isRecent ? 'detached' : 'completed';
-
- entries.push({
- sessionId,
- executionId: uuidv4(), // Generate placeholder ID for CLI sessions
- skill,
- status,
- startedAt: stats.birthtime.toISOString(),
- updatedAt: stats.mtime.toISOString(),
- costUsd: 0, // Unknown for CLI sessions
- });
- } catch {
- // Could not stat file, skip
- }
- }
-
- // Sort by updatedAt descending (newest first)
- entries.sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime());
-
- // Return limited number
- return entries.slice(0, limit);
- } catch {
- return [];
- }
-}
-
/**
* Handle workflow index file change.
- * Merges dashboard-tracked sessions with discovered CLI sessions.
+ * Uses runtime aggregation instead of reading index.json directly.
*/
-async function handleWorkflowChange(projectId: string, indexPath: string): Promise {
- const index = await readWorkflowIndex(indexPath);
- if (!index) return;
-
- // Get project path for CLI session discovery
+async function handleWorkflowChange(projectId: string, _indexPath: string): Promise {
const projectPath = projectPathMap.get(projectId);
+ if (!projectPath) return;
- // Get tracked session IDs to avoid duplicates
- const trackedSessionIds = new Set(
- index.sessions.map(s => s.sessionId)
- );
-
- // Discover CLI sessions that aren't tracked by dashboard
- const cliSessions = projectPath
- ? discoverCliSessions(projectPath, trackedSessionIds, 50)
- : [];
-
- // Merge sessions: dashboard-tracked first, then CLI-discovered
- const allSessions = [
- ...index.sessions,
- ...cliSessions,
- ];
-
- // Sort all sessions by updatedAt (newest first)
- allSessions.sort((a, b) =>
- new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()
- );
-
- // Build workflow data with merged sessions
- const activeStates = ['running', 'waiting_for_input', 'detached', 'stale'];
- const currentExecution = allSessions.find(s => activeStates.includes(s.status)) ?? null;
-
- const data: WorkflowData = {
- currentExecution,
- sessions: allSessions.slice(0, 100), // Limit to 100 total sessions
- };
+ const data = await buildWorkflowData(projectId, projectPath);
// Check if data actually changed (avoid duplicate broadcasts)
const dataJson = JSON.stringify(data);
@@ -631,40 +479,14 @@ async function updateWatchedPaths(registry: Registry): Promise {
watcher.add(workflowIndexPath);
console.log(`[Watcher] Added workflow index: ${workflowIndexPath}`);
- // Broadcast initial workflow data (including CLI sessions)
- const index = await readWorkflowIndex(workflowIndexPath);
- if (index) {
- // Get tracked session IDs to avoid duplicates
- const trackedSessionIds = new Set(
- index.sessions.map(s => s.sessionId)
- );
-
- // Discover CLI sessions
- const cliSessions = discoverCliSessions(project.path, trackedSessionIds, 50);
-
- // Merge sessions
- const allSessions = [...index.sessions, ...cliSessions];
- allSessions.sort((a, b) =>
- new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()
- );
-
- // Build workflow data with merged sessions
- const activeStates = ['running', 'waiting_for_input', 'detached', 'stale'];
- const currentExecution = allSessions.find(s => activeStates.includes(s.status)) ?? null;
-
- const data: WorkflowData = {
- currentExecution,
- sessions: allSessions.slice(0, 100),
- };
-
- workflowCache.set(projectId, JSON.stringify(data));
- broadcast({
- type: 'workflow',
- timestamp: new Date().toISOString(),
- projectId,
- data,
- });
- }
+ const data = await buildWorkflowData(projectId, project.path);
+ workflowCache.set(projectId, JSON.stringify(data));
+ broadcast({
+ type: 'workflow',
+ timestamp: new Date().toISOString(),
+ projectId,
+ data,
+ });
}
// Add ROADMAP.md path for this project
@@ -832,7 +654,36 @@ export async function initWatcher(): Promise {
// This detects orphaned runner state files from crashed processes
for (const [projectId, project] of Object.entries(currentRegistry.projects)) {
try {
- reconcileRunners(project.path);
+ const cleanedUpIds = reconcileRunners(project.path);
+ const repoName = project.path.split('/').pop();
+
+ // Use CLI dashboard state as single source of truth for orchestration status.
+ // The legacy orchestration file can be out of sync (e.g., saying 'running'
+ // when the CLI has moved to 'waiting_merge'). Dashboard state is more reliable.
+ const dashboardState = readDashboardState(project.path);
+ const activeId = dashboardState?.active?.id;
+ const dashboardStatus = dashboardState?.active?.status;
+
+ // Fallback to legacy file only if dashboard state is unavailable
+ const legacyActive = orchestrationService.getActive(project.path);
+ const effectiveId = activeId || legacyActive?.id;
+ const effectiveStatus = dashboardStatus || legacyActive?.status;
+
+ console.log(`[Watcher] Checking ${repoName}: id=${effectiveId ?? 'none'}, dashboardStatus=${dashboardStatus ?? 'none'}, legacyStatus=${legacyActive?.status ?? 'none'}, runnerActive=${effectiveId ? isRunnerActive(effectiveId) : 'n/a'}`);
+
+ if (effectiveId && effectiveStatus === 'running' && !isRunnerActive(effectiveId)) {
+ // Only auto-restart if we found a runner state file (= dashboard was managing it).
+ // If no runner state file exists, this was likely CLI-managed or the server was
+ // stopped gracefully. User can click "Resume" to restart manually.
+ if (cleanedUpIds.has(effectiveId)) {
+ console.log(`[Watcher] Restarting runner for orchestration ${effectiveId} in ${repoName} (previous runner was orphaned)`);
+ runOrchestration(projectId, effectiveId).catch(error => {
+ console.error(`[Watcher] Failed to restart runner for ${effectiveId}:`, error);
+ });
+ } else {
+ console.log(`[Watcher] Active orchestration in ${repoName} has no previous runner state (manual resume available)`);
+ }
+ }
} catch (error) {
console.error(`[Watcher] Error reconciling runners for ${projectId}:`, error);
}
@@ -874,11 +725,19 @@ export async function getAllStates(): Promise