diff --git a/.claude-plugin/PLUGIN.md b/.claude-plugin/PLUGIN.md index 90e3a51..97da96d 100644 --- a/.claude-plugin/PLUGIN.md +++ b/.claude-plugin/PLUGIN.md @@ -4,7 +4,7 @@ Official Claude Code plugin for MAP Framework - Modular Agentic Planner with cog ## What is MAP Framework? -MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 9 specialized agents to improve code quality through systematic validation and iterative refinement. +MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 12 specialized agents to improve code quality through systematic validation and iterative refinement. **Based on research:** - [MAP Paper - Nature Communications (2025)](https://github.com/Shanka123/MAP) — 74% improvement in planning tasks @@ -12,7 +12,7 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 9 sp ## Features -### 9 Specialized Agents +### 12 Specialized Agents 1. **TaskDecomposer** — breaks goals into atomic subtasks 2. **Actor** — generates code and solutions @@ -21,8 +21,11 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 9 sp 5. **Evaluator** — scores solution quality (functionality, security, testability) 6. **Reflector** — extracts lessons from successes and failures 7. **Curator** — manages knowledge base (playbook) -8. **Orchestrator** — coordinates all agents -9. **DocumentationReviewer** — checks documentation completeness +8. **DocumentationReviewer** — checks documentation completeness +9. **Debate-Arbiter** — cross-evaluates variants with reasoning (Opus) +10. **Synthesizer** — merges multiple variants (Self-MoA) +11. **Research-Agent** — isolated codebase research +12. **Final-Verifier** — adversarial verification (Ralph Loop) ### Claude Code Integration @@ -33,11 +36,17 @@ MAP (Modular Agentic Planner) is a cognitive architecture that orchestrates 9 sp - `session-init` — loads ACE playbook at session start - `track-metrics` — tracks agent performance -**4 Slash Commands:** -- `/map-feature` — implement new features with full MAP workflow +**10 Slash Commands:** +- `/map-efficient` — implement features, refactor code, complex tasks with full MAP workflow - `/map-debug` — debug issues using MAP analysis -- `/map-refactor` — refactor code with impact analysis +- `/map-fast` — small, low-risk changes with minimal overhead +- `/map-debate` — multi-variant synthesis with Opus arbiter - `/map-review` — comprehensive review of changes +- `/map-check` — quality gates and verification +- `/map-plan` — architecture decomposition +- `/map-release` — release workflow with validation gates +- `/map-resume` — resume interrupted workflows +- `/map-learn` — extract and preserve lessons ### ACE Learning System @@ -97,14 +106,14 @@ cp map-framework/.claude/settings.hooks.json your-project/.claude/ ## Quick Start ```bash -# Feature development -/map-feature implement user authentication with JWT tokens +# Feature development / refactoring / complex tasks +/map-efficient implement user authentication with JWT tokens # Debugging /map-debug fix the API 500 error on login endpoint -# Refactoring -/map-refactor refactor UserService class with dependency injection +# Small, low-risk changes +/map-fast add environment variable for API timeout # Code review /map-review review the recent changes in auth.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index fb557f0..7491397 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ 
"id": "map-framework", "name": "MAP Framework", "version": "1.0.0", - "description": "Modular Agentic Planner - Cognitive architecture for AI agents inspired by prefrontal cortex functions. Orchestrates 9 specialized agents for development with automatic quality validation.", + "description": "Modular Agentic Planner - Cognitive architecture for AI agents inspired by prefrontal cortex functions. Orchestrates 12 specialized agents for development with automatic quality validation.", "author": "azalio", "license": "MIT", "source": "github:azalio/map-framework", @@ -32,10 +32,10 @@ "development" ], "features": [ - "9 specialized agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, Orchestrator, DocumentationReviewer)", + "12 specialized agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "ACE learning system with persistent knowledge base", "5 Claude Code hooks for automation", - "4 slash commands (/map-feature, /map-debug, /map-refactor, /map-review)", + "10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-release, /map-resume, /map-learn)", "Professional code review integration", "Cost optimization (40-60% savings)" ], diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index f0863de..3b4680b 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "map-framework", "version": "1.0.0", - "description": "Modular Agentic Planner (MAP) - Cognitive architecture for AI agents with 9 specialized agents, ACE learning system, and professional code review integration", + "description": "Modular Agentic Planner (MAP) - Cognitive architecture for AI agents with 12 specialized agents, ACE learning system, and professional code review integration", "author": "azalio", "license": "MIT", "repository": "https://github.com/azalio/map-framework", @@ -34,10 +34,10 @@ ] }, "features": [ - "9 specialized MAP agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, Orchestrator, DocumentationReviewer)", + "12 specialized MAP agents (TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Debate-Arbiter, Synthesizer, Research-Agent, Final-Verifier)", "ACE (Agentic Context Engineering) learning system with persistent knowledge base", "5 Claude Code hooks for automation (validate-agent-templates, auto-store-knowledge, enrich-context, session-init, track-metrics)", - "4 slash commands (/map-feature, /map-debug, /map-refactor, /map-review)", + "10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-review, /map-check, /map-plan, /map-release, /map-resume, /map-learn)", "Professional code review with claude-reviewer MCP", "Chain-of-thought reasoning with sequential-thinking MCP", "Semantic pattern search with embeddings cache", @@ -70,9 +70,9 @@ "documentation-reviewer" ], "commands": [ - "map-feature", + "map-efficient", "map-debug", - "map-refactor", + "map-fast", "map-review" ], "hooks": [ diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index 01cfaf1..0c69d83 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -271,15 +271,15 @@ Task( # Required Output Structure -**CRITICAL: Actor outputs CODE AS TEXT, NOT file edits** +**Actor applies code directly using Edit/Write tools.** -You are a **proposal generator**, NOT a code executor. 
Your output is reviewed by Monitor before application. +You are a code implementer. Read affected files, then apply changes with Edit/Write tools. +Monitor will validate the written code afterward. -- ✅ DO: Output complete code in markdown code blocks -- ❌ NEVER: Use Edit, Write, or MultiEdit tools -- ❌ NEVER: Modify files directly -- 📋 WHY: workflow-gate.py will BLOCK Edit/Write until actor+monitor steps complete -- 🔄 FLOW: You output → Monitor reviews → Orchestrator applies with Edit/Write +- Use Edit tool for modifying existing files +- Use Write tool for creating new files +- Read files before editing to understand current state +- Apply changes incrementally — one logical change per Edit call --- @@ -422,7 +422,7 @@ Only include if changes affect: ## Pre-Submission Checklist ### Code Quality (Mandatory) -- [ ] Follows {{standards_url}} style guide +- [ ] Follows {{standards_doc}} style guide - [ ] Complete implementations (no placeholders, no `...`) - [ ] Self-documenting names (clear variables/functions) - [ ] Comments for complex logic only @@ -502,6 +502,29 @@ When assessing performance impact, use these as default baselines unless project **Protocol**: Document rationale → Add TODO if needed → Proceed +### Evidence File (Artifact-Gated Validation) + +After applying all code changes, write an evidence file so the orchestrator can verify this step ran. Use Bash (not Write tool) to create the file: + +```bash +cat > .map//evidence/actor_.json << 'EVIDENCE' +{ + "phase": "ACTOR", + "subtask_id": "", + "timestamp": "", + "summary": "", + "aag_contract": "", + "files_changed": [""], + "status": "applied" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.3")` will reject the step. + --- @@ -670,8 +693,8 @@ output: - **Project**: {{project_name}} - **Language**: {{language}} - **Framework**: {{framework}} -- **Standards**: {{standards_url}} -- **Branch**: {{branch}} +- **Standards**: {{standards_doc}} +- **Branch**: {{branch_name}} - **Allowed Scope**: {{allowed_scope}} - **Related Files**: {{related_files}} @@ -733,7 +756,7 @@ output: Follow this protocol exactly — do not infer "how seniors write" or add stylistic flourishes. -1. **Style standard**: Use {{standards_url}}. If unavailable: Python→PEP8, JS/TS→Google Style, Go→gofmt, Rust→rustfmt. +1. **Style standard**: Use {{standards_doc}}. If unavailable: Python→PEP8, JS/TS→Google Style, Go→gofmt, Rust→rustfmt. 2. **Architecture**: Dependency injection where applicable. No global mutable state. 3. **Naming**: Self-documenting (`user_count` not `n`, `is_valid` not `flag`). No abbreviations except industry-standard ones (URL, HTTP, ID). 4. **Intent comments**: Add a one-line `# Intent: ` comment above any non-obvious logic block. Do NOT comment obvious code. 
diff --git a/.claude/agents/debate-arbiter.md b/.claude/agents/debate-arbiter.md index d8d534b..509baf5 100644 --- a/.claude/agents/debate-arbiter.md +++ b/.claude/agents/debate-arbiter.md @@ -3,7 +3,7 @@ name: debate-arbiter description: Cross-evaluates Actor variants with explicit reasoning and synthesizes optimal solution (MAP Debate) model: opus version: 1.0.0 -last_updated: 2025-01-08 +last_updated: 2026-02-14 --- # QUICK REFERENCE (Read First) diff --git a/.claude/agents/documentation-reviewer.md b/.claude/agents/documentation-reviewer.md index ea32d0f..4c97d1f 100644 --- a/.claude/agents/documentation-reviewer.md +++ b/.claude/agents/documentation-reviewer.md @@ -444,7 +444,7 @@ mcp__deepwiki__ask_question( # 4. Check historical patterns (if mem0 available) mcp__mem0__map_tiered_search( query="CRD installation documentation patterns", - top_k=5, + limit=5, similarity_threshold=0.7 ) ``` diff --git a/.claude/agents/final-verifier.md b/.claude/agents/final-verifier.md index ea7d124..9c79083 100644 --- a/.claude/agents/final-verifier.md +++ b/.claude/agents/final-verifier.md @@ -18,36 +18,36 @@ You catch premature completion and hallucinated success. | Data | Source | How to Read | |------|--------|-------------| -| Original Goal | `.map/task_plan_.md` | Section "## Goal" or first paragraph | -| Acceptance Criteria | `.map/task_plan_.md` | Section "## Acceptance Criteria" (table) | -| Completed Subtasks | `.map/progress_.md` | Checkboxes marked `[x]` | +| Original Goal | `.map//task_plan_.md` | Section "## Goal" or first paragraph | +| Acceptance Criteria | `.map//task_plan_.md` | Section "## Acceptance Criteria" (table) | +| Completed Subtasks | `.map//progress_.md` | Checkboxes marked `[x]` | | Global Validation | Task argument `$VALIDATION_CRITERIA` | Passed from map-efficient.md | ### OUTPUT Destinations (where to store results) | Data | Destination | Format | Written By | |------|-------------|--------|------------| -| Verification Result | `.map/progress_.md` | Append "## Final Verification" section | **final-verifier agent** | +| Verification Result | `.map//progress_.md` | Append "## Final Verification" section | **final-verifier agent** | | Structured Result | `.map//final_verification.json` | JSON (for programmatic access) | **final-verifier agent** | | Root Cause (if failed) | `.map//final_verification.json` | In `root_cause` field | **final-verifier agent** | **WHO WRITES FILES:** - **final-verifier agent** writes verification results to BOTH markdown and JSON - **Orchestrator (map-efficient.md)** reads results and decides next action (COMPLETE/RE_DECOMPOSE/ESCALATE) -- **Orchestrator (map-efficient.md)** ensures Acceptance Criteria section exists in `task_plan_.md` (derived from decomposition output) +- **Orchestrator (map-efficient.md)** ensures Acceptance Criteria section exists in `.map//task_plan_.md` (derived from decomposition output) **IMPORTANT:** Always use sanitized branch name (e.g., `feature-foo` not `feature/foo`). 
**SOURCE OF TRUTH CONTRACT:** - `.map//final_verification.json` is the **ONLY** source of truth for orchestrator decisions -- `.map/progress_.md` "## Final Verification" section is for **human readability only** +- `.map//progress_.md` "## Final Verification" section is for **human readability only** - **Orchestrator (map-efficient.md) MUST read JSON**, not parse markdown - Both must be written, but only JSON is used programmatically ## Verification Protocol ### Step 1: Goal Extraction -Read `.map/task_plan_.md` to extract: +Read `.map//task_plan_.md` to extract: - Original goal from "## Goal" section - Acceptance criteria from "## Acceptance Criteria" table (if present) @@ -101,7 +101,7 @@ Score confidence (0.0-1.0): **CRITICAL:** `root_cause` is REQUIRED if `passed=false` -### 2. Append to `.map/progress_.md` +### 2. Append to `.map//progress_.md` ```markdown ## Final Verification diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md index 7bb6cbd..14273cd 100644 --- a/.claude/agents/monitor.md +++ b/.claude/agents/monitor.md @@ -94,8 +94,8 @@ If implementation deviates from the AAG contract — `valid: false` — regardle |-------------|------|-------------|---------| | `{{project_name}}` | string | Project identifier | `"auth-service"` | | `{{language}}` | enum | Primary language | `"python"`, `"typescript"`, `"go"` | -| `{{solution}}` | string | Code/docs to review | Full code block or diff | -| `{{requirements}}` | string | Subtask requirements | "Implement JWT validation" | +| `{{solution}}` | string | Code/docs to review (in MAP workflow: provided via `` tag) | Full code block or diff | +| `{{requirements}}` | string | Subtask requirements (in MAP workflow: provided via `` tag) | "Implement JWT validation" | | `{{review_mode}}` | enum | Review scope mode | `"full"` or `"diff"` | ### Optional Placeholders @@ -105,6 +105,7 @@ If implementation deviates from the AAG contract — `valid: false` — regardle | `{{framework}}` | string | `""` | Framework/runtime (Express, FastAPI, etc.) | | `{{standards_doc}}` | string | `""` | URL/path to style guide | | `{{security_policy}}` | string | `""` | URL/path to security policy | +| `{{changed_files}}` | array | `[]` | List of modified file paths (for static analysis) | | `{{subtask_description}}` | string | `""` | Additional context | | `{{existing_patterns}}` | array | `[]` | Learned patterns from previous reviews | | `{{feedback}}` | array | `[]` | Previous review findings to verify | @@ -2494,3 +2495,27 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Only MEDIUM/LOW issues → valid=true (with feedback) + +### Evidence File (Artifact-Gated Validation) + +**Exception to read-only rule**: Monitor writes evidence files to `.map/` artifacts directory via Bash (not Write tool). This does NOT violate the read-only-for-project-code rule — `.map/` is a workflow artifact directory, not project code. + +After completing validation, write an evidence file: + +```bash +cat > .map//evidence/monitor_.json << 'EVIDENCE' +{ + "phase": "MONITOR", + "subtask_id": "", + "timestamp": "", + "valid": true, + "issues_found": , + "recommendation": "approve|reject|revise" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.4")` will reject the step. 
diff --git a/.claude/agents/predictor.md b/.claude/agents/predictor.md index 076c078..976ad2e 100644 --- a/.claude/agents/predictor.md +++ b/.claude/agents/predictor.md @@ -124,16 +124,16 @@ CONFLICT (Category B: -0.10): ### Position in MAP Pipeline ``` -Actor (propose changes) - ↓ analyzer_output +Actor (implement changes) + ↓ code changes applied +Monitor (validate correctness) + ↓ validation_result PREDICTOR (assess impact) ← YOU ARE HERE ↓ prediction_output -Monitor (validate at runtime) - ↓ validation_result -Evaluator (score quality) +[Evaluator — only in /map-debug and /map-review] ``` -### Upstream (Actor → Predictor) +### Upstream (Actor → Monitor → Predictor) **Input Contract Version**: 1.0 | Field from Actor | How Predictor Uses It | @@ -1784,6 +1784,28 @@ When an edge case is detected, it MUST appear in THREE places: +### Evidence File (Artifact-Gated Validation) + +After completing impact analysis, write an evidence file via Bash: + +```bash +cat > .map//evidence/predictor_.json << 'EVIDENCE' +{ + "phase": "PREDICTOR", + "subtask_id": "", + "timestamp": "", + "risk_assessment": "", + "confidence_score": <0.30-0.95>, + "tier_selected": "<1|2|3>" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.6")` will reject the step. + ## Confidence Scoring Methodology diff --git a/.claude/agents/reflector.md b/.claude/agents/reflector.md index db93dc6..58f75f3 100644 --- a/.claude/agents/reflector.md +++ b/.claude/agents/reflector.md @@ -3,7 +3,7 @@ name: reflector description: Extracts structured lessons from successes and failures (ACE) model: sonnet version: 4.0.0 -last_updated: 2025-01-12 +last_updated: 2026-01-12 --- # IDENTITY @@ -208,10 +208,12 @@ Analyze the following execution attempt: {{predictor_analysis}} ``` +{{#if evaluator_scores}} ## Evaluator Quality Scores ```json {{evaluator_scores}} ``` +{{/if}} ## Execution Outcome {{execution_outcome}} diff --git a/.claude/agents/research-agent.md b/.claude/agents/research-agent.md index 3309b0e..c2b279d 100644 --- a/.claude/agents/research-agent.md +++ b/.claude/agents/research-agent.md @@ -178,7 +178,7 @@ When orchestrator provides `findings_file` path in prompt, append research resul **Input Signal** (from orchestrator): ``` -Findings file: .map/findings_feature-auth.md +Findings file: .map/feature-auth/findings_feature-auth.md ``` **Action**: diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md index 8f79219..e0715b6 100644 --- a/.claude/agents/task-decomposer.md +++ b/.claude/agents/task-decomposer.md @@ -50,6 +50,10 @@ machine-readable blueprint for the Actor/Monitor pipeline. │ └─ Map all dependencies (no cycles!) │ │ └─ Order by dependency (foundations first) │ │ └─ Add risks for complexity_score ≥ 7 │ +│ └─ CODE CHANGES ONLY: subtasks must produce code diffs. │ +│ Do NOT create operational subtasks (rollback plans, │ +│ integration test plans, deployment docs). These belong │ +│ in the plan's Notes section, not as separate subtasks. │ │ │ │ 6. VALIDATE (run checklist) │ │ └─ Circular dependency check (must be acyclic DAG) │ @@ -278,7 +282,7 @@ Subtasks should be ordered by dependency: ### Acceptance Criteria Section (Ralph Loop Integration) -When writing task plans to `.map/task_plan_.md`, the orchestrator generates an Acceptance Criteria section from subtask validation_criteria. 
The format is: +When writing task plans to `.map//task_plan_.md`, the orchestrator generates an Acceptance Criteria section from subtask validation_criteria. The format is: ```markdown ## Acceptance Criteria diff --git a/.claude/commands/map-check.md b/.claude/commands/map-check.md index 6ae6006..126947d 100644 --- a/.claude/commands/map-check.md +++ b/.claude/commands/map-check.md @@ -9,7 +9,7 @@ If no `.map//workflow_state.json` exists, run full quality suite: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') STATE_FILE=".map/${BRANCH}/workflow_state.json" if [[ ! -f "$STATE_FILE" ]]; then @@ -109,10 +109,10 @@ If `.map//workflow_state.json` exists, verify subtask completion. Read the current state to understand what was completed: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') STATE_FILE=".map/${BRANCH}/workflow_state.json" -cat "$STATE_FILE" +# Use Read tool to load the state file contents ``` ### Step 2: Validate All Subtasks Complete @@ -159,7 +159,7 @@ Read task_plan_.md to get acceptance criteria: ```bash PLAN_FILE=".map/${BRANCH}/task_plan_${BRANCH}.md" -cat "$PLAN_FILE" +# Use Read tool to load the plan file contents ``` ### Step 4: Call Final Verifier @@ -195,7 +195,7 @@ Even if verifier approves, run automated checks: **Tests:** ```bash -TEST_CMD=$(jq -r '.test_command // "pytest"' .claude/ralph-loop-config.json) +TEST_CMD="pytest" # Default; override if project uses different test runner echo "Running final tests..." eval "$TEST_CMD" @@ -203,7 +203,7 @@ eval "$TEST_CMD" # If tests fail and you want a durable artifact for follow-up/debugging, # re-run capturing output and parse to .map//diagnostics.json: # -# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') # LOG_FILE=".map/${BRANCH}/tests.log" # mkdir -p ".map/${BRANCH}" # ( $TEST_CMD ) >"$LOG_FILE" 2>&1 @@ -218,12 +218,12 @@ fi **Linter:** ```bash -LINT_CMD=$(jq -r '.lint_command // "make lint"' .claude/ralph-loop-config.json) +LINT_CMD="make lint" # Default; override if project uses different linter echo "Running final lint..." eval "$LINT_CMD" # Optional (structured diagnostics): -# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') # LOG_FILE=".map/${BRANCH}/lint.log" # mkdir -p ".map/${BRANCH}" # ( $LINT_CMD ) >"$LOG_FILE" 2>&1 diff --git a/.claude/commands/map-debate.md b/.claude/commands/map-debate.md index 00321a8..f5d60ee 100644 --- a/.claude/commands/map-debate.md +++ b/.claude/commands/map-debate.md @@ -303,20 +303,18 @@ retry_context = { If Monitor returns `escalation_required === true`, ask user: ``` -AskUserQuestion( - questions: [ - { - header: "Escalation Required", - question: "⚠️ Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", - multiSelect: false, - options: [ - { label: "YES - Proceed Anyway", description: "Continue (run Predictor if required, then apply changes)." }, - { label: "REVIEW - Show Details", description: "Show synthesis_reasoning + comparison_matrix, then ask again." 
}, - { label: "NO - Abort Subtask", description: "Do not apply changes; wait for human review." } - ] - } - ] -) +AskUserQuestion(questions=[ + { + "header": "Escalation", + "question": "Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", + "multiSelect": false, + "options": [ + {"label": "YES - Proceed", "description": "Continue (run Predictor if required, then apply changes)."}, + {"label": "REVIEW - Details", "description": "Show synthesis_reasoning + comparison_matrix, then ask again."}, + {"label": "NO - Abort", "description": "Do not apply changes; wait for human review."} + ] + } +]) ``` ### 2.10 Conditional Predictor @@ -389,9 +387,9 @@ If none found: mark gate as skipped and proceed. | Aspect | map-efficient | map-debate | |--------|---------------|------------| -| Variant generation | Conditional (Self-MoA check) | Always | -| Synthesis agent | synthesizer (sonnet) | debate-arbiter (opus) | -| Output | conflict_resolutions | comparison_matrix + decision_rationales + synthesis_reasoning | +| Variant generation | Single variant (one Actor) | Always 3 variants | +| Synthesis agent | N/A (single Actor) | debate-arbiter (opus) | +| Output | Direct implementation | comparison_matrix + decision_rationales + synthesis_reasoning | | Cost | Lower | ~3-5x higher (opus model) | | Use case | Efficiency | Reasoning transparency | diff --git a/.claude/commands/map-debug.md b/.claude/commands/map-debug.md index 19bbfd3..588b9b3 100644 --- a/.claude/commands/map-debug.md +++ b/.claude/commands/map-debug.md @@ -129,7 +129,7 @@ Provide FULL file content for changes." ### Monitor Validation -After each fix: +After each fix (max 5 Actor->Monitor retry iterations per subtask): ``` Task( diff --git a/.claude/commands/map-efficient.md b/.claude/commands/map-efficient.md index e97f988..8994064 100644 --- a/.claude/commands/map-efficient.md +++ b/.claude/commands/map-efficient.md @@ -14,7 +14,27 @@ State machine enforces sequencing, Python validates completion, hooks inject rem 1. Execute steps in order using state machine guidance 2. Use exact `subagent_type` specified — never substitute 3. Call each agent individually — no combining or skipping -4. Max 5 retry iterations per subtask +4. Max 5 retry iterations per subtask (note: /map-fast uses max 3) +5. Agent phases (ACTOR 2.3, MONITOR 2.4, PREDICTOR 2.6) require evidence files. + Each agent writes `.map//evidence/_.json` after completing work. + `validate_step` rejects the step if evidence is missing or malformed. + +## Intentional Agent Omissions + +/map-efficient does NOT use these agents (by design): +- **Evaluator** — quality scoring not needed; Monitor validates correctness directly +- **Reflector** — lesson extraction is a separate step via `/map-learn` +- **Curator** — pattern storage is a separate step via `/map-learn` + +This is NOT a violation of MAP agent rules. Learning is decoupled into `/map-learn` (optional, run after workflow completes) to reduce token usage during execution. + +## Dual State Files + +/map-efficient uses two state files in `.map//`: +- **`step_state.json`** — Orchestrator canonical state. Tracks current step, retry counts, circuit breaker. Written/read by `map_orchestrator.py`. This is the source of truth for workflow resumption. +- **`workflow_state.json`** — Enforcement gates. Tracks subtask completion for `workflow-gate.py` hook validation. Written by `map_step_runner.py`. + +Both files must stay in sync. 
The orchestrator updates `step_state.json` on every step; `workflow_state.json` is updated at phase boundaries (INIT_STATE, UPDATE_STATE). ## Architecture Overview @@ -25,7 +45,7 @@ State machine enforces sequencing, Python validates completion, hooks inject rem └─────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────┐ -│ map-efficient.md (THIS FILE - ~150 lines) │ +│ map-efficient.md (THIS FILE - ~540 lines) │ │ 1. Load state → Get next step instruction │ │ 2. Route to appropriate executor based on step phase │ │ 3. Execute step (Actor/Monitor/mem0/tests/etc) │ @@ -41,6 +61,21 @@ State machine enforces sequencing, Python validates completion, hooks inject rem **Task:** $ARGUMENTS +## Step 0: Detect Existing Plan from /map-plan + +Before starting the state machine, check if `/map-plan` already produced artifacts for this branch: + +```bash +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') +if [ -f ".map/${BRANCH}/task_plan_${BRANCH}.md" ] && [ ! -f ".map/${BRANCH}/step_state.json" ]; then + # Plan exists but execution hasn't started — resume from plan + # step_state.json is the orchestrator's canonical state (see "Dual State Files" above) + python3 .map/scripts/map_orchestrator.py resume_from_plan +fi +``` + +If `resume_from_plan` succeeds, the orchestrator skips DECOMPOSE, INIT_PLAN, and REVIEW_PLAN (the plan was already approved in /map-plan) and starts from CHOOSE_MODE. + ## Step 1: Get Next Step Instruction ```bash @@ -53,7 +88,7 @@ IS_COMPLETE=$(echo "$NEXT_STEP" | jq -r '.is_complete') # Check if workflow complete if [ "$IS_COMPLETE" = "true" ]; then - echo "✅ All subtasks complete. Running final verification..." + echo "All subtasks complete. Running final verification..." # Go to Step 3: Final Verification fi ``` @@ -89,23 +124,26 @@ Purpose: Actor compiles this line into code. Monitor verifies against it. This eliminates reasoning overhead — the contract IS the specification.""" ) -# After decomposer returns: extract subtask sequence + aag_contracts, save to state -# Update state: python3 .map/scripts/map_orchestrator.py validate_step "1.0" +# After decomposer returns: +# 1. Extract subtask IDs from blueprint and register them in state: +# python3 .map/scripts/map_orchestrator.py set_subtasks ST-001 ST-002 ST-003 +# 2. Validate step completion: +# python3 .map/scripts/map_orchestrator.py validate_step "1.0" ``` ### Phase: INIT_PLAN (1.5) -Generate `.map/task_plan_.md` from blueprint: +Generate `.map//task_plan_.md` from blueprint: - Header: Goal from blueprint.summary -- For each subtask: ## ST-XXX section with **Status:** pending -- First subtask: **Status:** in_progress -- Terminal State: **Status:** pending +- For each subtask: ### ST-XXX section with `- **Status:** pending` +- First subtask: `- **Status:** in_progress` +- Terminal State: `- **Status:** pending` ### Phase: REVIEW_PLAN (1.55) Present the generated plan and require explicit user approval before any execution state is initialized. -1. Read the plan: `.map/task_plan_.md` +1. Read the plan: `.map//task_plan_.md` 2. Show a short summary in this format: ```text @@ -121,10 +159,10 @@ Notes: ═══════════════════════════════════════════════════ ``` -3. Ask for approval using AskUserQuestionTool (example): +3. 
Ask for approval using AskUserQuestion (example): ``` -AskUserQuestionTool(questions=[ +AskUserQuestion(questions=[ { "question": "Approve this plan and start execution?", "header": "Plan approval", @@ -163,20 +201,20 @@ Note: In `batch` mode the orchestrator auto-skips the pause step (2.11). ### Phase: INIT_STATE (1.6) -```bash -# Create workflow_state.json -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -cat > .map/${BRANCH}/workflow_state.json <<'EOF' +Get the branch name via Bash: `git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||'` + +Then use the **Write** tool to create `.map//workflow_state.json`: + +```json { "workflow": "map-efficient", - "started_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "started_at": "", "current_subtask": null, "current_state": "INITIALIZED", "completed_steps": {}, "pending_steps": {}, "subtask_sequence": [] } -EOF ``` ### Phase: XML_PACKET (2.0) @@ -199,9 +237,9 @@ xml_packet = create_xml_packet(subtask) # Tiered search: branch → project → org mcp__mem0__map_tiered_search( query="[subtask description]", - top_k=5, - user_id="[branch_name]", - agent_id="map-efficient" + limit=5, + user_id="org:[org_name]", + run_id="proj:[project_name]:branch:[branch_name]" ) # Re-rank by relevance, pass top 3 to Actor @@ -219,7 +257,7 @@ if requires_research(subtask): File patterns: [relevant globs] Intent: locate Max tokens: 1500 -Findings file: .map/findings_{branch}.md +Findings file: .map/{branch}/findings_{branch}.md DISTILLATION RULE: Write ONLY actionable findings to the file: - file paths + line ranges + function signatures @@ -241,7 +279,7 @@ Task( [paste from .map//current_packet.xml] - + [top context_patterns from mem0 + relevance_score] @@ -299,8 +337,8 @@ if monitor_output["valid"] == false: # Go back to Phase: ACTOR with Monitor feedback # Actor will fix issues and re-apply code else: - # Escalate to user (3-strike protocol) - AskUserQuestion: CONTINUE / SKIP / ABORT + # Escalate to user (retry limit reached) + AskUserQuestion(questions=[{"question": "Monitor retry limit reached. How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) ``` ### Phase: PREDICTOR (2.6) @@ -328,8 +366,8 @@ Optional: analyzer_output, user_context""" # Code already applied by Actor, validated by Monitor # Update workflow state to mark subtask progress -python3 scripts/map_step_runner.py update_workflow_state "ST-XXX" "validated" "VALIDATED" -python3 scripts/map_step_runner.py update_plan_status "ST-XXX" "in_progress" +python3 .map/scripts/map_step_runner.py update_workflow_state "ST-XXX" "validated" "VALIDATED" +python3 .map/scripts/map_step_runner.py update_plan_status "ST-XXX" "in_progress" ``` ### Phase: TESTS_GATE (2.8) @@ -407,7 +445,7 @@ Only used when execution_mode is `step_by_step`. - Ask the user whether to continue to the next subtask. - If execution_mode is `batch`, the orchestrator auto-skips this step. 
-## Step 2.5: Validate Step Completion +## Step 2a: Validate Step Completion After executing step, validate and update state: @@ -417,11 +455,11 @@ python3 .map/scripts/map_orchestrator.py validate_step "$STEP_ID" # Update plan status if subtask complete if [ "$PHASE" = "VERIFY_ADHERENCE" ]; then - python3 scripts/map_step_runner.py update_plan_status "$SUBTASK_ID" "complete" + python3 .map/scripts/map_step_runner.py update_plan_status "$SUBTASK_ID" "complete" fi ``` -## Step 2.6: Continue or Complete (Context Distillation) +## Step 2b: Continue or Complete (Context Distillation) ```bash # Get next step @@ -445,7 +483,7 @@ else # Recurse: Launch new context with minimal state transfer echo "Next step: $(echo "$NEXT_STEP" | jq -r '.step_id')" - # Continue with Step 1 (fresh invocation via map-efficient-step) + # Continue with Step 1 (loop back to get_next_step, or use /map-resume in a fresh session) fi ``` @@ -462,7 +500,8 @@ TOOL_COUNT=$(echo "$CB_DATA" | jq -r '.tool_count') MAX_ITERATIONS=$(echo "$CB_DATA" | jq -r '.max_iterations') if [ "$TOOL_COUNT" -ge "$MAX_ITERATIONS" ]; then - AskUserQuestion: "Circuit breaker triggered. RESET_LIMITS or ABORT?" + # Ask user how to proceed + AskUserQuestion(questions=[{"question": "Circuit breaker triggered. How to proceed?", "header": "Circuit breaker", "options": [{"label": "Reset limits", "description": "Reset counters and continue workflow"}, {"label": "Abort", "description": "Stop workflow immediately"}], "multiSelect": false}]) fi ``` @@ -499,16 +538,21 @@ if verification["passed"] and verification["confidence"] >= 0.7: update_terminal_state("complete") print("✅ Workflow complete! Optional: Run /map-learn to preserve patterns.") -elif thrashing_detected(): - AskUserQuestion: "Thrashing detected. FORCE_COMPLETE / CONTINUE / ABORT?" +# NOTE: The conditions below are pseudocode representing orchestrator-level +# logic. The actual implementation uses check_circuit_breaker and retry_count +# from step_state.json to detect these conditions. + +elif verification["retry_count"] > verification["max_retries"]: + # Thrashing detected - too many retries without progress + AskUserQuestion(questions=[{"question": "Thrashing detected (repeated failures). How to proceed?", "header": "Thrashing", "options": [{"label": "Force complete", "description": "Mark as complete despite failures"}, {"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) -elif plan_iteration < max_redecompositions: - # Re-decomposition - Task(subagent_type="task-decomposer", mode="re_decomposition", ...) +elif check_circuit_breaker()["triggered"] == false: + # Re-decomposition: break remaining work into new subtasks + Task(subagent_type="task-decomposer", description="Re-decompose remaining work", prompt="...") else: # Max iterations reached - AskUserQuestion: "Max iterations reached. RESET_LIMITS / ABORT?" + AskUserQuestion(questions=[{"question": "Max iterations reached. 
How to proceed?", "header": "Max iterations", "options": [{"label": "Reset limits", "description": "Reset counters and continue"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) ``` ## Step 4: Summary diff --git a/.claude/commands/map-efficient.md.backup b/.claude/commands/map-efficient.md.backup deleted file mode 100644 index aff2654..0000000 --- a/.claude/commands/map-efficient.md.backup +++ /dev/null @@ -1,994 +0,0 @@ ---- -description: Token-efficient MAP workflow with conditional optimizations ---- - -# MAP Efficient Workflow - -## Execution Rules - -1. Execute steps in order without pausing; only ask user if (a) `task-decomposer` returns blocking `analysis.open_questions` with no subtasks OR (b) Monitor sets `escalation_required === true` (sub-steps explicitly marked "parallel" may run concurrently) -2. Use exact `subagent_type` specified — never substitute `general-purpose` -3. Call each agent individually — no combining or skipping steps -4. Max 5 retry iterations per subtask - -## ⛔ WORKFLOW ENFORCEMENT (Read Every Subtask) - -**CRITICAL ANTI-DRIFT RULE:** - -Before writing ANY implementation code, you MUST verify: - -```text -┌─────────────────────────────────────────────────────────────────┐ -│ ⚠️ SELF-CHECK: Am I about to write code myself? │ -│ │ -│ If YES → STOP! You are violating workflow. │ -│ Use Task(subagent_type="actor") instead. │ -│ │ -│ If calling Task tool → Continue. │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**BEFORE each Agent call, output this checkpoint:** -```text -CHECKPOINT: Calling [agent_name] for ST-XXX -``` - -**VIOLATION INDICATORS (If you see yourself doing these, STOP):** -- Writing code blocks without calling Actor first -- Describing implementation approach without Actor -- Saying "Let me implement..." without Task tool -- Writing function/class definitions directly - -**CORRECT PATTERN:** -1. Output: `CHECKPOINT: Calling actor for ST-001` -2. Call: `Task(subagent_type="actor", ...)` -3. Wait for Actor output -4. Output: `CHECKPOINT: Calling monitor for ST-001` -5. Call: `Task(subagent_type="monitor", ...)` -6. Wait for Monitor output - -**Task:** $ARGUMENTS - -## Workflow Overview - -```text -1. DECOMPOSE → task-decomposer -1.5. INIT PLANNING → generate .map/task_plan_.md from blueprint -2. FOR each subtask: - a. CONTEXT → mem0 tiered search (Actor will run `mcp__mem0__map_tiered_search` per protocol; orchestrator MAY run additional mem0 searches to augment context) - b. RESEARCH → if existing code understanding needed - c. IF Self-MoA (--self-moa OR risk_level:high OR complexity_score>=7 OR security_critical:true): - → 3 Actors (security/performance/simplicity) - → 3 Monitors → Synthesizer → Final Monitor - ELSE: - → Actor → Monitor - d. If invalid: retry with feedback (max 5) - e. If risk_level ∈ {high, medium} OR escalation_required === true: → Predictor - f. Apply changes -3. SUMMARY → optionally suggest /map-learn -``` - -## Step 1: Task Decomposition - -```python -Task( - subagent_type="task-decomposer", - description="Decompose task into subtasks", - prompt="Break down into ≤20 atomic subtasks and RETURN ONLY JSON matching task-decomposer schema v2.0 (schema_version, analysis, blueprint{subtasks[]}). 
- -Task: $ARGUMENTS - -Hard requirements: -- Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes) -- Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency -- Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high) -- Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access) -- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys" -) -``` - -## Step 1.5: Initialize Planning Session - -**REQUIRED**: Generate persistent plan file from task-decomposer blueprint. - -```bash -# 1. Create .map/ directory and planning files -.claude/skills/map-planning/scripts/init-session.sh -``` - -```bash -# 2. Generate task_plan from blueprint JSON -# Get branch-scoped plan path -PLAN_PATH=$(.claude/skills/map-planning/scripts/get-plan-path.sh) - -# Write plan content from blueprint: -# - Header: blueprint.summary as Goal -# - For each subtask: ## ST-XXX section with **Status:** pending -# - First subtask: **Status:** in_progress -# - Terminal State: **Status:** pending -``` - -**Plan file format** (`.map/task_plan_.md`): - -```markdown -# Task Plan: [blueprint.summary] - -## Goal -[blueprint.summary] - -## Current Phase -ST-001 - -## Phases - -### ST-001: [subtask.title] -**Status:** in_progress -Risk: [risk_level] -Complexity: [complexity_score] -Files: [affected_files] - -Validation: -- [ ] [validation_criteria[0]] -- [ ] [validation_criteria[1]] - -### ST-002: [subtask.title] -**Status:** pending -... - -## Terminal State -**Status:** pending -``` - -**Why required:** -- Enables resumption after context reset -- Prevents goal drift in long workflows -- Provides explicit state tracking for orchestrator - -## Step 1.6: Initialize Workflow State - -**REQUIRED**: Create workflow state tracking file for enforcement. 
- -```bash -# Get branch name (sanitized) -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') - -# Create workflow state file -cat > .map/${BRANCH}/workflow_state.json <<'EOF' -{ - "workflow": "map-efficient", - "started_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", - "current_subtask": null, - "current_state": "INITIALIZED", - "completed_steps": {}, - "pending_steps": {}, - "subtask_sequence": [] -} -EOF -``` - -**State file schema** (`.map//workflow_state.json`): - -```json -{ - "workflow": "map-efficient", - "started_at": "2026-01-27T10:30:00Z", - "current_subtask": "ST-001", - "current_state": "ACTOR_CALLED", - "completed_steps": { - "ST-001": ["xml_packet", "mem0_search", "actor"] - }, - "pending_steps": { - "ST-001": ["monitor", "predictor", "tests", "linter"], - "ST-002": ["xml_packet", "mem0_search", "research", "actor", "monitor", "tests", "linter"] - }, - "subtask_sequence": ["ST-001", "ST-002", "ST-003"] -} -``` - -**Valid states:** -- `INITIALIZED` - Workflow started, no subtask active -- `XML_PACKET_CREATED` - AI packet created for subtask -- `CONTEXT_LOADED` - mem0 search completed -- `RESEARCH_DONE` - Research agent completed -- `ACTOR_CALLED` - Actor generated implementation -- `MONITOR_PASSED` - Monitor validated changes -- `PREDICTOR_ANALYZED` - Predictor assessed impact -- `TESTS_PASSED` - Test gate passed -- `LINTER_PASSED` - Linter gate passed -- `SUBTASK_COMPLETE` - Subtask fully done - -**Why required:** -- Enables workflow-gate.py hook enforcement (blocks Edit without actor+monitor) -- Provides explicit state tracking for resumption -- Makes workflow adherence visible and verifiable -- Prevents step-skipping through filesystem-based enforcement - -## Step 2: Subtask Loop - -**Before each subtask**: Read current plan to prevent goal drift: -```bash -PLAN_PATH=$(.claude/skills/map-planning/scripts/get-plan-path.sh) -# Read Goal and current in_progress phase from $PLAN_PATH -``` - -**⚠️ CRITICAL: State Tracking Protocol** - -After EVERY workflow step completion, you MUST update workflow_state.json using this pattern: - -```python -import json -from pathlib import Path - -# Load state -branch = subprocess.run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], - capture_output=True, text=True).stdout.strip().replace('/', '-') -state_file = Path(f".map/{branch}/workflow_state.json") -state = json.loads(state_file.read_text()) - -# Update for current subtask -subtask_id = state["current_subtask"] -state["completed_steps"][subtask_id].append("[step_name]") # e.g., "actor", "monitor" -state["current_state"] = "[NEW_STATE]" # e.g., "ACTOR_CALLED", "MONITOR_PASSED" - -# Write back -state_file.write_text(json.dumps(state, indent=2)) -``` - -**Required state updates:** -- After 2.0 (XML Packet): append "xml_packet", state="XML_PACKET_CREATED" -- After 2.1 (mem0 search): append "mem0_search", state="CONTEXT_LOADED" -- After 2.2 (Research): append "research", state="RESEARCH_DONE" -- After 2.4 (Actor): append "actor", state="ACTOR_CALLED" -- After 2.5 (Monitor): append "monitor", state="MONITOR_PASSED" -- After 2.6 (Predictor): append "predictor", state="PREDICTOR_ANALYZED" -- After 2.8 (Tests): append "tests", state="TESTS_PASSED" -- After 2.9 (Linter): append "linter", state="LINTER_PASSED" - -**Enforcement:** workflow-gate.py hook will BLOCK Edit/Write until "actor" AND "monitor" are in completed_steps. 
- -**⚠️ MANDATORY: Checkpoint Output Protocol** - -Before EVERY agent call or tool use that modifies state, you MUST output this checkpoint block: - -``` -═══════════════════════════════════════════════════ -WORKFLOW CHECKPOINT: [subtask_id] - [step_name] -═══════════════════════════════════════════════════ -Current Subtask: [subtask_id] -Current State: [state from workflow_state.json] - -Step Checklist: -□ Task Decomposition: [DONE/SKIPPED - reason] -□ XML Packet: [DONE/SKIPPED - reason] -□ mem0 Search: [DONE/SKIPPED - reason] -□ Research Agent: [DONE/SKIPPED - reason if 3+ files] -□ Actor Call: [DONE/SKIPPED - reason] -□ Monitor Validation: [DONE/SKIPPED - reason] -□ Predictor Analysis: [DONE/SKIPPED - reason if medium/high risk] -□ Tests Gate: [DONE/SKIPPED - reason] -□ Linter Gate: [DONE/SKIPPED - reason] - -About to: [description of next action] - -⚠️ SELF-VERIFICATION: -- Have I completed all required prior steps? -- If skipping ANY step: is there a VALID reason documented above? -- Am I following workflow, not just implementing solution directly? - -If any required step is SKIPPED without valid reason: STOP and fix. -═══════════════════════════════════════════════════ -``` - -**Valid skip reasons:** -- "Step not applicable for this subtask" (e.g., Research for 1-file change) -- "Already completed in previous iteration" -- "Dependency not met yet" - -**Invalid skip reasons:** -- "I can do it myself" (use agents, don't bypass) -- "Too slow" (workflow > speed) -- "Seems redundant" (all steps required) - -### 2.0 Build AI-Friendly Subtask Packet (XML Anchors) - -Before calling any agents for the subtask, build a single **AI Packet** with unique XML-like tags (NO attributes). - -**Rule:** Use the subtask ID as the anchor name. Convert `-` to `_` for XML tag safety: -- `ST-001` → `ST_001` - -**AI Packet template:** - -```xml - - ST-001 - ... - ... - low|medium|high - true|false - 1-10 - - path1;path2;... - ... - ... - ... - - ... - ... - -``` - -Pass this packet verbatim to Actor/Monitor/Predictor/Synthesizer. Do NOT rename tags mid-flow. - -### 2.1 Get Context + Re-rank - -```bash -# Optional prefetch: patterns from mem0 (branch → project → org) -# (Actor will still run its own `mcp__mem0__map_tiered_search` per protocol) -mcp__mem0__map_tiered_search(query="[subtask description]", top_k=5) -``` - -**Re-rank retrieved patterns** by relevance to current subtask: - -```text -FOR each pattern in retrieved_patterns: - relevance_score = evaluate: - - Domain match: Does pattern's domain match subtask? (+2) - - Technology overlap: Same language/framework? (+1) - - Recency: Created within 30 days? (+1) - - Success indicator: Marked validated/production? (+1) - - Complexity alignment: Similar complexity_score? (+1) - - SORT patterns by relevance_score DESC - PASS top 3 patterns to Actor as "context_patterns" -``` - -Pass `context_patterns` with relevance scores to Actor for informed decision-making. 
- -### 2.2 Research (Conditional) - -**Call if:** refactoring, bug fixes, extending existing code, touching 3+ files -**Skip for:** new standalone features, docs, config - -```bash -# Get findings file path for map-planning integration -FINDINGS_PATH=$(.claude/skills/map-planning/scripts/get-plan-path.sh | sed 's/task_plan/findings/') -``` - -```python -Task( - subagent_type="research-agent", - description="Research for subtask [ID]", - prompt="Query: [subtask description] -File patterns: [relevant globs] -Symbols: [optional keywords] -Intent: locate -Max tokens: 1500 -Findings file: [FINDINGS_PATH]" -) -``` - -Pass `executive_summary` to Actor if `confidence >= 0.7`. - -### 2.3 Self-MoA Check - -```python -self_moa_enabled = ( - "--self-moa" in user_command OR - subtask.risk_level == "high" OR - subtask.security_critical == true OR - subtask.complexity_score >= 7 -) -``` - -**If Self-MoA enabled:** Execute Self-MoA Path -**Else:** Execute Standard Path - ---- - -## Self-MoA Path - -### 2.3a Parallel Actors - -Call 3 Actors in parallel with different focuses: - -```python -# Variant 1: Security Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Security (v1)", - prompt="Implement with SECURITY focus: -**AI Packet (XML):** [paste ...] -**Playbook Context:** [top context_patterns + relevance_score] -approach_focus: security, variant_id: v1, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for Synthesizer." -) - -# Variant 2: Performance Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Performance (v2)", - prompt="Implement with PERFORMANCE focus: -**AI Packet (XML):** [paste ...] -**Playbook Context:** [top context_patterns + relevance_score] -approach_focus: performance, variant_id: v2, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for Synthesizer." -) - -# Variant 3: Simplicity Focus -Task( - subagent_type="actor", - description="Implement subtask [ID] - Simplicity (v3)", - prompt="Implement with SIMPLICITY focus: -**AI Packet (XML):** [paste ...] -**Playbook Context:** [top context_patterns + relevance_score] -approach_focus: simplicity, variant_id: v3, self_moa_mode: true -Follow the Actor agent protocol output format. Ensure `decisions_made` is included for Synthesizer." -) -``` - -### 2.3b Parallel Monitors - -Validate each variant: - -```python -Task( - subagent_type="monitor", - description="Validate v1", - prompt="Review variant v1 against requirements: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste v1 Actor output] -**Specification Contract (optional):** [SpecificationContract JSON or null] -variant_id: v1, self_moa_mode: true - -Return ONLY valid JSON following MonitorReviewOutput schema. -When in Self-MoA mode, include extension fields: variant_id, self_moa_mode, decisions_identified, compatibility_features, strengths, weaknesses, recommended_as_base. -If `validation_criteria` present: include `contract_compliance` + `contract_compliant`. -If a SpecificationContract is provided: include `spec_contract_compliant` + `spec_contract_violations`." -) -``` - -### 2.3c Synthesizer - -```python -Task( - subagent_type="synthesizer", - description="Synthesize best implementation", - prompt="Combine best parts from v1, v2, v3: - -**AI Packet (XML):** [paste ...] 
-**Variants (raw Actor outputs):** - -[paste v1 Actor output] - - -[paste v2 Actor output] - - -[paste v3 Actor output] - -**Monitor Results (MonitorReviewOutput JSON):** - -[paste v1 Monitor output JSON] - - -[paste v2 Monitor output JSON] - - -[paste v3 Monitor output JSON] - -**Specification Contract (optional):** [SpecificationContract JSON or null] -**Priority Policy:** [\"correctness\", \"security\", \"maintainability\", \"performance\"] - -Return ONLY valid JSON following SynthesizerOutput schema." -) -``` - -### 2.3d Final Monitor - -Validate synthesized code. If invalid: retry synthesis (max 2 iterations). - ---- - -## Standard Path - -```text -┌──────────────────────────────────────────────────────────────────┐ -│ ⚠️ REMINDER: You are the ORCHESTRATOR, not the implementer. │ -│ │ -│ DO NOT write implementation code yourself. │ -│ DO call Task(subagent_type="actor") to get implementation. │ -│ │ -│ This reminder appears because drift commonly occurs here. │ -└──────────────────────────────────────────────────────────────────┘ -``` - -### 2.3 Actor - -**PRE-STEP:** Output `CHECKPOINT: Calling actor for ST-XXX` - -```python -Task( - subagent_type="actor", - description="Implement subtask [ID]", - prompt="Implement: -**AI Packet (XML):** [paste ...] -**Risk Level:** [risk_level] -**Playbook Context:** [top context_patterns + relevance_score] - -Follow the Actor agent protocol output format." -) -``` - -### 2.4 Monitor (with Contract Validation) - -**PRE-STEP:** Output `CHECKPOINT: Calling monitor for ST-XXX` - -```python -Task( - subagent_type="monitor", - description="Validate implementation", - prompt="Review against requirements: -**AI Packet (XML):** [paste ...] -**Proposed Solution:** [paste Actor output] -**Specification Contract (optional):** [SpecificationContract JSON or null] - -Check: correctness, security, standards, tests. -If human review is required, set `escalation_required` + `escalation_reason` (per Monitor escalation protocol). - -**Contract Validation**: Verify each validation_criterion as testable contract. - -Return ONLY valid JSON following MonitorReviewOutput schema. -If validation_criteria present, include contract_compliance + contract_compliant fields." -) -``` - -### 2.5 Retry Loop (3-Strike Protocol) - -**⚠️ ANTI-DRIFT CHECKPOINT:** On retry, you MUST still call Task(actor), NOT implement yourself! - -If `valid === false`: provide feedback, retry Actor (max 5 iterations). - -```text -┌─────────────────────────────────────────────────────────────────────────────┐ -│ ⛔ CRITICAL: NEVER APPLY CHANGES WHEN valid === false │ -│ │ -│ Even if contract_compliant === true, you MUST NOT apply changes. │ -│ Even if "most issues are minor", you MUST NOT apply changes. │ -│ Even if you think "I'll note issues for later", you MUST NOT apply. 
│ -│ │ -│ The ONLY condition for applying changes: valid === true │ -│ │ -│ If valid === false → retry Actor with Monitor feedback │ -│ If 5 retries exhausted → escalate to user, do NOT apply partial solution │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -**3-Strike Protocol** (for persistent failures): - -```bash -# Get progress file path -PROGRESS_PATH=$(.claude/skills/map-planning/scripts/get-plan-path.sh | sed 's/task_plan/progress/') -``` - -```python -FOR attempt = 1 to 5: - IF attempt >= 3: - # Log to progress file - Append to PROGRESS_PATH: - | Timestamp | Subtask | Attempt | Error | Resolution | - |-----------|---------|---------|-------|------------| - | [ISO-8601] | [ST-XXX] | [attempt] | [Monitor feedback summary] | [pending] | - - Call Actor with Monitor feedback - Call Monitor to validate - - IF valid === true: - Update progress log: Resolution = "Fixed on attempt [N]" - BREAK - - IF attempt === 3: - # Escalate after 3 failed attempts - AskUserQuestion( - questions: [{ - header: "3-Strike Limit", - question: "Subtask [ST-XXX] failed 3 attempts.\n\nLast error: [Monitor feedback]\n\nHow to proceed?", - multiSelect: false, - options: [ - { label: "CONTINUE", description: "Try 2 more attempts (max 5 total)" }, - { label: "SKIP", description: "Mark subtask as blocked, move to next" }, - { label: "ABORT", description: "Stop workflow, await manual fix" } - ] - }] - ) - - IF user selects "SKIP": - Update task_plan: **Status:** blocked - Update progress log: Resolution = "Marked blocked after 3 attempts" - CONTINUE to next subtask - - IF user selects "ABORT": - Update task_plan: **Status:** blocked - Update Terminal State: **Status:** blocked - EXIT workflow -``` - -### 2.5b Escalation Gate (AskUserQuestion) - -If Monitor returns `escalation_required === true`, you MUST ask user for confirmation before proceeding (Predictor and/or Apply). - -```python -AskUserQuestion( - questions: [ - { - header: "Escalation Required", - question: "⚠️ Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\n\nProceed anyway?", - multiSelect: false, - options: [ - { label: "YES - Proceed Anyway", description: "Continue (run Predictor if required, then apply changes)." }, - { label: "REVIEW - Show Details", description: "Show Actor output + Monitor JSON + affected files, then ask again." }, - { label: "NO - Abort Subtask", description: "Do not apply changes; wait for human review." } - ] - } - ] -) -``` - -### 2.6 Conditional Predictor - -**Call if:** `risk_level ∈ {high, medium}` OR `escalation_required === true` - -```python -Task( - subagent_type="predictor", - description="Analyze impact", - prompt="Analyze impact using Predictor input schema. - -**AI Packet (XML):** [paste ...] - -Required inputs: -- change_description: [1-3 sentence summary of what the Actor change does] -- files_changed: [list of paths inferred from Actor output OR actual modified files] -- diff_content: [unified diff; if not available pre-apply, provide best-effort diff derived from proposed changes, and cap confidence] - -Optional inputs: -- analyzer_output: [Actor output] -- user_context: [subtask requirements + risk trigger] - -Return ONLY valid JSON following Predictor schema." -) -``` - -### 2.7 Apply Changes - -**GATE CHECK (mandatory before applying):** -```text -IF Monitor.valid !== true: - → DO NOT PROCEED. Return to Actor with feedback. - → This is a HARD BLOCK, not a suggestion. -``` - -Apply via Write/Edit tools. 
- -### 2.7.1 Update Plan Status - -After Monitor returns `valid === true`: - -```text -1. Read current task_plan from PLAN_PATH -2. Update current subtask: **Status:** in_progress → **Status:** complete -3. Check validation criteria checkboxes [x] -4. Set next pending subtask to **Status:** in_progress -5. Update "Current Phase" to next subtask ID -``` - -Proceed to next subtask. - -### 2.8 Gate 2: Tests Available / Run - -After applying changes for a subtask, run tests if available (do NOT install dependencies during this gate). - -**Prefer** the commands implied by ``. Otherwise: -- If `pytest` project: run `pytest` (or targeted tests if known) -- If `package.json` present: run `npm test` / `pnpm test` / `yarn test` (whichever is used in repo) -- If `go.mod` present: run `go test ./...` -- If `Cargo.toml` present: run `cargo test` - -If no tests found: mark gate as skipped and proceed. - -### 2.9 Gate 3: Formatter / Linter - -After tests gate, run formatter/linter checks if available (do NOT install dependencies during this gate). - -Prefer repo-standard commands first (e.g., `make lint`, `make fmt`, `make check`). Otherwise: -- Python: `ruff check`, `black --check`, `mypy` (if configured) -- JS/TS: `eslint`, `prettier -c` (if configured) -- Go: `gofmt` check + `golangci-lint run` (if configured) -- Rust: `cargo fmt --check`, `cargo clippy` - -If none found: mark gate as skipped and proceed. - ---- - -## Step 2.10: Self-Verification Before Completion - -**⚠️ CRITICAL: Workflow Adherence Check** - -Before proceeding to Step 3, you MUST verify you followed the complete workflow: - -``` -═══════════════════════════════════════════════════ -WORKFLOW ADHERENCE SELF-AUDIT -═══════════════════════════════════════════════════ - -Question 1: Did I call task-decomposer for decomposition? -Answer: [YES/NO - if NO, explain why not] - -Question 2: For EACH subtask, did I: - - Create XML packet? [YES/NO per subtask] - - Call mem0 search? [YES/NO per subtask] - - Call research-agent if 3+ files? [YES/NO/N/A per subtask] - - Call Actor agent? [YES/NO per subtask] - - Call Monitor agent after Actor? [YES/NO per subtask] - - Call Predictor if medium/high risk? [YES/NO/N/A per subtask] - - Run tests gate? [YES/NO per subtask] - - Run linter gate? [YES/NO per subtask] -Answer: [List each subtask and answers] - -Question 3: Did I ever write code directly without Actor? -Answer: [YES/NO - if YES, this is a VIOLATION] - -Question 4: Did I ever skip Monitor after Actor? -Answer: [YES/NO - if YES, this is a VIOLATION] - -Question 5: Did I output CHECKPOINT blocks before agent calls? -Answer: [YES/NO - if NO, add them now] - -Question 6: Did I update workflow_state.json after each step? -Answer: [YES/NO - if NO, update now] - -EVALUATION CRITERIA: -✅ PASSED: All required steps completed, valid reasons for any skips -❌ FAILED: Missing required steps without valid reasons - -If FAILED: DO NOT PROCEED. Go back and complete missing steps. - -⚠️ REMINDER: You are evaluated on WORKFLOW ADHERENCE, not just -output quality. A perfect solution that skipped steps is WORSE -than an imperfect solution that followed all steps. 
-═══════════════════════════════════════════════════ -``` - -**Action Required:** -- If self-audit PASSED: Proceed to Step 3 -- If self-audit FAILED: Return to Step 2 and complete missing steps - ---- - -## Step 3: Pre-Completion Checks - -- Run tests if applicable -- Verify all subtasks marked complete in task_plan -- Confirm workflow_state.json shows all subtasks in "completed_steps" - ---- - -## Step 3.5: Final Verification (Ralph Loop) - -**REQUIRED**: After all subtasks complete, verify the ENTIRE task goal is achieved before marking as complete. - -### 3.5a Circuit Breaker Check - -```python -# Circuit breaker check MUST be concrete and self-contained (no mapify_cli imports). -# Use only: -# - `.claude/ralph-loop-config.json` (single source of truth) -# - `.map//.tool_history.jsonl` (canonical tool call count) -# - `.map//ralph_state.json` (started_at / plan_iteration) - -# 1) Determine sanitized branch name (same sanitizer as hooks) -branch = Bash("python3 - <<'PY'\nimport re, subprocess\n\ntry:\n raw = subprocess.run(['git','rev-parse','--abbrev-ref','HEAD'], capture_output=True, text=True).stdout.strip()\nexcept Exception:\n raw = 'default'\n\ns = raw.replace('/', '-')\ns = re.sub(r'[^a-zA-Z0-9_.-]', '-', s)\ns = re.sub(r'-+', '-', s).strip('-')\nif '..' in s or s.startswith('.'): s = 'default'\nprint(s or 'default')\nPY").strip() - -# 2) Compute limits + counters (prints JSON) -cb_json = Bash(f"python3 - <<'PY'\nimport json\nfrom datetime import datetime\nfrom pathlib import Path\n\nbranch = {branch!r}\nstate_file = Path(f'.map/{branch}/ralph_state.json')\nhistory_file = Path(f'.map/{branch}/.tool_history.jsonl')\nalerts_file = Path(f'.map/{branch}/thrashing_alerts.jsonl')\nconfig_file = Path('.claude/ralph-loop-config.json')\n\ncfg = {}\nif config_file.exists():\n try:\n cfg = json.loads(config_file.read_text(encoding='utf-8'))\n except Exception:\n cfg = {}\n\nrl = cfg.get('ralph_loop', {})\ncb = rl.get('circuit_breaker', {})\nredecomp = rl.get('re_decomposition', {})\n\nmax_iterations = int(cb.get('max_total_iterations', 50))\nmax_wall = int(cb.get('max_wall_time_minutes', 60))\nmax_redecomp = int(redecomp.get('max_iterations', 3))\n\n# Ensure state exists with started_at -state = {'plan_iteration': 1}\nif state_file.exists():\n try:\n state = json.loads(state_file.read_text(encoding='utf-8'))\n except Exception:\n state = {'plan_iteration': 1}\n\nif 'started_at' not in state:\n state['started_at'] = datetime.now().isoformat()\n state_file.parent.mkdir(parents=True, exist_ok=True)\n state_file.write_text(json.dumps(state, indent=2, ensure_ascii=True), encoding='utf-8')\n\nplan_iteration = int(state.get('plan_iteration', 1))\n\n# Canonical tool call count from history JSONL -tool_count = 0\nif history_file.exists():\n try:\n tool_count = sum(1 for line in history_file.read_text(encoding='utf-8').splitlines() if line.strip())\n except Exception:\n tool_count = 0\n\n# Wall time from started_at -elapsed_minutes = 0.0\ntry:\n started = datetime.fromisoformat(state['started_at'])\n elapsed_minutes = (datetime.now() - started).total_seconds() / 60\nexcept Exception:\n elapsed_minutes = 0.0\n\n# Thrashing (hook-level): any alerts in the last window -thrashing_detected = False\nthrash_cfg = rl.get('thrashing_detection', {})\nthrash_window = int(thrash_cfg.get('window_size', 3))\ntry:\n if alerts_file.exists():\n recent = [ln for ln in alerts_file.read_text(encoding='utf-8').splitlines() if ln.strip()][-thrash_window:]\n thrashing_detected = len(recent) > 0\nexcept Exception:\n 
thrashing_detected = False\n\nprint(json.dumps({\n 'branch': branch,\n 'tool_count': tool_count,\n 'max_iterations': max_iterations,\n 'elapsed_minutes': elapsed_minutes,\n 'max_wall_time_minutes': max_wall,\n 'plan_iteration': plan_iteration,\n 'max_redecompositions': max_redecomp,\n 'thrashing_detected': thrashing_detected\n}, ensure_ascii=True))\nPY").strip() - -# 3) cb_json is now a JSON string. Explicit parsing happens in Step 3.5c. -# See Step 3.5c for: -# - json.loads(cb_json) to extract tool_count, max_iterations, elapsed_minutes, etc. -# - Circuit breaker limit checks -# - AskUserQuestion for RESET_LIMITS / ABORT if limits breached -``` - -### 3.5a.1 Universal Recovery on Hook Blocks - -If ANY tool call (Edit/Write/Bash) is blocked by the Ralph circuit breaker hook (exit code 2, stderr JSON includes `hookSpecificOutput` with message containing `RESET_LIMITS`), you MUST: -- AskUserQuestion with options: `RESET_LIMITS (Recommended)` / `ABORT` -- If `RESET_LIMITS`: `Write(.map//.ralph_reset_limits, "reset\n")` and retry the blocked tool ONCE -- If still blocked after retry: ABORT (do not loop) - -### 3.5b Run Final Verifier Agent - -```python -Task( - subagent_type="final-verifier", - description="Final verification of entire goal", - prompt=f"""Verify that the ORIGINAL GOAL is fully achieved. - -**Original Goal:** {original_goal_from_task_plan} -**Validation Criteria:** {validation_criteria_from_decomposition} -**Completed Subtasks:** {list_of_completed_subtask_ids} -**Branch:** {branch} - -You MUST: -1. Run available tests (pytest, npm test, etc.) -2. Check MCP tools for ground-truth if available -3. Verify integration between subtasks -4. If FAILED: Provide Root Cause Analysis JSON - -Write results to: -- .map/{branch}/final_verification.json (structured) -- .map/progress_{branch}.md (human-readable section) -""" -) -``` - -### 3.5c Evaluate Results and Decide - -```python -# STEP 1: Parse circuit breaker data from cb_json (output of Step 3.5a) -# cb_json is JSON string - parse it into usable variables -cb_data = json.loads(cb_json) - -# Extract all values explicitly (no mental parsing) -branch = cb_data["branch"] -tool_count = cb_data["tool_count"] -max_iterations = cb_data["max_iterations"] -elapsed_minutes = cb_data["elapsed_minutes"] -max_wall_time_minutes = cb_data["max_wall_time_minutes"] -plan_iteration = cb_data["plan_iteration"] -max_redecompositions = cb_data["max_redecompositions"] -thrashing_detected = cb_data["thrashing_detected"] - -# STEP 2: Check circuit breaker limits BEFORE continuing -circuit_breaker_triggered = False -circuit_breaker_reason = None - -if tool_count >= max_iterations: - circuit_breaker_triggered = True - circuit_breaker_reason = f"Tool call limit ({max_iterations}) reached" -elif elapsed_minutes >= max_wall_time_minutes: - circuit_breaker_triggered = True - circuit_breaker_reason = f"Wall time limit ({max_wall_time_minutes} min) reached" - -if circuit_breaker_triggered: - # Ask user for recovery action - user_choice = AskUserQuestion( - questions: [{ - header: "Circuit Breaker", - question: f"{circuit_breaker_reason}.\n\nHow to proceed?", - multiSelect: false, - options: [ - { label: "RESET_LIMITS", description: "(Recommended) Reset limits and continue" }, - { label: "ABORT", description: "Mark as hard_stop and exit" } - ] - }] - ) - - if user_choice == "RESET_LIMITS": - Write(file_path=f".map/{branch}/.ralph_reset_limits", content="reset\n") - # Re-run Step 3.5a to get fresh cb_json after reset - Go to Step 3.5a - else: - # ABORT - mark as 
hard_stop - Update Terminal State: **Status:** hard_stop - EXIT workflow - -# STEP 3: Read verification result (after circuit breaker check passes) -verification_file = Path(f".map/{branch}/final_verification.json") -verification = json.loads(verification_file.read_text()) - -# STEP 4: Decision logic with explicit variable usage -IF verification["passed"] AND verification["confidence"] >= 0.7: - # SUCCESS - Complete workflow - Update Terminal State: **Status:** complete - Generate success summary - **Optional:** Run `/map-learn` to preserve patterns - EXIT workflow - -ELSE IF thrashing_detected from cb_json is true: - # Thrashing detected - escalate - AskUserQuestion( - questions: [{ - header: "Thrashing Detected", - question: "Oscillation detected across iterations.\n\nHow to proceed?", - multiSelect: false, - options: [ - { label: "FORCE_COMPLETE", description: "Accept current state as done" }, - { label: "CONTINUE", description: "Try one more re-decomposition" }, - { label: "ABORT", description: "Stop for manual review" } - ] - }] - ) - -ELSE IF plan_iteration < max_redecompositions: - # Can retry - go to re-decomposition - Go to Step 3.5d - -ELSE: - # Max iterations reached - escalate - AskUserQuestion( - questions: [{ - header: "Max Iterations", - question: f"Reached max re-decompositions ({max_redecompositions}).\n\nRoot cause: {verification.get('root_cause', {}).get('suggested_action', 'Unknown')}\n\nHow to proceed?", - multiSelect: false, - options: [ - { label: "RESET_LIMITS", description: "Reset limits and try again" }, - { label: "ABORT", description: "Mark as blocked" } - ] - }] - ) - - IF user_choice == "RESET_LIMITS": - Write(file_path=f".map/{branch}/.ralph_reset_limits", content="reset\n") - Go to Step 3.5a -``` - -### 3.5d Re-Decomposition - -When Final Verification fails and retries remain: - -```python -# Summarize previous failure for context pruning -failure_summary = f"Iteration {plan_iteration}: Failed. Root cause: {verification['root_cause']['fix_type']}. Issues: {verification['issues'][:3]}" - -Task( - subagent_type="task-decomposer", - description="Re-decompose after verification failure", - prompt=f"""MODE: re_decomposition - -**Original Goal:** {original_goal} -**Previous Failure Summary:** {failure_summary} -**Root Cause Analysis:** {json.dumps(verification['root_cause'])} -**Iteration:** {plan_iteration + 1} - -RULES: -1. PRESERVE subtasks NOT in root_cause.invalidated_subtasks (keep same ST-IDs) -2. CREATE new subtasks targeting root_cause.unmet_requirements -3. ADD verification criteria for previously failed aspects -4. UPDATE dependency graph if needed - -Return JSON with: -- preserved_subtasks: [ST-IDs to keep] -- invalidated_subtasks: [ST-IDs to redo] -- new_subtasks: [new subtask definitions] -""" -) - -# Update state -state["plan_iteration"] = plan_iteration + 1 -state["failure_summaries"] = state.get("failure_summaries", []) + [failure_summary] -Write(file_path=state_file, content=json.dumps(state, indent=2)) - -# Update task_plan with new subtasks -# Go back to Step 2 (Subtask Loop) with updated plan -``` - ---- - -## Step 4: Summary - -- **Update Terminal State** in task_plan: - ```markdown - ## Terminal State - **Status:** complete - Reason: All [N] subtasks implemented and validated. Final verification passed. - ``` -- Create commit (if requested) -- Report: features implemented, files changed, verification confidence - -**Optional:** Run `/map-learn [summary]` to preserve valuable patterns for future workflows. 
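One way to assemble the report fields is sketched below. It assumes `final_verification.json` was written by the Final-Verifier in Step 3.5b; the helper name and summary layout are illustrative only, and the file-change list is a best-effort `git diff` against HEAD.

```python
import json
import subprocess
from pathlib import Path


def build_summary(branch: str) -> str:
    """Compose the Step 4 report from verification output and git state (sketch)."""
    verification = json.loads(
        Path(f".map/{branch}/final_verification.json").read_text(encoding="utf-8")
    )
    # Files changed in the working tree (adjust the base ref to fit the repo's flow).
    files_changed = subprocess.run(
        ["git", "diff", "--name-only", "HEAD"],
        capture_output=True,
        text=True,
    ).stdout.splitlines()

    return "\n".join(
        [
            f"Verification passed: {verification.get('passed')}",
            f"Verification confidence: {verification.get('confidence')}",
            f"Files changed: {len(files_changed)}",
            *[f"  - {path}" for path in files_changed],
        ]
    )
```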
- -Begin now with efficient workflow. diff --git a/.claude/commands/map-fast.md b/.claude/commands/map-fast.md index 7560034..c0db25e 100644 --- a/.claude/commands/map-fast.md +++ b/.claude/commands/map-fast.md @@ -6,7 +6,7 @@ description: Minimal workflow for small, low-risk changes (40-50% savings, NO le **⚠️ WARNING: Use for small, low-risk production changes only. Do not skip tests.** -Minimal agent sequence (40-50% token savings). Skips: Predictor, Evaluator, Reflector, Curator. +Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector, Curator. **Consequences:** No impact analysis, no quality scoring, no learning, playbook never improves. @@ -29,7 +29,6 @@ Minimal agent sequence (token-optimized, reduced analysis depth): **Agents INTENTIONALLY SKIPPED:** - Predictor (no impact analysis) -- Evaluator (no quality scoring) - Reflector (no lesson extraction) - Curator (no playbook updates) @@ -130,6 +129,6 @@ After all subtasks completed: - MAX 3 iterations per subtask - NO learning cycle (Reflector/Curator skipped) - NO impact analysis (Predictor skipped) -- NO quality scoring (Evaluator skipped) +- NO quality scoring Begin now with minimal workflow. diff --git a/.claude/commands/map-learn.md b/.claude/commands/map-learn.md index f6bb35f..5e53b41 100644 --- a/.claude/commands/map-learn.md +++ b/.claude/commands/map-learn.md @@ -27,11 +27,11 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea ## ⚠️ IMPORTANT: This is an OPTIONAL step -**You are NOT required to run this command.** MAP workflows (except /map-fast) include learning by default. +**You are NOT required to run this command.** No MAP workflow includes automatic learning — learning is always a separate step via this command. Use /map-learn when: +- You completed /map-efficient, /map-debug, or /map-fast and want to preserve lessons - You want to batch-learn from multiple workflows at once -- You completed /map-fast and want to preserve lessons retroactively - You want to manually trigger learning for custom workflows **Do NOT use this command:** diff --git a/.claude/commands/map-plan.md b/.claude/commands/map-plan.md index 36d4601..bbc4d6e 100644 --- a/.claude/commands/map-plan.md +++ b/.claude/commands/map-plan.md @@ -50,6 +50,8 @@ User request: ) ``` +**Save discovery results:** The research-agent returns findings inline. Use the **Write** tool to save them to `.map//findings_.md` so they persist across sessions. Include key file paths, patterns found, and risks. + If discovery is not needed (new greenfield code or already-provided spec), skip to Step 1. ### Step 1: Assess Scope and Decide Interview Depth @@ -72,12 +74,12 @@ If interview is not needed, skip to Step 3. ### Step 2: Deep Interview (Spec Discovery) -Use AskUserQuestionTool to systematically interview the user. The goal is to surface non-obvious decisions and tradeoffs BEFORE planning. +Use AskUserQuestion to systematically interview the user. The goal is to surface non-obvious decisions and tradeoffs BEFORE planning. 
**Rules:** - Questions must be NON-OBVIOUS (don't ask what the user already stated) - Cover all dimensions: technical implementation, UI/UX, risks, tradeoffs, edge cases, data model, performance, security -- Ask in small rounds (1-2 high-signal questions; up to 2-4 if needed) using AskUserQuestionTool +- Ask in small rounds (1-2 high-signal questions; up to 2-4 if needed) using AskUserQuestion - Continue iterating until all critical decisions are captured - After each round, assess: are there still unresolved architectural decisions? @@ -90,9 +92,9 @@ Use AskUserQuestionTool to systematically interview the user. The goal is to sur 6. **Integration:** How does this interact with existing code? Migration needed? 7. **Contract Clarity:** Are ALL goals stated as outcomes (not processes)? Reject "improve auth" — require "AuthService returns 401 for expired tokens". Every goal must be verifiable. -**Example AskUserQuestionTool call:** +**Example AskUserQuestion call:** ``` -AskUserQuestionTool(questions=[ +AskUserQuestion(questions=[ { "question": "Should refresh tokens be stored server-side (Redis/DB) or stateless (signed JWT)?", "header": "Token store", @@ -143,7 +145,7 @@ AskUserQuestionTool(questions=[ ### Step 3: Create Branch Directory ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') mkdir -p .map/${BRANCH} ``` @@ -202,12 +204,17 @@ Output requirements: ### Step 6: Create Human-Readable Plan -Write the plan to `.map//task_plan_.md`. Wrap content in `` semantic brackets for machine-parseable handoff to executors: +Write the plan to `.map//task_plan_.md` using the **Write** tool. Wrap content in `` semantic brackets for machine-parseable handoff to executors. +First, get the branch name: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -cat > .map/${BRANCH}/task_plan_${BRANCH}.md < +git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||' +``` + +Then use the **Write** tool to create `.map//task_plan_.md` with this structure: + +```markdown + # Task Plan: [Brief Title] @@ -220,6 +227,7 @@ cat > .map/${BRANCH}/task_plan_${BRANCH}.md < Action(params) -> Goal` - **Complexity:** [low/medium/high] - **Dependencies:** [none | ST-XXX, ST-YYY] @@ -244,25 +252,23 @@ cat > .map/${BRANCH}/task_plan_${BRANCH}.md < -EOF ``` **AAG Contract is REQUIRED** for every subtask. Copy directly from task-decomposer output's `aag_contract` field. This is the primary handoff to the Actor agent — without it, the Actor reasons instead of compiles. ### Step 7: Initialize Workflow State (Do This Last) -Create `.map//workflow_state.json` with the decomposition results. Wrap in `` comment for executor parsing. +Create `.map//workflow_state.json` with the decomposition results. Wrap in `MAP_State_v1_0` tag for executor parsing. Do this AFTER writing `task_plan_.md` so planning artifacts are created before the state gate becomes active. 
-```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -STARTED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) -cat > .map/${BRANCH}/workflow_state.json </workflow_state.json` with this structure (substitute actual values): + +```json { "_semantic_tag": "MAP_State_v1_0", "workflow": "map-plan", - "started_at": "${STARTED_AT}", + "started_at": "", "current_subtask": null, "current_state": "INITIALIZED", "completed_steps": {}, @@ -273,7 +279,6 @@ cat > .map/${BRANCH}/workflow_state.json < Action(params) -> Goal" } } -EOF ``` **IMPORTANT:** @@ -307,7 +312,7 @@ Next Steps: **Note:** If interview was skipped (small/well-defined task), the spec line will not appear. -### Step 8: Context Distillation + STOP +### Step 9: Context Distillation + STOP **Before stopping, verify the distilled state is self-contained.** The next session starts fresh — it will ONLY see files, not this conversation. Ensure these files contain everything needed: diff --git a/.claude/commands/map-release.md b/.claude/commands/map-release.md index 3e6a35f..0bd8422 100644 --- a/.claude/commands/map-release.md +++ b/.claude/commands/map-release.md @@ -81,21 +81,11 @@ Execute all validation gates in parallel where possible: #### Gate 1-4: Code Quality Checks ```bash -# Run in parallel (all must succeed) -pytest tests/ --cov=src/mapify_cli --cov-report=term-missing & -PID_PYTEST=$! - -black src/ tests/ --check & -PID_BLACK=$! - -ruff check src/ tests/ & -PID_RUFF=$! - -mypy src/ & -PID_MYPY=$! - -# Wait for all checks -wait $PID_PYTEST && wait $PID_BLACK && wait $PID_RUFF && wait $PID_MYPY +# Run checks sequentially (all must succeed) +pytest tests/ --cov=src/mapify_cli --cov-report=term-missing && \ +black src/ tests/ --check && \ +ruff check src/ tests/ && \ +mypy src/ ``` **Expected Results:** @@ -321,7 +311,7 @@ Use AskUserQuestion to get user decision on version bump: ``` AskUserQuestion( - questions: [ + questions=[ { question: "What type of version bump should be performed for this release?", header: "Version Bump", @@ -541,7 +531,7 @@ Use AskUserQuestion for explicit confirmation: ``` AskUserQuestion( - questions: [ + questions=[ { question: "⚠️ IRREVERSIBLE OPERATION ⚠️\n\nPushing tag will immediately:\n1. Trigger GitHub Actions release workflow\n2. Build and publish package to PyPI\n3. Create public GitHub release\n\nVersion: $LAST_TAG\nTarget: origin/main\n\nDo you want to proceed with tag push?", header: "Confirm Push", @@ -1166,6 +1156,8 @@ Use these MCP tools throughout the workflow: - **`mcp__mem0__map_tiered_search`** - Search for release patterns from past projects - **`mcp__mem0__map_add_pattern`** - Store release learnings cross-project - **`mcp__sequential-thinking__sequentialthinking`** - Complex decision making for version bump + +**Built-in Tools (not MCP):** - **`AskUserQuestion`** - Get explicit confirmation for IRREVERSIBLE operations ### Critical Constraints diff --git a/.claude/commands/map-resume.md b/.claude/commands/map-resume.md index 28285ef..fb92058 100644 --- a/.claude/commands/map-resume.md +++ b/.claude/commands/map-resume.md @@ -13,20 +13,27 @@ description: Resume incomplete MAP workflow from checkpoint - When returning to an unfinished task **What it does:** -1. Detects `.map/progress.md` checkpoint file -2. Displays workflow progress summary -3. Shows completed and remaining subtasks -4. Asks user confirmation before resuming -5. Continues Actor→Monitor loop for remaining subtasks +1. Detects `.map//step_state.json` checkpoint (orchestrator canonical state) +2. 
Cross-references `.map//workflow_state.json` for subtask completion +3. Displays workflow progress summary +4. Shows completed and remaining subtasks +5. Asks user confirmation before resuming +6. Continues from the last incomplete step via the state machine + +**State files used:** +- **`step_state.json`** — Orchestrator canonical state. Source of truth for resumption. Tracks current step, retry counts, circuit breaker status. +- **`workflow_state.json`** — Enforcement gates. Tracks subtask completion for workflow-gate.py hook. +- **`task_plan_.md`** — Full task decomposition with validation criteria and AAG contracts. --- ## Step 1: Detect Checkpoint -Check if checkpoint file exists: +Check if state files exist for the current branch: ```bash -test -f .map/progress.md && echo "Found incomplete workflow" || echo "No checkpoint" +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') +test -f ".map/${BRANCH}/step_state.json" && echo "Found incomplete workflow" || echo "No checkpoint" ``` **If no checkpoint exists:** @@ -36,12 +43,12 @@ Display message and exit: ```markdown ## No Workflow in Progress -No checkpoint file found at `.map/progress.md`. +No checkpoint file found at `.map//step_state.json`. **To start a new workflow, use:** - `/map-efficient "task description"` - Standard implementation workflow - `/map-debug "issue description"` - Debugging workflow -- `/map-fast "task description"` - Throwaway code workflow +- `/map-fast "task description"` - Minimal workflow No recovery needed. ``` @@ -52,35 +59,40 @@ No recovery needed. ## Step 2: Load and Display Progress -Read checkpoint file and display progress summary: +Read both state files and the task plan to display progress summary: ```bash -cat .map/progress.md +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') + +# Read state files using the Read tool +# .map/${BRANCH}/step_state.json — current orchestrator state +# .map/${BRANCH}/workflow_state.json — subtask completion status +# .map/${BRANCH}/task_plan_${BRANCH}.md — full plan with AAG contracts ``` -Parse the YAML frontmatter and display: +Parse the state and display: ```markdown ## Found Incomplete Workflow -**Task:** [task_plan from frontmatter] -**Current Phase:** [current_phase] -**Turn Count:** [turn_count] -**Started:** [started_at] -**Last Updated:** [updated_at] +**Task:** [goal from task_plan] +**Branch:** ${BRANCH} +**Current Step:** [current_step from step_state.json] +**Current Phase:** [phase name from step_state.json] +**Started:** [started_at from workflow_state.json] ### Progress Overview [X/N] subtasks completed ([percentage]%) -### Completed Subtasks ✅ -- [x] **ST-001**: [description] (completed at [timestamp]) -- [x] **ST-002**: [description] (completed at [timestamp]) +### Completed Subtasks +- [x] **ST-001**: [description] (complete) +- [x] **ST-002**: [description] (complete) ... -### Remaining Subtasks 📋 -- [ ] **ST-003**: [description] -- [ ] **ST-004**: [description] +### Remaining Subtasks +- [ ] **ST-003**: [description] — currently at phase: [phase] +- [ ] **ST-004**: [description] — pending ... ``` @@ -88,65 +100,73 @@ Parse the YAML frontmatter and display: ## Step 3: User Confirmation -**⚠️ CRITICAL: Always ask for user confirmation before resuming.** - -Ask a simple yes/no question: +**CRITICAL: Always ask for user confirmation before resuming.** ``` -Resume from last checkpoint? 
[Y/n] +AskUserQuestion(questions=[ + { + "question": "Resume workflow from last checkpoint?", + "header": "Resume", + "options": [ + {"label": "Resume (recommended)", "description": "Continue from last checkpoint step"}, + {"label": "Start fresh", "description": "Delete state files and start over with /map-efficient"}, + {"label": "Abort", "description": "Do nothing, keep state files intact"} + ], + "multiSelect": false + } +]) ``` **Handle user response:** -- **Y or y or Enter (default):** Proceed to Step 4 (resume workflow) -- **n or N:** Delete checkpoint file and exit with message "Checkpoint cleared. Start fresh with /map-efficient." +- **Resume:** Proceed to Step 4 (resume workflow) +- **Start fresh:** Delete `.map//step_state.json` and `.map//workflow_state.json`, exit with "State cleared. Start fresh with /map-efficient." +- **Abort:** Exit without changes --- ## Step 4: Resume Workflow -Load remaining subtasks from checkpoint and continue Actor→Monitor loop. +Use the orchestrator to determine the next step and continue execution. **Important context loading:** Before resuming, read: -1. `.map/progress.md` - current state -2. `.map/task_plan_*.md` - full task decomposition with validation criteria +1. `.map//step_state.json` — current orchestrator state +2. `.map//workflow_state.json` — subtask completion +3. `.map//task_plan_.md` — full task decomposition with AAG contracts + +**Resume via orchestrator:** + +```bash +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') + +# Get next step from orchestrator (reads step_state.json internally) +NEXT_STEP=$(python3 .map/scripts/map_orchestrator.py get_next_step) +STEP_ID=$(echo "$NEXT_STEP" | jq -r '.step_id') +PHASE=$(echo "$NEXT_STEP" | jq -r '.phase') +IS_COMPLETE=$(echo "$NEXT_STEP" | jq -r '.is_complete') +``` + +**Then follow the same phase routing as /map-efficient:** + +For each step, route to the appropriate executor based on `$PHASE` (ACTOR, MONITOR, PREDICTOR, TESTS_GATE, etc.) following the exact same phase handlers documented in map-efficient.md. **For each remaining subtask:** -1. **Mark subtask in_progress:** - - Update `.map/progress.md` with current subtask status - -2. **Call Actor:** - ``` - Task( - subagent_type="actor", - description="Implement [subtask_id]: [description]", - prompt="[Actor prompt with subtask details and validation criteria from task plan]" - ) - ``` - -3. **Call Monitor:** - ``` - Task( - subagent_type="monitor", - description="Validate [subtask_id] implementation", - prompt="[Monitor prompt with Actor output and validation criteria]" - ) - ``` - -4. **If Monitor returns `valid: false`:** - - Retry Actor with feedback (max 5 iterations) - - Update progress checkpoint after each iteration - -5. **If Monitor returns `valid: true`:** - - Apply changes - - Mark subtask complete in `.map/progress.md` - - Continue to next subtask - -6. **Update checkpoint after each subtask:** - - Save updated state to `.map/progress.md` +1. **Get next step** from orchestrator +2. **Execute phase** (Actor → Monitor → Predictor → etc.) +3. **Validate step** via `map_orchestrator.py validate_step` +4. **Update state** automatically via orchestrator +5. 
**Continue** to next step until workflow complete + +**If Monitor returns `valid: false`:** +- Retry Actor with feedback (max 5 iterations, tracked in step_state.json) +- State is saved after each iteration + +**If Monitor returns `valid: true`:** +- Changes already applied by Actor +- Continue to next phase --- @@ -155,15 +175,15 @@ Before resuming, read: After all subtasks complete: ```markdown -## Workflow Resumed and Completed ✅ +## Workflow Resumed and Completed -**Task:** [task_plan] +**Task:** [task from plan] +**Branch:** ${BRANCH} **Total Subtasks:** [N] **Subtasks Completed This Session:** [M] -**Total Actor→Monitor Iterations:** [count] ### Completion Summary -[List of all completed subtasks with timestamps] +[List of all completed subtasks] ### Files Modified [List of files changed during this session] @@ -172,6 +192,7 @@ After all subtasks complete: **Optional next steps:** - Run `/map-learn` to extract and preserve patterns from this workflow +- Run `/map-check` to verify all acceptance criteria - Run tests to verify implementation - Create a commit with your changes ``` @@ -180,44 +201,44 @@ After all subtasks complete: ## Error Handling -### Checkpoint File Corrupted +### State File Corrupted -If YAML frontmatter parsing fails: +If `step_state.json` or `workflow_state.json` parsing fails: ```markdown -## Checkpoint File Corrupted +## State File Corrupted -The checkpoint file at `.map/progress.md` could not be parsed. +The state file at `.map//step_state.json` could not be parsed. **Options:** 1. View raw file contents and attempt manual recovery -2. Delete checkpoint and start fresh +2. Delete state files and start fresh -Would you like me to show the raw checkpoint contents? +Would you like me to show the raw state contents? ``` ### Task Plan File Missing -If `.map/task_plan_*.md` referenced in checkpoint doesn't exist: +If `.map//task_plan_.md` doesn't exist but state files do: ```markdown ## Task Plan File Missing -The checkpoint references a task plan file that no longer exists. +State files exist but the task plan is missing. -**Checkpoint:** .map/progress.md -**Expected task plan:** .map/task_plan_[branch].md +**State:** .map//step_state.json +**Expected plan:** .map//task_plan_.md **Options:** -1. Create a new task plan based on checkpoint information -2. Clear checkpoint and start fresh workflow +1. Create a new task plan based on state information +2. Clear state files and start fresh workflow ``` ### Actor/Monitor Agent Failure If subagent fails during resume: -1. Save current progress to checkpoint +1. State is preserved in step_state.json (orchestrator saves after each step) 2. Display error message with last successful state 3. 
Suggest retry or escalation to user @@ -235,34 +256,34 @@ User: /map-resume Claude: ## Found Incomplete Workflow **Task:** Implement user authentication with JWT tokens -**Current Phase:** implementation -**Turn Count:** 12 -**Started:** 2025-01-15T10:30:00 -**Last Updated:** 2025-01-15T14:45:00 +**Branch:** feat-user-auth +**Current Step:** 2.3 +**Current Phase:** ACTOR +**Started:** 2025-01-15T10:30:00Z ### Progress Overview 3/5 subtasks completed (60%) -### Completed Subtasks ✅ +### Completed Subtasks - [x] **ST-001**: Create User model with SQLite schema - [x] **ST-002**: Implement password hashing with bcrypt - [x] **ST-003**: Create login API endpoint -### Remaining Subtasks 📋 -- [ ] **ST-004**: Implement JWT token generation -- [ ] **ST-005**: Add logout and token refresh endpoints +### Remaining Subtasks +- [ ] **ST-004**: Implement JWT token generation — currently at phase: ACTOR +- [ ] **ST-005**: Add logout and token refresh endpoints — pending -Resume from last checkpoint? [Y/n] +Resume workflow from last checkpoint? -User: Y +User: Resume -Claude: Resuming workflow from ST-004... -[Continues Actor→Monitor loop for remaining subtasks] +Claude: Resuming workflow from ST-004 (phase: ACTOR)... +[Continues state machine execution for remaining subtasks] ``` -### Example 2: User Abandons Checkpoint +### Example 2: User Starts Fresh -User decides to start fresh: +User decides to abandon current progress: ``` User: /map-resume @@ -270,11 +291,11 @@ User: /map-resume Claude: ## Found Incomplete Workflow [Shows progress summary] -Resume from last checkpoint? [Y/n] +Resume workflow from last checkpoint? -User: n +User: Start fresh -Claude: Checkpoint cleared. Start fresh with /map-efficient. +Claude: State cleared. Start fresh with /map-efficient. ``` ### Example 3: No Checkpoint @@ -286,12 +307,12 @@ User: /map-resume Claude: ## No Workflow in Progress -No checkpoint file found at `.map/progress.md`. +No checkpoint file found at `.map/feat-auth/step_state.json`. To start a new workflow, use: - `/map-efficient "task description"` - Standard implementation - `/map-debug "issue description"` - Debugging -- `/map-fast "task description"` - Throwaway code +- `/map-fast "task description"` - Minimal workflow No recovery needed. ``` @@ -303,18 +324,15 @@ No recovery needed. ### After `/clear` If user runs `/clear` during a workflow: -- Checkpoint is preserved in `.map/progress.md` +- State is preserved in `.map//step_state.json` and `workflow_state.json` - User can resume with `/map-resume` - Fresh context starts from checkpoint state ### With `/map-efficient` -`/map-efficient` automatically saves checkpoints: -- After decomposition phase -- After each subtask completion -- Before each Actor call - -`/map-resume` can continue from any of these checkpoints. 
+`/map-efficient` uses `map_orchestrator.py` which maintains `step_state.json`: +- State is updated after each step validation +- `/map-resume` reads this state to determine where to continue ### With `/map-learn` @@ -326,53 +344,55 @@ After `/map-resume` completes a workflow: ## Technical Notes -### Checkpoint File Format - -The `.map/progress.md` file uses YAML frontmatter: - -```yaml ---- -task_plan: "Task description" -current_phase: implementation -turn_count: 12 -started_at: 2025-01-15T10:30:00 -updated_at: 2025-01-15T14:45:00 -branch_name: feat/user-auth -completed_subtasks: - - ST-001 - - ST-002 - - ST-003 -subtasks: - - id: ST-001 - description: Create User model - status: complete - completed_at: 2025-01-15T11:00:00 - - id: ST-002 - description: Implement password hashing - status: complete - completed_at: 2025-01-15T12:30:00 - - id: ST-004 - description: Implement JWT generation - status: pending ---- +### State File Format + +The `.map//step_state.json` is managed by `map_orchestrator.py`: + +```json +{ + "current_step": "2.3", + "current_subtask": "ST-004", + "subtask_sequence": ["ST-001", "ST-002", "ST-003", "ST-004", "ST-005"], + "completed_subtasks": ["ST-001", "ST-002", "ST-003"], + "retry_count": 0, + "max_retries": 5, + "execution_mode": "step_by_step", + "plan_approved": true, + "circuit_breaker": { + "tool_count": 42, + "max_iterations": 200 + } +} +``` -# MAP Workflow Progress -[Human-readable markdown body] +The `.map//workflow_state.json` tracks enforcement gates: + +```json +{ + "workflow": "map-efficient", + "started_at": "2025-01-15T10:30:00Z", + "current_subtask": "ST-004", + "current_state": "IN_PROGRESS", + "completed_steps": {"ST-001": [...], "ST-002": [...], "ST-003": [...]}, + "pending_steps": {"ST-004": [...], "ST-005": [...]}, + "subtask_sequence": ["ST-001", "ST-002", "ST-003", "ST-004", "ST-005"] +} ``` ### State Restoration When resuming: -1. Parse YAML frontmatter for machine state -2. Use human-readable body for context summary -3. Load full task plan from referenced file -4. Continue from last incomplete subtask +1. Read `step_state.json` for orchestrator position (current step + subtask) +2. Read `workflow_state.json` for completed/pending subtask list +3. Read `task_plan_.md` for AAG contracts and validation criteria +4. Call `map_orchestrator.py get_next_step` to determine next action +5. Continue phase-based execution from that point ### Context Efficiency Resume is designed for context efficiency: -- Only loads necessary state, not full conversation history -- Checkpoint contains enough context to continue +- Only loads necessary state files, not full conversation history +- State files contain enough context to continue - Fresh agent calls don't carry previous context pollution --- @@ -393,25 +413,25 @@ Resume is designed for context efficiency: ### Issue: Checkpoint shows wrong subtask status -**Symptom:** Checkpoint says ST-003 is complete, but code shows incomplete implementation. +**Symptom:** step_state.json says ST-003 is complete, but code shows incomplete implementation. -**Cause:** Session crashed between code application and checkpoint update. +**Cause:** Session crashed between code application and state update. **Fix:** 1. Manually verify each subtask's actual completion status -2. Update checkpoint to match reality +2. Update step_state.json to match reality 3. Resume from corrected state ### Issue: Resume loads but doesn't continue -**Symptom:** Progress displayed, user confirms Continue, but nothing happens. 
+**Symptom:** Progress displayed, user confirms Resume, but nothing happens. **Cause:** Task plan file missing or invalid. **Fix:** -1. Check for `.map/task_plan_*.md` file +1. Check for `.map//task_plan_.md` file 2. Recreate task plan if missing -3. Ensure validation criteria are present for remaining subtasks +3. Ensure AAG contracts are present for remaining subtasks ### Issue: Actor context missing after resume @@ -421,3 +441,14 @@ Resume is designed for context efficiency: 1. Read recent git diff for changed files 2. Load relevant source files for remaining subtasks 3. Provide context summary in Actor prompt + +### Issue: step_state.json and workflow_state.json out of sync + +**Symptom:** step_state.json shows ST-004 in progress, but workflow_state.json shows ST-003 pending. + +**Cause:** Crash between orchestrator update and workflow state update. + +**Fix:** +1. Trust `step_state.json` as the canonical source +2. Update `workflow_state.json` to match +3. Resume from corrected state diff --git a/.claude/commands/map-review.md b/.claude/commands/map-review.md index 3af2dcd..1e14b69 100644 --- a/.claude/commands/map-review.md +++ b/.claude/commands/map-review.md @@ -94,19 +94,21 @@ Task( **Playbook Context:** [paste relevant playbook bullets] -Provide quality assessment: -- Code quality score (0-100) -- Test coverage assessment -- Documentation completeness -- Maintainability score -- Overall verdict +Provide quality assessment using 1-10 scoring (matches evaluator agent template): +- Functionality score (1-10) +- Code quality score (1-10) +- Performance score (1-10) +- Security score (1-10) +- Testability score (1-10) +- Completeness score (1-10) Output JSON with: -- scores: {code_quality, test_coverage, documentation, maintainability, overall} -- verdict: 'excellent' | 'good' | 'acceptable' | 'needs_work' | 'reject' +- scores: {functionality, code_quality, performance, security, testability, completeness} +- overall_score: weighted float (1.0-10.0) +- recommendation: 'proceed' | 'improve' | 'reconsider' - strengths: array of strings -- improvements_needed: array of strings -- final_recommendation: string" +- weaknesses: array of strings +- next_steps: array of strings" ) ``` @@ -133,10 +135,10 @@ Once all three agents have completed, combine their findings: - Affected Files: [predictor.affected_files.length] **Evaluator Assessment:** -- Overall Score: [evaluator.scores.overall]/100 -- Code Quality: [evaluator.scores.code_quality]/100 -- Test Coverage: [evaluator.scores.test_coverage]/100 -- Verdict: [evaluator.verdict] +- Overall Score: [evaluator.overall_score]/10 +- Code Quality: [evaluator.scores.code_quality]/10 +- Security: [evaluator.scores.security]/10 +- Recommendation: [evaluator.recommendation] ### Critical Issues (High Severity) @@ -160,9 +162,9 @@ Once all three agents have completed, combine their findings: ### Final Verdict Based on combined analysis: -- **Proceed if:** Monitor verdict = 'approved' AND Evaluator verdict = 'excellent'|'good'|'acceptable' -- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator verdict = 'needs_work' -- **Block if:** Monitor verdict = 'rejected' OR Evaluator verdict = 'reject' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0) +- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed' +- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve' +- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor 
risk_level = 'high' AND breaking_changes.length > 0) --- diff --git a/.claude/hooks/post-edit-reminder.py b/.claude/hooks/post-edit-reminder.py index 800abbf..de96db8 100755 --- a/.claude/hooks/post-edit-reminder.py +++ b/.claude/hooks/post-edit-reminder.py @@ -12,11 +12,22 @@ import json import os +import re import subprocess import sys from pathlib import Path +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name.""" try: @@ -24,10 +35,10 @@ def get_branch_name() -> str: ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, - timeout=2, + timeout=1, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/.claude/hooks/ralph-context-pruner.py b/.claude/hooks/ralph-context-pruner.py index 258c32b..8adb60f 100755 --- a/.claude/hooks/ralph-context-pruner.py +++ b/.claude/hooks/ralph-context-pruner.py @@ -65,6 +65,7 @@ def get_branch_name() -> str: capture_output=True, text=True, cwd=PROJECT_DIR, + timeout=2, ) if result.returncode == 0: return sanitize_branch_name(result.stdout.strip()) diff --git a/.claude/hooks/ralph-iteration-logger.py b/.claude/hooks/ralph-iteration-logger.py index debcc45..2bcead6 100755 --- a/.claude/hooks/ralph-iteration-logger.py +++ b/.claude/hooks/ralph-iteration-logger.py @@ -89,6 +89,7 @@ def get_branch_name() -> str: capture_output=True, text=True, cwd=PROJECT_DIR, + timeout=1, ) if result.returncode == 0: return sanitize_branch_name(result.stdout.strip()) diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py index 55a769f..f0b8d34 100755 --- a/.claude/hooks/workflow-context-injector.py +++ b/.claude/hooks/workflow-context-injector.py @@ -74,6 +74,16 @@ ] +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." 
in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name.""" import subprocess @@ -83,10 +93,10 @@ def get_branch_name() -> str: ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, - timeout=2, + timeout=1, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py index 73a0098..01e3429 100755 --- a/.claude/hooks/workflow-gate.py +++ b/.claude/hooks/workflow-gate.py @@ -42,6 +42,7 @@ """ import json import os +import re import sys from pathlib import Path from typing import Dict, Optional @@ -53,6 +54,16 @@ REQUIRED_STEPS = ["actor", "monitor"] +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name (sanitized for filesystem).""" try: @@ -65,9 +76,7 @@ def get_branch_name() -> str: timeout=1, ) if result.returncode == 0: - branch = result.stdout.strip() - # Sanitize for filesystem (same as other MAP tools) - return branch.replace("/", "-").replace(" ", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/.claude/references/step-state-schema.md b/.claude/references/step-state-schema.md index 7e538b1..8b5c290 100644 --- a/.claude/references/step-state-schema.md +++ b/.claude/references/step-state-schema.md @@ -65,13 +65,12 @@ Current step set (linear order; some are conditional): 8. `2.2` RESEARCH (conditional) 9. `2.3` ACTOR 10. `2.4` MONITOR -11. `2.5` RETRY_LOOP (conditional) -12. `2.6` PREDICTOR (conditional) -13. `2.7` APPLY_CHANGES -14. `2.8` TESTS_GATE (conditional) -15. `2.9` LINTER_GATE (conditional) -16. `2.10` VERIFY_ADHERENCE -17. `2.11` SUBTASK_APPROVAL (conditional; step_by_step only) +11. `2.6` PREDICTOR (conditional) +12. `2.7` UPDATE_STATE +13. `2.8` TESTS_GATE (conditional) +14. `2.9` LINTER_GATE (conditional) +15. `2.10` VERIFY_ADHERENCE +16. `2.11` SUBTASK_APPROVAL (conditional; step_by_step only) ## Relationship to workflow_state.json diff --git a/.claude/references/workflow-state-schema.md b/.claude/references/workflow-state-schema.md index 16e4657..42e34fd 100644 --- a/.claude/references/workflow-state-schema.md +++ b/.claude/references/workflow-state-schema.md @@ -231,7 +231,7 @@ Check current state: ```bash # Show current state -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') cat .map/${BRANCH}/workflow_state.json | jq '.' 
# Check what steps are completed for current subtask diff --git a/.claude/settings.hooks.json b/.claude/settings.hooks.json index cfee485..eb4e7ef 100644 --- a/.claude/settings.hooks.json +++ b/.claude/settings.hooks.json @@ -22,7 +22,7 @@ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py", - "timeout": 1, + "timeout": 3, "description": "Injects workflow context only for Edit/Write and significant Bash commands" } ] @@ -36,7 +36,7 @@ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py", - "timeout": 1, + "timeout": 3, "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" } ] diff --git a/.claude/settings.json b/.claude/settings.json index f0ac2c7..22c2367 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -9,30 +9,30 @@ "Edit(./.env*)", "Edit(**/*credentials*)", "Edit(**/*secret*)", - "Bash(rm:-rf)", "Bash(rm -rf)", - "Bash(git:push:--force:origin:main)", - "Bash(git:push:--force:origin:master)", - "Bash(git:reset:--hard)" + "Bash(git push --force origin main)", + "Bash(git push --force origin master)", + "Bash(git reset --hard)" ], "allow": [ - "Bash(mapify:*)", - "Bash(pytest:*)", - "Bash(make:lint)", - "Bash(make:test)", - "Bash(sqlite3:.claude/playbook.db:*)", - "Bash(ruff:*)", - "Bash(black:*)", - "Bash(git:status)", - "Bash(git:diff)", - "Bash(git:log)", - "Bash(git:branch)", - "Bash(git:add)", - "Bash(git:commit)", - "Bash(helm:template:*)", - "Bash(go:vet:*)", - "Bash(gofmt:*)", - "Bash(kubectl:get:*)" + "Bash(mapify *)", + "Bash(pytest *)", + "Bash(make lint)", + "Bash(make test)", + "Bash(sqlite3 .claude/playbook.db *)", + "Bash(ruff *)", + "Bash(black *)", + "Bash(git status)", + "Bash(git diff)", + "Bash(git log)", + "Bash(git branch)", + "Bash(git rev-parse *)", + "Bash(git add)", + "Bash(git commit)", + "Bash(helm template *)", + "Bash(go vet *)", + "Bash(gofmt *)", + "Bash(kubectl get *)" ] } } diff --git a/.claude/skills/README.md b/.claude/skills/README.md index 1e2854c..fafbe35 100644 --- a/.claude/skills/README.md +++ b/.claude/skills/README.md @@ -34,7 +34,7 @@ Skills provide specialized guidance without executing code. They help users unde **Content:** - Quick decision tree (5 questions) -- Workflow comparison matrix (5 workflows) +- Workflow comparison matrix (4 implemented + 2 planned) - Detailed workflow descriptions - Agent architecture overview - 8 deep-dive resource files @@ -50,10 +50,10 @@ MAP: [Shows decision tree and comparison matrix] **Resources available:** - `map-fast-deep-dive.md` - When (not) to use /map-fast - `map-efficient-deep-dive.md` - Optimization strategies -- `map-feature-deep-dive.md` - Full validation workflow - `map-debug-deep-dive.md` - Debugging techniques -- `map-refactor-deep-dive.md` - Dependency analysis -- `agent-architecture.md` - How 8 agents orchestrate +- `map-feature-deep-dive.md` - Full validation workflow (PLANNED) +- `map-refactor-deep-dive.md` - Dependency analysis (PLANNED) +- `agent-architecture.md` - How 12 agents orchestrate - `playbook-system.md` - Knowledge storage and search --- diff --git a/.claude/skills/map-cli-reference/SKILL.md b/.claude/skills/map-cli-reference/SKILL.md index 45fdf44..1d8eb8d 100644 --- a/.claude/skills/map-cli-reference/SKILL.md +++ b/.claude/skills/map-cli-reference/SKILL.md @@ -1,5 +1,15 @@ +--- name: map-cli-reference -description: Use when encountering mapify CLI or MCP usage errors (no such command, no such option, parameter not found). 
Provides mem0 MCP and validate command corrections with common mistake patterns. +description: >- + Quick reference for mapify CLI and mem0 MCP usage errors. Use when + encountering "no such command", "no such option", "parameter not found", + or when user asks "how to use mapify", "mem0 commands", "validate graph". + Do NOT use for workflow selection (use map-workflows-guide) or planning + methodology (use map-planning). +metadata: + author: azalio + version: 3.1.0 + mcp-server: mem0 --- # MAP CLI Quick Reference @@ -137,6 +147,56 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) --- +## Examples + +### Example 1: Fixing a deprecated command error + +**User says:** "I'm getting `Error: No such command 'playbook'` when running mapify" + +**Actions:** +1. Identify error type — deprecated command usage +2. Explain: playbook commands removed in v4.0+ +3. Provide replacement: `mcp__mem0__map_tiered_search` for reads, `Task(subagent_type="curator", ...)` for writes + +**Result:** User switches to mem0 MCP tools, error resolved. + +### Example 2: Validating a dependency graph + +**User says:** "How do I check if my task plan has circular dependencies?" + +**Actions:** +1. Show command: `mapify validate graph task_plan.json` +2. Explain exit codes: 0 = valid, 1 = invalid, 2 = malformed JSON +3. Suggest `--strict` flag for CI pipelines and `--visualize` for debugging + +**Result:** User validates their task plan and fixes dependency issues before running workflow. + +### Example 3: mem0 MCP not responding + +**User says:** "mem0 tiered search returns empty results" + +**Actions:** +1. Check mem0 MCP configuration in `.claude/mcp_config.json` +2. Verify namespace conventions (org/project/branch) +3. Test with broad query: `mcp__mem0__map_tiered_search(query="test", limit=1)` + +**Result:** User identifies configuration issue and restores mem0 connectivity. + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| `No such command 'playbook'` | Deprecated in v4.0+ | Use `mcp__mem0__map_tiered_search` for pattern retrieval | +| `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | +| mem0 tool invocation fails | MCP server not configured | Add mem0 to `.claude/mcp_config.json` and restart | +| `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | +| Patterns not persisting | Writing directly instead of via Curator | Always use `Task(subagent_type="curator", ...)` for pattern writes | +| `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | + +--- + **Version**: 1.1 **Last Updated**: 2026-01-15 **Lines**: ~200 (follows 500-line skill rule) diff --git a/.claude/skills/map-cli-reference/scripts/check-command.sh b/.claude/skills/map-cli-reference/scripts/check-command.sh new file mode 100755 index 0000000..22e3208 --- /dev/null +++ b/.claude/skills/map-cli-reference/scripts/check-command.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# Check if a mapify subcommand exists and show usage help. 
+# +# Usage: +# ./check-command.sh [option] +# +# Examples: +# ./check-command.sh validate graph +# ./check-command.sh init +# ./check-command.sh playbook # deprecated command +# +# Exit codes: +# 0 - Command exists +# 1 - Command not found +# 2 - Command deprecated + +set -euo pipefail + +SUBCOMMAND="${1:-}" +OPTION="${2:-}" + +if [ -z "$SUBCOMMAND" ]; then + echo "Usage: $0 [option]" + echo "" + echo "Checks if a mapify subcommand exists." + echo "" + echo "Available subcommands:" + echo " init - Initialize project with MAP framework" + echo " check - Run system checks" + echo " upgrade - Upgrade agent templates" + echo " validate - Validate dependency graphs" + echo "" + echo "Deprecated subcommands:" + echo " playbook - Removed in v4.0+ (use mem0 MCP)" + exit 1 +fi + +# Known deprecated commands +DEPRECATED_COMMANDS="playbook" + +# Known valid commands +VALID_COMMANDS="init check upgrade validate" + +# Check deprecated first +for dep in $DEPRECATED_COMMANDS; do + if [ "$SUBCOMMAND" = "$dep" ]; then + echo "ERROR: '$SUBCOMMAND' is deprecated (removed in v4.0+)" + echo "" + echo "Replacements:" + case "$SUBCOMMAND" in + playbook) + echo " Pattern retrieval: mcp__mem0__map_tiered_search(query=\"...\", limit=5)" + echo " Pattern storage: Task(subagent_type=\"curator\", ...)" + echo " Pattern archival: mcp__mem0__map_archive_pattern(...)" + ;; + esac + exit 2 + fi +done + +# Check valid commands +FOUND=0 +for cmd in $VALID_COMMANDS; do + if [ "$SUBCOMMAND" = "$cmd" ]; then + FOUND=1 + break + fi +done + +if [ "$FOUND" -eq 0 ]; then + echo "ERROR: No such command '$SUBCOMMAND'" + echo "" + echo "Available commands: $VALID_COMMANDS" + echo "" + echo "Did you mean one of these?" + # Simple fuzzy match + for cmd in $VALID_COMMANDS; do + echo " mapify $cmd" + done + exit 1 +fi + +# Command exists, show help +echo "OK: 'mapify $SUBCOMMAND' is a valid command" + +# Show subcommand-specific help +case "$SUBCOMMAND" in + validate) + echo "" + echo "Usage: mapify validate graph [--strict] [--visualize]" + echo "" + echo "Options:" + echo " --strict Fail on warnings (exit code 1)" + echo " --visualize Show dependency graph" + echo "" + echo "Exit codes: 0=valid, 1=invalid, 2=malformed input" + if [ -n "$OPTION" ] && [ "$OPTION" != "graph" ]; then + echo "" + echo "WARNING: Unknown validate subcommand '$OPTION'. Did you mean 'graph'?" + fi + ;; + init) + echo "" + echo "Usage: mapify init [project-name] [--mcp essential|full] [--force]" + echo "" + echo "Options:" + echo " --mcp essential Install essential MCP tools only" + echo " --mcp full Install all MCP tools" + echo " --force Overwrite existing configuration" + ;; + check) + echo "" + echo "Usage: mapify check [--debug]" + echo "" + echo "Options:" + echo " --debug Show detailed diagnostic information" + ;; + upgrade) + echo "" + echo "Usage: mapify upgrade" + echo "" + echo "Upgrades agent templates to latest version." + ;; +esac + +exit 0 diff --git a/.claude/skills/map-planning/SKILL.md b/.claude/skills/map-planning/SKILL.md index cc72df3..244484e 100644 --- a/.claude/skills/map-planning/SKILL.md +++ b/.claude/skills/map-planning/SKILL.md @@ -1,8 +1,17 @@ --- name: map-planning version: "1.0.0" -description: Implements file-based planning for MAP Framework workflows with branch-scoped task tracking in .map/ directory. Prevents goal drift via automatic plan synchronization before tool use and validates completion state on exit. +description: >- + File-based planning for MAP Framework with branch-scoped task tracking + in .map/ directory. 
Use when user says "create a plan", "track progress", + "show task status", or needs persistent planning across agent sessions. + Prevents goal drift via automatic plan synchronization. Do NOT use for + workflow selection (use map-workflows-guide) or CLI errors (use + map-cli-reference). allowed-tools: Read, Write, Edit, Bash, Glob, Grep +metadata: + author: azalio + version: 3.1.0 hooks: PreToolUse: - matcher: "Write|Edit|Bash" @@ -31,19 +40,23 @@ Instead of relying solely on conversation context (limited window), this skill e ## File Structure -All files reside in `.map/` directory with branch-based naming: +All files reside in `.map//` directory with branch-based naming: ``` .map/ -├── task_plan_.md # Primary plan with phases and status -├── findings_.md # Research findings, decisions, key files -└── progress_.md # Action log, errors, test results +└── / + ├── task_plan_.md # Primary plan with phases and status + ├── findings_.md # Research findings, decisions, key files + ├── progress_.md # Action log, errors, test results + ├── workflow_state.json # Subtask completion tracking + ├── step_state.json # Orchestrator step state + └── evidence/ # Artifact-gated validation evidence ``` **Example**: On branch `feature-auth`: -- `.map/task_plan_feature-auth.md` -- `.map/findings_feature-auth.md` -- `.map/progress_feature-auth.md` +- `.map/feature-auth/task_plan_feature-auth.md` +- `.map/feature-auth/findings_feature-auth.md` +- `.map/feature-auth/progress_feature-auth.md` ## Hook Behavior @@ -103,7 +116,7 @@ Creates `.map/` directory and skeleton files for current branch. - Check validation criteria checkboxes [x] when done ### 3-Strike Error Protocol -Log errors to `progress_.md` after attempt 3+. After 3 failed attempts: +Log errors to `.map//progress_.md` after attempt 3+. After 3 failed attempts: 1. Escalate to user (CONTINUE/SKIP/ABORT options) 2. If SKIP: mark phase `blocked`, move to next subtask 3. If ABORT: mark workflow `blocked`, exit @@ -163,6 +176,56 @@ Only Monitor agent updates task_plan status (via `status_update` output field). --- +## Examples + +### Example 1: Starting a new feature plan + +**User says:** "Create a plan for implementing user notifications" + +**Actions:** +1. Run `init-session.sh` to create `.map/` skeleton for current branch +2. Populate `.map//task_plan_.md` with phases: research, design, implement, test +3. Set Goal: "Implement user notification system with email and in-app channels" +4. Mark ST-001 as `in_progress` + +**Result:** Persistent plan files created in `.map/` directory, PreToolUse hook keeps agent focused on current phase. + +### Example 2: Resuming work after context reset + +**User says:** "Show task status" or "What was I working on?" + +**Actions:** +1. Read `.map//task_plan_.md` to find current phase +2. Read `.map//progress_.md` for recent action log +3. Read `.map//findings_.md` for accumulated decisions + +**Result:** Agent resumes from last checkpoint without losing context, even after conversation window reset. + +### Example 3: Handling repeated failures + +**User says:** "The database migration keeps failing" + +**Actions:** +1. Log error to `.map//progress_.md` (attempt count tracked) +2. After 3 failed attempts, trigger 3-Strike Protocol +3. Present CONTINUE/SKIP/ABORT options to user + +**Result:** Phase marked `blocked`, agent moves to next subtask or exits cleanly. 
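+
+### Example 4: Inspecting the current plan from the shell
+
+A minimal read-only sketch for checking plan state outside a workflow. It assumes the helper script path shipped with this skill; the `grep` patterns are illustrative and depend on the headings your plan template actually uses (Goal, Current Phase, Terminal State).
+
+```bash
+# Resolve the branch-scoped plan path, e.g. .map/feature-auth/task_plan_feature-auth.md
+PLAN_PATH=$(bash .claude/skills/map-planning/scripts/get-plan-path.sh)
+
+# Show the goal and the currently active phase without loading the whole file
+grep -A 2 '^## Goal' "$PLAN_PATH"
+grep -A 2 '^## Current Phase' "$PLAN_PATH"
+```
+
+**Result:** Quick status check from the terminal, useful before deciding whether to continue with `/map-resume` or update the plan manually.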
+ +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| "Plan not found" warning | `.map/` directory not initialized | Run `init-session.sh` or start a MAP workflow | +| Stop hook warns "No terminal state" | `## Terminal State` section not updated | Update Terminal State to `complete`, `blocked`, `won't_do`, or `superseded` | +| Branch name causes file errors | Branch has `/` characters | Scripts auto-sanitize: `feature/auth` becomes `feature-auth` | +| PreToolUse hook shows stale focus | Plan file not updated after phase completion | Update `**Status:**` to `complete` and advance `## Current Phase` | +| `/map-fast` ignores planning | By design — `/map-fast` skips planning | Use `/map-efficient` for planning support | + +--- + **Version**: 1.0.0 (2025-01-10) **References**: diff --git a/.claude/skills/map-planning/scripts/get-plan-path.sh b/.claude/skills/map-planning/scripts/get-plan-path.sh index a7b8937..8051c19 100755 --- a/.claude/skills/map-planning/scripts/get-plan-path.sh +++ b/.claude/skills/map-planning/scripts/get-plan-path.sh @@ -4,32 +4,37 @@ # # Description: # Detects current git branch and outputs path to branch-specific task plan file. -# Sanitizes branch names by replacing '/' with '-' for filesystem compatibility. +# Sanitizes branch names for filesystem compatibility. # Defaults to 'main' branch when not in a git repository. # # Usage: # PLAN_PATH=$(bash .claude/skills/map-planning/scripts/get-plan-path.sh) # # Output: -# .map/task_plan_.md +# .map//task_plan_.md # # Examples: -# Branch: feature/map-planning -> .map/task_plan_feature-map-planning.md -# Branch: main -> .map/task_plan_main.md -# Not in repo -> .map/task_plan_main.md +# Branch: feature/map-planning -> .map/feature-map-planning/task_plan_feature-map-planning.md +# Branch: main -> .map/main/task_plan_main.md +# Not in repo -> .map/main/task_plan_main.md set -euo pipefail # Detect current git branch, default to 'main' if not in git repo -BRANCH=$(git branch --show-current 2>/dev/null || echo 'main') +BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'main') # Handle empty branch (detached HEAD or git issue) if [ -z "$BRANCH" ]; then BRANCH="main" fi -# Sanitize branch name: replace '/' with '-' for filesystem safety -SANITIZED_BRANCH=$(echo "$BRANCH" | tr '/' '-') +# Sanitize branch name for filesystem safety (matches MAP orchestrator convention) +SANITIZED_BRANCH=$(echo "$BRANCH" | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') -# Output the plan file path -echo ".map/task_plan_${SANITIZED_BRANCH}.md" +# Fallback if sanitization produced empty string +if [ -z "$SANITIZED_BRANCH" ]; then + SANITIZED_BRANCH="main" +fi + +# Output the plan file path (nested directory convention) +echo ".map/${SANITIZED_BRANCH}/task_plan_${SANITIZED_BRANCH}.md" diff --git a/.claude/skills/map-planning/scripts/init-session.sh b/.claude/skills/map-planning/scripts/init-session.sh index be75aa1..cf6ae96 100755 --- a/.claude/skills/map-planning/scripts/init-session.sh +++ b/.claude/skills/map-planning/scripts/init-session.sh @@ -3,16 +3,16 @@ # init-session.sh - Initialize planning files for new MAP session # # Description: -# Creates .map/ directory and copies templates for branch-scoped planning files. +# Creates .map// directory and copies templates for branch-scoped planning files. # Idempotent: skips files that already exist. 
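+# Branch names are sanitized with the same rules as get-plan-path.sh, e.g.
+#   "feature/My Branch!"  ->  .map/feature-My-Branch/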
# # Usage: # ${CLAUDE_PLUGIN_ROOT}/scripts/init-session.sh # # Created files: -# .map/task_plan_.md -# .map/findings_.md -# .map/progress_.md +# .map//task_plan_.md +# .map//findings_.md +# .map//progress_.md set -euo pipefail @@ -22,17 +22,22 @@ SKILL_ROOT="$(dirname "$SCRIPT_DIR")" TEMPLATE_DIR="$SKILL_ROOT/templates" # Get branch name for file naming -BRANCH=$(git branch --show-current 2>/dev/null || echo 'main') +BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'main') if [ -z "$BRANCH" ]; then BRANCH="main" fi -SANITIZED_BRANCH=$(echo "$BRANCH" | tr '/' '-') -# Create .map directory -MAP_DIR=".map" +# Sanitize branch name (matches MAP orchestrator convention) +SANITIZED_BRANCH=$(echo "$BRANCH" | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') +if [ -z "$SANITIZED_BRANCH" ]; then + SANITIZED_BRANCH="main" +fi + +# Create .map/ directory (nested convention) +MAP_DIR=".map/${SANITIZED_BRANCH}" mkdir -p "$MAP_DIR" -# Define file paths +# Define file paths (nested under branch directory) TASK_PLAN="$MAP_DIR/task_plan_${SANITIZED_BRANCH}.md" FINDINGS="$MAP_DIR/findings_${SANITIZED_BRANCH}.md" PROGRESS="$MAP_DIR/progress_${SANITIZED_BRANCH}.md" diff --git a/.claude/skills/map-workflows-guide/SKILL.md b/.claude/skills/map-workflows-guide/SKILL.md index dc12a41..dbd253f 100644 --- a/.claude/skills/map-workflows-guide/SKILL.md +++ b/.claude/skills/map-workflows-guide/SKILL.md @@ -1,12 +1,22 @@ --- name: map-workflows-guide -description: Comprehensive guide for choosing the right MAP workflow based on task type and requirements +description: >- + Guide for choosing the right MAP workflow based on task type, risk level, + and token budget. Use when user asks "which workflow should I use", + "difference between map-fast and map-efficient", "when to use map-debug", + or compares MAP workflows. Do NOT use for actual workflow execution — + use /map-efficient, /map-fast, etc. instead. Do NOT use for CLI errors + (use map-cli-reference). version: 1.0 +metadata: + author: azalio + version: 3.1.0 + mcp-server: mem0 --- # MAP Workflows Guide -This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides 5 specialized workflows, each optimized for different scenarios with varying token costs, learning capabilities, and quality gates. +This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides **10 workflow commands**: 4 primary workflows (`/map-fast`, `/map-efficient`, `/map-debug`, `/map-debate`) and 6 supporting commands (`/map-review`, `/map-check`, `/map-plan`, `/map-release`, `/map-resume`, `/map-learn`). Each is optimized for different scenarios with varying token costs, learning capabilities, and quality gates. Two additional workflows (`/map-feature`, `/map-refactor`) are planned but not yet implemented. ## Quick Decision Tree @@ -21,12 +31,12 @@ Answer these 5 questions to find your workflow: YES → Use /map-debug (70-80% tokens, focused analysis) NO → Continue to question 3 -3. Are you refactoring existing code or restructuring modules? - YES → Use /map-refactor (70-80% tokens, dependency analysis) +3. Do stakeholders need documented reasoning and trade-off analysis? + YES → Use /map-debate (3x cost, Opus arbiter, explicit reasoning) NO → Continue to question 4 4. Is this critical infrastructure or security-sensitive code? - YES → Use /map-feature (100% tokens, maximum validation) + YES → Use /map-efficient (60-70% tokens, recommended default) NO → Continue to question 5 5. 
Is this a change you'll maintain long-term or that has non-trivial impact? @@ -38,14 +48,20 @@ Answer these 5 questions to find your workflow: ## Workflow Comparison Matrix -| Aspect | `/map-fast` | `/map-efficient` | `/map-feature` | `/map-debug` | `/map-refactor` | -|--------|-----------|-----------------|----------------|-------------|-----------------| -| **Token Cost** | 40-50% | **60-70%** | 100% (baseline) | 70-80% | 70-80% | -| **Learning** | ❌ None | ✅ Batched | ✅ Per-subtask | ✅ Per-subtask | ✅ Per-subtask | -| **Quality Gates** | Basic | Essential | All 8 agents | Focused | Focused | -| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Always | ✅ Yes | ✅ Yes | -| **Best For** | Low-risk | **Production** | Critical | Bugs | Refactoring | -| **Recommendation** | Use sparingly | **DEFAULT** | High-risk | Issues | Changes | +| Aspect | `/map-fast` | `/map-efficient` | `/map-debug` | `/map-debate` | +|--------|-----------|-----------------|-------------|--------------| +| **Token Cost** | 40-50% | **60-70%** | 70-80% | ~3x baseline | +| **Learning** | ❌ None | ✅ Via /map-learn | ✅ Per-subtask | ✅ Via /map-learn | +| **Quality Gates** | Basic | Essential | Focused | Multi-variant | +| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Yes | ⚠️ Conditional | +| **Multi-Variant** | ❌ Never | ⚠️ Optional (--self-moa) | ❌ Never | ✅ Always (3 variants) | +| **Synthesis Model** | N/A | Sonnet | N/A | **Opus** | +| **Best For** | Low-risk | **Production** | Bugs | Reasoning transparency | +| **Recommendation** | Use sparingly | **DEFAULT** | Issues | Complex decisions | + +> **Note:** `/map-feature` and `/map-refactor` are **planned but not yet implemented**. +> Use `/map-efficient` for critical features and refactoring tasks. +> See [Planned Workflows](#planned-workflows) below for details. --- @@ -61,12 +77,12 @@ Answer these 5 questions to find your workflow: **What you get:** - ✅ Full implementation (Actor generates code) - ✅ Basic validation (Monitor checks correctness) -- ✅ Quality check (Evaluator scores solution) +- ❌ NO quality scoring (Evaluator skipped) - ❌ NO impact analysis (Predictor skipped entirely) - ❌ NO learning (Reflector/Curator skipped) **Trade-offs:** -- Saves 50-60% tokens vs /map-feature +- Saves 50-60% tokens vs full pipeline (every agent per subtask) - mem0 never improves (no patterns stored) - Knowledge never accumulates - Minimal quality gates (only basic checks) @@ -102,14 +118,15 @@ Answer these 5 questions to find your workflow: **What you get:** - ✅ Full implementation (Actor) - ✅ Comprehensive validation (Monitor with feedback loops) -- ✅ Quality gates (Evaluator approval) - ✅ Impact analysis (Predictor runs conditionally) -- ✅ **Batched learning** (Reflector/Curator run once at end) +- ✅ Tests gate + Linter gate per subtask +- ✅ Final-Verifier (adversarial verification at end) +- ✅ **Learning via /map-learn** (Reflector/Curator, optional after workflow) **Optimization strategy:** - **Conditional Predictor:** Runs only if risk detected (security, breaking changes) - **Batched Learning:** Reflector/Curator run ONCE after all subtasks complete -- **Result:** 35-40% token savings vs /map-feature while preserving learning +- **Result:** 35-40% token savings vs full pipeline while preserving learning - **Same quality gates:** Monitor still validates each subtask **When Predictor runs:** @@ -140,58 +157,7 @@ Despite token optimization, preserves: --- -### 3. 
/map-feature — Critical Features 🏗️ - -**Use this when:** -- Implementing security-critical functionality -- First-time complex features requiring maximum validation -- High-risk changes affecting many systems -- You need complete assurance before production -- Learning is critical for future similar tasks - -**What you get:** -- ✅ Full implementation (Actor) -- ✅ Comprehensive validation (Monitor with loops) -- ✅ **Per-subtask impact analysis** (Predictor always runs) -- ✅ Quality gates (Evaluator always runs) -- ✅ **Per-subtask learning** (Reflector/Curator after each subtask) - -**Trade-offs:** -- 100% token cost (no optimization applied) -- Slower execution (maximum agent cycles) -- Maximum quality assurance -- Most comprehensive learning (frequent reflections) -- Best for high-stakes implementations - -**When this is required:** -- Authentication/authorization systems -- Payment processing -- Database schema changes -- Multi-service coordination -- Code that affects many dependencies - -**Example tasks:** -- "Implement secure JWT authentication system" -- "Refactor database schema for multi-tenancy" -- "Add payment processing via Stripe" -- "Build real-time notification system" - -**Command syntax:** -```bash -/map-feature [task description] -``` - -**Agent pipeline:** -``` -TaskDecomposer → Actor → Monitor → Predictor → -Evaluator → Reflector → Curator → [Next subtask] -``` - -**See also:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) - ---- - -### 4. /map-debug — Bug Fixes 🐛 +### 3. /map-debug — Bug Fixes 🐛 **Use this when:** - Fixing specific bugs or defects @@ -234,52 +200,27 @@ Evaluator → Reflector → Curator → [Next subtask] --- -### 5. /map-refactor — Code Restructuring 🔧 +### Planned Workflows -**Use this when:** -- Refactoring existing code for readability -- Improving code structure or design -- Cleaning up technical debt -- Renaming/reorganizing modules -- Extracting common logic +The following workflows are **planned but not yet implemented**. Use `/map-efficient` as a substitute for both. -**What you get:** -- ✅ Implementation (Actor) -- ✅ Validation (Monitor) -- ✅ **Dependency impact analysis** (Predictor focused on dependencies) -- ✅ Quality gates (Evaluator) -- ✅ Learning (Reflector/Curator) +#### /map-feature — Critical Features (PLANNED) -**Specialized for:** -- Breaking change detection -- Dependency tracking -- Migration planning -- Careful phased refactoring +Intended for security-critical and high-risk features requiring maximum validation (100% token cost, per-subtask learning, Predictor always runs). **Not yet implemented.** Use `/map-efficient` instead — it provides the same agent pipeline with conditional Predictor and batched learning. -**Example tasks:** -- "Refactor auth service to separate concerns" -- "Extract common validation logic into shared module" -- "Rename User model to Account throughout codebase" -- "Convert callback-based API to promise-based" +**Design reference:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) -**Command syntax:** -```bash -/map-refactor [refactoring description] -``` +#### /map-refactor — Code Restructuring (PLANNED) -**Impact analysis includes:** -- Which files/modules depend on changed code -- Potential breaking changes -- Migration strategy -- Scope of refactoring +Intended for refactoring with dependency-focused impact analysis and breaking change detection. 
**Not yet implemented.** Use `/map-efficient` instead — describe the refactoring intent in the task description for appropriate Predictor analysis. -**See also:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) +**Design reference:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) --- ## Understanding MAP Agents -MAP workflows orchestrate **8 specialized agents**, each with specific responsibilities: +MAP workflows orchestrate **12 specialized agents**, each with specific responsibilities: ### Execution & Validation Agents @@ -306,6 +247,7 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Checks completeness - Approves/rejects solution - Feedback loop: Returns to Actor if score < threshold +- **Only in /map-debug, /map-review** (skipped in /map-efficient, /map-fast, /map-debate) ### Analysis Agents @@ -314,7 +256,7 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Predicts side effects - Identifies risks and breaking changes - **Conditional in /map-efficient** (runs if risk detected) -- **Always in /map-feature** (runs per subtask) +- **Always in /map-debug** (focused analysis) ### Learning Agents @@ -323,8 +265,8 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Extracts reusable patterns - Searches mem0 for existing knowledge via `mcp__mem0__map_tiered_search` - Prevents duplicate pattern storage -- **Batched in /map-efficient** (runs once at end) -- **Per-subtask in /map-feature** (extracts frequently) +- **Batched in /map-efficient** (runs once at end, via /map-learn) +- **Skipped in /map-fast** (no learning) **Curator** — Knowledge management - Stores patterns in mem0 via `mcp__mem0__map_add_pattern` @@ -341,6 +283,34 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Validates examples - Verifies external dependency docs current +### Synthesis Agents + +**Debate-Arbiter** — Multi-variant cross-evaluation (MAP Debate) +- Cross-evaluates Actor variants with explicit reasoning +- Synthesizes optimal solution from multiple approaches +- Uses Opus model for reasoning transparency +- **Only in /map-debate workflow** + +**Synthesizer** — Solution synthesis +- Extracts decisions from multiple variants +- Generates unified code from best elements (Self-MoA) +- Merges insights across Actor outputs +- **Used in /map-efficient with --self-moa flag** + +### Discovery & Verification Agents + +**Research-Agent** — Codebase discovery +- Heavy codebase reading with compressed output +- Gathers context proactively before Actor implementation +- Prevents context pollution in implementation agents +- **Used in /map-plan, /map-efficient, /map-debug** + +**Final-Verifier** — Adversarial verification (Ralph Loop) +- Root cause analysis via adversarial testing +- Terminal verification after all other agents +- Ensures no regressions or overlooked issues +- **Used in /map-check, /map-efficient** + --- ## Decision Flowchart @@ -364,26 +334,10 @@ START: What type of development task? │ │ NO ↓ │ -├─────────────────────────────────────┐ -│ Refactoring existing code? │ -│ (Improving structure, renaming) │ -├─────────────────────────────────────┘ -│ YES → /map-refactor (70-80% tokens, dependency tracking) -│ -│ NO ↓ -│ -├─────────────────────────────────────┐ -│ Critical/high-risk feature? 
│ -│ (Auth, payments, security, database)│ -├─────────────────────────────────────┘ -│ YES → /map-feature (100% tokens, full validation) -│ -│ NO ↓ -│ └─────────────────────────────────────┐ - Standard production feature? │ - (/map-efficient recommended) ←──────┘ - YES → /map-efficient (60-70% tokens, RECOMMENDED) + Everything else (features, │ + refactoring, critical code) ←──────┘ + → /map-efficient (60-70% tokens, RECOMMENDED) ``` --- @@ -410,22 +364,9 @@ Avoid /map-fast for: - Broad refactors or multi-module changes - High uncertainty requirements -**Q: What's the practical difference between /map-feature and /map-efficient?** - -A: Token cost vs learning frequency: +**Q: What about /map-feature and /map-refactor?** -**/map-feature:** Maximum assurance -- Predictor runs after EVERY subtask (100% analysis) -- Reflector/Curator run after EVERY subtask -- Cost: 100% tokens, slowest execution -- Best for: First implementations, critical systems - -**/map-efficient:** Smart optimization -- Predictor runs ONLY when risk detected (conditional) -- Reflector/Curator run ONCE at end (batched) -- Cost: 60-70% tokens, faster execution -- Same learning: Patterns still captured at end -- Best for: Standard features, most development +A: These are **planned but not yet implemented**. Use `/map-efficient` for all feature development and refactoring tasks. `/map-efficient` provides the full agent pipeline (Actor, Monitor, conditional Predictor, Tests/Linter gates, Final-Verifier) with optional learning via `/map-learn`. Describe the risk level and refactoring intent in your task description for appropriate Predictor analysis. **Q: Can I switch workflows mid-task?** @@ -477,9 +418,9 @@ For detailed information on each workflow: - **[map-fast Deep Dive](resources/map-fast-deep-dive.md)** — Token breakdown, skip conditions, risks - **[map-efficient Deep Dive](resources/map-efficient-deep-dive.md)** — Optimization strategy, Predictor conditions, batching -- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Full pipeline, cost analysis, when required - **[map-debug Deep Dive](resources/map-debug-deep-dive.md)** — Debugging strategies, error analysis, best practices -- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Impact analysis, breaking changes, migration planning +- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Design reference (PLANNED, not yet implemented) +- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Design reference (PLANNED, not yet implemented) Agent & system details: @@ -490,16 +431,16 @@ Agent & system details: ## Real-World Examples -### Example 1: Choosing between /map-efficient and /map-feature +### Example 1: Choosing /map-efficient for a critical feature **Task:** "Add OAuth2 authentication" **Analysis:** -- Affects security ✓ (high-risk indicator) -- Affects multiple modules ✓ (breaking changes possible) -- First implementation of OAuth2 ✓ (high complexity) +- Affects security (high-risk indicator) +- Affects multiple modules (breaking changes possible) +- First implementation of OAuth2 (high complexity) -**Decision:** `/map-feature` (worth 100% token cost for critical feature) +**Decision:** `/map-efficient` — describe the security-sensitive nature in the task description. Predictor will trigger conditionally on security-related subtasks. ### Example 2: Choosing /map-debug @@ -540,7 +481,7 @@ MAP: 🎯 Suggests /map-efficient ``` MAP: "Is this for production?" 
User: "Yes, but critical feature" -MAP: 🎯 Suggests /map-feature instead +MAP: 🎯 Suggests /map-efficient with --self-moa instead ``` **Direct command:** @@ -555,7 +496,7 @@ MAP: 📚 Loads this skill for context 1. **Default to /map-efficient** — It's the recommended choice for 80% of tasks 2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope -3. **Reserve /map-feature for critical paths** — Don't overuse, save for auth/payments/security +3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers 4. **Monitor pattern growth** — Use mem0 search to see learning improving 5. **Trust the optimization** — /map-efficient preserves quality while cutting token usage 6. **Review deep dives** — When in doubt, check the appropriate deep-dive resource @@ -566,11 +507,60 @@ MAP: 📚 Loads this skill for context ## Next Steps 1. **First time using MAP?** Start with `/map-efficient` -2. **Have a critical feature?** See [map-feature-deep-dive.md](resources/map-feature-deep-dive.md) +2. **Have a critical feature?** Use `/map-efficient` with risk context in the task description 3. **Debugging an issue?** See [map-debug-deep-dive.md](resources/map-debug-deep-dive.md) 4. **Understanding agents?** See [Agent Architecture](resources/agent-architecture.md) --- +## Examples + +### Example 1: Choosing a workflow for a new feature + +**User says:** "I need to add JWT authentication to the API" + +**Actions:** +1. Assess risk level — security-sensitive (high-risk indicator) +2. Check if first implementation — yes, OAuth/JWT is new +3. Multiple modules affected — auth middleware, user service, token storage + +**Result:** Recommend `/map-efficient` — describe the security context in the task. Predictor will trigger on security-sensitive subtasks. Batched learning captures patterns at the end. + +### Example 2: Quick fix with clear scope + +**User says:** "Update the error message in the login form" + +**Actions:** +1. Assess risk — low, localized text change +2. Check blast radius — single file, no dependencies +3. No security implications + +**Result:** Recommend `/map-fast` — small, low-risk change with clear acceptance criteria. No learning needed. + +### Example 3: Debugging a test failure + +**User says:** "Tests in auth.test.ts are failing after the last merge" + +**Actions:** +1. Identify task type — debugging/fixing specific issue +2. Need root cause analysis — yes, regression after merge +3. Not a new feature or refactor + +**Result:** Recommend `/map-debug` — focused on diagnosing failures with root cause analysis and regression prevention. + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | +| Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | +| No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | +| mem0 search returns empty | mem0 MCP not configured or namespaces mismatch | Verify mem0 in `.claude/mcp_config.json`, check namespace conventions | +| Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | + +--- + **Skill Version:** 1.0 **Last Updated:** 2025-11-03 **Recommended Reading Time:** 5-10 minutes diff --git a/.claude/skills/map-workflows-guide/resources/agent-architecture.md b/.claude/skills/map-workflows-guide/resources/agent-architecture.md index 96173ed..8a158fc 100644 --- a/.claude/skills/map-workflows-guide/resources/agent-architecture.md +++ b/.claude/skills/map-workflows-guide/resources/agent-architecture.md @@ -1,6 +1,6 @@ # Agent Architecture -MAP Framework orchestrates 8 specialized agents in a coordinated workflow. +MAP Framework orchestrates 12 specialized agents in a coordinated workflow. ## Agent Categories @@ -29,8 +29,8 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Role:** Quality scoring and final approval - **Input:** Actor + Monitor results - **Output:** Quality score (0-10), approve/reject decision -- **When it runs:** /map-fast, /map-feature (per subtask), /map-debug, /map-refactor -- **Skipped in:** /map-efficient (Monitor provides sufficient validation) +- **When it runs:** /map-debug, /map-review +- **Skipped in:** /map-efficient, /map-fast (Monitor provides sufficient validation) ### Analysis @@ -39,9 +39,9 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Input:** Planned changes - **Output:** Affected files, breaking changes, risk assessment - **When it runs:** - - /map-feature: Always (per subtask) - /map-efficient: Conditional (only if Monitor flags high risk) - - /map-debug, /map-refactor: Always (focused analysis) + - /map-debug: Always (focused analysis) + - /map-debate: Conditional (same as /map-efficient) - /map-fast: Never (skipped) ### Learning @@ -51,8 +51,7 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Input:** All agent outputs for subtask(s) - **Output:** Insights, patterns discovered, pattern updates - **When it runs:** - - /map-feature: Per subtask - - /map-efficient, /map-debug, /map-refactor: Batched (once at end) + - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - /map-fast: Never (skipped) - **MCP Tool:** Uses `mcp__mem0__map_tiered_search` to check for existing patterns @@ -73,6 +72,34 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Output:** Completeness assessment, dependency analysis - **When it runs:** On-demand (not part of standard workflows) +### Synthesis + +**9. Debate-Arbiter** +- **Role:** Cross-evaluates Actor variants with explicit reasoning +- **Input:** Multiple Actor outputs (variants) +- **Output:** Synthesized optimal solution with reasoning trace +- **When it runs:** /map-debate (per subtask, uses Opus model) + +**10. Synthesizer** +- **Role:** Extracts decisions from variants and generates unified code (Self-MoA) +- **Input:** Multiple Actor outputs +- **Output:** Merged implementation combining best elements +- **When it runs:** /map-efficient with --self-moa flag + +### Discovery & Verification + +**11. 
Research-Agent** +- **Role:** Heavy codebase reading with compressed output +- **Input:** Research question or exploration goal +- **Output:** Compressed context for implementation agents +- **When it runs:** /map-plan, /map-efficient, /map-debug (before Actor) + +**12. Final-Verifier** +- **Role:** Adversarial verification with Root Cause Analysis (Ralph Loop) +- **Input:** Complete implementation after all other agents +- **Output:** Verification verdict, regression analysis +- **When it runs:** /map-check, /map-efficient (terminal verification) + --- ## Orchestration Patterns @@ -80,8 +107,8 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. ### Linear Pipeline (map-fast) ``` -TaskDecomposer → Actor → Monitor → Evaluator → Done -(No learning, no impact analysis) +TaskDecomposer → Actor → Monitor → Apply → Done +(No Evaluator, no Predictor, no learning) ``` ### Conditional Pipeline (map-efficient) @@ -90,23 +117,25 @@ TaskDecomposer → Actor → Monitor → Evaluator → Done TaskDecomposer ↓ For each subtask: - Actor → Monitor → [Predictor if high risk] → Apply changes + Actor → Monitor → [Predictor if high risk] → Tests → Linter → Apply ↓ - Batch learning: - Reflector (all subtasks) → Curator → Done + Final-Verifier (adversarial verification of entire goal) + ↓ + Done! Optional: /map-learn → Reflector → Curator ``` -### Full Pipeline (map-feature) +### Multi-Variant Pipeline (map-debate) ``` TaskDecomposer ↓ For each subtask: - Actor → Monitor → Predictor → Evaluator - ↓ if approved - Reflector → Curator → Apply changes + Actor×3 → Monitor×3 → debate-arbiter (Opus) + ↓ synthesized + Monitor → [Predictor if high risk] → Apply changes ↓ - Done + Batch learning (via /map-learn): + Reflector (all subtasks) → Curator → Done ``` --- @@ -210,10 +239,14 @@ Agents communicate via structured JSON: | TaskDecomposer | ~1.5K | Once | All workflows | | Actor | ~2-3K | Per subtask | All workflows | | Monitor | ~1K | Per Actor output | All workflows | -| Evaluator | ~0.8K | Per subtask | map-fast, map-feature | +| Evaluator | ~0.8K | Per subtask | map-debug, map-review | | Predictor | ~1.5K | Per subtask or conditional | Varies | | Reflector | ~2K | Per subtask or batched | Varies | | Curator | ~1.5K | After Reflector | Varies | +| Debate-Arbiter | ~3-4K | Per subtask | map-debate only | +| Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | +| Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | +| Final-Verifier | ~2K | Once (terminal) | map-check, map-efficient | **map-efficient savings:** - Skip Evaluator: ~0.8K per subtask @@ -228,7 +261,7 @@ Agents communicate via structured JSON: To add a custom agent: 1. Create `.claude/agents/my-agent.md` with prompt template -2. Add to workflow command (e.g., `.claude/commands/map-feature.md`) +2. Add to workflow command (e.g., `.claude/commands/map-efficient.md`) 3. Define when it runs (before/after which agents) 4. 
Specify input/output format diff --git a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md index 5797b92..1e69fbf 100644 --- a/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-efficient-deep-dive.md @@ -36,27 +36,28 @@ Subtask 3: Add unit tests (tests/auth.test.ts) ### Reflector/Curator: Batched Learning -**Standard workflow (/map-feature):** +**Full pipeline (theoretical baseline):** ``` Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator ``` -Result: 3 × Reflector/Curator cycles +Result: 3 × (Predictor + Evaluator + Reflector + Curator) cycles **Optimized workflow (/map-efficient):** ``` -Subtask 1 → Actor → Monitor → [Predictor?] → Evaluator -Subtask 2 → Actor → Monitor → [Predictor?] → Evaluator -Subtask 3 → Actor → Monitor → [Predictor?] → Evaluator +Subtask 1 → Actor → Monitor → [Predictor if high risk] → Apply +Subtask 2 → Actor → Monitor → [Predictor if high risk] → Apply +Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply ↓ - Reflector (analyzes ALL subtasks) + Final-Verifier (adversarial verification) ↓ - Curator (consolidates patterns) + Done! Optionally run /map-learn: + Reflector (analyzes ALL subtasks) → Curator (consolidates patterns) ``` -Result: 1 × Reflector/Curator cycle +Result: No Evaluator, no per-subtask Reflector/Curator. Learning decoupled to /map-learn. -**Token savings:** 35-40% vs /map-feature +**Token savings:** 35-40% vs full pipeline --- @@ -71,7 +72,7 @@ Result: 1 × Reflector/Curator cycle - Most development work (80% of tasks) ❌ **Don't use for:** -- Critical infrastructure (use /map-feature) +- Critical infrastructure (use /map-efficient with --self-moa or /map-debate) - Small, low-risk changes (use /map-fast) - Simple bug fixes (use /map-debug) @@ -81,16 +82,16 @@ Result: 1 × Reflector/Curator cycle **Myth:** "Optimized workflows sacrifice quality" -**Reality:** /map-efficient preserves all quality gates: -- ✅ Monitor validates every subtask -- ✅ Evaluator scores every implementation -- ✅ Predictor runs when needed (conditional) -- ✅ Reflector analyzes complete context -- ✅ Curator consolidates all patterns +**Reality:** /map-efficient preserves essential quality gates: +- ✅ Monitor validates every subtask (correctness gate) +- ✅ Predictor runs when needed (conditional impact analysis) +- ✅ Tests gate and linter gate run per subtask +- ✅ Final-Verifier checks entire goal at end (adversarial verification) +- ✅ Learning available via /map-learn after workflow completes -**What's optimized:** -- Frequency (when agents run) -- NOT functionality (what agents do) +**What's optimized (intentionally omitted per-subtask):** +- Evaluator — Monitor validates correctness directly +- Reflector/Curator — decoupled to /map-learn (optional, run after workflow) --- @@ -115,38 +116,40 @@ ST-1: Pagination params ├─ Actor: Modify routes/posts.ts ├─ Monitor: ✅ Valid ├─ Predictor: ⏭️ SKIPPED (low risk) -└─ Evaluator: ✅ Approved (score: 8/10) +├─ Tests gate: ✅ Passed +└─ Linter gate: ✅ Passed ST-2: Service update ├─ Actor: Modify services/PostService.ts ├─ Monitor: ✅ Valid ├─ Predictor: ✅ RAN (affects API contract) │ └─ Impact: Breaking change if clients expect all posts -├─ Evaluator: ✅ Approved (score: 9/10) +├─ Tests gate: ✅ 
Passed └─ Note: "Add API versioning or deprecation notice" ST-3: Integration tests ├─ Actor: Add tests/posts.integration.test.ts ├─ Monitor: ✅ Valid (tests pass) ├─ Predictor: ⏭️ SKIPPED (test file) -└─ Evaluator: ✅ Approved (score: 8/10) - -Reflector (batched): -├─ Analyzed: 3 subtasks -├─ Searched mem0: Found similar pagination patterns -└─ Extracted: - - Pagination parameter pattern (offset/limit) - - API versioning consideration - - Integration test structure - -Curator (batched): -├─ Checked duplicates: 2 similar bullets found -├─ Added: 1 new bullet (API pagination pattern) -└─ Updated: 1 existing bullet (test coverage++) +├─ Tests gate: ✅ Passed +└─ Linter gate: ✅ Passed + +Final-Verifier: ✅ All subtasks verified, goal achieved + +Optional /map-learn: + Reflector (batched): + ├─ Analyzed: 3 subtasks + ├─ Searched mem0: Found similar pagination patterns + └─ Extracted: pagination pattern, API versioning, test structure + + Curator (batched): + ├─ Checked duplicates: 2 similar bullets found + ├─ Added: 1 new bullet (API pagination pattern) + └─ Updated: 1 existing bullet (test coverage++) ``` **Token usage:** -- /map-feature: ~12k tokens +- Full pipeline: ~12k tokens - /map-efficient: ~7.5k tokens - **Savings: 37.5%** diff --git a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md index 5061426..6cb71aa 100644 --- a/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-fast-deep-dive.md @@ -30,6 +30,11 @@ ### Agents NOT Called +**Evaluator (Quality Scoring)** +- No quality scoring (0-10 scale) +- No approval/rejection gate +- Monitor handles basic correctness only + **Predictor (Impact Analysis)** - No dependency analysis - Breaking changes undetected @@ -51,7 +56,7 @@ - Basic implementation ✅ - Correctness validation ✅ -**Result:** Functional code, but zero learning +**Result:** Functional code, but zero learning and no quality scoring --- @@ -62,13 +67,13 @@ | TaskDecomposer | ~1.5K | ✅ Runs | | Actor | ~2-3K | ✅ Runs | | Monitor | ~1K | ✅ Runs | -| Evaluator | ~0.8K | ✅ Runs | +| Evaluator | ~0.8K | ❌ Skipped | | Predictor | ~1.5K | ❌ Skipped | | Reflector | ~2K | ❌ Skipped | | Curator | ~1.5K | ❌ Skipped | -**Total saved:** ~5K per subtask -**Percentage:** 40-50% vs /map-feature +**Total saved:** ~5.8K per subtask +**Percentage:** 40-50% vs full pipeline --- @@ -88,10 +93,10 @@ Next step: If scope grows, switch to /map-efficient ``` TaskDecomposer: 2 subtasks ST-1: Setup React Query client - Actor → Monitor → Evaluator → Apply + Actor → Monitor → Apply ST-2: Test with one API endpoint - Actor → Monitor → Evaluator → Apply -Done. No Reflector, no Curator, no patterns learned. + Actor → Monitor → Apply +Done. No Evaluator, no Reflector, no Curator, no patterns learned. ``` **Appropriate because:** @@ -119,7 +124,7 @@ Risk: High (security, breaking changes) 3. No Curator → Team doesn't learn from mistakes 4. High risk for under-validation mindset -**Correct choice:** `/map-feature` (critical infrastructure) +**Correct choice:** `/map-efficient` (critical infrastructure) --- @@ -190,7 +195,7 @@ Why? - Faster for tiny tasks (<50 lines) - Use when MAP overhead doesn't make sense -**3. /map-feature** +**3. 
/map-efficient or /map-debate** - For high-risk changes - Security or infrastructure work diff --git a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md index 1e0c9e4..7ce5166 100644 --- a/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-feature-deep-dive.md @@ -1,5 +1,9 @@ # /map-feature Deep Dive +> **STATUS: PLANNED — NOT YET IMPLEMENTED.** +> This workflow is designed but not yet available as a command. +> Use `/map-efficient` for all feature development, including critical/high-risk features. + ## When to Use **Critical features requiring maximum confidence:** diff --git a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md index d9c3510..2faed47 100644 --- a/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ b/.claude/skills/map-workflows-guide/resources/map-refactor-deep-dive.md @@ -1,5 +1,9 @@ # /map-refactor Deep Dive +> **STATUS: PLANNED — NOT YET IMPLEMENTED.** +> This workflow is designed but not yet available as a command. +> Use `/map-efficient` for refactoring tasks. + ## When to Use **Code restructuring without behavior changes:** diff --git a/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py b/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py new file mode 100755 index 0000000..6a576a0 --- /dev/null +++ b/.claude/skills/map-workflows-guide/scripts/validate-workflow-choice.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Validate that a workflow choice matches task characteristics. + +Usage: + python validate-workflow-choice.py --workflow --risk --size --type + +Example: + python validate-workflow-choice.py --workflow map-efficient --risk medium --size medium --type feature + python validate-workflow-choice.py --workflow map-fast --risk high --size large --type security + +Exit codes: + 0 - Workflow choice is appropriate + 1 - Workflow choice is suboptimal (warning) + 2 - Workflow choice is inappropriate (error) +""" + +import argparse +import json +import sys + +# Workflow appropriateness rules +WORKFLOW_RULES = { + "map-fast": { + "allowed_risk": ["low"], + "allowed_size": ["small"], + "allowed_types": ["fix", "tweak", "maintenance", "docs"], + "forbidden_types": ["security", "auth", "payment", "database-schema"], + }, + "map-efficient": { + "allowed_risk": ["low", "medium", "high"], + "allowed_size": ["small", "medium", "large"], + "allowed_types": [ + "feature", + "enhancement", + "fix", + "tweak", + "maintenance", + "docs", + "security", + "auth", + "payment", + "database-schema", + "infrastructure", + "refactor", + "restructure", + "rename", + "extract", + "cleanup", + ], + "forbidden_types": [], + }, + "map-debug": { + "allowed_risk": ["low", "medium", "high"], + "allowed_size": ["small", "medium", "large"], + "allowed_types": ["bug", "fix", "test-failure", "error", "regression"], + "forbidden_types": ["feature", "refactor"], + }, +} + +# Recommendations for risky combinations +RISK_OVERRIDES = { + ("map-fast", "high"): "map-efficient", + ("map-fast", "medium"): "map-efficient", +} + + +def validate(workflow: str, risk: str, size: str, task_type: str) -> dict: + """Validate workflow choice against task characteristics. 
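+
+    Example (illustrative):
+        validate("map-fast", "high", "small", "fix") returns
+        {"valid": False, "level": "warning",
+         "message": "Risk level 'high' is too high for map-fast",
+         "recommendation": "map-efficient"}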
+ + Returns dict with: + valid: bool + level: "ok" | "warning" | "error" + message: str + recommendation: str | None + """ + if workflow not in WORKFLOW_RULES: + return { + "valid": False, + "level": "error", + "message": f"Unknown workflow: {workflow}", + "recommendation": "map-efficient", + } + + rules = WORKFLOW_RULES[workflow] + issues = [] + + # Check risk level + if risk not in rules["allowed_risk"]: + issues.append(f"Risk level '{risk}' is too high for {workflow}") + + # Check size + if size not in rules["allowed_size"]: + issues.append(f"Size '{size}' is not suitable for {workflow}") + + # Check forbidden types + if task_type in rules["forbidden_types"]: + issues.append(f"Task type '{task_type}' is forbidden for {workflow}") + + # Check risk overrides + override_key = (workflow, risk) + recommendation = RISK_OVERRIDES.get(override_key) + + if issues: + level = "error" if any("forbidden" in i for i in issues) else "warning" + return { + "valid": False, + "level": level, + "message": "; ".join(issues), + "recommendation": recommendation or "map-efficient", + } + + return { + "valid": True, + "level": "ok", + "message": f"Workflow '{workflow}' is appropriate for {risk}-risk {size} {task_type} task", + "recommendation": None, + } + + +def main(): + parser = argparse.ArgumentParser(description="Validate MAP workflow choice") + parser.add_argument( + "--workflow", + required=True, + choices=list(WORKFLOW_RULES.keys()), + help="Chosen workflow", + ) + parser.add_argument( + "--risk", + required=True, + choices=["low", "medium", "high"], + help="Task risk level", + ) + parser.add_argument( + "--size", + required=True, + choices=["small", "medium", "large"], + help="Task size", + ) + parser.add_argument("--type", required=True, dest="task_type", help="Task type") + parser.add_argument("--json", action="store_true", help="Output as JSON") + + args = parser.parse_args() + result = validate(args.workflow, args.risk, args.size, args.task_type) + + if args.json: + print(json.dumps(result, indent=2)) + else: + status = {"ok": "OK", "warning": "WARNING", "error": "ERROR"}[result["level"]] + print(f"[{status}] {result['message']}") + if result["recommendation"]: + print(f" Recommendation: Use {result['recommendation']} instead") + + exit_codes = {"ok": 0, "warning": 1, "error": 2} + sys.exit(exit_codes[result["level"]]) + + +if __name__ == "__main__": + main() diff --git a/.claude/skills/skill-rules.json b/.claude/skills/skill-rules.json index c7eda29..20aac1d 100644 --- a/.claude/skills/skill-rules.json +++ b/.claude/skills/skill-rules.json @@ -19,11 +19,58 @@ ], "intentPatterns": [ "(which|what).*?(workflow|mode).*?(use|choose)", - "(difference|compare).*?(map-fast|map-efficient|map-feature)", + "(difference|compare).*?(map-fast|map-efficient|map-debug)", "(when|how).*(choose|use|pick).*(workflow|map-\\w+)", "explain.*?(workflow|map-fast|map-efficient)" ] } + }, + "map-planning": { + "type": "domain", + "enforcement": "suggest", + "priority": "medium", + "description": "File-based planning with branch-scoped task tracking", + "promptTriggers": { + "keywords": [ + "create plan", + "task plan", + "track progress", + "planning", + ".map directory", + "show task status", + "resume work" + ], + "intentPatterns": [ + "(create|make|start).*(plan|planning)", + "(track|show|check).*(progress|status|tasks)", + "(resume|continue).*(work|task|plan)", + "\\.map.*(directory|folder|files)" + ] + } + }, + "map-cli-reference": { + "type": "domain", + "enforcement": "suggest", + "priority": "high", + 
"description": "CLI and MCP error corrections", + "promptTriggers": { + "keywords": [ + "mapify command", + "mapify error", + "no such command", + "no such option", + "mem0 mcp", + "validate graph", + "mapify init", + "mapify check" + ], + "intentPatterns": [ + "(mapify|mem0).*(error|command|help|usage)", + "(no such).*(command|option)", + "validate.*(graph|dependency)", + "(how to|how do).*(mapify|mem0)" + ] + } } } } diff --git a/.claude/workflow-rules.json b/.claude/workflow-rules.json index c6933c9..01df164 100644 --- a/.claude/workflow-rules.json +++ b/.claude/workflow-rules.json @@ -31,9 +31,9 @@ ] } }, - "map-feature": { + "map-efficient": { "priority": "high", - "description": "Implement new features (critical, full validation)", + "description": "Production features, refactoring, critical code (recommended default, 60-70% tokens)", "promptTriggers": { "keywords": [ "implement", @@ -41,48 +41,47 @@ "new feature", "create", "build", - "critical" - ], - "intentPatterns": [ - "(implement|add|create|build).*?(feature|functionality)", - "new.*?(feature|component|module)", - "critical.*?(feature|implementation)" - ] - } - }, - "map-efficient": { - "priority": "high", - "description": "Production features (optimized workflow, 60-70% tokens)", - "promptTriggers": { - "keywords": [ + "critical", "production", "optimize", "enhance", "improve", - "update feature" + "update feature", + "refactor", + "restructure", + "reorganize", + "clean up", + "improve structure" ], "intentPatterns": [ + "(implement|add|create|build).*?(feature|functionality)", + "new.*?(feature|component|module)", + "critical.*?(feature|implementation)", "(optimize|enhance|improve).*?(feature|code|implementation)", "production.*?(feature|deploy)", - "update.*?(feature|functionality)" + "update.*?(feature|functionality)", + "(refactor|restructure|reorganize).*?(code|component|module)", + "clean.*?up.*?(code|structure)", + "improve.*?(structure|architecture)" ] } }, - "map-refactor": { + "map-debate": { "priority": "medium", - "description": "Refactor code, improve structure", + "description": "Multi-variant synthesis with Opus arbiter for reasoning transparency", "promptTriggers": { "keywords": [ - "refactor", - "restructure", - "reorganize", - "clean up", - "improve structure" + "debate", + "compare approaches", + "trade-offs", + "reasoning", + "multiple variants", + "architectural decision" ], "intentPatterns": [ - "(refactor|restructure|reorganize).*?(code|component|module)", - "clean.*?up.*?(code|structure)", - "improve.*?(structure|architecture)" + "(compare|debate|weigh).*?(approaches|options|trade-offs)", + "(need|want).*?(reasoning|transparency|justification)", + "(architectural|design).*?(decision|choice)" ] } }, diff --git a/.map/scripts/diagnostics.py b/.map/scripts/diagnostics.py index 61a7717..84f439f 100644 --- a/.map/scripts/diagnostics.py +++ b/.map/scripts/diagnostics.py @@ -36,7 +36,13 @@ def get_branch_name() -> str: check=False, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + branch = result.stdout.strip() + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." 
in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" except Exception: pass return "default" diff --git a/.map/scripts/map_orchestrator.py b/.map/scripts/map_orchestrator.py index 45890d4..66db1e8 100755 --- a/.map/scripts/map_orchestrator.py +++ b/.map/scripts/map_orchestrator.py @@ -12,7 +12,7 @@ ARCHITECTURE: ┌─────────────────────────────────────────────────────────────┐ - │ map-efficient.md (150 lines) │ + │ map-efficient.md (~540 lines) │ │ ├─> 1. Call get_next_step() → returns step instruction │ │ ├─> 2. Execute step (Actor/Monitor/mem0/etc) │ │ ├─> 3. Call validate_step() → checks completion │ @@ -35,7 +35,7 @@ "pending_steps": ["2.1_MEM0_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] } -STEP PHASES (14 total): +STEP PHASES (16 total): 1.0 DECOMPOSE - task-decomposer agent 1.5 INIT_PLAN - Generate task_plan.md 1.55 REVIEW_PLAN - User review + explicit approval checkpoint @@ -46,9 +46,8 @@ 2.2 RESEARCH - research-agent (conditional) 2.3 ACTOR - Actor agent implementation 2.4 MONITOR - Monitor validation - 2.5 RETRY_LOOP - Retry on Monitor failure 2.6 PREDICTOR - Impact analysis (conditional) - 2.7 APPLY_CHANGES - Write/Edit tools + 2.7 UPDATE_STATE - Mark subtask progress 2.8 TESTS_GATE - Run tests 2.9 LINTER_GATE - Run linter 2.10 VERIFY_ADHERENCE - Self-audit checkpoint @@ -112,9 +111,8 @@ "2.2": "RESEARCH", "2.3": "ACTOR", "2.4": "MONITOR", - "2.5": "RETRY_LOOP", "2.6": "PREDICTOR", - "2.7": "APPLY_CHANGES", + "2.7": "UPDATE_STATE", "2.8": "TESTS_GATE", "2.9": "LINTER_GATE", "2.10": "VERIFY_ADHERENCE", @@ -141,6 +139,16 @@ "2.11", ] +# Steps that require evidence files from agents before validation. +# Format: step_id -> (agent_phase, always_required) +# If always_required is False, evidence is only checked when the step +# appears in pending_steps (i.e., it wasn't skipped). +EVIDENCE_REQUIRED = { + "2.3": ("actor", True), # Always required + "2.4": ("monitor", True), # Always required + "2.6": ("predictor", False), # Only when 2.6 is in pending_steps +} + @dataclass class StepState: @@ -261,7 +269,7 @@ def get_step_instruction(step_id: str, state: StepState) -> str: "into ≤20 atomic subtasks with validation criteria." ), "1.5": ( - "Generate .map/task_plan_.md from decomposer blueprint. " + "Generate .map//task_plan_.md from decomposer blueprint. " "Include Goal, Current Phase, and status for each subtask." ), "1.55": ( @@ -294,19 +302,25 @@ def get_step_instruction(step_id: str, state: StepState) -> str: ), "2.3": ( f"Call Task(subagent_type='actor') to implement subtask " - f"{state.current_subtask_id}. Pass XML packet and context patterns." + f"{state.current_subtask_id}. Pass XML packet and context patterns. " + f"Actor MUST write evidence file: " + f".map//evidence/actor_{state.current_subtask_id}.json" ), "2.4": ( "Call Task(subagent_type='monitor') to validate Actor output. " - "Check correctness, security, standards, and tests." + "Check correctness, security, standards, and tests. " + f"Monitor MUST write evidence file: " + f".map//evidence/monitor_{state.current_subtask_id}.json" ), "2.6": ( "Call Task(subagent_type='predictor') for impact analysis " - "(required for medium/high risk subtasks)." + "(required for medium/high risk subtasks). " + f"Predictor MUST write evidence file: " + f".map//evidence/predictor_{state.current_subtask_id}.json" ), "2.7": ( - "Apply Actor's changes using Edit/Write tools. " - "GATE: Only allowed if Monitor.valid === true." + "Update workflow state to mark subtask progress. 
" + "Code was already applied by Actor and validated by Monitor." ), "2.8": ( "Run tests using pytest/npm test/go test/cargo test. " @@ -363,7 +377,8 @@ def get_next_step(branch: str) -> Dict: state.current_step_id = "2.0" state.current_step_phase = "XML_PACKET" # Reset to subtask-level steps (skip global setup steps) - state.pending_steps = STEP_ORDER[3:] # Start from 2.0 + xml_packet_idx = STEP_ORDER.index("2.0") + state.pending_steps = STEP_ORDER[xml_packet_idx:] # Start from 2.0 state.completed_steps = [] state.retry_count = 0 state.save(state_file) @@ -428,11 +443,74 @@ def validate_step(step_id: str, branch: str) -> Dict: "message": "Invalid execution_mode. Set mode first: python3 .map/scripts/map_orchestrator.py set_execution_mode step_by_step|batch", } + # Evidence-gated validation: require agent evidence files for key steps + if step_id in EVIDENCE_REQUIRED: + phase_name, always_required = EVIDENCE_REQUIRED[step_id] + evidence_dir = Path(f".map/{branch}/evidence") + if not evidence_dir.is_dir(): + return { + "valid": False, + "message": ( + f"Evidence directory missing: {evidence_dir}. " + f"Run initialize or resume_from_plan first." + ), + } + subtask_id = state.current_subtask_id or "unknown" + evidence_file = evidence_dir / f"{phase_name}_{subtask_id}.json" + if not evidence_file.exists(): + return { + "valid": False, + "message": ( + f"Evidence file missing: {evidence_file}. " + f"The {phase_name} agent must write this file before " + f"validate_step can accept step {step_id}." + ), + } + # Validate JSON structure + try: + evidence_data = json.loads( + evidence_file.read_text(encoding="utf-8") + ) + except (json.JSONDecodeError, OSError) as exc: + return { + "valid": False, + "message": ( + f"Evidence file {evidence_file} is not valid JSON: {exc}" + ), + } + # Check required fields + for required_field in ("phase", "subtask_id", "timestamp"): + if required_field not in evidence_data: + return { + "valid": False, + "message": ( + f"Evidence file {evidence_file} missing required " + f"field: '{required_field}'. " + f"Required fields: phase, subtask_id, timestamp." + ), + } + # Validate subtask_id matches current subtask + if evidence_data.get("subtask_id") != subtask_id: + return { + "valid": False, + "message": ( + f"Evidence file subtask_id mismatch: " + f"expected '{subtask_id}', " + f"got '{evidence_data.get('subtask_id')}'." + ), + } + # Mark step complete state.completed_steps.append(step_id) if step_id in state.pending_steps: state.pending_steps.remove(step_id) + # When transitioning from init phases to execution phases, + # ensure the first subtask is selected + if step_id == "1.6" and state.subtask_sequence and not state.current_subtask_id: + state.current_subtask_id = state.subtask_sequence[0] + state.subtask_index = 0 + # Advance current_step_id to next pending step if state.pending_steps: next_id = state.pending_steps[0] @@ -469,6 +547,10 @@ def initialize_workflow(task: str, branch: str) -> Dict: state = StepState() state.save(state_file) + # Create evidence directory for artifact-gated validation + evidence_dir = Path(f".map/{branch}/evidence") + evidence_dir.mkdir(parents=True, exist_ok=True) + return { "status": "initialized", "state_file": str(state_file), @@ -510,6 +592,198 @@ def set_execution_mode(mode: str, branch: str) -> Dict: return {"status": "success", "execution_mode": state.execution_mode} +SKIPPABLE_STEPS = {"2.2", "2.6", "2.11"} + + +def skip_step(step_id: str, branch: str) -> Dict: + """Skip a conditional step without executing it. 
+ + Only steps that are defined as conditional can be skipped: + - 2.2 (RESEARCH): conditional on refactoring or 3+ files + - 2.6 (PREDICTOR): conditional on medium/high risk + - 2.11 (SUBTASK_APPROVAL): conditional on step_by_step mode + + Args: + step_id: Step identifier to skip + branch: Git branch name (sanitized) + + Returns: + Dict with status and next step info + """ + if step_id not in SKIPPABLE_STEPS: + return { + "status": "error", + "message": ( + f"Step {step_id} cannot be skipped. " + f"Only conditional steps can be skipped: " + f"{', '.join(sorted(SKIPPABLE_STEPS))}" + ), + } + + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if state.current_step_id != step_id: + return { + "status": "error", + "message": f"Step mismatch: current is {state.current_step_id}, cannot skip {step_id}", + } + + # Mark step as completed (skipped) and advance + state.completed_steps.append(step_id) + if step_id in state.pending_steps: + state.pending_steps.remove(step_id) + + # Advance to next pending step + if state.pending_steps: + next_id = state.pending_steps[0] + state.current_step_id = next_id + state.current_step_phase = STEP_PHASES.get(next_id, "UNKNOWN") + else: + state.current_step_id = "COMPLETE" + state.current_step_phase = "COMPLETE" + + state.save(state_file) + + return { + "status": "success", + "message": f"Step {step_id} skipped", + "next_step": state.current_step_id, + } + + +def check_circuit_breaker(branch: str) -> Dict: + """Check circuit breaker status based on completed steps count. + + Returns tool_count (total completed steps) and max_iterations threshold. + If tool_count >= max_iterations, the workflow should ask the user to continue or abort. + + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with tool_count, max_iterations, triggered flag + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + tool_count = len(state.completed_steps) + max_iterations = len(state.subtask_sequence) * len(STEP_ORDER) + + return { + "tool_count": tool_count, + "max_iterations": max_iterations, + "triggered": tool_count >= max_iterations, + "retry_count": state.retry_count, + "max_retries": state.max_retries, + } + + +def set_subtasks(subtask_ids: List[str], branch: str) -> Dict: + """Set subtask sequence after decomposition and select the first subtask. + + Args: + subtask_ids: List of subtask IDs (e.g., ["ST-001", "ST-002", "ST-003"]) + branch: Git branch name (sanitized) + + Returns: + Dict with status and subtask info + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not subtask_ids: + return {"status": "error", "message": "At least one subtask ID is required"} + + state.subtask_sequence = subtask_ids + state.current_subtask_id = subtask_ids[0] + state.subtask_index = 0 + state.save(state_file) + + return { + "status": "success", + "subtask_sequence": subtask_ids, + "current_subtask_id": subtask_ids[0], + } + + +def resume_from_plan(branch: str) -> Dict: + """Resume workflow from an existing /map-plan output, skipping init phases. + + Detects task_plan_.md and workflow_state.json created by /map-plan. + Extracts subtask IDs from the plan, marks init phases as completed, and + starts execution from CHOOSE_MODE (user still picks step_by_step vs batch). 
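+
+    Example (via the CLI wiring in main(); branch is auto-detected if --branch is omitted):
+        python3 .map/scripts/map_orchestrator.py resume_from_plan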
+ + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with status and skipped phases + """ + plan_dir = Path(f".map/{branch}") + plan_file = plan_dir / f"task_plan_{branch}.md" + workflow_state_file = plan_dir / "workflow_state.json" + + # Verify plan artifacts exist + if not plan_file.exists(): + return { + "status": "error", + "message": f"No plan found at {plan_file}. Run /map-plan first.", + } + + # Extract subtask IDs from plan file (ST-XXX pattern) + import re + + plan_content = plan_file.read_text(encoding="utf-8") + subtask_ids = re.findall(r"###\s+(ST-\d+)", plan_content) + + if not subtask_ids: + return { + "status": "error", + "message": f"No subtask IDs (ST-XXX) found in {plan_file}.", + } + + # Extract AAG contracts if present in workflow_state.json + aag_contracts = {} + if workflow_state_file.exists(): + try: + ws_data = json.loads(workflow_state_file.read_text(encoding="utf-8")) + aag_contracts = ws_data.get("aag_contracts", {}) + except (json.JSONDecodeError, KeyError): + pass + + # Create state that skips DECOMPOSE, INIT_PLAN, REVIEW_PLAN (plan already approved) + # Start from CHOOSE_MODE so user can still pick execution mode + skipped_phases = ["1.0", "1.5", "1.55"] + execution_start = [s for s in STEP_ORDER if s not in skipped_phases] + + state_file = plan_dir / "step_state.json" + state = StepState( + current_subtask_id=subtask_ids[0], + subtask_index=0, + subtask_sequence=subtask_ids, + current_step_id="1.56", + current_step_phase="CHOOSE_MODE", + completed_steps=skipped_phases, + pending_steps=execution_start, + plan_approved=True, + ) + state.save(state_file) + + # Create evidence directory for artifact-gated validation + evidence_dir = plan_dir / "evidence" + evidence_dir.mkdir(parents=True, exist_ok=True) + + return { + "status": "success", + "message": "Resumed from /map-plan. Skipped DECOMPOSE, INIT_PLAN, REVIEW_PLAN.", + "subtask_sequence": subtask_ids, + "current_subtask_id": subtask_ids[0], + "aag_contracts_found": len(aag_contracts), + "next_phase": "CHOOSE_MODE", + } + + def main(): """CLI entry point.""" parser = argparse.ArgumentParser( @@ -523,11 +797,19 @@ def main(): "initialize", "set_plan_approved", "set_execution_mode", + "skip_step", + "set_subtasks", + "resume_from_plan", + "check_circuit_breaker", ], help="Command to execute", ) - parser.add_argument("task_or_step", nargs="?", help="Task description or step ID") - parser.add_argument("value", nargs="?", help="Optional value for setter commands") + parser.add_argument( + "task_or_step", nargs="?", help="Task description, step ID, or subtask IDs" + ) + parser.add_argument( + "extra_args", nargs="*", help="Additional arguments (e.g., more subtask IDs)" + ) parser.add_argument("--branch", help="Git branch (auto-detected if omitted)") args = parser.parse_args() @@ -577,6 +859,40 @@ def main(): result = set_execution_mode(mode, branch) print(json.dumps(result, indent=2)) + elif args.command == "skip_step": + if not args.task_or_step: + print( + json.dumps({"error": "step_id required for skip_step"}), + file=sys.stderr, + ) + sys.exit(1) + result = skip_step(args.task_or_step, branch) + print(json.dumps(result, indent=2)) + + elif args.command == "set_subtasks": + if not args.task_or_step: + print( + json.dumps( + { + "error": "At least one subtask ID required. 
" + "Usage: set_subtasks ST-001 ST-002 ST-003" + } + ), + file=sys.stderr, + ) + sys.exit(1) + subtask_ids = [args.task_or_step] + (args.extra_args or []) + result = set_subtasks(subtask_ids, branch) + print(json.dumps(result, indent=2)) + + elif args.command == "resume_from_plan": + result = resume_from_plan(branch) + print(json.dumps(result, indent=2)) + + elif args.command == "check_circuit_breaker": + result = check_circuit_breaker(branch) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index 117199a..92e388e 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -108,7 +108,7 @@ def update_workflow_state( return { "status": "success", - "message": f"Updated {subtask_id}: {step_name} → {new_state}", + "message": f"Updated {subtask_id}: {step_name} -> {new_state}", "completed_steps": state["completed_steps"][subtask_id], } @@ -135,7 +135,7 @@ def update_plan_status( if branch is None: branch = get_branch_name() - plan_file = Path(f".map/task_plan_{branch}.md") + plan_file = Path(f".map/{branch}/task_plan_{branch}.md") if not plan_file.exists(): return {"status": "error", "message": f"Plan file not found: {plan_file}"} @@ -143,8 +143,8 @@ def update_plan_status( try: content = plan_file.read_text(encoding="utf-8") - # Find subtask section (## ST-XXX: Title) - pattern = rf"(## {re.escape(subtask_id)}:.*?\n\*\*Status:\*\*\s+)\w+" + # Find subtask section (### ST-XXX: Title) + pattern = rf"(### {re.escape(subtask_id)}:.*?\n- \*\*Status:\*\*\s+)\w+" replacement = rf"\g<1>{new_status}" updated_content = re.sub(pattern, replacement, content) @@ -277,7 +277,7 @@ def get_plan_path(branch: Optional[str] = None) -> Path: """ if branch is None: branch = get_branch_name() - return Path(f".map/task_plan_{branch}.md") + return Path(f".map/{branch}/task_plan_{branch}.md") def read_current_goal(branch: Optional[str] = None) -> Optional[str]: diff --git a/README.md b/README.md index 9daeafb..8914473 100644 --- a/README.md +++ b/README.md @@ -1 +1,102 @@ -test +# MAP Framework for Claude Code + +[![PyPI version](https://badge.fury.io/py/mapify-cli.svg)](https://pypi.org/project/mapify-cli/) +[![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) + +> Structured AI development workflows that replace ad-hoc prompting with **plan → execute → validate** loops. + +Based on [MAP cognitive architecture](https://github.com/Shanka123/MAP) (Nature Communications, 2025) — 74% improvement in planning tasks. + +## Why MAP? + +- **Structured workflows** — 12 specialized agents instead of single-prompt chaos +- **Quality gates** — automatic validation catches errors before they compound +- **40-60% cost savings** — prevents circular reasoning and scope creep +- **Learning system** — captures patterns for reuse across projects + +## Quick Start + +**1. Install** +```bash +uv tool install mapify-cli + +# or with pip +pip install mapify-cli +``` + +**2. Initialize** (in your project) +```bash +cd your-project +mapify init +``` + +**3. Start Claude Code and run your first workflow** +```bash +claude +``` +``` +/map-efficient implement user authentication with JWT tokens +``` + +**You'll know it's working when:** Claude spawns specialized agents (TaskDecomposer → Actor → Monitor) with structured output instead of freeform responses. 
+ +## Core Commands + +| Command | Use For | +|---------|---------| +| `/map-efficient` | Production features, refactoring, complex tasks (recommended) | +| `/map-debug` | Bug fixes and debugging | +| `/map-fast` | Small, low-risk changes | +| `/map-debate` | Complex decisions with multi-variant synthesis | +| `/map-review` | Pre-commit code review | +| `/map-check` | Quality gates and verification | +| `/map-plan` | Task decomposition without implementation | +| `/map-release` | Package release workflow | +| `/map-resume` | Resume interrupted workflows | +| `/map-learn` | Extract lessons after workflow completion | + +[Detailed usage and options →](docs/USAGE.md) + +## How It Works + +MAP orchestrates specialized agents through slash commands: + +``` +TaskDecomposer → breaks goal into subtasks + ↓ + Actor → generates code + ↓ + Monitor → validates quality (loop if needed) + ↓ + Predictor → analyzes impact (for risky changes) +``` + +The orchestration lives in `.claude/commands/map-*.md` prompts created by `mapify init`. + +[Architecture deep-dive →](docs/ARCHITECTURE.md) + +## Documentation + +| Guide | Description | +|-------|-------------| +| [Installation](docs/INSTALL.md) | All install methods, PATH setup, troubleshooting | +| [Usage Guide](docs/USAGE.md) | Workflows, examples, cost optimization, playbook | +| [Architecture](docs/ARCHITECTURE.md) | Agents, MCP integration, customization | + +## Trouble? + +- **Command not found** → Run `mapify init` in your project first +- **Agent errors** → Check `.claude/agents/` has all 12 `.md` files +- [More help →](docs/INSTALL.md#troubleshooting) + +## Contributing + +Improvements welcome: prompts for specific languages, new agents, CI/CD integrations. + +## License + +MIT + +--- + +**MAP brings structure to AI-assisted development.** Start with `/map-efficient` and see the difference. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 7feca96..e161fb4 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -21,7 +21,7 @@ Deep technical documentation for MAP (Modular Agentic Planner) implementation. ### High-Level Design -MAP Framework implements cognitive architecture inspired by prefrontal cortex functions, orchestrating 11 specialized agents for software development with automatic quality validation. +MAP Framework implements cognitive architecture inspired by prefrontal cortex functions, orchestrating 12 specialized agents for software development with automatic quality validation. **Key Design Principle:** Each slash command has its own unique workflow with different agent sequences. There is no single "standard" workflow — the orchestration logic is defined in `.claude/commands/map-*.md` files. @@ -161,7 +161,7 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu **State Management:** - Workflow checkpoint stored in `.map/progress.md` (YAML frontmatter + markdown) -- Task plan stored in `.map/task_plan_*.md` +- Task plan stored in `.map//task_plan_*.md` - Workflow logs in `.map/workflow_logs/` - Metrics tracked in `.claude/metrics/agent_metrics.jsonl` @@ -343,12 +343,12 @@ All JSON schemas are defined in `src/mapify_cli/schemas.py`: MAP Framework provides multiple workflow variants with different agent orchestration strategies: -#### 1. `/map-efficient` - Optimized Pipeline (3-5 Agents) ⭐ RECOMMENDED +#### 1. 
`/map-efficient` - Optimized Pipeline (4-6 Agents) ⭐ RECOMMENDED -**Agent Sequence:** TaskDecomposer → (Actor → Monitor → conditional Predictor) per subtask +**Agent Sequence:** TaskDecomposer → [conditional ResearchAgent] → (Actor → Monitor → [conditional Predictor]) per subtask → FinalVerifier **With Self-MoA** (--self-moa flag OR high risk/complexity): -TaskDecomposer → (3×Actor parallel → 3×Monitor parallel → Synthesizer → final Monitor → conditional Predictor) per subtask +TaskDecomposer → [conditional ResearchAgent] → (3×Actor parallel → 3×Monitor parallel → Synthesizer → final Monitor → [conditional Predictor]) per subtask → FinalVerifier **Optimizations:** @@ -427,9 +427,9 @@ print("Consider running /map-learn to save patterns") - Broad refactors or multi-module changes - High uncertainty requirements -#### 3. `/map-debate` - Debate-Based Multi-Variant (6 Agents) +#### 3. `/map-debate` - Debate-Based Multi-Variant (5-7 Agents) -**Agent Sequence:** TaskDecomposer → (3×Actor parallel → 3×Monitor parallel → DebateArbiter (Opus) → Monitor → [Predictor if risky]) per subtask +**Agent Sequence:** TaskDecomposer → [conditional ResearchAgent] → (3×Actor parallel → 3×Monitor parallel → DebateArbiter (Opus) → Monitor → [Predictor if risky]) per subtask **Multi-Variant Architecture:** @@ -729,14 +729,13 @@ See [USAGE.md - Workflow Variants](./USAGE.md#workflow-variants) for detailed de 7. `2.1 MEM0_SEARCH` - Tiered memory search 8. `2.2 RESEARCH` - research-agent (conditional) 9. `2.3 ACTOR` - Actor agent implementation -10. `2.4 MONITOR` - Monitor validation -11. `2.5 RETRY_LOOP` - Retry on Monitor failure (not shown in linear flow) -12. `2.6 PREDICTOR` - Impact analysis (conditional) -13. `2.7 APPLY_CHANGES` - Write/Edit tools -14. `2.8 TESTS_GATE` - Run tests -15. `2.9 LINTER_GATE` - Run linter -16. `2.10 VERIFY_ADHERENCE` - Self-audit checkpoint -17. `2.11 SUBTASK_APPROVAL` - Pause between subtasks (step_by_step only) +10. `2.4 MONITOR` - Monitor validation (retry up to 5 times) +11. `2.6 PREDICTOR` - Impact analysis (conditional) +12. `2.7 UPDATE_STATE` - Update workflow_state.json +13. `2.8 TESTS_GATE` - Run tests +14. `2.9 LINTER_GATE` - Run linter +15. `2.10 VERIFY_ADHERENCE` - Self-audit checkpoint +16. `2.11 SUBTASK_APPROVAL` - Pause between subtasks (step_by_step only) **State Files:** - `step_state.json` - Hook injection source (current step phase) @@ -842,7 +841,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **MCP Tool Usage:** - `mcp__mem0__map_tiered_search`: Find existing patterns before implementing -- `context7__get-library-docs`: Get current library documentation +- `mcp__context7__get-library-docs`: Get current library documentation ### 3. Monitor @@ -910,7 +909,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate - Configuration requirements - Test coverage gaps -**Model Used:** Haiku (fast, cost-effective for analysis) +**Model Used:** Sonnet (impact analysis requires complex reasoning) ### 5. Evaluator @@ -941,7 +940,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **Approval Threshold:** >7.0 overall score -**Model Used:** Haiku (fast scoring) +**Model Used:** Sonnet (evaluation requires nuanced judgment) ### 6. 
Reflector @@ -1149,7 +1148,7 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate **Usage Context:** Only invoked in `/map-debate` workflow after all variants validated **MCP Tool Usage:** -- `sequential-thinking`: Multi-step reasoning for complex trade-off analysis +- `mcp__sequential-thinking__sequentialthinking`: Multi-step reasoning for complex trade-off analysis ### 11. ResearchAgent @@ -1200,6 +1199,42 @@ If you modified `.claude/commands/map-efficient.md`, you must manually integrate - Outputs compressed summary (<2K tokens) - Prevents Actor context bloat (would be 20-50K tokens if Actor read directly) +### 12. FinalVerifier + +**Responsibility:** Adversarial verifier applying the "Four-Eyes Principle" — verifies the ENTIRE task goal is achieved, not just individual subtasks. Catches premature completion and hallucinated success. + +**Input:** +```json +{ + "original_goal": "From .map//task_plan_.md", + "acceptance_criteria": "From task plan table", + "completed_subtasks": "From progress_.md checkboxes", + "validation_criteria": "From orchestrator" +} +``` + +**Output:** +```json +{ + "verdict": "PASS", + "confidence": 0.95, + "criteria_met": ["All acceptance criteria verified"], + "root_cause": null, + "recommendation": "COMPLETE" +} +``` + +**Verification Process:** +1. Read original goal and acceptance criteria from `.map/` checkpoint files +2. Verify each acceptance criterion against actual file state (Read, Grep, Bash) +3. Run tests if specified in validation criteria +4. Apply root cause analysis if verification fails +5. Return verdict: PASS → COMPLETE, FAIL → RE_DECOMPOSE or ESCALATE + +**Model Used:** Sonnet (adversarial verification requires strong reasoning) + +**Usage Context:** Mandatory final step in `/map-efficient` and invoked by `/map-check` + --- ## MCP Integration @@ -1312,11 +1347,11 @@ MCP servers are configured differently depending on the usage context: **WHEN using external libraries:** 1. Resolve library ID: - - Tool: context7__resolve-library-id + - Tool: mcp__context7__resolve-library-id - Input: Library name (e.g., "Flask", "Next.js") 2. Fetch current docs: - - Tool: context7__get-library-docs + - Tool: mcp__context7__get-library-docs - Parameters: library_id, topic, tokens (default: 5000) 3. Use docs for: @@ -1774,7 +1809,7 @@ Agent prompts are located in `.claude/agents/*.md` and use **Handlebars template {{project_name}} # e.g., "my-web-app" {{language}} # e.g., "Python", "JavaScript" {{framework}} # e.g., "Flask", "Next.js" -{{standards_url}} # Link to coding standards +{{standards_doc}} # Link to coding standards ``` **Actor-specific:** @@ -1810,8 +1845,8 @@ MAP Framework uses intelligent model selection to balance quality and cost. 
| TaskDecomposer | sonnet-4-5 | Quality-critical: task planning | | Actor | sonnet-4-5 | Quality-critical: code generation | | Monitor | sonnet-4-5 | Quality-critical: validation | -| Predictor | haiku-3-5 | Fast analysis, non-critical | -| Evaluator | haiku-3-5 | Fast scoring, structured output | +| Predictor | sonnet-4-5 | Impact analysis requires complex reasoning | +| Evaluator | sonnet-4-5 | Evaluation requires nuanced judgment | | Reflector | sonnet-4-5 | Quality-critical: pattern extraction | | Curator | sonnet-4-5 | Quality-critical: knowledge management | | DocumentationReviewer | sonnet-4-5 | Quality-critical: doc validation | @@ -1830,14 +1865,13 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 ``` **Cost vs Quality Trade-offs:** -- **All Sonnet/Opus:** Highest quality, 3-4x cost (Opus for DebateArbiter) -- **Mixed (current):** Balanced, 40-60% cost reduction -- **All Haiku:** Lowest cost, risk of quality degradation in code generation +- **All Sonnet/Opus (current):** Highest quality, Opus only for DebateArbiter +- **Downgrade to Haiku:** Lower cost, risk of quality degradation in analysis and scoring **Recommended:** -- Keep on Sonnet: TaskDecomposer, Actor, Monitor, Reflector, Curator, DocumentationReviewer, Synthesizer, ResearchAgent +- Keep on Sonnet: TaskDecomposer, Actor, Monitor, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Synthesizer, ResearchAgent - Keep on Opus: DebateArbiter (cross-variant reasoning requires highest quality) -- Safe to use Haiku: Predictor, Evaluator (fast analysis, structured output) +- Safe to downgrade to Haiku: Predictor, Evaluator (if cost reduction is priority) ### Adding Custom Agents @@ -1901,7 +1935,7 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 6. **Update orchestration:** Edit `.claude/commands/map-efficient.md` to call new agent: ```markdown - ## After Evaluator approves: + ## After Monitor validates: **6. Security Audit** (SecurityAuditor): - Call: Task(subagent_type="security-auditor", input=actor_output) @@ -1952,7 +1986,7 @@ model: claude-sonnet-4-5 # or claude-haiku-3-5 {{project_name}} # From .claude/config.json {{language}} # From .claude/config.json {{framework}} # From .claude/config.json -{{standards_url}} # From .claude/config.json +{{standards_doc}} # From .claude/config.json ``` **Pass custom variables:** @@ -2081,15 +2115,10 @@ Agent template changes are tracked in the project's main CHANGELOG.md. - Best practices and anti-patterns - Troubleshooting common issues -**Usage:** -```markdown -# In agent templates, reference patterns: - -See [MCP-PATTERNS.md](MCP-PATTERNS.md#actor-patterns) for: -- How to search mem0 before implementing -- When to fetch library docs -- Batch search optimization -``` +**Usage:** Each agent template contains its own MCP Tool Selection Matrix with: +- Conditions for when to use each tool +- Query patterns for effective searches +- Skip conditions to avoid unnecessary calls ### Updating Strategies @@ -2131,7 +2160,7 @@ See [MCP-PATTERNS.md](MCP-PATTERNS.md#actor-patterns) for: 6. 
**Document:** - Update `version` and `last_updated` in frontmatter - Add entry to CHANGELOG.md - - Update MCP-PATTERNS.md if tool usage changed + - Update MCP Tool Selection Matrix in agent template if tool usage changed **Rollback if needed:** ```bash @@ -2189,7 +2218,7 @@ MAP Framework applies cutting-edge context engineering principles for AI agents, Workflow state is managed through file-based persistence in `.map/` directory: - `.map/progress.md` - Workflow checkpoint (YAML frontmatter + markdown body) -- `.map/task_plan_*.md` - Task decomposition with validation criteria +- `.map//task_plan_*.md` - Task decomposition with validation criteria - `.map/dev_docs/context.md` - Project context - `.map/dev_docs/tasks.md` - Task checklist @@ -2273,7 +2302,7 @@ Filesystem (persists forever) Conversation Memory (clears on compactio **Implementation:** - Checkpoint: `.map/progress.md` (YAML frontmatter + markdown body) -- Task plan: `.map/task_plan_*.md` (subtask decomposition with validation criteria) +- Task plan: `.map//task_plan_*.md` (subtask decomposition with validation criteria) - Recovery: `/map-resume` command (detects checkpoint and offers to resume) ### Automatic Recovery (Phase 2) @@ -2302,7 +2331,7 @@ Command checks .map/progress.md existence ↓ [User confirms?] ↓ Yes - Load task plan from .map/task_plan_*.md + Load task plan from .map//task_plan_*.md ↓ Continue Actor→Monitor loop for remaining subtasks ↓ @@ -2316,7 +2345,7 @@ Command checks .map/progress.md existence | Resume command | `.claude/commands/map-resume.md` | User-facing recovery workflow | | WorkflowState class | `src/mapify_cli/workflow_state.py` | Checkpoint serialization/deserialization | | Checkpoint file | `.map/progress.md` | YAML frontmatter + markdown progress | -| Task plan | `.map/task_plan_*.md` | Subtask decomposition with validation | +| Task plan | `.map//task_plan_*.md` | Subtask decomposition with validation | | Unit tests | `tests/test_workflow_state.py` | WorkflowState logic coverage | **Execution Flow:** @@ -2564,7 +2593,7 @@ All failures are non-blocking - hook returns `{"continue": true}` and logs error **Problem:** Too many patterns distract model, reduce focus on most relevant patterns. -**Solution:** Limit patterns retrieved to `top_k=5` (configurable via tiered search). +**Solution:** Limit patterns retrieved to `limit=5` (configurable via tiered search). **Behavior:** @@ -2583,9 +2612,9 @@ result = mcp__mem0__map_tiered_search( - ✅ Faster retrieval via tiered caching **Customization:** -- `top_k=3`: Simple tasks, minimal context needed -- `top_k=5`: Balanced (recommended default) -- `top_k=7-10`: Complex tasks requiring multiple pattern references +- `limit=3`: Simple tasks, minimal context needed +- `limit=5`: Balanced (recommended default) +- `limit=7-10`: Complex tasks requiring multiple pattern references ### Template Optimization (Phase 1.4) @@ -2611,7 +2640,7 @@ result = mcp__mem0__map_tiered_search( **Phase 1 ✅ COMPLETED** (2025-10-18): - [x] **RecitationManager** (482 lines): Recitation Pattern for focus - [x] **MapWorkflowLogger** (246 lines): Detailed workflow logging -- [x] **Pattern top_k=5**: Limit retrieved patterns +- [x] **Pattern limit=5**: Limit retrieved patterns - [x] **Template Optimization**: Optimize verbose outputs (-9.6% tokens) **Phase 1 Results:** diff --git a/docs/COMPLETE_WORKFLOW.md b/docs/COMPLETE_WORKFLOW.md index f37d974..c33066b 100644 --- a/docs/COMPLETE_WORKFLOW.md +++ b/docs/COMPLETE_WORKFLOW.md @@ -28,7 +28,7 @@ **Внутри:** 1. 
**task-decomposer** agent разбивает задачу на subtasks -2. Создаёт `.map/task_plan_.md` с: +2. Создаёт `.map//task_plan_.md` с: - Goal (цель) - Subtasks (ST-001, ST-002, ...) с описаниями - Validation criteria для каждого subtask @@ -90,7 +90,7 @@ Validation: **👤 Действие пользователя:** - Просматривает план -- Может **отредактировать** `.map/task_plan_.md` вручную +- Может **отредактировать** `.map//task_plan_.md` вручную - Утверждает командой `/map-efficient` (переход к фазе 2) --- diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 8003fa7..40d454b 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -147,8 +147,8 @@ mapify init my-project This will: - ✅ Create project directory -- ✅ Install 11 MAP agents (including ACE Reflector & Curator, Synthesizer, DebateArbiter, ResearchAgent) -- ✅ Add 7 slash commands (/map-efficient, /map-debug, /map-fast, /map-learn, /map-review, /map-debate, /map-release) +- ✅ Install 12 MAP agents (including ACE Reflector & Curator, Synthesizer, DebateArbiter, ResearchAgent, FinalVerifier) +- ✅ Add 10 slash commands (/map-efficient, /map-debug, /map-fast, /map-debate, /map-learn, /map-review, /map-release, /map-check, /map-plan, /map-resume) - ✅ Configure essential MCP servers - ✅ Initialize git repository - ✅ Create ACE playbook structure @@ -218,7 +218,7 @@ If you prefer manual setup: ``` your-project/ ├── .claude/ - │ ├── agents/ # 11 specialized agents + │ ├── agents/ # 12 specialized agents │ │ ├── task-decomposer.md # Decomposes tasks into subtasks │ │ ├── actor.md # Implements code │ │ ├── monitor.md # Validates implementations @@ -229,15 +229,19 @@ If you prefer manual setup: │ │ ├── synthesizer.md # Self-MoA: Merges variants │ │ ├── debate-arbiter.md # Opus: Cross-evaluates variants │ │ ├── research-agent.md # Isolated codebase research + │ │ ├── final-verifier.md # Adversarial verification (Ralph Loop) │ │ └── documentation-reviewer.md # Reviews technical docs - │ ├── commands/ # 7 slash commands + │ ├── commands/ # 10 slash commands │ │ ├── map-efficient.md # Optimized workflow (recommended) │ │ ├── map-debate.md # Multi-variant with Opus reasoning │ │ ├── map-debug.md # Debug workflow │ │ ├── map-fast.md # Minimal workflow (low-risk only) │ │ ├── map-learn.md # Extract and save lessons │ │ ├── map-review.md # Review workflow - │ │ └── map-release.md # Release workflow + │ │ ├── map-release.md # Release workflow + │ │ ├── map-check.md # Quality gates & verification + │ │ ├── map-plan.md # Architecture decomposition + │ │ └── map-resume.md # Resume interrupted workflows │ └── mcp_config.json ``` @@ -295,7 +299,11 @@ MAP Framework uses **slash commands** as entry points that coordinate specialize - **`/map-debate`** - Multi-variant with Opus arbiter (7 agents): 3 Actor variants → debate-arbiter synthesis - **`/map-debug`** - Diagnostic and fix workflows with agent coordination - **`/map-fast`** - Minimal workflow (3 agents) — small, low-risk changes (reduced analysis) -- **`/map-review`** - Comprehensive review with MAP analysis +- **`/map-review`** - Comprehensive review with Monitor, Predictor, and Evaluator agents +- **`/map-check`** - Quality gates and verification for staged changes +- **`/map-plan`** - Architect phase only: decompose task without implementation +- **`/map-release`** - Package release workflow with validation gates +- **`/map-resume`** - Resume incomplete MAP workflow from checkpoint - **`/map-learn`** - Extract lessons: reflector → curator → mem0 storage **Note:** Agents are invoked automatically by slash commands. 
Direct agent invocation is not the recommended approach—use the slash commands above for proper workflow orchestration. diff --git a/docs/USAGE.md b/docs/USAGE.md index f50e8f1..d1216e5 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -286,7 +286,7 @@ Instead of treating playbook bullets as plain text, the KG: - **Tracks provenance**: Links each entity back to the bullet it came from - **Finds contradictions**: Alerts you when new patterns conflict with existing knowledge -**Extraction happens automatically** during MAP workflows (Reflector/Curator agents), so you don't need to manually populate the graph. +**Extraction happens via `/map-learn`** after MAP workflows (Reflector/Curator agents), so you don't need to manually populate the graph. ### Entity Types (7) @@ -711,7 +711,7 @@ The checkpoint format (`.map/progress.md`) is designed with security in mind: ``` .map/progress.md - Workflow state (YAML frontmatter + markdown) - .map/task_plan_*.md - Task decomposition with validation criteria + .map/*/task_plan_*.md - Task decomposition with validation criteria ``` 2. **After compaction**, manually reference files: @@ -719,7 +719,7 @@ The checkpoint format (`.map/progress.md`) is designed with security in mind: ``` User: continue MAP workflow @.map/progress.md - @.map/task_plan_map-to-enchance.md + @.map/map-to-enchance/task_plan_map-to-enchance.md Claude: [reads files] Resuming subtask 4: "Add refresh token logic" @@ -934,7 +934,7 @@ MAP uses file-based persistence with automatic injection: **Files:** - `.map/progress.md` - Workflow checkpoint with YAML frontmatter (machine-readable) + markdown body (human-readable) -- `.map/task_plan_*.md` - Task decomposition with validation criteria +- `.map/*/task_plan_*.md` - Task decomposition with validation criteria - `.map/dev_docs/context.md` - Project context - `.map/dev_docs/tasks.md` - Task checklist @@ -1176,17 +1176,17 @@ Summary: ## 🔀 Workflow Variants -MAP Framework offers three workflow variants with different trade-offs between token usage, quality assurance, and learning: +MAP Framework offers three primary implementation workflows with different trade-offs between token usage, quality assurance, and learning. Additional supporting workflows (`/map-debug`, `/map-review`, `/map-check`, `/map-plan`, `/map-release`, `/map-resume`, `/map-learn`) are documented in their respective sections. 
### Comparison Table | Feature | /map-efficient ⭐ | /map-debate | /map-fast ⚠️ | |---------|-------------------|-------------|--------------| -| **Agents Used** | 5-6 (optimized) | 7 (multi-variant) | 3 (minimal) | +| **Agents Used** | 4-6 (task-decomposer, actor, monitor + conditional research-agent, predictor + final-verifier) | 7 (multi-variant) | 3 (minimal) | | **Token Cost** | **Baseline** | 3x (Opus model) | 40-50% less | | **Learning** | Via `/map-learn` | Via `/map-learn` | ❌ None | -| **Quality Gates** | Essential agents | Opus arbiter | Basic only | -| **Impact Analysis** | ✅ Conditional | ✅ Conditional | ❌ Never | +| **Quality Gates** | Essential agents + Final-Verifier | Opus arbiter | Basic only | +| **Impact Analysis** | ✅ Conditional (Predictor) | ✅ Conditional | ❌ Never | | **Multi-Variant** | ⚠️ Conditional (Self-MoA) | ✅ **Always 3 variants** | ❌ Never | | **Synthesis Model** | Synthesizer (sonnet) | **debate-arbiter (opus)** | N/A | | **Playbook Updates** | Via `/map-learn` | Via `/map-learn` | ❌ None | @@ -1206,8 +1206,9 @@ MAP Framework offers three workflow variants with different trade-offs between t - ✅ Refactoring with clear scope **Why it's better than /map-fast:** -- Still preserves full learning (Reflector/Curator) +- Learning available via `/map-learn` after workflow (Reflector/Curator) - Conditional Predictor catches high-risk issues +- Final-Verifier provides adversarial verification - Only 10% less token savings but much safer **Example use cases:** @@ -1370,23 +1371,24 @@ MAP Framework offers three workflow variants with different trade-offs between t - Predictor only called if risk_level='high' or Monitor flags issues - Low-risk tasks (simple CRUD, UI updates) skip impact analysis -2. **Batched Learning** (10-15% savings) - - Reflector analyzes ALL subtasks together at end - - Curator makes single playbook update - - More holistic insights (sees patterns across subtasks) - - Saves (N-1) × 3K tokens for N subtasks +2. **Learning Decoupled to /map-learn** (token savings during main workflow) + - Reflector and Curator are NOT called during /map-efficient execution + - Run `/map-learn` after workflow completes to extract patterns + - Reflector then analyzes ALL subtasks together (batched, more holistic insights) + - Curator makes a single playbook update (deduplication via mem0) -3. **Evaluator Skipped** (8-12% savings) +3. **Evaluator Not Invoked** (8-12% savings) - Monitor provides sufficient validation for most tasks - - Evaluator's 6-dimension scoring rarely changes decisions + - The Evaluator agent is skipped entirely (not just its scoring) + - Evaluator only runs in `/map-debug` and `/map-review` - Quality still ensured by Monitor's comprehensive checks **What's Preserved:** -- ✅ Full learning cycle (Reflector + Curator) -- ✅ Playbook updates (batched but complete) -- ✅ mem0 integration (high-quality patterns stored) +- ✅ Learning available via `/map-learn` (Reflector + Curator, optional after workflow) +- ✅ Tests gate + Linter gate per subtask +- ✅ Final-Verifier (adversarial verification at end) - ✅ Essential quality gates (Monitor validation) -- ✅ Impact analysis (when needed) +- ✅ Impact analysis (conditional Predictor when needed) ### Workflow Selection Flowchart @@ -1432,10 +1434,10 @@ START: I need to implement a feature **✅ Reality:** /map-fast defeats MAP's purpose (no learning = repeat mistakes = waste tokens long-term). Use /map-efficient instead. 
**❌ Misconception:** "/map-efficient skips quality checks" -**✅ Reality:** Monitor still validates everything. Only Evaluator's scoring is skipped (rarely changes decisions). +**✅ Reality:** Monitor still validates every subtask. Evaluator is not invoked (it only runs in /map-debug and /map-review), but Tests gate, Linter gate, and Final-Verifier ensure quality. -**❌ Misconception:** "Batched learning in /map-efficient is inferior to per-subtask learning" -**✅ Reality:** Batched learning sees patterns ACROSS subtasks, often producing better insights than isolated per-subtask analysis. +**❌ Misconception:** "Learning via /map-learn is inferior to per-subtask learning" +**✅ Reality:** /map-learn runs Reflector/Curator after the workflow completes, analyzing ALL subtasks together. This batched approach sees patterns ACROSS subtasks, often producing better insights than isolated per-subtask analysis. ## 🎯 Best Practices @@ -1561,7 +1563,7 @@ The upgrade of Predictor and Evaluator from haiku to sonnet provides: **1. Use `/map-efficient` workflow (RECOMMENDED)** - Skips Evaluator per subtask (Monitor provides sufficient validation) - Conditional Predictor (only called for high-risk changes) -- Batched Reflector/Curator at end +- Reflector/Curator available via `/map-learn` after workflow - **Token savings: 30-40%** **2. Use `/map-fast` for small, low-risk changes** @@ -1711,9 +1713,9 @@ Skills automatically suggest themselves when relevant: User: "I need to add a feature" MAP: 🎯 "Consider /map-efficient" -User: "What's the difference between efficient and feature?" +User: "What's the difference between efficient and debate?" MAP: 📚 "Loading map-workflows-guide skill" -[Shows comparison: efficient = production, feature = critical] +[Shows comparison: efficient = production, debate = reasoning transparency] ``` ### Progressive Disclosure @@ -1738,7 +1740,7 @@ Skills follow the 500-line rule: - `map-release-deep-dive.md` - Release workflow, validation gates **System architecture:** -- `agent-architecture.md` - How 11 agents orchestrate +- `agent-architecture.md` - How 12 agents orchestrate - `playbook-system.md` - Knowledge storage, quality scoring ### Creating Custom Skills @@ -2045,7 +2047,7 @@ This enables detailed logging from hooks, showing: | Verification results | `.map/verification_results_.json` | Machine-readable check results | | Workflow state | `.map/state_.json` | Current workflow status | | Repo insight | `.map/repo_insight_.json` | Project language and suggested checks | -| Task plan | `.map/task_plan_.md` | Subtask breakdown with validation | +| Task plan | `.map//task_plan_.md` | Subtask breakdown with validation | | Progress checkpoint | `.map/progress.md` | Resume checkpoint for context recovery | #### Common Issues diff --git a/docs/WORKFLOW_FLOW.md b/docs/WORKFLOW_FLOW.md index fec8c75..2f7f299 100644 --- a/docs/WORKFLOW_FLOW.md +++ b/docs/WORKFLOW_FLOW.md @@ -165,7 +165,7 @@ └──────────────────┬──────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────┐ -│ Turn 8: get_next_step → step_id=2.7, APPLY_CHANGES │ +│ Turn 8: get_next_step → step_id=2.7, UPDATE_STATE │ │ │ │ Выполняет: Edit/Write tools │ │ │ @@ -204,7 +204,7 @@ --- -## 🎯 17 фаз workflow +## 🎯 16 фаз workflow | Step | Фаза | Описание | Обязательно? 
| |------|------|----------|--------------| @@ -217,10 +217,9 @@ | **2.1** | MEM0_SEARCH | Поиск паттернов в mem0 | ✅ Да (для каждого ST) | | **2.2** | RESEARCH | research-agent для контекста | 🔶 Условно (если 3+ файлов) | | **2.3** | ACTOR | Actor генерирует код | ✅ Да (для каждого ST) | -| **2.4** | MONITOR | Monitor валидирует | ✅ Да (для каждого ST) | -| **2.5** | RETRY_LOOP | Повтор при Monitor.valid=false | 🔶 Условно (макс 5 раз) | +| **2.4** | MONITOR | Monitor валидирует (retry до 5 раз) | ✅ Да (для каждого ST) | | **2.6** | PREDICTOR | Анализ impact | 🔶 Условно (medium/high risk) | -| **2.7** | APPLY_CHANGES | Применение Edit/Write | ✅ Да (блокируется gate) | +| **2.7** | UPDATE_STATE | Обновление workflow_state.json | ✅ Да (для каждого ST) | | **2.8** | TESTS_GATE | Запуск тестов | 🔶 Условно (если есть) | | **2.9** | LINTER_GATE | Запуск линтера | 🔶 Условно (если есть) | | **2.10** | VERIFY_ADHERENCE | Self-audit checkpoint | ✅ Да (для каждого ST) | @@ -346,7 +345,7 @@ Claude: [Применяет Edit/Write] ✅ # Система автоматически: # 1. Создаст .map//step_state.json # 2. Будет показывать прогресс в хуках -# 3. Пройдет все 14 фаз для каждого subtask +# 3. Пройдет все 16 фаз для каждого subtask # 4. Завершится финальной верификацией ``` @@ -380,13 +379,13 @@ Applying modifications... **Проверить состояние:** ```bash # Посмотреть текущий шаг -cat .map/$(git branch --show-current | sed 's/\//-/g')/step_state.json +cat .map/$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||')/step_state.json # Получить следующий шаг вручную python3 .map/scripts/map_orchestrator.py get_next_step # Проверить workflow state (для gate) -cat .map/$(git branch --show-current | sed 's/\//-/g')/workflow_state.json +cat .map/$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||')/workflow_state.json ``` **Сбросить состояние:** diff --git a/presentation/en/01-introduction.md b/presentation/en/01-introduction.md index b5f1e04..403f887 100644 --- a/presentation/en/01-introduction.md +++ b/presentation/en/01-introduction.md @@ -14,9 +14,9 @@ ## Core Concepts -### 8 Specialized Agents +### 12 Specialized Agents -MAP coordinates 8 agents via the Orchestrator: +MAP coordinates 12 agents via the Orchestrator: 1. **[TaskDecomposer](https://github.com/azalio/map-framework/blob/main/.claude/agents/task-decomposer.md)** — breaks goals into atomic subtasks 2. **[Actor](https://github.com/azalio/map-framework/blob/main/.claude/agents/actor.md)** — generates code and solutions @@ -26,6 +26,10 @@ MAP coordinates 8 agents via the Orchestrator: 6. **[Reflector](https://github.com/azalio/map-framework/blob/main/.claude/agents/reflector.md)** — extracts lessons from successes and failures 7. **[Curator](https://github.com/azalio/map-framework/blob/main/.claude/agents/curator.md)** — manages the knowledge base (playbook) 8. **[DocumentationReviewer](https://github.com/azalio/map-framework/blob/main/.claude/agents/documentation-reviewer.md)** — checks documentation completeness and correctness +9. **[Debate-Arbiter](https://github.com/azalio/map-framework/blob/main/.claude/agents/debate-arbiter.md)** — cross-evaluates variants with explicit reasoning (Opus) +10. **[Synthesizer](https://github.com/azalio/map-framework/blob/main/.claude/agents/synthesizer.md)** — merges multiple variants into unified solution (Self-MoA) +11. 
**[Research-Agent](https://github.com/azalio/map-framework/blob/main/.claude/agents/research-agent.md)** — isolated codebase research +12. **[Final-Verifier](https://github.com/azalio/map-framework/blob/main/.claude/agents/final-verifier.md)** — adversarial verification (Ralph Loop) The **Orchestrator** is the workflow coordination logic implemented in slash commands (`.claude/commands/map-*.md`), not a separate agent template. @@ -33,6 +37,7 @@ The **Orchestrator** is the workflow coordination logic implemented in slash com MAP uses **5 MCP servers** to extend capabilities: +- **[mem0](https://github.com/mem0ai/mem0-mcp)** — semantic pattern memory (tiered search, pattern storage) - **[claude-reviewer](https://github.com/rsokolowski/mcp-claude-reviewer)** — professional code review with security analysis - **[sequential-thinking](https://github.com/modelcontextprotocol/servers/tree/main/src/sequentialthinking)** — chains of thought for complex tasks - **[context7](https://github.com/upstash/context7)** — up-to-date library documentation @@ -45,7 +50,7 @@ MAP uses **5 MCP servers** to extend capabilities: - Stored at [.claude/mem0 MCP](https://github.com/azalio/map-framework/blob/main/.claude/mem0 MCP) - **10 categories of patterns**: architecture, implementation, security, performance, errors, testing, code quality, tool usage, debugging, CLI tool patterns - **top_k = 5**: returns only the 5 most relevant patterns to reduce cognitive load -- **Automatic learning**: Reflector extracts patterns from every task, Curator incrementally updates the playbook +- **Learning via /map-learn**: Reflector extracts patterns, Curator incrementally updates the playbook (optional step after workflows) ## Benefits @@ -53,8 +58,8 @@ MAP uses **5 MCP servers** to extend capabilities: **Model allocation strategy:** -- Predictor, Evaluator: **haiku** (fast analysis) -- Actor, Monitor, TaskDecomposer, Reflector, Curator, DocumentationReviewer: **sonnet** (quality-critical) +- Actor, Monitor, TaskDecomposer, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer: **sonnet** (quality-critical) +- DebateArbiter: **opus** (highest reasoning for cross-variant analysis) ### Agent Context Isolation diff --git a/presentation/en/02-architecture.md b/presentation/en/02-architecture.md index e77324b..2801e16 100644 --- a/presentation/en/02-architecture.md +++ b/presentation/en/02-architecture.md @@ -2,13 +2,13 @@ ## Overview -MAP Framework is built around **8 specialized agents**, coordinated by the Orchestrator. +MAP Framework is built around **12 specialized agents**, coordinated by the Orchestrator. -The **Orchestrator** is NOT an agent template. Workflow coordination logic lives in the slash commands `.claude/commands/map-*.md` (map-feature, map-debug, map-refactor, map-review). +The **Orchestrator** is NOT an agent template. Workflow coordination logic lives in the slash commands `.claude/commands/map-*.md` (map-efficient, map-debug, map-fast, map-debate, map-review, map-check, map-plan, map-release, map-resume, map-learn). ## System Components -### 1. TaskDecomposer (1,169 lines) +### 1. TaskDecomposer (867 lines) **Model:** sonnet **Purpose:** Translates high-level goals into atomic, testable subtasks with explicit dependencies @@ -22,7 +22,7 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Output:** JSON with subtasks, acceptance_criteria, estimated_complexity, depends_on -### 2. Actor (641 lines) +### 2. 
Actor (1,084 lines) **Model:** sonnet **Purpose:** Senior software engineer; writes clean, efficient, production-ready code @@ -37,7 +37,7 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Inputs:** {{existing_patterns}} (top_k=5), {{plan_context}} (recitation pattern), {{feedback}} (if retry) -### 3. Monitor (908 lines) +### 3. Monitor (2,521 lines) **Model:** sonnet **Purpose:** Meticulous code reviewer (10+ years), catches bugs, vulnerabilities, and standard violations @@ -55,22 +55,23 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Output:** valid (boolean), issues (severity/category/description), verdict (approved/needs_revision/rejected) -### 4. Predictor (898 lines) +### 4. Predictor (2,108 lines) -**Model:** haiku (cost-optimized) +**Model:** sonnet **Purpose:** Impact analysis specialist; predicts ripple effects BEFORE implementation -**MCP integrations (3 tools):** +**MCP integrations (4 tools):** - `mcp__mem0__map_tiered_search` — search past breaking changes and migration patterns -- `context7__get-library-docs` — check library version compatibility -- `deepwiki__read_wiki_structure + ask_question` — study migration patterns +- `mcp__context7__get-library-docs` — check library version compatibility +- `mcp__deepwiki__read_wiki_structure + ask_question` — study migration patterns +- `mcp__sequential-thinking__sequentialthinking` — complex trade-off analysis for multi-system impact **Output:** affected_files, breaking_changes, required_updates, risk_level (low/medium/high), rollback_plan -### 5. Evaluator (843 lines) +### 5. Evaluator (1,492 lines) -**Model:** haiku (cost-optimized) +**Model:** sonnet **Purpose:** Objective quality assessor with data-driven metrics **MCP integrations (5 tools):** @@ -85,7 +86,7 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Output:** scores (code_quality, test_coverage, documentation, security, performance, maintainability 0–10), overall_score, recommendation -### 6. Reflector (1,004 lines) — ACE Learning +### 6. Reflector (851 lines) — ACE Learning **Model:** sonnet **Purpose:** Expert learning analyst; extracts reusable patterns from implementations @@ -104,7 +105,7 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Output:** key_insight, patterns_used, patterns_discovered, bullet_updates (helpful/harmful count), suggested_new_bullets -### 7. Curator (1,145 lines) — ACE Learning +### 7. Curator (1,296 lines) — ACE Learning **Model:** sonnet **Purpose:** Knowledge curator; evolves the playbook without context collapse @@ -132,7 +133,7 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives - `Fetch` — MANDATORY: verify EVERY external URL in docs - `deepwiki__ask_question` — get architecture details from external projects -- `context7__resolve_library_id + get-library-docs` — verify API/integration details +- `context7__resolve-library-id + get-library-docs` — verify API/integration details - `mcp__mem0__map_tiered_search` — check known documentation anti-patterns **Critical constraints (NEVER violate):** @@ -145,6 +146,34 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives **Review Workflow:** Read source → Extract URLs → Fetch URLs → Check CRDs/dependencies → Verify documentation → Cross-check decomposition +### 9. 
Synthesizer + +**Model:** sonnet +**Purpose:** Merges multiple Actor variants into a unified solution (Self-MoA in /map-efficient) + +**Output:** Synthesized code combining best elements from all validated variants + +### 10. DebateArbiter + +**Model:** opus (highest reasoning quality) +**Purpose:** Cross-evaluates Actor variants with explicit reasoning matrix; synthesizes optimal solution in /map-debate + +**Output:** comparison_matrix, decision_rationales, synthesized code + +### 11. ResearchAgent + +**Model:** inherit (uses parent context model) +**Purpose:** Heavy codebase reading with compressed output; prevents Actor context bloat + +**Output:** Executive summary (<2K tokens) with file locations, patterns, and confidence score + +### 12. FinalVerifier + +**Model:** sonnet +**Purpose:** Adversarial verifier (Four-Eyes Principle); catches premature completion and hallucinated success + +**Output:** verdict (PASS/FAIL), confidence score, root cause analysis if failed + ## Agent Interactions ### Orchestrator Workflow (Automated sequence) @@ -173,13 +202,13 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives - ALWAYS verify MCP tool usage in agent outputs - Manual extraction/curation bypasses MCP tools → knowledge won't deduplicate → lessons won't be learned -**Enforcement source:** `.claude/commands/map-feature.md` lines 263–355 + MAP workflow enforcement rules +**Enforcement source:** `.claude/commands/map-efficient.md` + MAP workflow enforcement rules ### Template Structure **All agents use:** -- YAML frontmatter: name, description, model (sonnet/haiku), version 2.2.0 +- YAML frontmatter: name, description, model (sonnet/opus), version, last_updated - Handlebars variables: {{project_name}}, {{language}}, {{framework}}, {{subtask_description}}, {{existing_patterns}}, {{feedback}} - Standard sections: IDENTITY, context, mcp_integration, rationale, critical/constraints, examples, output_format @@ -187,5 +216,6 @@ The **Orchestrator** is NOT an agent template. Workflow coordination logic lives ### Model Strategy -- **haiku** (cost-optimized): Predictor, Evaluator -- **sonnet** (quality-critical): Actor, Monitor, TaskDecomposer, Reflector, Curator, DocumentationReviewer +- **sonnet** (quality-critical): Actor, Monitor, TaskDecomposer, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Synthesizer, FinalVerifier +- **opus** (highest reasoning): DebateArbiter +- **inherit** (parent context): ResearchAgent diff --git a/presentation/en/03-workflow.md b/presentation/en/03-workflow.md index f127f36..67d22f3 100644 --- a/presentation/en/03-workflow.md +++ b/presentation/en/03-workflow.md @@ -4,12 +4,12 @@ MAP Framework uses a **strictly sequential orchestration** that begins with TaskDecomposer and then runs an implementation loop for each subtask. -**Mandatory sequence:** +**Full pipeline (conceptual — individual workflows may skip agents):** ```mermaid flowchart TD Start([Task Start]) --> Decompose[0. TaskDecomposer
Create subtasks]
-    Decompose --> Plan[2.5 Checkpoint<br/>Create progress.md]
+    Decompose --> Plan[Checkpoint<br/>Create progress.md]
     Plan --> Actor[1. Actor<br/>Implement subtask]
     Actor --> Monitor[2. Monitor
Quality validation] @@ -21,20 +21,31 @@ flowchart TD Evaluator -->|Approved| Accept[5. ACCEPT changes
Apply to files]
     Evaluator -->|Not Approved| Actor
-    Accept --> Reflector[6. Reflector<br/>Extract lessons<br/>MANDATORY]
-    Reflector --> Curator[7. Curator<br/>Update playbook<br/>MANDATORY]
+    Accept --> Reflector[6. Reflector<br/>Extract lessons]
+    Reflector --> Curator[7. Curator<br/>Update playbook]
-    Curator --> End([Subtask Complete])
+    Curator -->|More subtasks| Actor
+    Curator -->|All done| Verifier[8. FinalVerifier
Adversarial verification] + Verifier --> End([Workflow Complete]) ``` ## Orchestrator Slash Commands -MAP provides **4 specialized workflow commands** for different scenarios: +MAP provides **10 workflow commands** for different scenarios: -1. **`/map-feature`** — implement new features -2. **`/map-debug`** — debug issues -3. **`/map-refactor`** — refactor code -4. **`/map-review`** — review documentation +**Primary workflows:** +1. **`/map-efficient`** — implement features, refactor code, complex tasks (recommended default) +2. **`/map-debug`** — debug issues, fix bugs +3. **`/map-fast`** — small, low-risk changes with minimal overhead +4. **`/map-debate`** — multi-variant synthesis with Opus arbiter + +**Supporting commands:** +5. **`/map-review`** — review changes before commit +6. **`/map-check`** — quality gates and verification +7. **`/map-plan`** — architecture decomposition only +8. **`/map-release`** — release workflow with validation gates +9. **`/map-resume`** — resume interrupted workflows +10. **`/map-learn`** — extract and preserve lessons (optional learning step) The **Orchestrator** is NOT a separate agent template; it is the coordination logic implemented in these slash commands. @@ -131,7 +142,7 @@ After invoking Reflector or Curator, the orchestrator **MUST VERIFY** MCP tool u - Display: “⚠️ Retry attempt 2 — review previous errors” - Implements patterns `qual-0001` (WHAT/WHERE/HOW/WHY) and `arch-0005` (three-failure threshold) -**Sources:** `CONTEXT-ENGINEERING-IMPROVEMENTS.md` Phase 1.1 (lines 276–289), `.claude/commands/map-feature.md` lines 61–103 +**Sources:** `CONTEXT-ENGINEERING-IMPROVEMENTS.md` Phase 1.1 (lines 276–289), `.claude/commands/map-efficient.md` ## Actor–Monitor Retry Loop @@ -223,11 +234,6 @@ Before completing any MAP workflow subtask the orchestrator **MUST** check 2 que ## Exception: Non-MAP Tasks -These rules apply **ONLY** when using MAP framework commands: - -- `/map-feature` -- `/map-debug` -- `/map-refactor` -- `/map-review` +These rules apply **ONLY** when using MAP framework commands (`/map-efficient`, `/map-debug`, `/map-fast`, `/map-debate`, `/map-review`, `/map-check`, `/map-plan`, `/map-release`, `/map-resume`, `/map-learn`). For ordinary tasks (bug fixes, docs, simple changes) you can work directly without the full agent chain. diff --git a/presentation/en/04-getting-started.md b/presentation/en/04-getting-started.md index 05929d6..1ead324 100644 --- a/presentation/en/04-getting-started.md +++ b/presentation/en/04-getting-started.md @@ -64,7 +64,7 @@ mapify init . 
**Benefits:** - Automatic project structure setup -- Copies all 8 agents and 4 slash commands +- Copies all 12 agents and 10 slash commands - Creates `.claude/mem0 MCP` with a starter structure - Best choice for new projects @@ -92,7 +92,7 @@ Copy selected components into an existing project: ```bash .claude/ -├── agents/ # 8 agent template files +├── agents/ # 12 agent template files │ ├── task-decomposer.md │ ├── actor.md │ ├── monitor.md @@ -100,12 +100,22 @@ Copy selected components into an existing project: │ ├── evaluator.md │ ├── reflector.md │ ├── curator.md -│ └── documentation-reviewer.md -├── commands/ # 4 slash commands -│ ├── map-feature.md +│ ├── documentation-reviewer.md +│ ├── debate-arbiter.md +│ ├── synthesizer.md +│ ├── research-agent.md +│ └── final-verifier.md +├── commands/ # Workflow slash commands +│ ├── map-efficient.md │ ├── map-debug.md -│ ├── map-refactor.md -│ └── map-review.md +│ ├── map-fast.md +│ ├── map-debate.md +│ ├── map-review.md +│ ├── map-check.md +│ ├── map-plan.md +│ ├── map-release.md +│ ├── map-resume.md +│ └── map-learn.md └── mem0 MCP # ACE knowledge base (SQLite) ``` @@ -117,15 +127,15 @@ Copy selected components into an existing project: ## First Commands -After installation, you have **4 core workflow commands**: +After installation, you have **10 workflow commands** (4 primary + 6 supporting). Here are the 4 most commonly used: -### /map-feature — Implement New Features +### /map-efficient — Full Workflow (Features, Refactoring, Complex Tasks) ```bash -/map-feature Implement user authentication with JWT tokens +/map-efficient Implement user authentication with JWT tokens ``` -Automatically decomposes the task, implements, validates, and extracts reusable patterns for future work. +Automatically decomposes the task, implements, validates, and extracts reusable patterns for future work. Handles features, refactoring, and any complex development task. ### /map-debug — Debug Issues @@ -135,21 +145,21 @@ Automatically decomposes the task, implements, validates, and extracts reusable Diagnoses and fixes issues with detailed analysis and impact prediction. -### /map-refactor — Refactor Code +### /map-fast — Quick Changes ```bash -/map-refactor Extract database queries into repository pattern +/map-fast Add environment variable for API timeout ``` -Refactors with impact prediction and quality assessment. +Minimal workflow for small, low-risk changes with 40-50% token savings. -### /map-review — Review Documentation +### /map-review — Review Changes ```bash /map-review Check API documentation for completeness ``` -Comprehensive technical documentation review for completeness and correctness. +Comprehensive review of changes using Monitor, Predictor, and Evaluator agents. ## Configuration @@ -190,6 +200,7 @@ MAP requires **5 MCP servers** for full functionality: **Required:** +- **mem0** — semantic pattern memory (tiered search, pattern storage) - **claude-reviewer** — professional code review with security analysis **Optional (recommended):** @@ -223,11 +234,11 @@ After installation: 1. **Run your first workflow:** ```bash - /map-feature Implement hello world endpoint + /map-efficient Implement hello world endpoint ``` 2. **Inspect the generated checkpoint:** - - Open `.map/progress.md` + - Open `.map//progress.md` - Watch progress markers 3. 
**Review results:** diff --git "a/presentation/ru/01-\320\262\320\262\320\265\320\264\320\265\320\275\320\270\320\265.md" "b/presentation/ru/01-\320\262\320\262\320\265\320\264\320\265\320\275\320\270\320\265.md" index 2273a0f..b0c17ae 100644 --- "a/presentation/ru/01-\320\262\320\262\320\265\320\264\320\265\320\275\320\270\320\265.md" +++ "b/presentation/ru/01-\320\262\320\262\320\265\320\264\320\265\320\275\320\270\320\265.md" @@ -14,9 +14,9 @@ ## Основные концепции -### 8 Специализированных Агентов +### 12 Специализированных Агентов -MAP координирует работу 8 агентов через Orchestrator: +MAP координирует работу 12 агентов через Orchestrator: 1. **[TaskDecomposer](https://github.com/azalio/map-framework/blob/main/.claude/agents/task-decomposer.md)** — разбивает цели на атомарные подзадачи 2. **[Actor](https://github.com/azalio/map-framework/blob/main/.claude/agents/actor.md)** — генерирует код и решения @@ -26,6 +26,10 @@ MAP координирует работу 8 агентов через Orchestrat 6. **[Reflector](https://github.com/azalio/map-framework/blob/main/.claude/agents/reflector.md)** — извлекает уроки из успехов и неудач 7. **[Curator](https://github.com/azalio/map-framework/blob/main/.claude/agents/curator.md)** — управляет базой знаний (playbook) 8. **[DocumentationReviewer](https://github.com/azalio/map-framework/blob/main/.claude/agents/documentation-reviewer.md)** — проверяет полноту и корректность документации +9. **[Debate-Arbiter](https://github.com/azalio/map-framework/blob/main/.claude/agents/debate-arbiter.md)** — кросс-оценка вариантов с прозрачным обоснованием (Opus) +10. **[Synthesizer](https://github.com/azalio/map-framework/blob/main/.claude/agents/synthesizer.md)** — синтез решения из нескольких вариантов (Self-MoA) +11. **[Research-Agent](https://github.com/azalio/map-framework/blob/main/.claude/agents/research-agent.md)** — изолированное исследование кодовой базы +12. **[Final-Verifier](https://github.com/azalio/map-framework/blob/main/.claude/agents/final-verifier.md)** — адверсариальная верификация (Ralph Loop) **Orchestrator** — логика координации workflow, реализованная в slash-командах (`.claude/commands/map-*.md`), не отдельный шаблон агента. 
@@ -33,6 +37,7 @@ MAP координирует работу 8 агентов через Orchestrat MAP использует **5 MCP серверов** для расширения возможностей: +- **[mem0](https://github.com/mem0ai/mem0-mcp)** — семантическая память паттернов (tiered search, хранение паттернов) - **[claude-reviewer](https://github.com/rsokolowski/mcp-claude-reviewer)** — профессиональный code review с анализом безопасности - **[sequential-thinking](https://github.com/modelcontextprotocol/servers/tree/main/src/sequentialthinking)** — цепочки рассуждений для сложных задач - **[context7](https://github.com/upstash/context7)** — актуальная документация библиотек @@ -45,7 +50,7 @@ MAP использует **5 MCP серверов** для расширения - Хранится в [.claude/mem0 MCP](https://github.com/azalio/map-framework/blob/main/.claude/mem0 MCP) - **10 категорий паттернов**: архитектура, реализация, безопасность, производительность, ошибки, тестирование, качество кода, использование инструментов, отладка, CLI-инструменты - **top_k = 5**: возвращает только 5 наиболее релевантных паттернов для уменьшения когнитивной нагрузки -- **Автоматическое обучение**: Reflector извлекает паттерны из каждой задачи, Curator обновляет playbook инкрементально +- **Обучение через /map-learn**: Reflector извлекает паттерны, Curator обновляет playbook инкрементально (опциональный шаг после workflows) ## Преимущества @@ -53,8 +58,8 @@ MAP использует **5 MCP серверов** для расширения **Стратегия распределения моделей:** -- Predictor, Evaluator: **haiku** (быстрый анализ) -- Actor, Monitor, TaskDecomposer, Reflector, Curator, DocumentationReviewer: **sonnet** (критичное качество) +- Actor, Monitor, TaskDecomposer, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer: **sonnet** (критичное качество) +- DebateArbiter: **opus** (наивысшее качество рассуждений для кросс-вариантного анализа) ### Изоляция Контекста Агентов diff --git "a/presentation/ru/02-\320\260\321\200\321\205\320\270\321\202\320\265\320\272\321\202\321\203\321\200\320\260.md" "b/presentation/ru/02-\320\260\321\200\321\205\320\270\321\202\320\265\320\272\321\202\321\203\321\200\320\260.md" index c56d7c5..fd9d781 100644 --- "a/presentation/ru/02-\320\260\321\200\321\205\320\270\321\202\320\265\320\272\321\202\321\203\321\200\320\260.md" +++ "b/presentation/ru/02-\320\260\321\200\321\205\320\270\321\202\320\265\320\272\321\202\321\203\321\200\320\260.md" @@ -2,13 +2,13 @@ ## Общая схема -MAP Framework построен на основе **8 специализированных агентов**, координируемых через Orchestrator. +MAP Framework построен на основе **12 специализированных агентов**, координируемых через Orchestrator. -**Orchestrator** — НЕ агент-шаблон. Логика координации workflow реализована в slash-командах `.claude/commands/map-*.md` (map-feature, map-debug, map-refactor, map-review). +**Orchestrator** — НЕ агент-шаблон. Логика координации workflow реализована в slash-командах `.claude/commands/map-*.md` (map-efficient, map-debug, map-fast, map-debate, map-review, map-check, map-plan, map-release, map-resume, map-learn). ## Компоненты системы -### 1. TaskDecomposer (1,169 строк) +### 1. TaskDecomposer (867 строк) **Модель:** sonnet **Назначение:** Переводит high-level цели в атомарные, тестируемые subtasks с явными зависимостями @@ -22,7 +22,7 @@ MAP Framework построен на основе **8 специализиров **Output:** JSON с subtasks, acceptance_criteria, estimated_complexity, depends_on -### 2. Actor (641 строка) +### 2. 
Actor (1,084 строки) **Модель:** sonnet **Назначение:** Senior software engineer, пишет clean, efficient, production-ready код @@ -37,7 +37,7 @@ MAP Framework построен на основе **8 специализиров **Входные данные:** {{existing_patterns}} (top_k=5), {{plan_context}} (recitation pattern), {{feedback}} (если retry) -### 3. Monitor (908 строк) +### 3. Monitor (2,521 строка) **Модель:** sonnet **Назначение:** Meticulous code reviewer (10+ лет опыта), ловит баги, уязвимости, нарушения стандартов @@ -55,22 +55,23 @@ MAP Framework построен на основе **8 специализиров **Output:** valid (boolean), issues (severity/category/description), verdict (approved/needs_revision/rejected) -### 4. Predictor (898 строк) +### 4. Predictor (2,108 строк) -**Модель:** haiku (cost-optimized) +**Модель:** sonnet **Назначение:** Impact analysis specialist, предсказывает ripple effects ДО реализации -**MCP интеграции (3 инструмента):** +**MCP интеграции (4 инструмента):** - `mcp__mem0__map_tiered_search` — поиск past breaking changes и migration паттернов -- `context7__get-library-docs` — проверка library version compatibility -- `deepwiki__read_wiki_structure + ask_question` — изучение migration паттернов +- `mcp__context7__get-library-docs` — проверка library version compatibility +- `mcp__deepwiki__read_wiki_structure + ask_question` — изучение migration паттернов +- `mcp__sequential-thinking__sequentialthinking` — комплексный trade-off анализ для multi-system impact **Output:** affected_files, breaking_changes, required_updates, risk_level (low/medium/high), rollback_plan -### 5. Evaluator (843 строки) +### 5. Evaluator (1,492 строки) -**Модель:** haiku (cost-optimized) +**Модель:** sonnet **Назначение:** Objective quality assessor с data-driven метриками **MCP интеграции (5 инструментов):** @@ -85,7 +86,7 @@ MAP Framework построен на основе **8 специализиров **Output:** scores (code_quality, test_coverage, documentation, security, performance, maintainability 0-10), overall_score, recommendation -### 6. Reflector (1,004 строки) — ACE Learning +### 6. Reflector (851 строка) — ACE Learning **Модель:** sonnet **Назначение:** Expert learning analyst, извлекает reusable паттерны из реализаций @@ -104,7 +105,7 @@ MAP Framework построен на основе **8 специализиров **Output:** key_insight, patterns_used, patterns_discovered, bullet_updates (helpful/harmful count), suggested_new_bullets -### 7. Curator (1,145 строк) — ACE Learning +### 7. Curator (1,296 строк) — ACE Learning **Модель:** sonnet **Назначение:** Knowledge curator, управляет evolving playbook без context collapse @@ -132,7 +133,7 @@ MAP Framework построен на основе **8 специализиров - `Fetch` — MANDATORY: верификация КАЖДОГО external URL в документации - `deepwiki__ask_question` — получение architecture details из external проектов -- `context7__resolve_library_id + get-library-docs` — верификация API/integration деталей +- `context7__resolve-library-id + get-library-docs` — верификация API/integration деталей - `mcp__mem0__map_tiered_search` — проверка known documentation anti-patterns **Критические ограничения (НИКОГДА не нарушать):** @@ -145,6 +146,34 @@ MAP Framework построен на основе **8 специализиров **Review Workflow:** Read source → Extract URLs → Fetch URLs → Check CRDs/dependencies → Verify documentation → Cross-check decomposition +### 9. 
Synthesizer + +**Модель:** sonnet +**Назначение:** Объединяет несколько вариантов Actor в единое решение (Self-MoA в /map-efficient) + +**Output:** Синтезированный код, комбинирующий лучшие элементы всех валидированных вариантов + +### 10. DebateArbiter + +**Модель:** opus (наивысшее качество рассуждений) +**Назначение:** Кросс-оценка вариантов Actor с явной матрицей рассуждений; синтез оптимального решения в /map-debate + +**Output:** comparison_matrix, decision_rationales, synthesized code + +### 11. ResearchAgent + +**Модель:** inherit (наследует модель родительского контекста) +**Назначение:** Глубокое чтение codebase со сжатым output; предотвращает раздувание контекста Actor + +**Output:** Executive summary (<2K токенов) с расположением файлов, паттернами и оценкой confidence + +### 12. FinalVerifier + +**Модель:** sonnet +**Назначение:** Adversarial verifier (принцип "четырёх глаз"); ловит преждевременное завершение и галлюцинации успеха + +**Output:** verdict (PASS/FAIL), confidence score, root cause analysis при неудаче + ## Взаимодействие агентов ### Orchestrator Workflow (Автоматизированная последовательность) @@ -173,13 +202,13 @@ MAP Framework построен на основе **8 специализиров - ВСЕГДА верифицируй MCP tool usage в agent outputs - Manual extraction/curation bypasses MCP tools → знания не дедуплицируются → уроки не усваиваются -**Enforcement source:** `.claude/commands/map-feature.md` lines 263-355 + MAP workflow enforcement rules +**Enforcement source:** `.claude/commands/map-efficient.md` + MAP workflow enforcement rules ### Шаблонная структура **Все агенты используют:** -- YAML frontmatter: name, description, model (sonnet/haiku), version 2.2.0 +- YAML frontmatter: name, description, model (sonnet/opus), version, last_updated - Handlebars переменные: {{project_name}}, {{language}}, {{framework}}, {{subtask_description}}, {{existing_patterns}}, {{feedback}} - Стандартные секции: IDENTITY, context, mcp_integration, rationale, critical/constraints, examples, output_format @@ -187,5 +216,6 @@ MAP Framework построен на основе **8 специализиров ### Модельная стратегия -- **haiku** (cost-optimized): Predictor, Evaluator -- **sonnet** (quality-critical): Actor, Monitor, TaskDecomposer, Reflector, Curator, DocumentationReviewer +- **sonnet** (quality-critical): Actor, Monitor, TaskDecomposer, Predictor, Evaluator, Reflector, Curator, DocumentationReviewer, Synthesizer, FinalVerifier +- **opus** (highest reasoning): DebateArbiter +- **inherit** (родительский контекст): ResearchAgent diff --git a/presentation/ru/03-workflow.md b/presentation/ru/03-workflow.md index 95d1124..3c5fd6f 100644 --- a/presentation/ru/03-workflow.md +++ b/presentation/ru/03-workflow.md @@ -4,12 +4,12 @@ MAP Framework использует **строго последовательную оркестрацию**, которая начинается с TaskDecomposer, после чего для каждой подзадачи запускается цикл реализации. -**Обязательная последовательность:** +**Полный pipeline (концептуальный — отдельные workflows могут пропускать агентов):** ```mermaid flowchart TD Start([Начало задачи]) --> Decompose[0. TaskDecomposer
Декомпозиция] - Decompose --> Plan[2.5 Checkpoint
Создать progress.md] + Decompose --> Plan[Checkpoint
Создать progress.md] Plan --> Actor[1. Actor
Реализация подзадачи] Actor --> Monitor[2. Monitor
Валидация качества] @@ -21,20 +21,31 @@ flowchart TD Evaluator -->|Approved| Accept[5. ACCEPT changes
Применение изменений] Evaluator -->|Not Approved| Actor - Accept --> Reflector[6. Reflector
Извлечение уроков
MANDATORY] - Reflector --> Curator[7. Curator
Обновление playbook
MANDATORY] + Accept --> Reflector[6. Reflector
Извлечение уроков] + Reflector --> Curator[7. Curator
Обновление playbook] - Curator --> End([Subtask Complete]) + Curator -->|Ещё подзадачи| Actor + Curator -->|Все готово| Verifier[8. FinalVerifier
Adversarial верификация] + Verifier --> End([Workflow завершён]) ``` ## Slash-команды Orchestrator -MAP предоставляет **4 специализированных workflow команды** для различных сценариев: +MAP предоставляет **10 workflow команд** для различных сценариев: -1. **`/map-feature`** — реализация новых функций -2. **`/map-debug`** — отладка проблем -3. **`/map-refactor`** — рефакторинг кода -4. **`/map-review`** — review документации +**Основные workflows:** +1. **`/map-efficient`** — реализация фичей, рефакторинг, сложные задачи (рекомендуемый по умолчанию) +2. **`/map-debug`** — отладка проблем, исправление багов +3. **`/map-fast`** — небольшие низкорисковые изменения +4. **`/map-debate`** — мульти-вариантный синтез с Opus арбитром + +**Вспомогательные команды:** +5. **`/map-review`** — review изменений перед коммитом +6. **`/map-check`** — quality gates и верификация +7. **`/map-plan`** — только архитектурная декомпозиция +8. **`/map-release`** — release workflow с валидационными гейтами +9. **`/map-resume`** — возобновление прерванных workflows +10. **`/map-learn`** — извлечение и сохранение уроков (опциональный шаг) **Orchestrator** — НЕ отдельный агент-шаблон, а логика координации, реализованная в этих slash-командах. @@ -142,7 +153,7 @@ MAP предоставляет **4 специализированных workflow - Дисплей: "⚠️ Retry attempt 2 - review previous errors" - Реализует паттерны `qual-0001` (WHAT/WHERE/HOW/WHY) и `arch-0005` (three-failure threshold) -**Источник:** `CONTEXT-ENGINEERING-IMPROVEMENTS.md` Phase 1.1 (lines 276-289), `.claude/commands/map-feature.md` lines 61-103 +**Источник:** `CONTEXT-ENGINEERING-IMPROVEMENTS.md` Phase 1.1 (lines 276-289), `.claude/commands/map-efficient.md` ## Actor-Monitor Retry Loop @@ -234,11 +245,6 @@ MAP использует **5 core MCP tools** для расширения воз ## Exception: Non-MAP Tasks -Эти правила **ТОЛЬКО** применяются при использовании MAP framework команд: - -- `/map-feature` -- `/map-debug` -- `/map-refactor` -- `/map-review` +Эти правила **ТОЛЬКО** применяются при использовании MAP framework команд (`/map-efficient`, `/map-debug`, `/map-fast`, `/map-debate`, `/map-review`, `/map-check`, `/map-plan`, `/map-release`, `/map-resume`, `/map-learn`). Для обычных задач (bug fixes, documentation, простые изменения) можно работать напрямую без полной agent chain. diff --git "a/presentation/ru/04-\320\275\320\260\321\207\320\260\320\273\320\276-\321\200\320\260\320\261\320\276\321\202\321\213.md" "b/presentation/ru/04-\320\275\320\260\321\207\320\260\320\273\320\276-\321\200\320\260\320\261\320\276\321\202\321\213.md" index 98f28a9..d186367 100644 --- "a/presentation/ru/04-\320\275\320\260\321\207\320\260\320\273\320\276-\321\200\320\260\320\261\320\276\321\202\321\213.md" +++ "b/presentation/ru/04-\320\275\320\260\321\207\320\260\320\273\320\276-\321\200\320\260\320\261\320\276\321\202\321\213.md" @@ -64,7 +64,7 @@ mapify init . 
**Преимущества:** - Автоматическая настройка структуры проекта -- Копирование всех 8 агентов и 4 slash-команд +- Копирование всех 12 агентов и 10 slash-команд - Настройка mem0 MCP (паттерны хранятся вне репозитория) - Лучший выбор для новых проектов @@ -92,7 +92,7 @@ cd map-framework ```bash .claude/ -├── agents/ # agent template files +├── agents/ # 12 agent template files │ ├── task-decomposer.md │ ├── actor.md │ ├── monitor.md @@ -100,13 +100,23 @@ cd map-framework │ ├── evaluator.md │ ├── reflector.md │ ├── curator.md -│ └── documentation-reviewer.md -├── commands/ # slash commands -│ ├── map-feature.md +│ ├── documentation-reviewer.md +│ ├── debate-arbiter.md +│ ├── synthesizer.md +│ ├── research-agent.md +│ └── final-verifier.md +├── commands/ # workflow slash commands +│ ├── map-efficient.md │ ├── map-debug.md -│ ├── map-refactor.md -│ └── map-review.md -└── mcp_config.json # MCP servers config (mem0, etc.) +│ ├── map-fast.md +│ ├── map-debate.md +│ ├── map-review.md +│ ├── map-check.md +│ ├── map-plan.md +│ ├── map-release.md +│ ├── map-resume.md +│ └── map-learn.md +└── mem0 MCP # ACE knowledge base (SQLite) ``` **Преимущества:** @@ -117,15 +127,15 @@ cd map-framework ## Первые Команды -После установки доступны **4 основных workflow команды**: +После установки доступны **10 workflow команд** (4 основных + 6 вспомогательных). Вот 4 наиболее часто используемые: -### /map-feature — Реализация Новых Функций +### /map-efficient — Полный Workflow (Фичи, Рефакторинг, Сложные Задачи) ```bash -/map-feature Implement user authentication with JWT tokens +/map-efficient Implement user authentication with JWT tokens ``` -Автоматическая декомпозиция задачи на подзадачи, реализация, валидация и извлечение паттернов для будущего использования. +Автоматическая декомпозиция задачи на подзадачи, реализация, валидация и извлечение паттернов для будущего использования. Подходит для фичей, рефакторинга и любых сложных задач. ### /map-debug — Отладка Проблем @@ -135,21 +145,21 @@ cd map-framework Анализ и исправление ошибок с детальной диагностикой и предсказанием влияния изменений. -### /map-refactor — Рефакторинг Кода +### /map-fast — Быстрые Изменения ```bash -/map-refactor Extract database queries into repository pattern +/map-fast Add environment variable for API timeout ``` -Рефакторинг кода с предсказанием влияния изменений и оценкой качества. +Минимальный workflow для небольших низкорисковых изменений с экономией 40-50% токенов. -### /map-review — Review Документации +### /map-review — Review Изменений ```bash /map-review Check API documentation for completeness ``` -Комплексный review технической документации с проверкой полноты и корректности. +Комплексный review изменений с использованием Monitor, Predictor и Evaluator агентов. ## Конфигурация @@ -190,6 +200,7 @@ MAP требует **5 MCP servers** для полной функциональ **Обязательные:** +- **mem0** — семантическая память паттернов (tiered search, хранение паттернов) - **claude-reviewer** — профессиональный code review с анализом безопасности **Опциональные (но рекомендуемые):** @@ -223,11 +234,11 @@ MAP требует **5 MCP servers** для полной функциональ 1. **Запустите первый workflow:** ```bash - /map-feature Implement hello world endpoint + /map-efficient Implement hello world endpoint ``` 2. **Изучите созданный checkpoint:** - - Откройте `.map/progress.md` + - Откройте `.map//progress.md` - Наблюдайте progress markers 3. 
**Просмотрите результаты:** diff --git a/scripts/lint-agent-templates.py b/scripts/lint-agent-templates.py index 09dcaa2..2894bf7 100755 --- a/scripts/lint-agent-templates.py +++ b/scripts/lint-agent-templates.py @@ -158,8 +158,8 @@ def lint_template_variables(self, file_path: Path, content: str): "subtask_description", "playbook_bullets", "feedback", - "standards_url", - "branch", + "standards_doc", + "branch_name", "related_files", ] diff --git a/src/mapify_cli/__init__.py b/src/mapify_cli/__init__.py index dd04014..12bcd54 100644 --- a/src/mapify_cli/__init__.py +++ b/src/mapify_cli/__init__.py @@ -1195,38 +1195,39 @@ def configure_global_permissions() -> None: # Default permissions for read-only commands default_permissions = { "allow": [ - "Bash(git status:*)", - "Bash(git log:*)", - "Bash(git diff:*)", - "Bash(git show:*)", - "Bash(git check-ignore:*)", - "Bash(git branch --show-current:*)", - "Bash(git branch -a:*)", - "Bash(git ls-files:*)", - "Bash(ls :*)", - "Bash(cat :*)", - "Bash(head :*)", - "Bash(tail :*)", - "Bash(wc :*)", - "Bash(grep :*)", - "Bash(find :*)", - "Bash(sort :*)", - "Bash(uniq :*)", - "Bash(jq :*)", - "Bash(which :*)", - "Bash(echo :*)", - "Bash(pwd:*)", - "Bash(whoami:*)", - "Bash(ruby -c :*)", - "Bash(go fmt /tmp/:*)", - "Bash(gofmt -l :*)", - "Bash(gofmt -d :*)", - "Bash(go vet :*)", - "Bash(go build:*)", - "Bash(go test -c:*)", - "Bash(go mod download:*)", - "Bash(go mod tidy:*)", - "Bash(chmod +x:*)", + "Bash(git status *)", + "Bash(git log *)", + "Bash(git diff *)", + "Bash(git show *)", + "Bash(git check-ignore *)", + "Bash(git branch --show-current *)", + "Bash(git branch -a *)", + "Bash(git rev-parse *)", + "Bash(git ls-files *)", + "Bash(ls *)", + "Bash(cat *)", + "Bash(head *)", + "Bash(tail *)", + "Bash(wc *)", + "Bash(grep *)", + "Bash(find *)", + "Bash(sort *)", + "Bash(uniq *)", + "Bash(jq *)", + "Bash(which *)", + "Bash(echo *)", + "Bash(pwd *)", + "Bash(whoami *)", + "Bash(ruby -c *)", + "Bash(go fmt /tmp/ *)", + "Bash(gofmt -l *)", + "Bash(gofmt -d *)", + "Bash(go vet *)", + "Bash(go build *)", + "Bash(go test -c *)", + "Bash(go mod download *)", + "Bash(go mod tidy *)", + "Bash(chmod +x *)", "Read(//Users/**)", "Read(//private/tmp/**)", "Glob(**)", @@ -1292,19 +1293,19 @@ def create_or_merge_project_settings_local(project_path: Path) -> None: # SourceCraft MCP helpers (project-scoped) "mcp__sourcecraft__list_pull_request_comments", # Common safe Go workflows (project-scoped) - "Bash(go test:*)", - "Bash(go test -c:*)", - "Bash(go vet :*)", - "Bash(go build:*)", - "Bash(go mod download:*)", - "Bash(go mod tidy:*)", - "Bash(gofmt -l :*)", - "Bash(gofmt -d :*)", + "Bash(go test *)", + "Bash(go test -c *)", + "Bash(go vet *)", + "Bash(go build *)", + "Bash(go mod download *)", + "Bash(go mod tidy *)", + "Bash(gofmt -l *)", + "Bash(gofmt -d *)", # Common safe Make targets "Bash(make generate manifests)", "Bash(make manifests)", # Common git workflows - "Bash(git worktree add:*)", + "Bash(git worktree add *)", # Used by some test/dev scripts to produce temporary dev certs 'Bash(openssl req -x509 -newkey rsa:512 -keyout /dev/null -out /dev/stdout -days 365 -nodes -subj "/CN=test" 2>/dev/null)', ], @@ -1369,10 +1370,13 @@ def create_mcp_config(project_path: Path, mcp_servers: List[str]) -> None: "monitor": [], "predictor": [], "evaluator": [], - "orchestrator": [], "reflector": [], "curator": [], "documentation-reviewer": [], + "debate-arbiter": [], + "synthesizer": [], + "research-agent": [], + "final-verifier": [], }, "workflow_settings": { 
"always_retrieve_knowledge": True, @@ -1426,14 +1430,14 @@ def create_mcp_config(project_path: Path, mcp_servers: List[str]) -> None: "task-decomposer", "monitor", "evaluator", - "orchestrator", "reflector", + "debate-arbiter", ]: if agent in config["agent_mcp_mappings"]: config["agent_mcp_mappings"][agent].append("sequential-thinking") if "claude-reviewer" in mcp_servers: - for agent in ["monitor", "evaluator", "orchestrator"]: + for agent in ["monitor", "evaluator", "final-verifier"]: if agent in config["agent_mcp_mappings"]: config["agent_mcp_mappings"][agent].append("claude-reviewer") @@ -2089,12 +2093,12 @@ def init( tracker.add("create-agents", "Create MAP agents") tracker.start("create-agents") create_agent_files(project_path, selected_mcp_servers) - tracker.complete("create-agents", "8 agents") + tracker.complete("create-agents", "12 agents") tracker.add("create-commands", "Create slash commands") tracker.start("create-commands") create_command_files(project_path) - tracker.complete("create-commands", "4 commands") + tracker.complete("create-commands", "10 commands") tracker.add("create-skills", "Create skills") tracker.start("create-skills") diff --git a/src/mapify_cli/entity_extractor.py b/src/mapify_cli/entity_extractor.py index a37f951..14f9c2f 100644 --- a/src/mapify_cli/entity_extractor.py +++ b/src/mapify_cli/entity_extractor.py @@ -283,9 +283,14 @@ def __init__(self): "trunk-based": "trunk-based-development", "code-review": "code-review-process", "pair-programming": "pair-programming", - "map-feature": "map-feature-workflow", + "map-efficient": "map-efficient-workflow", + "map-fast": "map-fast-workflow", "map-debug": "map-debug-workflow", - "map-refactor": "map-refactor-workflow", + "map-debate": "map-debate-workflow", + "map-review": "map-review-workflow", + "map-plan": "map-plan-workflow", + "map-check": "map-check-workflow", + "map-release": "map-release-workflow", "agile": "agile-methodology", "scrum": "scrum-framework", "kanban": "kanban-method", diff --git a/src/mapify_cli/schemas.py b/src/mapify_cli/schemas.py index 9400880..ba0bb8e 100644 --- a/src/mapify_cli/schemas.py +++ b/src/mapify_cli/schemas.py @@ -217,8 +217,8 @@ "properties": { "workflow": { "type": "string", - "description": "Type of MAP workflow (e.g., 'map-efficient', 'map-debug', 'map-feature')", - "examples": ["map-efficient", "map-debug", "map-feature", "map-refactor"], + "description": "Type of MAP workflow (e.g., 'map-efficient', 'map-debug', 'map-fast')", + "examples": ["map-efficient", "map-debug", "map-fast"], }, "terminal_status": { "type": "string", diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index 01cfaf1..0c69d83 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -271,15 +271,15 @@ Task( # Required Output Structure -**CRITICAL: Actor outputs CODE AS TEXT, NOT file edits** +**Actor applies code directly using Edit/Write tools.** -You are a **proposal generator**, NOT a code executor. Your output is reviewed by Monitor before application. +You are a code implementer. Read affected files, then apply changes with Edit/Write tools. +Monitor will validate the written code afterward. 
-- ✅ DO: Output complete code in markdown code blocks -- ❌ NEVER: Use Edit, Write, or MultiEdit tools -- ❌ NEVER: Modify files directly -- 📋 WHY: workflow-gate.py will BLOCK Edit/Write until actor+monitor steps complete -- 🔄 FLOW: You output → Monitor reviews → Orchestrator applies with Edit/Write +- Use Edit tool for modifying existing files +- Use Write tool for creating new files +- Read files before editing to understand current state +- Apply changes incrementally — one logical change per Edit call --- @@ -422,7 +422,7 @@ Only include if changes affect: ## Pre-Submission Checklist ### Code Quality (Mandatory) -- [ ] Follows {{standards_url}} style guide +- [ ] Follows {{standards_doc}} style guide - [ ] Complete implementations (no placeholders, no `...`) - [ ] Self-documenting names (clear variables/functions) - [ ] Comments for complex logic only @@ -502,6 +502,29 @@ When assessing performance impact, use these as default baselines unless project **Protocol**: Document rationale → Add TODO if needed → Proceed +### Evidence File (Artifact-Gated Validation) + +After applying all code changes, write an evidence file so the orchestrator can verify this step ran. Use Bash (not Write tool) to create the file: + +```bash +cat > .map//evidence/actor_.json << 'EVIDENCE' +{ + "phase": "ACTOR", + "subtask_id": "", + "timestamp": "", + "summary": "", + "aag_contract": "", + "files_changed": [""], + "status": "applied" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.3")` will reject the step. + --- @@ -670,8 +693,8 @@ output: - **Project**: {{project_name}} - **Language**: {{language}} - **Framework**: {{framework}} -- **Standards**: {{standards_url}} -- **Branch**: {{branch}} +- **Standards**: {{standards_doc}} +- **Branch**: {{branch_name}} - **Allowed Scope**: {{allowed_scope}} - **Related Files**: {{related_files}} @@ -733,7 +756,7 @@ output: Follow this protocol exactly — do not infer "how seniors write" or add stylistic flourishes. -1. **Style standard**: Use {{standards_url}}. If unavailable: Python→PEP8, JS/TS→Google Style, Go→gofmt, Rust→rustfmt. +1. **Style standard**: Use {{standards_doc}}. If unavailable: Python→PEP8, JS/TS→Google Style, Go→gofmt, Rust→rustfmt. 2. **Architecture**: Dependency injection where applicable. No global mutable state. 3. **Naming**: Self-documenting (`user_count` not `n`, `is_valid` not `flag`). No abbreviations except industry-standard ones (URL, HTTP, ID). 4. **Intent comments**: Add a one-line `# Intent: ` comment above any non-obvious logic block. Do NOT comment obvious code. 
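The artifact-gated validation introduced in the Actor template above is only described informally. A minimal sketch, assuming the evidence file lives under `.map/<branch>/evidence/` and that `phase`, `subtask_id`, and `timestamp` are the fields the orchestrator requires; the helper name `check_evidence` is hypothetical and is not the real `map_orchestrator.py` API:

```python
# Minimal sketch of the evidence check implied by the Actor/Monitor/Predictor
# "Evidence File" sections. Hypothetical helper, not the framework's own code.
import json
from pathlib import Path

REQUIRED_FIELDS = {"phase", "subtask_id", "timestamp"}

def check_evidence(branch: str, phase: str, subtask_id: str) -> bool:
    """Return True if the agent wrote a well-formed evidence file."""
    path = Path(f".map/{branch}/evidence/{phase.lower()}_{subtask_id}.json")
    if not path.is_file():
        return False  # validate_step would reject the step here
    try:
        data = json.loads(path.read_text())
    except json.JSONDecodeError:
        return False  # malformed evidence is treated like missing evidence
    return REQUIRED_FIELDS.issubset(data)
```

Under this reading, `validate_step("2.3")` would run something like `check_evidence(branch, "actor", "ST-001")` before advancing the state machine.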
diff --git a/src/mapify_cli/templates/agents/debate-arbiter.md b/src/mapify_cli/templates/agents/debate-arbiter.md index d8d534b..509baf5 100644 --- a/src/mapify_cli/templates/agents/debate-arbiter.md +++ b/src/mapify_cli/templates/agents/debate-arbiter.md @@ -3,7 +3,7 @@ name: debate-arbiter description: Cross-evaluates Actor variants with explicit reasoning and synthesizes optimal solution (MAP Debate) model: opus version: 1.0.0 -last_updated: 2025-01-08 +last_updated: 2026-02-14 --- # QUICK REFERENCE (Read First) diff --git a/src/mapify_cli/templates/agents/documentation-reviewer.md b/src/mapify_cli/templates/agents/documentation-reviewer.md index ea32d0f..4c97d1f 100644 --- a/src/mapify_cli/templates/agents/documentation-reviewer.md +++ b/src/mapify_cli/templates/agents/documentation-reviewer.md @@ -444,7 +444,7 @@ mcp__deepwiki__ask_question( # 4. Check historical patterns (if mem0 available) mcp__mem0__map_tiered_search( query="CRD installation documentation patterns", - top_k=5, + limit=5, similarity_threshold=0.7 ) ``` diff --git a/src/mapify_cli/templates/agents/final-verifier.md b/src/mapify_cli/templates/agents/final-verifier.md index ea7d124..9c79083 100644 --- a/src/mapify_cli/templates/agents/final-verifier.md +++ b/src/mapify_cli/templates/agents/final-verifier.md @@ -18,36 +18,36 @@ You catch premature completion and hallucinated success. | Data | Source | How to Read | |------|--------|-------------| -| Original Goal | `.map/task_plan_.md` | Section "## Goal" or first paragraph | -| Acceptance Criteria | `.map/task_plan_.md` | Section "## Acceptance Criteria" (table) | -| Completed Subtasks | `.map/progress_.md` | Checkboxes marked `[x]` | +| Original Goal | `.map//task_plan_.md` | Section "## Goal" or first paragraph | +| Acceptance Criteria | `.map//task_plan_.md` | Section "## Acceptance Criteria" (table) | +| Completed Subtasks | `.map//progress_.md` | Checkboxes marked `[x]` | | Global Validation | Task argument `$VALIDATION_CRITERIA` | Passed from map-efficient.md | ### OUTPUT Destinations (where to store results) | Data | Destination | Format | Written By | |------|-------------|--------|------------| -| Verification Result | `.map/progress_.md` | Append "## Final Verification" section | **final-verifier agent** | +| Verification Result | `.map//progress_.md` | Append "## Final Verification" section | **final-verifier agent** | | Structured Result | `.map//final_verification.json` | JSON (for programmatic access) | **final-verifier agent** | | Root Cause (if failed) | `.map//final_verification.json` | In `root_cause` field | **final-verifier agent** | **WHO WRITES FILES:** - **final-verifier agent** writes verification results to BOTH markdown and JSON - **Orchestrator (map-efficient.md)** reads results and decides next action (COMPLETE/RE_DECOMPOSE/ESCALATE) -- **Orchestrator (map-efficient.md)** ensures Acceptance Criteria section exists in `task_plan_.md` (derived from decomposition output) +- **Orchestrator (map-efficient.md)** ensures Acceptance Criteria section exists in `.map//task_plan_.md` (derived from decomposition output) **IMPORTANT:** Always use sanitized branch name (e.g., `feature-foo` not `feature/foo`). 
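Sanitized branch names recur throughout this patch via the pipeline `sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||'`. A Python sketch of the same transformation, shown only to make the rule explicit; the framework itself uses the sed one-liner in its Bash steps:

```python
# Sketch: Python equivalent of the branch-sanitization sed pipeline.
import re

def sanitize_branch(branch: str) -> str:
    name = branch.replace("/", "-")                # feature/foo -> feature-foo
    name = re.sub(r"[^a-zA-Z0-9_.-]", "-", name)   # replace other unsafe chars
    name = re.sub(r"-{2,}", "-", name)             # collapse runs of dashes
    return name.strip("-")                         # drop leading/trailing dashes

assert sanitize_branch("feature/foo") == "feature-foo"
```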
**SOURCE OF TRUTH CONTRACT:** - `.map//final_verification.json` is the **ONLY** source of truth for orchestrator decisions -- `.map/progress_.md` "## Final Verification" section is for **human readability only** +- `.map//progress_.md` "## Final Verification" section is for **human readability only** - **Orchestrator (map-efficient.md) MUST read JSON**, not parse markdown - Both must be written, but only JSON is used programmatically ## Verification Protocol ### Step 1: Goal Extraction -Read `.map/task_plan_.md` to extract: +Read `.map//task_plan_.md` to extract: - Original goal from "## Goal" section - Acceptance criteria from "## Acceptance Criteria" table (if present) @@ -101,7 +101,7 @@ Score confidence (0.0-1.0): **CRITICAL:** `root_cause` is REQUIRED if `passed=false` -### 2. Append to `.map/progress_.md` +### 2. Append to `.map//progress_.md` ```markdown ## Final Verification diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md index 7bb6cbd..14273cd 100644 --- a/src/mapify_cli/templates/agents/monitor.md +++ b/src/mapify_cli/templates/agents/monitor.md @@ -94,8 +94,8 @@ If implementation deviates from the AAG contract — `valid: false` — regardle |-------------|------|-------------|---------| | `{{project_name}}` | string | Project identifier | `"auth-service"` | | `{{language}}` | enum | Primary language | `"python"`, `"typescript"`, `"go"` | -| `{{solution}}` | string | Code/docs to review | Full code block or diff | -| `{{requirements}}` | string | Subtask requirements | "Implement JWT validation" | +| `{{solution}}` | string | Code/docs to review (in MAP workflow: provided via `` tag) | Full code block or diff | +| `{{requirements}}` | string | Subtask requirements (in MAP workflow: provided via `` tag) | "Implement JWT validation" | | `{{review_mode}}` | enum | Review scope mode | `"full"` or `"diff"` | ### Optional Placeholders @@ -105,6 +105,7 @@ If implementation deviates from the AAG contract — `valid: false` — regardle | `{{framework}}` | string | `""` | Framework/runtime (Express, FastAPI, etc.) | | `{{standards_doc}}` | string | `""` | URL/path to style guide | | `{{security_policy}}` | string | `""` | URL/path to security policy | +| `{{changed_files}}` | array | `[]` | List of modified file paths (for static analysis) | | `{{subtask_description}}` | string | `""` | Additional context | | `{{existing_patterns}}` | array | `[]` | Learned patterns from previous reviews | | `{{feedback}}` | array | `[]` | Previous review findings to verify | @@ -2494,3 +2495,27 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Only MEDIUM/LOW issues → valid=true (with feedback) + +### Evidence File (Artifact-Gated Validation) + +**Exception to read-only rule**: Monitor writes evidence files to `.map/` artifacts directory via Bash (not Write tool). This does NOT violate the read-only-for-project-code rule — `.map/` is a workflow artifact directory, not project code. + +After completing validation, write an evidence file: + +```bash +cat > .map//evidence/monitor_.json << 'EVIDENCE' +{ + "phase": "MONITOR", + "subtask_id": "", + "timestamp": "", + "valid": true, + "issues_found": , + "recommendation": "approve|reject|revise" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.4")` will reject the step. 
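The final-verifier and monitor changes above both route machine-readable results through JSON artifacts under `.map/<branch>/`, with `final_verification.json` as the orchestrator's only source of truth. A simplified sketch of how that file might be consumed, assuming the `passed`, `confidence`, and `root_cause` fields defined in the final-verifier contract and the 0.7 confidence threshold used by map-efficient's Step 3; the real orchestrator also consults retry counts and the circuit breaker before choosing between these actions:

```python
# Sketch only: maps final_verification.json onto the COMPLETE / RE_DECOMPOSE /
# ESCALATE actions named in the final-verifier contract.
import json
from pathlib import Path

def decide_next_action(branch: str) -> str:
    path = Path(f".map/{branch}/final_verification.json")
    verification = json.loads(path.read_text())  # read JSON, never the markdown mirror
    if verification["passed"] and verification["confidence"] >= 0.7:
        return "COMPLETE"
    if verification.get("root_cause"):
        return "RE_DECOMPOSE"  # feed root_cause back into task-decomposer
    return "ESCALATE"          # ask the user how to proceed
```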
diff --git a/src/mapify_cli/templates/agents/predictor.md b/src/mapify_cli/templates/agents/predictor.md index 076c078..976ad2e 100644 --- a/src/mapify_cli/templates/agents/predictor.md +++ b/src/mapify_cli/templates/agents/predictor.md @@ -124,16 +124,16 @@ CONFLICT (Category B: -0.10): ### Position in MAP Pipeline ``` -Actor (propose changes) - ↓ analyzer_output +Actor (implement changes) + ↓ code changes applied +Monitor (validate correctness) + ↓ validation_result PREDICTOR (assess impact) ← YOU ARE HERE ↓ prediction_output -Monitor (validate at runtime) - ↓ validation_result -Evaluator (score quality) +[Evaluator — only in /map-debug and /map-review] ``` -### Upstream (Actor → Predictor) +### Upstream (Actor → Monitor → Predictor) **Input Contract Version**: 1.0 | Field from Actor | How Predictor Uses It | @@ -1784,6 +1784,28 @@ When an edge case is detected, it MUST appear in THREE places: +### Evidence File (Artifact-Gated Validation) + +After completing impact analysis, write an evidence file via Bash: + +```bash +cat > .map//evidence/predictor_.json << 'EVIDENCE' +{ + "phase": "PREDICTOR", + "subtask_id": "", + "timestamp": "", + "risk_assessment": "", + "confidence_score": <0.30-0.95>, + "tier_selected": "<1|2|3>" +} +EVIDENCE +``` + +**Required fields** (orchestrator validates these): `phase`, `subtask_id`, `timestamp`. +Other fields are informational but recommended for audit trail. + +**CRITICAL**: Without this file, `validate_step("2.6")` will reject the step. + ## Confidence Scoring Methodology diff --git a/src/mapify_cli/templates/agents/reflector.md b/src/mapify_cli/templates/agents/reflector.md index db93dc6..58f75f3 100644 --- a/src/mapify_cli/templates/agents/reflector.md +++ b/src/mapify_cli/templates/agents/reflector.md @@ -3,7 +3,7 @@ name: reflector description: Extracts structured lessons from successes and failures (ACE) model: sonnet version: 4.0.0 -last_updated: 2025-01-12 +last_updated: 2026-01-12 --- # IDENTITY @@ -208,10 +208,12 @@ Analyze the following execution attempt: {{predictor_analysis}} ``` +{{#if evaluator_scores}} ## Evaluator Quality Scores ```json {{evaluator_scores}} ``` +{{/if}} ## Execution Outcome {{execution_outcome}} diff --git a/src/mapify_cli/templates/agents/research-agent.md b/src/mapify_cli/templates/agents/research-agent.md index 3309b0e..c2b279d 100644 --- a/src/mapify_cli/templates/agents/research-agent.md +++ b/src/mapify_cli/templates/agents/research-agent.md @@ -178,7 +178,7 @@ When orchestrator provides `findings_file` path in prompt, append research resul **Input Signal** (from orchestrator): ``` -Findings file: .map/findings_feature-auth.md +Findings file: .map/feature-auth/findings_feature-auth.md ``` **Action**: diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md index 8f79219..e0715b6 100644 --- a/src/mapify_cli/templates/agents/task-decomposer.md +++ b/src/mapify_cli/templates/agents/task-decomposer.md @@ -50,6 +50,10 @@ machine-readable blueprint for the Actor/Monitor pipeline. │ └─ Map all dependencies (no cycles!) │ │ └─ Order by dependency (foundations first) │ │ └─ Add risks for complexity_score ≥ 7 │ +│ └─ CODE CHANGES ONLY: subtasks must produce code diffs. │ +│ Do NOT create operational subtasks (rollback plans, │ +│ integration test plans, deployment docs). These belong │ +│ in the plan's Notes section, not as separate subtasks. │ │ │ │ 6. 
VALIDATE (run checklist) │ │ └─ Circular dependency check (must be acyclic DAG) │ @@ -278,7 +282,7 @@ Subtasks should be ordered by dependency: ### Acceptance Criteria Section (Ralph Loop Integration) -When writing task plans to `.map/task_plan_.md`, the orchestrator generates an Acceptance Criteria section from subtask validation_criteria. The format is: +When writing task plans to `.map//task_plan_.md`, the orchestrator generates an Acceptance Criteria section from subtask validation_criteria. The format is: ```markdown ## Acceptance Criteria diff --git a/src/mapify_cli/templates/commands/map-check.md b/src/mapify_cli/templates/commands/map-check.md index 6ae6006..126947d 100644 --- a/src/mapify_cli/templates/commands/map-check.md +++ b/src/mapify_cli/templates/commands/map-check.md @@ -9,7 +9,7 @@ If no `.map//workflow_state.json` exists, run full quality suite: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') STATE_FILE=".map/${BRANCH}/workflow_state.json" if [[ ! -f "$STATE_FILE" ]]; then @@ -109,10 +109,10 @@ If `.map//workflow_state.json` exists, verify subtask completion. Read the current state to understand what was completed: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') STATE_FILE=".map/${BRANCH}/workflow_state.json" -cat "$STATE_FILE" +# Use Read tool to load the state file contents ``` ### Step 2: Validate All Subtasks Complete @@ -159,7 +159,7 @@ Read task_plan_.md to get acceptance criteria: ```bash PLAN_FILE=".map/${BRANCH}/task_plan_${BRANCH}.md" -cat "$PLAN_FILE" +# Use Read tool to load the plan file contents ``` ### Step 4: Call Final Verifier @@ -195,7 +195,7 @@ Even if verifier approves, run automated checks: **Tests:** ```bash -TEST_CMD=$(jq -r '.test_command // "pytest"' .claude/ralph-loop-config.json) +TEST_CMD="pytest" # Default; override if project uses different test runner echo "Running final tests..." eval "$TEST_CMD" @@ -203,7 +203,7 @@ eval "$TEST_CMD" # If tests fail and you want a durable artifact for follow-up/debugging, # re-run capturing output and parse to .map//diagnostics.json: # -# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') # LOG_FILE=".map/${BRANCH}/tests.log" # mkdir -p ".map/${BRANCH}" # ( $TEST_CMD ) >"$LOG_FILE" 2>&1 @@ -218,12 +218,12 @@ fi **Linter:** ```bash -LINT_CMD=$(jq -r '.lint_command // "make lint"' .claude/ralph-loop-config.json) +LINT_CMD="make lint" # Default; override if project uses different linter echo "Running final lint..." 
eval "$LINT_CMD" # Optional (structured diagnostics): -# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +# BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') # LOG_FILE=".map/${BRANCH}/lint.log" # mkdir -p ".map/${BRANCH}" # ( $LINT_CMD ) >"$LOG_FILE" 2>&1 diff --git a/src/mapify_cli/templates/commands/map-debate.md b/src/mapify_cli/templates/commands/map-debate.md index 00321a8..f5d60ee 100644 --- a/src/mapify_cli/templates/commands/map-debate.md +++ b/src/mapify_cli/templates/commands/map-debate.md @@ -303,20 +303,18 @@ retry_context = { If Monitor returns `escalation_required === true`, ask user: ``` -AskUserQuestion( - questions: [ - { - header: "Escalation Required", - question: "⚠️ Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", - multiSelect: false, - options: [ - { label: "YES - Proceed Anyway", description: "Continue (run Predictor if required, then apply changes)." }, - { label: "REVIEW - Show Details", description: "Show synthesis_reasoning + comparison_matrix, then ask again." }, - { label: "NO - Abort Subtask", description: "Do not apply changes; wait for human review." } - ] - } - ] -) +AskUserQuestion(questions=[ + { + "header": "Escalation", + "question": "Human review requested by Monitor.\n\nSubtask: [ST-XXX]\nReason: [escalation_reason]\nArbiter Confidence: [confidence]\n\nProceed anyway?", + "multiSelect": false, + "options": [ + {"label": "YES - Proceed", "description": "Continue (run Predictor if required, then apply changes)."}, + {"label": "REVIEW - Details", "description": "Show synthesis_reasoning + comparison_matrix, then ask again."}, + {"label": "NO - Abort", "description": "Do not apply changes; wait for human review."} + ] + } +]) ``` ### 2.10 Conditional Predictor @@ -389,9 +387,9 @@ If none found: mark gate as skipped and proceed. | Aspect | map-efficient | map-debate | |--------|---------------|------------| -| Variant generation | Conditional (Self-MoA check) | Always | -| Synthesis agent | synthesizer (sonnet) | debate-arbiter (opus) | -| Output | conflict_resolutions | comparison_matrix + decision_rationales + synthesis_reasoning | +| Variant generation | Single variant (one Actor) | Always 3 variants | +| Synthesis agent | N/A (single Actor) | debate-arbiter (opus) | +| Output | Direct implementation | comparison_matrix + decision_rationales + synthesis_reasoning | | Cost | Lower | ~3-5x higher (opus model) | | Use case | Efficiency | Reasoning transparency | diff --git a/src/mapify_cli/templates/commands/map-debug.md b/src/mapify_cli/templates/commands/map-debug.md index 19bbfd3..588b9b3 100644 --- a/src/mapify_cli/templates/commands/map-debug.md +++ b/src/mapify_cli/templates/commands/map-debug.md @@ -129,7 +129,7 @@ Provide FULL file content for changes." ### Monitor Validation -After each fix: +After each fix (max 5 Actor->Monitor retry iterations per subtask): ``` Task( diff --git a/src/mapify_cli/templates/commands/map-efficient.md b/src/mapify_cli/templates/commands/map-efficient.md index e97f988..8994064 100644 --- a/src/mapify_cli/templates/commands/map-efficient.md +++ b/src/mapify_cli/templates/commands/map-efficient.md @@ -14,7 +14,27 @@ State machine enforces sequencing, Python validates completion, hooks inject rem 1. Execute steps in order using state machine guidance 2. Use exact `subagent_type` specified — never substitute 3. 
Call each agent individually — no combining or skipping -4. Max 5 retry iterations per subtask +4. Max 5 retry iterations per subtask (note: /map-fast uses max 3) +5. Agent phases (ACTOR 2.3, MONITOR 2.4, PREDICTOR 2.6) require evidence files. + Each agent writes `.map//evidence/_.json` after completing work. + `validate_step` rejects the step if evidence is missing or malformed. + +## Intentional Agent Omissions + +/map-efficient does NOT use these agents (by design): +- **Evaluator** — quality scoring not needed; Monitor validates correctness directly +- **Reflector** — lesson extraction is a separate step via `/map-learn` +- **Curator** — pattern storage is a separate step via `/map-learn` + +This is NOT a violation of MAP agent rules. Learning is decoupled into `/map-learn` (optional, run after workflow completes) to reduce token usage during execution. + +## Dual State Files + +/map-efficient uses two state files in `.map//`: +- **`step_state.json`** — Orchestrator canonical state. Tracks current step, retry counts, circuit breaker. Written/read by `map_orchestrator.py`. This is the source of truth for workflow resumption. +- **`workflow_state.json`** — Enforcement gates. Tracks subtask completion for `workflow-gate.py` hook validation. Written by `map_step_runner.py`. + +Both files must stay in sync. The orchestrator updates `step_state.json` on every step; `workflow_state.json` is updated at phase boundaries (INIT_STATE, UPDATE_STATE). ## Architecture Overview @@ -25,7 +45,7 @@ State machine enforces sequencing, Python validates completion, hooks inject rem └─────────────────────────────────────────────────────────────┘ ↓ ┌─────────────────────────────────────────────────────────────┐ -│ map-efficient.md (THIS FILE - ~150 lines) │ +│ map-efficient.md (THIS FILE - ~540 lines) │ │ 1. Load state → Get next step instruction │ │ 2. Route to appropriate executor based on step phase │ │ 3. Execute step (Actor/Monitor/mem0/tests/etc) │ @@ -41,6 +61,21 @@ State machine enforces sequencing, Python validates completion, hooks inject rem **Task:** $ARGUMENTS +## Step 0: Detect Existing Plan from /map-plan + +Before starting the state machine, check if `/map-plan` already produced artifacts for this branch: + +```bash +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') +if [ -f ".map/${BRANCH}/task_plan_${BRANCH}.md" ] && [ ! -f ".map/${BRANCH}/step_state.json" ]; then + # Plan exists but execution hasn't started — resume from plan + # step_state.json is the orchestrator's canonical state (see "Dual State Files" above) + python3 .map/scripts/map_orchestrator.py resume_from_plan +fi +``` + +If `resume_from_plan` succeeds, the orchestrator skips DECOMPOSE, INIT_PLAN, and REVIEW_PLAN (the plan was already approved in /map-plan) and starts from CHOOSE_MODE. + ## Step 1: Get Next Step Instruction ```bash @@ -53,7 +88,7 @@ IS_COMPLETE=$(echo "$NEXT_STEP" | jq -r '.is_complete') # Check if workflow complete if [ "$IS_COMPLETE" = "true" ]; then - echo "✅ All subtasks complete. Running final verification..." + echo "All subtasks complete. Running final verification..." # Go to Step 3: Final Verification fi ``` @@ -89,23 +124,26 @@ Purpose: Actor compiles this line into code. Monitor verifies against it. 
This eliminates reasoning overhead — the contract IS the specification.""" ) -# After decomposer returns: extract subtask sequence + aag_contracts, save to state -# Update state: python3 .map/scripts/map_orchestrator.py validate_step "1.0" +# After decomposer returns: +# 1. Extract subtask IDs from blueprint and register them in state: +# python3 .map/scripts/map_orchestrator.py set_subtasks ST-001 ST-002 ST-003 +# 2. Validate step completion: +# python3 .map/scripts/map_orchestrator.py validate_step "1.0" ``` ### Phase: INIT_PLAN (1.5) -Generate `.map/task_plan_.md` from blueprint: +Generate `.map//task_plan_.md` from blueprint: - Header: Goal from blueprint.summary -- For each subtask: ## ST-XXX section with **Status:** pending -- First subtask: **Status:** in_progress -- Terminal State: **Status:** pending +- For each subtask: ### ST-XXX section with `- **Status:** pending` +- First subtask: `- **Status:** in_progress` +- Terminal State: `- **Status:** pending` ### Phase: REVIEW_PLAN (1.55) Present the generated plan and require explicit user approval before any execution state is initialized. -1. Read the plan: `.map/task_plan_.md` +1. Read the plan: `.map//task_plan_.md` 2. Show a short summary in this format: ```text @@ -121,10 +159,10 @@ Notes: ═══════════════════════════════════════════════════ ``` -3. Ask for approval using AskUserQuestionTool (example): +3. Ask for approval using AskUserQuestion (example): ``` -AskUserQuestionTool(questions=[ +AskUserQuestion(questions=[ { "question": "Approve this plan and start execution?", "header": "Plan approval", @@ -163,20 +201,20 @@ Note: In `batch` mode the orchestrator auto-skips the pause step (2.11). ### Phase: INIT_STATE (1.6) -```bash -# Create workflow_state.json -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -cat > .map/${BRANCH}/workflow_state.json <<'EOF' +Get the branch name via Bash: `git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||'` + +Then use the **Write** tool to create `.map//workflow_state.json`: + +```json { "workflow": "map-efficient", - "started_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", + "started_at": "", "current_subtask": null, "current_state": "INITIALIZED", "completed_steps": {}, "pending_steps": {}, "subtask_sequence": [] } -EOF ``` ### Phase: XML_PACKET (2.0) @@ -199,9 +237,9 @@ xml_packet = create_xml_packet(subtask) # Tiered search: branch → project → org mcp__mem0__map_tiered_search( query="[subtask description]", - top_k=5, - user_id="[branch_name]", - agent_id="map-efficient" + limit=5, + user_id="org:[org_name]", + run_id="proj:[project_name]:branch:[branch_name]" ) # Re-rank by relevance, pass top 3 to Actor @@ -219,7 +257,7 @@ if requires_research(subtask): File patterns: [relevant globs] Intent: locate Max tokens: 1500 -Findings file: .map/findings_{branch}.md +Findings file: .map/{branch}/findings_{branch}.md DISTILLATION RULE: Write ONLY actionable findings to the file: - file paths + line ranges + function signatures @@ -241,7 +279,7 @@ Task( [paste from .map//current_packet.xml] - + [top context_patterns from mem0 + relevance_score] @@ -299,8 +337,8 @@ if monitor_output["valid"] == false: # Go back to Phase: ACTOR with Monitor feedback # Actor will fix issues and re-apply code else: - # Escalate to user (3-strike protocol) - AskUserQuestion: CONTINUE / SKIP / ABORT + # Escalate to user (retry limit reached) + AskUserQuestion(questions=[{"question": "Monitor retry limit reached. 
How to proceed?", "header": "Retry limit", "options": [{"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Skip", "description": "Skip this subtask and move to next"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) ``` ### Phase: PREDICTOR (2.6) @@ -328,8 +366,8 @@ Optional: analyzer_output, user_context""" # Code already applied by Actor, validated by Monitor # Update workflow state to mark subtask progress -python3 scripts/map_step_runner.py update_workflow_state "ST-XXX" "validated" "VALIDATED" -python3 scripts/map_step_runner.py update_plan_status "ST-XXX" "in_progress" +python3 .map/scripts/map_step_runner.py update_workflow_state "ST-XXX" "validated" "VALIDATED" +python3 .map/scripts/map_step_runner.py update_plan_status "ST-XXX" "in_progress" ``` ### Phase: TESTS_GATE (2.8) @@ -407,7 +445,7 @@ Only used when execution_mode is `step_by_step`. - Ask the user whether to continue to the next subtask. - If execution_mode is `batch`, the orchestrator auto-skips this step. -## Step 2.5: Validate Step Completion +## Step 2a: Validate Step Completion After executing step, validate and update state: @@ -417,11 +455,11 @@ python3 .map/scripts/map_orchestrator.py validate_step "$STEP_ID" # Update plan status if subtask complete if [ "$PHASE" = "VERIFY_ADHERENCE" ]; then - python3 scripts/map_step_runner.py update_plan_status "$SUBTASK_ID" "complete" + python3 .map/scripts/map_step_runner.py update_plan_status "$SUBTASK_ID" "complete" fi ``` -## Step 2.6: Continue or Complete (Context Distillation) +## Step 2b: Continue or Complete (Context Distillation) ```bash # Get next step @@ -445,7 +483,7 @@ else # Recurse: Launch new context with minimal state transfer echo "Next step: $(echo "$NEXT_STEP" | jq -r '.step_id')" - # Continue with Step 1 (fresh invocation via map-efficient-step) + # Continue with Step 1 (loop back to get_next_step, or use /map-resume in a fresh session) fi ``` @@ -462,7 +500,8 @@ TOOL_COUNT=$(echo "$CB_DATA" | jq -r '.tool_count') MAX_ITERATIONS=$(echo "$CB_DATA" | jq -r '.max_iterations') if [ "$TOOL_COUNT" -ge "$MAX_ITERATIONS" ]; then - AskUserQuestion: "Circuit breaker triggered. RESET_LIMITS or ABORT?" + # Ask user how to proceed + AskUserQuestion(questions=[{"question": "Circuit breaker triggered. How to proceed?", "header": "Circuit breaker", "options": [{"label": "Reset limits", "description": "Reset counters and continue workflow"}, {"label": "Abort", "description": "Stop workflow immediately"}], "multiSelect": false}]) fi ``` @@ -499,16 +538,21 @@ if verification["passed"] and verification["confidence"] >= 0.7: update_terminal_state("complete") print("✅ Workflow complete! Optional: Run /map-learn to preserve patterns.") -elif thrashing_detected(): - AskUserQuestion: "Thrashing detected. FORCE_COMPLETE / CONTINUE / ABORT?" +# NOTE: The conditions below are pseudocode representing orchestrator-level +# logic. The actual implementation uses check_circuit_breaker and retry_count +# from step_state.json to detect these conditions. + +elif verification["retry_count"] > verification["max_retries"]: + # Thrashing detected - too many retries without progress + AskUserQuestion(questions=[{"question": "Thrashing detected (repeated failures). 
How to proceed?", "header": "Thrashing", "options": [{"label": "Force complete", "description": "Mark as complete despite failures"}, {"label": "Continue", "description": "Reset retry counter and try again"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) -elif plan_iteration < max_redecompositions: - # Re-decomposition - Task(subagent_type="task-decomposer", mode="re_decomposition", ...) +elif check_circuit_breaker()["triggered"] == false: + # Re-decomposition: break remaining work into new subtasks + Task(subagent_type="task-decomposer", description="Re-decompose remaining work", prompt="...") else: # Max iterations reached - AskUserQuestion: "Max iterations reached. RESET_LIMITS / ABORT?" + AskUserQuestion(questions=[{"question": "Max iterations reached. How to proceed?", "header": "Max iterations", "options": [{"label": "Reset limits", "description": "Reset counters and continue"}, {"label": "Abort", "description": "Stop workflow"}], "multiSelect": false}]) ``` ## Step 4: Summary diff --git a/src/mapify_cli/templates/commands/map-fast.md b/src/mapify_cli/templates/commands/map-fast.md index 7560034..c0db25e 100644 --- a/src/mapify_cli/templates/commands/map-fast.md +++ b/src/mapify_cli/templates/commands/map-fast.md @@ -6,7 +6,7 @@ description: Minimal workflow for small, low-risk changes (40-50% savings, NO le **⚠️ WARNING: Use for small, low-risk production changes only. Do not skip tests.** -Minimal agent sequence (40-50% token savings). Skips: Predictor, Evaluator, Reflector, Curator. +Minimal agent sequence (40-50% token savings). Skips: Predictor, Reflector, Curator. **Consequences:** No impact analysis, no quality scoring, no learning, playbook never improves. @@ -29,7 +29,6 @@ Minimal agent sequence (token-optimized, reduced analysis depth): **Agents INTENTIONALLY SKIPPED:** - Predictor (no impact analysis) -- Evaluator (no quality scoring) - Reflector (no lesson extraction) - Curator (no playbook updates) @@ -130,6 +129,6 @@ After all subtasks completed: - MAX 3 iterations per subtask - NO learning cycle (Reflector/Curator skipped) - NO impact analysis (Predictor skipped) -- NO quality scoring (Evaluator skipped) +- NO quality scoring Begin now with minimal workflow. diff --git a/src/mapify_cli/templates/commands/map-learn.md b/src/mapify_cli/templates/commands/map-learn.md index f6bb35f..5e53b41 100644 --- a/src/mapify_cli/templates/commands/map-learn.md +++ b/src/mapify_cli/templates/commands/map-learn.md @@ -27,11 +27,11 @@ description: Extract and preserve lessons from completed workflows (OPTIONAL lea ## ⚠️ IMPORTANT: This is an OPTIONAL step -**You are NOT required to run this command.** MAP workflows (except /map-fast) include learning by default. +**You are NOT required to run this command.** No MAP workflow includes automatic learning — learning is always a separate step via this command. 
Use /map-learn when: +- You completed /map-efficient, /map-debug, or /map-fast and want to preserve lessons - You want to batch-learn from multiple workflows at once -- You completed /map-fast and want to preserve lessons retroactively - You want to manually trigger learning for custom workflows **Do NOT use this command:** diff --git a/src/mapify_cli/templates/commands/map-plan.md b/src/mapify_cli/templates/commands/map-plan.md index 36d4601..bbc4d6e 100644 --- a/src/mapify_cli/templates/commands/map-plan.md +++ b/src/mapify_cli/templates/commands/map-plan.md @@ -50,6 +50,8 @@ User request: ) ``` +**Save discovery results:** The research-agent returns findings inline. Use the **Write** tool to save them to `.map//findings_.md` so they persist across sessions. Include key file paths, patterns found, and risks. + If discovery is not needed (new greenfield code or already-provided spec), skip to Step 1. ### Step 1: Assess Scope and Decide Interview Depth @@ -72,12 +74,12 @@ If interview is not needed, skip to Step 3. ### Step 2: Deep Interview (Spec Discovery) -Use AskUserQuestionTool to systematically interview the user. The goal is to surface non-obvious decisions and tradeoffs BEFORE planning. +Use AskUserQuestion to systematically interview the user. The goal is to surface non-obvious decisions and tradeoffs BEFORE planning. **Rules:** - Questions must be NON-OBVIOUS (don't ask what the user already stated) - Cover all dimensions: technical implementation, UI/UX, risks, tradeoffs, edge cases, data model, performance, security -- Ask in small rounds (1-2 high-signal questions; up to 2-4 if needed) using AskUserQuestionTool +- Ask in small rounds (1-2 high-signal questions; up to 2-4 if needed) using AskUserQuestion - Continue iterating until all critical decisions are captured - After each round, assess: are there still unresolved architectural decisions? @@ -90,9 +92,9 @@ Use AskUserQuestionTool to systematically interview the user. The goal is to sur 6. **Integration:** How does this interact with existing code? Migration needed? 7. **Contract Clarity:** Are ALL goals stated as outcomes (not processes)? Reject "improve auth" — require "AuthService returns 401 for expired tokens". Every goal must be verifiable. -**Example AskUserQuestionTool call:** +**Example AskUserQuestion call:** ``` -AskUserQuestionTool(questions=[ +AskUserQuestion(questions=[ { "question": "Should refresh tokens be stored server-side (Redis/DB) or stateless (signed JWT)?", "header": "Token store", @@ -143,7 +145,7 @@ AskUserQuestionTool(questions=[ ### Step 3: Create Branch Directory ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') mkdir -p .map/${BRANCH} ``` @@ -202,12 +204,17 @@ Output requirements: ### Step 6: Create Human-Readable Plan -Write the plan to `.map//task_plan_.md`. Wrap content in `` semantic brackets for machine-parseable handoff to executors: +Write the plan to `.map//task_plan_.md` using the **Write** tool. Wrap content in `` semantic brackets for machine-parseable handoff to executors. 
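**Note (illustrative):** the `### ST-XXX` subtask headings in this plan are what downstream tooling keys on; `map_orchestrator.py resume_from_plan` later extracts them with the same regex. A minimal sketch of that extraction, assuming a plan on a `feature-auth` branch:

```python
import re
from pathlib import Path

# Extract subtask IDs from "### ST-XXX: Title" headings in the task plan.
# Same pattern used by map_orchestrator.py resume_from_plan.
plan_text = Path(".map/feature-auth/task_plan_feature-auth.md").read_text(encoding="utf-8")
subtask_ids = re.findall(r"###\s+(ST-\d+)", plan_text)
print(subtask_ids)  # e.g. ["ST-001", "ST-002", "ST-003"]
```

Keep the heading format exact: if headings drift from `### ST-XXX:`, resumption and plan status updates will not find the subtasks.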
+First, get the branch name: ```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -cat > .map/${BRANCH}/task_plan_${BRANCH}.md < +git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||' +``` + +Then use the **Write** tool to create `.map//task_plan_.md` with this structure: + +```markdown + # Task Plan: [Brief Title] @@ -220,6 +227,7 @@ cat > .map/${BRANCH}/task_plan_${BRANCH}.md < Action(params) -> Goal` - **Complexity:** [low/medium/high] - **Dependencies:** [none | ST-XXX, ST-YYY] @@ -244,25 +252,23 @@ cat > .map/${BRANCH}/task_plan_${BRANCH}.md < -EOF ``` **AAG Contract is REQUIRED** for every subtask. Copy directly from task-decomposer output's `aag_contract` field. This is the primary handoff to the Actor agent — without it, the Actor reasons instead of compiles. ### Step 7: Initialize Workflow State (Do This Last) -Create `.map//workflow_state.json` with the decomposition results. Wrap in `` comment for executor parsing. +Create `.map//workflow_state.json` with the decomposition results. Wrap in `MAP_State_v1_0` tag for executor parsing. Do this AFTER writing `task_plan_.md` so planning artifacts are created before the state gate becomes active. -```bash -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') -STARTED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) -cat > .map/${BRANCH}/workflow_state.json </workflow_state.json` with this structure (substitute actual values): + +```json { "_semantic_tag": "MAP_State_v1_0", "workflow": "map-plan", - "started_at": "${STARTED_AT}", + "started_at": "", "current_subtask": null, "current_state": "INITIALIZED", "completed_steps": {}, @@ -273,7 +279,6 @@ cat > .map/${BRANCH}/workflow_state.json < Action(params) -> Goal" } } -EOF ``` **IMPORTANT:** @@ -307,7 +312,7 @@ Next Steps: **Note:** If interview was skipped (small/well-defined task), the spec line will not appear. -### Step 8: Context Distillation + STOP +### Step 9: Context Distillation + STOP **Before stopping, verify the distilled state is self-contained.** The next session starts fresh — it will ONLY see files, not this conversation. Ensure these files contain everything needed: diff --git a/src/mapify_cli/templates/commands/map-release.md b/src/mapify_cli/templates/commands/map-release.md index 3e6a35f..0bd8422 100644 --- a/src/mapify_cli/templates/commands/map-release.md +++ b/src/mapify_cli/templates/commands/map-release.md @@ -81,21 +81,11 @@ Execute all validation gates in parallel where possible: #### Gate 1-4: Code Quality Checks ```bash -# Run in parallel (all must succeed) -pytest tests/ --cov=src/mapify_cli --cov-report=term-missing & -PID_PYTEST=$! - -black src/ tests/ --check & -PID_BLACK=$! - -ruff check src/ tests/ & -PID_RUFF=$! - -mypy src/ & -PID_MYPY=$! - -# Wait for all checks -wait $PID_PYTEST && wait $PID_BLACK && wait $PID_RUFF && wait $PID_MYPY +# Run checks sequentially (all must succeed) +pytest tests/ --cov=src/mapify_cli --cov-report=term-missing && \ +black src/ tests/ --check && \ +ruff check src/ tests/ && \ +mypy src/ ``` **Expected Results:** @@ -321,7 +311,7 @@ Use AskUserQuestion to get user decision on version bump: ``` AskUserQuestion( - questions: [ + questions=[ { question: "What type of version bump should be performed for this release?", header: "Version Bump", @@ -541,7 +531,7 @@ Use AskUserQuestion for explicit confirmation: ``` AskUserQuestion( - questions: [ + questions=[ { question: "⚠️ IRREVERSIBLE OPERATION ⚠️\n\nPushing tag will immediately:\n1. 
Trigger GitHub Actions release workflow\n2. Build and publish package to PyPI\n3. Create public GitHub release\n\nVersion: $LAST_TAG\nTarget: origin/main\n\nDo you want to proceed with tag push?", header: "Confirm Push", @@ -1166,6 +1156,8 @@ Use these MCP tools throughout the workflow: - **`mcp__mem0__map_tiered_search`** - Search for release patterns from past projects - **`mcp__mem0__map_add_pattern`** - Store release learnings cross-project - **`mcp__sequential-thinking__sequentialthinking`** - Complex decision making for version bump + +**Built-in Tools (not MCP):** - **`AskUserQuestion`** - Get explicit confirmation for IRREVERSIBLE operations ### Critical Constraints diff --git a/src/mapify_cli/templates/commands/map-resume.md b/src/mapify_cli/templates/commands/map-resume.md index 28285ef..fb92058 100644 --- a/src/mapify_cli/templates/commands/map-resume.md +++ b/src/mapify_cli/templates/commands/map-resume.md @@ -13,20 +13,27 @@ description: Resume incomplete MAP workflow from checkpoint - When returning to an unfinished task **What it does:** -1. Detects `.map/progress.md` checkpoint file -2. Displays workflow progress summary -3. Shows completed and remaining subtasks -4. Asks user confirmation before resuming -5. Continues Actor→Monitor loop for remaining subtasks +1. Detects `.map//step_state.json` checkpoint (orchestrator canonical state) +2. Cross-references `.map//workflow_state.json` for subtask completion +3. Displays workflow progress summary +4. Shows completed and remaining subtasks +5. Asks user confirmation before resuming +6. Continues from the last incomplete step via the state machine + +**State files used:** +- **`step_state.json`** — Orchestrator canonical state. Source of truth for resumption. Tracks current step, retry counts, circuit breaker status. +- **`workflow_state.json`** — Enforcement gates. Tracks subtask completion for workflow-gate.py hook. +- **`task_plan_.md`** — Full task decomposition with validation criteria and AAG contracts. --- ## Step 1: Detect Checkpoint -Check if checkpoint file exists: +Check if state files exist for the current branch: ```bash -test -f .map/progress.md && echo "Found incomplete workflow" || echo "No checkpoint" +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') +test -f ".map/${BRANCH}/step_state.json" && echo "Found incomplete workflow" || echo "No checkpoint" ``` **If no checkpoint exists:** @@ -36,12 +43,12 @@ Display message and exit: ```markdown ## No Workflow in Progress -No checkpoint file found at `.map/progress.md`. +No checkpoint file found at `.map//step_state.json`. **To start a new workflow, use:** - `/map-efficient "task description"` - Standard implementation workflow - `/map-debug "issue description"` - Debugging workflow -- `/map-fast "task description"` - Throwaway code workflow +- `/map-fast "task description"` - Minimal workflow No recovery needed. ``` @@ -52,35 +59,40 @@ No recovery needed. 
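For reference, a rough Python equivalent of the detection above. It mirrors the `sanitize_branch_name` helper used by the MAP hooks, so the bash `sed` pipeline and the Python hooks resolve the same `.map/<branch>/` directory (sketch only; the command itself runs the bash shown above):

```python
import re
import subprocess
from pathlib import Path


def sanitize_branch_name(branch: str) -> str:
    """Sanitize branch name for safe filesystem paths (same rules as the hooks)."""
    sanitized = branch.replace("/", "-")
    sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized)
    sanitized = re.sub(r"-+", "-", sanitized).strip("-")
    if ".." in sanitized or sanitized.startswith("."):
        return "default"
    return sanitized or "default"


result = subprocess.run(
    ["git", "rev-parse", "--abbrev-ref", "HEAD"],
    capture_output=True, text=True, timeout=2,
)
branch = sanitize_branch_name(result.stdout.strip()) if result.returncode == 0 else "default"

# A checkpoint exists only if the orchestrator state file is present for this branch.
state_file = Path(f".map/{branch}/step_state.json")
print("Found incomplete workflow" if state_file.exists() else "No checkpoint")
```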
## Step 2: Load and Display Progress -Read checkpoint file and display progress summary: +Read both state files and the task plan to display progress summary: ```bash -cat .map/progress.md +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') + +# Read state files using the Read tool +# .map/${BRANCH}/step_state.json — current orchestrator state +# .map/${BRANCH}/workflow_state.json — subtask completion status +# .map/${BRANCH}/task_plan_${BRANCH}.md — full plan with AAG contracts ``` -Parse the YAML frontmatter and display: +Parse the state and display: ```markdown ## Found Incomplete Workflow -**Task:** [task_plan from frontmatter] -**Current Phase:** [current_phase] -**Turn Count:** [turn_count] -**Started:** [started_at] -**Last Updated:** [updated_at] +**Task:** [goal from task_plan] +**Branch:** ${BRANCH} +**Current Step:** [current_step from step_state.json] +**Current Phase:** [phase name from step_state.json] +**Started:** [started_at from workflow_state.json] ### Progress Overview [X/N] subtasks completed ([percentage]%) -### Completed Subtasks ✅ -- [x] **ST-001**: [description] (completed at [timestamp]) -- [x] **ST-002**: [description] (completed at [timestamp]) +### Completed Subtasks +- [x] **ST-001**: [description] (complete) +- [x] **ST-002**: [description] (complete) ... -### Remaining Subtasks 📋 -- [ ] **ST-003**: [description] -- [ ] **ST-004**: [description] +### Remaining Subtasks +- [ ] **ST-003**: [description] — currently at phase: [phase] +- [ ] **ST-004**: [description] — pending ... ``` @@ -88,65 +100,73 @@ Parse the YAML frontmatter and display: ## Step 3: User Confirmation -**⚠️ CRITICAL: Always ask for user confirmation before resuming.** - -Ask a simple yes/no question: +**CRITICAL: Always ask for user confirmation before resuming.** ``` -Resume from last checkpoint? [Y/n] +AskUserQuestion(questions=[ + { + "question": "Resume workflow from last checkpoint?", + "header": "Resume", + "options": [ + {"label": "Resume (recommended)", "description": "Continue from last checkpoint step"}, + {"label": "Start fresh", "description": "Delete state files and start over with /map-efficient"}, + {"label": "Abort", "description": "Do nothing, keep state files intact"} + ], + "multiSelect": false + } +]) ``` **Handle user response:** -- **Y or y or Enter (default):** Proceed to Step 4 (resume workflow) -- **n or N:** Delete checkpoint file and exit with message "Checkpoint cleared. Start fresh with /map-efficient." +- **Resume:** Proceed to Step 4 (resume workflow) +- **Start fresh:** Delete `.map//step_state.json` and `.map//workflow_state.json`, exit with "State cleared. Start fresh with /map-efficient." +- **Abort:** Exit without changes --- ## Step 4: Resume Workflow -Load remaining subtasks from checkpoint and continue Actor→Monitor loop. +Use the orchestrator to determine the next step and continue execution. **Important context loading:** Before resuming, read: -1. `.map/progress.md` - current state -2. `.map/task_plan_*.md` - full task decomposition with validation criteria +1. `.map//step_state.json` — current orchestrator state +2. `.map//workflow_state.json` — subtask completion +3. 
`.map//task_plan_.md` — full task decomposition with AAG contracts + +**Resume via orchestrator:** + +```bash +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') + +# Get next step from orchestrator (reads step_state.json internally) +NEXT_STEP=$(python3 .map/scripts/map_orchestrator.py get_next_step) +STEP_ID=$(echo "$NEXT_STEP" | jq -r '.step_id') +PHASE=$(echo "$NEXT_STEP" | jq -r '.phase') +IS_COMPLETE=$(echo "$NEXT_STEP" | jq -r '.is_complete') +``` + +**Then follow the same phase routing as /map-efficient:** + +For each step, route to the appropriate executor based on `$PHASE` (ACTOR, MONITOR, PREDICTOR, TESTS_GATE, etc.) following the exact same phase handlers documented in map-efficient.md. **For each remaining subtask:** -1. **Mark subtask in_progress:** - - Update `.map/progress.md` with current subtask status - -2. **Call Actor:** - ``` - Task( - subagent_type="actor", - description="Implement [subtask_id]: [description]", - prompt="[Actor prompt with subtask details and validation criteria from task plan]" - ) - ``` - -3. **Call Monitor:** - ``` - Task( - subagent_type="monitor", - description="Validate [subtask_id] implementation", - prompt="[Monitor prompt with Actor output and validation criteria]" - ) - ``` - -4. **If Monitor returns `valid: false`:** - - Retry Actor with feedback (max 5 iterations) - - Update progress checkpoint after each iteration - -5. **If Monitor returns `valid: true`:** - - Apply changes - - Mark subtask complete in `.map/progress.md` - - Continue to next subtask - -6. **Update checkpoint after each subtask:** - - Save updated state to `.map/progress.md` +1. **Get next step** from orchestrator +2. **Execute phase** (Actor → Monitor → Predictor → etc.) +3. **Validate step** via `map_orchestrator.py validate_step` +4. **Update state** automatically via orchestrator +5. **Continue** to next step until workflow complete + +**If Monitor returns `valid: false`:** +- Retry Actor with feedback (max 5 iterations, tracked in step_state.json) +- State is saved after each iteration + +**If Monitor returns `valid: true`:** +- Changes already applied by Actor +- Continue to next phase --- @@ -155,15 +175,15 @@ Before resuming, read: After all subtasks complete: ```markdown -## Workflow Resumed and Completed ✅ +## Workflow Resumed and Completed -**Task:** [task_plan] +**Task:** [task from plan] +**Branch:** ${BRANCH} **Total Subtasks:** [N] **Subtasks Completed This Session:** [M] -**Total Actor→Monitor Iterations:** [count] ### Completion Summary -[List of all completed subtasks with timestamps] +[List of all completed subtasks] ### Files Modified [List of files changed during this session] @@ -172,6 +192,7 @@ After all subtasks complete: **Optional next steps:** - Run `/map-learn` to extract and preserve patterns from this workflow +- Run `/map-check` to verify all acceptance criteria - Run tests to verify implementation - Create a commit with your changes ``` @@ -180,44 +201,44 @@ After all subtasks complete: ## Error Handling -### Checkpoint File Corrupted +### State File Corrupted -If YAML frontmatter parsing fails: +If `step_state.json` or `workflow_state.json` parsing fails: ```markdown -## Checkpoint File Corrupted +## State File Corrupted -The checkpoint file at `.map/progress.md` could not be parsed. +The state file at `.map//step_state.json` could not be parsed. **Options:** 1. View raw file contents and attempt manual recovery -2. Delete checkpoint and start fresh +2. 
Delete state files and start fresh -Would you like me to show the raw checkpoint contents? +Would you like me to show the raw state contents? ``` ### Task Plan File Missing -If `.map/task_plan_*.md` referenced in checkpoint doesn't exist: +If `.map//task_plan_.md` doesn't exist but state files do: ```markdown ## Task Plan File Missing -The checkpoint references a task plan file that no longer exists. +State files exist but the task plan is missing. -**Checkpoint:** .map/progress.md -**Expected task plan:** .map/task_plan_[branch].md +**State:** .map//step_state.json +**Expected plan:** .map//task_plan_.md **Options:** -1. Create a new task plan based on checkpoint information -2. Clear checkpoint and start fresh workflow +1. Create a new task plan based on state information +2. Clear state files and start fresh workflow ``` ### Actor/Monitor Agent Failure If subagent fails during resume: -1. Save current progress to checkpoint +1. State is preserved in step_state.json (orchestrator saves after each step) 2. Display error message with last successful state 3. Suggest retry or escalation to user @@ -235,34 +256,34 @@ User: /map-resume Claude: ## Found Incomplete Workflow **Task:** Implement user authentication with JWT tokens -**Current Phase:** implementation -**Turn Count:** 12 -**Started:** 2025-01-15T10:30:00 -**Last Updated:** 2025-01-15T14:45:00 +**Branch:** feat-user-auth +**Current Step:** 2.3 +**Current Phase:** ACTOR +**Started:** 2025-01-15T10:30:00Z ### Progress Overview 3/5 subtasks completed (60%) -### Completed Subtasks ✅ +### Completed Subtasks - [x] **ST-001**: Create User model with SQLite schema - [x] **ST-002**: Implement password hashing with bcrypt - [x] **ST-003**: Create login API endpoint -### Remaining Subtasks 📋 -- [ ] **ST-004**: Implement JWT token generation -- [ ] **ST-005**: Add logout and token refresh endpoints +### Remaining Subtasks +- [ ] **ST-004**: Implement JWT token generation — currently at phase: ACTOR +- [ ] **ST-005**: Add logout and token refresh endpoints — pending -Resume from last checkpoint? [Y/n] +Resume workflow from last checkpoint? -User: Y +User: Resume -Claude: Resuming workflow from ST-004... -[Continues Actor→Monitor loop for remaining subtasks] +Claude: Resuming workflow from ST-004 (phase: ACTOR)... +[Continues state machine execution for remaining subtasks] ``` -### Example 2: User Abandons Checkpoint +### Example 2: User Starts Fresh -User decides to start fresh: +User decides to abandon current progress: ``` User: /map-resume @@ -270,11 +291,11 @@ User: /map-resume Claude: ## Found Incomplete Workflow [Shows progress summary] -Resume from last checkpoint? [Y/n] +Resume workflow from last checkpoint? -User: n +User: Start fresh -Claude: Checkpoint cleared. Start fresh with /map-efficient. +Claude: State cleared. Start fresh with /map-efficient. ``` ### Example 3: No Checkpoint @@ -286,12 +307,12 @@ User: /map-resume Claude: ## No Workflow in Progress -No checkpoint file found at `.map/progress.md`. +No checkpoint file found at `.map/feat-auth/step_state.json`. To start a new workflow, use: - `/map-efficient "task description"` - Standard implementation - `/map-debug "issue description"` - Debugging -- `/map-fast "task description"` - Throwaway code +- `/map-fast "task description"` - Minimal workflow No recovery needed. ``` @@ -303,18 +324,15 @@ No recovery needed. 
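The resume path reuses the same step loop that `/map-efficient` drives. A minimal sketch of that loop, assuming `map_orchestrator.py` behaves as documented (`get_next_step` returns JSON with `step_id`, `phase`, `is_complete`); `execute_phase` below is a hypothetical stand-in for the phase handlers in map-efficient.md, not a real MAP function:

```python
import json
import subprocess


def orchestrator(*args: str) -> dict:
    """Run the orchestrator CLI and parse its JSON output."""
    proc = subprocess.run(
        ["python3", ".map/scripts/map_orchestrator.py", *args],
        capture_output=True, text=True, check=True,
    )
    return json.loads(proc.stdout)


# Resume loop: ask the orchestrator for the next step until the workflow completes.
while True:
    step = orchestrator("get_next_step")
    if step.get("is_complete"):
        break
    # Route to the matching executor (ACTOR, MONITOR, TESTS_GATE, ...) per map-efficient.md.
    execute_phase(step["phase"], step)  # hypothetical dispatcher
    orchestrator("validate_step", step["step_id"])
```

Retry counts and the circuit breaker live in `step_state.json`, so interrupting this loop at any point still leaves a resumable checkpoint.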
### After `/clear` If user runs `/clear` during a workflow: -- Checkpoint is preserved in `.map/progress.md` +- State is preserved in `.map//step_state.json` and `workflow_state.json` - User can resume with `/map-resume` - Fresh context starts from checkpoint state ### With `/map-efficient` -`/map-efficient` automatically saves checkpoints: -- After decomposition phase -- After each subtask completion -- Before each Actor call - -`/map-resume` can continue from any of these checkpoints. +`/map-efficient` uses `map_orchestrator.py` which maintains `step_state.json`: +- State is updated after each step validation +- `/map-resume` reads this state to determine where to continue ### With `/map-learn` @@ -326,53 +344,55 @@ After `/map-resume` completes a workflow: ## Technical Notes -### Checkpoint File Format - -The `.map/progress.md` file uses YAML frontmatter: - -```yaml ---- -task_plan: "Task description" -current_phase: implementation -turn_count: 12 -started_at: 2025-01-15T10:30:00 -updated_at: 2025-01-15T14:45:00 -branch_name: feat/user-auth -completed_subtasks: - - ST-001 - - ST-002 - - ST-003 -subtasks: - - id: ST-001 - description: Create User model - status: complete - completed_at: 2025-01-15T11:00:00 - - id: ST-002 - description: Implement password hashing - status: complete - completed_at: 2025-01-15T12:30:00 - - id: ST-004 - description: Implement JWT generation - status: pending ---- +### State File Format + +The `.map//step_state.json` is managed by `map_orchestrator.py`: + +```json +{ + "current_step": "2.3", + "current_subtask": "ST-004", + "subtask_sequence": ["ST-001", "ST-002", "ST-003", "ST-004", "ST-005"], + "completed_subtasks": ["ST-001", "ST-002", "ST-003"], + "retry_count": 0, + "max_retries": 5, + "execution_mode": "step_by_step", + "plan_approved": true, + "circuit_breaker": { + "tool_count": 42, + "max_iterations": 200 + } +} +``` -# MAP Workflow Progress -[Human-readable markdown body] +The `.map//workflow_state.json` tracks enforcement gates: + +```json +{ + "workflow": "map-efficient", + "started_at": "2025-01-15T10:30:00Z", + "current_subtask": "ST-004", + "current_state": "IN_PROGRESS", + "completed_steps": {"ST-001": [...], "ST-002": [...], "ST-003": [...]}, + "pending_steps": {"ST-004": [...], "ST-005": [...]}, + "subtask_sequence": ["ST-001", "ST-002", "ST-003", "ST-004", "ST-005"] +} ``` ### State Restoration When resuming: -1. Parse YAML frontmatter for machine state -2. Use human-readable body for context summary -3. Load full task plan from referenced file -4. Continue from last incomplete subtask +1. Read `step_state.json` for orchestrator position (current step + subtask) +2. Read `workflow_state.json` for completed/pending subtask list +3. Read `task_plan_.md` for AAG contracts and validation criteria +4. Call `map_orchestrator.py get_next_step` to determine next action +5. Continue phase-based execution from that point ### Context Efficiency Resume is designed for context efficiency: -- Only loads necessary state, not full conversation history -- Checkpoint contains enough context to continue +- Only loads necessary state files, not full conversation history +- State files contain enough context to continue - Fresh agent calls don't carry previous context pollution --- @@ -393,25 +413,25 @@ Resume is designed for context efficiency: ### Issue: Checkpoint shows wrong subtask status -**Symptom:** Checkpoint says ST-003 is complete, but code shows incomplete implementation. 
+**Symptom:** step_state.json says ST-003 is complete, but code shows incomplete implementation. -**Cause:** Session crashed between code application and checkpoint update. +**Cause:** Session crashed between code application and state update. **Fix:** 1. Manually verify each subtask's actual completion status -2. Update checkpoint to match reality +2. Update step_state.json to match reality 3. Resume from corrected state ### Issue: Resume loads but doesn't continue -**Symptom:** Progress displayed, user confirms Continue, but nothing happens. +**Symptom:** Progress displayed, user confirms Resume, but nothing happens. **Cause:** Task plan file missing or invalid. **Fix:** -1. Check for `.map/task_plan_*.md` file +1. Check for `.map//task_plan_.md` file 2. Recreate task plan if missing -3. Ensure validation criteria are present for remaining subtasks +3. Ensure AAG contracts are present for remaining subtasks ### Issue: Actor context missing after resume @@ -421,3 +441,14 @@ Resume is designed for context efficiency: 1. Read recent git diff for changed files 2. Load relevant source files for remaining subtasks 3. Provide context summary in Actor prompt + +### Issue: step_state.json and workflow_state.json out of sync + +**Symptom:** step_state.json shows ST-004 in progress, but workflow_state.json shows ST-003 pending. + +**Cause:** Crash between orchestrator update and workflow state update. + +**Fix:** +1. Trust `step_state.json` as the canonical source +2. Update `workflow_state.json` to match +3. Resume from corrected state diff --git a/src/mapify_cli/templates/commands/map-review.md b/src/mapify_cli/templates/commands/map-review.md index 3af2dcd..1e14b69 100644 --- a/src/mapify_cli/templates/commands/map-review.md +++ b/src/mapify_cli/templates/commands/map-review.md @@ -94,19 +94,21 @@ Task( **Playbook Context:** [paste relevant playbook bullets] -Provide quality assessment: -- Code quality score (0-100) -- Test coverage assessment -- Documentation completeness -- Maintainability score -- Overall verdict +Provide quality assessment using 1-10 scoring (matches evaluator agent template): +- Functionality score (1-10) +- Code quality score (1-10) +- Performance score (1-10) +- Security score (1-10) +- Testability score (1-10) +- Completeness score (1-10) Output JSON with: -- scores: {code_quality, test_coverage, documentation, maintainability, overall} -- verdict: 'excellent' | 'good' | 'acceptable' | 'needs_work' | 'reject' +- scores: {functionality, code_quality, performance, security, testability, completeness} +- overall_score: weighted float (1.0-10.0) +- recommendation: 'proceed' | 'improve' | 'reconsider' - strengths: array of strings -- improvements_needed: array of strings -- final_recommendation: string" +- weaknesses: array of strings +- next_steps: array of strings" ) ``` @@ -133,10 +135,10 @@ Once all three agents have completed, combine their findings: - Affected Files: [predictor.affected_files.length] **Evaluator Assessment:** -- Overall Score: [evaluator.scores.overall]/100 -- Code Quality: [evaluator.scores.code_quality]/100 -- Test Coverage: [evaluator.scores.test_coverage]/100 -- Verdict: [evaluator.verdict] +- Overall Score: [evaluator.overall_score]/10 +- Code Quality: [evaluator.scores.code_quality]/10 +- Security: [evaluator.scores.security]/10 +- Recommendation: [evaluator.recommendation] ### Critical Issues (High Severity) @@ -160,9 +162,9 @@ Once all three agents have completed, combine their findings: ### Final Verdict Based on combined analysis: -- **Proceed 
if:** Monitor verdict = 'approved' AND Evaluator verdict = 'excellent'|'good'|'acceptable' -- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator verdict = 'needs_work' -- **Block if:** Monitor verdict = 'rejected' OR Evaluator verdict = 'reject' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0) +- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed' +- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve' +- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0) --- diff --git a/src/mapify_cli/templates/hooks/post-edit-reminder.py b/src/mapify_cli/templates/hooks/post-edit-reminder.py index 800abbf..de96db8 100755 --- a/src/mapify_cli/templates/hooks/post-edit-reminder.py +++ b/src/mapify_cli/templates/hooks/post-edit-reminder.py @@ -12,11 +12,22 @@ import json import os +import re import subprocess import sys from pathlib import Path +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name.""" try: @@ -24,10 +35,10 @@ def get_branch_name() -> str: ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, - timeout=2, + timeout=1, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/src/mapify_cli/templates/hooks/ralph-context-pruner.py b/src/mapify_cli/templates/hooks/ralph-context-pruner.py index 6f348ba..8adb60f 100755 --- a/src/mapify_cli/templates/hooks/ralph-context-pruner.py +++ b/src/mapify_cli/templates/hooks/ralph-context-pruner.py @@ -65,6 +65,7 @@ def get_branch_name() -> str: capture_output=True, text=True, cwd=PROJECT_DIR, + timeout=2, ) if result.returncode == 0: return sanitize_branch_name(result.stdout.strip()) @@ -233,10 +234,7 @@ def main() -> None: if state: # Save restore point if save_restore_point(branch, state): - print( - f"[ralph-pruner] Saved restore_point for branch: {branch}", - file=sys.stderr, - ) + print(f"[ralph-pruner] Saved restore_point for branch: {branch}", file=sys.stderr) # Inject recovery message into context recovery_msg = format_recovery_message(state, branch) diff --git a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py index debcc45..2bcead6 100755 --- a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py +++ b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py @@ -89,6 +89,7 @@ def get_branch_name() -> str: capture_output=True, text=True, cwd=PROJECT_DIR, + timeout=1, ) if result.returncode == 0: return sanitize_branch_name(result.stdout.strip()) diff --git a/src/mapify_cli/templates/hooks/safety-guardrails.py b/src/mapify_cli/templates/hooks/safety-guardrails.py index 5358a97..a0158c7 100644 --- a/src/mapify_cli/templates/hooks/safety-guardrails.py +++ b/src/mapify_cli/templates/hooks/safety-guardrails.py @@ -46,16 +46,7 @@ ] # Safe path prefixes (skip checks for known safe directories) -SAFE_PATH_PREFIXES = [ - "src/", - "lib/", - "test/", - "tests/", - "docs/", - 
"pkg/", - "cmd/", - "internal/", -] +SAFE_PATH_PREFIXES = ["src/", "lib/", "test/", "tests/", "docs/", "pkg/", "cmd/", "internal/"] def is_safe_path(path: str) -> bool: @@ -76,10 +67,7 @@ def check_file_safety(path: str) -> tuple[bool, str]: path_lower = path.lower() for pattern in DANGEROUS_FILE_PATTERNS: if re.search(pattern, path_lower, re.IGNORECASE): - return ( - False, - f"Blocked: Access to sensitive file pattern '{pattern}' in path: {path}", - ) + return False, f"Blocked: Access to sensitive file pattern '{pattern}' in path: {path}" return True, "" diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py index 55a769f..f0b8d34 100755 --- a/src/mapify_cli/templates/hooks/workflow-context-injector.py +++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py @@ -74,6 +74,16 @@ ] +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name.""" import subprocess @@ -83,10 +93,10 @@ def get_branch_name() -> str: ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True, - timeout=2, + timeout=1, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py index 73a0098..01e3429 100755 --- a/src/mapify_cli/templates/hooks/workflow-gate.py +++ b/src/mapify_cli/templates/hooks/workflow-gate.py @@ -42,6 +42,7 @@ """ import json import os +import re import sys from pathlib import Path from typing import Dict, Optional @@ -53,6 +54,16 @@ REQUIRED_STEPS = ["actor", "monitor"] +def sanitize_branch_name(branch: str) -> str: + """Sanitize branch name for safe filesystem paths.""" + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" + + def get_branch_name() -> str: """Get current git branch name (sanitized for filesystem).""" try: @@ -65,9 +76,7 @@ def get_branch_name() -> str: timeout=1, ) if result.returncode == 0: - branch = result.stdout.strip() - # Sanitize for filesystem (same as other MAP tools) - return branch.replace("/", "-").replace(" ", "-") + return sanitize_branch_name(result.stdout.strip()) except Exception: pass return "default" diff --git a/src/mapify_cli/templates/map/scripts/diagnostics.py b/src/mapify_cli/templates/map/scripts/diagnostics.py index 61a7717..84f439f 100644 --- a/src/mapify_cli/templates/map/scripts/diagnostics.py +++ b/src/mapify_cli/templates/map/scripts/diagnostics.py @@ -36,7 +36,13 @@ def get_branch_name() -> str: check=False, ) if result.returncode == 0: - return result.stdout.strip().replace("/", "-") + branch = result.stdout.strip() + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." 
in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" except Exception: pass return "default" diff --git a/src/mapify_cli/templates/map/scripts/map_orchestrator.py b/src/mapify_cli/templates/map/scripts/map_orchestrator.py index 45890d4..66db1e8 100755 --- a/src/mapify_cli/templates/map/scripts/map_orchestrator.py +++ b/src/mapify_cli/templates/map/scripts/map_orchestrator.py @@ -12,7 +12,7 @@ ARCHITECTURE: ┌─────────────────────────────────────────────────────────────┐ - │ map-efficient.md (150 lines) │ + │ map-efficient.md (~540 lines) │ │ ├─> 1. Call get_next_step() → returns step instruction │ │ ├─> 2. Execute step (Actor/Monitor/mem0/etc) │ │ ├─> 3. Call validate_step() → checks completion │ @@ -35,7 +35,7 @@ "pending_steps": ["2.1_MEM0_SEARCH", "2.3_ACTOR", "2.4_MONITOR", ...] } -STEP PHASES (14 total): +STEP PHASES (16 total): 1.0 DECOMPOSE - task-decomposer agent 1.5 INIT_PLAN - Generate task_plan.md 1.55 REVIEW_PLAN - User review + explicit approval checkpoint @@ -46,9 +46,8 @@ 2.2 RESEARCH - research-agent (conditional) 2.3 ACTOR - Actor agent implementation 2.4 MONITOR - Monitor validation - 2.5 RETRY_LOOP - Retry on Monitor failure 2.6 PREDICTOR - Impact analysis (conditional) - 2.7 APPLY_CHANGES - Write/Edit tools + 2.7 UPDATE_STATE - Mark subtask progress 2.8 TESTS_GATE - Run tests 2.9 LINTER_GATE - Run linter 2.10 VERIFY_ADHERENCE - Self-audit checkpoint @@ -112,9 +111,8 @@ "2.2": "RESEARCH", "2.3": "ACTOR", "2.4": "MONITOR", - "2.5": "RETRY_LOOP", "2.6": "PREDICTOR", - "2.7": "APPLY_CHANGES", + "2.7": "UPDATE_STATE", "2.8": "TESTS_GATE", "2.9": "LINTER_GATE", "2.10": "VERIFY_ADHERENCE", @@ -141,6 +139,16 @@ "2.11", ] +# Steps that require evidence files from agents before validation. +# Format: step_id -> (agent_phase, always_required) +# If always_required is False, evidence is only checked when the step +# appears in pending_steps (i.e., it wasn't skipped). +EVIDENCE_REQUIRED = { + "2.3": ("actor", True), # Always required + "2.4": ("monitor", True), # Always required + "2.6": ("predictor", False), # Only when 2.6 is in pending_steps +} + @dataclass class StepState: @@ -261,7 +269,7 @@ def get_step_instruction(step_id: str, state: StepState) -> str: "into ≤20 atomic subtasks with validation criteria." ), "1.5": ( - "Generate .map/task_plan_.md from decomposer blueprint. " + "Generate .map//task_plan_.md from decomposer blueprint. " "Include Goal, Current Phase, and status for each subtask." ), "1.55": ( @@ -294,19 +302,25 @@ def get_step_instruction(step_id: str, state: StepState) -> str: ), "2.3": ( f"Call Task(subagent_type='actor') to implement subtask " - f"{state.current_subtask_id}. Pass XML packet and context patterns." + f"{state.current_subtask_id}. Pass XML packet and context patterns. " + f"Actor MUST write evidence file: " + f".map//evidence/actor_{state.current_subtask_id}.json" ), "2.4": ( "Call Task(subagent_type='monitor') to validate Actor output. " - "Check correctness, security, standards, and tests." + "Check correctness, security, standards, and tests. " + f"Monitor MUST write evidence file: " + f".map//evidence/monitor_{state.current_subtask_id}.json" ), "2.6": ( "Call Task(subagent_type='predictor') for impact analysis " - "(required for medium/high risk subtasks)." + "(required for medium/high risk subtasks). " + f"Predictor MUST write evidence file: " + f".map//evidence/predictor_{state.current_subtask_id}.json" ), "2.7": ( - "Apply Actor's changes using Edit/Write tools. 
" - "GATE: Only allowed if Monitor.valid === true." + "Update workflow state to mark subtask progress. " + "Code was already applied by Actor and validated by Monitor." ), "2.8": ( "Run tests using pytest/npm test/go test/cargo test. " @@ -363,7 +377,8 @@ def get_next_step(branch: str) -> Dict: state.current_step_id = "2.0" state.current_step_phase = "XML_PACKET" # Reset to subtask-level steps (skip global setup steps) - state.pending_steps = STEP_ORDER[3:] # Start from 2.0 + xml_packet_idx = STEP_ORDER.index("2.0") + state.pending_steps = STEP_ORDER[xml_packet_idx:] # Start from 2.0 state.completed_steps = [] state.retry_count = 0 state.save(state_file) @@ -428,11 +443,74 @@ def validate_step(step_id: str, branch: str) -> Dict: "message": "Invalid execution_mode. Set mode first: python3 .map/scripts/map_orchestrator.py set_execution_mode step_by_step|batch", } + # Evidence-gated validation: require agent evidence files for key steps + if step_id in EVIDENCE_REQUIRED: + phase_name, always_required = EVIDENCE_REQUIRED[step_id] + evidence_dir = Path(f".map/{branch}/evidence") + if not evidence_dir.is_dir(): + return { + "valid": False, + "message": ( + f"Evidence directory missing: {evidence_dir}. " + f"Run initialize or resume_from_plan first." + ), + } + subtask_id = state.current_subtask_id or "unknown" + evidence_file = evidence_dir / f"{phase_name}_{subtask_id}.json" + if not evidence_file.exists(): + return { + "valid": False, + "message": ( + f"Evidence file missing: {evidence_file}. " + f"The {phase_name} agent must write this file before " + f"validate_step can accept step {step_id}." + ), + } + # Validate JSON structure + try: + evidence_data = json.loads( + evidence_file.read_text(encoding="utf-8") + ) + except (json.JSONDecodeError, OSError) as exc: + return { + "valid": False, + "message": ( + f"Evidence file {evidence_file} is not valid JSON: {exc}" + ), + } + # Check required fields + for required_field in ("phase", "subtask_id", "timestamp"): + if required_field not in evidence_data: + return { + "valid": False, + "message": ( + f"Evidence file {evidence_file} missing required " + f"field: '{required_field}'. " + f"Required fields: phase, subtask_id, timestamp." + ), + } + # Validate subtask_id matches current subtask + if evidence_data.get("subtask_id") != subtask_id: + return { + "valid": False, + "message": ( + f"Evidence file subtask_id mismatch: " + f"expected '{subtask_id}', " + f"got '{evidence_data.get('subtask_id')}'." 
+ ), + } + # Mark step complete state.completed_steps.append(step_id) if step_id in state.pending_steps: state.pending_steps.remove(step_id) + # When transitioning from init phases to execution phases, + # ensure the first subtask is selected + if step_id == "1.6" and state.subtask_sequence and not state.current_subtask_id: + state.current_subtask_id = state.subtask_sequence[0] + state.subtask_index = 0 + # Advance current_step_id to next pending step if state.pending_steps: next_id = state.pending_steps[0] @@ -469,6 +547,10 @@ def initialize_workflow(task: str, branch: str) -> Dict: state = StepState() state.save(state_file) + # Create evidence directory for artifact-gated validation + evidence_dir = Path(f".map/{branch}/evidence") + evidence_dir.mkdir(parents=True, exist_ok=True) + return { "status": "initialized", "state_file": str(state_file), @@ -510,6 +592,198 @@ def set_execution_mode(mode: str, branch: str) -> Dict: return {"status": "success", "execution_mode": state.execution_mode} +SKIPPABLE_STEPS = {"2.2", "2.6", "2.11"} + + +def skip_step(step_id: str, branch: str) -> Dict: + """Skip a conditional step without executing it. + + Only steps that are defined as conditional can be skipped: + - 2.2 (RESEARCH): conditional on refactoring or 3+ files + - 2.6 (PREDICTOR): conditional on medium/high risk + - 2.11 (SUBTASK_APPROVAL): conditional on step_by_step mode + + Args: + step_id: Step identifier to skip + branch: Git branch name (sanitized) + + Returns: + Dict with status and next step info + """ + if step_id not in SKIPPABLE_STEPS: + return { + "status": "error", + "message": ( + f"Step {step_id} cannot be skipped. " + f"Only conditional steps can be skipped: " + f"{', '.join(sorted(SKIPPABLE_STEPS))}" + ), + } + + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if state.current_step_id != step_id: + return { + "status": "error", + "message": f"Step mismatch: current is {state.current_step_id}, cannot skip {step_id}", + } + + # Mark step as completed (skipped) and advance + state.completed_steps.append(step_id) + if step_id in state.pending_steps: + state.pending_steps.remove(step_id) + + # Advance to next pending step + if state.pending_steps: + next_id = state.pending_steps[0] + state.current_step_id = next_id + state.current_step_phase = STEP_PHASES.get(next_id, "UNKNOWN") + else: + state.current_step_id = "COMPLETE" + state.current_step_phase = "COMPLETE" + + state.save(state_file) + + return { + "status": "success", + "message": f"Step {step_id} skipped", + "next_step": state.current_step_id, + } + + +def check_circuit_breaker(branch: str) -> Dict: + """Check circuit breaker status based on completed steps count. + + Returns tool_count (total completed steps) and max_iterations threshold. + If tool_count >= max_iterations, the workflow should ask the user to continue or abort. + + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with tool_count, max_iterations, triggered flag + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + tool_count = len(state.completed_steps) + max_iterations = len(state.subtask_sequence) * len(STEP_ORDER) + + return { + "tool_count": tool_count, + "max_iterations": max_iterations, + "triggered": tool_count >= max_iterations, + "retry_count": state.retry_count, + "max_retries": state.max_retries, + } + + +def set_subtasks(subtask_ids: List[str], branch: str) -> Dict: + """Set subtask sequence after decomposition and select the first subtask. 
+ + Args: + subtask_ids: List of subtask IDs (e.g., ["ST-001", "ST-002", "ST-003"]) + branch: Git branch name (sanitized) + + Returns: + Dict with status and subtask info + """ + state_file = Path(f".map/{branch}/step_state.json") + state = StepState.load(state_file) + + if not subtask_ids: + return {"status": "error", "message": "At least one subtask ID is required"} + + state.subtask_sequence = subtask_ids + state.current_subtask_id = subtask_ids[0] + state.subtask_index = 0 + state.save(state_file) + + return { + "status": "success", + "subtask_sequence": subtask_ids, + "current_subtask_id": subtask_ids[0], + } + + +def resume_from_plan(branch: str) -> Dict: + """Resume workflow from an existing /map-plan output, skipping init phases. + + Detects task_plan_.md and workflow_state.json created by /map-plan. + Extracts subtask IDs from the plan, marks init phases as completed, and + starts execution from CHOOSE_MODE (user still picks step_by_step vs batch). + + Args: + branch: Git branch name (sanitized) + + Returns: + Dict with status and skipped phases + """ + plan_dir = Path(f".map/{branch}") + plan_file = plan_dir / f"task_plan_{branch}.md" + workflow_state_file = plan_dir / "workflow_state.json" + + # Verify plan artifacts exist + if not plan_file.exists(): + return { + "status": "error", + "message": f"No plan found at {plan_file}. Run /map-plan first.", + } + + # Extract subtask IDs from plan file (ST-XXX pattern) + import re + + plan_content = plan_file.read_text(encoding="utf-8") + subtask_ids = re.findall(r"###\s+(ST-\d+)", plan_content) + + if not subtask_ids: + return { + "status": "error", + "message": f"No subtask IDs (ST-XXX) found in {plan_file}.", + } + + # Extract AAG contracts if present in workflow_state.json + aag_contracts = {} + if workflow_state_file.exists(): + try: + ws_data = json.loads(workflow_state_file.read_text(encoding="utf-8")) + aag_contracts = ws_data.get("aag_contracts", {}) + except (json.JSONDecodeError, KeyError): + pass + + # Create state that skips DECOMPOSE, INIT_PLAN, REVIEW_PLAN (plan already approved) + # Start from CHOOSE_MODE so user can still pick execution mode + skipped_phases = ["1.0", "1.5", "1.55"] + execution_start = [s for s in STEP_ORDER if s not in skipped_phases] + + state_file = plan_dir / "step_state.json" + state = StepState( + current_subtask_id=subtask_ids[0], + subtask_index=0, + subtask_sequence=subtask_ids, + current_step_id="1.56", + current_step_phase="CHOOSE_MODE", + completed_steps=skipped_phases, + pending_steps=execution_start, + plan_approved=True, + ) + state.save(state_file) + + # Create evidence directory for artifact-gated validation + evidence_dir = plan_dir / "evidence" + evidence_dir.mkdir(parents=True, exist_ok=True) + + return { + "status": "success", + "message": "Resumed from /map-plan. 
Skipped DECOMPOSE, INIT_PLAN, REVIEW_PLAN.", + "subtask_sequence": subtask_ids, + "current_subtask_id": subtask_ids[0], + "aag_contracts_found": len(aag_contracts), + "next_phase": "CHOOSE_MODE", + } + + def main(): """CLI entry point.""" parser = argparse.ArgumentParser( @@ -523,11 +797,19 @@ def main(): "initialize", "set_plan_approved", "set_execution_mode", + "skip_step", + "set_subtasks", + "resume_from_plan", + "check_circuit_breaker", ], help="Command to execute", ) - parser.add_argument("task_or_step", nargs="?", help="Task description or step ID") - parser.add_argument("value", nargs="?", help="Optional value for setter commands") + parser.add_argument( + "task_or_step", nargs="?", help="Task description, step ID, or subtask IDs" + ) + parser.add_argument( + "extra_args", nargs="*", help="Additional arguments (e.g., more subtask IDs)" + ) parser.add_argument("--branch", help="Git branch (auto-detected if omitted)") args = parser.parse_args() @@ -577,6 +859,40 @@ def main(): result = set_execution_mode(mode, branch) print(json.dumps(result, indent=2)) + elif args.command == "skip_step": + if not args.task_or_step: + print( + json.dumps({"error": "step_id required for skip_step"}), + file=sys.stderr, + ) + sys.exit(1) + result = skip_step(args.task_or_step, branch) + print(json.dumps(result, indent=2)) + + elif args.command == "set_subtasks": + if not args.task_or_step: + print( + json.dumps( + { + "error": "At least one subtask ID required. " + "Usage: set_subtasks ST-001 ST-002 ST-003" + } + ), + file=sys.stderr, + ) + sys.exit(1) + subtask_ids = [args.task_or_step] + (args.extra_args or []) + result = set_subtasks(subtask_ids, branch) + print(json.dumps(result, indent=2)) + + elif args.command == "resume_from_plan": + result = resume_from_plan(branch) + print(json.dumps(result, indent=2)) + + elif args.command == "check_circuit_breaker": + result = check_circuit_breaker(branch) + print(json.dumps(result, indent=2)) + except Exception as e: print(json.dumps({"error": str(e)}), file=sys.stderr) sys.exit(1) diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index 117199a..92e388e 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -108,7 +108,7 @@ def update_workflow_state( return { "status": "success", - "message": f"Updated {subtask_id}: {step_name} → {new_state}", + "message": f"Updated {subtask_id}: {step_name} -> {new_state}", "completed_steps": state["completed_steps"][subtask_id], } @@ -135,7 +135,7 @@ def update_plan_status( if branch is None: branch = get_branch_name() - plan_file = Path(f".map/task_plan_{branch}.md") + plan_file = Path(f".map/{branch}/task_plan_{branch}.md") if not plan_file.exists(): return {"status": "error", "message": f"Plan file not found: {plan_file}"} @@ -143,8 +143,8 @@ def update_plan_status( try: content = plan_file.read_text(encoding="utf-8") - # Find subtask section (## ST-XXX: Title) - pattern = rf"(## {re.escape(subtask_id)}:.*?\n\*\*Status:\*\*\s+)\w+" + # Find subtask section (### ST-XXX: Title) + pattern = rf"(### {re.escape(subtask_id)}:.*?\n- \*\*Status:\*\*\s+)\w+" replacement = rf"\g<1>{new_status}" updated_content = re.sub(pattern, replacement, content) @@ -277,7 +277,7 @@ def get_plan_path(branch: Optional[str] = None) -> Path: """ if branch is None: branch = get_branch_name() - return Path(f".map/task_plan_{branch}.md") + return Path(f".map/{branch}/task_plan_{branch}.md") def 
read_current_goal(branch: Optional[str] = None) -> Optional[str]: diff --git a/src/mapify_cli/templates/references/step-state-schema.md b/src/mapify_cli/templates/references/step-state-schema.md index 7e538b1..8b5c290 100644 --- a/src/mapify_cli/templates/references/step-state-schema.md +++ b/src/mapify_cli/templates/references/step-state-schema.md @@ -65,13 +65,12 @@ Current step set (linear order; some are conditional): 8. `2.2` RESEARCH (conditional) 9. `2.3` ACTOR 10. `2.4` MONITOR -11. `2.5` RETRY_LOOP (conditional) -12. `2.6` PREDICTOR (conditional) -13. `2.7` APPLY_CHANGES -14. `2.8` TESTS_GATE (conditional) -15. `2.9` LINTER_GATE (conditional) -16. `2.10` VERIFY_ADHERENCE -17. `2.11` SUBTASK_APPROVAL (conditional; step_by_step only) +11. `2.6` PREDICTOR (conditional) +12. `2.7` UPDATE_STATE +13. `2.8` TESTS_GATE (conditional) +14. `2.9` LINTER_GATE (conditional) +15. `2.10` VERIFY_ADHERENCE +16. `2.11` SUBTASK_APPROVAL (conditional; step_by_step only) ## Relationship to workflow_state.json diff --git a/src/mapify_cli/templates/references/workflow-state-schema.md b/src/mapify_cli/templates/references/workflow-state-schema.md index 16e4657..42e34fd 100644 --- a/src/mapify_cli/templates/references/workflow-state-schema.md +++ b/src/mapify_cli/templates/references/workflow-state-schema.md @@ -231,7 +231,7 @@ Check current state: ```bash # Show current state -BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +BRANCH=$(git rev-parse --abbrev-ref HEAD | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||') cat .map/${BRANCH}/workflow_state.json | jq '.' # Check what steps are completed for current subtask diff --git a/src/mapify_cli/templates/settings.hooks.json b/src/mapify_cli/templates/settings.hooks.json index cfee485..eb4e7ef 100644 --- a/src/mapify_cli/templates/settings.hooks.json +++ b/src/mapify_cli/templates/settings.hooks.json @@ -22,7 +22,7 @@ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py", - "timeout": 1, + "timeout": 3, "description": "Injects workflow context only for Edit/Write and significant Bash commands" } ] @@ -36,7 +36,7 @@ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py", - "timeout": 1, + "timeout": 3, "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" } ] diff --git a/src/mapify_cli/templates/settings.json b/src/mapify_cli/templates/settings.json index f0ac2c7..22c2367 100644 --- a/src/mapify_cli/templates/settings.json +++ b/src/mapify_cli/templates/settings.json @@ -9,30 +9,30 @@ "Edit(./.env*)", "Edit(**/*credentials*)", "Edit(**/*secret*)", - "Bash(rm:-rf)", "Bash(rm -rf)", - "Bash(git:push:--force:origin:main)", - "Bash(git:push:--force:origin:master)", - "Bash(git:reset:--hard)" + "Bash(git push --force origin main)", + "Bash(git push --force origin master)", + "Bash(git reset --hard)" ], "allow": [ - "Bash(mapify:*)", - "Bash(pytest:*)", - "Bash(make:lint)", - "Bash(make:test)", - "Bash(sqlite3:.claude/playbook.db:*)", - "Bash(ruff:*)", - "Bash(black:*)", - "Bash(git:status)", - "Bash(git:diff)", - "Bash(git:log)", - "Bash(git:branch)", - "Bash(git:add)", - "Bash(git:commit)", - "Bash(helm:template:*)", - "Bash(go:vet:*)", - "Bash(gofmt:*)", - "Bash(kubectl:get:*)" + "Bash(mapify *)", + "Bash(pytest *)", + "Bash(make lint)", + "Bash(make test)", + "Bash(sqlite3 .claude/playbook.db *)", + "Bash(ruff *)", + "Bash(black *)", + "Bash(git status)", + "Bash(git diff)", + "Bash(git log)", + "Bash(git branch)", + 
"Bash(git rev-parse *)", + "Bash(git add)", + "Bash(git commit)", + "Bash(helm template *)", + "Bash(go vet *)", + "Bash(gofmt *)", + "Bash(kubectl get *)" ] } } diff --git a/src/mapify_cli/templates/skills/README.md b/src/mapify_cli/templates/skills/README.md index 1e2854c..fafbe35 100644 --- a/src/mapify_cli/templates/skills/README.md +++ b/src/mapify_cli/templates/skills/README.md @@ -34,7 +34,7 @@ Skills provide specialized guidance without executing code. They help users unde **Content:** - Quick decision tree (5 questions) -- Workflow comparison matrix (5 workflows) +- Workflow comparison matrix (4 implemented + 2 planned) - Detailed workflow descriptions - Agent architecture overview - 8 deep-dive resource files @@ -50,10 +50,10 @@ MAP: [Shows decision tree and comparison matrix] **Resources available:** - `map-fast-deep-dive.md` - When (not) to use /map-fast - `map-efficient-deep-dive.md` - Optimization strategies -- `map-feature-deep-dive.md` - Full validation workflow - `map-debug-deep-dive.md` - Debugging techniques -- `map-refactor-deep-dive.md` - Dependency analysis -- `agent-architecture.md` - How 8 agents orchestrate +- `map-feature-deep-dive.md` - Full validation workflow (PLANNED) +- `map-refactor-deep-dive.md` - Dependency analysis (PLANNED) +- `agent-architecture.md` - How 12 agents orchestrate - `playbook-system.md` - Knowledge storage and search --- diff --git a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md b/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md index 45fdf44..1d8eb8d 100644 --- a/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md +++ b/src/mapify_cli/templates/skills/map-cli-reference/SKILL.md @@ -1,5 +1,15 @@ +--- name: map-cli-reference -description: Use when encountering mapify CLI or MCP usage errors (no such command, no such option, parameter not found). Provides mem0 MCP and validate command corrections with common mistake patterns. +description: >- + Quick reference for mapify CLI and mem0 MCP usage errors. Use when + encountering "no such command", "no such option", "parameter not found", + or when user asks "how to use mapify", "mem0 commands", "validate graph". + Do NOT use for workflow selection (use map-workflows-guide) or planning + methodology (use map-planning). +metadata: + author: azalio + version: 3.1.0 + mcp-server: mem0 --- # MAP CLI Quick Reference @@ -137,6 +147,56 @@ mcp__mem0__map_tiered_search(query="error handling", limit=5) --- +## Examples + +### Example 1: Fixing a deprecated command error + +**User says:** "I'm getting `Error: No such command 'playbook'` when running mapify" + +**Actions:** +1. Identify error type — deprecated command usage +2. Explain: playbook commands removed in v4.0+ +3. Provide replacement: `mcp__mem0__map_tiered_search` for reads, `Task(subagent_type="curator", ...)` for writes + +**Result:** User switches to mem0 MCP tools, error resolved. + +### Example 2: Validating a dependency graph + +**User says:** "How do I check if my task plan has circular dependencies?" + +**Actions:** +1. Show command: `mapify validate graph task_plan.json` +2. Explain exit codes: 0 = valid, 1 = invalid, 2 = malformed JSON +3. Suggest `--strict` flag for CI pipelines and `--visualize` for debugging + +**Result:** User validates their task plan and fixes dependency issues before running workflow. + +### Example 3: mem0 MCP not responding + +**User says:** "mem0 tiered search returns empty results" + +**Actions:** +1. Check mem0 MCP configuration in `.claude/mcp_config.json` +2. 
Verify namespace conventions (org/project/branch) +3. Test with broad query: `mcp__mem0__map_tiered_search(query="test", limit=1)` + +**Result:** User identifies configuration issue and restores mem0 connectivity. + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| `No such command 'playbook'` | Deprecated in v4.0+ | Use `mcp__mem0__map_tiered_search` for pattern retrieval | +| `No such option '--output'` | Wrong subcommand syntax | Check `mapify --help` for valid options | +| mem0 tool invocation fails | MCP server not configured | Add mem0 to `.claude/mcp_config.json` and restart | +| `validate graph` exit code 2 | Malformed JSON input | Validate JSON with `python -m json.tool < file.json` | +| Patterns not persisting | Writing directly instead of via Curator | Always use `Task(subagent_type="curator", ...)` for pattern writes | +| `mapify init` overwrites files | Using `--force` flag | Omit `--force` to preserve existing configuration | + +--- + **Version**: 1.1 **Last Updated**: 2026-01-15 **Lines**: ~200 (follows 500-line skill rule) diff --git a/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh b/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh new file mode 100755 index 0000000..22e3208 --- /dev/null +++ b/src/mapify_cli/templates/skills/map-cli-reference/scripts/check-command.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# Check if a mapify subcommand exists and show usage help. +# +# Usage: +# ./check-command.sh [option] +# +# Examples: +# ./check-command.sh validate graph +# ./check-command.sh init +# ./check-command.sh playbook # deprecated command +# +# Exit codes: +# 0 - Command exists +# 1 - Command not found +# 2 - Command deprecated + +set -euo pipefail + +SUBCOMMAND="${1:-}" +OPTION="${2:-}" + +if [ -z "$SUBCOMMAND" ]; then + echo "Usage: $0 [option]" + echo "" + echo "Checks if a mapify subcommand exists." + echo "" + echo "Available subcommands:" + echo " init - Initialize project with MAP framework" + echo " check - Run system checks" + echo " upgrade - Upgrade agent templates" + echo " validate - Validate dependency graphs" + echo "" + echo "Deprecated subcommands:" + echo " playbook - Removed in v4.0+ (use mem0 MCP)" + exit 1 +fi + +# Known deprecated commands +DEPRECATED_COMMANDS="playbook" + +# Known valid commands +VALID_COMMANDS="init check upgrade validate" + +# Check deprecated first +for dep in $DEPRECATED_COMMANDS; do + if [ "$SUBCOMMAND" = "$dep" ]; then + echo "ERROR: '$SUBCOMMAND' is deprecated (removed in v4.0+)" + echo "" + echo "Replacements:" + case "$SUBCOMMAND" in + playbook) + echo " Pattern retrieval: mcp__mem0__map_tiered_search(query=\"...\", limit=5)" + echo " Pattern storage: Task(subagent_type=\"curator\", ...)" + echo " Pattern archival: mcp__mem0__map_archive_pattern(...)" + ;; + esac + exit 2 + fi +done + +# Check valid commands +FOUND=0 +for cmd in $VALID_COMMANDS; do + if [ "$SUBCOMMAND" = "$cmd" ]; then + FOUND=1 + break + fi +done + +if [ "$FOUND" -eq 0 ]; then + echo "ERROR: No such command '$SUBCOMMAND'" + echo "" + echo "Available commands: $VALID_COMMANDS" + echo "" + echo "Did you mean one of these?" 
+ # Simple fuzzy match + for cmd in $VALID_COMMANDS; do + echo " mapify $cmd" + done + exit 1 +fi + +# Command exists, show help +echo "OK: 'mapify $SUBCOMMAND' is a valid command" + +# Show subcommand-specific help +case "$SUBCOMMAND" in + validate) + echo "" + echo "Usage: mapify validate graph [--strict] [--visualize]" + echo "" + echo "Options:" + echo " --strict Fail on warnings (exit code 1)" + echo " --visualize Show dependency graph" + echo "" + echo "Exit codes: 0=valid, 1=invalid, 2=malformed input" + if [ -n "$OPTION" ] && [ "$OPTION" != "graph" ]; then + echo "" + echo "WARNING: Unknown validate subcommand '$OPTION'. Did you mean 'graph'?" + fi + ;; + init) + echo "" + echo "Usage: mapify init [project-name] [--mcp essential|full] [--force]" + echo "" + echo "Options:" + echo " --mcp essential Install essential MCP tools only" + echo " --mcp full Install all MCP tools" + echo " --force Overwrite existing configuration" + ;; + check) + echo "" + echo "Usage: mapify check [--debug]" + echo "" + echo "Options:" + echo " --debug Show detailed diagnostic information" + ;; + upgrade) + echo "" + echo "Usage: mapify upgrade" + echo "" + echo "Upgrades agent templates to latest version." + ;; +esac + +exit 0 diff --git a/src/mapify_cli/templates/skills/map-planning/SKILL.md b/src/mapify_cli/templates/skills/map-planning/SKILL.md index cc72df3..244484e 100644 --- a/src/mapify_cli/templates/skills/map-planning/SKILL.md +++ b/src/mapify_cli/templates/skills/map-planning/SKILL.md @@ -1,8 +1,17 @@ --- name: map-planning version: "1.0.0" -description: Implements file-based planning for MAP Framework workflows with branch-scoped task tracking in .map/ directory. Prevents goal drift via automatic plan synchronization before tool use and validates completion state on exit. +description: >- + File-based planning for MAP Framework with branch-scoped task tracking + in .map/ directory. Use when user says "create a plan", "track progress", + "show task status", or needs persistent planning across agent sessions. + Prevents goal drift via automatic plan synchronization. Do NOT use for + workflow selection (use map-workflows-guide) or CLI errors (use + map-cli-reference). allowed-tools: Read, Write, Edit, Bash, Glob, Grep +metadata: + author: azalio + version: 3.1.0 hooks: PreToolUse: - matcher: "Write|Edit|Bash" @@ -31,19 +40,23 @@ Instead of relying solely on conversation context (limited window), this skill e ## File Structure -All files reside in `.map/` directory with branch-based naming: +All files reside in `.map//` directory with branch-based naming: ``` .map/ -├── task_plan_.md # Primary plan with phases and status -├── findings_.md # Research findings, decisions, key files -└── progress_.md # Action log, errors, test results +└── / + ├── task_plan_.md # Primary plan with phases and status + ├── findings_.md # Research findings, decisions, key files + ├── progress_.md # Action log, errors, test results + ├── workflow_state.json # Subtask completion tracking + ├── step_state.json # Orchestrator step state + └── evidence/ # Artifact-gated validation evidence ``` **Example**: On branch `feature-auth`: -- `.map/task_plan_feature-auth.md` -- `.map/findings_feature-auth.md` -- `.map/progress_feature-auth.md` +- `.map/feature-auth/task_plan_feature-auth.md` +- `.map/feature-auth/findings_feature-auth.md` +- `.map/feature-auth/progress_feature-auth.md` ## Hook Behavior @@ -103,7 +116,7 @@ Creates `.map/` directory and skeleton files for current branch. 
- Check validation criteria checkboxes [x] when done

### 3-Strike Error Protocol
-Log errors to `progress_<branch>.md` after attempt 3+. After 3 failed attempts:
+Log errors to `.map/<branch>/progress_<branch>.md` after attempt 3+. After 3 failed attempts:
1. Escalate to user (CONTINUE/SKIP/ABORT options)
2. If SKIP: mark phase `blocked`, move to next subtask
3. If ABORT: mark workflow `blocked`, exit
@@ -163,6 +176,56 @@ Only Monitor agent updates task_plan status (via `status_update` output field).

---

+## Examples
+
+### Example 1: Starting a new feature plan
+
+**User says:** "Create a plan for implementing user notifications"
+
+**Actions:**
+1. Run `init-session.sh` to create `.map/` skeleton for current branch
+2. Populate `.map/<branch>/task_plan_<branch>.md` with phases: research, design, implement, test
+3. Set Goal: "Implement user notification system with email and in-app channels"
+4. Mark ST-001 as `in_progress`
+
+**Result:** Persistent plan files created in `.map/` directory, PreToolUse hook keeps agent focused on current phase.
+
+### Example 2: Resuming work after context reset
+
+**User says:** "Show task status" or "What was I working on?"
+
+**Actions:**
+1. Read `.map/<branch>/task_plan_<branch>.md` to find current phase
+2. Read `.map/<branch>/progress_<branch>.md` for recent action log
+3. Read `.map/<branch>/findings_<branch>.md` for accumulated decisions
+
+**Result:** Agent resumes from last checkpoint without losing context, even after conversation window reset.
+
+### Example 3: Handling repeated failures
+
+**User says:** "The database migration keeps failing"
+
+**Actions:**
+1. Log error to `.map/<branch>/progress_<branch>.md` (attempt count tracked)
+2. After 3 failed attempts, trigger 3-Strike Protocol
+3. Present CONTINUE/SKIP/ABORT options to user
+
+**Result:** Phase marked `blocked`, agent moves to next subtask or exits cleanly.
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| "Plan not found" warning | `.map/` directory not initialized | Run `init-session.sh` or start a MAP workflow |
+| Stop hook warns "No terminal state" | `## Terminal State` section not updated | Update Terminal State to `complete`, `blocked`, `won't_do`, or `superseded` |
+| Branch name causes file errors | Branch has `/` characters | Scripts auto-sanitize: `feature/auth` becomes `feature-auth` |
+| PreToolUse hook shows stale focus | Plan file not updated after phase completion | Update `**Status:**` to `complete` and advance `## Current Phase` |
+| `/map-fast` ignores planning | By design — `/map-fast` skips planning | Use `/map-efficient` for planning support |
+
+---
+
**Version**: 1.0.0 (2025-01-10)

**References**:
diff --git a/src/mapify_cli/templates/skills/map-planning/scripts/get-plan-path.sh b/src/mapify_cli/templates/skills/map-planning/scripts/get-plan-path.sh
index a7b8937..8051c19 100755
--- a/src/mapify_cli/templates/skills/map-planning/scripts/get-plan-path.sh
+++ b/src/mapify_cli/templates/skills/map-planning/scripts/get-plan-path.sh
@@ -4,32 +4,37 @@
#
# Description:
#   Detects current git branch and outputs path to branch-specific task plan file.
-#   Sanitizes branch names by replacing '/' with '-' for filesystem compatibility.
+#   Sanitizes branch names for filesystem compatibility.
#   Defaults to 'main' branch when not in a git repository.
#
# Usage:
#   PLAN_PATH=$(bash .claude/skills/map-planning/scripts/get-plan-path.sh)
#
# Output:
-#   .map/task_plan_<branch>.md
+#   .map/<branch>/task_plan_<branch>.md
#
# Examples:
-#   Branch: feature/map-planning -> .map/task_plan_feature-map-planning.md
-#   Branch: main -> .map/task_plan_main.md
-#   Not in repo -> .map/task_plan_main.md
+#   Branch: feature/map-planning -> .map/feature-map-planning/task_plan_feature-map-planning.md
+#   Branch: main -> .map/main/task_plan_main.md
+#   Not in repo -> .map/main/task_plan_main.md

set -euo pipefail

# Detect current git branch, default to 'main' if not in git repo
-BRANCH=$(git branch --show-current 2>/dev/null || echo 'main')
+BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'main')

# Handle empty branch (detached HEAD or git issue)
if [ -z "$BRANCH" ]; then
    BRANCH="main"
fi

-# Sanitize branch name: replace '/' with '-' for filesystem safety
-SANITIZED_BRANCH=$(echo "$BRANCH" | tr '/' '-')
+# Sanitize branch name for filesystem safety (matches MAP orchestrator convention)
+SANITIZED_BRANCH=$(echo "$BRANCH" | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||')

-# Output the plan file path
-echo ".map/task_plan_${SANITIZED_BRANCH}.md"
+# Fallback if sanitization produced empty string
+if [ -z "$SANITIZED_BRANCH" ]; then
+    SANITIZED_BRANCH="main"
+fi
+
+# Output the plan file path (nested directory convention)
+echo ".map/${SANITIZED_BRANCH}/task_plan_${SANITIZED_BRANCH}.md"
diff --git a/src/mapify_cli/templates/skills/map-planning/scripts/init-session.sh b/src/mapify_cli/templates/skills/map-planning/scripts/init-session.sh
index be75aa1..cf6ae96 100755
--- a/src/mapify_cli/templates/skills/map-planning/scripts/init-session.sh
+++ b/src/mapify_cli/templates/skills/map-planning/scripts/init-session.sh
@@ -3,16 +3,16 @@
# init-session.sh - Initialize planning files for new MAP session
#
# Description:
-#   Creates .map/ directory and copies templates for branch-scoped planning files.
+#   Creates .map/<branch>/ directory and copies templates for branch-scoped planning files.
#   Idempotent: skips files that already exist.
#
# Usage:
#   ${CLAUDE_PLUGIN_ROOT}/scripts/init-session.sh
#
# Created files:
-#   .map/task_plan_<branch>.md
-#   .map/findings_<branch>.md
-#   .map/progress_<branch>.md
+#   .map/<branch>/task_plan_<branch>.md
+#   .map/<branch>/findings_<branch>.md
+#   .map/<branch>/progress_<branch>.md

set -euo pipefail

@@ -22,17 +22,22 @@
SKILL_ROOT="$(dirname "$SCRIPT_DIR")"
TEMPLATE_DIR="$SKILL_ROOT/templates"

# Get branch name for file naming
-BRANCH=$(git branch --show-current 2>/dev/null || echo 'main')
+BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo 'main')
if [ -z "$BRANCH" ]; then
    BRANCH="main"
fi
-SANITIZED_BRANCH=$(echo "$BRANCH" | tr '/' '-')

-# Create .map directory
-MAP_DIR=".map"
+# Sanitize branch name (matches MAP orchestrator convention)
+SANITIZED_BRANCH=$(echo "$BRANCH" | sed -E 's|/|-|g; s|[^a-zA-Z0-9_.-]|-|g; s|-{2,}|-|g; s|^-||; s|-$||')
+if [ -z "$SANITIZED_BRANCH" ]; then
+    SANITIZED_BRANCH="main"
+fi
+
+# Create .map/<branch> directory (nested convention)
+MAP_DIR=".map/${SANITIZED_BRANCH}"
mkdir -p "$MAP_DIR"

-# Define file paths
+# Define file paths (nested under branch directory)
TASK_PLAN="$MAP_DIR/task_plan_${SANITIZED_BRANCH}.md"
FINDINGS="$MAP_DIR/findings_${SANITIZED_BRANCH}.md"
PROGRESS="$MAP_DIR/progress_${SANITIZED_BRANCH}.md"
diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md b/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md
index dc12a41..dbd253f 100644
--- a/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md
+++ b/src/mapify_cli/templates/skills/map-workflows-guide/SKILL.md
@@ -1,12 +1,22 @@
---
name: map-workflows-guide
-description: Comprehensive guide for choosing the right MAP workflow based on task type and requirements
+description: >-
+  Guide for choosing the right MAP workflow based on task type, risk level,
+  and token budget. Use when user asks "which workflow should I use",
+  "difference between map-fast and map-efficient", "when to use map-debug",
+  or compares MAP workflows. Do NOT use for actual workflow execution —
+  use /map-efficient, /map-fast, etc. instead. Do NOT use for CLI errors
+  (use map-cli-reference).
version: 1.0
+metadata:
+  author: azalio
+  version: 3.1.0
+  mcp-server: mem0
---

# MAP Workflows Guide

-This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides 5 specialized workflows, each optimized for different scenarios with varying token costs, learning capabilities, and quality gates.
+This skill helps you choose the optimal MAP workflow for your development tasks. MAP Framework provides **10 workflow commands**: 4 primary workflows (`/map-fast`, `/map-efficient`, `/map-debug`, `/map-debate`) and 6 supporting commands (`/map-review`, `/map-check`, `/map-plan`, `/map-release`, `/map-resume`, `/map-learn`). Each is optimized for different scenarios with varying token costs, learning capabilities, and quality gates. Two additional workflows (`/map-feature`, `/map-refactor`) are planned but not yet implemented.

## Quick Decision Tree

@@ -21,12 +31,12 @@ Answer these 5 questions to find your workflow:
   YES → Use /map-debug (70-80% tokens, focused analysis)
   NO → Continue to question 3

-3. Are you refactoring existing code or restructuring modules?
-   YES → Use /map-refactor (70-80% tokens, dependency analysis)
+3. Do stakeholders need documented reasoning and trade-off analysis?
+   YES → Use /map-debate (3x cost, Opus arbiter, explicit reasoning)
   NO → Continue to question 4

4. Is this critical infrastructure or security-sensitive code?
- YES → Use /map-feature (100% tokens, maximum validation) + YES → Use /map-efficient (60-70% tokens, recommended default) NO → Continue to question 5 5. Is this a change you'll maintain long-term or that has non-trivial impact? @@ -38,14 +48,20 @@ Answer these 5 questions to find your workflow: ## Workflow Comparison Matrix -| Aspect | `/map-fast` | `/map-efficient` | `/map-feature` | `/map-debug` | `/map-refactor` | -|--------|-----------|-----------------|----------------|-------------|-----------------| -| **Token Cost** | 40-50% | **60-70%** | 100% (baseline) | 70-80% | 70-80% | -| **Learning** | ❌ None | ✅ Batched | ✅ Per-subtask | ✅ Per-subtask | ✅ Per-subtask | -| **Quality Gates** | Basic | Essential | All 8 agents | Focused | Focused | -| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Always | ✅ Yes | ✅ Yes | -| **Best For** | Low-risk | **Production** | Critical | Bugs | Refactoring | -| **Recommendation** | Use sparingly | **DEFAULT** | High-risk | Issues | Changes | +| Aspect | `/map-fast` | `/map-efficient` | `/map-debug` | `/map-debate` | +|--------|-----------|-----------------|-------------|--------------| +| **Token Cost** | 40-50% | **60-70%** | 70-80% | ~3x baseline | +| **Learning** | ❌ None | ✅ Via /map-learn | ✅ Per-subtask | ✅ Via /map-learn | +| **Quality Gates** | Basic | Essential | Focused | Multi-variant | +| **Impact Analysis** | ❌ Skipped | ⚠️ Conditional | ✅ Yes | ⚠️ Conditional | +| **Multi-Variant** | ❌ Never | ⚠️ Optional (--self-moa) | ❌ Never | ✅ Always (3 variants) | +| **Synthesis Model** | N/A | Sonnet | N/A | **Opus** | +| **Best For** | Low-risk | **Production** | Bugs | Reasoning transparency | +| **Recommendation** | Use sparingly | **DEFAULT** | Issues | Complex decisions | + +> **Note:** `/map-feature` and `/map-refactor` are **planned but not yet implemented**. +> Use `/map-efficient` for critical features and refactoring tasks. +> See [Planned Workflows](#planned-workflows) below for details. 
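+
+A minimal illustration of how the matrix maps onto invocations (the command names and the `--self-moa` flag come from the matrix above; the task descriptions and flag placement are hypothetical examples):
+
+```bash
+# Recommended default for most production work
+/map-efficient add pagination to the posts API
+
+# Optional multi-variant synthesis inside the default workflow (Synthesizer agent; flag placement illustrative)
+/map-efficient --self-moa redesign the session storage layer
+
+# Full multi-variant debate with the Opus arbiter (~3x baseline cost)
+/map-debate choose between optimistic and pessimistic locking for inventory updates
+```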
--- @@ -61,12 +77,12 @@ Answer these 5 questions to find your workflow: **What you get:** - ✅ Full implementation (Actor generates code) - ✅ Basic validation (Monitor checks correctness) -- ✅ Quality check (Evaluator scores solution) +- ❌ NO quality scoring (Evaluator skipped) - ❌ NO impact analysis (Predictor skipped entirely) - ❌ NO learning (Reflector/Curator skipped) **Trade-offs:** -- Saves 50-60% tokens vs /map-feature +- Saves 50-60% tokens vs full pipeline (every agent per subtask) - mem0 never improves (no patterns stored) - Knowledge never accumulates - Minimal quality gates (only basic checks) @@ -102,14 +118,15 @@ Answer these 5 questions to find your workflow: **What you get:** - ✅ Full implementation (Actor) - ✅ Comprehensive validation (Monitor with feedback loops) -- ✅ Quality gates (Evaluator approval) - ✅ Impact analysis (Predictor runs conditionally) -- ✅ **Batched learning** (Reflector/Curator run once at end) +- ✅ Tests gate + Linter gate per subtask +- ✅ Final-Verifier (adversarial verification at end) +- ✅ **Learning via /map-learn** (Reflector/Curator, optional after workflow) **Optimization strategy:** - **Conditional Predictor:** Runs only if risk detected (security, breaking changes) - **Batched Learning:** Reflector/Curator run ONCE after all subtasks complete -- **Result:** 35-40% token savings vs /map-feature while preserving learning +- **Result:** 35-40% token savings vs full pipeline while preserving learning - **Same quality gates:** Monitor still validates each subtask **When Predictor runs:** @@ -140,58 +157,7 @@ Despite token optimization, preserves: --- -### 3. /map-feature — Critical Features 🏗️ - -**Use this when:** -- Implementing security-critical functionality -- First-time complex features requiring maximum validation -- High-risk changes affecting many systems -- You need complete assurance before production -- Learning is critical for future similar tasks - -**What you get:** -- ✅ Full implementation (Actor) -- ✅ Comprehensive validation (Monitor with loops) -- ✅ **Per-subtask impact analysis** (Predictor always runs) -- ✅ Quality gates (Evaluator always runs) -- ✅ **Per-subtask learning** (Reflector/Curator after each subtask) - -**Trade-offs:** -- 100% token cost (no optimization applied) -- Slower execution (maximum agent cycles) -- Maximum quality assurance -- Most comprehensive learning (frequent reflections) -- Best for high-stakes implementations - -**When this is required:** -- Authentication/authorization systems -- Payment processing -- Database schema changes -- Multi-service coordination -- Code that affects many dependencies - -**Example tasks:** -- "Implement secure JWT authentication system" -- "Refactor database schema for multi-tenancy" -- "Add payment processing via Stripe" -- "Build real-time notification system" - -**Command syntax:** -```bash -/map-feature [task description] -``` - -**Agent pipeline:** -``` -TaskDecomposer → Actor → Monitor → Predictor → -Evaluator → Reflector → Curator → [Next subtask] -``` - -**See also:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) - ---- - -### 4. /map-debug — Bug Fixes 🐛 +### 3. /map-debug — Bug Fixes 🐛 **Use this when:** - Fixing specific bugs or defects @@ -234,52 +200,27 @@ Evaluator → Reflector → Curator → [Next subtask] --- -### 5. 
/map-refactor — Code Restructuring 🔧 +### Planned Workflows -**Use this when:** -- Refactoring existing code for readability -- Improving code structure or design -- Cleaning up technical debt -- Renaming/reorganizing modules -- Extracting common logic +The following workflows are **planned but not yet implemented**. Use `/map-efficient` as a substitute for both. -**What you get:** -- ✅ Implementation (Actor) -- ✅ Validation (Monitor) -- ✅ **Dependency impact analysis** (Predictor focused on dependencies) -- ✅ Quality gates (Evaluator) -- ✅ Learning (Reflector/Curator) +#### /map-feature — Critical Features (PLANNED) -**Specialized for:** -- Breaking change detection -- Dependency tracking -- Migration planning -- Careful phased refactoring +Intended for security-critical and high-risk features requiring maximum validation (100% token cost, per-subtask learning, Predictor always runs). **Not yet implemented.** Use `/map-efficient` instead — it provides the same agent pipeline with conditional Predictor and batched learning. -**Example tasks:** -- "Refactor auth service to separate concerns" -- "Extract common validation logic into shared module" -- "Rename User model to Account throughout codebase" -- "Convert callback-based API to promise-based" +**Design reference:** [resources/map-feature-deep-dive.md](resources/map-feature-deep-dive.md) -**Command syntax:** -```bash -/map-refactor [refactoring description] -``` +#### /map-refactor — Code Restructuring (PLANNED) -**Impact analysis includes:** -- Which files/modules depend on changed code -- Potential breaking changes -- Migration strategy -- Scope of refactoring +Intended for refactoring with dependency-focused impact analysis and breaking change detection. **Not yet implemented.** Use `/map-efficient` instead — describe the refactoring intent in the task description for appropriate Predictor analysis. 
-**See also:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) +**Design reference:** [resources/map-refactor-deep-dive.md](resources/map-refactor-deep-dive.md) --- ## Understanding MAP Agents -MAP workflows orchestrate **8 specialized agents**, each with specific responsibilities: +MAP workflows orchestrate **12 specialized agents**, each with specific responsibilities: ### Execution & Validation Agents @@ -306,6 +247,7 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Checks completeness - Approves/rejects solution - Feedback loop: Returns to Actor if score < threshold +- **Only in /map-debug, /map-review** (skipped in /map-efficient, /map-fast, /map-debate) ### Analysis Agents @@ -314,7 +256,7 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Predicts side effects - Identifies risks and breaking changes - **Conditional in /map-efficient** (runs if risk detected) -- **Always in /map-feature** (runs per subtask) +- **Always in /map-debug** (focused analysis) ### Learning Agents @@ -323,8 +265,8 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Extracts reusable patterns - Searches mem0 for existing knowledge via `mcp__mem0__map_tiered_search` - Prevents duplicate pattern storage -- **Batched in /map-efficient** (runs once at end) -- **Per-subtask in /map-feature** (extracts frequently) +- **Batched in /map-efficient** (runs once at end, via /map-learn) +- **Skipped in /map-fast** (no learning) **Curator** — Knowledge management - Stores patterns in mem0 via `mcp__mem0__map_add_pattern` @@ -341,6 +283,34 @@ MAP workflows orchestrate **8 specialized agents**, each with specific responsib - Validates examples - Verifies external dependency docs current +### Synthesis Agents + +**Debate-Arbiter** — Multi-variant cross-evaluation (MAP Debate) +- Cross-evaluates Actor variants with explicit reasoning +- Synthesizes optimal solution from multiple approaches +- Uses Opus model for reasoning transparency +- **Only in /map-debate workflow** + +**Synthesizer** — Solution synthesis +- Extracts decisions from multiple variants +- Generates unified code from best elements (Self-MoA) +- Merges insights across Actor outputs +- **Used in /map-efficient with --self-moa flag** + +### Discovery & Verification Agents + +**Research-Agent** — Codebase discovery +- Heavy codebase reading with compressed output +- Gathers context proactively before Actor implementation +- Prevents context pollution in implementation agents +- **Used in /map-plan, /map-efficient, /map-debug** + +**Final-Verifier** — Adversarial verification (Ralph Loop) +- Root cause analysis via adversarial testing +- Terminal verification after all other agents +- Ensures no regressions or overlooked issues +- **Used in /map-check, /map-efficient** + --- ## Decision Flowchart @@ -364,26 +334,10 @@ START: What type of development task? │ │ NO ↓ │ -├─────────────────────────────────────┐ -│ Refactoring existing code? │ -│ (Improving structure, renaming) │ -├─────────────────────────────────────┘ -│ YES → /map-refactor (70-80% tokens, dependency tracking) -│ -│ NO ↓ -│ -├─────────────────────────────────────┐ -│ Critical/high-risk feature? │ -│ (Auth, payments, security, database)│ -├─────────────────────────────────────┘ -│ YES → /map-feature (100% tokens, full validation) -│ -│ NO ↓ -│ └─────────────────────────────────────┐ - Standard production feature? 
│ - (/map-efficient recommended) ←──────┘ - YES → /map-efficient (60-70% tokens, RECOMMENDED) + Everything else (features, │ + refactoring, critical code) ←──────┘ + → /map-efficient (60-70% tokens, RECOMMENDED) ``` --- @@ -410,22 +364,9 @@ Avoid /map-fast for: - Broad refactors or multi-module changes - High uncertainty requirements -**Q: What's the practical difference between /map-feature and /map-efficient?** - -A: Token cost vs learning frequency: +**Q: What about /map-feature and /map-refactor?** -**/map-feature:** Maximum assurance -- Predictor runs after EVERY subtask (100% analysis) -- Reflector/Curator run after EVERY subtask -- Cost: 100% tokens, slowest execution -- Best for: First implementations, critical systems - -**/map-efficient:** Smart optimization -- Predictor runs ONLY when risk detected (conditional) -- Reflector/Curator run ONCE at end (batched) -- Cost: 60-70% tokens, faster execution -- Same learning: Patterns still captured at end -- Best for: Standard features, most development +A: These are **planned but not yet implemented**. Use `/map-efficient` for all feature development and refactoring tasks. `/map-efficient` provides the full agent pipeline (Actor, Monitor, conditional Predictor, Tests/Linter gates, Final-Verifier) with optional learning via `/map-learn`. Describe the risk level and refactoring intent in your task description for appropriate Predictor analysis. **Q: Can I switch workflows mid-task?** @@ -477,9 +418,9 @@ For detailed information on each workflow: - **[map-fast Deep Dive](resources/map-fast-deep-dive.md)** — Token breakdown, skip conditions, risks - **[map-efficient Deep Dive](resources/map-efficient-deep-dive.md)** — Optimization strategy, Predictor conditions, batching -- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Full pipeline, cost analysis, when required - **[map-debug Deep Dive](resources/map-debug-deep-dive.md)** — Debugging strategies, error analysis, best practices -- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Impact analysis, breaking changes, migration planning +- **[map-feature Deep Dive](resources/map-feature-deep-dive.md)** — Design reference (PLANNED, not yet implemented) +- **[map-refactor Deep Dive](resources/map-refactor-deep-dive.md)** — Design reference (PLANNED, not yet implemented) Agent & system details: @@ -490,16 +431,16 @@ Agent & system details: ## Real-World Examples -### Example 1: Choosing between /map-efficient and /map-feature +### Example 1: Choosing /map-efficient for a critical feature **Task:** "Add OAuth2 authentication" **Analysis:** -- Affects security ✓ (high-risk indicator) -- Affects multiple modules ✓ (breaking changes possible) -- First implementation of OAuth2 ✓ (high complexity) +- Affects security (high-risk indicator) +- Affects multiple modules (breaking changes possible) +- First implementation of OAuth2 (high complexity) -**Decision:** `/map-feature` (worth 100% token cost for critical feature) +**Decision:** `/map-efficient` — describe the security-sensitive nature in the task description. Predictor will trigger conditionally on security-related subtasks. ### Example 2: Choosing /map-debug @@ -540,7 +481,7 @@ MAP: 🎯 Suggests /map-efficient ``` MAP: "Is this for production?" User: "Yes, but critical feature" -MAP: 🎯 Suggests /map-feature instead +MAP: 🎯 Suggests /map-efficient with --self-moa instead ``` **Direct command:** @@ -555,7 +496,7 @@ MAP: 📚 Loads this skill for context 1. 
**Default to /map-efficient** — It's the recommended choice for 80% of tasks 2. **Use /map-fast sparingly** — Only for small, low-risk changes with clear scope -3. **Reserve /map-feature for critical paths** — Don't overuse, save for auth/payments/security +3. **Use /map-efficient for critical paths** — Describe risk context in the task description for appropriate Predictor triggers 4. **Monitor pattern growth** — Use mem0 search to see learning improving 5. **Trust the optimization** — /map-efficient preserves quality while cutting token usage 6. **Review deep dives** — When in doubt, check the appropriate deep-dive resource @@ -566,11 +507,60 @@ MAP: 📚 Loads this skill for context ## Next Steps 1. **First time using MAP?** Start with `/map-efficient` -2. **Have a critical feature?** See [map-feature-deep-dive.md](resources/map-feature-deep-dive.md) +2. **Have a critical feature?** Use `/map-efficient` with risk context in the task description 3. **Debugging an issue?** See [map-debug-deep-dive.md](resources/map-debug-deep-dive.md) 4. **Understanding agents?** See [Agent Architecture](resources/agent-architecture.md) --- +## Examples + +### Example 1: Choosing a workflow for a new feature + +**User says:** "I need to add JWT authentication to the API" + +**Actions:** +1. Assess risk level — security-sensitive (high-risk indicator) +2. Check if first implementation — yes, OAuth/JWT is new +3. Multiple modules affected — auth middleware, user service, token storage + +**Result:** Recommend `/map-efficient` — describe the security context in the task. Predictor will trigger on security-sensitive subtasks. Batched learning captures patterns at the end. + +### Example 2: Quick fix with clear scope + +**User says:** "Update the error message in the login form" + +**Actions:** +1. Assess risk — low, localized text change +2. Check blast radius — single file, no dependencies +3. No security implications + +**Result:** Recommend `/map-fast` — small, low-risk change with clear acceptance criteria. No learning needed. + +### Example 3: Debugging a test failure + +**User says:** "Tests in auth.test.ts are failing after the last merge" + +**Actions:** +1. Identify task type — debugging/fixing specific issue +2. Need root cause analysis — yes, regression after merge +3. Not a new feature or refactor + +**Result:** Recommend `/map-debug` — focused on diagnosing failures with root cause analysis and regression prevention. + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| Wrong workflow chosen mid-task | Cannot switch workflows during execution | Complete current workflow, then restart with correct one | +| Predictor never runs in /map-efficient | Subtasks assessed as low-risk | Expected behavior; Predictor is conditional. 
Use /map-debug for guaranteed analysis | +| No patterns stored after /map-fast | /map-fast skips learning agents | By design — use /map-efficient + /map-learn for pattern accumulation | +| mem0 search returns empty | mem0 MCP not configured or namespaces mismatch | Verify mem0 in `.claude/mcp_config.json`, check namespace conventions | +| Skill suggests wrong workflow | Description trigger mismatch | Check skill-rules.json triggers; refine query wording | + +--- + **Skill Version:** 1.0 **Last Updated:** 2025-11-03 **Recommended Reading Time:** 5-10 minutes diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md index 96173ed..8a158fc 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/agent-architecture.md @@ -1,6 +1,6 @@ # Agent Architecture -MAP Framework orchestrates 8 specialized agents in a coordinated workflow. +MAP Framework orchestrates 12 specialized agents in a coordinated workflow. ## Agent Categories @@ -29,8 +29,8 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Role:** Quality scoring and final approval - **Input:** Actor + Monitor results - **Output:** Quality score (0-10), approve/reject decision -- **When it runs:** /map-fast, /map-feature (per subtask), /map-debug, /map-refactor -- **Skipped in:** /map-efficient (Monitor provides sufficient validation) +- **When it runs:** /map-debug, /map-review +- **Skipped in:** /map-efficient, /map-fast (Monitor provides sufficient validation) ### Analysis @@ -39,9 +39,9 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Input:** Planned changes - **Output:** Affected files, breaking changes, risk assessment - **When it runs:** - - /map-feature: Always (per subtask) - /map-efficient: Conditional (only if Monitor flags high risk) - - /map-debug, /map-refactor: Always (focused analysis) + - /map-debug: Always (focused analysis) + - /map-debate: Conditional (same as /map-efficient) - /map-fast: Never (skipped) ### Learning @@ -51,8 +51,7 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Input:** All agent outputs for subtask(s) - **Output:** Insights, patterns discovered, pattern updates - **When it runs:** - - /map-feature: Per subtask - - /map-efficient, /map-debug, /map-refactor: Batched (once at end) + - /map-efficient, /map-debug, /map-debate: Batched (once at end, via /map-learn) - /map-fast: Never (skipped) - **MCP Tool:** Uses `mcp__mem0__map_tiered_search` to check for existing patterns @@ -73,6 +72,34 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. - **Output:** Completeness assessment, dependency analysis - **When it runs:** On-demand (not part of standard workflows) +### Synthesis + +**9. Debate-Arbiter** +- **Role:** Cross-evaluates Actor variants with explicit reasoning +- **Input:** Multiple Actor outputs (variants) +- **Output:** Synthesized optimal solution with reasoning trace +- **When it runs:** /map-debate (per subtask, uses Opus model) + +**10. Synthesizer** +- **Role:** Extracts decisions from variants and generates unified code (Self-MoA) +- **Input:** Multiple Actor outputs +- **Output:** Merged implementation combining best elements +- **When it runs:** /map-efficient with --self-moa flag + +### Discovery & Verification + +**11. 
Research-Agent** +- **Role:** Heavy codebase reading with compressed output +- **Input:** Research question or exploration goal +- **Output:** Compressed context for implementation agents +- **When it runs:** /map-plan, /map-efficient, /map-debug (before Actor) + +**12. Final-Verifier** +- **Role:** Adversarial verification with Root Cause Analysis (Ralph Loop) +- **Input:** Complete implementation after all other agents +- **Output:** Verification verdict, regression analysis +- **When it runs:** /map-check, /map-efficient (terminal verification) + --- ## Orchestration Patterns @@ -80,8 +107,8 @@ MAP Framework orchestrates 8 specialized agents in a coordinated workflow. ### Linear Pipeline (map-fast) ``` -TaskDecomposer → Actor → Monitor → Evaluator → Done -(No learning, no impact analysis) +TaskDecomposer → Actor → Monitor → Apply → Done +(No Evaluator, no Predictor, no learning) ``` ### Conditional Pipeline (map-efficient) @@ -90,23 +117,25 @@ TaskDecomposer → Actor → Monitor → Evaluator → Done TaskDecomposer ↓ For each subtask: - Actor → Monitor → [Predictor if high risk] → Apply changes + Actor → Monitor → [Predictor if high risk] → Tests → Linter → Apply ↓ - Batch learning: - Reflector (all subtasks) → Curator → Done + Final-Verifier (adversarial verification of entire goal) + ↓ + Done! Optional: /map-learn → Reflector → Curator ``` -### Full Pipeline (map-feature) +### Multi-Variant Pipeline (map-debate) ``` TaskDecomposer ↓ For each subtask: - Actor → Monitor → Predictor → Evaluator - ↓ if approved - Reflector → Curator → Apply changes + Actor×3 → Monitor×3 → debate-arbiter (Opus) + ↓ synthesized + Monitor → [Predictor if high risk] → Apply changes ↓ - Done + Batch learning (via /map-learn): + Reflector (all subtasks) → Curator → Done ``` --- @@ -210,10 +239,14 @@ Agents communicate via structured JSON: | TaskDecomposer | ~1.5K | Once | All workflows | | Actor | ~2-3K | Per subtask | All workflows | | Monitor | ~1K | Per Actor output | All workflows | -| Evaluator | ~0.8K | Per subtask | map-fast, map-feature | +| Evaluator | ~0.8K | Per subtask | map-debug, map-review | | Predictor | ~1.5K | Per subtask or conditional | Varies | | Reflector | ~2K | Per subtask or batched | Varies | | Curator | ~1.5K | After Reflector | Varies | +| Debate-Arbiter | ~3-4K | Per subtask | map-debate only | +| Synthesizer | ~2K | Per subtask | map-efficient (--self-moa) | +| Research-Agent | ~2-3K | Once (before Actor) | map-plan, map-efficient, map-debug | +| Final-Verifier | ~2K | Once (terminal) | map-check, map-efficient | **map-efficient savings:** - Skip Evaluator: ~0.8K per subtask @@ -228,7 +261,7 @@ Agents communicate via structured JSON: To add a custom agent: 1. Create `.claude/agents/my-agent.md` with prompt template -2. Add to workflow command (e.g., `.claude/commands/map-feature.md`) +2. Add to workflow command (e.g., `.claude/commands/map-efficient.md`) 3. Define when it runs (before/after which agents) 4. 
Specify input/output format diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md index 5797b92..1e69fbf 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-efficient-deep-dive.md @@ -36,27 +36,28 @@ Subtask 3: Add unit tests (tests/auth.test.ts) ### Reflector/Curator: Batched Learning -**Standard workflow (/map-feature):** +**Full pipeline (theoretical baseline):** ``` Subtask 1 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator Subtask 2 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator Subtask 3 → Actor → Monitor → Predictor → Evaluator → Reflector → Curator ``` -Result: 3 × Reflector/Curator cycles +Result: 3 × (Predictor + Evaluator + Reflector + Curator) cycles **Optimized workflow (/map-efficient):** ``` -Subtask 1 → Actor → Monitor → [Predictor?] → Evaluator -Subtask 2 → Actor → Monitor → [Predictor?] → Evaluator -Subtask 3 → Actor → Monitor → [Predictor?] → Evaluator +Subtask 1 → Actor → Monitor → [Predictor if high risk] → Apply +Subtask 2 → Actor → Monitor → [Predictor if high risk] → Apply +Subtask 3 → Actor → Monitor → [Predictor if high risk] → Apply ↓ - Reflector (analyzes ALL subtasks) + Final-Verifier (adversarial verification) ↓ - Curator (consolidates patterns) + Done! Optionally run /map-learn: + Reflector (analyzes ALL subtasks) → Curator (consolidates patterns) ``` -Result: 1 × Reflector/Curator cycle +Result: No Evaluator, no per-subtask Reflector/Curator. Learning decoupled to /map-learn. -**Token savings:** 35-40% vs /map-feature +**Token savings:** 35-40% vs full pipeline --- @@ -71,7 +72,7 @@ Result: 1 × Reflector/Curator cycle - Most development work (80% of tasks) ❌ **Don't use for:** -- Critical infrastructure (use /map-feature) +- Critical infrastructure (use /map-efficient with --self-moa or /map-debate) - Small, low-risk changes (use /map-fast) - Simple bug fixes (use /map-debug) @@ -81,16 +82,16 @@ Result: 1 × Reflector/Curator cycle **Myth:** "Optimized workflows sacrifice quality" -**Reality:** /map-efficient preserves all quality gates: -- ✅ Monitor validates every subtask -- ✅ Evaluator scores every implementation -- ✅ Predictor runs when needed (conditional) -- ✅ Reflector analyzes complete context -- ✅ Curator consolidates all patterns +**Reality:** /map-efficient preserves essential quality gates: +- ✅ Monitor validates every subtask (correctness gate) +- ✅ Predictor runs when needed (conditional impact analysis) +- ✅ Tests gate and linter gate run per subtask +- ✅ Final-Verifier checks entire goal at end (adversarial verification) +- ✅ Learning available via /map-learn after workflow completes -**What's optimized:** -- Frequency (when agents run) -- NOT functionality (what agents do) +**What's optimized (intentionally omitted per-subtask):** +- Evaluator — Monitor validates correctness directly +- Reflector/Curator — decoupled to /map-learn (optional, run after workflow) --- @@ -115,38 +116,40 @@ ST-1: Pagination params ├─ Actor: Modify routes/posts.ts ├─ Monitor: ✅ Valid ├─ Predictor: ⏭️ SKIPPED (low risk) -└─ Evaluator: ✅ Approved (score: 8/10) +├─ Tests gate: ✅ Passed +└─ Linter gate: ✅ Passed ST-2: Service update ├─ Actor: Modify services/PostService.ts ├─ Monitor: ✅ Valid ├─ Predictor: ✅ RAN (affects API contract) │ └─ Impact: Breaking change if clients expect 
all posts -├─ Evaluator: ✅ Approved (score: 9/10) +├─ Tests gate: ✅ Passed └─ Note: "Add API versioning or deprecation notice" ST-3: Integration tests ├─ Actor: Add tests/posts.integration.test.ts ├─ Monitor: ✅ Valid (tests pass) ├─ Predictor: ⏭️ SKIPPED (test file) -└─ Evaluator: ✅ Approved (score: 8/10) - -Reflector (batched): -├─ Analyzed: 3 subtasks -├─ Searched mem0: Found similar pagination patterns -└─ Extracted: - - Pagination parameter pattern (offset/limit) - - API versioning consideration - - Integration test structure - -Curator (batched): -├─ Checked duplicates: 2 similar bullets found -├─ Added: 1 new bullet (API pagination pattern) -└─ Updated: 1 existing bullet (test coverage++) +├─ Tests gate: ✅ Passed +└─ Linter gate: ✅ Passed + +Final-Verifier: ✅ All subtasks verified, goal achieved + +Optional /map-learn: + Reflector (batched): + ├─ Analyzed: 3 subtasks + ├─ Searched mem0: Found similar pagination patterns + └─ Extracted: pagination pattern, API versioning, test structure + + Curator (batched): + ├─ Checked duplicates: 2 similar bullets found + ├─ Added: 1 new bullet (API pagination pattern) + └─ Updated: 1 existing bullet (test coverage++) ``` **Token usage:** -- /map-feature: ~12k tokens +- Full pipeline: ~12k tokens - /map-efficient: ~7.5k tokens - **Savings: 37.5%** diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md index 5061426..6cb71aa 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-fast-deep-dive.md @@ -30,6 +30,11 @@ ### Agents NOT Called +**Evaluator (Quality Scoring)** +- No quality scoring (0-10 scale) +- No approval/rejection gate +- Monitor handles basic correctness only + **Predictor (Impact Analysis)** - No dependency analysis - Breaking changes undetected @@ -51,7 +56,7 @@ - Basic implementation ✅ - Correctness validation ✅ -**Result:** Functional code, but zero learning +**Result:** Functional code, but zero learning and no quality scoring --- @@ -62,13 +67,13 @@ | TaskDecomposer | ~1.5K | ✅ Runs | | Actor | ~2-3K | ✅ Runs | | Monitor | ~1K | ✅ Runs | -| Evaluator | ~0.8K | ✅ Runs | +| Evaluator | ~0.8K | ❌ Skipped | | Predictor | ~1.5K | ❌ Skipped | | Reflector | ~2K | ❌ Skipped | | Curator | ~1.5K | ❌ Skipped | -**Total saved:** ~5K per subtask -**Percentage:** 40-50% vs /map-feature +**Total saved:** ~5.8K per subtask +**Percentage:** 40-50% vs full pipeline --- @@ -88,10 +93,10 @@ Next step: If scope grows, switch to /map-efficient ``` TaskDecomposer: 2 subtasks ST-1: Setup React Query client - Actor → Monitor → Evaluator → Apply + Actor → Monitor → Apply ST-2: Test with one API endpoint - Actor → Monitor → Evaluator → Apply -Done. No Reflector, no Curator, no patterns learned. + Actor → Monitor → Apply +Done. No Evaluator, no Reflector, no Curator, no patterns learned. ``` **Appropriate because:** @@ -119,7 +124,7 @@ Risk: High (security, breaking changes) 3. No Curator → Team doesn't learn from mistakes 4. High risk for under-validation mindset -**Correct choice:** `/map-feature` (critical infrastructure) +**Correct choice:** `/map-efficient` (critical infrastructure) --- @@ -190,7 +195,7 @@ Why? - Faster for tiny tasks (<50 lines) - Use when MAP overhead doesn't make sense -**3. /map-feature** +**3. 
/map-efficient or /map-debate** - For high-risk changes - Security or infrastructure work diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md index 1e0c9e4..7ce5166 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-feature-deep-dive.md @@ -1,5 +1,9 @@ # /map-feature Deep Dive +> **STATUS: PLANNED — NOT YET IMPLEMENTED.** +> This workflow is designed but not yet available as a command. +> Use `/map-efficient` for all feature development, including critical/high-risk features. + ## When to Use **Critical features requiring maximum confidence:** diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md index d9c3510..2faed47 100644 --- a/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md +++ b/src/mapify_cli/templates/skills/map-workflows-guide/resources/map-refactor-deep-dive.md @@ -1,5 +1,9 @@ # /map-refactor Deep Dive +> **STATUS: PLANNED — NOT YET IMPLEMENTED.** +> This workflow is designed but not yet available as a command. +> Use `/map-efficient` for refactoring tasks. + ## When to Use **Code restructuring without behavior changes:** diff --git a/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py b/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py new file mode 100755 index 0000000..6a576a0 --- /dev/null +++ b/src/mapify_cli/templates/skills/map-workflows-guide/scripts/validate-workflow-choice.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Validate that a workflow choice matches task characteristics. 
+ +Usage: + python validate-workflow-choice.py --workflow --risk --size --type + +Example: + python validate-workflow-choice.py --workflow map-efficient --risk medium --size medium --type feature + python validate-workflow-choice.py --workflow map-fast --risk high --size large --type security + +Exit codes: + 0 - Workflow choice is appropriate + 1 - Workflow choice is suboptimal (warning) + 2 - Workflow choice is inappropriate (error) +""" + +import argparse +import json +import sys + +# Workflow appropriateness rules +WORKFLOW_RULES = { + "map-fast": { + "allowed_risk": ["low"], + "allowed_size": ["small"], + "allowed_types": ["fix", "tweak", "maintenance", "docs"], + "forbidden_types": ["security", "auth", "payment", "database-schema"], + }, + "map-efficient": { + "allowed_risk": ["low", "medium", "high"], + "allowed_size": ["small", "medium", "large"], + "allowed_types": [ + "feature", + "enhancement", + "fix", + "tweak", + "maintenance", + "docs", + "security", + "auth", + "payment", + "database-schema", + "infrastructure", + "refactor", + "restructure", + "rename", + "extract", + "cleanup", + ], + "forbidden_types": [], + }, + "map-debug": { + "allowed_risk": ["low", "medium", "high"], + "allowed_size": ["small", "medium", "large"], + "allowed_types": ["bug", "fix", "test-failure", "error", "regression"], + "forbidden_types": ["feature", "refactor"], + }, +} + +# Recommendations for risky combinations +RISK_OVERRIDES = { + ("map-fast", "high"): "map-efficient", + ("map-fast", "medium"): "map-efficient", +} + + +def validate(workflow: str, risk: str, size: str, task_type: str) -> dict: + """Validate workflow choice against task characteristics. + + Returns dict with: + valid: bool + level: "ok" | "warning" | "error" + message: str + recommendation: str | None + """ + if workflow not in WORKFLOW_RULES: + return { + "valid": False, + "level": "error", + "message": f"Unknown workflow: {workflow}", + "recommendation": "map-efficient", + } + + rules = WORKFLOW_RULES[workflow] + issues = [] + + # Check risk level + if risk not in rules["allowed_risk"]: + issues.append(f"Risk level '{risk}' is too high for {workflow}") + + # Check size + if size not in rules["allowed_size"]: + issues.append(f"Size '{size}' is not suitable for {workflow}") + + # Check forbidden types + if task_type in rules["forbidden_types"]: + issues.append(f"Task type '{task_type}' is forbidden for {workflow}") + + # Check risk overrides + override_key = (workflow, risk) + recommendation = RISK_OVERRIDES.get(override_key) + + if issues: + level = "error" if any("forbidden" in i for i in issues) else "warning" + return { + "valid": False, + "level": level, + "message": "; ".join(issues), + "recommendation": recommendation or "map-efficient", + } + + return { + "valid": True, + "level": "ok", + "message": f"Workflow '{workflow}' is appropriate for {risk}-risk {size} {task_type} task", + "recommendation": None, + } + + +def main(): + parser = argparse.ArgumentParser(description="Validate MAP workflow choice") + parser.add_argument( + "--workflow", + required=True, + choices=list(WORKFLOW_RULES.keys()), + help="Chosen workflow", + ) + parser.add_argument( + "--risk", + required=True, + choices=["low", "medium", "high"], + help="Task risk level", + ) + parser.add_argument( + "--size", + required=True, + choices=["small", "medium", "large"], + help="Task size", + ) + parser.add_argument("--type", required=True, dest="task_type", help="Task type") + parser.add_argument("--json", action="store_true", help="Output as JSON") + + args 
= parser.parse_args() + result = validate(args.workflow, args.risk, args.size, args.task_type) + + if args.json: + print(json.dumps(result, indent=2)) + else: + status = {"ok": "OK", "warning": "WARNING", "error": "ERROR"}[result["level"]] + print(f"[{status}] {result['message']}") + if result["recommendation"]: + print(f" Recommendation: Use {result['recommendation']} instead") + + exit_codes = {"ok": 0, "warning": 1, "error": 2} + sys.exit(exit_codes[result["level"]]) + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates/skills/skill-rules.json b/src/mapify_cli/templates/skills/skill-rules.json index c7eda29..20aac1d 100644 --- a/src/mapify_cli/templates/skills/skill-rules.json +++ b/src/mapify_cli/templates/skills/skill-rules.json @@ -19,11 +19,58 @@ ], "intentPatterns": [ "(which|what).*?(workflow|mode).*?(use|choose)", - "(difference|compare).*?(map-fast|map-efficient|map-feature)", + "(difference|compare).*?(map-fast|map-efficient|map-debug)", "(when|how).*(choose|use|pick).*(workflow|map-\\w+)", "explain.*?(workflow|map-fast|map-efficient)" ] } + }, + "map-planning": { + "type": "domain", + "enforcement": "suggest", + "priority": "medium", + "description": "File-based planning with branch-scoped task tracking", + "promptTriggers": { + "keywords": [ + "create plan", + "task plan", + "track progress", + "planning", + ".map directory", + "show task status", + "resume work" + ], + "intentPatterns": [ + "(create|make|start).*(plan|planning)", + "(track|show|check).*(progress|status|tasks)", + "(resume|continue).*(work|task|plan)", + "\\.map.*(directory|folder|files)" + ] + } + }, + "map-cli-reference": { + "type": "domain", + "enforcement": "suggest", + "priority": "high", + "description": "CLI and MCP error corrections", + "promptTriggers": { + "keywords": [ + "mapify command", + "mapify error", + "no such command", + "no such option", + "mem0 mcp", + "validate graph", + "mapify init", + "mapify check" + ], + "intentPatterns": [ + "(mapify|mem0).*(error|command|help|usage)", + "(no such).*(command|option)", + "validate.*(graph|dependency)", + "(how to|how do).*(mapify|mem0)" + ] + } } } } diff --git a/src/mapify_cli/templates/workflow-rules.json b/src/mapify_cli/templates/workflow-rules.json index c6933c9..01df164 100644 --- a/src/mapify_cli/templates/workflow-rules.json +++ b/src/mapify_cli/templates/workflow-rules.json @@ -31,9 +31,9 @@ ] } }, - "map-feature": { + "map-efficient": { "priority": "high", - "description": "Implement new features (critical, full validation)", + "description": "Production features, refactoring, critical code (recommended default, 60-70% tokens)", "promptTriggers": { "keywords": [ "implement", @@ -41,48 +41,47 @@ "new feature", "create", "build", - "critical" - ], - "intentPatterns": [ - "(implement|add|create|build).*?(feature|functionality)", - "new.*?(feature|component|module)", - "critical.*?(feature|implementation)" - ] - } - }, - "map-efficient": { - "priority": "high", - "description": "Production features (optimized workflow, 60-70% tokens)", - "promptTriggers": { - "keywords": [ + "critical", "production", "optimize", "enhance", "improve", - "update feature" + "update feature", + "refactor", + "restructure", + "reorganize", + "clean up", + "improve structure" ], "intentPatterns": [ + "(implement|add|create|build).*?(feature|functionality)", + "new.*?(feature|component|module)", + "critical.*?(feature|implementation)", "(optimize|enhance|improve).*?(feature|code|implementation)", "production.*?(feature|deploy)", - 
"update.*?(feature|functionality)" + "update.*?(feature|functionality)", + "(refactor|restructure|reorganize).*?(code|component|module)", + "clean.*?up.*?(code|structure)", + "improve.*?(structure|architecture)" ] } }, - "map-refactor": { + "map-debate": { "priority": "medium", - "description": "Refactor code, improve structure", + "description": "Multi-variant synthesis with Opus arbiter for reasoning transparency", "promptTriggers": { "keywords": [ - "refactor", - "restructure", - "reorganize", - "clean up", - "improve structure" + "debate", + "compare approaches", + "trade-offs", + "reasoning", + "multiple variants", + "architectural decision" ], "intentPatterns": [ - "(refactor|restructure|reorganize).*?(code|component|module)", - "clean.*?up.*?(code|structure)", - "improve.*?(structure|architecture)" + "(compare|debate|weigh).*?(approaches|options|trade-offs)", + "(need|want).*?(reasoning|transparency|justification)", + "(architectural|design).*?(decision|choice)" ] } }, diff --git a/src/mapify_cli/verification_recorder.py b/src/mapify_cli/verification_recorder.py index b384251..63649b0 100644 --- a/src/mapify_cli/verification_recorder.py +++ b/src/mapify_cli/verification_recorder.py @@ -1,11 +1,12 @@ """Verification results recorder for MAP Framework. -Records verification results to .map/verification_results_.json +Records verification results to .map/verification_results_.json with atomic writes to prevent concurrent write corruption. """ import json import os +import re import sys import tempfile from pathlib import Path @@ -35,19 +36,21 @@ class VerificationResults(TypedDict): def _sanitize_branch_name(branch: str) -> str: """Sanitize branch name for use in filenames. - Replaces characters that could cause path issues (like '/') with underscores. + Replaces characters that could cause path issues with dashes, + consistent with MAP framework branch sanitization elsewhere. Args: branch: Git branch name (e.g., 'feature/foo', 'main') Returns: - Sanitized branch name safe for filenames (e.g., 'feature_foo', 'main') + Sanitized branch name safe for filenames (e.g., 'feature-foo', 'main') """ - # Replace forward slashes (common in feature/bugfix branches) with underscores - sanitized = branch.replace("/", "_") - # Also handle backslashes just in case - sanitized = sanitized.replace("\\", "_") - return sanitized + sanitized = branch.replace("/", "-") + sanitized = re.sub(r"[^a-zA-Z0-9_.-]", "-", sanitized) + sanitized = re.sub(r"-+", "-", sanitized).strip("-") + if ".." 
in sanitized or sanitized.startswith("."): + return "default" + return sanitized or "default" def _log_warning(message: str) -> None: diff --git a/tests/test_command_templates.py b/tests/test_command_templates.py index 882a5d3..39cb9f4 100644 --- a/tests/test_command_templates.py +++ b/tests/test_command_templates.py @@ -90,13 +90,18 @@ def test_map_efficient_suggests_map_learn(self, templates_commands_dir): assert "optional" in content.lower(), "Should mention /map-learn is optional" def test_all_command_templates_exist(self, templates_commands_dir): - """Test that all expected command template files exist.""" + """Test that all 10 expected command template files exist.""" expected_commands = [ + "map-check.md", # Quality gates + "map-debate.md", # Multi-variant with Opus arbiter + "map-debug.md", # Debugging workflow "map-efficient.md", # Recommended workflow - "map-debug.md", "map-fast.md", # Minimal workflow "map-learn.md", # Optional learning + "map-plan.md", # Decomposition only "map-release.md", # Release workflow + "map-resume.md", # Resume interrupted workflow + "map-review.md", # Code review ] for command in expected_commands: diff --git a/tests/test_entity_extractor.py b/tests/test_entity_extractor.py index cea6b8e..5a3e630 100644 --- a/tests/test_entity_extractor.py +++ b/tests/test_entity_extractor.py @@ -233,13 +233,13 @@ def test_extract_workflow(self, extractor): def test_extract_map_workflow(self, extractor): """Test extracting MAP Framework workflows.""" - text = "Use map-feature workflow for implementation and map-debug for troubleshooting." + text = "Use map-efficient workflow for implementation and map-debug for troubleshooting." entities = extractor.extract_entities(text) workflow_entities = [e for e in entities if e.type == EntityType.WORKFLOW] workflow_names = {e.name.lower() for e in workflow_entities} - assert any("map" in name and "feature" in name for name in workflow_names) + assert any("map" in name and "efficient" in name for name in workflow_names) assert any("map" in name and "debug" in name for name in workflow_names) # ============================================================================ diff --git a/tests/test_mapify_cli.py b/tests/test_mapify_cli.py index b6f57dc..a1c72ff 100644 --- a/tests/test_mapify_cli.py +++ b/tests/test_mapify_cli.py @@ -185,14 +185,14 @@ def test_init_basic(self, tmp_path): assert settings_local.exists() settings = json.loads(settings_local.read_text()) allow = settings.get("permissions", {}).get("allow", []) - assert "Bash(go test:*)" in allow - assert "Bash(go vet :*)" in allow - assert "Bash(go mod tidy:*)" in allow + assert "Bash(go test *)" in allow + assert "Bash(go vet *)" in allow + assert "Bash(go mod tidy *)" in allow assert "mcp__mem0__*" in allow assert "mcp__sourcecraft__list_pull_request_comments" in allow assert "Bash(make generate manifests)" in allow assert "Bash(make manifests)" in allow - assert "Bash(git worktree add:*)" in allow + assert "Bash(git worktree add *)" in allow assert ( 'Bash(openssl req -x509 -newkey rsa:512 -keyout /dev/null -out /dev/stdout -days 365 -nodes -subj "/CN=test" 2>/dev/null)' in allow @@ -586,9 +586,13 @@ def test_create_agent_files_fallback(self, mock_get_templates, tmp_path): Verifies that: - Fallback generators create valid agent content - - All 8 agents are created successfully + - 8 core agents are created via fallback generators - Content includes required sections (IDENTITY, ROLE) - MCP integration sections are included when MCP servers specified + + Note: Fallback 
generators only cover 8 core agents. The remaining 4 + (debate-arbiter, synthesizer, research-agent, final-verifier) are + only available when copying from templates. """ # Mock templates directory that doesn't have agent templates mock_templates_path = tmp_path / "mock_templates" @@ -601,7 +605,7 @@ def test_create_agent_files_fallback(self, mock_get_templates, tmp_path): agents_dir = tmp_path / ".claude" / "agents" assert agents_dir.exists() - # Verify all 8 agents were created using fallback generators + # Verify core agents were created using fallback generators expected_agents = [ "task-decomposer.md", "actor.md", diff --git a/tests/test_skills.py b/tests/test_skills.py new file mode 100644 index 0000000..396c3b8 --- /dev/null +++ b/tests/test_skills.py @@ -0,0 +1,276 @@ +""" +Tests for MAP Framework skill structure, frontmatter, and trigger compliance. + +Validates that all skills follow the Anthropic Skills Guide best practices: +- Valid YAML frontmatter with --- delimiters +- Descriptions include trigger phrases ("Use when") +- Descriptions include negative triggers ("Do NOT use") +- Skill folder names use kebab-case +- No README.md inside skill folders (per Anthropic guide) +- skill-rules.json has entries for all skills +- Required sections (Examples, Troubleshooting) present +""" + +import json +import re +from pathlib import Path + +import pytest +import yaml + + +class TestSkillStructure: + """Test that all skill directories follow the expected structure.""" + + @pytest.fixture + def project_root(self): + return Path(__file__).parent.parent + + @pytest.fixture + def skills_dir(self, project_root): + return project_root / ".claude" / "skills" + + @pytest.fixture + def template_skills_dir(self, project_root): + return project_root / "src" / "mapify_cli" / "templates" / "skills" + + @pytest.fixture + def skill_folders(self, skills_dir): + """Return list of skill folder names (excluding files).""" + if not skills_dir.exists(): + pytest.skip(".claude/skills/ directory doesn't exist") + return [ + d.name + for d in skills_dir.iterdir() + if d.is_dir() and not d.name.startswith(".") + ] + + @pytest.fixture + def skill_rules(self, skills_dir): + rules_file = skills_dir / "skill-rules.json" + if not rules_file.exists(): + pytest.skip("skill-rules.json doesn't exist") + return json.loads(rules_file.read_text()) + + def _parse_frontmatter(self, skill_md_path: Path) -> dict: + """Parse YAML frontmatter from a SKILL.md file.""" + content = skill_md_path.read_text() + if not content.startswith("---"): + return {} + end = content.find("---", 3) + if end == -1: + return {} + frontmatter_str = content[3:end].strip() + return yaml.safe_load(frontmatter_str) or {} + + # --- Structural tests --- + + def test_all_skills_have_skill_md(self, skills_dir, skill_folders): + """All skill folders must contain a SKILL.md file.""" + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + assert skill_file.exists(), ( + f"Skill '{folder}' is missing SKILL.md" + ) + + def test_skill_names_are_kebab_case(self, skill_folders): + """Skill folder names must use kebab-case only.""" + kebab_re = re.compile(r"^[a-z][a-z0-9]*(-[a-z0-9]+)*$") + for folder in skill_folders: + assert kebab_re.match(folder), ( + f"Skill folder '{folder}' is not kebab-case. " + f"Use lowercase letters, numbers, and hyphens only." 
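+                # Hypothetical examples (not from the repo): "map-planning" and
+                # "map-cli-reference" match the kebab-case pattern, while
+                # "MapPlanning", "map_planning", and "map planning" would fail.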
+ ) + + def test_no_readme_in_skill_folders(self, skills_dir, skill_folders): + """Skill folders should not contain README.md (per Anthropic guide).""" + for folder in skill_folders: + readme = skills_dir / folder / "README.md" + assert not readme.exists(), ( + f"Skill '{folder}' has a README.md inside the skill folder. " + f"Per Anthropic guide, use SKILL.md as the main file." + ) + + # --- Frontmatter tests --- + + def test_all_skills_have_valid_frontmatter(self, skills_dir, skill_folders): + """All SKILL.md files must have valid YAML frontmatter between --- delimiters.""" + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + content = skill_file.read_text() + assert content.startswith("---"), ( + f"Skill '{folder}/SKILL.md' is missing opening '---' delimiter" + ) + # Find closing delimiter (skip the opening one) + end = content.find("---", 3) + assert end > 3, ( + f"Skill '{folder}/SKILL.md' is missing closing '---' delimiter" + ) + # Parse YAML + frontmatter = self._parse_frontmatter(skill_file) + assert frontmatter, ( + f"Skill '{folder}/SKILL.md' has empty or invalid YAML frontmatter" + ) + + def test_frontmatter_has_required_fields(self, skills_dir, skill_folders): + """Frontmatter must include 'name' and 'description' fields.""" + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + fm = self._parse_frontmatter(skill_file) + assert "name" in fm, ( + f"Skill '{folder}' frontmatter is missing 'name' field" + ) + assert "description" in fm, ( + f"Skill '{folder}' frontmatter is missing 'description' field" + ) + # Name should match folder + assert fm["name"] == folder, ( + f"Skill '{folder}' frontmatter name '{fm['name']}' doesn't match folder name" + ) + + def test_descriptions_include_trigger_phrases(self, skills_dir, skill_folders): + """Descriptions must mention 'Use when' or trigger conditions.""" + trigger_patterns = [ + r"[Uu]se when", + r"[Uu]se this when", + r"[Uu]se for", + ] + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + fm = self._parse_frontmatter(skill_file) + desc = fm.get("description", "") + has_trigger = any(re.search(p, desc) for p in trigger_patterns) + assert has_trigger, ( + f"Skill '{folder}' description doesn't include trigger phrases. " + f"Add 'Use when ...' to the description." + ) + + def test_descriptions_include_negative_triggers(self, skills_dir, skill_folders): + """Descriptions must mention 'Do NOT use' exclusions.""" + negative_patterns = [ + r"[Dd]o [Nn][Oo][Tt] use", + r"[Dd]on't use", + r"[Nn]ot for", + ] + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + fm = self._parse_frontmatter(skill_file) + desc = fm.get("description", "") + has_negative = any(re.search(p, desc) for p in negative_patterns) + assert has_negative, ( + f"Skill '{folder}' description doesn't include negative triggers. " + f"Add 'Do NOT use for ...' to the description." 
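+                # Hypothetical description that would satisfy this check:
+                # "Use when choosing a MAP workflow. Do NOT use for debugging errors."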
+ ) + + # --- Content section tests --- + + def test_skills_have_examples_section(self, skills_dir, skill_folders): + """All skills should have an Examples section.""" + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + content = skill_file.read_text() + assert re.search(r"^## Examples", content, re.MULTILINE), ( + f"Skill '{folder}' is missing '## Examples' section" + ) + + def test_skills_have_troubleshooting_section(self, skills_dir, skill_folders): + """All skills should have a Troubleshooting section.""" + for folder in skill_folders: + skill_file = skills_dir / folder / "SKILL.md" + content = skill_file.read_text() + assert re.search(r"^## Troubleshooting", content, re.MULTILINE), ( + f"Skill '{folder}' is missing '## Troubleshooting' section" + ) + + # --- skill-rules.json tests --- + + def test_skill_rules_json_is_valid(self, skills_dir): + """skill-rules.json must be valid JSON.""" + rules_file = skills_dir / "skill-rules.json" + assert rules_file.exists(), "skill-rules.json not found" + content = rules_file.read_text() + try: + json.loads(content) + except json.JSONDecodeError as e: + pytest.fail(f"skill-rules.json is not valid JSON: {e}") + + def test_all_skills_have_trigger_rules(self, skill_folders, skill_rules): + """All skill folders should have corresponding entries in skill-rules.json.""" + skills_in_rules = set(skill_rules.get("skills", {}).keys()) + for folder in skill_folders: + assert folder in skills_in_rules, ( + f"Skill '{folder}' has no trigger rules in skill-rules.json. " + f"Add a '{folder}' entry with promptTriggers." + ) + + def test_trigger_rules_have_keywords(self, skill_rules): + """Each skill's trigger rules should have keywords defined.""" + for name, rule in skill_rules.get("skills", {}).items(): + triggers = rule.get("promptTriggers", {}) + keywords = triggers.get("keywords", []) + assert len(keywords) >= 3, ( + f"Skill '{name}' has fewer than 3 keywords in skill-rules.json. " + f"Add more keywords for reliable triggering." + ) + + def test_trigger_rules_have_intent_patterns(self, skill_rules): + """Each skill's trigger rules should have intent patterns.""" + for name, rule in skill_rules.get("skills", {}).items(): + triggers = rule.get("promptTriggers", {}) + patterns = triggers.get("intentPatterns", []) + assert len(patterns) >= 2, ( + f"Skill '{name}' has fewer than 2 intent patterns in skill-rules.json. " + f"Add more patterns for reliable triggering." + ) + + # --- Template sync tests --- + + def test_skill_templates_in_sync(self, skills_dir, template_skills_dir, skill_folders): + """Skill SKILL.md files should be in sync between .claude/ and templates/.""" + if not template_skills_dir.exists(): + pytest.skip("Template skills directory doesn't exist") + + for folder in skill_folders: + source = skills_dir / folder / "SKILL.md" + target = template_skills_dir / folder / "SKILL.md" + if not target.exists(): + pytest.fail( + f"Skill '{folder}/SKILL.md' missing from templates. " + f"Run: make sync-templates" + ) + assert source.read_text() == target.read_text(), ( + f"Skill '{folder}/SKILL.md' differs between .claude/skills/ and templates/skills/. 
" + f"Run: make sync-templates" + ) + + def test_skill_rules_in_sync(self, skills_dir, template_skills_dir): + """skill-rules.json should be in sync between .claude/ and templates/.""" + if not template_skills_dir.exists(): + pytest.skip("Template skills directory doesn't exist") + + source = skills_dir / "skill-rules.json" + target = template_skills_dir / "skill-rules.json" + if not source.exists() or not target.exists(): + pytest.skip("skill-rules.json missing from one location") + assert source.read_text() == target.read_text(), ( + "skill-rules.json differs between .claude/skills/ and templates/skills/. " + "Run: make sync-templates" + ) + + # --- Validation script tests --- + + def test_validation_scripts_are_executable(self, skills_dir, skill_folders): + """Scripts in skill scripts/ directories should be executable.""" + for folder in skill_folders: + scripts_dir = skills_dir / folder / "scripts" + if not scripts_dir.exists(): + continue + for script in scripts_dir.iterdir(): + if script.is_file() and script.suffix in (".sh", ".py"): + # Check file has executable permission or is a python script + if script.suffix == ".sh": + import os + assert os.access(script, os.X_OK), ( + f"Script '{script}' is not executable. " + f"Run: chmod +x {script}" + ) diff --git a/tests/test_template_sync.py b/tests/test_template_sync.py index 72a5eb9..f58c0d1 100644 --- a/tests/test_template_sync.py +++ b/tests/test_template_sync.py @@ -37,16 +37,20 @@ def templates_agents_dir(self, project_root): @pytest.fixture def expected_agents(self): - """List of expected agent template files.""" + """List of expected agent template files (all 12 agents).""" return [ "actor.md", + "curator.md", + "debate-arbiter.md", + "documentation-reviewer.md", + "evaluator.md", + "final-verifier.md", "monitor.md", "predictor.md", - "evaluator.md", - "curator.md", "reflector.md", + "research-agent.md", + "synthesizer.md", "task-decomposer.md", - "documentation-reviewer.md", ] def test_all_agents_exist_in_both_directories(