From 28d7460a1e1c2def3b4516b9cf8c504304fdf858 Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Mon, 16 Feb 2026 00:36:19 +0300 Subject: [PATCH 1/7] =?UTF-8?q?Strengthen=20validation=20criteria=20and=20?= =?UTF-8?q?enforce=20VC=E2=86=92tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude/agents/actor.md | 29 ++++++++++-- .claude/agents/monitor.md | 46 ++++++++++++++++--- .claude/agents/task-decomposer.md | 31 +++++++++++-- .claude/commands/map-debate.md | 6 ++- .claude/commands/map-efficient.md | 4 ++ src/mapify_cli/templates/agents/actor.md | 29 ++++++++++-- src/mapify_cli/templates/agents/monitor.md | 46 ++++++++++++++++--- .../templates/agents/task-decomposer.md | 31 +++++++++++-- .../templates/commands/map-debate.md | 6 ++- .../templates/commands/map-efficient.md | 4 ++ 10 files changed, 202 insertions(+), 30 deletions(-) diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index 40f1794..1ed955e 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -375,6 +375,10 @@ Document key decisions using this structure: - [ ] Error cases (invalid input, failures) - [ ] Security cases (injection, auth bypass) — if applicable +**Validation criteria → tests (MANDATORY when test_strategy is not N/A)**: +- For each `VCn:` item in `validation_criteria`, implement or update at least one automated test that would fail without your change and pass with it. +- Prefer naming tests with `vc` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage. + **Format**: ``` 1. test_[function]_[scenario]_[expected] @@ -392,7 +396,18 @@ Document key decisions using this structure: Expected: 409, {"error": "Email already registered"} -## 6. Used Patterns (ACE Learning) +## 6. Validation Criteria Coverage (Evidence) + +If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced. + +**Format**: +``` +VC1: +- Code: path/to/file.ext#SymbolOrLocation +- Tests: path/to/test_file.ext::test_name (or N/A with reason) +``` + +## 7. Used Patterns (ACE Learning) **Format**: `["impl-0012", "sec-0034"]` or `[]` if none @@ -403,7 +418,7 @@ Document key decisions using this structure: **If no patterns match**: `[]` with note "No relevant patterns in current mem0" -## 7. Integration Notes (If Applicable) +## 8. Integration Notes (If Applicable) Only include if changes affect: - Database schema (migrations needed?) @@ -449,6 +464,7 @@ Only include if changes affect: - [ ] AAG contract stated BEFORE code (Section 1) - [ ] Trade-offs documented with alternatives - [ ] Test cases cover happy + edge + error paths +- [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason) - [ ] Used patterns tracked (or `[]` if none) - [ ] Template variables `{{...}}` preserved in generated code @@ -518,6 +534,14 @@ with the following JSON content: "summary": "", "aag_contract": "", "files_changed": [""], + "tests_changed": [""], + "validation_criteria_coverage": [ + { + "criterion": "VC1: ...", + "tests": ["path/to/test_file.ext::test_name"], + "notes": "Short justification if tests are N/A or partial" + } + ], "status": "applied" } ``` @@ -1083,4 +1107,3 @@ export class ReconnectingWebSocket { **Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.) - diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md index 6358283..59043ad 100644 --- a/.claude/agents/monitor.md +++ b/.claude/agents/monitor.md @@ -821,8 +821,9 @@ When `{{requirements}}` or `{{subtask_description}}` includes `validation_criter ``` FOR each criterion in validation_criteria: 1. PARSE criterion into testable assertion - 2. VERIFY assertion against {{solution}} - 3. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE + 2. VERIFY assertion against {{solution}} (code-path evidence) + 3. VERIFY test coverage using test_strategy (if not N/A) + 4. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE CONTRACT_STATUS: - ALL PASS → contract_compliant: true @@ -830,6 +831,17 @@ CONTRACT_STATUS: - ANY UNTESTABLE → flag for clarification ``` +### Test Coverage Rule (Executable Contracts) + +Design constraints only become reliable when they are enforced by executable checks. + +For each `VCn:` criterion: +- If `test_strategy` is provided and not `N/A`, require at least one concrete test case that covers it. +- Prefer deterministic mapping: test names include `vc` (e.g., `test_vc1_*`, `TestVC1*`). +- Evidence MUST include both: + - **Code evidence** (where in code the behavior is implemented), and + - **Test evidence** (where in tests it is asserted). + ### Contract Assertion Patterns | Criterion Type | How to Verify | Example | @@ -852,15 +864,31 @@ Include in JSON output when validation_criteria provided: "failed": 1, "untestable": 0, "details": [ - {"criterion": "Returns 401 for expired token", "status": "PASS", "evidence": "Line 45: if token.expired: return 401"}, - {"criterion": "Creates audit log entry", "status": "FAIL", "evidence": "No audit.log() call found in create_user()"} + { + "criterion": "VC1: Returns 401 for expired token (auth/middleware.py:validate_token)", + "status": "PASS", + "code_evidence": "auth/middleware.py:45: if token.expired: return 401", + "test_coverage": "PASS", + "test_evidence": "tests/test_auth.py::test_vc1_expired_token_returns_401" + }, + { + "criterion": "VC2: Creates audit log entry with user_id (audit/logger.py:log_event)", + "status": "FAIL", + "code_evidence": "No audit.log_event() call found in create_user()", + "test_coverage": "MISSING", + "test_evidence": "No test found matching vc2 or described in test_strategy" + } ] }, "contract_compliant": false } ``` -**Decision Rule**: If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming). +**Decision Rule**: +- If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming). +- If any Behavioral/Integration/Edge-case criterion has `test_coverage != PASS` and test_strategy is not `N/A`: + - If `security_critical == true`: set `valid: false` (missing executable enforcement is a release blocker). + - Otherwise: add a **testability** issue and require Actor to add tests. @@ -2512,7 +2540,13 @@ with the following JSON content: "timestamp": "", "valid": true, "issues_found": 0, - "recommendation": "approve|reject|revise" + "recommendation": "approve|reject|revise", + "validation_criteria_test_coverage": { + "total": 0, + "covered": 0, + "missing": 0, + "notes": "Optional: summarize VC→test coverage findings" + } } ``` diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md index efcbb97..58e8afd 100644 --- a/.claude/agents/task-decomposer.md +++ b/.claude/agents/task-decomposer.md @@ -248,8 +248,21 @@ Return **ONLY** valid JSON in this exact structure: **subtasks[].complexity_rationale**: MUST reference factors: "Score N: factor (+X), factor (+Y)..." **subtasks[].validation_criteria**: Array of **testable conditions** that prove completion - REQUIRED: 2-4 specific, verifiable outcomes - - Good: "Returns 401 for expired token", "Creates audit log entry with user_id" - - Bad: "Works correctly", "Handles errors" + - Format (recommended): Prefix each item with `VC1:`, `VC2:`, ... for stable cross-agent reference. + - Each criterion MUST be both: + - **Behavior-/artifact-verifiable** (can be checked by reading code), and + - **Test-verifiable** (has at least one concrete test case planned in `test_strategy`). + - Each criterion SHOULD include a concrete anchor: + - endpoint/handler + route, OR + - function/class name + file path + - Good: + - "VC1: POST /users returns 201 and persists normalized email (users/routes.py:create_user)" + - "VC2: Returns 401 for expired token (auth/middleware.py:validate_token)" + - "VC3: Creates audit log entry with user_id (audit/logger.py:log_event)" + - Bad: + - "Works correctly" + - "Handles errors" + - "Tests pass" **subtasks[].contracts**: Array of **executable assertion patterns** (optional but recommended for complexity_score ≥ 5) - `type`: "precondition" | "postcondition" | "invariant" - `assertion`: Executable pattern (e.g., "response.status == 401 WHEN token.expired") @@ -269,6 +282,13 @@ Return **ONLY** valid JSON in this exact structure: - OMIT when: standard pattern with obvious implementation - Example: "Use existing RateLimiter middleware, configure for /api/* routes" **subtasks[].test_strategy**: Required object with unit/integration/e2e keys. Use "N/A" for levels not applicable. + - MUST map `validation_criteria` → tests: + - For each `VCn:` criterion, include at least one planned test name that covers it. + - Recommended naming: include `vc` in the test name (e.g., `test_vc1_*`, `TestVC1*`) for deterministic grep-ability. + - Recommended format: `path/to/test_file.ext::test_name_or_symbol` + - "N/A" is acceptable ONLY when: + - The repository has no automated test harness, and adding one is out-of-scope for this subtask. + - In that case: either add a FOUNDATION subtask to introduce a minimal test harness, or document the gap explicitly in risks/assumptions. **subtasks[].affected_files**: Precise file paths (NOT "backend", "frontend"); use [] if paths unknown ### Subtask Ordering @@ -491,9 +511,10 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive - [ ] Descriptions explain HOW, not just WHAT **Acceptance Criteria**: -- [ ] Each subtask has 3-5 specific criteria +- [ ] Each subtask has 2-4 specific criteria - [ ] Criteria are testable and measurable -- [ ] Criteria cover: functionality + edge cases + testing +- [ ] Criteria cover: functionality + edge cases (as applicable) +- [ ] Each VC has a concrete verification hook in test_strategy (at least one planned test per VC) - [ ] No vague criteria ("works", "is good", "done") **File Paths**: @@ -510,7 +531,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive **Test Strategy**: - [ ] test_strategy object included for each subtask -- [ ] Unit tests specified (REQUIRED for all subtasks) +- [ ] Unit tests specified (default). If repo has no test harness: add a FOUNDATION subtask to introduce minimal tests or explicitly justify "N/A". - [ ] Integration tests specified when subtask integrates multiple components - [ ] E2e tests specified when subtask impacts user-facing functionality - [ ] "N/A" used appropriately when test layer not applicable diff --git a/.claude/commands/map-debate.md b/.claude/commands/map-debate.md index 039526e..b1db61e 100644 --- a/.claude/commands/map-debate.md +++ b/.claude/commands/map-debate.md @@ -44,10 +44,14 @@ Task: $ARGUMENTS Hard requirements: - Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes) + - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor) + - Include a concrete anchor per VC (endpoint/function + file path) - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency - Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high) - Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access) -- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys" +- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys + - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) + - Recommended format: `path/to/test_file.ext::test_name_or_symbol`" ) ``` diff --git a/.claude/commands/map-efficient.md b/.claude/commands/map-efficient.md index e47b357..b91c5f3 100644 --- a/.claude/commands/map-efficient.md +++ b/.claude/commands/map-efficient.md @@ -109,10 +109,14 @@ Task: $ARGUMENTS Hard requirements: - Use `blueprint.subtasks[].validation_criteria` (2-4 testable outcomes) + - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor) + - Include a concrete anchor per VC (endpoint/function + file path) - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) - Include `complexity_score` (1-10) and `risk_level` (low|medium|high) - Include `security_critical` (true for auth/crypto/validation) - Include `test_strategy` with unit/integration/e2e keys + - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) + - Recommended format: `path/to/test_file.ext::test_name_or_symbol` - Include `aag_contract` (one-line pseudocode: Actor -> Action -> Goal) AAG Contract format (REQUIRED per subtask): diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index 40f1794..1ed955e 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -375,6 +375,10 @@ Document key decisions using this structure: - [ ] Error cases (invalid input, failures) - [ ] Security cases (injection, auth bypass) — if applicable +**Validation criteria → tests (MANDATORY when test_strategy is not N/A)**: +- For each `VCn:` item in `validation_criteria`, implement or update at least one automated test that would fail without your change and pass with it. +- Prefer naming tests with `vc` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage. + **Format**: ``` 1. test_[function]_[scenario]_[expected] @@ -392,7 +396,18 @@ Document key decisions using this structure: Expected: 409, {"error": "Email already registered"} -## 6. Used Patterns (ACE Learning) +## 6. Validation Criteria Coverage (Evidence) + +If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced. + +**Format**: +``` +VC1: +- Code: path/to/file.ext#SymbolOrLocation +- Tests: path/to/test_file.ext::test_name (or N/A with reason) +``` + +## 7. Used Patterns (ACE Learning) **Format**: `["impl-0012", "sec-0034"]` or `[]` if none @@ -403,7 +418,7 @@ Document key decisions using this structure: **If no patterns match**: `[]` with note "No relevant patterns in current mem0" -## 7. Integration Notes (If Applicable) +## 8. Integration Notes (If Applicable) Only include if changes affect: - Database schema (migrations needed?) @@ -449,6 +464,7 @@ Only include if changes affect: - [ ] AAG contract stated BEFORE code (Section 1) - [ ] Trade-offs documented with alternatives - [ ] Test cases cover happy + edge + error paths +- [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason) - [ ] Used patterns tracked (or `[]` if none) - [ ] Template variables `{{...}}` preserved in generated code @@ -518,6 +534,14 @@ with the following JSON content: "summary": "", "aag_contract": "", "files_changed": [""], + "tests_changed": [""], + "validation_criteria_coverage": [ + { + "criterion": "VC1: ...", + "tests": ["path/to/test_file.ext::test_name"], + "notes": "Short justification if tests are N/A or partial" + } + ], "status": "applied" } ``` @@ -1083,4 +1107,3 @@ export class ReconnectingWebSocket { **Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.) - diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md index 6358283..59043ad 100644 --- a/src/mapify_cli/templates/agents/monitor.md +++ b/src/mapify_cli/templates/agents/monitor.md @@ -821,8 +821,9 @@ When `{{requirements}}` or `{{subtask_description}}` includes `validation_criter ``` FOR each criterion in validation_criteria: 1. PARSE criterion into testable assertion - 2. VERIFY assertion against {{solution}} - 3. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE + 2. VERIFY assertion against {{solution}} (code-path evidence) + 3. VERIFY test coverage using test_strategy (if not N/A) + 4. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE CONTRACT_STATUS: - ALL PASS → contract_compliant: true @@ -830,6 +831,17 @@ CONTRACT_STATUS: - ANY UNTESTABLE → flag for clarification ``` +### Test Coverage Rule (Executable Contracts) + +Design constraints only become reliable when they are enforced by executable checks. + +For each `VCn:` criterion: +- If `test_strategy` is provided and not `N/A`, require at least one concrete test case that covers it. +- Prefer deterministic mapping: test names include `vc` (e.g., `test_vc1_*`, `TestVC1*`). +- Evidence MUST include both: + - **Code evidence** (where in code the behavior is implemented), and + - **Test evidence** (where in tests it is asserted). + ### Contract Assertion Patterns | Criterion Type | How to Verify | Example | @@ -852,15 +864,31 @@ Include in JSON output when validation_criteria provided: "failed": 1, "untestable": 0, "details": [ - {"criterion": "Returns 401 for expired token", "status": "PASS", "evidence": "Line 45: if token.expired: return 401"}, - {"criterion": "Creates audit log entry", "status": "FAIL", "evidence": "No audit.log() call found in create_user()"} + { + "criterion": "VC1: Returns 401 for expired token (auth/middleware.py:validate_token)", + "status": "PASS", + "code_evidence": "auth/middleware.py:45: if token.expired: return 401", + "test_coverage": "PASS", + "test_evidence": "tests/test_auth.py::test_vc1_expired_token_returns_401" + }, + { + "criterion": "VC2: Creates audit log entry with user_id (audit/logger.py:log_event)", + "status": "FAIL", + "code_evidence": "No audit.log_event() call found in create_user()", + "test_coverage": "MISSING", + "test_evidence": "No test found matching vc2 or described in test_strategy" + } ] }, "contract_compliant": false } ``` -**Decision Rule**: If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming). +**Decision Rule**: +- If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming). +- If any Behavioral/Integration/Edge-case criterion has `test_coverage != PASS` and test_strategy is not `N/A`: + - If `security_critical == true`: set `valid: false` (missing executable enforcement is a release blocker). + - Otherwise: add a **testability** issue and require Actor to add tests. @@ -2512,7 +2540,13 @@ with the following JSON content: "timestamp": "", "valid": true, "issues_found": 0, - "recommendation": "approve|reject|revise" + "recommendation": "approve|reject|revise", + "validation_criteria_test_coverage": { + "total": 0, + "covered": 0, + "missing": 0, + "notes": "Optional: summarize VC→test coverage findings" + } } ``` diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md index efcbb97..58e8afd 100644 --- a/src/mapify_cli/templates/agents/task-decomposer.md +++ b/src/mapify_cli/templates/agents/task-decomposer.md @@ -248,8 +248,21 @@ Return **ONLY** valid JSON in this exact structure: **subtasks[].complexity_rationale**: MUST reference factors: "Score N: factor (+X), factor (+Y)..." **subtasks[].validation_criteria**: Array of **testable conditions** that prove completion - REQUIRED: 2-4 specific, verifiable outcomes - - Good: "Returns 401 for expired token", "Creates audit log entry with user_id" - - Bad: "Works correctly", "Handles errors" + - Format (recommended): Prefix each item with `VC1:`, `VC2:`, ... for stable cross-agent reference. + - Each criterion MUST be both: + - **Behavior-/artifact-verifiable** (can be checked by reading code), and + - **Test-verifiable** (has at least one concrete test case planned in `test_strategy`). + - Each criterion SHOULD include a concrete anchor: + - endpoint/handler + route, OR + - function/class name + file path + - Good: + - "VC1: POST /users returns 201 and persists normalized email (users/routes.py:create_user)" + - "VC2: Returns 401 for expired token (auth/middleware.py:validate_token)" + - "VC3: Creates audit log entry with user_id (audit/logger.py:log_event)" + - Bad: + - "Works correctly" + - "Handles errors" + - "Tests pass" **subtasks[].contracts**: Array of **executable assertion patterns** (optional but recommended for complexity_score ≥ 5) - `type`: "precondition" | "postcondition" | "invariant" - `assertion`: Executable pattern (e.g., "response.status == 401 WHEN token.expired") @@ -269,6 +282,13 @@ Return **ONLY** valid JSON in this exact structure: - OMIT when: standard pattern with obvious implementation - Example: "Use existing RateLimiter middleware, configure for /api/* routes" **subtasks[].test_strategy**: Required object with unit/integration/e2e keys. Use "N/A" for levels not applicable. + - MUST map `validation_criteria` → tests: + - For each `VCn:` criterion, include at least one planned test name that covers it. + - Recommended naming: include `vc` in the test name (e.g., `test_vc1_*`, `TestVC1*`) for deterministic grep-ability. + - Recommended format: `path/to/test_file.ext::test_name_or_symbol` + - "N/A" is acceptable ONLY when: + - The repository has no automated test harness, and adding one is out-of-scope for this subtask. + - In that case: either add a FOUNDATION subtask to introduce a minimal test harness, or document the gap explicitly in risks/assumptions. **subtasks[].affected_files**: Precise file paths (NOT "backend", "frontend"); use [] if paths unknown ### Subtask Ordering @@ -491,9 +511,10 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive - [ ] Descriptions explain HOW, not just WHAT **Acceptance Criteria**: -- [ ] Each subtask has 3-5 specific criteria +- [ ] Each subtask has 2-4 specific criteria - [ ] Criteria are testable and measurable -- [ ] Criteria cover: functionality + edge cases + testing +- [ ] Criteria cover: functionality + edge cases (as applicable) +- [ ] Each VC has a concrete verification hook in test_strategy (at least one planned test per VC) - [ ] No vague criteria ("works", "is good", "done") **File Paths**: @@ -510,7 +531,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive **Test Strategy**: - [ ] test_strategy object included for each subtask -- [ ] Unit tests specified (REQUIRED for all subtasks) +- [ ] Unit tests specified (default). If repo has no test harness: add a FOUNDATION subtask to introduce minimal tests or explicitly justify "N/A". - [ ] Integration tests specified when subtask integrates multiple components - [ ] E2e tests specified when subtask impacts user-facing functionality - [ ] "N/A" used appropriately when test layer not applicable diff --git a/src/mapify_cli/templates/commands/map-debate.md b/src/mapify_cli/templates/commands/map-debate.md index 039526e..b1db61e 100644 --- a/src/mapify_cli/templates/commands/map-debate.md +++ b/src/mapify_cli/templates/commands/map-debate.md @@ -44,10 +44,14 @@ Task: $ARGUMENTS Hard requirements: - Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes) + - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor) + - Include a concrete anchor per VC (endpoint/function + file path) - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency - Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high) - Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access) -- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys" +- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys + - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) + - Recommended format: `path/to/test_file.ext::test_name_or_symbol`" ) ``` diff --git a/src/mapify_cli/templates/commands/map-efficient.md b/src/mapify_cli/templates/commands/map-efficient.md index e47b357..b91c5f3 100644 --- a/src/mapify_cli/templates/commands/map-efficient.md +++ b/src/mapify_cli/templates/commands/map-efficient.md @@ -109,10 +109,14 @@ Task: $ARGUMENTS Hard requirements: - Use `blueprint.subtasks[].validation_criteria` (2-4 testable outcomes) + - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor) + - Include a concrete anchor per VC (endpoint/function + file path) - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) - Include `complexity_score` (1-10) and `risk_level` (low|medium|high) - Include `security_critical` (true for auth/crypto/validation) - Include `test_strategy` with unit/integration/e2e keys + - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc`) + - Recommended format: `path/to/test_file.ext::test_name_or_symbol` - Include `aag_contract` (one-line pseudocode: Actor -> Action -> Goal) AAG Contract format (REQUIRED per subtask): From c080a5942ed50c67f8ca92fdb2f5c3b287fb32c3 Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Mon, 16 Feb 2026 00:36:32 +0300 Subject: [PATCH 2/7] Fix Claude Code hook configuration and outputs --- .claude/hooks/block-dangerous.sh | 92 +++++++++++++++---- .claude/hooks/block-secrets.py | 27 +++--- .claude/hooks/post-edit-reminder.py | 9 +- .claude/hooks/ralph-context-pruner.py | 7 +- .claude/hooks/safety-guardrails.py | 33 ++++--- .claude/hooks/workflow-context-injector.py | 7 +- .claude/hooks/workflow-gate.py | 27 ++++-- .claude/references/workflow-state-schema.md | 2 +- .claude/settings.hooks.json | 74 --------------- .claude/settings.json | 81 ++++++++++++++++ .claude/skills/README.md | 4 +- CHANGELOG.md | 4 +- CLAUDE.md | 2 +- IMPLEMENTATION_SUMMARY.md | 18 ++-- docs/USAGE.md | 4 +- scripts/sync-templates.sh | 2 +- src/mapify_cli/__init__.py | 39 +++----- src/mapify_cli/templates/CLAUDE.md | 2 +- .../templates/hooks/block-dangerous.sh | 92 +++++++++++++++---- .../templates/hooks/block-secrets.py | 27 +++--- .../templates/hooks/post-edit-reminder.py | 9 +- .../templates/hooks/ralph-context-pruner.py | 7 +- .../templates/hooks/safety-guardrails.py | 33 ++++--- .../hooks/workflow-context-injector.py | 7 +- .../templates/hooks/workflow-gate.py | 27 ++++-- .../references/workflow-state-schema.md | 2 +- src/mapify_cli/templates/settings.hooks.json | 74 --------------- src/mapify_cli/templates/settings.json | 81 ++++++++++++++++ src/mapify_cli/templates/skills/README.md | 4 +- 29 files changed, 481 insertions(+), 316 deletions(-) delete mode 100644 .claude/settings.hooks.json delete mode 100644 src/mapify_cli/templates/settings.hooks.json diff --git a/.claude/hooks/block-dangerous.sh b/.claude/hooks/block-dangerous.sh index ab90770..f42a6ba 100755 --- a/.claude/hooks/block-dangerous.sh +++ b/.claude/hooks/block-dangerous.sh @@ -14,7 +14,7 @@ # # EXIT CODES: # 0 - Allow command execution -# 2 - Block command execution (dangerous command detected) +# 0 + permissionDecision=deny - Block command execution (preferred) # # TESTING: # echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh @@ -53,58 +53,112 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]') # Check for rm -rf (recursive force delete) # Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Catch rm -rf at end of command or with path immediately after if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Catch separated flags: rm -r -f or rm -f -r if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Check for git push --force to main/master # Matches: git push --force origin main, git push -f origin master if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" + } + }' + exit 0 fi # Also check reverse order: git push origin main --force if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" + } + }' + exit 0 fi # Check for git reset --hard (without specific commit, or dangerous patterns) # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/ if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: git reset --hard is prohibited","details":"Hard reset can cause irreversible loss of uncommitted changes","suggestion":"Use git stash or git reset --soft instead"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" + } + }' + exit 0 fi # Check for dangerous chmod/chown on system directories if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Recursive chmod/chown on root is prohibited","details":"This can break system permissions","suggestion":"Specify a more targeted path"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" + } + }' + exit 0 fi # Check for dd with of=/dev/ if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: dd to /dev/ is prohibited","details":"Writing to raw devices can destroy data","suggestion":"Use safer file operations"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" + } + }' + exit 0 fi # Check for mkfs (format filesystem) if echo "$COMMAND" | grep -qE 'mkfs'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: mkfs is prohibited","details":"Formatting filesystems can destroy data","suggestion":"This operation requires manual execution"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" + } + }' + exit 0 fi # ============================================================================= diff --git a/.claude/hooks/block-secrets.py b/.claude/hooks/block-secrets.py index bbeb466..fb1d2e1 100755 --- a/.claude/hooks/block-secrets.py +++ b/.claude/hooks/block-secrets.py @@ -22,7 +22,7 @@ HOOK BEHAVIOR: - Exit code 0: Allow tool execution (non-sensitive file) - - Exit code 2: Block tool execution (sensitive file detected) + - Exit code 0 + permissionDecision=deny: Block tool execution (sensitive file detected) TESTING: echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py @@ -78,16 +78,21 @@ def is_sensitive_file(file_path: str) -> bool: def block_access(file_path: str, tool_name: str): """Block tool execution and output error message.""" - error_output = { - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "error": f"Blocked: Access to sensitive file '{file_path}' is prohibited", - "details": f"Tool '{tool_name}' attempted to access a protected file. Sensitive files include: .env*, *credentials*, *secret*, private keys (*.pem, *.key, *_rsa, etc.)", - "suggestion": "If you need to work with sensitive data, use environment variables or a secrets management system instead of reading raw credential files.", - } - } - print(json.dumps(error_output), file=sys.stderr) - sys.exit(2) # Exit code 2 blocks execution + print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"Blocked: Access to sensitive file '{file_path}' is prohibited " + f"(tool={tool_name})." + ), + } + } + ) + ) + sys.exit(0) def main(): diff --git a/.claude/hooks/post-edit-reminder.py b/.claude/hooks/post-edit-reminder.py index de96db8..ff77beb 100755 --- a/.claude/hooks/post-edit-reminder.py +++ b/.claude/hooks/post-edit-reminder.py @@ -7,7 +7,7 @@ Trigger: Edit|Write Exit codes: Always 0 (non-blocking) -Output: ~80 char reminder via hookSpecificOutput.appended_text +Output: ~80 char reminder via hookSpecificOutput.additionalContext """ import json @@ -73,7 +73,12 @@ def main() -> None: # Inject lightweight reminder reminder = "[MAP] Code changed. Run tests before committing!" - output = {"hookSpecificOutput": {"appended_text": reminder}} + output = { + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": reminder, + } + } print(json.dumps(output)) sys.exit(0) diff --git a/.claude/hooks/ralph-context-pruner.py b/.claude/hooks/ralph-context-pruner.py index 8adb60f..b37b174 100755 --- a/.claude/hooks/ralph-context-pruner.py +++ b/.claude/hooks/ralph-context-pruner.py @@ -13,7 +13,7 @@ 0 - Always (PreCompact hooks don't block) Output: - hookSpecificOutput.appended_text - Recovery message injected into context + hookSpecificOutput.additionalContext - Recovery message injected into context """ import json import os @@ -238,7 +238,10 @@ def main() -> None: # Inject recovery message into context recovery_msg = format_recovery_message(state, branch) - output["hookSpecificOutput"] = {"appended_text": recovery_msg} + output["hookSpecificOutput"] = { + "hookEventName": "PreCompact", + "additionalContext": recovery_msg, + } # Prune log files in ALL branch directories actions = [] diff --git a/.claude/hooks/safety-guardrails.py b/.claude/hooks/safety-guardrails.py index a0158c7..4174337 100644 --- a/.claude/hooks/safety-guardrails.py +++ b/.claude/hooks/safety-guardrails.py @@ -9,7 +9,7 @@ Trigger: Edit|Write|Bash Exit codes: 0 - Allow - 2 - Block (with error message) + 0 + permissionDecision=deny - Block (preferred) """ import json @@ -84,17 +84,20 @@ def check_command_safety(command: str) -> tuple[bool, str]: return True, "" -def block_access(reason: str, tool_name: str) -> None: - """Block tool execution with error message.""" - error_output = { - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "error": reason, - "details": f"Tool '{tool_name}' blocked by safety guardrails", - } - } - print(json.dumps(error_output), file=sys.stderr) - sys.exit(2) +def deny(reason: str) -> None: + """Deny tool execution using structured PreToolUse decision control.""" + print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": reason, + } + } + ) + ) + sys.exit(0) def main() -> None: @@ -108,18 +111,18 @@ def main() -> None: tool_input = input_data.get("tool_input", {}) # Check file-based tools - if tool_name in ("Edit", "Write", "Read"): + if tool_name in ("Edit", "Write", "Read", "MultiEdit"): file_path = tool_input.get("file_path", "") or tool_input.get("path", "") is_safe, reason = check_file_safety(file_path) if not is_safe: - block_access(reason, tool_name) + deny(f"{reason} (tool={tool_name})") # Check bash commands elif tool_name == "Bash": command = tool_input.get("command", "") is_safe, reason = check_command_safety(command) if not is_safe: - block_access(reason, tool_name) + deny(f"{reason} (tool={tool_name})") print("{}") sys.exit(0) diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py index f0b8d34..72145e5 100755 --- a/.claude/hooks/workflow-context-injector.py +++ b/.claude/hooks/workflow-context-injector.py @@ -243,7 +243,12 @@ def main() -> None: reminder = format_reminder(state, branch) if reminder: - output = {"hookSpecificOutput": {"appended_text": reminder}} + output = { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": reminder, + } + } print(json.dumps(output)) else: print("{}") diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py index 01e3429..499bdfc 100755 --- a/.claude/hooks/workflow-gate.py +++ b/.claude/hooks/workflow-gate.py @@ -23,12 +23,12 @@ - pending_steps: Dict mapping subtask_id -> list of pending steps HOOK BEHAVIOR: - - Exit code 0 + approve: Allow tool execution - - Exit code 2 + block: Block tool execution with error message + - Exit code 0: Allow tool execution + - Exit code 0 + permissionDecision=deny: Block tool execution with reason (preferred) TESTING: echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py - # Expected: Exit code 2, JSON error on stderr (no workflow state) + # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps) PERFORMANCE: Target: <100ms per invocation @@ -138,7 +138,7 @@ def main(): # Allow non-editing tools if tool_name not in EDITING_TOOLS: - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) # Get current branch @@ -150,25 +150,32 @@ def main(): if state is None: # No workflow state = not in MAP workflow mode, allow # (This prevents breaking non-MAP work) - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) # Check workflow compliance is_compliant, error_message = check_workflow_compliance(state) if is_compliant: - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) else: print( - json.dumps({"decision": "block", "reason": error_message}), - file=sys.stderr, + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": error_message, + } + } + ) ) - sys.exit(2) + sys.exit(0) except Exception as e: # On hook failure, approve (fail-open to avoid blocking work) - print(json.dumps({"decision": "approve"})) + print("{}") if os.environ.get("DEBUG_WORKFLOW_GATE"): print(f"[workflow-gate] ERROR: {e}", file=sys.stderr) sys.exit(0) diff --git a/.claude/references/workflow-state-schema.md b/.claude/references/workflow-state-schema.md index 42e34fd..e490808 100644 --- a/.claude/references/workflow-state-schema.md +++ b/.claude/references/workflow-state-schema.md @@ -266,4 +266,4 @@ This external state file combined with hook enforcement implements that principl - `.claude/hooks/workflow-gate.py` - Enforcement hook - `.claude/commands/map-efficient.md` - Workflow definition - `.map//task_plan_.md` - Human-readable plan -- `.claude/settings.hooks.json` - Hook registration +- `.claude/settings.json` - Hook registration diff --git a/.claude/settings.hooks.json b/.claude/settings.hooks.json deleted file mode 100644 index eb4e7ef..0000000 --- a/.claude/settings.hooks.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/claude-code-settings.json", - "description": "Claude Code hooks configuration for MAP Framework (Tiered)", - "hooks": { - "PreToolUse": [ - { - "matcher": "Edit|Write|Read|Bash", - "description": "Safety Guardrails - block sensitive files and dangerous commands", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/safety-guardrails.py", - "timeout": 5, - "description": "Blocks .env, credentials, private keys, and dangerous bash commands" - } - ] - }, - { - "matcher": "Edit|Write|Bash", - "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py", - "timeout": 3, - "description": "Injects workflow context only for Edit/Write and significant Bash commands" - } - ] - } - ], - "PostToolUse": [ - { - "matcher": "Edit|Write|MultiEdit", - "description": "Post-Edit Reminder - lightweight test reminder after code changes", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py", - "timeout": 3, - "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" - } - ] - } - ], - "PreCompact": [ - { - "matcher": "*", - "description": "Anti-Amnesia Hook - save state and prune logs before compaction", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/ralph-context-pruner.py", - "timeout": 10, - "description": "Saves workflow restore point and prunes old logs" - } - ] - } - ], - "Stop": [ - { - "matcher": "*", - "description": "Lightweight quality gates - only critical issues", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/end-of-turn.sh", - "timeout": 30, - "description": "Auto-fixes silently, only reports secrets/syntax errors" - } - ] - } - ] - } -} diff --git a/.claude/settings.json b/.claude/settings.json index 6c2c9a2..0779e49 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -33,5 +33,86 @@ "Bash(gofmt *)", "Bash(kubectl get *)" ] + }, + "hooks": { + "PreToolUse": [ + { + "matcher": "Edit|Write|Read|MultiEdit|Bash", + "description": "Safety Guardrails - block sensitive files and dangerous commands", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/safety-guardrails.py", + "timeout": 5, + "description": "Blocks .env, credentials, private keys, and dangerous bash commands" + } + ] + }, + { + "matcher": "Edit|Write|MultiEdit", + "description": "Workflow Gate - enforce Actor+Monitor before edits (MAP)", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-gate.py", + "timeout": 5, + "description": "Blocks Edit/Write/MultiEdit until workflow_state indicates Actor+Monitor completed" + } + ] + }, + { + "matcher": "Edit|Write|MultiEdit|Bash", + "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-context-injector.py", + "timeout": 3, + "description": "Injects workflow context only for Edit/Write/MultiEdit and significant Bash commands" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write|MultiEdit", + "description": "Post-Edit Reminder - lightweight test reminder after code changes", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-reminder.py", + "timeout": 3, + "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" + } + ] + } + ], + "PreCompact": [ + { + "matcher": "*", + "description": "Anti-Amnesia Hook - save state and prune logs before compaction", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/ralph-context-pruner.py", + "timeout": 10, + "description": "Saves workflow restore point and prunes old logs" + } + ] + } + ], + "Stop": [ + { + "description": "Lightweight quality gates - only critical issues", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/end-of-turn.sh", + "timeout": 30, + "description": "Auto-fixes silently, only reports secrets/syntax errors" + } + ] + } + ] } } diff --git a/.claude/skills/README.md b/.claude/skills/README.md index 2d234d4..c15191d 100644 --- a/.claude/skills/README.md +++ b/.claude/skills/README.md @@ -152,12 +152,12 @@ Skills work seamlessly with the prompt improvement system: **Check:** 1. `skill-rules.json` has correct triggers -2. `improve-prompt.py` hook is enabled in `settings.hooks.json` +2. `improve-prompt.py` hook is enabled in `.claude/settings.json` 3. Hook processes UserPromptSubmit events **Fix:** - Update trigger patterns in `skill-rules.json` -- Verify hook configuration in `.claude/settings.hooks.json` +- Verify hook configuration in `.claude/settings.json` ### Skill content too long diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b6a592..bb2ec10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,7 +72,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **task-decomposer.md**: Enhanced with Acceptance Criteria table format, re-decomposition mode, dependency enforcement - **map-efficient.md**: Added Step 3.5 Final Verification with circuit breaker check, final-verifier invocation, re-decomposition logic -- **settings.hooks.json**: Added PreToolUse, PostToolUse, and PreCompact hook entries for Ralph Loop +- **.claude/settings.json (hooks)**: Added PreToolUse, PostToolUse, and PreCompact hook entries for Ralph Loop ### Documentation - Branch-scoped artifacts stored in `.map//` directory @@ -353,7 +353,7 @@ rm -rf .claude/hooks/ - Preserved user settings during hooks installation (merge strategy) - **mapify init Command Fixes** (1aee890, 7d264ef, 956ef96) - - Fixed mapify init to copy Python hooks and settings.hooks.json correctly + - Fixed mapify init to copy Python hooks and hook-enabled `.claude/settings.json` correctly - Corrected settings file location (.claude/ not .claude/hooks/) - Restored SessionStart hook functionality diff --git a/CLAUDE.md b/CLAUDE.md index a5dd7c0..29b8c1a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,7 +16,7 @@ Common synced paths: - `.claude/commands/` → `src/mapify_cli/templates/commands/` - `.claude/hooks/` → `src/mapify_cli/templates/hooks/` - `.claude/references/` → `src/mapify_cli/templates/references/` -- `.claude/settings.json`, `.claude/settings.hooks.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/` +- `.claude/settings.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/` Do the sync via a deterministic command (preferred): - `make sync-templates` (runs `scripts/sync-templates.sh`) diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md index e307ef5..cecea31 100644 --- a/IMPLEMENTATION_SUMMARY.md +++ b/IMPLEMENTATION_SUMMARY.md @@ -44,7 +44,7 @@ - Recursive invocation for fresh context per subtask ### Configuration Updates -- **`.claude/settings.hooks.json`**: Added workflow-context-injector.py hook registration +- **`.claude/settings.json`**: Added workflow-context-injector.py hook registration - **Template sync**: All files copied to `src/mapify_cli/templates/` ### Documentation Updates @@ -148,7 +148,7 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30 .claude/ ├── hooks/ │ ├── workflow-context-injector.py (NEW) -│ └── settings.hooks.json (UPDATED) +│ └── settings.json (UPDATED) ├── commands/ │ ├── map-efficient.md (OPTIMIZED - 995→394 lines) │ └── map-efficient.md.backup (BACKUP) @@ -157,13 +157,13 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30 ├── map_orchestrator.py (NEW) └── map_step_runner.py (NEW) -src/mapify_cli/templates/ -├── hooks/workflow-context-injector.py (SYNCED) -├── commands/map-efficient.md (SYNCED) -├── map/scripts/ -│ ├── map_orchestrator.py (SYNCED) -│ └── map_step_runner.py (SYNCED) -└── settings.hooks.json (SYNCED) + src/mapify_cli/templates/ + ├── hooks/workflow-context-injector.py (SYNCED) + ├── commands/map-efficient.md (SYNCED) + ├── map/scripts/ + │ ├── map_orchestrator.py (SYNCED) + │ └── map_step_runner.py (SYNCED) + └── settings.json (SYNCED) docs/ ├── ARCHITECTURE.md (UPDATED - new section) diff --git a/docs/USAGE.md b/docs/USAGE.md index fd610ed..1e1d016 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -1941,7 +1941,7 @@ If you prefer direct execution without clarification: * implement user authentication # Skips improvement ``` -**Option 2: Remove from settings.hooks.json** +**Option 2: Remove from `.claude/settings.json`** ```json { "hooks": { @@ -1972,7 +1972,7 @@ MAP Framework includes additional hooks for security and quality: | `block-dangerous.sh` | PreToolUse | Block dangerous shell commands | | `end-of-turn.sh` | Stop | Quality gates (linting, secret scanning) | -**Configuration:** See `.claude/settings.hooks.json` for hook configuration. +**Configuration:** See `.claude/settings.json` for hook configuration (or manage via `/hooks`). **Security hooks:** See [Security Model: Three-Layer Defense](#-security-model-three-layer-defense) for details. diff --git a/scripts/sync-templates.sh b/scripts/sync-templates.sh index eb68752..24f72fb 100755 --- a/scripts/sync-templates.sh +++ b/scripts/sync-templates.sh @@ -12,7 +12,7 @@ cp -a .claude/agents/*.md "$templates_root/agents/" cp -a .claude/commands/*.md "$templates_root/commands/" cp -a .claude/hooks/* "$templates_root/hooks/" cp -a .claude/references/* "$templates_root/references/" -cp -a .claude/settings.json .claude/settings.hooks.json .claude/workflow-rules.json .claude/ralph-loop-config.json "$templates_root/" +cp -a .claude/settings.json .claude/workflow-rules.json .claude/ralph-loop-config.json "$templates_root/" # Sync skills directory (preserving nested structure) if [[ -d .claude/skills ]]; then diff --git a/src/mapify_cli/__init__.py b/src/mapify_cli/__init__.py index 51e4ae3..4747953 100644 --- a/src/mapify_cli/__init__.py +++ b/src/mapify_cli/__init__.py @@ -1269,15 +1269,16 @@ def configure_global_permissions() -> None: def create_or_merge_project_settings_local(project_path: Path) -> None: - """Create/merge .claude/settings.local.json with safe project allowlist and hooks. + """Create/merge .claude/settings.local.json with safe project allowlist. Claude Code supports per-project approvals via `.claude/settings.local.json`. This file is user-local (should not be committed) and is merged by Claude Code with global settings from `~/.claude/settings.json`. - IMPORTANT: Claude Code only reads hooks from `settings.json` or `settings.local.json`. - The separate `settings.hooks.json` file is NOT read by Claude Code. Therefore, this - function merges hooks from the template `settings.hooks.json` into `settings.local.json`. + IMPORTANT: + - Shared, repo-committed hooks MUST be configured in `.claude/settings.json`. + - `.claude/settings.local.json` is for user-local approvals/allowlists and should + not be used as the primary distribution mechanism for project hooks. We keep this allowlist intentionally narrow and focused on common safe actions for local development workflows. @@ -1313,20 +1314,6 @@ def create_or_merge_project_settings_local(project_path: Path) -> None: "ask": [], } - # Load hooks from template settings.hooks.json - # Claude Code doesn't read hooks from settings.hooks.json, so we merge them here - hooks_config: Dict[str, Any] = {} - templates_dir = get_templates_dir() - hooks_template_file = templates_dir / "settings.hooks.json" - if hooks_template_file.exists(): - try: - hooks_data = json.loads(hooks_template_file.read_text(encoding="utf-8")) - hooks_config = hooks_data.get("hooks", {}) - except (json.JSONDecodeError, OSError) as e: - console.print( - f"[yellow]Warning:[/yellow] Could not read hooks template: {e}" - ) - # Load existing settings if present if settings_file.exists(): try: @@ -1339,6 +1326,14 @@ def create_or_merge_project_settings_local(project_path: Path) -> None: else: existing_settings = {} + if isinstance(existing_settings, dict) and existing_settings.get("hooks"): + console.print( + "[yellow]Warning:[/yellow] .claude/settings.local.json contains hooks. " + "Claude Code loads hooks from BOTH .claude/settings.json and .claude/settings.local.json, " + "so this can cause duplicate hook executions. " + "Move shared hooks to .claude/settings.json and remove the hooks section from settings.local.json." + ) + existing_settings.setdefault("permissions", {}) permissions = existing_settings["permissions"] @@ -1351,12 +1346,6 @@ def create_or_merge_project_settings_local(project_path: Path) -> None: permissions.setdefault("deny", permissions.get("deny", [])) permissions.setdefault("ask", permissions.get("ask", [])) - # Replace hooks configuration from template - # Always use template hooks to ensure correct $CLAUDE_PROJECT_DIR paths - # User customizations should be done by editing the template or post-init - if hooks_config: - existing_settings["hooks"] = hooks_config - settings_file.write_text(json.dumps(existing_settings, indent=2) + "\n") @@ -1904,7 +1893,6 @@ def create_config_files(project_path: Path) -> int: Copies configuration files: - settings.json - - settings.hooks.json - ralph-loop-config.json - workflow-rules.json @@ -1919,7 +1907,6 @@ def create_config_files(project_path: Path) -> int: config_files = [ "settings.json", - "settings.hooks.json", "ralph-loop-config.json", "workflow-rules.json", ] diff --git a/src/mapify_cli/templates/CLAUDE.md b/src/mapify_cli/templates/CLAUDE.md index afadff8..45550f3 100644 --- a/src/mapify_cli/templates/CLAUDE.md +++ b/src/mapify_cli/templates/CLAUDE.md @@ -16,7 +16,7 @@ Common synced paths: - `.claude/commands/` → `src/mapify_cli/templates/commands/` - `.claude/hooks/` → `src/mapify_cli/templates/hooks/` - `.claude/references/` → `src/mapify_cli/templates/references/` -- `.claude/settings.json`, `.claude/settings.hooks.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/` +- `.claude/settings.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/` Do the sync via a deterministic command (preferred): - `make sync-templates` (runs `scripts/sync-templates.sh`) diff --git a/src/mapify_cli/templates/hooks/block-dangerous.sh b/src/mapify_cli/templates/hooks/block-dangerous.sh index ab90770..f42a6ba 100755 --- a/src/mapify_cli/templates/hooks/block-dangerous.sh +++ b/src/mapify_cli/templates/hooks/block-dangerous.sh @@ -14,7 +14,7 @@ # # EXIT CODES: # 0 - Allow command execution -# 2 - Block command execution (dangerous command detected) +# 0 + permissionDecision=deny - Block command execution (preferred) # # TESTING: # echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh @@ -53,58 +53,112 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]') # Check for rm -rf (recursive force delete) # Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Catch rm -rf at end of command or with path immediately after if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Catch separated flags: rm -r -f or rm -f -r if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + } + }' + exit 0 fi # Check for git push --force to main/master # Matches: git push --force origin main, git push -f origin master if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" + } + }' + exit 0 fi # Also check reverse order: git push origin main --force if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" + } + }' + exit 0 fi # Check for git reset --hard (without specific commit, or dangerous patterns) # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/ if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: git reset --hard is prohibited","details":"Hard reset can cause irreversible loss of uncommitted changes","suggestion":"Use git stash or git reset --soft instead"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" + } + }' + exit 0 fi # Check for dangerous chmod/chown on system directories if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Recursive chmod/chown on root is prohibited","details":"This can break system permissions","suggestion":"Specify a more targeted path"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" + } + }' + exit 0 fi # Check for dd with of=/dev/ if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: dd to /dev/ is prohibited","details":"Writing to raw devices can destroy data","suggestion":"Use safer file operations"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" + } + }' + exit 0 fi # Check for mkfs (format filesystem) if echo "$COMMAND" | grep -qE 'mkfs'; then - echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: mkfs is prohibited","details":"Formatting filesystems can destroy data","suggestion":"This operation requires manual execution"}}' >&2 - exit 2 + jq -n '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" + } + }' + exit 0 fi # ============================================================================= diff --git a/src/mapify_cli/templates/hooks/block-secrets.py b/src/mapify_cli/templates/hooks/block-secrets.py index bbeb466..fb1d2e1 100755 --- a/src/mapify_cli/templates/hooks/block-secrets.py +++ b/src/mapify_cli/templates/hooks/block-secrets.py @@ -22,7 +22,7 @@ HOOK BEHAVIOR: - Exit code 0: Allow tool execution (non-sensitive file) - - Exit code 2: Block tool execution (sensitive file detected) + - Exit code 0 + permissionDecision=deny: Block tool execution (sensitive file detected) TESTING: echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py @@ -78,16 +78,21 @@ def is_sensitive_file(file_path: str) -> bool: def block_access(file_path: str, tool_name: str): """Block tool execution and output error message.""" - error_output = { - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "error": f"Blocked: Access to sensitive file '{file_path}' is prohibited", - "details": f"Tool '{tool_name}' attempted to access a protected file. Sensitive files include: .env*, *credentials*, *secret*, private keys (*.pem, *.key, *_rsa, etc.)", - "suggestion": "If you need to work with sensitive data, use environment variables or a secrets management system instead of reading raw credential files.", - } - } - print(json.dumps(error_output), file=sys.stderr) - sys.exit(2) # Exit code 2 blocks execution + print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": ( + f"Blocked: Access to sensitive file '{file_path}' is prohibited " + f"(tool={tool_name})." + ), + } + } + ) + ) + sys.exit(0) def main(): diff --git a/src/mapify_cli/templates/hooks/post-edit-reminder.py b/src/mapify_cli/templates/hooks/post-edit-reminder.py index de96db8..ff77beb 100755 --- a/src/mapify_cli/templates/hooks/post-edit-reminder.py +++ b/src/mapify_cli/templates/hooks/post-edit-reminder.py @@ -7,7 +7,7 @@ Trigger: Edit|Write Exit codes: Always 0 (non-blocking) -Output: ~80 char reminder via hookSpecificOutput.appended_text +Output: ~80 char reminder via hookSpecificOutput.additionalContext """ import json @@ -73,7 +73,12 @@ def main() -> None: # Inject lightweight reminder reminder = "[MAP] Code changed. Run tests before committing!" - output = {"hookSpecificOutput": {"appended_text": reminder}} + output = { + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": reminder, + } + } print(json.dumps(output)) sys.exit(0) diff --git a/src/mapify_cli/templates/hooks/ralph-context-pruner.py b/src/mapify_cli/templates/hooks/ralph-context-pruner.py index 8adb60f..b37b174 100755 --- a/src/mapify_cli/templates/hooks/ralph-context-pruner.py +++ b/src/mapify_cli/templates/hooks/ralph-context-pruner.py @@ -13,7 +13,7 @@ 0 - Always (PreCompact hooks don't block) Output: - hookSpecificOutput.appended_text - Recovery message injected into context + hookSpecificOutput.additionalContext - Recovery message injected into context """ import json import os @@ -238,7 +238,10 @@ def main() -> None: # Inject recovery message into context recovery_msg = format_recovery_message(state, branch) - output["hookSpecificOutput"] = {"appended_text": recovery_msg} + output["hookSpecificOutput"] = { + "hookEventName": "PreCompact", + "additionalContext": recovery_msg, + } # Prune log files in ALL branch directories actions = [] diff --git a/src/mapify_cli/templates/hooks/safety-guardrails.py b/src/mapify_cli/templates/hooks/safety-guardrails.py index a0158c7..4174337 100644 --- a/src/mapify_cli/templates/hooks/safety-guardrails.py +++ b/src/mapify_cli/templates/hooks/safety-guardrails.py @@ -9,7 +9,7 @@ Trigger: Edit|Write|Bash Exit codes: 0 - Allow - 2 - Block (with error message) + 0 + permissionDecision=deny - Block (preferred) """ import json @@ -84,17 +84,20 @@ def check_command_safety(command: str) -> tuple[bool, str]: return True, "" -def block_access(reason: str, tool_name: str) -> None: - """Block tool execution with error message.""" - error_output = { - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "error": reason, - "details": f"Tool '{tool_name}' blocked by safety guardrails", - } - } - print(json.dumps(error_output), file=sys.stderr) - sys.exit(2) +def deny(reason: str) -> None: + """Deny tool execution using structured PreToolUse decision control.""" + print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": reason, + } + } + ) + ) + sys.exit(0) def main() -> None: @@ -108,18 +111,18 @@ def main() -> None: tool_input = input_data.get("tool_input", {}) # Check file-based tools - if tool_name in ("Edit", "Write", "Read"): + if tool_name in ("Edit", "Write", "Read", "MultiEdit"): file_path = tool_input.get("file_path", "") or tool_input.get("path", "") is_safe, reason = check_file_safety(file_path) if not is_safe: - block_access(reason, tool_name) + deny(f"{reason} (tool={tool_name})") # Check bash commands elif tool_name == "Bash": command = tool_input.get("command", "") is_safe, reason = check_command_safety(command) if not is_safe: - block_access(reason, tool_name) + deny(f"{reason} (tool={tool_name})") print("{}") sys.exit(0) diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py index f0b8d34..72145e5 100755 --- a/src/mapify_cli/templates/hooks/workflow-context-injector.py +++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py @@ -243,7 +243,12 @@ def main() -> None: reminder = format_reminder(state, branch) if reminder: - output = {"hookSpecificOutput": {"appended_text": reminder}} + output = { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": reminder, + } + } print(json.dumps(output)) else: print("{}") diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py index 01e3429..499bdfc 100755 --- a/src/mapify_cli/templates/hooks/workflow-gate.py +++ b/src/mapify_cli/templates/hooks/workflow-gate.py @@ -23,12 +23,12 @@ - pending_steps: Dict mapping subtask_id -> list of pending steps HOOK BEHAVIOR: - - Exit code 0 + approve: Allow tool execution - - Exit code 2 + block: Block tool execution with error message + - Exit code 0: Allow tool execution + - Exit code 0 + permissionDecision=deny: Block tool execution with reason (preferred) TESTING: echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py - # Expected: Exit code 2, JSON error on stderr (no workflow state) + # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps) PERFORMANCE: Target: <100ms per invocation @@ -138,7 +138,7 @@ def main(): # Allow non-editing tools if tool_name not in EDITING_TOOLS: - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) # Get current branch @@ -150,25 +150,32 @@ def main(): if state is None: # No workflow state = not in MAP workflow mode, allow # (This prevents breaking non-MAP work) - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) # Check workflow compliance is_compliant, error_message = check_workflow_compliance(state) if is_compliant: - print(json.dumps({"decision": "approve"})) + print("{}") sys.exit(0) else: print( - json.dumps({"decision": "block", "reason": error_message}), - file=sys.stderr, + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": error_message, + } + } + ) ) - sys.exit(2) + sys.exit(0) except Exception as e: # On hook failure, approve (fail-open to avoid blocking work) - print(json.dumps({"decision": "approve"})) + print("{}") if os.environ.get("DEBUG_WORKFLOW_GATE"): print(f"[workflow-gate] ERROR: {e}", file=sys.stderr) sys.exit(0) diff --git a/src/mapify_cli/templates/references/workflow-state-schema.md b/src/mapify_cli/templates/references/workflow-state-schema.md index 42e34fd..e490808 100644 --- a/src/mapify_cli/templates/references/workflow-state-schema.md +++ b/src/mapify_cli/templates/references/workflow-state-schema.md @@ -266,4 +266,4 @@ This external state file combined with hook enforcement implements that principl - `.claude/hooks/workflow-gate.py` - Enforcement hook - `.claude/commands/map-efficient.md` - Workflow definition - `.map//task_plan_.md` - Human-readable plan -- `.claude/settings.hooks.json` - Hook registration +- `.claude/settings.json` - Hook registration diff --git a/src/mapify_cli/templates/settings.hooks.json b/src/mapify_cli/templates/settings.hooks.json deleted file mode 100644 index eb4e7ef..0000000 --- a/src/mapify_cli/templates/settings.hooks.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "$schema": "https://json.schemastore.org/claude-code-settings.json", - "description": "Claude Code hooks configuration for MAP Framework (Tiered)", - "hooks": { - "PreToolUse": [ - { - "matcher": "Edit|Write|Read|Bash", - "description": "Safety Guardrails - block sensitive files and dangerous commands", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/safety-guardrails.py", - "timeout": 5, - "description": "Blocks .env, credentials, private keys, and dangerous bash commands" - } - ] - }, - { - "matcher": "Edit|Write|Bash", - "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py", - "timeout": 3, - "description": "Injects workflow context only for Edit/Write and significant Bash commands" - } - ] - } - ], - "PostToolUse": [ - { - "matcher": "Edit|Write|MultiEdit", - "description": "Post-Edit Reminder - lightweight test reminder after code changes", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py", - "timeout": 3, - "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" - } - ] - } - ], - "PreCompact": [ - { - "matcher": "*", - "description": "Anti-Amnesia Hook - save state and prune logs before compaction", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/ralph-context-pruner.py", - "timeout": 10, - "description": "Saves workflow restore point and prunes old logs" - } - ] - } - ], - "Stop": [ - { - "matcher": "*", - "description": "Lightweight quality gates - only critical issues", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/end-of-turn.sh", - "timeout": 30, - "description": "Auto-fixes silently, only reports secrets/syntax errors" - } - ] - } - ] - } -} diff --git a/src/mapify_cli/templates/settings.json b/src/mapify_cli/templates/settings.json index 6c2c9a2..0779e49 100644 --- a/src/mapify_cli/templates/settings.json +++ b/src/mapify_cli/templates/settings.json @@ -33,5 +33,86 @@ "Bash(gofmt *)", "Bash(kubectl get *)" ] + }, + "hooks": { + "PreToolUse": [ + { + "matcher": "Edit|Write|Read|MultiEdit|Bash", + "description": "Safety Guardrails - block sensitive files and dangerous commands", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/safety-guardrails.py", + "timeout": 5, + "description": "Blocks .env, credentials, private keys, and dangerous bash commands" + } + ] + }, + { + "matcher": "Edit|Write|MultiEdit", + "description": "Workflow Gate - enforce Actor+Monitor before edits (MAP)", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-gate.py", + "timeout": 5, + "description": "Blocks Edit/Write/MultiEdit until workflow_state indicates Actor+Monitor completed" + } + ] + }, + { + "matcher": "Edit|Write|MultiEdit|Bash", + "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-context-injector.py", + "timeout": 3, + "description": "Injects workflow context only for Edit/Write/MultiEdit and significant Bash commands" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write|MultiEdit", + "description": "Post-Edit Reminder - lightweight test reminder after code changes", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-reminder.py", + "timeout": 3, + "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)" + } + ] + } + ], + "PreCompact": [ + { + "matcher": "*", + "description": "Anti-Amnesia Hook - save state and prune logs before compaction", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/ralph-context-pruner.py", + "timeout": 10, + "description": "Saves workflow restore point and prunes old logs" + } + ] + } + ], + "Stop": [ + { + "description": "Lightweight quality gates - only critical issues", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/end-of-turn.sh", + "timeout": 30, + "description": "Auto-fixes silently, only reports secrets/syntax errors" + } + ] + } + ] } } diff --git a/src/mapify_cli/templates/skills/README.md b/src/mapify_cli/templates/skills/README.md index 2d234d4..c15191d 100644 --- a/src/mapify_cli/templates/skills/README.md +++ b/src/mapify_cli/templates/skills/README.md @@ -152,12 +152,12 @@ Skills work seamlessly with the prompt improvement system: **Check:** 1. `skill-rules.json` has correct triggers -2. `improve-prompt.py` hook is enabled in `settings.hooks.json` +2. `improve-prompt.py` hook is enabled in `.claude/settings.json` 3. Hook processes UserPromptSubmit events **Fix:** - Update trigger patterns in `skill-rules.json` -- Verify hook configuration in `.claude/settings.hooks.json` +- Verify hook configuration in `.claude/settings.json` ### Skill content too long From 877de178006b2de0f005fd412d798991da4b24ce Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Mon, 16 Feb 2026 00:42:06 +0300 Subject: [PATCH 3/7] Make Monitor valid=false a hard stop and require integrated AAG contracts --- .claude/agents/actor.md | 5 ++++- .claude/agents/monitor.md | 4 ++++ .claude/agents/task-decomposer.md | 5 +++++ CLAUDE.md | 6 ++++++ src/mapify_cli/templates/agents/actor.md | 5 ++++- src/mapify_cli/templates/agents/monitor.md | 4 ++++ src/mapify_cli/templates/agents/task-decomposer.md | 5 +++++ 7 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index 1ed955e..bf30de5 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -739,7 +739,10 @@ output: {{feedback}} -**Action Required**: Address ALL issues above. Focus on: +**Action Required**: Address ALL issues above. Do NOT dismiss feedback as "out of scope" or "separate task". +If you believe an item should be deferred, STOP and ask the user for explicit approval to defer. + +Focus on: 1. Specific line items mentioned 2. Quality checklist items that failed 3. Security or constraint violations diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md index 59043ad..01b45f1 100644 --- a/.claude/agents/monitor.md +++ b/.claude/agents/monitor.md @@ -2523,6 +2523,10 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Requirements unmet → valid=false - Only MEDIUM/LOW issues → valid=true (with feedback) +**Hard-stop semantics**: +- If you set `valid=false`, the workflow MUST fix the issues before proceeding. +- Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral. + ### Evidence File (Artifact-Gated Validation) diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md index 58e8afd..a6e43fd 100644 --- a/.claude/agents/task-decomposer.md +++ b/.claude/agents/task-decomposer.md @@ -273,10 +273,14 @@ Return **ONLY** valid JSON in this exact structure: - This is the primary handoff artifact to the Actor agent - Actor "compiles" this contract into code; Monitor verifies against it - Format: `" -> (params) -> "` + - **Integration is part of the contract**: + - Prefer describing the *entrypoint + call chain* that makes the behavior real (especially for validation, policy checks, auth, migrations). + - Avoid leaf-only contracts that are easy to satisfy in isolation but not wired into production code paths. - Examples: - `"AuthService -> validate(token) -> returns 401|200 with user_id"` - `"ProjectModel -> add_field(archived_at: DateTime?) -> migration passes"` - `"RateLimiter -> decorate(endpoint, 100/min) -> returns 429 when exceeded"` + - `"ConfigLoader -> load_policy(path) -> calls validate_risk_policy(); raises ConfigValidationError on contradictions"` **subtasks[].implementation_hint**: Optional guidance for non-obvious implementations - RECOMMENDED when: complexity_score >= 5 OR security_critical OR dependencies.length >= 2 - OMIT when: standard pattern with obvious implementation @@ -504,6 +508,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive - [ ] Each subtask is atomic (independently implementable + testable) - [ ] Each subtask has an aag_contract in `Actor -> Action(params) -> Goal` format - [ ] AAG contracts are specific (not "does stuff" — name classes, methods, return types) +- [ ] AAG contracts include wiring/integration when relevant (entrypoint + validator/policy checks, not leaf-only helpers) - [ ] All dependencies are explicit and accurate - [ ] Subtasks ordered by dependency (foundations first) - [ ] 5-8 subtasks (not too granular or too coarse) diff --git a/CLAUDE.md b/CLAUDE.md index 29b8c1a..d9f322c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -39,6 +39,12 @@ Verification: - Don't add or expose secrets. Avoid reading/writing `.env*` and credential/key files. - When changing pattern storage behavior, ensure Curator-mediated writes through mem0 MCP are preserved (see `.claude/agents/curator.md` and `docs/ARCHITECTURE.md`). +## MAP Workflow Rules + +- If **Monitor** returns `valid=false`, treat it as a **hard stop**: fix the issues before proceeding. + - Do NOT dismiss Monitor feedback as "out of scope" / "separate task". + - If you're unsure whether fixing it is in scope: ask the user explicitly and wait for a decision. + ## Bash Command Guidelines **CRITICAL:** Avoid output buffering issues that cause commands to hang. diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index 1ed955e..bf30de5 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -739,7 +739,10 @@ output: {{feedback}} -**Action Required**: Address ALL issues above. Focus on: +**Action Required**: Address ALL issues above. Do NOT dismiss feedback as "out of scope" or "separate task". +If you believe an item should be deferred, STOP and ask the user for explicit approval to defer. + +Focus on: 1. Specific line items mentioned 2. Quality checklist items that failed 3. Security or constraint violations diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md index 59043ad..01b45f1 100644 --- a/src/mapify_cli/templates/agents/monitor.md +++ b/src/mapify_cli/templates/agents/monitor.md @@ -2523,6 +2523,10 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Requirements unmet → valid=false - Only MEDIUM/LOW issues → valid=true (with feedback) +**Hard-stop semantics**: +- If you set `valid=false`, the workflow MUST fix the issues before proceeding. +- Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral. + ### Evidence File (Artifact-Gated Validation) diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md index 58e8afd..a6e43fd 100644 --- a/src/mapify_cli/templates/agents/task-decomposer.md +++ b/src/mapify_cli/templates/agents/task-decomposer.md @@ -273,10 +273,14 @@ Return **ONLY** valid JSON in this exact structure: - This is the primary handoff artifact to the Actor agent - Actor "compiles" this contract into code; Monitor verifies against it - Format: `" -> (params) -> "` + - **Integration is part of the contract**: + - Prefer describing the *entrypoint + call chain* that makes the behavior real (especially for validation, policy checks, auth, migrations). + - Avoid leaf-only contracts that are easy to satisfy in isolation but not wired into production code paths. - Examples: - `"AuthService -> validate(token) -> returns 401|200 with user_id"` - `"ProjectModel -> add_field(archived_at: DateTime?) -> migration passes"` - `"RateLimiter -> decorate(endpoint, 100/min) -> returns 429 when exceeded"` + - `"ConfigLoader -> load_policy(path) -> calls validate_risk_policy(); raises ConfigValidationError on contradictions"` **subtasks[].implementation_hint**: Optional guidance for non-obvious implementations - RECOMMENDED when: complexity_score >= 5 OR security_critical OR dependencies.length >= 2 - OMIT when: standard pattern with obvious implementation @@ -504,6 +508,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive - [ ] Each subtask is atomic (independently implementable + testable) - [ ] Each subtask has an aag_contract in `Actor -> Action(params) -> Goal` format - [ ] AAG contracts are specific (not "does stuff" — name classes, methods, return types) +- [ ] AAG contracts include wiring/integration when relevant (entrypoint + validator/policy checks, not leaf-only helpers) - [ ] All dependencies are explicit and accurate - [ ] Subtasks ordered by dependency (foundations first) - [ ] 5-8 subtasks (not too granular or too coarse) From e953949e06359094df4adba7d4c66877db9eb9b3 Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Mon, 16 Feb 2026 00:56:15 +0300 Subject: [PATCH 4/7] Fix tests for Claude Code hook schema --- tests/hooks/test_block_dangerous.py | 103 +++++++++++++-------- tests/hooks/test_block_secrets.py | 113 ++++++++++++++++-------- tests/test_workflow_context_injector.py | 20 ++--- tests/test_workflow_gate.py | 97 ++++++++++---------- 4 files changed, 200 insertions(+), 133 deletions(-) diff --git a/tests/hooks/test_block_dangerous.py b/tests/hooks/test_block_dangerous.py index 3215e52..3ba4cf6 100644 --- a/tests/hooks/test_block_dangerous.py +++ b/tests/hooks/test_block_dangerous.py @@ -27,6 +27,22 @@ def run_hook(command: str) -> tuple[int, str, str]: return result.returncode, result.stdout, result.stderr +def _parse_stdout(stdout: str) -> dict: + stdout = (stdout or "").strip() + if not stdout: + return {} + return json.loads(stdout) + + +def _assert_denied(payload: dict, expected_substring: str | None = None) -> None: + assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse" + assert payload["hookSpecificOutput"].get("permissionDecision") == "deny" + reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "") + assert reason + if expected_substring: + assert expected_substring.lower() in reason.lower() + + # ============================================================================= # Validation Criteria Tests # ============================================================================= @@ -36,32 +52,37 @@ class TestValidationCriteria: """Tests for the validation criteria from task decomposition.""" def test_criterion_1_rm_rf_blocked(self): - """VC1: Bash command 'rm -rf /' is blocked with exit code 2.""" - exit_code, _, stderr = run_hook("rm -rf /") - assert exit_code == 2, f"Expected exit 2, got {exit_code}" - assert "Blocked" in stderr, f"Expected 'Blocked' in stderr: {stderr}" + """VC1: Bash command 'rm -rf /' is blocked (permissionDecision=deny).""" + exit_code, stdout, stderr = run_hook("rm -rf /") + assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="rm -rf") def test_criterion_2_force_push_main_blocked(self): """VC2: git push --force origin main is blocked.""" - exit_code, _, stderr = run_hook("git push --force origin main") - assert exit_code == 2, f"Expected exit 2, got {exit_code}" - assert "Blocked" in stderr + exit_code, stdout, stderr = run_hook("git push --force origin main") + assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="force push") def test_criterion_3_reset_hard_blocked(self): """VC3: git reset --hard is blocked.""" - exit_code, _, stderr = run_hook("git reset --hard") - assert exit_code == 2, f"Expected exit 2, got {exit_code}" - assert "Blocked" in stderr + exit_code, stdout, stderr = run_hook("git reset --hard") + assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="reset --hard") def test_criterion_4_force_push_feature_allowed(self): """VC4: git push --force origin feature-branch is allowed.""" - exit_code, _, _ = run_hook("git push --force origin feature-branch") - assert exit_code == 0, "Force push to feature branch should be allowed" + exit_code, stdout, stderr = run_hook("git push --force origin feature-branch") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + assert _parse_stdout(stdout) == {}, "Force push to feature branch should be allowed" def test_criterion_5_legitimate_allowed(self): """VC5: Legitimate commands like 'pytest' are allowed.""" - exit_code, _, _ = run_hook("pytest") - assert exit_code == 0, "pytest should be allowed" + exit_code, stdout, stderr = run_hook("pytest") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + assert _parse_stdout(stdout) == {}, "pytest should be allowed" # ============================================================================= @@ -86,9 +107,10 @@ class TestRmRfBlocking: ], ) def test_rm_rf_variants_blocked(self, command): - exit_code, _, stderr = run_hook(command) - assert exit_code == 2, f"'{command}' should be blocked" - assert "rm -rf" in stderr.lower() or "blocked" in stderr.lower() + exit_code, stdout, stderr = run_hook(command) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="rm -rf") def test_rm_without_rf_allowed(self): """rm without -rf should be allowed.""" @@ -123,9 +145,10 @@ class TestGitForcePushBlocking: ], ) def test_force_push_protected_blocked(self, command): - exit_code, _, stderr = run_hook(command) - assert exit_code == 2, f"'{command}' should be blocked" - assert "Blocked" in stderr + exit_code, stdout, stderr = run_hook(command) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="force push") @pytest.mark.parametrize( "command", @@ -166,9 +189,10 @@ class TestGitResetHardBlocking: ], ) def test_reset_hard_variants_blocked(self, command): - exit_code, _, stderr = run_hook(command) - assert exit_code == 2, f"'{command}' should be blocked" - assert "Blocked" in stderr + exit_code, stdout, stderr = run_hook(command) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="reset --hard") @pytest.mark.parametrize( "command", @@ -217,8 +241,9 @@ class TestLegitimateCommands: ], ) def test_safe_commands_allowed(self, command): - exit_code, _, stderr = run_hook(command) + exit_code, stdout, stderr = run_hook(command) assert exit_code == 0, f"'{command}' should be allowed. stderr: {stderr}" + assert _parse_stdout(stdout) == {}, f"'{command}' should be allowed" # ============================================================================= @@ -282,18 +307,24 @@ class TestAdditionalDangerousCommands: ], ) def test_recursive_permissions_on_root_blocked(self, command): - exit_code, _, _ = run_hook(command) - assert exit_code == 2, f"'{command}' should be blocked" + exit_code, stdout, stderr = run_hook(command) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="chmod/chown") def test_dd_to_device_blocked(self): """dd writing to /dev/ should be blocked.""" - exit_code, _, _ = run_hook("dd if=/dev/zero of=/dev/sda") - assert exit_code == 2, "dd to device should be blocked" + exit_code, stdout, stderr = run_hook("dd if=/dev/zero of=/dev/sda") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="dd") def test_mkfs_blocked(self): """mkfs should be blocked.""" - exit_code, _, _ = run_hook("mkfs.ext4 /dev/sda1") - assert exit_code == 2, "mkfs should be blocked" + exit_code, stdout, stderr = run_hook("mkfs.ext4 /dev/sda1") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="mkfs") # ============================================================================= @@ -305,10 +336,8 @@ class TestOutputFormat: """Test that error output follows expected JSON structure.""" def test_error_json_structure(self): - """Blocked command should output valid JSON to stderr.""" - _, _, stderr = run_hook("rm -rf /") - error_data = json.loads(stderr) - assert "hookSpecificOutput" in error_data - assert error_data["hookSpecificOutput"]["hookEventName"] == "PreToolUse" - assert "error" in error_data["hookSpecificOutput"] - assert "Blocked" in error_data["hookSpecificOutput"]["error"] + """Blocked command should output valid JSON to stdout.""" + exit_code, stdout, stderr = run_hook("rm -rf /") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_substring="Blocked") diff --git a/tests/hooks/test_block_secrets.py b/tests/hooks/test_block_secrets.py index 6d1e339..36ba85c 100644 --- a/tests/hooks/test_block_secrets.py +++ b/tests/hooks/test_block_secrets.py @@ -29,6 +29,22 @@ def run_hook(tool_name: str, file_path: str) -> tuple[int, str, str]: return result.returncode, result.stdout, result.stderr +def _parse_stdout(stdout: str) -> dict: + stdout = (stdout or "").strip() + if not stdout: + return {} + return json.loads(stdout) + + +def _assert_denied(payload: dict, expected_file_fragment: str | None = None) -> None: + assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse" + assert payload["hookSpecificOutput"].get("permissionDecision") == "deny" + reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "") + assert reason + if expected_file_fragment: + assert expected_file_fragment.lower() in reason.lower() + + # ============================================================================= # Validation Criteria Tests # ============================================================================= @@ -38,16 +54,18 @@ class TestValidationCriteria: """Tests for the 5 validation criteria from the task.""" def test_criterion_1_env_blocked(self): - """VC1: Read('.env') returns exit code 2 with 'Blocked' message.""" - exit_code, _, stderr = run_hook("Read", ".env") - assert exit_code == 2, f"Expected exit 2, got {exit_code}" - assert "Blocked" in stderr, f"Expected 'Blocked' in stderr: {stderr}" + """VC1: Read('.env') is blocked (permissionDecision=deny).""" + exit_code, stdout, stderr = run_hook("Read", ".env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") def test_criterion_2_credentials_blocked(self): """VC2: Write('credentials.json') is blocked.""" - exit_code, _, stderr = run_hook("Write", "credentials.json") - assert exit_code == 2, f"Expected exit 2, got {exit_code}" - assert "Blocked" in stderr + exit_code, stdout, stderr = run_hook("Write", "credentials.json") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment="credentials.json") def test_criterion_3_legitimate_allowed(self): """VC3: Legitimate files like 'app.py' are allowed.""" @@ -86,8 +104,10 @@ class TestEnvFiles: ], ) def test_env_variants_blocked(self, filename): - exit_code, _, _ = run_hook("Read", filename) - assert exit_code == 2, f"{filename} should be blocked" + exit_code, stdout, stderr = run_hook("Read", filename) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=filename) class TestCredentialFiles: @@ -104,8 +124,10 @@ class TestCredentialFiles: ], ) def test_credentials_blocked(self, filename): - exit_code, _, _ = run_hook("Read", filename) - assert exit_code == 2, f"{filename} should be blocked" + exit_code, stdout, stderr = run_hook("Read", filename) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=filename) class TestSecretFiles: @@ -122,8 +144,10 @@ class TestSecretFiles: ], ) def test_secrets_blocked(self, filename): - exit_code, _, _ = run_hook("Read", filename) - assert exit_code == 2, f"{filename} should be blocked" + exit_code, stdout, stderr = run_hook("Read", filename) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=filename) class TestPrivateKeys: @@ -156,8 +180,10 @@ class TestPrivateKeys: ], ) def test_private_keys_blocked(self, filename): - exit_code, _, stderr = run_hook("Read", filename) - assert exit_code == 2, f"{filename} should be blocked. stderr: {stderr}" + exit_code, stdout, stderr = run_hook("Read", filename) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=filename) # ============================================================================= @@ -246,15 +272,17 @@ class TestPathTraversal: ], ) def test_sensitive_path_component_blocked(self, path): - exit_code, _, stderr = run_hook("Read", path) - assert ( - exit_code == 2 - ), f"Path with sensitive component '{path}' should be blocked. stderr: {stderr}" + exit_code, stdout, stderr = run_hook("Read", path) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=path) def test_relative_traversal_with_env(self): """Test ../../../.env style paths.""" - exit_code, _, _ = run_hook("Read", "../../../.env") - assert exit_code == 2, "Relative path with .env should be blocked" + exit_code, stdout, stderr = run_hook("Read", "../../../.env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") # ============================================================================= @@ -266,16 +294,22 @@ class TestToolInterception: """Test that only Read/Edit/Write tools are intercepted.""" def test_read_intercepted(self): - exit_code, _, _ = run_hook("Read", ".env") - assert exit_code == 2 + exit_code, stdout, stderr = run_hook("Read", ".env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") def test_write_intercepted(self): - exit_code, _, _ = run_hook("Write", ".env") - assert exit_code == 2 + exit_code, stdout, stderr = run_hook("Write", ".env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") def test_edit_intercepted(self): - exit_code, _, _ = run_hook("Edit", ".env") - assert exit_code == 2 + exit_code, stdout, stderr = run_hook("Edit", ".env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") def test_bash_not_intercepted(self): """Bash tool should not be intercepted (different hook).""" @@ -338,19 +372,18 @@ class TestOutputFormat: """Test that error output follows expected JSON structure.""" def test_error_json_structure(self): - """Blocked file should output valid JSON to stderr.""" - _, _, stderr = run_hook("Read", ".env") - error_data = json.loads(stderr) - assert "hookSpecificOutput" in error_data - assert error_data["hookSpecificOutput"]["hookEventName"] == "PreToolUse" - assert "error" in error_data["hookSpecificOutput"] - assert "Blocked" in error_data["hookSpecificOutput"]["error"] + """Blocked file should output valid JSON to stdout.""" + exit_code, stdout, stderr = run_hook("Read", ".env") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=".env") def test_error_contains_file_path(self): """Error message should mention the blocked file.""" - _, _, stderr = run_hook("Read", "credentials.json") - error_data = json.loads(stderr) - assert "credentials.json" in error_data["hookSpecificOutput"]["error"] + exit_code, stdout, stderr = run_hook("Read", "credentials.json") + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment="credentials.json") # ============================================================================= @@ -374,5 +407,7 @@ class TestCaseSensitivity: ], ) def test_case_insensitive_blocking(self, filename): - exit_code, _, _ = run_hook("Read", filename) - assert exit_code == 2, f"Case variant {filename} should be blocked" + exit_code, stdout, stderr = run_hook("Read", filename) + assert exit_code == 0, f"Expected exit 0. stderr: {stderr}" + payload = _parse_stdout(stdout) + _assert_denied(payload, expected_file_fragment=filename) diff --git a/tests/test_workflow_context_injector.py b/tests/test_workflow_context_injector.py index e6f46dc..e6d6798 100644 --- a/tests/test_workflow_context_injector.py +++ b/tests/test_workflow_context_injector.py @@ -56,12 +56,12 @@ def test_injects_for_edit_when_step_state_exists(tmp_path: Path) -> None: assert code == 0 assert err == "" payload = json.loads(out) - appended = payload["hookSpecificOutput"]["appended_text"] - assert "[MAP]" in appended - assert "1.55" in appended - assert "REVIEW_PLAN" in appended - assert "ST-001" in appended - assert "REQUIRED" in appended + additional = payload["hookSpecificOutput"]["additionalContext"] + assert "[MAP]" in additional + assert "1.55" in additional + assert "REVIEW_PLAN" in additional + assert "ST-001" in additional + assert "REQUIRED" in additional def test_skips_for_readonly_bash(tmp_path: Path) -> None: @@ -111,7 +111,7 @@ def test_injects_for_pytest_bash_when_step_state_exists(tmp_path: Path) -> None: assert code == 0 assert err == "" payload = json.loads(out) - appended = payload["hookSpecificOutput"]["appended_text"] - assert "2.8" in appended - assert "TESTS_GATE" in appended - assert "ST-002" in appended + additional = payload["hookSpecificOutput"]["additionalContext"] + assert "2.8" in additional + assert "TESTS_GATE" in additional + assert "ST-002" in additional diff --git a/tests/test_workflow_gate.py b/tests/test_workflow_gate.py index b997aaf..5a2027f 100644 --- a/tests/test_workflow_gate.py +++ b/tests/test_workflow_gate.py @@ -23,6 +23,23 @@ class TestWorkflowGate: HOOK_PATH = REPO_ROOT / ".claude/hooks/workflow-gate.py" + def _parse_stdout(self, stdout: str) -> dict: + stdout = (stdout or "").strip() + if not stdout: + return {} + return json.loads(stdout) + + def _assert_allowed(self, stdout: str) -> None: + assert self._parse_stdout(stdout) == {} + + def _assert_denied(self, stdout: str) -> str: + payload = self._parse_stdout(stdout) + assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse" + assert payload["hookSpecificOutput"].get("permissionDecision") == "deny" + reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "") + assert reason + return reason + def run_hook( self, input_data: dict, tmp_path: Path, branch: str = "master" ) -> Tuple[int, str, str]: @@ -129,8 +146,7 @@ def test_allows_non_editing_tools(self, tmp_path: Path) -> None: ) assert code == 0, f"{tool_name} should be allowed" - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) def test_allows_edit_when_no_workflow_state(self, tmp_path: Path) -> None: """Edit should be allowed when workflow_state.json doesn't exist (fail-open).""" @@ -143,8 +159,7 @@ def test_allows_edit_when_no_workflow_state(self, tmp_path: Path) -> None: ) assert code == 0 - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) def test_blocks_edit_without_actor(self, tmp_path: Path) -> None: """Edit should be blocked if 'actor' step not completed.""" @@ -170,7 +185,7 @@ def test_blocks_edit_without_actor(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Edit", "tool_input": {"file_path": "/test.py"}, @@ -178,11 +193,10 @@ def test_blocks_edit_without_actor(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" - assert "actor" in response["reason"].lower() - assert "monitor" in response["reason"].lower() + assert code == 0 # Hook signals blocking via JSON decision + reason = self._assert_denied(stdout) + assert "actor" in reason.lower() + assert "monitor" in reason.lower() def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None: """Edit should be blocked if 'monitor' step not completed (even if actor done).""" @@ -206,7 +220,7 @@ def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Edit", "tool_input": {"file_path": "/test.py"}, @@ -214,10 +228,9 @@ def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" - assert "monitor" in response["reason"].lower() + assert code == 0 + reason = self._assert_denied(stdout) + assert "monitor" in reason.lower() def test_allows_edit_after_actor_and_monitor(self, tmp_path: Path) -> None: """Edit should be allowed when both actor and monitor are completed.""" @@ -246,8 +259,7 @@ def test_allows_edit_after_actor_and_monitor(self, tmp_path: Path) -> None: ) assert code == 0 - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None: """Write should be blocked like Edit.""" @@ -265,7 +277,7 @@ def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Write", "tool_input": {"file_path": "/test.py"}, @@ -273,9 +285,8 @@ def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" + assert code == 0 + self._assert_denied(stdout) def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None: """MultiEdit should be blocked like Edit and Write.""" @@ -293,7 +304,7 @@ def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "MultiEdit", "tool_input": {"file_path": "/test.py"}, @@ -301,9 +312,8 @@ def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" + assert code == 0 + self._assert_denied(stdout) def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None: """Should block if current_subtask is null or missing.""" @@ -321,7 +331,7 @@ def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Edit", "tool_input": {"file_path": "/test.py"}, @@ -329,10 +339,9 @@ def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" - assert "current_subtask" in response["reason"].lower() + assert code == 0 + reason = self._assert_denied(stdout) + assert "current_subtask" in reason.lower() def test_handles_invalid_json_gracefully(self, tmp_path: Path) -> None: """Should fail-open (allow) on invalid workflow_state.json.""" @@ -350,8 +359,7 @@ def test_handles_invalid_json_gracefully(self, tmp_path: Path) -> None: ) assert code == 0 # Fail-open - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) def test_handles_malformed_hook_input_gracefully(self, tmp_path: Path) -> None: """Should fail-open (allow) on malformed hook input JSON.""" @@ -364,8 +372,7 @@ def test_handles_malformed_hook_input_gracefully(self, tmp_path: Path) -> None: ) assert result.returncode == 0 # Fail-open - response = json.loads(result.stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(result.stdout) def test_respects_branch_scoping(self, tmp_path: Path) -> None: """Workflow state should be branch-scoped (.map//).""" @@ -397,8 +404,7 @@ def test_respects_branch_scoping(self, tmp_path: Path) -> None: ) assert code == 0 - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) def test_different_subtask_steps_independent(self, tmp_path: Path) -> None: """completed_steps should be tracked per subtask.""" @@ -420,7 +426,7 @@ def test_different_subtask_steps_independent(self, tmp_path: Path) -> None: ) # Should block because ST-002 (current) is incomplete - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Edit", "tool_input": {"file_path": "/test.py"}, @@ -428,10 +434,9 @@ def test_different_subtask_steps_independent(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 # Blocked - response = json.loads(stderr.strip()) - assert response["decision"] == "block" - assert "ST-002" in response["reason"] or "actor" in response["reason"].lower() + assert code == 0 + reason = self._assert_denied(stdout) + assert "ST-002" in reason or "actor" in reason.lower() def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None: """Block message should explain required workflow steps.""" @@ -449,7 +454,7 @@ def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None: ) ) - code, _, stderr = self.run_hook( + code, stdout, _ = self.run_hook( { "tool_name": "Edit", "tool_input": {"file_path": "/test.py"}, @@ -457,9 +462,8 @@ def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None: tmp_path, ) - assert code == 2 - response = json.loads(stderr.strip()) - error_msg = response["reason"] + assert code == 0 + error_msg = self._assert_denied(stdout) # Should mention the workflow steps assert "actor" in error_msg.lower() @@ -506,5 +510,4 @@ def test_branch_name_sanitization( ) assert code == 0 - response = json.loads(stdout.strip()) - assert response["decision"] == "approve" + self._assert_allowed(stdout) From 5fac796a3ee2c37a557730d98c9f0e5594b9860f Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Mon, 16 Feb 2026 01:15:46 +0300 Subject: [PATCH 5/7] Address PR feedback: hook docs, lint, robustness --- .claude/agents/actor.md | 4 +- .claude/agents/monitor.md | 2 +- .claude/hooks/block-dangerous.sh | 165 +++++++++--------- .claude/hooks/block-secrets.py | 2 +- .claude/hooks/workflow-gate.py | 3 +- IMPLEMENTATION_SUMMARY.md | 3 +- src/mapify_cli/templates/agents/actor.md | 4 +- src/mapify_cli/templates/agents/monitor.md | 2 +- .../templates/hooks/block-dangerous.sh | 165 +++++++++--------- .../templates/hooks/block-secrets.py | 2 +- .../templates/hooks/workflow-gate.py | 3 +- tests/hooks/test_block_secrets.py | 5 +- 12 files changed, 187 insertions(+), 173 deletions(-) diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md index bf30de5..97fa53e 100644 --- a/.claude/agents/actor.md +++ b/.claude/agents/actor.md @@ -380,7 +380,7 @@ Document key decisions using this structure: - Prefer naming tests with `vc` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage. **Format**: -``` +```text 1. test_[function]_[scenario]_[expected] Input: [specific input] Expected: [specific output/behavior] @@ -401,7 +401,7 @@ Document key decisions using this structure: If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced. **Format**: -``` +```text VC1: - Code: path/to/file.ext#SymbolOrLocation - Tests: path/to/test_file.ext::test_name (or N/A with reason) diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md index 01b45f1..807ef53 100644 --- a/.claude/agents/monitor.md +++ b/.claude/agents/monitor.md @@ -2524,7 +2524,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Only MEDIUM/LOW issues → valid=true (with feedback) **Hard-stop semantics**: -- If you set `valid=false`, the workflow MUST fix the issues before proceeding. +- If you set `valid=false`, the workflow MUST resolve the issues before proceeding. - Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral. diff --git a/.claude/hooks/block-dangerous.sh b/.claude/hooks/block-dangerous.sh index f42a6ba..3ba7186 100755 --- a/.claude/hooks/block-dangerous.sh +++ b/.claude/hooks/block-dangerous.sh @@ -18,7 +18,7 @@ # # TESTING: # echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh -# # Expected: Exit code 2 +# # Expected: Exit code 0 with permissionDecision=deny in JSON output # # ============================================================================= @@ -27,9 +27,60 @@ set -euo pipefail # Read JSON from stdin INPUT=$(cat) -# Extract tool_name and command using jq (or fallback to grep) -TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || echo "") -COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "") +deny() { + local reason="$1" + + if command -v jq >/dev/null 2>&1; then + jq -n --arg reason "$reason" '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: $reason + } + }' + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + python3 - "$reason" <<'PY' +import json +import sys + +reason = sys.argv[1] +print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": reason, + } + } + ) +) +PY + return 0 + fi + + local escaped=${reason//\\/\\\\} + escaped=${escaped//\"/\\\"} + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$escaped" +} + +if command -v jq >/dev/null 2>&1; then + TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT" 2>/dev/null || true) + COMMAND=$(jq -r '.tool_input.command // empty' <<<"$INPUT" 2>/dev/null || true) +elif command -v python3 >/dev/null 2>&1; then + TOOL_NAME=$( + python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); print(d.get("tool_name",""))' <<<"$INPUT" 2>/dev/null || true + ) + COMMAND=$( + python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); ti=d.get("tool_input") or {}; print(ti.get("command",""))' <<<"$INPUT" 2>/dev/null || true + ) +else + TOOL_NAME="" + COMMAND="" +fi # Only intercept Bash tool if [[ "$TOOL_NAME" != "Bash" ]]; then @@ -51,113 +102,67 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]') # ============================================================================= # Check for rm -rf (recursive force delete) -# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force -if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 -fi - -# Catch rm -rf at end of command or with path immediately after -if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 -fi - -# Catch separated flags: rm -r -f or rm -f -r -if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 +# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force, and edge +# cases where flags touch the path (e.g., rm -rf0dir). +if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]]'; then + # Long flags: --recursive and --force + if echo "$COMMAND_LOWER" | grep -qE -- '--recursive' && echo "$COMMAND_LOWER" | grep -qE -- '--force'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + + # Combined short flags: -rf / -fr (including edge cases where the path touches flags) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*r[^[:space:]]*f'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*f[^[:space:]]*r'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + + # Separate short flags: -r -f / -f -r + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-r([[:space:]]|$)' && echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-f([[:space:]]|$)'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi fi # Check for git push --force to main/master # Matches: git push --force origin main, git push -f origin master if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" - } - }' + deny "Blocked: Force push to main/master is prohibited (can overwrite team work)" exit 0 fi # Also check reverse order: git push origin main --force if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" - } - }' + deny "Blocked: Force push to main/master is prohibited (can overwrite team work)" exit 0 fi # Check for git reset --hard (without specific commit, or dangerous patterns) # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/ if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" - } - }' + deny "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" exit 0 fi # Check for dangerous chmod/chown on system directories if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" - } - }' + deny "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" exit 0 fi # Check for dd with of=/dev/ if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" - } - }' + deny "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" exit 0 fi # Check for mkfs (format filesystem) if echo "$COMMAND" | grep -qE 'mkfs'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" - } - }' + deny "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" exit 0 fi diff --git a/.claude/hooks/block-secrets.py b/.claude/hooks/block-secrets.py index fb1d2e1..ac46e72 100755 --- a/.claude/hooks/block-secrets.py +++ b/.claude/hooks/block-secrets.py @@ -26,7 +26,7 @@ TESTING: echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py - # Expected: Exit code 2, JSON error on stderr + # Expected: Exit code 0, JSON on stdout with permissionDecision=deny PERFORMANCE: Target: <100ms per invocation diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py index 499bdfc..b0e67f0 100755 --- a/.claude/hooks/workflow-gate.py +++ b/.claude/hooks/workflow-gate.py @@ -28,7 +28,8 @@ TESTING: echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py - # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps) + # Expected (no workflow state): Exit 0, stdout: {} + # Expected (workflow active, missing steps): Exit 0, stdout: {"hookSpecificOutput":{"permissionDecision":"deny",...}} PERFORMANCE: Target: <100ms per invocation diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md index cecea31..96d4b8a 100644 --- a/IMPLEMENTATION_SUMMARY.md +++ b/IMPLEMENTATION_SUMMARY.md @@ -148,7 +148,8 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30 .claude/ ├── hooks/ │ ├── workflow-context-injector.py (NEW) -│ └── settings.json (UPDATED) +│ └── (other hooks omitted) +├── settings.json (UPDATED) ├── commands/ │ ├── map-efficient.md (OPTIMIZED - 995→394 lines) │ └── map-efficient.md.backup (BACKUP) diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md index bf30de5..97fa53e 100644 --- a/src/mapify_cli/templates/agents/actor.md +++ b/src/mapify_cli/templates/agents/actor.md @@ -380,7 +380,7 @@ Document key decisions using this structure: - Prefer naming tests with `vc` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage. **Format**: -``` +```text 1. test_[function]_[scenario]_[expected] Input: [specific input] Expected: [specific output/behavior] @@ -401,7 +401,7 @@ Document key decisions using this structure: If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced. **Format**: -``` +```text VC1: - Code: path/to/file.ext#SymbolOrLocation - Tests: path/to/test_file.ext::test_name (or N/A with reason) diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md index 01b45f1..807ef53 100644 --- a/src/mapify_cli/templates/agents/monitor.md +++ b/src/mapify_cli/templates/agents/monitor.md @@ -2524,7 +2524,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600): - Only MEDIUM/LOW issues → valid=true (with feedback) **Hard-stop semantics**: -- If you set `valid=false`, the workflow MUST fix the issues before proceeding. +- If you set `valid=false`, the workflow MUST resolve the issues before proceeding. - Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral. diff --git a/src/mapify_cli/templates/hooks/block-dangerous.sh b/src/mapify_cli/templates/hooks/block-dangerous.sh index f42a6ba..3ba7186 100755 --- a/src/mapify_cli/templates/hooks/block-dangerous.sh +++ b/src/mapify_cli/templates/hooks/block-dangerous.sh @@ -18,7 +18,7 @@ # # TESTING: # echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh -# # Expected: Exit code 2 +# # Expected: Exit code 0 with permissionDecision=deny in JSON output # # ============================================================================= @@ -27,9 +27,60 @@ set -euo pipefail # Read JSON from stdin INPUT=$(cat) -# Extract tool_name and command using jq (or fallback to grep) -TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || echo "") -COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "") +deny() { + local reason="$1" + + if command -v jq >/dev/null 2>&1; then + jq -n --arg reason "$reason" '{ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "deny", + permissionDecisionReason: $reason + } + }' + return 0 + fi + + if command -v python3 >/dev/null 2>&1; then + python3 - "$reason" <<'PY' +import json +import sys + +reason = sys.argv[1] +print( + json.dumps( + { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": reason, + } + } + ) +) +PY + return 0 + fi + + local escaped=${reason//\\/\\\\} + escaped=${escaped//\"/\\\"} + printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$escaped" +} + +if command -v jq >/dev/null 2>&1; then + TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT" 2>/dev/null || true) + COMMAND=$(jq -r '.tool_input.command // empty' <<<"$INPUT" 2>/dev/null || true) +elif command -v python3 >/dev/null 2>&1; then + TOOL_NAME=$( + python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); print(d.get("tool_name",""))' <<<"$INPUT" 2>/dev/null || true + ) + COMMAND=$( + python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); ti=d.get("tool_input") or {}; print(ti.get("command",""))' <<<"$INPUT" 2>/dev/null || true + ) +else + TOOL_NAME="" + COMMAND="" +fi # Only intercept Bash tool if [[ "$TOOL_NAME" != "Bash" ]]; then @@ -51,113 +102,67 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]') # ============================================================================= # Check for rm -rf (recursive force delete) -# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force -if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 -fi - -# Catch rm -rf at end of command or with path immediately after -if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 -fi - -# Catch separated flags: rm -r -f or rm -f -r -if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" - } - }' - exit 0 +# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force, and edge +# cases where flags touch the path (e.g., rm -rf0dir). +if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]]'; then + # Long flags: --recursive and --force + if echo "$COMMAND_LOWER" | grep -qE -- '--recursive' && echo "$COMMAND_LOWER" | grep -qE -- '--force'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + + # Combined short flags: -rf / -fr (including edge cases where the path touches flags) + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*r[^[:space:]]*f'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*f[^[:space:]]*r'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi + + # Separate short flags: -r -f / -f -r + if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-r([[:space:]]|$)' && echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-f([[:space:]]|$)'; then + deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)" + exit 0 + fi fi # Check for git push --force to main/master # Matches: git push --force origin main, git push -f origin master if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" - } - }' + deny "Blocked: Force push to main/master is prohibited (can overwrite team work)" exit 0 fi # Also check reverse order: git push origin main --force if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)" - } - }' + deny "Blocked: Force push to main/master is prohibited (can overwrite team work)" exit 0 fi # Check for git reset --hard (without specific commit, or dangerous patterns) # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/ if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" - } - }' + deny "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)" exit 0 fi # Check for dangerous chmod/chown on system directories if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" - } - }' + deny "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)" exit 0 fi # Check for dd with of=/dev/ if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" - } - }' + deny "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)" exit 0 fi # Check for mkfs (format filesystem) if echo "$COMMAND" | grep -qE 'mkfs'; then - jq -n '{ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "deny", - permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" - } - }' + deny "Blocked: mkfs is prohibited (formatting filesystems can destroy data)" exit 0 fi diff --git a/src/mapify_cli/templates/hooks/block-secrets.py b/src/mapify_cli/templates/hooks/block-secrets.py index fb1d2e1..ac46e72 100755 --- a/src/mapify_cli/templates/hooks/block-secrets.py +++ b/src/mapify_cli/templates/hooks/block-secrets.py @@ -26,7 +26,7 @@ TESTING: echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py - # Expected: Exit code 2, JSON error on stderr + # Expected: Exit code 0, JSON on stdout with permissionDecision=deny PERFORMANCE: Target: <100ms per invocation diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py index 499bdfc..b0e67f0 100755 --- a/src/mapify_cli/templates/hooks/workflow-gate.py +++ b/src/mapify_cli/templates/hooks/workflow-gate.py @@ -28,7 +28,8 @@ TESTING: echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py - # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps) + # Expected (no workflow state): Exit 0, stdout: {} + # Expected (workflow active, missing steps): Exit 0, stdout: {"hookSpecificOutput":{"permissionDecision":"deny",...}} PERFORMANCE: Target: <100ms per invocation diff --git a/tests/hooks/test_block_secrets.py b/tests/hooks/test_block_secrets.py index 36ba85c..09a2fd0 100644 --- a/tests/hooks/test_block_secrets.py +++ b/tests/hooks/test_block_secrets.py @@ -69,8 +69,9 @@ def test_criterion_2_credentials_blocked(self): def test_criterion_3_legitimate_allowed(self): """VC3: Legitimate files like 'app.py' are allowed.""" - exit_code, _, _ = run_hook("Read", "app.py") - assert exit_code == 0, "app.py should be allowed" + exit_code, stdout, stderr = run_hook("Read", "app.py") + assert exit_code == 0, f"app.py should be allowed. stderr: {stderr}" + assert _parse_stdout(stdout) == {} def test_criterion_4_performance(self): """VC4: Hook execution completes in <100ms.""" From ab9a5af9a65c1ef318d7e8ab83f16d1364828a51 Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Tue, 17 Feb 2026 00:35:20 +0300 Subject: [PATCH 6/7] research/preconditions-postconditions: Rewrite map-review as interactive 4-section review Replace batch-report workflow with interactive section-by-section review: - 4 sections: Architecture, Code Quality, Tests, Performance - Review Section Protocol with BIG/SMALL mode and CI mode (--ci/--auto) - 4 targeted mem0 queries + 3 parallel agent calls - Structured verdict: PROCEED / REVISE / BLOCK with priority ordering - Add 3 test classes (TestMapReviewStructure, TestMapReviewVerdictLogic, TestAgentSchemaFieldsPresent) and command template sync tests - Update ARCHITECTURE.md diagram, section description, and token estimates --- .claude/commands/map-review.md | 299 +++++++++++++----- docs/ARCHITECTURE.md | 41 ++- .../templates/commands/map-review.md | 299 +++++++++++++----- tests/test_command_templates.py | 212 +++++++++++++ tests/test_template_sync.py | 82 +++++ 5 files changed, 770 insertions(+), 163 deletions(-) diff --git a/.claude/commands/map-review.md b/.claude/commands/map-review.md index b295835..ea8623b 100644 --- a/.claude/commands/map-review.md +++ b/.claude/commands/map-review.md @@ -1,47 +1,133 @@ --- -description: Comprehensive MAP review of changes using Monitor, Predictor, and Evaluator agents +description: Interactive 4-section code review using Monitor, Predictor, and Evaluator agents --- # MAP Review Workflow -Review current changes using MAP agents for comprehensive quality analysis. +Interactive, structured code review of current changes using Monitor, Predictor, and Evaluator agents. -## What This Command Does +**Task:** $ARGUMENTS -1. **Monitor** - Validates code correctness, security, standards compliance -2. **Predictor** - Analyzes impact on codebase, breaking changes, dependencies -3. **Evaluator** - Scores overall quality and provides actionable feedback +## Execution Rules -## Step 1: Query mem0 for Review Patterns +1. Execute all steps in the order listed below +2. Use the exact `subagent_type` values specified (monitor, predictor, evaluator) +3. All 3 agents are required — do not skip any +4. **Monitor `valid=false` is a hard stop** — report issues immediately, do not proceed to interactive presentation +5. Wait for all parallel calls to complete before proceeding to the next phase -```bash -# Get review best practices -REVIEW_PATTERNS=$(mcp__mem0__map_tiered_search(query="code review", limit=5)) -``` +## Review Preferences (Customize per project) + +These defaults guide how agents weigh findings. Override by editing this section: + +- **DRY:** Important — flag duplication aggressively +- **Testing:** Non-negotiable — missing tests = high severity +- **Engineering level:** "Engineered enough" — reject both under-engineering and over-engineering +- **Edge cases:** Prefer handling more, especially for public APIs +- **Clarity:** Explicit over clever — readable code wins +- **Performance:** Flag only when measurable impact is likely + +## Expected Agent Output Schemas (Contract Reference) + +These are the fields each agent is expected to return. The command prompt explicitly requests them. + +**Monitor:** +- `valid`: boolean — overall pass/fail +- `summary`: string — brief description of findings +- `verdict`: `'approved'` | `'needs_revision'` | `'rejected'` — requested explicitly (not in base schema, `additionalProperties: true`) +- `issues[]`: array of `{severity, category, description, file_path, line_range, suggestion}` +- `passed_checks[]`: array of strings — checks that passed +- `failed_checks[]`: array of strings — checks that failed + +**Predictor:** +- `risk_assessment`: `'low'` | `'medium'` | `'high'` | `'critical'` +- `predicted_state.affected_components[]`: array of affected components/files +- `predicted_state.breaking_changes[]`: array of `{type, description, mitigation}` +- `predicted_state.required_updates[]`: array of required follow-up changes +- `confidence.score`: float 0.0-1.0 + +**Evaluator:** +- `scores.functionality`: int 0-10 +- `scores.code_quality`: int 0-10 +- `scores.performance`: int 0-10 +- `scores.security`: int 0-10 +- `scores.testability`: int 0-10 +- `scores.completeness`: int 0-10 +- `overall_score`: float 1.0-10.0 (weighted) +- `recommendation`: `'proceed'` | `'improve'` | `'reconsider'` +- `strengths[]`: array of strings +- `weaknesses[]`: array of strings +- `next_steps[]`: array of strings + +## Review Section Protocol + +This protocol is used identically by all 4 review sections below. Do NOT deviate. + +1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) +2. **For each issue:** + - Describe the problem with `file:line` references where available + - Present 2-3 options with tradeoffs (pros/cons for each) + - **Recommended option is always listed first** (marked with "(Recommended)") +3. **AskUserQuestion** with numbered issues and lettered options for each + - Example: "ARCH-1: Option A (Recommended) / Option B / Option C" + - **Skip AskUserQuestion in CI mode** — auto-select recommended options +4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section + - Include: which issues were addressed, which options were chosen, what remains -## Step 2: Get Current Changes +## Step 0: Select Review Mode + +**Parse $ARGUMENTS for `--ci` or `--auto`:** +- If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true +- CI_MODE skips all AskUserQuestion calls and auto-selects recommended options + +**If NOT CI_MODE:** Use AskUserQuestion to ask the user: + +> How thorough should this review be? +> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review +> - **SMALL**: 1 issue per section — quick pass for small changes + +Default to BIG if user doesn't respond or in CI mode. + +## Phase A: Collection (Parallel) + +### Step A.1: Gather changes ```bash -# Get staged and unstaged changes git diff HEAD git status ``` -## Step 3: Invoke All Review Agents in Parallel +Save the diff output — it will be passed to all 3 agents. + +### Step A.2: Launch all parallel calls + +In **ONE message**, launch all 7 calls in parallel (no dependencies between them): + +**4 mem0 queries:** + +``` +mcp__mem0__map_tiered_search(query="architecture review patterns") +mcp__mem0__map_tiered_search(query="code quality standards") +mcp__mem0__map_tiered_search(query="test coverage criteria") +mcp__mem0__map_tiered_search(query="performance review patterns") +``` -**IMPORTANT**: Call Monitor, Predictor, and Evaluator simultaneously by invoking all three Task calls in a single message. These agents operate independently on the same git diff without shared state. +**3 agent Task calls** (pass the git diff + Review Preferences to each): ``` Task( subagent_type="monitor", description="Review code changes", - prompt="Review the following changes for code quality: + prompt="Review the following changes for code quality, security, and correctness. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use architecture + code quality results] Check for: - Code correctness and logic errors @@ -52,49 +138,59 @@ Check for: Output JSON with: - valid: boolean -- issues: array of {severity, category, description, file_path, line_range, suggestion} +- summary: string - verdict: 'approved' | 'needs_revision' | 'rejected' -- summary: string" +- issues: array of {severity, category, description, file_path, line_range, suggestion} +- passed_checks: array of strings +- failed_checks: array of strings" ) Task( subagent_type="predictor", description="Analyze change impact", - prompt="Analyze the impact of these changes: + prompt="Analyze the impact of these changes on the broader codebase. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use architecture results] Analyze: -- Affected files and modules +- Affected components and modules - Breaking changes (API, schema, behavior) - Dependencies that need updates -- Risk assessment +- Risk assessment (low/medium/high/critical) - Integration points affected Output JSON with: -- affected_files: array of {path, change_type, impact_level} -- breaking_changes: array of {type, description, mitigation} -- dependencies_affected: array of strings -- risk_level: 'low' | 'medium' | 'high' -- recommendations: array of strings" +- risk_assessment: 'low' | 'medium' | 'high' | 'critical' +- predicted_state: + affected_components: array of affected files/modules + breaking_changes: array of {type, description, mitigation} + required_updates: array of strings +- confidence: + score: float 0.0-1.0" ) Task( subagent_type="evaluator", description="Score change quality", - prompt="Evaluate the overall quality of these changes: + prompt="Evaluate the overall quality of these changes. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use code quality + test coverage results] -Provide quality assessment using 1-10 scoring (matches evaluator agent template): +Provide quality assessment using 1-10 scoring: - Functionality score (1-10) - Code quality score (1-10) - Performance score (1-10) @@ -112,71 +208,126 @@ Output JSON with: ) ``` -**How Parallel Execution Works:** -1. Claude Code will process all three Task calls from the same message -2. Each agent analyzes the git diff independently -3. Wait for all three Task calls to complete before proceeding -4. Collect results from Monitor, Predictor, and Evaluator outputs +**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding. -## Step 4: Aggregate and Present Results +### Hard Stop Check -Once all three agents have completed, combine their findings: +After all agents complete, check Monitor output: +- If `Monitor.valid = false` → **HARD STOP**. Present Monitor issues immediately and do not proceed to Phase B. The user must fix issues before re-running the review. -### Review Summary +## Phase B: Interactive Presentation (4 Sections) -**Monitor Analysis:** -- Verdict: [monitor.verdict] -- Issues Found: [count by severity] -- Valid: [monitor.valid] +Present findings section by section. Each section follows the **Review Section Protocol** defined above. -**Predictor Analysis:** -- Risk Level: [predictor.risk_level] -- Breaking Changes: [predictor.breaking_changes.length] -- Affected Files: [predictor.affected_files.length] +### Section 1: Architecture -**Evaluator Assessment:** -- Overall Score: [evaluator.overall_score]/10 -- Code Quality: [evaluator.scores.code_quality]/10 -- Security: [evaluator.scores.security]/10 -- Recommendation: [evaluator.recommendation] +**Primary source:** Predictor (`breaking_changes`, `affected_components`, `risk_assessment`) +**Cross-reference:** Evaluator `scores.completeness` +**Issue prefix:** `ARCH` -### Critical Issues (High Severity) +Focus on: +- Breaking changes and their mitigations +- Affected component blast radius +- Architectural fit of the changes +- Completeness of the change set (are all affected areas updated?) -[List high-severity issues from Monitor] +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 2 -### Breaking Changes +### Section 2: Code Quality -[List breaking changes from Predictor] +**Primary source:** Monitor (`issues` filtered by category: correctness, code-quality, maintainability) +**Cross-reference:** Evaluator `scores.code_quality` +**Issue prefix:** `QUALITY` -### Recommendations +Focus on: +- Correctness issues (logic errors, edge cases) +- Code quality issues (naming, structure, DRY violations) +- Maintainability concerns +- Standards compliance -**From Monitor:** -[List Monitor suggestions for critical issues] +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 3 -**From Predictor:** -[List Predictor recommendations] +### Section 3: Tests -**From Evaluator:** -[List Evaluator improvements needed] +**Primary source:** Monitor (`issues` filtered by category: testability, test-coverage) +**Cross-reference:** Evaluator `scores.testability` +**Issue prefix:** `TESTS` -### Final Verdict +Focus on: +- Missing test coverage for new/changed code +- Test quality (edge cases, error paths) +- Testability of the implementation (dependency injection, mocking seams) -Based on combined analysis: -- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed' -- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve' -- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0) +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 4 ---- +### Section 4: Performance + +**Primary source:** Monitor (`issues` filtered by category: performance) +**Cross-reference:** Evaluator `scores.performance` + Predictor `risk_assessment` +**Issue prefix:** `PERF` + +Focus on: +- Performance regressions +- Algorithmic complexity concerns +- Resource usage (memory, CPU, I/O) +- Only flag issues where measurable impact is likely (per Review Preferences) + +→ Follow **Review Section Protocol** + +## Final Verdict -## 💡 Optional: Preserve Review Learnings +Based on combined agent outputs, determine one of: + +**PROCEED:** All conditions met: +- `Monitor.verdict = 'approved'` AND `Monitor.valid = true` +- `Evaluator.recommendation = 'proceed'` + +**REVISE:** Any condition true: +- `Monitor.verdict = 'needs_revision'` +- `Evaluator.recommendation = 'improve'` + +**BLOCK:** Any condition true (highest priority): +- `Monitor.verdict = 'rejected'` +- `Evaluator.recommendation = 'reconsider'` +- `Evaluator.scores.security < 5` +- `Evaluator.scores.functionality < 5` +- `Predictor.risk_assessment = 'high'` AND `predicted_state.breaking_changes` is non-empty + +**Priority:** BLOCK > REVISE > PROCEED + +Present the verdict with a summary table: +- Monitor verdict + valid status +- Evaluator overall score + recommendation +- Predictor risk assessment +- Key issues resolved during interactive review +- Remaining action items + +## CI/Auto Mode Behavior + +When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS): +- Skip all AskUserQuestion calls +- Auto-select BIG mode (4 issues per section) +- Auto-select recommended options for all issues +- Present all 4 sections as a batch report (no pauses between sections) +- Output structured verdict at the end +- Suitable for CI pipelines and automated review contexts + +## Optional: Preserve Review Learnings If the review revealed valuable patterns or common issues worth preserving: ``` -/map-learn [review summary with issues found and resolution patterns] +/map-learn [review summary with key findings, resolution patterns, and verdict rationale] ``` -## MCP Tools Available +## MCP Tools Used + +- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries) +- `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation + +--- -- `mcp__mem0__map_tiered_search` - Search past review patterns -- `mcp__sequential-thinking__sequentialthinking` - Complex analysis decisions +**Begin review now.** diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index e347a1b..a6529dc 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -68,12 +68,13 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu │ │ Uses Claude Opus for cross-evaluation and synthesis │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ -│ /map-review (parallel analysis): │ +│ /map-review (interactive 4-section): │ │ ┌──────────────────────────────────────────────────────────┐ │ -│ │ Query mem0 patterns → Get git diff │ │ -│ │ → [Monitor + Predictor + Evaluator] (all 3 in parallel) │ │ -│ │ → Aggregate results → Final verdict │ │ -│ │ No TaskDecomposer. Reviews current branch changes │ │ +│ │ 4× mem0 queries + git diff │ │ +│ │ → [Monitor + Predictor + Evaluator] (all 3 parallel) │ │ +│ │ → Interactive: Architecture → Quality → Tests → Perf │ │ +│ │ → Verdict: PROCEED / REVISE / BLOCK │ │ +│ │ --ci mode: batch report, no interaction │ │ │ └──────────────────────────────────────────────────────────┘ │ │ │ │ /map-fast (⚠️ minimal, low-risk only): │ @@ -549,20 +550,29 @@ for subtask in subtasks: - Root cause analysis - Regression debugging -#### 5. `/map-review` - Code Review Workflow (3 Agents) +#### 5. `/map-review` - Interactive Code Review (3 Agents) -**Agent Sequence:** mem0 pattern query → git diff → [Monitor + Predictor + Evaluator] (parallel) → Aggregate verdict +**Agent Sequence:** 4× mem0 targeted queries + git diff → [Monitor + Predictor + Evaluator] (all 3 parallel) → Interactive 4-section presentation → Verdict **Review-Specific Features:** 1. **No TaskDecomposer** - Reviews current branch changes as-is -2. **Parallel Agent Execution** - All 3 agents run simultaneously on same diff -3. **Aggregated Verdict Logic:** - - Proceed if: Monitor approved AND Evaluator excellent/good/acceptable - - Revise if: Monitor needs_revision OR Evaluator needs_work - - Block if: Monitor rejected OR Evaluator reject OR (Predictor high risk + breaking changes) - -**Token Usage:** 50-60% of baseline (single diff, no implementation) +2. **Parallel Collection** - 4 mem0 queries + 3 agents launched in a single message (7 parallel calls) +3. **Interactive 4-Section Presentation:** + - **Architecture** (primary: Predictor — breaking changes, affected components) + - **Code Quality** (primary: Monitor — correctness, maintainability issues) + - **Tests** (primary: Monitor — testability, coverage gaps) + - **Performance** (primary: Monitor — performance issues, cross-ref Predictor risk) +4. **Review Section Protocol** — each section presents top N issues (BIG=4, SMALL=1) with options and tradeoffs, user picks resolution via AskUserQuestion +5. **BIG/SMALL mode** — user selects review depth at start +6. **CI/Auto mode** (`--ci`/`--auto` flag) — batch report with no interaction, auto-selects recommended options +7. **Verdict Logic:** + - PROCEED: Monitor approved + valid AND Evaluator proceed + - REVISE: Monitor needs_revision OR Evaluator improve + - BLOCK: Monitor rejected OR Evaluator reconsider OR security/functionality < 5 OR (Predictor high risk + breaking changes) + - Priority: BLOCK > REVISE > PROCEED + +**Token Usage:** ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; `--ci` mode ~12-15K) **Learning:** Optional via `/map-learn` **Quality Gates:** All 3 review agents @@ -570,6 +580,7 @@ for subtask in subtasks: - Pre-commit code review - PR review automation - Quality gate before merge +- CI pipeline integration (`--ci` mode) #### 6. `/map-release` - Release Workflow (No Agents) @@ -631,7 +642,7 @@ Typical token consumption per subtask (estimated): - /map-efficient (Self-MoA): ~25-30K tokens (3× Actor + Synthesizer) - /map-fast: ~8-10K tokens (minimal, no learning) - /map-debug: ~15-20K tokens (full pipeline with Evaluator) -- /map-review: ~8-10K tokens (parallel agents on single diff) +- /map-review: ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; --ci mode ~12-15K) - /map-debate: ~30-40K tokens (3× Actor + Opus DebateArbiter) **For 5-subtask workflow:** diff --git a/src/mapify_cli/templates/commands/map-review.md b/src/mapify_cli/templates/commands/map-review.md index b295835..ea8623b 100644 --- a/src/mapify_cli/templates/commands/map-review.md +++ b/src/mapify_cli/templates/commands/map-review.md @@ -1,47 +1,133 @@ --- -description: Comprehensive MAP review of changes using Monitor, Predictor, and Evaluator agents +description: Interactive 4-section code review using Monitor, Predictor, and Evaluator agents --- # MAP Review Workflow -Review current changes using MAP agents for comprehensive quality analysis. +Interactive, structured code review of current changes using Monitor, Predictor, and Evaluator agents. -## What This Command Does +**Task:** $ARGUMENTS -1. **Monitor** - Validates code correctness, security, standards compliance -2. **Predictor** - Analyzes impact on codebase, breaking changes, dependencies -3. **Evaluator** - Scores overall quality and provides actionable feedback +## Execution Rules -## Step 1: Query mem0 for Review Patterns +1. Execute all steps in the order listed below +2. Use the exact `subagent_type` values specified (monitor, predictor, evaluator) +3. All 3 agents are required — do not skip any +4. **Monitor `valid=false` is a hard stop** — report issues immediately, do not proceed to interactive presentation +5. Wait for all parallel calls to complete before proceeding to the next phase -```bash -# Get review best practices -REVIEW_PATTERNS=$(mcp__mem0__map_tiered_search(query="code review", limit=5)) -``` +## Review Preferences (Customize per project) + +These defaults guide how agents weigh findings. Override by editing this section: + +- **DRY:** Important — flag duplication aggressively +- **Testing:** Non-negotiable — missing tests = high severity +- **Engineering level:** "Engineered enough" — reject both under-engineering and over-engineering +- **Edge cases:** Prefer handling more, especially for public APIs +- **Clarity:** Explicit over clever — readable code wins +- **Performance:** Flag only when measurable impact is likely + +## Expected Agent Output Schemas (Contract Reference) + +These are the fields each agent is expected to return. The command prompt explicitly requests them. + +**Monitor:** +- `valid`: boolean — overall pass/fail +- `summary`: string — brief description of findings +- `verdict`: `'approved'` | `'needs_revision'` | `'rejected'` — requested explicitly (not in base schema, `additionalProperties: true`) +- `issues[]`: array of `{severity, category, description, file_path, line_range, suggestion}` +- `passed_checks[]`: array of strings — checks that passed +- `failed_checks[]`: array of strings — checks that failed + +**Predictor:** +- `risk_assessment`: `'low'` | `'medium'` | `'high'` | `'critical'` +- `predicted_state.affected_components[]`: array of affected components/files +- `predicted_state.breaking_changes[]`: array of `{type, description, mitigation}` +- `predicted_state.required_updates[]`: array of required follow-up changes +- `confidence.score`: float 0.0-1.0 + +**Evaluator:** +- `scores.functionality`: int 0-10 +- `scores.code_quality`: int 0-10 +- `scores.performance`: int 0-10 +- `scores.security`: int 0-10 +- `scores.testability`: int 0-10 +- `scores.completeness`: int 0-10 +- `overall_score`: float 1.0-10.0 (weighted) +- `recommendation`: `'proceed'` | `'improve'` | `'reconsider'` +- `strengths[]`: array of strings +- `weaknesses[]`: array of strings +- `next_steps[]`: array of strings + +## Review Section Protocol + +This protocol is used identically by all 4 review sections below. Do NOT deviate. + +1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3) +2. **For each issue:** + - Describe the problem with `file:line` references where available + - Present 2-3 options with tradeoffs (pros/cons for each) + - **Recommended option is always listed first** (marked with "(Recommended)") +3. **AskUserQuestion** with numbered issues and lettered options for each + - Example: "ARCH-1: Option A (Recommended) / Option B / Option C" + - **Skip AskUserQuestion in CI mode** — auto-select recommended options +4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section + - Include: which issues were addressed, which options were chosen, what remains -## Step 2: Get Current Changes +## Step 0: Select Review Mode + +**Parse $ARGUMENTS for `--ci` or `--auto`:** +- If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true +- CI_MODE skips all AskUserQuestion calls and auto-selects recommended options + +**If NOT CI_MODE:** Use AskUserQuestion to ask the user: + +> How thorough should this review be? +> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review +> - **SMALL**: 1 issue per section — quick pass for small changes + +Default to BIG if user doesn't respond or in CI mode. + +## Phase A: Collection (Parallel) + +### Step A.1: Gather changes ```bash -# Get staged and unstaged changes git diff HEAD git status ``` -## Step 3: Invoke All Review Agents in Parallel +Save the diff output — it will be passed to all 3 agents. + +### Step A.2: Launch all parallel calls + +In **ONE message**, launch all 7 calls in parallel (no dependencies between them): + +**4 mem0 queries:** + +``` +mcp__mem0__map_tiered_search(query="architecture review patterns") +mcp__mem0__map_tiered_search(query="code quality standards") +mcp__mem0__map_tiered_search(query="test coverage criteria") +mcp__mem0__map_tiered_search(query="performance review patterns") +``` -**IMPORTANT**: Call Monitor, Predictor, and Evaluator simultaneously by invoking all three Task calls in a single message. These agents operate independently on the same git diff without shared state. +**3 agent Task calls** (pass the git diff + Review Preferences to each): ``` Task( subagent_type="monitor", description="Review code changes", - prompt="Review the following changes for code quality: + prompt="Review the following changes for code quality, security, and correctness. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use architecture + code quality results] Check for: - Code correctness and logic errors @@ -52,49 +138,59 @@ Check for: Output JSON with: - valid: boolean -- issues: array of {severity, category, description, file_path, line_range, suggestion} +- summary: string - verdict: 'approved' | 'needs_revision' | 'rejected' -- summary: string" +- issues: array of {severity, category, description, file_path, line_range, suggestion} +- passed_checks: array of strings +- failed_checks: array of strings" ) Task( subagent_type="predictor", description="Analyze change impact", - prompt="Analyze the impact of these changes: + prompt="Analyze the impact of these changes on the broader codebase. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use architecture results] Analyze: -- Affected files and modules +- Affected components and modules - Breaking changes (API, schema, behavior) - Dependencies that need updates -- Risk assessment +- Risk assessment (low/medium/high/critical) - Integration points affected Output JSON with: -- affected_files: array of {path, change_type, impact_level} -- breaking_changes: array of {type, description, mitigation} -- dependencies_affected: array of strings -- risk_level: 'low' | 'medium' | 'high' -- recommendations: array of strings" +- risk_assessment: 'low' | 'medium' | 'high' | 'critical' +- predicted_state: + affected_components: array of affected files/modules + breaking_changes: array of {type, description, mitigation} + required_updates: array of strings +- confidence: + score: float 0.0-1.0" ) Task( subagent_type="evaluator", description="Score change quality", - prompt="Evaluate the overall quality of these changes: + prompt="Evaluate the overall quality of these changes. + +**Review Preferences:** +[paste Review Preferences section above] **Changes:** [paste git diff output] **mem0 Context:** -[paste relevant mem0 patterns] +[paste relevant mem0 patterns from queries above — use code quality + test coverage results] -Provide quality assessment using 1-10 scoring (matches evaluator agent template): +Provide quality assessment using 1-10 scoring: - Functionality score (1-10) - Code quality score (1-10) - Performance score (1-10) @@ -112,71 +208,126 @@ Output JSON with: ) ``` -**How Parallel Execution Works:** -1. Claude Code will process all three Task calls from the same message -2. Each agent analyzes the git diff independently -3. Wait for all three Task calls to complete before proceeding -4. Collect results from Monitor, Predictor, and Evaluator outputs +**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding. -## Step 4: Aggregate and Present Results +### Hard Stop Check -Once all three agents have completed, combine their findings: +After all agents complete, check Monitor output: +- If `Monitor.valid = false` → **HARD STOP**. Present Monitor issues immediately and do not proceed to Phase B. The user must fix issues before re-running the review. -### Review Summary +## Phase B: Interactive Presentation (4 Sections) -**Monitor Analysis:** -- Verdict: [monitor.verdict] -- Issues Found: [count by severity] -- Valid: [monitor.valid] +Present findings section by section. Each section follows the **Review Section Protocol** defined above. -**Predictor Analysis:** -- Risk Level: [predictor.risk_level] -- Breaking Changes: [predictor.breaking_changes.length] -- Affected Files: [predictor.affected_files.length] +### Section 1: Architecture -**Evaluator Assessment:** -- Overall Score: [evaluator.overall_score]/10 -- Code Quality: [evaluator.scores.code_quality]/10 -- Security: [evaluator.scores.security]/10 -- Recommendation: [evaluator.recommendation] +**Primary source:** Predictor (`breaking_changes`, `affected_components`, `risk_assessment`) +**Cross-reference:** Evaluator `scores.completeness` +**Issue prefix:** `ARCH` -### Critical Issues (High Severity) +Focus on: +- Breaking changes and their mitigations +- Affected component blast radius +- Architectural fit of the changes +- Completeness of the change set (are all affected areas updated?) -[List high-severity issues from Monitor] +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 2 -### Breaking Changes +### Section 2: Code Quality -[List breaking changes from Predictor] +**Primary source:** Monitor (`issues` filtered by category: correctness, code-quality, maintainability) +**Cross-reference:** Evaluator `scores.code_quality` +**Issue prefix:** `QUALITY` -### Recommendations +Focus on: +- Correctness issues (logic errors, edge cases) +- Code quality issues (naming, structure, DRY violations) +- Maintainability concerns +- Standards compliance -**From Monitor:** -[List Monitor suggestions for critical issues] +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 3 -**From Predictor:** -[List Predictor recommendations] +### Section 3: Tests -**From Evaluator:** -[List Evaluator improvements needed] +**Primary source:** Monitor (`issues` filtered by category: testability, test-coverage) +**Cross-reference:** Evaluator `scores.testability` +**Issue prefix:** `TESTS` -### Final Verdict +Focus on: +- Missing test coverage for new/changed code +- Test quality (edge cases, error paths) +- Testability of the implementation (dependency injection, mocking seams) -Based on combined analysis: -- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed' -- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve' -- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0) +→ Follow **Review Section Protocol** +→ Summarize decisions before proceeding to Section 4 ---- +### Section 4: Performance + +**Primary source:** Monitor (`issues` filtered by category: performance) +**Cross-reference:** Evaluator `scores.performance` + Predictor `risk_assessment` +**Issue prefix:** `PERF` + +Focus on: +- Performance regressions +- Algorithmic complexity concerns +- Resource usage (memory, CPU, I/O) +- Only flag issues where measurable impact is likely (per Review Preferences) + +→ Follow **Review Section Protocol** + +## Final Verdict -## 💡 Optional: Preserve Review Learnings +Based on combined agent outputs, determine one of: + +**PROCEED:** All conditions met: +- `Monitor.verdict = 'approved'` AND `Monitor.valid = true` +- `Evaluator.recommendation = 'proceed'` + +**REVISE:** Any condition true: +- `Monitor.verdict = 'needs_revision'` +- `Evaluator.recommendation = 'improve'` + +**BLOCK:** Any condition true (highest priority): +- `Monitor.verdict = 'rejected'` +- `Evaluator.recommendation = 'reconsider'` +- `Evaluator.scores.security < 5` +- `Evaluator.scores.functionality < 5` +- `Predictor.risk_assessment = 'high'` AND `predicted_state.breaking_changes` is non-empty + +**Priority:** BLOCK > REVISE > PROCEED + +Present the verdict with a summary table: +- Monitor verdict + valid status +- Evaluator overall score + recommendation +- Predictor risk assessment +- Key issues resolved during interactive review +- Remaining action items + +## CI/Auto Mode Behavior + +When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS): +- Skip all AskUserQuestion calls +- Auto-select BIG mode (4 issues per section) +- Auto-select recommended options for all issues +- Present all 4 sections as a batch report (no pauses between sections) +- Output structured verdict at the end +- Suitable for CI pipelines and automated review contexts + +## Optional: Preserve Review Learnings If the review revealed valuable patterns or common issues worth preserving: ``` -/map-learn [review summary with issues found and resolution patterns] +/map-learn [review summary with key findings, resolution patterns, and verdict rationale] ``` -## MCP Tools Available +## MCP Tools Used + +- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries) +- `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation + +--- -- `mcp__mem0__map_tiered_search` - Search past review patterns -- `mcp__sequential-thinking__sequentialthinking` - Complex analysis decisions +**Begin review now.** diff --git a/tests/test_command_templates.py b/tests/test_command_templates.py index 39cb9f4..82dc329 100644 --- a/tests/test_command_templates.py +++ b/tests/test_command_templates.py @@ -165,3 +165,215 @@ def test_map_efficient_is_token_efficient(self, templates_commands_dir): assert ( "token-efficient" in content.lower() or "efficient" in content.lower() ), "Should describe itself as efficient" + + +class TestMapReviewStructure: + """Test structural properties of the map-review command template.""" + + @pytest.fixture + def project_root(self): + return Path(__file__).parent.parent + + @pytest.fixture + def templates_commands_dir(self, project_root): + return project_root / "src" / "mapify_cli" / "templates" / "commands" + + @pytest.fixture + def review_content(self, templates_commands_dir): + return (templates_commands_dir / "map-review.md").read_text() + + def test_has_frontmatter(self, review_content): + """map-review.md starts with YAML frontmatter containing description.""" + assert review_content.startswith("---") + assert "description:" in review_content[:200] + + def test_has_arguments_placeholder(self, review_content): + """Command references $ARGUMENTS for user input.""" + assert "$ARGUMENTS" in review_content + + def test_has_review_section_protocol(self, review_content): + """Review Section Protocol is defined once and referenced by sections.""" + assert "Review Section Protocol" in review_content + + def test_has_four_section_headings(self, review_content): + """All 4 review sections are present.""" + assert "Section 1: Architecture" in review_content + assert "Section 2: Code Quality" in review_content + assert "Section 3: Tests" in review_content + assert "Section 4: Performance" in review_content + + @pytest.mark.parametrize("prefix", ["ARCH", "QUALITY", "TESTS", "PERF"]) + def test_section_prefixes_present(self, review_content, prefix): + """Each section defines its issue prefix.""" + assert prefix in review_content + + @pytest.mark.parametrize( + "section,source", + [ + ("Section 1: Architecture", "Predictor"), + ("Section 2: Code Quality", "Monitor"), + ("Section 3: Tests", "Monitor"), + ("Section 4: Performance", "Monitor"), + ], + ) + def test_primary_source_mapping(self, review_content, section, source): + """Each section references its primary source agent.""" + # Find the section and check the source is mentioned nearby + idx = review_content.index(section) + section_block = review_content[idx : idx + 500] + assert source in section_block, ( + f"{section} should reference {source} as primary source" + ) + + def test_three_agent_task_calls(self, review_content): + """Command includes Task calls for all 3 agents.""" + assert 'subagent_type="monitor"' in review_content + assert 'subagent_type="predictor"' in review_content + assert 'subagent_type="evaluator"' in review_content + + def test_four_mem0_queries(self, review_content): + """Command includes at least 4 mem0 tiered search queries.""" + count = review_content.count("map_tiered_search") + assert count >= 4, f"Expected at least 4 mem0 queries, found {count}" + + def test_ci_mode_flag(self, review_content): + """Command documents --ci flag for CI mode.""" + assert "--ci" in review_content + + def test_ask_user_question_mentioned(self, review_content): + """Command uses AskUserQuestion for interactive presentation.""" + assert "AskUserQuestion" in review_content + + def test_review_preferences_section(self, review_content): + """Command includes Review Preferences section.""" + assert "Review Preferences" in review_content + + def test_schema_documentation(self, review_content): + """Command documents expected agent output schemas.""" + assert "Expected Agent Output Schemas" in review_content + + def test_map_learn_suggestion(self, review_content): + """Command suggests /map-learn for preserving review learnings.""" + assert "/map-learn" in review_content + + def test_parallel_execution_instruction(self, review_content): + """Command instructs parallel execution of agents.""" + content_lower = review_content.lower() + assert "parallel" in content_lower + + def test_previous_section_summary(self, review_content): + """Command instructs summarizing decisions before next section.""" + assert "Summarize decisions" in review_content or "summarize" in review_content.lower() + + +class TestMapReviewVerdictLogic: + """Test verdict logic conditions in map-review command.""" + + @pytest.fixture + def project_root(self): + return Path(__file__).parent.parent + + @pytest.fixture + def templates_commands_dir(self, project_root): + return project_root / "src" / "mapify_cli" / "templates" / "commands" + + @pytest.fixture + def review_content(self, templates_commands_dir): + return (templates_commands_dir / "map-review.md").read_text() + + def test_proceed_conditions(self, review_content): + """PROCEED requires Monitor approved + Evaluator proceed.""" + assert "PROCEED" in review_content + assert "Monitor.verdict" in review_content + assert "'approved'" in review_content + assert "Evaluator.recommendation" in review_content + assert "'proceed'" in review_content + + def test_revise_conditions(self, review_content): + """REVISE triggered by needs_revision or improve.""" + assert "REVISE" in review_content + assert "'needs_revision'" in review_content + assert "'improve'" in review_content + + def test_block_conditions(self, review_content): + """BLOCK triggered by rejected, reconsider, or critical thresholds.""" + assert "BLOCK" in review_content + assert "'rejected'" in review_content + assert "'reconsider'" in review_content + + def test_block_security_threshold(self, review_content): + """BLOCK includes security score < 5 condition.""" + assert "security < 5" in review_content or "security<5" in review_content + + def test_block_functionality_threshold(self, review_content): + """BLOCK includes functionality score < 5 condition.""" + assert ( + "functionality < 5" in review_content + or "functionality<5" in review_content + ) + + def test_block_predictor_risk(self, review_content): + """BLOCK includes high risk + breaking changes condition.""" + assert "risk_assessment" in review_content + assert "breaking_changes" in review_content + + def test_priority_ordering(self, review_content): + """Verdict priority is BLOCK > REVISE > PROCEED.""" + assert "BLOCK > REVISE > PROCEED" in review_content + + def test_references_monitor_valid(self, review_content): + """Verdict references Monitor.valid field.""" + assert "Monitor.valid" in review_content + + def test_references_evaluator_overall_score(self, review_content): + """Verdict section references Evaluator overall_score.""" + assert "overall_score" in review_content + + def test_references_predictor_risk_assessment(self, review_content): + """Verdict references Predictor risk_assessment.""" + assert "Predictor.risk_assessment" in review_content or ( + "Predictor" in review_content and "risk_assessment" in review_content + ) + + +class TestAgentSchemaFieldsPresent: + """Test that agent template files contain expected schema fields.""" + + @pytest.fixture + def project_root(self): + return Path(__file__).parent.parent + + @pytest.fixture + def claude_agents_dir(self, project_root): + """Use .claude/agents/ (development source) for field verification.""" + return project_root / ".claude" / "agents" + + def test_monitor_has_valid_field(self, claude_agents_dir): + """Monitor agent template contains 'valid' field.""" + content = (claude_agents_dir / "monitor.md").read_text() + assert "valid" in content + + def test_monitor_has_issues_field(self, claude_agents_dir): + """Monitor agent template contains 'issues' field.""" + content = (claude_agents_dir / "monitor.md").read_text() + assert "issues" in content + + def test_predictor_has_risk_assessment(self, claude_agents_dir): + """Predictor agent template contains 'risk_assessment' field.""" + content = (claude_agents_dir / "predictor.md").read_text() + assert "risk_assessment" in content + + def test_predictor_has_breaking_changes(self, claude_agents_dir): + """Predictor agent template contains 'breaking_changes' field.""" + content = (claude_agents_dir / "predictor.md").read_text() + assert "breaking_changes" in content + + def test_evaluator_has_scores(self, claude_agents_dir): + """Evaluator agent template contains 'scores' field.""" + content = (claude_agents_dir / "evaluator.md").read_text() + assert "scores" in content + + def test_evaluator_has_recommendation(self, claude_agents_dir): + """Evaluator agent template contains 'recommendation' field.""" + content = (claude_agents_dir / "evaluator.md").read_text() + assert "recommendation" in content diff --git a/tests/test_template_sync.py b/tests/test_template_sync.py index f58c0d1..a5205fa 100644 --- a/tests/test_template_sync.py +++ b/tests/test_template_sync.py @@ -170,3 +170,85 @@ def test_agent_frontmatter_no_deleted_changelog( f"{agent} has 'changelog:' in frontmatter pointing to deleted file. " f"Remove the changelog field from the frontmatter." ) + + +class TestCommandTemplateSynchronization: + """Test that command templates are synchronized between .claude/commands/ and templates/commands/.""" + + @pytest.fixture + def project_root(self): + return Path(__file__).parent.parent + + @pytest.fixture + def claude_commands_dir(self, project_root): + return project_root / ".claude" / "commands" + + @pytest.fixture + def templates_commands_dir(self, project_root): + return project_root / "src" / "mapify_cli" / "templates" / "commands" + + @pytest.mark.parametrize( + "command", + [ + "map-check.md", + "map-debate.md", + "map-debug.md", + "map-efficient.md", + "map-fast.md", + "map-learn.md", + "map-plan.md", + "map-release.md", + "map-resume.md", + "map-review.md", + ], + ) + def test_command_content_matches( + self, claude_commands_dir, templates_commands_dir, command + ): + """Test that command file content is identical between directories.""" + claude_file = claude_commands_dir / command + template_file = templates_commands_dir / command + + if not claude_file.exists() or not template_file.exists(): + pytest.skip(f"{command} doesn't exist in both directories") + + assert filecmp.cmp(claude_file, template_file, shallow=False), ( + f"{command} content differs between .claude/commands/ and templates/commands/. " + f"Run: make sync-templates" + ) + + def test_command_file_count_matches( + self, claude_commands_dir, templates_commands_dir + ): + """Test that both directories have the same number of command .md files.""" + if not claude_commands_dir.exists() or not templates_commands_dir.exists(): + pytest.skip("One or both directories don't exist") + + claude_count = len(list(claude_commands_dir.glob("map-*.md"))) + template_count = len(list(templates_commands_dir.glob("map-*.md"))) + + assert claude_count == template_count, ( + f"Command file count mismatch: .claude/commands/ has {claude_count} map-*.md files, " + f"templates/commands/ has {template_count} map-*.md files. " + f"Run: make sync-templates" + ) + + def test_no_orphaned_command_templates( + self, claude_commands_dir, templates_commands_dir + ): + """Test that templates/commands/ doesn't have map-* files missing from .claude/commands/.""" + if not templates_commands_dir.exists(): + pytest.skip("Templates directory doesn't exist") + + claude_files = ( + {f.name for f in claude_commands_dir.glob("map-*.md")} + if claude_commands_dir.exists() + else set() + ) + template_files = {f.name for f in templates_commands_dir.glob("map-*.md")} + + orphaned = template_files - claude_files + assert not orphaned, ( + f"Orphaned command files in templates/commands/ not in .claude/commands/: {orphaned}. " + f"Run: make sync-templates" + ) From a39792d71189fef9f430e8940a90a4b6044ce748 Mon Sep 17 00:00:00 2001 From: "Mikhail [azalio] Petrov" Date: Tue, 17 Feb 2026 17:03:47 +0300 Subject: [PATCH 7/7] Allow map artifact updates through workflow gate --- .claude/hooks/workflow-gate.py | 71 ++++++++++++++++++- .../templates/hooks/workflow-gate.py | 71 ++++++++++++++++++- 2 files changed, 138 insertions(+), 4 deletions(-) diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py index b0e67f0..a74be23 100755 --- a/.claude/hooks/workflow-gate.py +++ b/.claude/hooks/workflow-gate.py @@ -10,9 +10,10 @@ No manual invocation needed - Claude Code handles hook execution. ENFORCEMENT RULES: - - Blocks Edit/Write/MultiEdit if workflow_state.json is missing + - Allows Edit/Write/MultiEdit when workflow_state.json is missing (fail-open) - Blocks if current subtask hasn't completed required steps: ['actor', 'monitor'] - Allows tools if all required steps are completed + - Always allows edits under .map/ (workflow artifacts/state) to prevent deadlocks - Allows Read, Bash, and other non-editing tools always WORKFLOW STATE FILE: @@ -55,6 +56,56 @@ REQUIRED_STEPS = ["actor", "monitor"] +def extract_target_file_paths(tool_call: Dict) -> list[str]: + """Best-effort extraction of file paths from Claude Code tool payloads.""" + tool_input = tool_call.get("tool_input") or {} + if not isinstance(tool_input, dict): + return [] + + paths: list[str] = [] + + direct = tool_input.get("file_path") + if isinstance(direct, str) and direct.strip(): + paths.append(direct) + + edits = tool_input.get("edits") + if isinstance(edits, list): + for edit in edits: + if not isinstance(edit, dict): + continue + fp = edit.get("file_path") + if isinstance(fp, str) and fp.strip(): + paths.append(fp) + + return paths + + +def is_map_artifact_path(file_path: str) -> bool: + """ + Return True if file_path resolves under the current working directory's .map/. + + This allowlist prevents the workflow gate from deadlocking itself by blocking + updates to workflow artifacts (including workflow_state.json). + """ + if not isinstance(file_path, str) or not file_path.strip(): + return False + + repo_root = Path.cwd().resolve() + candidate = Path(file_path) + resolved = ( + candidate.resolve(strict=False) + if candidate.is_absolute() + else (repo_root / candidate).resolve(strict=False) + ) + + try: + rel = resolved.relative_to(repo_root) + except ValueError: + return False + + return bool(rel.parts) and rel.parts[0] == ".map" + + def sanitize_branch_name(branch: str) -> str: """Sanitize branch name for safe filesystem paths.""" sanitized = branch.replace("/", "-") @@ -106,7 +157,17 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: """ current_subtask = state.get("current_subtask") if not current_subtask: - return False, "No current_subtask defined in workflow_state.json" + current_state = state.get("current_state") or "UNKNOWN" + return False, ( + "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n" + f"current_state: {current_state}\n\n" + "Edits to non-.map files are blocked until current_subtask is set and " + "the required steps are completed.\n\n" + "To fix:\n" + " - Update .map//workflow_state.json to set current_subtask\n" + " - Or re-run /map-resume or /map-plan to regenerate state\n" + " - Or delete .map//workflow_state.json to disable enforcement" + ) completed = state.get("completed_steps", {}).get(current_subtask, []) @@ -142,6 +203,12 @@ def main(): print("{}") sys.exit(0) + # Always allow edits to MAP workflow artifacts under .map/ + target_paths = extract_target_file_paths(tool_call) + if target_paths and all(is_map_artifact_path(p) for p in target_paths): + print("{}") + sys.exit(0) + # Get current branch branch = get_branch_name() diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py index b0e67f0..a74be23 100755 --- a/src/mapify_cli/templates/hooks/workflow-gate.py +++ b/src/mapify_cli/templates/hooks/workflow-gate.py @@ -10,9 +10,10 @@ No manual invocation needed - Claude Code handles hook execution. ENFORCEMENT RULES: - - Blocks Edit/Write/MultiEdit if workflow_state.json is missing + - Allows Edit/Write/MultiEdit when workflow_state.json is missing (fail-open) - Blocks if current subtask hasn't completed required steps: ['actor', 'monitor'] - Allows tools if all required steps are completed + - Always allows edits under .map/ (workflow artifacts/state) to prevent deadlocks - Allows Read, Bash, and other non-editing tools always WORKFLOW STATE FILE: @@ -55,6 +56,56 @@ REQUIRED_STEPS = ["actor", "monitor"] +def extract_target_file_paths(tool_call: Dict) -> list[str]: + """Best-effort extraction of file paths from Claude Code tool payloads.""" + tool_input = tool_call.get("tool_input") or {} + if not isinstance(tool_input, dict): + return [] + + paths: list[str] = [] + + direct = tool_input.get("file_path") + if isinstance(direct, str) and direct.strip(): + paths.append(direct) + + edits = tool_input.get("edits") + if isinstance(edits, list): + for edit in edits: + if not isinstance(edit, dict): + continue + fp = edit.get("file_path") + if isinstance(fp, str) and fp.strip(): + paths.append(fp) + + return paths + + +def is_map_artifact_path(file_path: str) -> bool: + """ + Return True if file_path resolves under the current working directory's .map/. + + This allowlist prevents the workflow gate from deadlocking itself by blocking + updates to workflow artifacts (including workflow_state.json). + """ + if not isinstance(file_path, str) or not file_path.strip(): + return False + + repo_root = Path.cwd().resolve() + candidate = Path(file_path) + resolved = ( + candidate.resolve(strict=False) + if candidate.is_absolute() + else (repo_root / candidate).resolve(strict=False) + ) + + try: + rel = resolved.relative_to(repo_root) + except ValueError: + return False + + return bool(rel.parts) and rel.parts[0] == ".map" + + def sanitize_branch_name(branch: str) -> str: """Sanitize branch name for safe filesystem paths.""" sanitized = branch.replace("/", "-") @@ -106,7 +157,17 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]: """ current_subtask = state.get("current_subtask") if not current_subtask: - return False, "No current_subtask defined in workflow_state.json" + current_state = state.get("current_state") or "UNKNOWN" + return False, ( + "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n" + f"current_state: {current_state}\n\n" + "Edits to non-.map files are blocked until current_subtask is set and " + "the required steps are completed.\n\n" + "To fix:\n" + " - Update .map//workflow_state.json to set current_subtask\n" + " - Or re-run /map-resume or /map-plan to regenerate state\n" + " - Or delete .map//workflow_state.json to disable enforcement" + ) completed = state.get("completed_steps", {}).get(current_subtask, []) @@ -142,6 +203,12 @@ def main(): print("{}") sys.exit(0) + # Always allow edits to MAP workflow artifacts under .map/ + target_paths = extract_target_file_paths(tool_call) + if target_paths and all(is_map_artifact_path(p) for p in target_paths): + print("{}") + sys.exit(0) + # Get current branch branch = get_branch_name()