From 28d7460a1e1c2def3b4516b9cf8c504304fdf858 Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Mon, 16 Feb 2026 00:36:19 +0300
Subject: [PATCH 1/7] =?UTF-8?q?Strengthen=20validation=20criteria=20and=20?=
 =?UTF-8?q?enforce=20VC=E2=86=92tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .claude/agents/actor.md                       | 29 ++++++++++--
 .claude/agents/monitor.md                     | 46 ++++++++++++++++---
 .claude/agents/task-decomposer.md             | 31 +++++++++++--
 .claude/commands/map-debate.md                |  6 ++-
 .claude/commands/map-efficient.md             |  4 ++
 src/mapify_cli/templates/agents/actor.md      | 29 ++++++++++--
 src/mapify_cli/templates/agents/monitor.md    | 46 ++++++++++++++++---
 .../templates/agents/task-decomposer.md       | 31 +++++++++++--
 .../templates/commands/map-debate.md          |  6 ++-
 .../templates/commands/map-efficient.md       |  4 ++
 10 files changed, 202 insertions(+), 30 deletions(-)
diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md
index 40f1794..1ed955e 100644
--- a/.claude/agents/actor.md
+++ b/.claude/agents/actor.md
@@ -375,6 +375,10 @@ Document key decisions using this structure:
 - [ ] Error cases (invalid input, failures)
 - [ ] Security cases (injection, auth bypass) — if applicable
 
+**Validation criteria → tests (MANDATORY when test_strategy is not N/A)**:
+- For each `VCn:` item in `validation_criteria`, implement or update at least one automated test that would fail without your change and pass with it.
+- Prefer naming tests with `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage.
+
 **Format**:
 ```
 1. test_[function]_[scenario]_[expected]
@@ -392,7 +396,18 @@ Document key decisions using this structure:
    Expected: 409, {"error": "Email already registered"}
 </example>
 
-## 6. Used Patterns (ACE Learning)
+## 6. Validation Criteria Coverage (Evidence)
+
+If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced.
+
+**Format**:
+```
+VC1: <criterion text>
+- Code: path/to/file.ext#SymbolOrLocation
+- Tests: path/to/test_file.ext::test_name (or N/A with reason)
+```
+
+## 7. Used Patterns (ACE Learning)
 
 **Format**: `["impl-0012", "sec-0034"]` or `[]` if none
 
@@ -403,7 +418,7 @@ Document key decisions using this structure:
 
 **If no patterns match**: `[]` with note "No relevant patterns in current mem0"
 
-## 7. Integration Notes (If Applicable)
+## 8. Integration Notes (If Applicable)
 
 Only include if changes affect:
 - Database schema (migrations needed?)
@@ -449,6 +464,7 @@ Only include if changes affect:
 - [ ] AAG contract stated BEFORE code (Section 1)
 - [ ] Trade-offs documented with alternatives
 - [ ] Test cases cover happy + edge + error paths
+- [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason)
 - [ ] Used patterns tracked (or `[]` if none)
 - [ ] Template variables `{{...}}` preserved in generated code
 
@@ -518,6 +534,14 @@ with the following JSON content:
   "summary": "<one-line description of what was implemented>",
   "aag_contract": "<the AAG contract line>",
   "files_changed": ["<list of modified file paths>"],
+  "tests_changed": ["<list of modified/added test file paths>"],
+  "validation_criteria_coverage": [
+    {
+      "criterion": "VC1: ...",
+      "tests": ["path/to/test_file.ext::test_name"],
+      "notes": "Short justification if tests are N/A or partial"
+    }
+  ],
   "status": "applied"
 }
 ```
@@ -1083,4 +1107,3 @@ export class ReconnectingWebSocket {
 **Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.)
 
 </Actor_Reference_Examples>
-
diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md
index 6358283..59043ad 100644
--- a/.claude/agents/monitor.md
+++ b/.claude/agents/monitor.md
@@ -821,8 +821,9 @@ When `{{requirements}}` or `{{subtask_description}}` includes `validation_criter
 ```
 FOR each criterion in validation_criteria:
   1. PARSE criterion into testable assertion
-  2. VERIFY assertion against {{solution}}
-  3. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE
+  2. VERIFY assertion against {{solution}} (code-path evidence)
+  3. VERIFY test coverage using test_strategy (if not N/A)
+  4. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE
 
 CONTRACT_STATUS:
   - ALL PASS → contract_compliant: true
@@ -830,6 +831,17 @@ CONTRACT_STATUS:
   - ANY UNTESTABLE → flag for clarification
 ```
 
+### Test Coverage Rule (Executable Contracts)
+
+Design constraints only become reliable when they are enforced by executable checks.
+
+For each `VCn:` criterion:
+- If `test_strategy` is provided and not `N/A`, require at least one concrete test case that covers it.
+- Prefer deterministic mapping: test names include `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`).
+- Evidence MUST include both:
+  - **Code evidence** (where in code the behavior is implemented), and
+  - **Test evidence** (where in tests it is asserted).
+
 ### Contract Assertion Patterns
 
 | Criterion Type | How to Verify | Example |
@@ -852,15 +864,31 @@ Include in JSON output when validation_criteria provided:
     "failed": 1,
     "untestable": 0,
     "details": [
-      {"criterion": "Returns 401 for expired token", "status": "PASS", "evidence": "Line 45: if token.expired: return 401"},
-      {"criterion": "Creates audit log entry", "status": "FAIL", "evidence": "No audit.log() call found in create_user()"}
+      {
+        "criterion": "VC1: Returns 401 for expired token (auth/middleware.py:validate_token)",
+        "status": "PASS",
+        "code_evidence": "auth/middleware.py:45: if token.expired: return 401",
+        "test_coverage": "PASS",
+        "test_evidence": "tests/test_auth.py::test_vc1_expired_token_returns_401"
+      },
+      {
+        "criterion": "VC2: Creates audit log entry with user_id (audit/logger.py:log_event)",
+        "status": "FAIL",
+        "code_evidence": "No audit.log_event() call found in create_user()",
+        "test_coverage": "MISSING",
+        "test_evidence": "No test found matching vc2 or described in test_strategy"
+      }
     ]
   },
   "contract_compliant": false
 }
 ```
 
-**Decision Rule**: If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming).
+**Decision Rule**:
+- If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming).
+- If any Behavioral/Integration/Edge-case criterion has `test_coverage != PASS` and test_strategy is not `N/A`:
+  - If `security_critical == true`: set `valid: false` (missing executable enforcement is a release blocker).
+  - Otherwise: add a **testability** issue and require Actor to add tests.
 
 </Monitor_Contract_Validation>
 
@@ -2512,7 +2540,13 @@ with the following JSON content:
   "timestamp": "<ISO 8601 UTC>",
   "valid": true,
   "issues_found": 0,
-  "recommendation": "approve|reject|revise"
+  "recommendation": "approve|reject|revise",
+  "validation_criteria_test_coverage": {
+    "total": 0,
+    "covered": 0,
+    "missing": 0,
+    "notes": "Optional: summarize VC→test coverage findings"
+  }
 }
 ```
 
diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md
index efcbb97..58e8afd 100644
--- a/.claude/agents/task-decomposer.md
+++ b/.claude/agents/task-decomposer.md
@@ -248,8 +248,21 @@ Return **ONLY** valid JSON in this exact structure:
 **subtasks[].complexity_rationale**: MUST reference factors: "Score N: factor (+X), factor (+Y)..."
 **subtasks[].validation_criteria**: Array of **testable conditions** that prove completion
   - REQUIRED: 2-4 specific, verifiable outcomes
-  - Good: "Returns 401 for expired token", "Creates audit log entry with user_id"
-  - Bad: "Works correctly", "Handles errors"
+  - Format (recommended): Prefix each item with `VC1:`, `VC2:`, ... for stable cross-agent reference.
+  - Each criterion MUST be both:
+    - **Behavior-/artifact-verifiable** (can be checked by reading code), and
+    - **Test-verifiable** (has at least one concrete test case planned in `test_strategy`).
+  - Each criterion SHOULD include a concrete anchor:
+    - endpoint/handler + route, OR
+    - function/class name + file path
+  - Good:
+    - "VC1: POST /users returns 201 and persists normalized email (users/routes.py:create_user)"
+    - "VC2: Returns 401 for expired token (auth/middleware.py:validate_token)"
+    - "VC3: Creates audit log entry with user_id (audit/logger.py:log_event)"
+  - Bad:
+    - "Works correctly"
+    - "Handles errors"
+    - "Tests pass"
 **subtasks[].contracts**: Array of **executable assertion patterns** (optional but recommended for complexity_score ≥ 5)
   - `type`: "precondition" | "postcondition" | "invariant"
   - `assertion`: Executable pattern (e.g., "response.status == 401 WHEN token.expired")
@@ -269,6 +282,13 @@ Return **ONLY** valid JSON in this exact structure:
   - OMIT when: standard pattern with obvious implementation
   - Example: "Use existing RateLimiter middleware, configure for /api/* routes"
 **subtasks[].test_strategy**: Required object with unit/integration/e2e keys. Use "N/A" for levels not applicable.
+  - MUST map `validation_criteria` → tests:
+    - For each `VCn:` criterion, include at least one planned test name that covers it.
+    - Recommended naming: include `vc<n>` in the test name (e.g., `test_vc1_*`, `TestVC1*`) for deterministic grep-ability.
+    - Recommended format: `path/to/test_file.ext::test_name_or_symbol`
+  - "N/A" is acceptable ONLY when:
+    - The repository has no automated test harness, and adding one is out-of-scope for this subtask.
+    - In that case: either add a FOUNDATION subtask to introduce a minimal test harness, or document the gap explicitly in risks/assumptions.
 **subtasks[].affected_files**: Precise file paths (NOT "backend", "frontend"); use [] if paths unknown
 
 ### Subtask Ordering
@@ -491,9 +511,10 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 - [ ] Descriptions explain HOW, not just WHAT
 
 **Acceptance Criteria**:
-- [ ] Each subtask has 3-5 specific criteria
+- [ ] Each subtask has 2-4 specific criteria
 - [ ] Criteria are testable and measurable
-- [ ] Criteria cover: functionality + edge cases + testing
+- [ ] Criteria cover: functionality + edge cases (as applicable)
+- [ ] Each VC has a concrete verification hook in test_strategy (at least one planned test per VC)
 - [ ] No vague criteria ("works", "is good", "done")
 
 **File Paths**:
@@ -510,7 +531,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 
 **Test Strategy**:
 - [ ] test_strategy object included for each subtask
-- [ ] Unit tests specified (REQUIRED for all subtasks)
+- [ ] Unit tests specified (default). If repo has no test harness: add a FOUNDATION subtask to introduce minimal tests or explicitly justify "N/A".
 - [ ] Integration tests specified when subtask integrates multiple components
 - [ ] E2e tests specified when subtask impacts user-facing functionality
 - [ ] "N/A" used appropriately when test layer not applicable
diff --git a/.claude/commands/map-debate.md b/.claude/commands/map-debate.md
index 039526e..b1db61e 100644
--- a/.claude/commands/map-debate.md
+++ b/.claude/commands/map-debate.md
@@ -44,10 +44,14 @@ Task: $ARGUMENTS
 
 Hard requirements:
 - Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes)
+  - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor)
+  - Include a concrete anchor per VC (endpoint/function + file path)
 - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency
 - Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high)
 - Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access)
-- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys"
+- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys
+  - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc<n>`)
+  - Recommended format: `path/to/test_file.ext::test_name_or_symbol`"
 )
 ```
 
diff --git a/.claude/commands/map-efficient.md b/.claude/commands/map-efficient.md
index e47b357..b91c5f3 100644
--- a/.claude/commands/map-efficient.md
+++ b/.claude/commands/map-efficient.md
@@ -109,10 +109,14 @@ Task: $ARGUMENTS
 
 Hard requirements:
 - Use `blueprint.subtasks[].validation_criteria` (2-4 testable outcomes)
+  - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor)
+  - Include a concrete anchor per VC (endpoint/function + file path)
 - Use `blueprint.subtasks[].dependencies` (array of subtask IDs)
 - Include `complexity_score` (1-10) and `risk_level` (low|medium|high)
 - Include `security_critical` (true for auth/crypto/validation)
 - Include `test_strategy` with unit/integration/e2e keys
+  - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc<n>`)
+  - Recommended format: `path/to/test_file.ext::test_name_or_symbol`
 - Include `aag_contract` (one-line pseudocode: Actor -> Action -> Goal)
 
 AAG Contract format (REQUIRED per subtask):
diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md
index 40f1794..1ed955e 100644
--- a/src/mapify_cli/templates/agents/actor.md
+++ b/src/mapify_cli/templates/agents/actor.md
@@ -375,6 +375,10 @@ Document key decisions using this structure:
 - [ ] Error cases (invalid input, failures)
 - [ ] Security cases (injection, auth bypass) — if applicable
 
+**Validation criteria → tests (MANDATORY when test_strategy is not N/A)**:
+- For each `VCn:` item in `validation_criteria`, implement or update at least one automated test that would fail without your change and pass with it.
+- Prefer naming tests with `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage.
+
 **Format**:
 ```
 1. test_[function]_[scenario]_[expected]
@@ -392,7 +396,18 @@ Document key decisions using this structure:
    Expected: 409, {"error": "Email already registered"}
 </example>
 
-## 6. Used Patterns (ACE Learning)
+## 6. Validation Criteria Coverage (Evidence)
+
+If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced.
+
+**Format**:
+```
+VC1: <criterion text>
+- Code: path/to/file.ext#SymbolOrLocation
+- Tests: path/to/test_file.ext::test_name (or N/A with reason)
+```
+
+## 7. Used Patterns (ACE Learning)
 
 **Format**: `["impl-0012", "sec-0034"]` or `[]` if none
 
@@ -403,7 +418,7 @@ Document key decisions using this structure:
 
 **If no patterns match**: `[]` with note "No relevant patterns in current mem0"
 
-## 7. Integration Notes (If Applicable)
+## 8. Integration Notes (If Applicable)
 
 Only include if changes affect:
 - Database schema (migrations needed?)
@@ -449,6 +464,7 @@ Only include if changes affect:
 - [ ] AAG contract stated BEFORE code (Section 1)
 - [ ] Trade-offs documented with alternatives
 - [ ] Test cases cover happy + edge + error paths
+- [ ] Each `validation_criteria` item has at least one automated test (or explicit N/A with reason)
 - [ ] Used patterns tracked (or `[]` if none)
 - [ ] Template variables `{{...}}` preserved in generated code
 
@@ -518,6 +534,14 @@ with the following JSON content:
   "summary": "<one-line description of what was implemented>",
   "aag_contract": "<the AAG contract line>",
   "files_changed": ["<list of modified file paths>"],
+  "tests_changed": ["<list of modified/added test file paths>"],
+  "validation_criteria_coverage": [
+    {
+      "criterion": "VC1: ...",
+      "tests": ["path/to/test_file.ext::test_name"],
+      "notes": "Short justification if tests are N/A or partial"
+    }
+  ],
   "status": "applied"
 }
 ```
@@ -1083,4 +1107,3 @@ export class ReconnectingWebSocket {
 **Used Bullets**: `[]` (No similar patterns in mem0. Novel implementation.)
 
 </Actor_Reference_Examples>
-
diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md
index 6358283..59043ad 100644
--- a/src/mapify_cli/templates/agents/monitor.md
+++ b/src/mapify_cli/templates/agents/monitor.md
@@ -821,8 +821,9 @@ When `{{requirements}}` or `{{subtask_description}}` includes `validation_criter
 ```
 FOR each criterion in validation_criteria:
   1. PARSE criterion into testable assertion
-  2. VERIFY assertion against {{solution}}
-  3. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE
+  2. VERIFY assertion against {{solution}} (code-path evidence)
+  3. VERIFY test coverage using test_strategy (if not N/A)
+  4. RECORD result: PASS | FAIL | PARTIAL | UNTESTABLE
 
 CONTRACT_STATUS:
   - ALL PASS → contract_compliant: true
@@ -830,6 +831,17 @@ CONTRACT_STATUS:
   - ANY UNTESTABLE → flag for clarification
 ```
 
+### Test Coverage Rule (Executable Contracts)
+
+Design constraints only become reliable when they are enforced by executable checks.
+
+For each `VCn:` criterion:
+- If `test_strategy` is provided and not `N/A`, require at least one concrete test case that covers it.
+- Prefer deterministic mapping: test names include `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`).
+- Evidence MUST include both:
+  - **Code evidence** (where in code the behavior is implemented), and
+  - **Test evidence** (where in tests it is asserted).
+
 ### Contract Assertion Patterns
 
 | Criterion Type | How to Verify | Example |
@@ -852,15 +864,31 @@ Include in JSON output when validation_criteria provided:
     "failed": 1,
     "untestable": 0,
     "details": [
-      {"criterion": "Returns 401 for expired token", "status": "PASS", "evidence": "Line 45: if token.expired: return 401"},
-      {"criterion": "Creates audit log entry", "status": "FAIL", "evidence": "No audit.log() call found in create_user()"}
+      {
+        "criterion": "VC1: Returns 401 for expired token (auth/middleware.py:validate_token)",
+        "status": "PASS",
+        "code_evidence": "auth/middleware.py:45: if token.expired: return 401",
+        "test_coverage": "PASS",
+        "test_evidence": "tests/test_auth.py::test_vc1_expired_token_returns_401"
+      },
+      {
+        "criterion": "VC2: Creates audit log entry with user_id (audit/logger.py:log_event)",
+        "status": "FAIL",
+        "code_evidence": "No audit.log_event() call found in create_user()",
+        "test_coverage": "MISSING",
+        "test_evidence": "No test found matching vc2 or described in test_strategy"
+      }
     ]
   },
   "contract_compliant": false
 }
 ```
 
-**Decision Rule**: If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming).
+**Decision Rule**:
+- If `contract_compliant: false`, set `valid: false` unless ALL failed contracts are LOW severity (documentation, naming).
+- If any Behavioral/Integration/Edge-case criterion has `test_coverage != PASS` and test_strategy is not `N/A`:
+  - If `security_critical == true`: set `valid: false` (missing executable enforcement is a release blocker).
+  - Otherwise: add a **testability** issue and require Actor to add tests.
 
 </Monitor_Contract_Validation>
 
@@ -2512,7 +2540,13 @@ with the following JSON content:
   "timestamp": "<ISO 8601 UTC>",
   "valid": true,
   "issues_found": 0,
-  "recommendation": "approve|reject|revise"
+  "recommendation": "approve|reject|revise",
+  "validation_criteria_test_coverage": {
+    "total": 0,
+    "covered": 0,
+    "missing": 0,
+    "notes": "Optional: summarize VC→test coverage findings"
+  }
 }
 ```
 
diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md
index efcbb97..58e8afd 100644
--- a/src/mapify_cli/templates/agents/task-decomposer.md
+++ b/src/mapify_cli/templates/agents/task-decomposer.md
@@ -248,8 +248,21 @@ Return **ONLY** valid JSON in this exact structure:
 **subtasks[].complexity_rationale**: MUST reference factors: "Score N: factor (+X), factor (+Y)..."
 **subtasks[].validation_criteria**: Array of **testable conditions** that prove completion
   - REQUIRED: 2-4 specific, verifiable outcomes
-  - Good: "Returns 401 for expired token", "Creates audit log entry with user_id"
-  - Bad: "Works correctly", "Handles errors"
+  - Format (recommended): Prefix each item with `VC1:`, `VC2:`, ... for stable cross-agent reference.
+  - Each criterion MUST be both:
+    - **Behavior-/artifact-verifiable** (can be checked by reading code), and
+    - **Test-verifiable** (has at least one concrete test case planned in `test_strategy`).
+  - Each criterion SHOULD include a concrete anchor:
+    - endpoint/handler + route, OR
+    - function/class name + file path
+  - Good:
+    - "VC1: POST /users returns 201 and persists normalized email (users/routes.py:create_user)"
+    - "VC2: Returns 401 for expired token (auth/middleware.py:validate_token)"
+    - "VC3: Creates audit log entry with user_id (audit/logger.py:log_event)"
+  - Bad:
+    - "Works correctly"
+    - "Handles errors"
+    - "Tests pass"
 **subtasks[].contracts**: Array of **executable assertion patterns** (optional but recommended for complexity_score ≥ 5)
   - `type`: "precondition" | "postcondition" | "invariant"
   - `assertion`: Executable pattern (e.g., "response.status == 401 WHEN token.expired")
@@ -269,6 +282,13 @@ Return **ONLY** valid JSON in this exact structure:
   - OMIT when: standard pattern with obvious implementation
   - Example: "Use existing RateLimiter middleware, configure for /api/* routes"
 **subtasks[].test_strategy**: Required object with unit/integration/e2e keys. Use "N/A" for levels not applicable.
+  - MUST map `validation_criteria` → tests:
+    - For each `VCn:` criterion, include at least one planned test name that covers it.
+    - Recommended naming: include `vc<n>` in the test name (e.g., `test_vc1_*`, `TestVC1*`) for deterministic grep-ability.
+    - Recommended format: `path/to/test_file.ext::test_name_or_symbol`
+  - "N/A" is acceptable ONLY when:
+    - The repository has no automated test harness, and adding one is out-of-scope for this subtask.
+    - In that case: either add a FOUNDATION subtask to introduce a minimal test harness, or document the gap explicitly in risks/assumptions.
 **subtasks[].affected_files**: Precise file paths (NOT "backend", "frontend"); use [] if paths unknown
 
 ### Subtask Ordering
@@ -491,9 +511,10 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 - [ ] Descriptions explain HOW, not just WHAT
 
 **Acceptance Criteria**:
-- [ ] Each subtask has 3-5 specific criteria
+- [ ] Each subtask has 2-4 specific criteria
 - [ ] Criteria are testable and measurable
-- [ ] Criteria cover: functionality + edge cases + testing
+- [ ] Criteria cover: functionality + edge cases (as applicable)
+- [ ] Each VC has a concrete verification hook in test_strategy (at least one planned test per VC)
 - [ ] No vague criteria ("works", "is good", "done")
 
 **File Paths**:
@@ -510,7 +531,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 
 **Test Strategy**:
 - [ ] test_strategy object included for each subtask
-- [ ] Unit tests specified (REQUIRED for all subtasks)
+- [ ] Unit tests specified (default). If repo has no test harness: add a FOUNDATION subtask to introduce minimal tests or explicitly justify "N/A".
 - [ ] Integration tests specified when subtask integrates multiple components
 - [ ] E2e tests specified when subtask impacts user-facing functionality
 - [ ] "N/A" used appropriately when test layer not applicable
diff --git a/src/mapify_cli/templates/commands/map-debate.md b/src/mapify_cli/templates/commands/map-debate.md
index 039526e..b1db61e 100644
--- a/src/mapify_cli/templates/commands/map-debate.md
+++ b/src/mapify_cli/templates/commands/map-debate.md
@@ -44,10 +44,14 @@ Task: $ARGUMENTS
 
 Hard requirements:
 - Use `blueprint.subtasks[].validation_criteria` (2-4 testable, verifiable outcomes)
+  - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor)
+  - Include a concrete anchor per VC (endpoint/function + file path)
 - Use `blueprint.subtasks[].dependencies` (array of subtask IDs) and order subtasks by dependency
 - Include `blueprint.subtasks[].complexity_score` (1-10) and `risk_level` (low|medium|high)
 - Include `blueprint.subtasks[].security_critical` (true for auth/crypto/validation/data access)
-- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys"
+- Include `blueprint.subtasks[].test_strategy` with unit/integration/e2e keys
+  - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc<n>`)
+  - Recommended format: `path/to/test_file.ext::test_name_or_symbol`"
 )
 ```
 
diff --git a/src/mapify_cli/templates/commands/map-efficient.md b/src/mapify_cli/templates/commands/map-efficient.md
index e47b357..b91c5f3 100644
--- a/src/mapify_cli/templates/commands/map-efficient.md
+++ b/src/mapify_cli/templates/commands/map-efficient.md
@@ -109,10 +109,14 @@ Task: $ARGUMENTS
 
 Hard requirements:
 - Use `blueprint.subtasks[].validation_criteria` (2-4 testable outcomes)
+  - Prefix each criterion with `VC1:`, `VC2:`, ... (stable references for Actor/Monitor)
+  - Include a concrete anchor per VC (endpoint/function + file path)
 - Use `blueprint.subtasks[].dependencies` (array of subtask IDs)
 - Include `complexity_score` (1-10) and `risk_level` (low|medium|high)
 - Include `security_critical` (true for auth/crypto/validation)
 - Include `test_strategy` with unit/integration/e2e keys
+  - Map every `VCn:` to ≥1 planned test case (prefer test name contains `vc<n>`)
+  - Recommended format: `path/to/test_file.ext::test_name_or_symbol`
 - Include `aag_contract` (one-line pseudocode: Actor -> Action -> Goal)
 
 AAG Contract format (REQUIRED per subtask):

From c080a5942ed50c67f8ca92fdb2f5c3b287fb32c3 Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Mon, 16 Feb 2026 00:36:32 +0300
Subject: [PATCH 2/7] Fix Claude Code hook configuration and outputs

---
 .claude/hooks/block-dangerous.sh              | 92 +++++++++++++++----
 .claude/hooks/block-secrets.py                | 27 +++---
 .claude/hooks/post-edit-reminder.py           |  9 +-
 .claude/hooks/ralph-context-pruner.py         |  7 +-
 .claude/hooks/safety-guardrails.py            | 33 ++++---
 .claude/hooks/workflow-context-injector.py    |  7 +-
 .claude/hooks/workflow-gate.py                | 27 ++++--
 .claude/references/workflow-state-schema.md   |  2 +-
 .claude/settings.hooks.json                   | 74 ---------------
 .claude/settings.json                         | 81 ++++++++++++++++
 .claude/skills/README.md                      |  4 +-
 CHANGELOG.md                                  |  4 +-
 CLAUDE.md                                     |  2 +-
 IMPLEMENTATION_SUMMARY.md                     | 18 ++--
 docs/USAGE.md                                 |  4 +-
 scripts/sync-templates.sh                     |  2 +-
 src/mapify_cli/__init__.py                    | 39 +++-----
 src/mapify_cli/templates/CLAUDE.md            |  2 +-
 .../templates/hooks/block-dangerous.sh        | 92 +++++++++++++++----
 .../templates/hooks/block-secrets.py          | 27 +++---
 .../templates/hooks/post-edit-reminder.py     |  9 +-
 .../templates/hooks/ralph-context-pruner.py   |  7 +-
 .../templates/hooks/safety-guardrails.py      | 33 ++++---
 .../hooks/workflow-context-injector.py        |  7 +-
 .../templates/hooks/workflow-gate.py          | 27 ++++--
 .../references/workflow-state-schema.md       |  2 +-
 src/mapify_cli/templates/settings.hooks.json  | 74 ---------------
 src/mapify_cli/templates/settings.json        | 81 ++++++++++++++++
 src/mapify_cli/templates/skills/README.md     |  4 +-
 29 files changed, 481 insertions(+), 316 deletions(-)
 delete mode 100644 .claude/settings.hooks.json
 delete mode 100644 src/mapify_cli/templates/settings.hooks.json

diff --git a/.claude/hooks/block-dangerous.sh b/.claude/hooks/block-dangerous.sh
index ab90770..f42a6ba 100755
--- a/.claude/hooks/block-dangerous.sh
+++ b/.claude/hooks/block-dangerous.sh
@@ -14,7 +14,7 @@
 #
 # EXIT CODES:
 #   0 - Allow command execution
-#   2 - Block command execution (dangerous command detected)
+#   0 + permissionDecision=deny - Block command execution (preferred)
 #
 # TESTING:
 #   echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh
@@ -53,58 +53,112 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]')
 # Check for rm -rf (recursive force delete)
 # Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force
 if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Catch rm -rf at end of command or with path immediately after
 if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Catch separated flags: rm -r -f or rm -f -r
 if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for git push --force to main/master
 # Matches: git push --force origin main, git push -f origin master
 if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
+      }
+    }'
+    exit 0
 fi
 
 # Also check reverse order: git push origin main --force
 if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for git reset --hard (without specific commit, or dangerous patterns)
 # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/
 if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: git reset --hard is prohibited","details":"Hard reset can cause irreversible loss of uncommitted changes","suggestion":"Use git stash or git reset --soft instead"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for dangerous chmod/chown on system directories
 if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Recursive chmod/chown on root is prohibited","details":"This can break system permissions","suggestion":"Specify a more targeted path"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for dd with of=/dev/
 if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: dd to /dev/ is prohibited","details":"Writing to raw devices can destroy data","suggestion":"Use safer file operations"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for mkfs (format filesystem)
 if echo "$COMMAND" | grep -qE 'mkfs'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: mkfs is prohibited","details":"Formatting filesystems can destroy data","suggestion":"This operation requires manual execution"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
+      }
+    }'
+    exit 0
 fi
 
 # =============================================================================
diff --git a/.claude/hooks/block-secrets.py b/.claude/hooks/block-secrets.py
index bbeb466..fb1d2e1 100755
--- a/.claude/hooks/block-secrets.py
+++ b/.claude/hooks/block-secrets.py
@@ -22,7 +22,7 @@
 
 HOOK BEHAVIOR:
   - Exit code 0: Allow tool execution (non-sensitive file)
-  - Exit code 2: Block tool execution (sensitive file detected)
+  - Exit code 0 + permissionDecision=deny: Block tool execution (sensitive file detected)
 
 TESTING:
   echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py
@@ -78,16 +78,21 @@ def is_sensitive_file(file_path: str) -> bool:
 
 def block_access(file_path: str, tool_name: str):
     """Block tool execution and output error message."""
-    error_output = {
-        "hookSpecificOutput": {
-            "hookEventName": "PreToolUse",
-            "error": f"Blocked: Access to sensitive file '{file_path}' is prohibited",
-            "details": f"Tool '{tool_name}' attempted to access a protected file. Sensitive files include: .env*, *credentials*, *secret*, private keys (*.pem, *.key, *_rsa, etc.)",
-            "suggestion": "If you need to work with sensitive data, use environment variables or a secrets management system instead of reading raw credential files.",
-        }
-    }
-    print(json.dumps(error_output), file=sys.stderr)
-    sys.exit(2)  # Exit code 2 blocks execution
+    print(
+        json.dumps(
+            {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "deny",
+                    "permissionDecisionReason": (
+                        f"Blocked: Access to sensitive file '{file_path}' is prohibited "
+                        f"(tool={tool_name})."
+                    ),
+                }
+            }
+        )
+    )
+    sys.exit(0)
 
 
 def main():
diff --git a/.claude/hooks/post-edit-reminder.py b/.claude/hooks/post-edit-reminder.py
index de96db8..ff77beb 100755
--- a/.claude/hooks/post-edit-reminder.py
+++ b/.claude/hooks/post-edit-reminder.py
@@ -7,7 +7,7 @@
 
 Trigger: Edit|Write
 Exit codes: Always 0 (non-blocking)
-Output: ~80 char reminder via hookSpecificOutput.appended_text
+Output: ~80 char reminder via hookSpecificOutput.additionalContext
 """
 
 import json
@@ -73,7 +73,12 @@ def main() -> None:
 
     # Inject lightweight reminder
     reminder = "[MAP] Code changed. Run tests before committing!"
-    output = {"hookSpecificOutput": {"appended_text": reminder}}
+    output = {
+        "hookSpecificOutput": {
+            "hookEventName": "PostToolUse",
+            "additionalContext": reminder,
+        }
+    }
     print(json.dumps(output))
     sys.exit(0)
 
diff --git a/.claude/hooks/ralph-context-pruner.py b/.claude/hooks/ralph-context-pruner.py
index 8adb60f..b37b174 100755
--- a/.claude/hooks/ralph-context-pruner.py
+++ b/.claude/hooks/ralph-context-pruner.py
@@ -13,7 +13,7 @@
   0 - Always (PreCompact hooks don't block)
 
 Output:
-  hookSpecificOutput.appended_text - Recovery message injected into context
+  hookSpecificOutput.additionalContext - Recovery message injected into context
 """
 import json
 import os
@@ -238,7 +238,10 @@ def main() -> None:
 
         # Inject recovery message into context
         recovery_msg = format_recovery_message(state, branch)
-        output["hookSpecificOutput"] = {"appended_text": recovery_msg}
+        output["hookSpecificOutput"] = {
+            "hookEventName": "PreCompact",
+            "additionalContext": recovery_msg,
+        }
 
     # Prune log files in ALL branch directories
     actions = []
diff --git a/.claude/hooks/safety-guardrails.py b/.claude/hooks/safety-guardrails.py
index a0158c7..4174337 100644
--- a/.claude/hooks/safety-guardrails.py
+++ b/.claude/hooks/safety-guardrails.py
@@ -9,7 +9,7 @@
 Trigger: Edit|Write|Bash
 Exit codes:
   0 - Allow
-  2 - Block (with error message)
+  0 + permissionDecision=deny - Block (preferred)
 """
 
 import json
@@ -84,17 +84,20 @@ def check_command_safety(command: str) -> tuple[bool, str]:
     return True, ""
 
 
-def block_access(reason: str, tool_name: str) -> None:
-    """Block tool execution with error message."""
-    error_output = {
-        "hookSpecificOutput": {
-            "hookEventName": "PreToolUse",
-            "error": reason,
-            "details": f"Tool '{tool_name}' blocked by safety guardrails",
-        }
-    }
-    print(json.dumps(error_output), file=sys.stderr)
-    sys.exit(2)
+def deny(reason: str) -> None:
+    """Deny tool execution using structured PreToolUse decision control."""
+    print(
+        json.dumps(
+            {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "deny",
+                    "permissionDecisionReason": reason,
+                }
+            }
+        )
+    )
+    sys.exit(0)
 
 
 def main() -> None:
@@ -108,18 +111,18 @@ def main() -> None:
     tool_input = input_data.get("tool_input", {})
 
     # Check file-based tools
-    if tool_name in ("Edit", "Write", "Read"):
+    if tool_name in ("Edit", "Write", "Read", "MultiEdit"):
         file_path = tool_input.get("file_path", "") or tool_input.get("path", "")
         is_safe, reason = check_file_safety(file_path)
         if not is_safe:
-            block_access(reason, tool_name)
+            deny(f"{reason} (tool={tool_name})")
 
     # Check bash commands
     elif tool_name == "Bash":
         command = tool_input.get("command", "")
         is_safe, reason = check_command_safety(command)
         if not is_safe:
-            block_access(reason, tool_name)
+            deny(f"{reason} (tool={tool_name})")
 
     print("{}")
     sys.exit(0)
diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py
index f0b8d34..72145e5 100755
--- a/.claude/hooks/workflow-context-injector.py
+++ b/.claude/hooks/workflow-context-injector.py
@@ -243,7 +243,12 @@ def main() -> None:
 
     reminder = format_reminder(state, branch)
     if reminder:
-        output = {"hookSpecificOutput": {"appended_text": reminder}}
+        output = {
+            "hookSpecificOutput": {
+                "hookEventName": "PreToolUse",
+                "additionalContext": reminder,
+            }
+        }
         print(json.dumps(output))
     else:
         print("{}")
diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py
index 01e3429..499bdfc 100755
--- a/.claude/hooks/workflow-gate.py
+++ b/.claude/hooks/workflow-gate.py
@@ -23,12 +23,12 @@
     - pending_steps: Dict mapping subtask_id -> list of pending steps
 
 HOOK BEHAVIOR:
-  - Exit code 0 + approve: Allow tool execution
-  - Exit code 2 + block: Block tool execution with error message
+  - Exit code 0: Allow tool execution
+  - Exit code 0 + permissionDecision=deny: Block tool execution with reason (preferred)
 
 TESTING:
   echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py
-  # Expected: Exit code 2, JSON error on stderr (no workflow state)
+  # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps)
 
 PERFORMANCE:
   Target: <100ms per invocation
@@ -138,7 +138,7 @@ def main():
 
         # Allow non-editing tools
         if tool_name not in EDITING_TOOLS:
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
 
         # Get current branch
@@ -150,25 +150,32 @@ def main():
         if state is None:
             # No workflow state = not in MAP workflow mode, allow
             # (This prevents breaking non-MAP work)
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
 
         # Check workflow compliance
         is_compliant, error_message = check_workflow_compliance(state)
 
         if is_compliant:
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
         else:
             print(
-                json.dumps({"decision": "block", "reason": error_message}),
-                file=sys.stderr,
+                json.dumps(
+                    {
+                        "hookSpecificOutput": {
+                            "hookEventName": "PreToolUse",
+                            "permissionDecision": "deny",
+                            "permissionDecisionReason": error_message,
+                        }
+                    }
+                )
             )
-            sys.exit(2)
+            sys.exit(0)
 
     except Exception as e:
         # On hook failure, approve (fail-open to avoid blocking work)
-        print(json.dumps({"decision": "approve"}))
+        print("{}")
         if os.environ.get("DEBUG_WORKFLOW_GATE"):
             print(f"[workflow-gate] ERROR: {e}", file=sys.stderr)
         sys.exit(0)
diff --git a/.claude/references/workflow-state-schema.md b/.claude/references/workflow-state-schema.md
index 42e34fd..e490808 100644
--- a/.claude/references/workflow-state-schema.md
+++ b/.claude/references/workflow-state-schema.md
@@ -266,4 +266,4 @@ This external state file combined with hook enforcement implements that principl
 - `.claude/hooks/workflow-gate.py` - Enforcement hook
 - `.claude/commands/map-efficient.md` - Workflow definition
 - `.map/<branch>/task_plan_<branch>.md` - Human-readable plan
-- `.claude/settings.hooks.json` - Hook registration
+- `.claude/settings.json` - Hook registration
diff --git a/.claude/settings.hooks.json b/.claude/settings.hooks.json
deleted file mode 100644
index eb4e7ef..0000000
--- a/.claude/settings.hooks.json
+++ /dev/null
@@ -1,74 +0,0 @@
-{
-  "$schema": "https://json.schemastore.org/claude-code-settings.json",
-  "description": "Claude Code hooks configuration for MAP Framework (Tiered)",
-  "hooks": {
-    "PreToolUse": [
-      {
-        "matcher": "Edit|Write|Read|Bash",
-        "description": "Safety Guardrails - block sensitive files and dangerous commands",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/safety-guardrails.py",
-            "timeout": 5,
-            "description": "Blocks .env, credentials, private keys, and dangerous bash commands"
-          }
-        ]
-      },
-      {
-        "matcher": "Edit|Write|Bash",
-        "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py",
-            "timeout": 3,
-            "description": "Injects workflow context only for Edit/Write and significant Bash commands"
-          }
-        ]
-      }
-    ],
-    "PostToolUse": [
-      {
-        "matcher": "Edit|Write|MultiEdit",
-        "description": "Post-Edit Reminder - lightweight test reminder after code changes",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py",
-            "timeout": 3,
-            "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)"
-          }
-        ]
-      }
-    ],
-    "PreCompact": [
-      {
-        "matcher": "*",
-        "description": "Anti-Amnesia Hook - save state and prune logs before compaction",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/ralph-context-pruner.py",
-            "timeout": 10,
-            "description": "Saves workflow restore point and prunes old logs"
-          }
-        ]
-      }
-    ],
-    "Stop": [
-      {
-        "matcher": "*",
-        "description": "Lightweight quality gates - only critical issues",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/end-of-turn.sh",
-            "timeout": 30,
-            "description": "Auto-fixes silently, only reports secrets/syntax errors"
-          }
-        ]
-      }
-    ]
-  }
-}
diff --git a/.claude/settings.json b/.claude/settings.json
index 6c2c9a2..0779e49 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -33,5 +33,86 @@
       "Bash(gofmt *)",
       "Bash(kubectl get *)"
     ]
+  },
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Edit|Write|Read|MultiEdit|Bash",
+        "description": "Safety Guardrails - block sensitive files and dangerous commands",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/safety-guardrails.py",
+            "timeout": 5,
+            "description": "Blocks .env, credentials, private keys, and dangerous bash commands"
+          }
+        ]
+      },
+      {
+        "matcher": "Edit|Write|MultiEdit",
+        "description": "Workflow Gate - enforce Actor+Monitor before edits (MAP)",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-gate.py",
+            "timeout": 5,
+            "description": "Blocks Edit/Write/MultiEdit until workflow_state indicates Actor+Monitor completed"
+          }
+        ]
+      },
+      {
+        "matcher": "Edit|Write|MultiEdit|Bash",
+        "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-context-injector.py",
+            "timeout": 3,
+            "description": "Injects workflow context only for Edit/Write/MultiEdit and significant Bash commands"
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Edit|Write|MultiEdit",
+        "description": "Post-Edit Reminder - lightweight test reminder after code changes",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-reminder.py",
+            "timeout": 3,
+            "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)"
+          }
+        ]
+      }
+    ],
+    "PreCompact": [
+      {
+        "matcher": "*",
+        "description": "Anti-Amnesia Hook - save state and prune logs before compaction",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/ralph-context-pruner.py",
+            "timeout": 10,
+            "description": "Saves workflow restore point and prunes old logs"
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "description": "Lightweight quality gates - only critical issues",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/end-of-turn.sh",
+            "timeout": 30,
+            "description": "Auto-fixes silently, only reports secrets/syntax errors"
+          }
+        ]
+      }
+    ]
   }
 }
diff --git a/.claude/skills/README.md b/.claude/skills/README.md
index 2d234d4..c15191d 100644
--- a/.claude/skills/README.md
+++ b/.claude/skills/README.md
@@ -152,12 +152,12 @@ Skills work seamlessly with the prompt improvement system:
 
 **Check:**
 1. `skill-rules.json` has correct triggers
-2. `improve-prompt.py` hook is enabled in `settings.hooks.json`
+2. `improve-prompt.py` hook is enabled in `.claude/settings.json`
 3. Hook processes UserPromptSubmit events
 
 **Fix:**
 - Update trigger patterns in `skill-rules.json`
-- Verify hook configuration in `.claude/settings.hooks.json`
+- Verify hook configuration in `.claude/settings.json`
 
 ### Skill content too long
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3b6a592..bb2ec10 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,7 +72,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 - **task-decomposer.md**: Enhanced with Acceptance Criteria table format, re-decomposition mode, dependency enforcement
 - **map-efficient.md**: Added Step 3.5 Final Verification with circuit breaker check, final-verifier invocation, re-decomposition logic
-- **settings.hooks.json**: Added PreToolUse, PostToolUse, and PreCompact hook entries for Ralph Loop
+- **.claude/settings.json (hooks)**: Added PreToolUse, PostToolUse, and PreCompact hook entries for Ralph Loop
 
 ### Documentation
 - Branch-scoped artifacts stored in `.map/<sanitized-branch>/` directory
@@ -353,7 +353,7 @@ rm -rf .claude/hooks/
   - Preserved user settings during hooks installation (merge strategy)
 
 - **mapify init Command Fixes** (1aee890, 7d264ef, 956ef96)
-  - Fixed mapify init to copy Python hooks and settings.hooks.json correctly
+  - Fixed mapify init to copy Python hooks and hook-enabled `.claude/settings.json` correctly
   - Corrected settings file location (.claude/ not .claude/hooks/)
   - Restored SessionStart hook functionality
 
diff --git a/CLAUDE.md b/CLAUDE.md
index a5dd7c0..29b8c1a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -16,7 +16,7 @@ Common synced paths:
 - `.claude/commands/` → `src/mapify_cli/templates/commands/`
 - `.claude/hooks/` → `src/mapify_cli/templates/hooks/`
 - `.claude/references/` → `src/mapify_cli/templates/references/`
-- `.claude/settings.json`, `.claude/settings.hooks.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/`
+- `.claude/settings.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/`
 
 Do the sync via a deterministic command (preferred):
 - `make sync-templates` (runs `scripts/sync-templates.sh`)
diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md
index e307ef5..cecea31 100644
--- a/IMPLEMENTATION_SUMMARY.md
+++ b/IMPLEMENTATION_SUMMARY.md
@@ -44,7 +44,7 @@
   - Recursive invocation for fresh context per subtask
 
 ### Configuration Updates
-- **`.claude/settings.hooks.json`**: Added workflow-context-injector.py hook registration
+- **`.claude/settings.json`**: Added workflow-context-injector.py hook registration
 - **Template sync**: All files copied to `src/mapify_cli/templates/`
 
 ### Documentation Updates
@@ -148,7 +148,7 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30
 .claude/
 ├── hooks/
 │   ├── workflow-context-injector.py (NEW)
-│   └── settings.hooks.json (UPDATED)
+│   └── settings.json (UPDATED)
 ├── commands/
 │   ├── map-efficient.md (OPTIMIZED - 995→394 lines)
 │   └── map-efficient.md.backup (BACKUP)
@@ -157,13 +157,13 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30
 ├── map_orchestrator.py (NEW)
 └── map_step_runner.py (NEW)
 
-src/mapify_cli/templates/
-├── hooks/workflow-context-injector.py (SYNCED)
-├── commands/map-efficient.md (SYNCED)
-├── map/scripts/
-│   ├── map_orchestrator.py (SYNCED)
-│   └── map_step_runner.py (SYNCED)
-└── settings.hooks.json (SYNCED)
+ src/mapify_cli/templates/
+ ├── hooks/workflow-context-injector.py (SYNCED)
+ ├── commands/map-efficient.md (SYNCED)
+ ├── map/scripts/
+ │   ├── map_orchestrator.py (SYNCED)
+ │   └── map_step_runner.py (SYNCED)
+ └── settings.json (SYNCED)
 
 docs/
 ├── ARCHITECTURE.md (UPDATED - new section)
diff --git a/docs/USAGE.md b/docs/USAGE.md
index fd610ed..1e1d016 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -1941,7 +1941,7 @@ If you prefer direct execution without clarification:
 * implement user authentication  # Skips improvement
 ```
 
-**Option 2: Remove from settings.hooks.json**
+**Option 2: Remove from `.claude/settings.json`**
 ```json
 {
   "hooks": {
@@ -1972,7 +1972,7 @@ MAP Framework includes additional hooks for security and quality:
 | `block-dangerous.sh` | PreToolUse | Block dangerous shell commands |
 | `end-of-turn.sh` | Stop | Quality gates (linting, secret scanning) |
 
-**Configuration:** See `.claude/settings.hooks.json` for hook configuration.
+**Configuration:** See `.claude/settings.json` for hook configuration (or manage via `/hooks`).
 
 **Security hooks:** See [Security Model: Three-Layer Defense](#-security-model-three-layer-defense) for details.
 
diff --git a/scripts/sync-templates.sh b/scripts/sync-templates.sh
index eb68752..24f72fb 100755
--- a/scripts/sync-templates.sh
+++ b/scripts/sync-templates.sh
@@ -12,7 +12,7 @@ cp -a .claude/agents/*.md "$templates_root/agents/"
 cp -a .claude/commands/*.md "$templates_root/commands/"
 cp -a .claude/hooks/* "$templates_root/hooks/"
 cp -a .claude/references/* "$templates_root/references/"
-cp -a .claude/settings.json .claude/settings.hooks.json .claude/workflow-rules.json .claude/ralph-loop-config.json "$templates_root/"
+cp -a .claude/settings.json .claude/workflow-rules.json .claude/ralph-loop-config.json "$templates_root/"
 
 # Sync skills directory (preserving nested structure)
 if [[ -d .claude/skills ]]; then
diff --git a/src/mapify_cli/__init__.py b/src/mapify_cli/__init__.py
index 51e4ae3..4747953 100644
--- a/src/mapify_cli/__init__.py
+++ b/src/mapify_cli/__init__.py
@@ -1269,15 +1269,16 @@ def configure_global_permissions() -> None:
 
 
 def create_or_merge_project_settings_local(project_path: Path) -> None:
-    """Create/merge .claude/settings.local.json with safe project allowlist and hooks.
+    """Create/merge .claude/settings.local.json with safe project allowlist.
 
     Claude Code supports per-project approvals via `.claude/settings.local.json`.
     This file is user-local (should not be committed) and is merged by Claude Code
     with global settings from `~/.claude/settings.json`.
 
-    IMPORTANT: Claude Code only reads hooks from `settings.json` or `settings.local.json`.
-    The separate `settings.hooks.json` file is NOT read by Claude Code. Therefore, this
-    function merges hooks from the template `settings.hooks.json` into `settings.local.json`.
+    IMPORTANT:
+    - Shared, repo-committed hooks MUST be configured in `.claude/settings.json`.
+    - `.claude/settings.local.json` is for user-local approvals/allowlists and should
+      not be used as the primary distribution mechanism for project hooks.
 
     We keep this allowlist intentionally narrow and focused on common safe actions
     for local development workflows.
@@ -1313,20 +1314,6 @@ def create_or_merge_project_settings_local(project_path: Path) -> None:
         "ask": [],
     }
 
-    # Load hooks from template settings.hooks.json
-    # Claude Code doesn't read hooks from settings.hooks.json, so we merge them here
-    hooks_config: Dict[str, Any] = {}
-    templates_dir = get_templates_dir()
-    hooks_template_file = templates_dir / "settings.hooks.json"
-    if hooks_template_file.exists():
-        try:
-            hooks_data = json.loads(hooks_template_file.read_text(encoding="utf-8"))
-            hooks_config = hooks_data.get("hooks", {})
-        except (json.JSONDecodeError, OSError) as e:
-            console.print(
-                f"[yellow]Warning:[/yellow] Could not read hooks template: {e}"
-            )
-
     # Load existing settings if present
     if settings_file.exists():
         try:
@@ -1339,6 +1326,14 @@ def create_or_merge_project_settings_local(project_path: Path) -> None:
     else:
         existing_settings = {}
 
+    if isinstance(existing_settings, dict) and existing_settings.get("hooks"):
+        console.print(
+            "[yellow]Warning:[/yellow] .claude/settings.local.json contains hooks. "
+            "Claude Code loads hooks from BOTH .claude/settings.json and .claude/settings.local.json, "
+            "so this can cause duplicate hook executions. "
+            "Move shared hooks to .claude/settings.json and remove the hooks section from settings.local.json."
+        )
+
     existing_settings.setdefault("permissions", {})
     permissions = existing_settings["permissions"]
 
@@ -1351,12 +1346,6 @@ def create_or_merge_project_settings_local(project_path: Path) -> None:
     permissions.setdefault("deny", permissions.get("deny", []))
     permissions.setdefault("ask", permissions.get("ask", []))
 
-    # Replace hooks configuration from template
-    # Always use template hooks to ensure correct $CLAUDE_PROJECT_DIR paths
-    # User customizations should be done by editing the template or post-init
-    if hooks_config:
-        existing_settings["hooks"] = hooks_config
-
     settings_file.write_text(json.dumps(existing_settings, indent=2) + "\n")
 
 
@@ -1904,7 +1893,6 @@ def create_config_files(project_path: Path) -> int:
 
     Copies configuration files:
     - settings.json
-    - settings.hooks.json
     - ralph-loop-config.json
     - workflow-rules.json
 
@@ -1919,7 +1907,6 @@ def create_config_files(project_path: Path) -> int:
 
     config_files = [
         "settings.json",
-        "settings.hooks.json",
         "ralph-loop-config.json",
         "workflow-rules.json",
     ]
diff --git a/src/mapify_cli/templates/CLAUDE.md b/src/mapify_cli/templates/CLAUDE.md
index afadff8..45550f3 100644
--- a/src/mapify_cli/templates/CLAUDE.md
+++ b/src/mapify_cli/templates/CLAUDE.md
@@ -16,7 +16,7 @@ Common synced paths:
 - `.claude/commands/` → `src/mapify_cli/templates/commands/`
 - `.claude/hooks/` → `src/mapify_cli/templates/hooks/`
 - `.claude/references/` → `src/mapify_cli/templates/references/`
-- `.claude/settings.json`, `.claude/settings.hooks.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/`
+- `.claude/settings.json`, `.claude/workflow-rules.json` → `src/mapify_cli/templates/`
 
 Do the sync via a deterministic command (preferred):
 - `make sync-templates` (runs `scripts/sync-templates.sh`)
diff --git a/src/mapify_cli/templates/hooks/block-dangerous.sh b/src/mapify_cli/templates/hooks/block-dangerous.sh
index ab90770..f42a6ba 100755
--- a/src/mapify_cli/templates/hooks/block-dangerous.sh
+++ b/src/mapify_cli/templates/hooks/block-dangerous.sh
@@ -14,7 +14,7 @@
 #
 # EXIT CODES:
 #   0 - Allow command execution
-#   2 - Block command execution (dangerous command detected)
+#   0 + permissionDecision=deny - Block command execution (preferred)
 #
 # TESTING:
 #   echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh
@@ -53,58 +53,112 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]')
 # Check for rm -rf (recursive force delete)
 # Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force
 if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Catch rm -rf at end of command or with path immediately after
 if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Catch separated flags: rm -r -f or rm -f -r
 if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: rm -rf is prohibited","details":"Recursive force delete can cause irreversible data loss","suggestion":"Use rm with specific paths and without -rf flag"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for git push --force to main/master
 # Matches: git push --force origin main, git push -f origin master
 if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
+      }
+    }'
+    exit 0
 fi
 
 # Also check reverse order: git push origin main --force
 if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Force push to main/master is prohibited","details":"Force pushing to protected branches can overwrite team work","suggestion":"Use regular push or push to a feature branch"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for git reset --hard (without specific commit, or dangerous patterns)
 # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/
 if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: git reset --hard is prohibited","details":"Hard reset can cause irreversible loss of uncommitted changes","suggestion":"Use git stash or git reset --soft instead"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for dangerous chmod/chown on system directories
 if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: Recursive chmod/chown on root is prohibited","details":"This can break system permissions","suggestion":"Specify a more targeted path"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for dd with of=/dev/
 if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: dd to /dev/ is prohibited","details":"Writing to raw devices can destroy data","suggestion":"Use safer file operations"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
+      }
+    }'
+    exit 0
 fi
 
 # Check for mkfs (format filesystem)
 if echo "$COMMAND" | grep -qE 'mkfs'; then
-    echo '{"hookSpecificOutput":{"hookEventName":"PreToolUse","error":"Blocked: mkfs is prohibited","details":"Formatting filesystems can destroy data","suggestion":"This operation requires manual execution"}}' >&2
-    exit 2
+    jq -n '{
+      hookSpecificOutput: {
+        hookEventName: "PreToolUse",
+        permissionDecision: "deny",
+        permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
+      }
+    }'
+    exit 0
 fi
 
 # =============================================================================
diff --git a/src/mapify_cli/templates/hooks/block-secrets.py b/src/mapify_cli/templates/hooks/block-secrets.py
index bbeb466..fb1d2e1 100755
--- a/src/mapify_cli/templates/hooks/block-secrets.py
+++ b/src/mapify_cli/templates/hooks/block-secrets.py
@@ -22,7 +22,7 @@
 
 HOOK BEHAVIOR:
   - Exit code 0: Allow tool execution (non-sensitive file)
-  - Exit code 2: Block tool execution (sensitive file detected)
+  - Exit code 0 + permissionDecision=deny: Block tool execution (sensitive file detected)
 
 TESTING:
   echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py
@@ -78,16 +78,21 @@ def is_sensitive_file(file_path: str) -> bool:
 
 def block_access(file_path: str, tool_name: str):
     """Block tool execution and output error message."""
-    error_output = {
-        "hookSpecificOutput": {
-            "hookEventName": "PreToolUse",
-            "error": f"Blocked: Access to sensitive file '{file_path}' is prohibited",
-            "details": f"Tool '{tool_name}' attempted to access a protected file. Sensitive files include: .env*, *credentials*, *secret*, private keys (*.pem, *.key, *_rsa, etc.)",
-            "suggestion": "If you need to work with sensitive data, use environment variables or a secrets management system instead of reading raw credential files.",
-        }
-    }
-    print(json.dumps(error_output), file=sys.stderr)
-    sys.exit(2)  # Exit code 2 blocks execution
+    print(
+        json.dumps(
+            {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "deny",
+                    "permissionDecisionReason": (
+                        f"Blocked: Access to sensitive file '{file_path}' is prohibited "
+                        f"(tool={tool_name})."
+                    ),
+                }
+            }
+        )
+    )
+    sys.exit(0)
 
 
 def main():
diff --git a/src/mapify_cli/templates/hooks/post-edit-reminder.py b/src/mapify_cli/templates/hooks/post-edit-reminder.py
index de96db8..ff77beb 100755
--- a/src/mapify_cli/templates/hooks/post-edit-reminder.py
+++ b/src/mapify_cli/templates/hooks/post-edit-reminder.py
@@ -7,7 +7,7 @@
 
 Trigger: Edit|Write
 Exit codes: Always 0 (non-blocking)
-Output: ~80 char reminder via hookSpecificOutput.appended_text
+Output: ~80 char reminder via hookSpecificOutput.additionalContext
 """
 
 import json
@@ -73,7 +73,12 @@ def main() -> None:
 
     # Inject lightweight reminder
     reminder = "[MAP] Code changed. Run tests before committing!"
-    output = {"hookSpecificOutput": {"appended_text": reminder}}
+    output = {
+        "hookSpecificOutput": {
+            "hookEventName": "PostToolUse",
+            "additionalContext": reminder,
+        }
+    }
     print(json.dumps(output))
     sys.exit(0)
 
diff --git a/src/mapify_cli/templates/hooks/ralph-context-pruner.py b/src/mapify_cli/templates/hooks/ralph-context-pruner.py
index 8adb60f..b37b174 100755
--- a/src/mapify_cli/templates/hooks/ralph-context-pruner.py
+++ b/src/mapify_cli/templates/hooks/ralph-context-pruner.py
@@ -13,7 +13,7 @@
   0 - Always (PreCompact hooks don't block)
 
 Output:
-  hookSpecificOutput.appended_text - Recovery message injected into context
+  hookSpecificOutput.additionalContext - Recovery message injected into context
 """
 import json
 import os
@@ -238,7 +238,10 @@ def main() -> None:
 
         # Inject recovery message into context
         recovery_msg = format_recovery_message(state, branch)
-        output["hookSpecificOutput"] = {"appended_text": recovery_msg}
+        output["hookSpecificOutput"] = {
+            "hookEventName": "PreCompact",
+            "additionalContext": recovery_msg,
+        }
 
     # Prune log files in ALL branch directories
     actions = []
diff --git a/src/mapify_cli/templates/hooks/safety-guardrails.py b/src/mapify_cli/templates/hooks/safety-guardrails.py
index a0158c7..4174337 100644
--- a/src/mapify_cli/templates/hooks/safety-guardrails.py
+++ b/src/mapify_cli/templates/hooks/safety-guardrails.py
@@ -9,7 +9,7 @@
 Trigger: Edit|Write|Bash
 Exit codes:
   0 - Allow
-  2 - Block (with error message)
+  0 + permissionDecision=deny - Block (preferred)
 """
 
 import json
@@ -84,17 +84,20 @@ def check_command_safety(command: str) -> tuple[bool, str]:
     return True, ""
 
 
-def block_access(reason: str, tool_name: str) -> None:
-    """Block tool execution with error message."""
-    error_output = {
-        "hookSpecificOutput": {
-            "hookEventName": "PreToolUse",
-            "error": reason,
-            "details": f"Tool '{tool_name}' blocked by safety guardrails",
-        }
-    }
-    print(json.dumps(error_output), file=sys.stderr)
-    sys.exit(2)
+def deny(reason: str) -> None:
+    """Deny tool execution using structured PreToolUse decision control."""
+    print(
+        json.dumps(
+            {
+                "hookSpecificOutput": {
+                    "hookEventName": "PreToolUse",
+                    "permissionDecision": "deny",
+                    "permissionDecisionReason": reason,
+                }
+            }
+        )
+    )
+    sys.exit(0)
 
 
 def main() -> None:
@@ -108,18 +111,18 @@ def main() -> None:
     tool_input = input_data.get("tool_input", {})
 
     # Check file-based tools
-    if tool_name in ("Edit", "Write", "Read"):
+    if tool_name in ("Edit", "Write", "Read", "MultiEdit"):
         file_path = tool_input.get("file_path", "") or tool_input.get("path", "")
         is_safe, reason = check_file_safety(file_path)
         if not is_safe:
-            block_access(reason, tool_name)
+            deny(f"{reason} (tool={tool_name})")
 
     # Check bash commands
     elif tool_name == "Bash":
         command = tool_input.get("command", "")
         is_safe, reason = check_command_safety(command)
         if not is_safe:
-            block_access(reason, tool_name)
+            deny(f"{reason} (tool={tool_name})")
 
     print("{}")
     sys.exit(0)
diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py
index f0b8d34..72145e5 100755
--- a/src/mapify_cli/templates/hooks/workflow-context-injector.py
+++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py
@@ -243,7 +243,12 @@ def main() -> None:
 
     reminder = format_reminder(state, branch)
     if reminder:
-        output = {"hookSpecificOutput": {"appended_text": reminder}}
+        output = {
+            "hookSpecificOutput": {
+                "hookEventName": "PreToolUse",
+                "additionalContext": reminder,
+            }
+        }
         print(json.dumps(output))
     else:
         print("{}")
diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py
index 01e3429..499bdfc 100755
--- a/src/mapify_cli/templates/hooks/workflow-gate.py
+++ b/src/mapify_cli/templates/hooks/workflow-gate.py
@@ -23,12 +23,12 @@
     - pending_steps: Dict mapping subtask_id -> list of pending steps
 
 HOOK BEHAVIOR:
-  - Exit code 0 + approve: Allow tool execution
-  - Exit code 2 + block: Block tool execution with error message
+  - Exit code 0: Allow tool execution
+  - Exit code 0 + permissionDecision=deny: Block tool execution with reason (preferred)
 
 TESTING:
   echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py
-  # Expected: Exit code 2, JSON error on stderr (no workflow state)
+  # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps)
 
 PERFORMANCE:
   Target: <100ms per invocation
@@ -138,7 +138,7 @@ def main():
 
         # Allow non-editing tools
         if tool_name not in EDITING_TOOLS:
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
 
         # Get current branch
@@ -150,25 +150,32 @@ def main():
         if state is None:
             # No workflow state = not in MAP workflow mode, allow
             # (This prevents breaking non-MAP work)
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
 
         # Check workflow compliance
         is_compliant, error_message = check_workflow_compliance(state)
 
         if is_compliant:
-            print(json.dumps({"decision": "approve"}))
+            print("{}")
             sys.exit(0)
         else:
             print(
-                json.dumps({"decision": "block", "reason": error_message}),
-                file=sys.stderr,
+                json.dumps(
+                    {
+                        "hookSpecificOutput": {
+                            "hookEventName": "PreToolUse",
+                            "permissionDecision": "deny",
+                            "permissionDecisionReason": error_message,
+                        }
+                    }
+                )
             )
-            sys.exit(2)
+            sys.exit(0)
 
     except Exception as e:
         # On hook failure, approve (fail-open to avoid blocking work)
-        print(json.dumps({"decision": "approve"}))
+        print("{}")
         if os.environ.get("DEBUG_WORKFLOW_GATE"):
             print(f"[workflow-gate] ERROR: {e}", file=sys.stderr)
         sys.exit(0)
diff --git a/src/mapify_cli/templates/references/workflow-state-schema.md b/src/mapify_cli/templates/references/workflow-state-schema.md
index 42e34fd..e490808 100644
--- a/src/mapify_cli/templates/references/workflow-state-schema.md
+++ b/src/mapify_cli/templates/references/workflow-state-schema.md
@@ -266,4 +266,4 @@ This external state file combined with hook enforcement implements that principl
 - `.claude/hooks/workflow-gate.py` - Enforcement hook
 - `.claude/commands/map-efficient.md` - Workflow definition
 - `.map/<branch>/task_plan_<branch>.md` - Human-readable plan
-- `.claude/settings.hooks.json` - Hook registration
+- `.claude/settings.json` - Hook registration
diff --git a/src/mapify_cli/templates/settings.hooks.json b/src/mapify_cli/templates/settings.hooks.json
deleted file mode 100644
index eb4e7ef..0000000
--- a/src/mapify_cli/templates/settings.hooks.json
+++ /dev/null
@@ -1,74 +0,0 @@
-{
-  "$schema": "https://json.schemastore.org/claude-code-settings.json",
-  "description": "Claude Code hooks configuration for MAP Framework (Tiered)",
-  "hooks": {
-    "PreToolUse": [
-      {
-        "matcher": "Edit|Write|Read|Bash",
-        "description": "Safety Guardrails - block sensitive files and dangerous commands",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/safety-guardrails.py",
-            "timeout": 5,
-            "description": "Blocks .env, credentials, private keys, and dangerous bash commands"
-          }
-        ]
-      },
-      {
-        "matcher": "Edit|Write|Bash",
-        "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/workflow-context-injector.py",
-            "timeout": 3,
-            "description": "Injects workflow context only for Edit/Write and significant Bash commands"
-          }
-        ]
-      }
-    ],
-    "PostToolUse": [
-      {
-        "matcher": "Edit|Write|MultiEdit",
-        "description": "Post-Edit Reminder - lightweight test reminder after code changes",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-edit-reminder.py",
-            "timeout": 3,
-            "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)"
-          }
-        ]
-      }
-    ],
-    "PreCompact": [
-      {
-        "matcher": "*",
-        "description": "Anti-Amnesia Hook - save state and prune logs before compaction",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/ralph-context-pruner.py",
-            "timeout": 10,
-            "description": "Saves workflow restore point and prunes old logs"
-          }
-        ]
-      }
-    ],
-    "Stop": [
-      {
-        "matcher": "*",
-        "description": "Lightweight quality gates - only critical issues",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/end-of-turn.sh",
-            "timeout": 30,
-            "description": "Auto-fixes silently, only reports secrets/syntax errors"
-          }
-        ]
-      }
-    ]
-  }
-}
diff --git a/src/mapify_cli/templates/settings.json b/src/mapify_cli/templates/settings.json
index 6c2c9a2..0779e49 100644
--- a/src/mapify_cli/templates/settings.json
+++ b/src/mapify_cli/templates/settings.json
@@ -33,5 +33,86 @@
       "Bash(gofmt *)",
       "Bash(kubectl get *)"
     ]
+  },
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Edit|Write|Read|MultiEdit|Bash",
+        "description": "Safety Guardrails - block sensitive files and dangerous commands",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/safety-guardrails.py",
+            "timeout": 5,
+            "description": "Blocks .env, credentials, private keys, and dangerous bash commands"
+          }
+        ]
+      },
+      {
+        "matcher": "Edit|Write|MultiEdit",
+        "description": "Workflow Gate - enforce Actor+Monitor before edits (MAP)",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-gate.py",
+            "timeout": 5,
+            "description": "Blocks Edit/Write/MultiEdit until workflow_state indicates Actor+Monitor completed"
+          }
+        ]
+      },
+      {
+        "matcher": "Edit|Write|MultiEdit|Bash",
+        "description": "Workflow Context Injection (Tiered) - inject reminders for significant operations",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/workflow-context-injector.py",
+            "timeout": 3,
+            "description": "Injects workflow context only for Edit/Write/MultiEdit and significant Bash commands"
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Edit|Write|MultiEdit",
+        "description": "Post-Edit Reminder - lightweight test reminder after code changes",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/post-edit-reminder.py",
+            "timeout": 3,
+            "description": "Reminds to run tests after Edit/Write (only when MAP workflow active)"
+          }
+        ]
+      }
+    ],
+    "PreCompact": [
+      {
+        "matcher": "*",
+        "description": "Anti-Amnesia Hook - save state and prune logs before compaction",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/ralph-context-pruner.py",
+            "timeout": 10,
+            "description": "Saves workflow restore point and prunes old logs"
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "description": "Lightweight quality gates - only critical issues",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/end-of-turn.sh",
+            "timeout": 30,
+            "description": "Auto-fixes silently, only reports secrets/syntax errors"
+          }
+        ]
+      }
+    ]
   }
 }
diff --git a/src/mapify_cli/templates/skills/README.md b/src/mapify_cli/templates/skills/README.md
index 2d234d4..c15191d 100644
--- a/src/mapify_cli/templates/skills/README.md
+++ b/src/mapify_cli/templates/skills/README.md
@@ -152,12 +152,12 @@ Skills work seamlessly with the prompt improvement system:
 
 **Check:**
 1. `skill-rules.json` has correct triggers
-2. `improve-prompt.py` hook is enabled in `settings.hooks.json`
+2. `improve-prompt.py` hook is enabled in `.claude/settings.json`
 3. Hook processes UserPromptSubmit events
 
 **Fix:**
 - Update trigger patterns in `skill-rules.json`
-- Verify hook configuration in `.claude/settings.hooks.json`
+- Verify hook configuration in `.claude/settings.json`
 
 ### Skill content too long
 

From 877de178006b2de0f005fd412d798991da4b24ce Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Mon, 16 Feb 2026 00:42:06 +0300
Subject: [PATCH 3/7] Make Monitor valid=false a hard stop and require
 integrated AAG contracts

---
 .claude/agents/actor.md                            | 5 ++++-
 .claude/agents/monitor.md                          | 4 ++++
 .claude/agents/task-decomposer.md                  | 5 +++++
 CLAUDE.md                                          | 6 ++++++
 src/mapify_cli/templates/agents/actor.md           | 5 ++++-
 src/mapify_cli/templates/agents/monitor.md         | 4 ++++
 src/mapify_cli/templates/agents/task-decomposer.md | 5 +++++
 7 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md
index 1ed955e..bf30de5 100644
--- a/.claude/agents/actor.md
+++ b/.claude/agents/actor.md
@@ -739,7 +739,10 @@ output:
 
 {{feedback}}
 
-**Action Required**: Address ALL issues above. Focus on:
+**Action Required**: Address ALL issues above. Do NOT dismiss feedback as "out of scope" or "separate task".
+If you believe an item should be deferred, STOP and ask the user for explicit approval to defer.
+
+Focus on:
 1. Specific line items mentioned
 2. Quality checklist items that failed
 3. Security or constraint violations
diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md
index 59043ad..01b45f1 100644
--- a/.claude/agents/monitor.md
+++ b/.claude/agents/monitor.md
@@ -2523,6 +2523,10 @@ def check_rate_limit(user_id, action, limit=100, window=3600):
 - Requirements unmet → valid=false
 - Only MEDIUM/LOW issues → valid=true (with feedback)
 
+**Hard-stop semantics**:
+- If you set `valid=false`, the workflow MUST fix the issues before proceeding.
+- Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral.
+
 </Monitor_Critical_Reminders>
 
 ### Evidence File (Artifact-Gated Validation)
diff --git a/.claude/agents/task-decomposer.md b/.claude/agents/task-decomposer.md
index 58e8afd..a6e43fd 100644
--- a/.claude/agents/task-decomposer.md
+++ b/.claude/agents/task-decomposer.md
@@ -273,10 +273,14 @@ Return **ONLY** valid JSON in this exact structure:
   - This is the primary handoff artifact to the Actor agent
   - Actor "compiles" this contract into code; Monitor verifies against it
   - Format: `"<Actor> -> <Action>(params) -> <Goal with success criteria>"`
+  - **Integration is part of the contract**:
+    - Prefer describing the *entrypoint + call chain* that makes the behavior real (especially for validation, policy checks, auth, migrations).
+    - Avoid leaf-only contracts that are easy to satisfy in isolation but not wired into production code paths.
   - Examples:
     - `"AuthService -> validate(token) -> returns 401|200 with user_id"`
     - `"ProjectModel -> add_field(archived_at: DateTime?) -> migration passes"`
     - `"RateLimiter -> decorate(endpoint, 100/min) -> returns 429 when exceeded"`
+    - `"ConfigLoader -> load_policy(path) -> calls validate_risk_policy(); raises ConfigValidationError on contradictions"`
 **subtasks[].implementation_hint**: Optional guidance for non-obvious implementations
   - RECOMMENDED when: complexity_score >= 5 OR security_critical OR dependencies.length >= 2
   - OMIT when: standard pattern with obvious implementation
@@ -504,6 +508,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 - [ ] Each subtask is atomic (independently implementable + testable)
 - [ ] Each subtask has an aag_contract in `Actor -> Action(params) -> Goal` format
 - [ ] AAG contracts are specific (not "does stuff" — name classes, methods, return types)
+- [ ] AAG contracts include wiring/integration when relevant (entrypoint + validator/policy checks, not leaf-only helpers)
 - [ ] All dependencies are explicit and accurate
 - [ ] Subtasks ordered by dependency (foundations first)
 - [ ] 5-8 subtasks (not too granular or too coarse)
diff --git a/CLAUDE.md b/CLAUDE.md
index 29b8c1a..d9f322c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -39,6 +39,12 @@ Verification:
 - Don't add or expose secrets. Avoid reading/writing `.env*` and credential/key files.
 - When changing pattern storage behavior, ensure Curator-mediated writes through mem0 MCP are preserved (see `.claude/agents/curator.md` and `docs/ARCHITECTURE.md`).
 
+## MAP Workflow Rules
+
+- If **Monitor** returns `valid=false`, treat it as a **hard stop**: fix the issues before proceeding.
+  - Do NOT dismiss Monitor feedback as "out of scope" / "separate task".
+  - If you're unsure whether fixing it is in scope: ask the user explicitly and wait for a decision.
+
 ## Bash Command Guidelines
 
 **CRITICAL:** Avoid output buffering issues that cause commands to hang.
diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md
index 1ed955e..bf30de5 100644
--- a/src/mapify_cli/templates/agents/actor.md
+++ b/src/mapify_cli/templates/agents/actor.md
@@ -739,7 +739,10 @@ output:
 
 {{feedback}}
 
-**Action Required**: Address ALL issues above. Focus on:
+**Action Required**: Address ALL issues above. Do NOT dismiss feedback as "out of scope" or "separate task".
+If you believe an item should be deferred, STOP and ask the user for explicit approval to defer.
+
+Focus on:
 1. Specific line items mentioned
 2. Quality checklist items that failed
 3. Security or constraint violations
diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md
index 59043ad..01b45f1 100644
--- a/src/mapify_cli/templates/agents/monitor.md
+++ b/src/mapify_cli/templates/agents/monitor.md
@@ -2523,6 +2523,10 @@ def check_rate_limit(user_id, action, limit=100, window=3600):
 - Requirements unmet → valid=false
 - Only MEDIUM/LOW issues → valid=true (with feedback)
 
+**Hard-stop semantics**:
+- If you set `valid=false`, the workflow MUST fix the issues before proceeding.
+- Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral.
+
 </Monitor_Critical_Reminders>
 
 ### Evidence File (Artifact-Gated Validation)
diff --git a/src/mapify_cli/templates/agents/task-decomposer.md b/src/mapify_cli/templates/agents/task-decomposer.md
index 58e8afd..a6e43fd 100644
--- a/src/mapify_cli/templates/agents/task-decomposer.md
+++ b/src/mapify_cli/templates/agents/task-decomposer.md
@@ -273,10 +273,14 @@ Return **ONLY** valid JSON in this exact structure:
   - This is the primary handoff artifact to the Actor agent
   - Actor "compiles" this contract into code; Monitor verifies against it
   - Format: `"<Actor> -> <Action>(params) -> <Goal with success criteria>"`
+  - **Integration is part of the contract**:
+    - Prefer describing the *entrypoint + call chain* that makes the behavior real (especially for validation, policy checks, auth, migrations).
+    - Avoid leaf-only contracts that are easy to satisfy in isolation but not wired into production code paths.
   - Examples:
     - `"AuthService -> validate(token) -> returns 401|200 with user_id"`
     - `"ProjectModel -> add_field(archived_at: DateTime?) -> migration passes"`
     - `"RateLimiter -> decorate(endpoint, 100/min) -> returns 429 when exceeded"`
+    - `"ConfigLoader -> load_policy(path) -> calls validate_risk_policy(); raises ConfigValidationError on contradictions"`
 **subtasks[].implementation_hint**: Optional guidance for non-obvious implementations
   - RECOMMENDED when: complexity_score >= 5 OR security_critical OR dependencies.length >= 2
   - OMIT when: standard pattern with obvious implementation
@@ -504,6 +508,7 @@ When invoked with `mode: "re_decomposition"` from the orchestrator, you receive
 - [ ] Each subtask is atomic (independently implementable + testable)
 - [ ] Each subtask has an aag_contract in `Actor -> Action(params) -> Goal` format
 - [ ] AAG contracts are specific (not "does stuff" — name classes, methods, return types)
+- [ ] AAG contracts include wiring/integration when relevant (entrypoint + validator/policy checks, not leaf-only helpers)
 - [ ] All dependencies are explicit and accurate
 - [ ] Subtasks ordered by dependency (foundations first)
 - [ ] 5-8 subtasks (not too granular or too coarse)

From e953949e06359094df4adba7d4c66877db9eb9b3 Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Mon, 16 Feb 2026 00:56:15 +0300
Subject: [PATCH 4/7] Fix tests for Claude Code hook schema

---
 tests/hooks/test_block_dangerous.py     | 103 +++++++++++++--------
 tests/hooks/test_block_secrets.py       | 113 ++++++++++++++++--------
 tests/test_workflow_context_injector.py |  20 ++---
 tests/test_workflow_gate.py             |  97 ++++++++++----------
 4 files changed, 200 insertions(+), 133 deletions(-)

diff --git a/tests/hooks/test_block_dangerous.py b/tests/hooks/test_block_dangerous.py
index 3215e52..3ba4cf6 100644
--- a/tests/hooks/test_block_dangerous.py
+++ b/tests/hooks/test_block_dangerous.py
@@ -27,6 +27,22 @@ def run_hook(command: str) -> tuple[int, str, str]:
     return result.returncode, result.stdout, result.stderr
 
 
+def _parse_stdout(stdout: str) -> dict:
+    stdout = (stdout or "").strip()
+    if not stdout:
+        return {}
+    return json.loads(stdout)
+
+
+def _assert_denied(payload: dict, expected_substring: str | None = None) -> None:
+    assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse"
+    assert payload["hookSpecificOutput"].get("permissionDecision") == "deny"
+    reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "")
+    assert reason
+    if expected_substring:
+        assert expected_substring.lower() in reason.lower()
+
+
 # =============================================================================
 # Validation Criteria Tests
 # =============================================================================
@@ -36,32 +52,37 @@ class TestValidationCriteria:
     """Tests for the validation criteria from task decomposition."""
 
     def test_criterion_1_rm_rf_blocked(self):
-        """VC1: Bash command 'rm -rf /' is blocked with exit code 2."""
-        exit_code, _, stderr = run_hook("rm -rf /")
-        assert exit_code == 2, f"Expected exit 2, got {exit_code}"
-        assert "Blocked" in stderr, f"Expected 'Blocked' in stderr: {stderr}"
+        """VC1: Bash command 'rm -rf /' is blocked (permissionDecision=deny)."""
+        exit_code, stdout, stderr = run_hook("rm -rf /")
+        assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="rm -rf")
 
     def test_criterion_2_force_push_main_blocked(self):
         """VC2: git push --force origin main is blocked."""
-        exit_code, _, stderr = run_hook("git push --force origin main")
-        assert exit_code == 2, f"Expected exit 2, got {exit_code}"
-        assert "Blocked" in stderr
+        exit_code, stdout, stderr = run_hook("git push --force origin main")
+        assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="force push")
 
     def test_criterion_3_reset_hard_blocked(self):
         """VC3: git reset --hard is blocked."""
-        exit_code, _, stderr = run_hook("git reset --hard")
-        assert exit_code == 2, f"Expected exit 2, got {exit_code}"
-        assert "Blocked" in stderr
+        exit_code, stdout, stderr = run_hook("git reset --hard")
+        assert exit_code == 0, f"Expected exit 0, got {exit_code}. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="reset --hard")
 
     def test_criterion_4_force_push_feature_allowed(self):
         """VC4: git push --force origin feature-branch is allowed."""
-        exit_code, _, _ = run_hook("git push --force origin feature-branch")
-        assert exit_code == 0, "Force push to feature branch should be allowed"
+        exit_code, stdout, stderr = run_hook("git push --force origin feature-branch")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        assert _parse_stdout(stdout) == {}, "Force push to feature branch should be allowed"
 
     def test_criterion_5_legitimate_allowed(self):
         """VC5: Legitimate commands like 'pytest' are allowed."""
-        exit_code, _, _ = run_hook("pytest")
-        assert exit_code == 0, "pytest should be allowed"
+        exit_code, stdout, stderr = run_hook("pytest")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        assert _parse_stdout(stdout) == {}, "pytest should be allowed"
 
 
 # =============================================================================
@@ -86,9 +107,10 @@ class TestRmRfBlocking:
         ],
     )
     def test_rm_rf_variants_blocked(self, command):
-        exit_code, _, stderr = run_hook(command)
-        assert exit_code == 2, f"'{command}' should be blocked"
-        assert "rm -rf" in stderr.lower() or "blocked" in stderr.lower()
+        exit_code, stdout, stderr = run_hook(command)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="rm -rf")
 
     def test_rm_without_rf_allowed(self):
         """rm without -rf should be allowed."""
@@ -123,9 +145,10 @@ class TestGitForcePushBlocking:
         ],
     )
     def test_force_push_protected_blocked(self, command):
-        exit_code, _, stderr = run_hook(command)
-        assert exit_code == 2, f"'{command}' should be blocked"
-        assert "Blocked" in stderr
+        exit_code, stdout, stderr = run_hook(command)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="force push")
 
     @pytest.mark.parametrize(
         "command",
@@ -166,9 +189,10 @@ class TestGitResetHardBlocking:
         ],
     )
     def test_reset_hard_variants_blocked(self, command):
-        exit_code, _, stderr = run_hook(command)
-        assert exit_code == 2, f"'{command}' should be blocked"
-        assert "Blocked" in stderr
+        exit_code, stdout, stderr = run_hook(command)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="reset --hard")
 
     @pytest.mark.parametrize(
         "command",
@@ -217,8 +241,9 @@ class TestLegitimateCommands:
         ],
     )
     def test_safe_commands_allowed(self, command):
-        exit_code, _, stderr = run_hook(command)
+        exit_code, stdout, stderr = run_hook(command)
         assert exit_code == 0, f"'{command}' should be allowed. stderr: {stderr}"
+        assert _parse_stdout(stdout) == {}, f"'{command}' should be allowed"
 
 
 # =============================================================================
@@ -282,18 +307,24 @@ class TestAdditionalDangerousCommands:
         ],
     )
     def test_recursive_permissions_on_root_blocked(self, command):
-        exit_code, _, _ = run_hook(command)
-        assert exit_code == 2, f"'{command}' should be blocked"
+        exit_code, stdout, stderr = run_hook(command)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="chmod/chown")
 
     def test_dd_to_device_blocked(self):
         """dd writing to /dev/ should be blocked."""
-        exit_code, _, _ = run_hook("dd if=/dev/zero of=/dev/sda")
-        assert exit_code == 2, "dd to device should be blocked"
+        exit_code, stdout, stderr = run_hook("dd if=/dev/zero of=/dev/sda")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="dd")
 
     def test_mkfs_blocked(self):
         """mkfs should be blocked."""
-        exit_code, _, _ = run_hook("mkfs.ext4 /dev/sda1")
-        assert exit_code == 2, "mkfs should be blocked"
+        exit_code, stdout, stderr = run_hook("mkfs.ext4 /dev/sda1")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="mkfs")
 
 
 # =============================================================================
@@ -305,10 +336,8 @@ class TestOutputFormat:
     """Test that error output follows expected JSON structure."""
 
     def test_error_json_structure(self):
-        """Blocked command should output valid JSON to stderr."""
-        _, _, stderr = run_hook("rm -rf /")
-        error_data = json.loads(stderr)
-        assert "hookSpecificOutput" in error_data
-        assert error_data["hookSpecificOutput"]["hookEventName"] == "PreToolUse"
-        assert "error" in error_data["hookSpecificOutput"]
-        assert "Blocked" in error_data["hookSpecificOutput"]["error"]
+        """Blocked command should output valid JSON to stdout."""
+        exit_code, stdout, stderr = run_hook("rm -rf /")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_substring="Blocked")
diff --git a/tests/hooks/test_block_secrets.py b/tests/hooks/test_block_secrets.py
index 6d1e339..36ba85c 100644
--- a/tests/hooks/test_block_secrets.py
+++ b/tests/hooks/test_block_secrets.py
@@ -29,6 +29,22 @@ def run_hook(tool_name: str, file_path: str) -> tuple[int, str, str]:
     return result.returncode, result.stdout, result.stderr
 
 
+def _parse_stdout(stdout: str) -> dict:
+    stdout = (stdout or "").strip()
+    if not stdout:
+        return {}
+    return json.loads(stdout)
+
+
+def _assert_denied(payload: dict, expected_file_fragment: str | None = None) -> None:
+    assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse"
+    assert payload["hookSpecificOutput"].get("permissionDecision") == "deny"
+    reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "")
+    assert reason
+    if expected_file_fragment:
+        assert expected_file_fragment.lower() in reason.lower()
+
+
 # =============================================================================
 # Validation Criteria Tests
 # =============================================================================
@@ -38,16 +54,18 @@ class TestValidationCriteria:
     """Tests for the 5 validation criteria from the task."""
 
     def test_criterion_1_env_blocked(self):
-        """VC1: Read('.env') returns exit code 2 with 'Blocked' message."""
-        exit_code, _, stderr = run_hook("Read", ".env")
-        assert exit_code == 2, f"Expected exit 2, got {exit_code}"
-        assert "Blocked" in stderr, f"Expected 'Blocked' in stderr: {stderr}"
+        """VC1: Read('.env') is blocked (permissionDecision=deny)."""
+        exit_code, stdout, stderr = run_hook("Read", ".env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
     def test_criterion_2_credentials_blocked(self):
         """VC2: Write('credentials.json') is blocked."""
-        exit_code, _, stderr = run_hook("Write", "credentials.json")
-        assert exit_code == 2, f"Expected exit 2, got {exit_code}"
-        assert "Blocked" in stderr
+        exit_code, stdout, stderr = run_hook("Write", "credentials.json")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment="credentials.json")
 
     def test_criterion_3_legitimate_allowed(self):
         """VC3: Legitimate files like 'app.py' are allowed."""
@@ -86,8 +104,10 @@ class TestEnvFiles:
         ],
     )
     def test_env_variants_blocked(self, filename):
-        exit_code, _, _ = run_hook("Read", filename)
-        assert exit_code == 2, f"{filename} should be blocked"
+        exit_code, stdout, stderr = run_hook("Read", filename)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=filename)
 
 
 class TestCredentialFiles:
@@ -104,8 +124,10 @@ class TestCredentialFiles:
         ],
     )
     def test_credentials_blocked(self, filename):
-        exit_code, _, _ = run_hook("Read", filename)
-        assert exit_code == 2, f"{filename} should be blocked"
+        exit_code, stdout, stderr = run_hook("Read", filename)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=filename)
 
 
 class TestSecretFiles:
@@ -122,8 +144,10 @@ class TestSecretFiles:
         ],
     )
     def test_secrets_blocked(self, filename):
-        exit_code, _, _ = run_hook("Read", filename)
-        assert exit_code == 2, f"{filename} should be blocked"
+        exit_code, stdout, stderr = run_hook("Read", filename)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=filename)
 
 
 class TestPrivateKeys:
@@ -156,8 +180,10 @@ class TestPrivateKeys:
         ],
     )
     def test_private_keys_blocked(self, filename):
-        exit_code, _, stderr = run_hook("Read", filename)
-        assert exit_code == 2, f"{filename} should be blocked. stderr: {stderr}"
+        exit_code, stdout, stderr = run_hook("Read", filename)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=filename)
 
 
 # =============================================================================
@@ -246,15 +272,17 @@ class TestPathTraversal:
         ],
     )
     def test_sensitive_path_component_blocked(self, path):
-        exit_code, _, stderr = run_hook("Read", path)
-        assert (
-            exit_code == 2
-        ), f"Path with sensitive component '{path}' should be blocked. stderr: {stderr}"
+        exit_code, stdout, stderr = run_hook("Read", path)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=path)
 
     def test_relative_traversal_with_env(self):
         """Test ../../../.env style paths."""
-        exit_code, _, _ = run_hook("Read", "../../../.env")
-        assert exit_code == 2, "Relative path with .env should be blocked"
+        exit_code, stdout, stderr = run_hook("Read", "../../../.env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
 
 # =============================================================================
@@ -266,16 +294,22 @@ class TestToolInterception:
     """Test that only Read/Edit/Write tools are intercepted."""
 
     def test_read_intercepted(self):
-        exit_code, _, _ = run_hook("Read", ".env")
-        assert exit_code == 2
+        exit_code, stdout, stderr = run_hook("Read", ".env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
     def test_write_intercepted(self):
-        exit_code, _, _ = run_hook("Write", ".env")
-        assert exit_code == 2
+        exit_code, stdout, stderr = run_hook("Write", ".env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
     def test_edit_intercepted(self):
-        exit_code, _, _ = run_hook("Edit", ".env")
-        assert exit_code == 2
+        exit_code, stdout, stderr = run_hook("Edit", ".env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
     def test_bash_not_intercepted(self):
         """Bash tool should not be intercepted (different hook)."""
@@ -338,19 +372,18 @@ class TestOutputFormat:
     """Test that error output follows expected JSON structure."""
 
     def test_error_json_structure(self):
-        """Blocked file should output valid JSON to stderr."""
-        _, _, stderr = run_hook("Read", ".env")
-        error_data = json.loads(stderr)
-        assert "hookSpecificOutput" in error_data
-        assert error_data["hookSpecificOutput"]["hookEventName"] == "PreToolUse"
-        assert "error" in error_data["hookSpecificOutput"]
-        assert "Blocked" in error_data["hookSpecificOutput"]["error"]
+        """Blocked file should output valid JSON to stdout."""
+        exit_code, stdout, stderr = run_hook("Read", ".env")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=".env")
 
     def test_error_contains_file_path(self):
         """Error message should mention the blocked file."""
-        _, _, stderr = run_hook("Read", "credentials.json")
-        error_data = json.loads(stderr)
-        assert "credentials.json" in error_data["hookSpecificOutput"]["error"]
+        exit_code, stdout, stderr = run_hook("Read", "credentials.json")
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment="credentials.json")
 
 
 # =============================================================================
@@ -374,5 +407,7 @@ class TestCaseSensitivity:
         ],
     )
     def test_case_insensitive_blocking(self, filename):
-        exit_code, _, _ = run_hook("Read", filename)
-        assert exit_code == 2, f"Case variant {filename} should be blocked"
+        exit_code, stdout, stderr = run_hook("Read", filename)
+        assert exit_code == 0, f"Expected exit 0. stderr: {stderr}"
+        payload = _parse_stdout(stdout)
+        _assert_denied(payload, expected_file_fragment=filename)
diff --git a/tests/test_workflow_context_injector.py b/tests/test_workflow_context_injector.py
index e6f46dc..e6d6798 100644
--- a/tests/test_workflow_context_injector.py
+++ b/tests/test_workflow_context_injector.py
@@ -56,12 +56,12 @@ def test_injects_for_edit_when_step_state_exists(tmp_path: Path) -> None:
     assert code == 0
     assert err == ""
     payload = json.loads(out)
-    appended = payload["hookSpecificOutput"]["appended_text"]
-    assert "[MAP]" in appended
-    assert "1.55" in appended
-    assert "REVIEW_PLAN" in appended
-    assert "ST-001" in appended
-    assert "REQUIRED" in appended
+    additional = payload["hookSpecificOutput"]["additionalContext"]
+    assert "[MAP]" in additional
+    assert "1.55" in additional
+    assert "REVIEW_PLAN" in additional
+    assert "ST-001" in additional
+    assert "REQUIRED" in additional
 
 
 def test_skips_for_readonly_bash(tmp_path: Path) -> None:
@@ -111,7 +111,7 @@ def test_injects_for_pytest_bash_when_step_state_exists(tmp_path: Path) -> None:
     assert code == 0
     assert err == ""
     payload = json.loads(out)
-    appended = payload["hookSpecificOutput"]["appended_text"]
-    assert "2.8" in appended
-    assert "TESTS_GATE" in appended
-    assert "ST-002" in appended
+    additional = payload["hookSpecificOutput"]["additionalContext"]
+    assert "2.8" in additional
+    assert "TESTS_GATE" in additional
+    assert "ST-002" in additional
diff --git a/tests/test_workflow_gate.py b/tests/test_workflow_gate.py
index b997aaf..5a2027f 100644
--- a/tests/test_workflow_gate.py
+++ b/tests/test_workflow_gate.py
@@ -23,6 +23,23 @@ class TestWorkflowGate:
 
     HOOK_PATH = REPO_ROOT / ".claude/hooks/workflow-gate.py"
 
+    def _parse_stdout(self, stdout: str) -> dict:
+        stdout = (stdout or "").strip()
+        if not stdout:
+            return {}
+        return json.loads(stdout)
+
+    def _assert_allowed(self, stdout: str) -> None:
+        assert self._parse_stdout(stdout) == {}
+
+    def _assert_denied(self, stdout: str) -> str:
+        payload = self._parse_stdout(stdout)
+        assert payload.get("hookSpecificOutput", {}).get("hookEventName") == "PreToolUse"
+        assert payload["hookSpecificOutput"].get("permissionDecision") == "deny"
+        reason = payload["hookSpecificOutput"].get("permissionDecisionReason", "")
+        assert reason
+        return reason
+
     def run_hook(
         self, input_data: dict, tmp_path: Path, branch: str = "master"
     ) -> Tuple[int, str, str]:
@@ -129,8 +146,7 @@ def test_allows_non_editing_tools(self, tmp_path: Path) -> None:
             )
 
             assert code == 0, f"{tool_name} should be allowed"
-            response = json.loads(stdout.strip())
-            assert response["decision"] == "approve"
+            self._assert_allowed(stdout)
 
     def test_allows_edit_when_no_workflow_state(self, tmp_path: Path) -> None:
         """Edit should be allowed when workflow_state.json doesn't exist (fail-open)."""
@@ -143,8 +159,7 @@ def test_allows_edit_when_no_workflow_state(self, tmp_path: Path) -> None:
         )
 
         assert code == 0
-        response = json.loads(stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(stdout)
 
     def test_blocks_edit_without_actor(self, tmp_path: Path) -> None:
         """Edit should be blocked if 'actor' step not completed."""
@@ -170,7 +185,7 @@ def test_blocks_edit_without_actor(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Edit",
                 "tool_input": {"file_path": "/test.py"},
@@ -178,11 +193,10 @@ def test_blocks_edit_without_actor(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
-        assert "actor" in response["reason"].lower()
-        assert "monitor" in response["reason"].lower()
+        assert code == 0  # Hook signals blocking via JSON decision
+        reason = self._assert_denied(stdout)
+        assert "actor" in reason.lower()
+        assert "monitor" in reason.lower()
 
     def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None:
         """Edit should be blocked if 'monitor' step not completed (even if actor done)."""
@@ -206,7 +220,7 @@ def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Edit",
                 "tool_input": {"file_path": "/test.py"},
@@ -214,10 +228,9 @@ def test_blocks_edit_without_monitor(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
-        assert "monitor" in response["reason"].lower()
+        assert code == 0
+        reason = self._assert_denied(stdout)
+        assert "monitor" in reason.lower()
 
     def test_allows_edit_after_actor_and_monitor(self, tmp_path: Path) -> None:
         """Edit should be allowed when both actor and monitor are completed."""
@@ -246,8 +259,7 @@ def test_allows_edit_after_actor_and_monitor(self, tmp_path: Path) -> None:
         )
 
         assert code == 0
-        response = json.loads(stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(stdout)
 
     def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None:
         """Write should be blocked like Edit."""
@@ -265,7 +277,7 @@ def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Write",
                 "tool_input": {"file_path": "/test.py"},
@@ -273,9 +285,8 @@ def test_blocks_write_without_required_steps(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
+        assert code == 0
+        self._assert_denied(stdout)
 
     def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None:
         """MultiEdit should be blocked like Edit and Write."""
@@ -293,7 +304,7 @@ def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "MultiEdit",
                 "tool_input": {"file_path": "/test.py"},
@@ -301,9 +312,8 @@ def test_blocks_multiedit_without_required_steps(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
+        assert code == 0
+        self._assert_denied(stdout)
 
     def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None:
         """Should block if current_subtask is null or missing."""
@@ -321,7 +331,7 @@ def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Edit",
                 "tool_input": {"file_path": "/test.py"},
@@ -329,10 +339,9 @@ def test_blocks_when_no_current_subtask(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
-        assert "current_subtask" in response["reason"].lower()
+        assert code == 0
+        reason = self._assert_denied(stdout)
+        assert "current_subtask" in reason.lower()
 
     def test_handles_invalid_json_gracefully(self, tmp_path: Path) -> None:
         """Should fail-open (allow) on invalid workflow_state.json."""
@@ -350,8 +359,7 @@ def test_handles_invalid_json_gracefully(self, tmp_path: Path) -> None:
         )
 
         assert code == 0  # Fail-open
-        response = json.loads(stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(stdout)
 
     def test_handles_malformed_hook_input_gracefully(self, tmp_path: Path) -> None:
         """Should fail-open (allow) on malformed hook input JSON."""
@@ -364,8 +372,7 @@ def test_handles_malformed_hook_input_gracefully(self, tmp_path: Path) -> None:
         )
 
         assert result.returncode == 0  # Fail-open
-        response = json.loads(result.stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(result.stdout)
 
     def test_respects_branch_scoping(self, tmp_path: Path) -> None:
         """Workflow state should be branch-scoped (.map/<branch>/)."""
@@ -397,8 +404,7 @@ def test_respects_branch_scoping(self, tmp_path: Path) -> None:
         )
 
         assert code == 0
-        response = json.loads(stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(stdout)
 
     def test_different_subtask_steps_independent(self, tmp_path: Path) -> None:
         """completed_steps should be tracked per subtask."""
@@ -420,7 +426,7 @@ def test_different_subtask_steps_independent(self, tmp_path: Path) -> None:
         )
 
         # Should block because ST-002 (current) is incomplete
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Edit",
                 "tool_input": {"file_path": "/test.py"},
@@ -428,10 +434,9 @@ def test_different_subtask_steps_independent(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2  # Blocked
-        response = json.loads(stderr.strip())
-        assert response["decision"] == "block"
-        assert "ST-002" in response["reason"] or "actor" in response["reason"].lower()
+        assert code == 0
+        reason = self._assert_denied(stdout)
+        assert "ST-002" in reason or "actor" in reason.lower()
 
     def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None:
         """Block message should explain required workflow steps."""
@@ -449,7 +454,7 @@ def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None:
             )
         )
 
-        code, _, stderr = self.run_hook(
+        code, stdout, _ = self.run_hook(
             {
                 "tool_name": "Edit",
                 "tool_input": {"file_path": "/test.py"},
@@ -457,9 +462,8 @@ def test_error_message_includes_workflow_steps(self, tmp_path: Path) -> None:
             tmp_path,
         )
 
-        assert code == 2
-        response = json.loads(stderr.strip())
-        error_msg = response["reason"]
+        assert code == 0
+        error_msg = self._assert_denied(stdout)
 
         # Should mention the workflow steps
         assert "actor" in error_msg.lower()
@@ -506,5 +510,4 @@ def test_branch_name_sanitization(
         )
 
         assert code == 0
-        response = json.loads(stdout.strip())
-        assert response["decision"] == "approve"
+        self._assert_allowed(stdout)

From 5fac796a3ee2c37a557730d98c9f0e5594b9860f Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Mon, 16 Feb 2026 01:15:46 +0300
Subject: [PATCH 5/7] Address PR feedback: hook docs, lint, robustness

---
 .claude/agents/actor.md                       |   4 +-
 .claude/agents/monitor.md                     |   2 +-
 .claude/hooks/block-dangerous.sh              | 165 +++++++++---------
 .claude/hooks/block-secrets.py                |   2 +-
 .claude/hooks/workflow-gate.py                |   3 +-
 IMPLEMENTATION_SUMMARY.md                     |   3 +-
 src/mapify_cli/templates/agents/actor.md      |   4 +-
 src/mapify_cli/templates/agents/monitor.md    |   2 +-
 .../templates/hooks/block-dangerous.sh        | 165 +++++++++---------
 .../templates/hooks/block-secrets.py          |   2 +-
 .../templates/hooks/workflow-gate.py          |   3 +-
 tests/hooks/test_block_secrets.py             |   5 +-
 12 files changed, 187 insertions(+), 173 deletions(-)

diff --git a/.claude/agents/actor.md b/.claude/agents/actor.md
index bf30de5..97fa53e 100644
--- a/.claude/agents/actor.md
+++ b/.claude/agents/actor.md
@@ -380,7 +380,7 @@ Document key decisions using this structure:
 - Prefer naming tests with `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage.
 
 **Format**:
-```
+```text
 1. test_[function]_[scenario]_[expected]
    Input: [specific input]
    Expected: [specific output/behavior]
@@ -401,7 +401,7 @@ Document key decisions using this structure:
 If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced.
 
 **Format**:
-```
+```text
 VC1: <criterion text>
 - Code: path/to/file.ext#SymbolOrLocation
 - Tests: path/to/test_file.ext::test_name (or N/A with reason)
diff --git a/.claude/agents/monitor.md b/.claude/agents/monitor.md
index 01b45f1..807ef53 100644
--- a/.claude/agents/monitor.md
+++ b/.claude/agents/monitor.md
@@ -2524,7 +2524,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600):
 - Only MEDIUM/LOW issues → valid=true (with feedback)
 
 **Hard-stop semantics**:
-- If you set `valid=false`, the workflow MUST fix the issues before proceeding.
+- If you set `valid=false`, the workflow MUST resolve the issues before proceeding.
 - Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral.
 
 </Monitor_Critical_Reminders>
diff --git a/.claude/hooks/block-dangerous.sh b/.claude/hooks/block-dangerous.sh
index f42a6ba..3ba7186 100755
--- a/.claude/hooks/block-dangerous.sh
+++ b/.claude/hooks/block-dangerous.sh
@@ -18,7 +18,7 @@
 #
 # TESTING:
 #   echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh
-#   # Expected: Exit code 2
+#   # Expected: Exit code 0 with permissionDecision=deny in JSON output
 #
 # =============================================================================
 
@@ -27,9 +27,60 @@ set -euo pipefail
 # Read JSON from stdin
 INPUT=$(cat)
 
-# Extract tool_name and command using jq (or fallback to grep)
-TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || echo "")
-COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "")
+deny() {
+    local reason="$1"
+
+    if command -v jq >/dev/null 2>&1; then
+        jq -n --arg reason "$reason" '{
+          hookSpecificOutput: {
+            hookEventName: "PreToolUse",
+            permissionDecision: "deny",
+            permissionDecisionReason: $reason
+          }
+        }'
+        return 0
+    fi
+
+    if command -v python3 >/dev/null 2>&1; then
+        python3 - "$reason" <<'PY'
+import json
+import sys
+
+reason = sys.argv[1]
+print(
+    json.dumps(
+        {
+            "hookSpecificOutput": {
+                "hookEventName": "PreToolUse",
+                "permissionDecision": "deny",
+                "permissionDecisionReason": reason,
+            }
+        }
+    )
+)
+PY
+        return 0
+    fi
+
+    local escaped=${reason//\\/\\\\}
+    escaped=${escaped//\"/\\\"}
+    printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$escaped"
+}
+
+if command -v jq >/dev/null 2>&1; then
+    TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT" 2>/dev/null || true)
+    COMMAND=$(jq -r '.tool_input.command // empty' <<<"$INPUT" 2>/dev/null || true)
+elif command -v python3 >/dev/null 2>&1; then
+    TOOL_NAME=$(
+        python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); print(d.get("tool_name",""))' <<<"$INPUT" 2>/dev/null || true
+    )
+    COMMAND=$(
+        python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); ti=d.get("tool_input") or {}; print(ti.get("command",""))' <<<"$INPUT" 2>/dev/null || true
+    )
+else
+    TOOL_NAME=""
+    COMMAND=""
+fi
 
 # Only intercept Bash tool
 if [[ "$TOOL_NAME" != "Bash" ]]; then
@@ -51,113 +102,67 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]')
 # =============================================================================
 
 # Check for rm -rf (recursive force delete)
-# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force
-if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
-fi
-
-# Catch rm -rf at end of command or with path immediately after
-if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
-fi
-
-# Catch separated flags: rm -r -f or rm -f -r
-if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
+# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force, and edge
+# cases where flags touch the path (e.g., rm -rf0dir).
+if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]]'; then
+    # Long flags: --recursive and --force
+    if echo "$COMMAND_LOWER" | grep -qE -- '--recursive' && echo "$COMMAND_LOWER" | grep -qE -- '--force'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+
+    # Combined short flags: -rf / -fr (including edge cases where the path touches flags)
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*r[^[:space:]]*f'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*f[^[:space:]]*r'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+
+    # Separate short flags: -r -f / -f -r
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-r([[:space:]]|$)' && echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-f([[:space:]]|$)'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
 fi
 
 # Check for git push --force to main/master
 # Matches: git push --force origin main, git push -f origin master
 if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
-      }
-    }'
+    deny "Blocked: Force push to main/master is prohibited (can overwrite team work)"
     exit 0
 fi
 
 # Also check reverse order: git push origin main --force
 if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
-      }
-    }'
+    deny "Blocked: Force push to main/master is prohibited (can overwrite team work)"
     exit 0
 fi
 
 # Check for git reset --hard (without specific commit, or dangerous patterns)
 # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/
 if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
-      }
-    }'
+    deny "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
     exit 0
 fi
 
 # Check for dangerous chmod/chown on system directories
 if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
-      }
-    }'
+    deny "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
     exit 0
 fi
 
 # Check for dd with of=/dev/
 if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
-      }
-    }'
+    deny "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
     exit 0
 fi
 
 # Check for mkfs (format filesystem)
 if echo "$COMMAND" | grep -qE 'mkfs'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
-      }
-    }'
+    deny "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
     exit 0
 fi
 
diff --git a/.claude/hooks/block-secrets.py b/.claude/hooks/block-secrets.py
index fb1d2e1..ac46e72 100755
--- a/.claude/hooks/block-secrets.py
+++ b/.claude/hooks/block-secrets.py
@@ -26,7 +26,7 @@
 
 TESTING:
   echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py
-  # Expected: Exit code 2, JSON error on stderr
+  # Expected: Exit code 0, JSON on stdout with permissionDecision=deny
 
 PERFORMANCE:
   Target: <100ms per invocation
diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py
index 499bdfc..b0e67f0 100755
--- a/.claude/hooks/workflow-gate.py
+++ b/.claude/hooks/workflow-gate.py
@@ -28,7 +28,8 @@
 
 TESTING:
   echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py
-  # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps)
+  # Expected (no workflow state): Exit 0, stdout: {}
+  # Expected (workflow active, missing steps): Exit 0, stdout: {"hookSpecificOutput":{"permissionDecision":"deny",...}}
 
 PERFORMANCE:
   Target: <100ms per invocation
diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md
index cecea31..96d4b8a 100644
--- a/IMPLEMENTATION_SUMMARY.md
+++ b/IMPLEMENTATION_SUMMARY.md
@@ -148,7 +148,8 @@ Instead of a 995-line command file that Claude "compresses" mentally, inject ~30
 .claude/
 ├── hooks/
 │   ├── workflow-context-injector.py (NEW)
-│   └── settings.json (UPDATED)
+│   └── (other hooks omitted)
+├── settings.json (UPDATED)
 ├── commands/
 │   ├── map-efficient.md (OPTIMIZED - 995→394 lines)
 │   └── map-efficient.md.backup (BACKUP)
diff --git a/src/mapify_cli/templates/agents/actor.md b/src/mapify_cli/templates/agents/actor.md
index bf30de5..97fa53e 100644
--- a/src/mapify_cli/templates/agents/actor.md
+++ b/src/mapify_cli/templates/agents/actor.md
@@ -380,7 +380,7 @@ Document key decisions using this structure:
 - Prefer naming tests with `vc<n>` (e.g., `test_vc1_*`, `TestVC1*`) so Monitor can deterministically confirm coverage.
 
 **Format**:
-```
+```text
 1. test_[function]_[scenario]_[expected]
    Input: [specific input]
    Expected: [specific output/behavior]
@@ -401,7 +401,7 @@ Document key decisions using this structure:
 If the subtask packet includes `validation_criteria`, list each `VCn:` and where it is enforced.
 
 **Format**:
-```
+```text
 VC1: <criterion text>
 - Code: path/to/file.ext#SymbolOrLocation
 - Tests: path/to/test_file.ext::test_name (or N/A with reason)
diff --git a/src/mapify_cli/templates/agents/monitor.md b/src/mapify_cli/templates/agents/monitor.md
index 01b45f1..807ef53 100644
--- a/src/mapify_cli/templates/agents/monitor.md
+++ b/src/mapify_cli/templates/agents/monitor.md
@@ -2524,7 +2524,7 @@ def check_rate_limit(user_id, action, limit=100, window=3600):
 - Only MEDIUM/LOW issues → valid=true (with feedback)
 
 **Hard-stop semantics**:
-- If you set `valid=false`, the workflow MUST fix the issues before proceeding.
+- If you set `valid=false`, the workflow MUST resolve the issues before proceeding.
 - Do not accept "we'll do it later" reasoning as a resolution unless the user explicitly approves deferral.
 
 </Monitor_Critical_Reminders>
diff --git a/src/mapify_cli/templates/hooks/block-dangerous.sh b/src/mapify_cli/templates/hooks/block-dangerous.sh
index f42a6ba..3ba7186 100755
--- a/src/mapify_cli/templates/hooks/block-dangerous.sh
+++ b/src/mapify_cli/templates/hooks/block-dangerous.sh
@@ -18,7 +18,7 @@
 #
 # TESTING:
 #   echo '{"tool_name": "Bash", "tool_input": {"command": "rm -rf /"}}' | bash block-dangerous.sh
-#   # Expected: Exit code 2
+#   # Expected: Exit code 0 with permissionDecision=deny in JSON output
 #
 # =============================================================================
 
@@ -27,9 +27,60 @@ set -euo pipefail
 # Read JSON from stdin
 INPUT=$(cat)
 
-# Extract tool_name and command using jq (or fallback to grep)
-TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || echo "")
-COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "")
+deny() {
+    local reason="$1"
+
+    if command -v jq >/dev/null 2>&1; then
+        jq -n --arg reason "$reason" '{
+          hookSpecificOutput: {
+            hookEventName: "PreToolUse",
+            permissionDecision: "deny",
+            permissionDecisionReason: $reason
+          }
+        }'
+        return 0
+    fi
+
+    if command -v python3 >/dev/null 2>&1; then
+        python3 - "$reason" <<'PY'
+import json
+import sys
+
+reason = sys.argv[1]
+print(
+    json.dumps(
+        {
+            "hookSpecificOutput": {
+                "hookEventName": "PreToolUse",
+                "permissionDecision": "deny",
+                "permissionDecisionReason": reason,
+            }
+        }
+    )
+)
+PY
+        return 0
+    fi
+
+    local escaped=${reason//\\/\\\\}
+    escaped=${escaped//\"/\\\"}
+    printf '{"hookSpecificOutput":{"hookEventName":"PreToolUse","permissionDecision":"deny","permissionDecisionReason":"%s"}}\n' "$escaped"
+}
+
+if command -v jq >/dev/null 2>&1; then
+    TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT" 2>/dev/null || true)
+    COMMAND=$(jq -r '.tool_input.command // empty' <<<"$INPUT" 2>/dev/null || true)
+elif command -v python3 >/dev/null 2>&1; then
+    TOOL_NAME=$(
+        python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); print(d.get("tool_name",""))' <<<"$INPUT" 2>/dev/null || true
+    )
+    COMMAND=$(
+        python3 -c 'import json,sys; d=json.loads(sys.stdin.read() or "{}"); ti=d.get("tool_input") or {}; print(ti.get("command",""))' <<<"$INPUT" 2>/dev/null || true
+    )
+else
+    TOOL_NAME=""
+    COMMAND=""
+fi
 
 # Only intercept Bash tool
 if [[ "$TOOL_NAME" != "Bash" ]]; then
@@ -51,113 +102,67 @@ COMMAND_LOWER=$(echo "$COMMAND" | tr '[:upper:]' '[:lower:]')
 # =============================================================================
 
 # Check for rm -rf (recursive force delete)
-# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force
-if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
-fi
-
-# Catch rm -rf at end of command or with path immediately after
-if echo "$COMMAND" | grep -qE 'rm\s+(-rf|-fr)\s*(/|~|\.|[a-zA-Z])'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
-fi
-
-# Catch separated flags: rm -r -f or rm -f -r
-if echo "$COMMAND" | grep -qE 'rm\s+(-r\s+-f|-f\s+-r)\s'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
-      }
-    }'
-    exit 0
+# Matches: rm -rf, rm -fr, rm -r -f, rm -f -r, rm --recursive --force, and edge
+# cases where flags touch the path (e.g., rm -rf0dir).
+if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]]'; then
+    # Long flags: --recursive and --force
+    if echo "$COMMAND_LOWER" | grep -qE -- '--recursive' && echo "$COMMAND_LOWER" | grep -qE -- '--force'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+
+    # Combined short flags: -rf / -fr (including edge cases where the path touches flags)
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*r[^[:space:]]*f'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:];|&()])rm[[:space:]].*-[^[:space:]]*f[^[:space:]]*r'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
+
+    # Separate short flags: -r -f / -f -r
+    if echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-r([[:space:]]|$)' && echo "$COMMAND_LOWER" | grep -qE '(^|[[:space:]])-f([[:space:]]|$)'; then
+        deny "Blocked: rm -rf is prohibited (recursive force delete can cause irreversible data loss)"
+        exit 0
+    fi
 fi
 
 # Check for git push --force to main/master
 # Matches: git push --force origin main, git push -f origin master
 if echo "$COMMAND" | grep -qE 'git\s+push\s+.*(-f|--force).*\s+(origin|upstream)\s+(main|master)(\s|$)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
-      }
-    }'
+    deny "Blocked: Force push to main/master is prohibited (can overwrite team work)"
     exit 0
 fi
 
 # Also check reverse order: git push origin main --force
 if echo "$COMMAND" | grep -qE 'git\s+push\s+(origin|upstream)\s+(main|master)\s+(-f|--force)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Force push to main/master is prohibited (can overwrite team work)"
-      }
-    }'
+    deny "Blocked: Force push to main/master is prohibited (can overwrite team work)"
     exit 0
 fi
 
 # Check for git reset --hard (without specific commit, or dangerous patterns)
 # Block: git reset --hard, git reset --hard HEAD~, git reset --hard origin/
 if echo "$COMMAND" | grep -qE 'git\s+reset\s+--hard(\s|$)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
-      }
-    }'
+    deny "Blocked: git reset --hard is prohibited (can cause irreversible loss of uncommitted changes)"
     exit 0
 fi
 
 # Check for dangerous chmod/chown on system directories
 if echo "$COMMAND" | grep -qE '(chmod|chown)\s+(-R|--recursive)\s+.*\s+/($|\s)'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
-      }
-    }'
+    deny "Blocked: Recursive chmod/chown on / is prohibited (can break system permissions)"
     exit 0
 fi
 
 # Check for dd with of=/dev/
 if echo "$COMMAND" | grep -qE 'dd\s+.*of=/dev/'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
-      }
-    }'
+    deny "Blocked: dd with of=/dev/* is prohibited (writing to raw devices can destroy data)"
     exit 0
 fi
 
 # Check for mkfs (format filesystem)
 if echo "$COMMAND" | grep -qE 'mkfs'; then
-    jq -n '{
-      hookSpecificOutput: {
-        hookEventName: "PreToolUse",
-        permissionDecision: "deny",
-        permissionDecisionReason: "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
-      }
-    }'
+    deny "Blocked: mkfs is prohibited (formatting filesystems can destroy data)"
     exit 0
 fi
 
diff --git a/src/mapify_cli/templates/hooks/block-secrets.py b/src/mapify_cli/templates/hooks/block-secrets.py
index fb1d2e1..ac46e72 100755
--- a/src/mapify_cli/templates/hooks/block-secrets.py
+++ b/src/mapify_cli/templates/hooks/block-secrets.py
@@ -26,7 +26,7 @@
 
 TESTING:
   echo '{"tool_name": "Read", "tool_input": {"file_path": ".env"}}' | python3 block-secrets.py
-  # Expected: Exit code 2, JSON error on stderr
+  # Expected: Exit code 0, JSON on stdout with permissionDecision=deny
 
 PERFORMANCE:
   Target: <100ms per invocation
diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py
index 499bdfc..b0e67f0 100755
--- a/src/mapify_cli/templates/hooks/workflow-gate.py
+++ b/src/mapify_cli/templates/hooks/workflow-gate.py
@@ -28,7 +28,8 @@
 
 TESTING:
   echo '{"tool_name": "Edit", "tool_input": {"file_path": "test.py"}}' | python3 workflow-gate.py
-  # Expected: Exit code 0, allow (no workflow state) OR deny with reason (if workflow active + missing steps)
+  # Expected (no workflow state): Exit 0, stdout: {}
+  # Expected (workflow active, missing steps): Exit 0, stdout: {"hookSpecificOutput":{"permissionDecision":"deny",...}}
 
 PERFORMANCE:
   Target: <100ms per invocation
diff --git a/tests/hooks/test_block_secrets.py b/tests/hooks/test_block_secrets.py
index 36ba85c..09a2fd0 100644
--- a/tests/hooks/test_block_secrets.py
+++ b/tests/hooks/test_block_secrets.py
@@ -69,8 +69,9 @@ def test_criterion_2_credentials_blocked(self):
 
     def test_criterion_3_legitimate_allowed(self):
         """VC3: Legitimate files like 'app.py' are allowed."""
-        exit_code, _, _ = run_hook("Read", "app.py")
-        assert exit_code == 0, "app.py should be allowed"
+        exit_code, stdout, stderr = run_hook("Read", "app.py")
+        assert exit_code == 0, f"app.py should be allowed. stderr: {stderr}"
+        assert _parse_stdout(stdout) == {}
 
     def test_criterion_4_performance(self):
         """VC4: Hook execution completes in <100ms."""

From ab9a5af9a65c1ef318d7e8ab83f16d1364828a51 Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Tue, 17 Feb 2026 00:35:20 +0300
Subject: [PATCH 6/7] research/preconditions-postconditions: Rewrite map-review
 as interactive 4-section review

Replace batch-report workflow with interactive section-by-section review:
- 4 sections: Architecture, Code Quality, Tests, Performance
- Review Section Protocol with BIG/SMALL mode and CI mode (--ci/--auto)
- 4 targeted mem0 queries + 3 parallel agent calls
- Structured verdict: PROCEED / REVISE / BLOCK with priority ordering
- Add 3 test classes (TestMapReviewStructure, TestMapReviewVerdictLogic,
  TestAgentSchemaFieldsPresent) and command template sync tests
- Update ARCHITECTURE.md diagram, section description, and token estimates
---
 .claude/commands/map-review.md                | 299 +++++++++++++-----
 docs/ARCHITECTURE.md                          |  41 ++-
 .../templates/commands/map-review.md          | 299 +++++++++++++-----
 tests/test_command_templates.py               | 212 +++++++++++++
 tests/test_template_sync.py                   |  82 +++++
 5 files changed, 770 insertions(+), 163 deletions(-)

diff --git a/.claude/commands/map-review.md b/.claude/commands/map-review.md
index b295835..ea8623b 100644
--- a/.claude/commands/map-review.md
+++ b/.claude/commands/map-review.md
@@ -1,47 +1,133 @@
 ---
-description: Comprehensive MAP review of changes using Monitor, Predictor, and Evaluator agents
+description: Interactive 4-section code review using Monitor, Predictor, and Evaluator agents
 ---
 
 # MAP Review Workflow
 
-Review current changes using MAP agents for comprehensive quality analysis.
+Interactive, structured code review of current changes using Monitor, Predictor, and Evaluator agents.
 
-## What This Command Does
+**Task:** $ARGUMENTS
 
-1. **Monitor** - Validates code correctness, security, standards compliance
-2. **Predictor** - Analyzes impact on codebase, breaking changes, dependencies
-3. **Evaluator** - Scores overall quality and provides actionable feedback
+## Execution Rules
 
-## Step 1: Query mem0 for Review Patterns
+1. Execute all steps in the order listed below
+2. Use the exact `subagent_type` values specified (monitor, predictor, evaluator)
+3. All 3 agents are required — do not skip any
+4. **Monitor `valid=false` is a hard stop** — report issues immediately, do not proceed to interactive presentation
+5. Wait for all parallel calls to complete before proceeding to the next phase
 
-```bash
-# Get review best practices
-REVIEW_PATTERNS=$(mcp__mem0__map_tiered_search(query="code review", limit=5))
-```
+## Review Preferences (Customize per project)
+
+These defaults guide how agents weigh findings. Override by editing this section:
+
+- **DRY:** Important — flag duplication aggressively
+- **Testing:** Non-negotiable — missing tests = high severity
+- **Engineering level:** "Engineered enough" — reject both under-engineering and over-engineering
+- **Edge cases:** Prefer handling more, especially for public APIs
+- **Clarity:** Explicit over clever — readable code wins
+- **Performance:** Flag only when measurable impact is likely
+
+## Expected Agent Output Schemas (Contract Reference)
+
+These are the fields each agent is expected to return. The command prompt explicitly requests them.
+
+**Monitor:**
+- `valid`: boolean — overall pass/fail
+- `summary`: string — brief description of findings
+- `verdict`: `'approved'` | `'needs_revision'` | `'rejected'` — requested explicitly (not in base schema, `additionalProperties: true`)
+- `issues[]`: array of `{severity, category, description, file_path, line_range, suggestion}`
+- `passed_checks[]`: array of strings — checks that passed
+- `failed_checks[]`: array of strings — checks that failed
+
+**Predictor:**
+- `risk_assessment`: `'low'` | `'medium'` | `'high'` | `'critical'`
+- `predicted_state.affected_components[]`: array of affected components/files
+- `predicted_state.breaking_changes[]`: array of `{type, description, mitigation}`
+- `predicted_state.required_updates[]`: array of required follow-up changes
+- `confidence.score`: float 0.0-1.0
+
+**Evaluator:**
+- `scores.functionality`: int 0-10
+- `scores.code_quality`: int 0-10
+- `scores.performance`: int 0-10
+- `scores.security`: int 0-10
+- `scores.testability`: int 0-10
+- `scores.completeness`: int 0-10
+- `overall_score`: float 1.0-10.0 (weighted)
+- `recommendation`: `'proceed'` | `'improve'` | `'reconsider'`
+- `strengths[]`: array of strings
+- `weaknesses[]`: array of strings
+- `next_steps[]`: array of strings
+
+## Review Section Protocol
+
+This protocol is used identically by all 4 review sections below. Do NOT deviate.
+
+1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3)
+2. **For each issue:**
+   - Describe the problem with `file:line` references where available
+   - Present 2-3 options with tradeoffs (pros/cons for each)
+   - **Recommended option is always listed first** (marked with "(Recommended)")
+3. **AskUserQuestion** with numbered issues and lettered options for each
+   - Example: "ARCH-1: Option A (Recommended) / Option B / Option C"
+   - **Skip AskUserQuestion in CI mode** — auto-select recommended options
+4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section
+   - Include: which issues were addressed, which options were chosen, what remains
 
-## Step 2: Get Current Changes
+## Step 0: Select Review Mode
+
+**Parse $ARGUMENTS for `--ci` or `--auto`:**
+- If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true
+- CI_MODE skips all AskUserQuestion calls and auto-selects recommended options
+
+**If NOT CI_MODE:** Use AskUserQuestion to ask the user:
+
+> How thorough should this review be?
+> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review
+> - **SMALL**: 1 issue per section — quick pass for small changes
+
+Default to BIG if user doesn't respond or in CI mode.
+
+## Phase A: Collection (Parallel)
+
+### Step A.1: Gather changes
 
 ```bash
-# Get staged and unstaged changes
 git diff HEAD
 git status
 ```
 
-## Step 3: Invoke All Review Agents in Parallel
+Save the diff output — it will be passed to all 3 agents.
+
+### Step A.2: Launch all parallel calls
+
+In **ONE message**, launch all 7 calls in parallel (no dependencies between them):
+
+**4 mem0 queries:**
+
+```
+mcp__mem0__map_tiered_search(query="architecture review patterns")
+mcp__mem0__map_tiered_search(query="code quality standards")
+mcp__mem0__map_tiered_search(query="test coverage criteria")
+mcp__mem0__map_tiered_search(query="performance review patterns")
+```
 
-**IMPORTANT**: Call Monitor, Predictor, and Evaluator simultaneously by invoking all three Task calls in a single message. These agents operate independently on the same git diff without shared state.
+**3 agent Task calls** (pass the git diff + Review Preferences to each):
 
 ```
 Task(
   subagent_type="monitor",
   description="Review code changes",
-  prompt="Review the following changes for code quality:
+  prompt="Review the following changes for code quality, security, and correctness.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use architecture + code quality results]
 
 Check for:
 - Code correctness and logic errors
@@ -52,49 +138,59 @@ Check for:
 
 Output JSON with:
 - valid: boolean
-- issues: array of {severity, category, description, file_path, line_range, suggestion}
+- summary: string
 - verdict: 'approved' | 'needs_revision' | 'rejected'
-- summary: string"
+- issues: array of {severity, category, description, file_path, line_range, suggestion}
+- passed_checks: array of strings
+- failed_checks: array of strings"
 )
 
 Task(
   subagent_type="predictor",
   description="Analyze change impact",
-  prompt="Analyze the impact of these changes:
+  prompt="Analyze the impact of these changes on the broader codebase.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use architecture results]
 
 Analyze:
-- Affected files and modules
+- Affected components and modules
 - Breaking changes (API, schema, behavior)
 - Dependencies that need updates
-- Risk assessment
+- Risk assessment (low/medium/high/critical)
 - Integration points affected
 
 Output JSON with:
-- affected_files: array of {path, change_type, impact_level}
-- breaking_changes: array of {type, description, mitigation}
-- dependencies_affected: array of strings
-- risk_level: 'low' | 'medium' | 'high'
-- recommendations: array of strings"
+- risk_assessment: 'low' | 'medium' | 'high' | 'critical'
+- predicted_state:
+    affected_components: array of affected files/modules
+    breaking_changes: array of {type, description, mitigation}
+    required_updates: array of strings
+- confidence:
+    score: float 0.0-1.0"
 )
 
 Task(
   subagent_type="evaluator",
   description="Score change quality",
-  prompt="Evaluate the overall quality of these changes:
+  prompt="Evaluate the overall quality of these changes.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use code quality + test coverage results]
 
-Provide quality assessment using 1-10 scoring (matches evaluator agent template):
+Provide quality assessment using 1-10 scoring:
 - Functionality score (1-10)
 - Code quality score (1-10)
 - Performance score (1-10)
@@ -112,71 +208,126 @@ Output JSON with:
 )
 ```
 
-**How Parallel Execution Works:**
-1. Claude Code will process all three Task calls from the same message
-2. Each agent analyzes the git diff independently
-3. Wait for all three Task calls to complete before proceeding
-4. Collect results from Monitor, Predictor, and Evaluator outputs
+**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding.
 
-## Step 4: Aggregate and Present Results
+### Hard Stop Check
 
-Once all three agents have completed, combine their findings:
+After all agents complete, check Monitor output:
+- If `Monitor.valid = false` → **HARD STOP**. Present Monitor issues immediately and do not proceed to Phase B. The user must fix issues before re-running the review.
 
-### Review Summary
+## Phase B: Interactive Presentation (4 Sections)
 
-**Monitor Analysis:**
-- Verdict: [monitor.verdict]
-- Issues Found: [count by severity]
-- Valid: [monitor.valid]
+Present findings section by section. Each section follows the **Review Section Protocol** defined above.
 
-**Predictor Analysis:**
-- Risk Level: [predictor.risk_level]
-- Breaking Changes: [predictor.breaking_changes.length]
-- Affected Files: [predictor.affected_files.length]
+### Section 1: Architecture
 
-**Evaluator Assessment:**
-- Overall Score: [evaluator.overall_score]/10
-- Code Quality: [evaluator.scores.code_quality]/10
-- Security: [evaluator.scores.security]/10
-- Recommendation: [evaluator.recommendation]
+**Primary source:** Predictor (`breaking_changes`, `affected_components`, `risk_assessment`)
+**Cross-reference:** Evaluator `scores.completeness`
+**Issue prefix:** `ARCH`
 
-### Critical Issues (High Severity)
+Focus on:
+- Breaking changes and their mitigations
+- Affected component blast radius
+- Architectural fit of the changes
+- Completeness of the change set (are all affected areas updated?)
 
-[List high-severity issues from Monitor]
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 2
 
-### Breaking Changes
+### Section 2: Code Quality
 
-[List breaking changes from Predictor]
+**Primary source:** Monitor (`issues` filtered by category: correctness, code-quality, maintainability)
+**Cross-reference:** Evaluator `scores.code_quality`
+**Issue prefix:** `QUALITY`
 
-### Recommendations
+Focus on:
+- Correctness issues (logic errors, edge cases)
+- Code quality issues (naming, structure, DRY violations)
+- Maintainability concerns
+- Standards compliance
 
-**From Monitor:**
-[List Monitor suggestions for critical issues]
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 3
 
-**From Predictor:**
-[List Predictor recommendations]
+### Section 3: Tests
 
-**From Evaluator:**
-[List Evaluator improvements needed]
+**Primary source:** Monitor (`issues` filtered by category: testability, test-coverage)
+**Cross-reference:** Evaluator `scores.testability`
+**Issue prefix:** `TESTS`
 
-### Final Verdict
+Focus on:
+- Missing test coverage for new/changed code
+- Test quality (edge cases, error paths)
+- Testability of the implementation (dependency injection, mocking seams)
 
-Based on combined analysis:
-- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed'
-- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve'
-- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0)
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 4
 
----
+### Section 4: Performance
+
+**Primary source:** Monitor (`issues` filtered by category: performance)
+**Cross-reference:** Evaluator `scores.performance` + Predictor `risk_assessment`
+**Issue prefix:** `PERF`
+
+Focus on:
+- Performance regressions
+- Algorithmic complexity concerns
+- Resource usage (memory, CPU, I/O)
+- Only flag issues where measurable impact is likely (per Review Preferences)
+
+→ Follow **Review Section Protocol**
+
+## Final Verdict
 
-## 💡 Optional: Preserve Review Learnings
+Based on combined agent outputs, determine one of:
+
+**PROCEED:** All conditions met:
+- `Monitor.verdict = 'approved'` AND `Monitor.valid = true`
+- `Evaluator.recommendation = 'proceed'`
+
+**REVISE:** Any condition true:
+- `Monitor.verdict = 'needs_revision'`
+- `Evaluator.recommendation = 'improve'`
+
+**BLOCK:** Any condition true (highest priority):
+- `Monitor.verdict = 'rejected'`
+- `Evaluator.recommendation = 'reconsider'`
+- `Evaluator.scores.security < 5`
+- `Evaluator.scores.functionality < 5`
+- `Predictor.risk_assessment = 'high'` AND `predicted_state.breaking_changes` is non-empty
+
+**Priority:** BLOCK > REVISE > PROCEED
+
+Present the verdict with a summary table:
+- Monitor verdict + valid status
+- Evaluator overall score + recommendation
+- Predictor risk assessment
+- Key issues resolved during interactive review
+- Remaining action items
+
+## CI/Auto Mode Behavior
+
+When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS):
+- Skip all AskUserQuestion calls
+- Auto-select BIG mode (4 issues per section)
+- Auto-select recommended options for all issues
+- Present all 4 sections as a batch report (no pauses between sections)
+- Output structured verdict at the end
+- Suitable for CI pipelines and automated review contexts
+
+## Optional: Preserve Review Learnings
 
 If the review revealed valuable patterns or common issues worth preserving:
 
 ```
-/map-learn [review summary with issues found and resolution patterns]
+/map-learn [review summary with key findings, resolution patterns, and verdict rationale]
 ```
 
-## MCP Tools Available
+## MCP Tools Used
+
+- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries)
+- `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation
+
+---
 
-- `mcp__mem0__map_tiered_search` - Search past review patterns
-- `mcp__sequential-thinking__sequentialthinking` - Complex analysis decisions
+**Begin review now.**
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index e347a1b..a6529dc 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -68,12 +68,13 @@ MAP Framework implements cognitive architecture inspired by prefrontal cortex fu
 │  │ Uses Claude Opus for cross-evaluation and synthesis      │   │
 │  └──────────────────────────────────────────────────────────┘   │
 │                                                                  │
-│  /map-review (parallel analysis):                               │
+│  /map-review (interactive 4-section):                            │
 │  ┌──────────────────────────────────────────────────────────┐   │
-│  │ Query mem0 patterns → Get git diff                       │   │
-│  │ → [Monitor + Predictor + Evaluator] (all 3 in parallel)  │   │
-│  │ → Aggregate results → Final verdict                      │   │
-│  │ No TaskDecomposer. Reviews current branch changes        │   │
+│  │ 4× mem0 queries + git diff                               │   │
+│  │ → [Monitor + Predictor + Evaluator] (all 3 parallel)     │   │
+│  │ → Interactive: Architecture → Quality → Tests → Perf     │   │
+│  │ → Verdict: PROCEED / REVISE / BLOCK                      │   │
+│  │ --ci mode: batch report, no interaction                   │   │
 │  └──────────────────────────────────────────────────────────┘   │
 │                                                                  │
 │  /map-fast (⚠️ minimal, low-risk only):                        │
@@ -549,20 +550,29 @@ for subtask in subtasks:
 - Root cause analysis
 - Regression debugging
 
-#### 5. `/map-review` - Code Review Workflow (3 Agents)
+#### 5. `/map-review` - Interactive Code Review (3 Agents)
 
-**Agent Sequence:** mem0 pattern query → git diff → [Monitor + Predictor + Evaluator] (parallel) → Aggregate verdict
+**Agent Sequence:** 4× mem0 targeted queries + git diff → [Monitor + Predictor + Evaluator] (all 3 parallel) → Interactive 4-section presentation → Verdict
 
 **Review-Specific Features:**
 
 1. **No TaskDecomposer** - Reviews current branch changes as-is
-2. **Parallel Agent Execution** - All 3 agents run simultaneously on same diff
-3. **Aggregated Verdict Logic:**
-   - Proceed if: Monitor approved AND Evaluator excellent/good/acceptable
-   - Revise if: Monitor needs_revision OR Evaluator needs_work
-   - Block if: Monitor rejected OR Evaluator reject OR (Predictor high risk + breaking changes)
-
-**Token Usage:** 50-60% of baseline (single diff, no implementation)
+2. **Parallel Collection** - 4 mem0 queries + 3 agents launched in a single message (7 parallel calls)
+3. **Interactive 4-Section Presentation:**
+   - **Architecture** (primary: Predictor — breaking changes, affected components)
+   - **Code Quality** (primary: Monitor — correctness, maintainability issues)
+   - **Tests** (primary: Monitor — testability, coverage gaps)
+   - **Performance** (primary: Monitor — performance issues, cross-ref Predictor risk)
+4. **Review Section Protocol** — each section presents top N issues (BIG=4, SMALL=1) with options and tradeoffs, user picks resolution via AskUserQuestion
+5. **BIG/SMALL mode** — user selects review depth at start
+6. **CI/Auto mode** (`--ci`/`--auto` flag) — batch report with no interaction, auto-selects recommended options
+7. **Verdict Logic:**
+   - PROCEED: Monitor approved + valid AND Evaluator proceed
+   - REVISE: Monitor needs_revision OR Evaluator improve
+   - BLOCK: Monitor rejected OR Evaluator reconsider OR security/functionality < 5 OR (Predictor high risk + breaking changes)
+   - Priority: BLOCK > REVISE > PROCEED
+
+**Token Usage:** ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; `--ci` mode ~12-15K)
 **Learning:** Optional via `/map-learn`
 **Quality Gates:** All 3 review agents
 
@@ -570,6 +580,7 @@ for subtask in subtasks:
 - Pre-commit code review
 - PR review automation
 - Quality gate before merge
+- CI pipeline integration (`--ci` mode)
 
 #### 6. `/map-release` - Release Workflow (No Agents)
 
@@ -631,7 +642,7 @@ Typical token consumption per subtask (estimated):
 - /map-efficient (Self-MoA): ~25-30K tokens (3× Actor + Synthesizer)
 - /map-fast: ~8-10K tokens (minimal, no learning)
 - /map-debug: ~15-20K tokens (full pipeline with Evaluator)
-- /map-review: ~8-10K tokens (parallel agents on single diff)
+- /map-review: ~15-25K tokens (4 mem0 queries + parallel agents + interactive 4-section presentation; --ci mode ~12-15K)
 - /map-debate: ~30-40K tokens (3× Actor + Opus DebateArbiter)
 
 **For 5-subtask workflow:**
diff --git a/src/mapify_cli/templates/commands/map-review.md b/src/mapify_cli/templates/commands/map-review.md
index b295835..ea8623b 100644
--- a/src/mapify_cli/templates/commands/map-review.md
+++ b/src/mapify_cli/templates/commands/map-review.md
@@ -1,47 +1,133 @@
 ---
-description: Comprehensive MAP review of changes using Monitor, Predictor, and Evaluator agents
+description: Interactive 4-section code review using Monitor, Predictor, and Evaluator agents
 ---
 
 # MAP Review Workflow
 
-Review current changes using MAP agents for comprehensive quality analysis.
+Interactive, structured code review of current changes using Monitor, Predictor, and Evaluator agents.
 
-## What This Command Does
+**Task:** $ARGUMENTS
 
-1. **Monitor** - Validates code correctness, security, standards compliance
-2. **Predictor** - Analyzes impact on codebase, breaking changes, dependencies
-3. **Evaluator** - Scores overall quality and provides actionable feedback
+## Execution Rules
 
-## Step 1: Query mem0 for Review Patterns
+1. Execute all steps in the order listed below
+2. Use the exact `subagent_type` values specified (monitor, predictor, evaluator)
+3. All 3 agents are required — do not skip any
+4. **Monitor `valid=false` is a hard stop** — report issues immediately, do not proceed to interactive presentation
+5. Wait for all parallel calls to complete before proceeding to the next phase
 
-```bash
-# Get review best practices
-REVIEW_PATTERNS=$(mcp__mem0__map_tiered_search(query="code review", limit=5))
-```
+## Review Preferences (Customize per project)
+
+These defaults guide how agents weigh findings. Override by editing this section:
+
+- **DRY:** Important — flag duplication aggressively
+- **Testing:** Non-negotiable — missing tests = high severity
+- **Engineering level:** "Engineered enough" — reject both under-engineering and over-engineering
+- **Edge cases:** Prefer handling more, especially for public APIs
+- **Clarity:** Explicit over clever — readable code wins
+- **Performance:** Flag only when measurable impact is likely
+
+## Expected Agent Output Schemas (Contract Reference)
+
+These are the fields each agent is expected to return. The command prompt explicitly requests them.
+
+**Monitor:**
+- `valid`: boolean — overall pass/fail
+- `summary`: string — brief description of findings
+- `verdict`: `'approved'` | `'needs_revision'` | `'rejected'` — requested explicitly (not in base schema, `additionalProperties: true`)
+- `issues[]`: array of `{severity, category, description, file_path, line_range, suggestion}`
+- `passed_checks[]`: array of strings — checks that passed
+- `failed_checks[]`: array of strings — checks that failed
+
+**Predictor:**
+- `risk_assessment`: `'low'` | `'medium'` | `'high'` | `'critical'`
+- `predicted_state.affected_components[]`: array of affected components/files
+- `predicted_state.breaking_changes[]`: array of `{type, description, mitigation}`
+- `predicted_state.required_updates[]`: array of required follow-up changes
+- `confidence.score`: float 0.0-1.0
+
+**Evaluator:**
+- `scores.functionality`: int 0-10
+- `scores.code_quality`: int 0-10
+- `scores.performance`: int 0-10
+- `scores.security`: int 0-10
+- `scores.testability`: int 0-10
+- `scores.completeness`: int 0-10
+- `overall_score`: float 1.0-10.0 (weighted)
+- `recommendation`: `'proceed'` | `'improve'` | `'reconsider'`
+- `strengths[]`: array of strings
+- `weaknesses[]`: array of strings
+- `next_steps[]`: array of strings
+
+## Review Section Protocol
+
+This protocol is used identically by all 4 review sections below. Do NOT deviate.
+
+1. **Present top N issues** (N=4 in BIG mode, N=1 in SMALL mode) from the primary source agent for this section, using the section prefix (e.g., ARCH-1, QUALITY-2, TESTS-1, PERF-3)
+2. **For each issue:**
+   - Describe the problem with `file:line` references where available
+   - Present 2-3 options with tradeoffs (pros/cons for each)
+   - **Recommended option is always listed first** (marked with "(Recommended)")
+3. **AskUserQuestion** with numbered issues and lettered options for each
+   - Example: "ARCH-1: Option A (Recommended) / Option B / Option C"
+   - **Skip AskUserQuestion in CI mode** — auto-select recommended options
+4. **Summarize decisions** from this section in 3-5 lines before proceeding to the next section
+   - Include: which issues were addressed, which options were chosen, what remains
 
-## Step 2: Get Current Changes
+## Step 0: Select Review Mode
+
+**Parse $ARGUMENTS for `--ci` or `--auto`:**
+- If `--ci` or `--auto` is present in $ARGUMENTS → set CI_MODE=true
+- CI_MODE skips all AskUserQuestion calls and auto-selects recommended options
+
+**If NOT CI_MODE:** Use AskUserQuestion to ask the user:
+
+> How thorough should this review be?
+> - **BIG** (Recommended): Up to 4 issues per section — comprehensive review
+> - **SMALL**: 1 issue per section — quick pass for small changes
+
+Default to BIG if user doesn't respond or in CI mode.
+
+## Phase A: Collection (Parallel)
+
+### Step A.1: Gather changes
 
 ```bash
-# Get staged and unstaged changes
 git diff HEAD
 git status
 ```
 
-## Step 3: Invoke All Review Agents in Parallel
+Save the diff output — it will be passed to all 3 agents.
+
+### Step A.2: Launch all parallel calls
+
+In **ONE message**, launch all 7 calls in parallel (no dependencies between them):
+
+**4 mem0 queries:**
+
+```
+mcp__mem0__map_tiered_search(query="architecture review patterns")
+mcp__mem0__map_tiered_search(query="code quality standards")
+mcp__mem0__map_tiered_search(query="test coverage criteria")
+mcp__mem0__map_tiered_search(query="performance review patterns")
+```
 
-**IMPORTANT**: Call Monitor, Predictor, and Evaluator simultaneously by invoking all three Task calls in a single message. These agents operate independently on the same git diff without shared state.
+**3 agent Task calls** (pass the git diff + Review Preferences to each):
 
 ```
 Task(
   subagent_type="monitor",
   description="Review code changes",
-  prompt="Review the following changes for code quality:
+  prompt="Review the following changes for code quality, security, and correctness.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use architecture + code quality results]
 
 Check for:
 - Code correctness and logic errors
@@ -52,49 +138,59 @@ Check for:
 
 Output JSON with:
 - valid: boolean
-- issues: array of {severity, category, description, file_path, line_range, suggestion}
+- summary: string
 - verdict: 'approved' | 'needs_revision' | 'rejected'
-- summary: string"
+- issues: array of {severity, category, description, file_path, line_range, suggestion}
+- passed_checks: array of strings
+- failed_checks: array of strings"
 )
 
 Task(
   subagent_type="predictor",
   description="Analyze change impact",
-  prompt="Analyze the impact of these changes:
+  prompt="Analyze the impact of these changes on the broader codebase.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use architecture results]
 
 Analyze:
-- Affected files and modules
+- Affected components and modules
 - Breaking changes (API, schema, behavior)
 - Dependencies that need updates
-- Risk assessment
+- Risk assessment (low/medium/high/critical)
 - Integration points affected
 
 Output JSON with:
-- affected_files: array of {path, change_type, impact_level}
-- breaking_changes: array of {type, description, mitigation}
-- dependencies_affected: array of strings
-- risk_level: 'low' | 'medium' | 'high'
-- recommendations: array of strings"
+- risk_assessment: 'low' | 'medium' | 'high' | 'critical'
+- predicted_state:
+    affected_components: array of affected files/modules
+    breaking_changes: array of {type, description, mitigation}
+    required_updates: array of strings
+- confidence:
+    score: float 0.0-1.0"
 )
 
 Task(
   subagent_type="evaluator",
   description="Score change quality",
-  prompt="Evaluate the overall quality of these changes:
+  prompt="Evaluate the overall quality of these changes.
+
+**Review Preferences:**
+[paste Review Preferences section above]
 
 **Changes:**
 [paste git diff output]
 
 **mem0 Context:**
-[paste relevant mem0 patterns]
+[paste relevant mem0 patterns from queries above — use code quality + test coverage results]
 
-Provide quality assessment using 1-10 scoring (matches evaluator agent template):
+Provide quality assessment using 1-10 scoring:
 - Functionality score (1-10)
 - Code quality score (1-10)
 - Performance score (1-10)
@@ -112,71 +208,126 @@ Output JSON with:
 )
 ```
 
-**How Parallel Execution Works:**
-1. Claude Code will process all three Task calls from the same message
-2. Each agent analyzes the git diff independently
-3. Wait for all three Task calls to complete before proceeding
-4. Collect results from Monitor, Predictor, and Evaluator outputs
+**Parallel execution:** All 7 calls (4 mem0 + 3 agents) MUST be issued in a single message. Wait for all to complete before proceeding.
 
-## Step 4: Aggregate and Present Results
+### Hard Stop Check
 
-Once all three agents have completed, combine their findings:
+After all agents complete, check Monitor output:
+- If `Monitor.valid = false` → **HARD STOP**. Present Monitor issues immediately and do not proceed to Phase B. The user must fix issues before re-running the review.
 
-### Review Summary
+## Phase B: Interactive Presentation (4 Sections)
 
-**Monitor Analysis:**
-- Verdict: [monitor.verdict]
-- Issues Found: [count by severity]
-- Valid: [monitor.valid]
+Present findings section by section. Each section follows the **Review Section Protocol** defined above.
 
-**Predictor Analysis:**
-- Risk Level: [predictor.risk_level]
-- Breaking Changes: [predictor.breaking_changes.length]
-- Affected Files: [predictor.affected_files.length]
+### Section 1: Architecture
 
-**Evaluator Assessment:**
-- Overall Score: [evaluator.overall_score]/10
-- Code Quality: [evaluator.scores.code_quality]/10
-- Security: [evaluator.scores.security]/10
-- Recommendation: [evaluator.recommendation]
+**Primary source:** Predictor (`breaking_changes`, `affected_components`, `risk_assessment`)
+**Cross-reference:** Evaluator `scores.completeness`
+**Issue prefix:** `ARCH`
 
-### Critical Issues (High Severity)
+Focus on:
+- Breaking changes and their mitigations
+- Affected component blast radius
+- Architectural fit of the changes
+- Completeness of the change set (are all affected areas updated?)
 
-[List high-severity issues from Monitor]
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 2
 
-### Breaking Changes
+### Section 2: Code Quality
 
-[List breaking changes from Predictor]
+**Primary source:** Monitor (`issues` filtered by category: correctness, code-quality, maintainability)
+**Cross-reference:** Evaluator `scores.code_quality`
+**Issue prefix:** `QUALITY`
 
-### Recommendations
+Focus on:
+- Correctness issues (logic errors, edge cases)
+- Code quality issues (naming, structure, DRY violations)
+- Maintainability concerns
+- Standards compliance
 
-**From Monitor:**
-[List Monitor suggestions for critical issues]
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 3
 
-**From Predictor:**
-[List Predictor recommendations]
+### Section 3: Tests
 
-**From Evaluator:**
-[List Evaluator improvements needed]
+**Primary source:** Monitor (`issues` filtered by category: testability, test-coverage)
+**Cross-reference:** Evaluator `scores.testability`
+**Issue prefix:** `TESTS`
 
-### Final Verdict
+Focus on:
+- Missing test coverage for new/changed code
+- Test quality (edge cases, error paths)
+- Testability of the implementation (dependency injection, mocking seams)
 
-Based on combined analysis:
-- **Proceed if:** Monitor verdict = 'approved' AND Evaluator recommendation = 'proceed'
-- **Revise if:** Monitor verdict = 'needs_revision' OR Evaluator recommendation = 'improve'
-- **Block if:** Monitor verdict = 'rejected' OR Evaluator recommendation = 'reconsider' OR (Predictor risk_level = 'high' AND breaking_changes.length > 0)
+→ Follow **Review Section Protocol**
+→ Summarize decisions before proceeding to Section 4
 
----
+### Section 4: Performance
+
+**Primary source:** Monitor (`issues` filtered by category: performance)
+**Cross-reference:** Evaluator `scores.performance` + Predictor `risk_assessment`
+**Issue prefix:** `PERF`
+
+Focus on:
+- Performance regressions
+- Algorithmic complexity concerns
+- Resource usage (memory, CPU, I/O)
+- Only flag issues where measurable impact is likely (per Review Preferences)
+
+→ Follow **Review Section Protocol**
+
+## Final Verdict
 
-## 💡 Optional: Preserve Review Learnings
+Based on combined agent outputs, determine one of:
+
+**PROCEED:** All conditions met:
+- `Monitor.verdict = 'approved'` AND `Monitor.valid = true`
+- `Evaluator.recommendation = 'proceed'`
+
+**REVISE:** Any condition true:
+- `Monitor.verdict = 'needs_revision'`
+- `Evaluator.recommendation = 'improve'`
+
+**BLOCK:** Any condition true (highest priority):
+- `Monitor.verdict = 'rejected'`
+- `Evaluator.recommendation = 'reconsider'`
+- `Evaluator.scores.security < 5`
+- `Evaluator.scores.functionality < 5`
+- `Predictor.risk_assessment = 'high'` AND `predicted_state.breaking_changes` is non-empty
+
+**Priority:** BLOCK > REVISE > PROCEED
+
+Present the verdict with a summary table:
+- Monitor verdict + valid status
+- Evaluator overall score + recommendation
+- Predictor risk assessment
+- Key issues resolved during interactive review
+- Remaining action items
+
+## CI/Auto Mode Behavior
+
+When `CI_MODE = true` (triggered by `--ci` or `--auto` in $ARGUMENTS):
+- Skip all AskUserQuestion calls
+- Auto-select BIG mode (4 issues per section)
+- Auto-select recommended options for all issues
+- Present all 4 sections as a batch report (no pauses between sections)
+- Output structured verdict at the end
+- Suitable for CI pipelines and automated review contexts
+
+## Optional: Preserve Review Learnings
 
 If the review revealed valuable patterns or common issues worth preserving:
 
 ```
-/map-learn [review summary with issues found and resolution patterns]
+/map-learn [review summary with key findings, resolution patterns, and verdict rationale]
 ```
 
-## MCP Tools Available
+## MCP Tools Used
+
+- `mcp__mem0__map_tiered_search` — Search past review patterns (4 targeted queries)
+- `mcp__sequential-thinking__sequentialthinking` — Complex analysis decisions during interactive presentation
+
+---
 
-- `mcp__mem0__map_tiered_search` - Search past review patterns
-- `mcp__sequential-thinking__sequentialthinking` - Complex analysis decisions
+**Begin review now.**
diff --git a/tests/test_command_templates.py b/tests/test_command_templates.py
index 39cb9f4..82dc329 100644
--- a/tests/test_command_templates.py
+++ b/tests/test_command_templates.py
@@ -165,3 +165,215 @@ def test_map_efficient_is_token_efficient(self, templates_commands_dir):
         assert (
             "token-efficient" in content.lower() or "efficient" in content.lower()
         ), "Should describe itself as efficient"
+
+
+class TestMapReviewStructure:
+    """Test structural properties of the map-review command template."""
+
+    @pytest.fixture
+    def project_root(self):
+        return Path(__file__).parent.parent
+
+    @pytest.fixture
+    def templates_commands_dir(self, project_root):
+        return project_root / "src" / "mapify_cli" / "templates" / "commands"
+
+    @pytest.fixture
+    def review_content(self, templates_commands_dir):
+        return (templates_commands_dir / "map-review.md").read_text()
+
+    def test_has_frontmatter(self, review_content):
+        """map-review.md starts with YAML frontmatter containing description."""
+        assert review_content.startswith("---")
+        assert "description:" in review_content[:200]
+
+    def test_has_arguments_placeholder(self, review_content):
+        """Command references $ARGUMENTS for user input."""
+        assert "$ARGUMENTS" in review_content
+
+    def test_has_review_section_protocol(self, review_content):
+        """Review Section Protocol is defined once and referenced by sections."""
+        assert "Review Section Protocol" in review_content
+
+    def test_has_four_section_headings(self, review_content):
+        """All 4 review sections are present."""
+        assert "Section 1: Architecture" in review_content
+        assert "Section 2: Code Quality" in review_content
+        assert "Section 3: Tests" in review_content
+        assert "Section 4: Performance" in review_content
+
+    @pytest.mark.parametrize("prefix", ["ARCH", "QUALITY", "TESTS", "PERF"])
+    def test_section_prefixes_present(self, review_content, prefix):
+        """Each section defines its issue prefix."""
+        assert prefix in review_content
+
+    @pytest.mark.parametrize(
+        "section,source",
+        [
+            ("Section 1: Architecture", "Predictor"),
+            ("Section 2: Code Quality", "Monitor"),
+            ("Section 3: Tests", "Monitor"),
+            ("Section 4: Performance", "Monitor"),
+        ],
+    )
+    def test_primary_source_mapping(self, review_content, section, source):
+        """Each section references its primary source agent."""
+        # Find the section and check the source is mentioned nearby
+        idx = review_content.index(section)
+        section_block = review_content[idx : idx + 500]
+        assert source in section_block, (
+            f"{section} should reference {source} as primary source"
+        )
+
+    def test_three_agent_task_calls(self, review_content):
+        """Command includes Task calls for all 3 agents."""
+        assert 'subagent_type="monitor"' in review_content
+        assert 'subagent_type="predictor"' in review_content
+        assert 'subagent_type="evaluator"' in review_content
+
+    def test_four_mem0_queries(self, review_content):
+        """Command includes at least 4 mem0 tiered search queries."""
+        count = review_content.count("map_tiered_search")
+        assert count >= 4, f"Expected at least 4 mem0 queries, found {count}"
+
+    def test_ci_mode_flag(self, review_content):
+        """Command documents --ci flag for CI mode."""
+        assert "--ci" in review_content
+
+    def test_ask_user_question_mentioned(self, review_content):
+        """Command uses AskUserQuestion for interactive presentation."""
+        assert "AskUserQuestion" in review_content
+
+    def test_review_preferences_section(self, review_content):
+        """Command includes Review Preferences section."""
+        assert "Review Preferences" in review_content
+
+    def test_schema_documentation(self, review_content):
+        """Command documents expected agent output schemas."""
+        assert "Expected Agent Output Schemas" in review_content
+
+    def test_map_learn_suggestion(self, review_content):
+        """Command suggests /map-learn for preserving review learnings."""
+        assert "/map-learn" in review_content
+
+    def test_parallel_execution_instruction(self, review_content):
+        """Command instructs parallel execution of agents."""
+        content_lower = review_content.lower()
+        assert "parallel" in content_lower
+
+    def test_previous_section_summary(self, review_content):
+        """Command instructs summarizing decisions before next section."""
+        assert "Summarize decisions" in review_content or "summarize" in review_content.lower()
+
+
+class TestMapReviewVerdictLogic:
+    """Test verdict logic conditions in map-review command."""
+
+    @pytest.fixture
+    def project_root(self):
+        return Path(__file__).parent.parent
+
+    @pytest.fixture
+    def templates_commands_dir(self, project_root):
+        return project_root / "src" / "mapify_cli" / "templates" / "commands"
+
+    @pytest.fixture
+    def review_content(self, templates_commands_dir):
+        return (templates_commands_dir / "map-review.md").read_text()
+
+    def test_proceed_conditions(self, review_content):
+        """PROCEED requires Monitor approved + Evaluator proceed."""
+        assert "PROCEED" in review_content
+        assert "Monitor.verdict" in review_content
+        assert "'approved'" in review_content
+        assert "Evaluator.recommendation" in review_content
+        assert "'proceed'" in review_content
+
+    def test_revise_conditions(self, review_content):
+        """REVISE triggered by needs_revision or improve."""
+        assert "REVISE" in review_content
+        assert "'needs_revision'" in review_content
+        assert "'improve'" in review_content
+
+    def test_block_conditions(self, review_content):
+        """BLOCK triggered by rejected, reconsider, or critical thresholds."""
+        assert "BLOCK" in review_content
+        assert "'rejected'" in review_content
+        assert "'reconsider'" in review_content
+
+    def test_block_security_threshold(self, review_content):
+        """BLOCK includes security score < 5 condition."""
+        assert "security < 5" in review_content or "security<5" in review_content
+
+    def test_block_functionality_threshold(self, review_content):
+        """BLOCK includes functionality score < 5 condition."""
+        assert (
+            "functionality < 5" in review_content
+            or "functionality<5" in review_content
+        )
+
+    def test_block_predictor_risk(self, review_content):
+        """BLOCK includes high risk + breaking changes condition."""
+        assert "risk_assessment" in review_content
+        assert "breaking_changes" in review_content
+
+    def test_priority_ordering(self, review_content):
+        """Verdict priority is BLOCK > REVISE > PROCEED."""
+        assert "BLOCK > REVISE > PROCEED" in review_content
+
+    def test_references_monitor_valid(self, review_content):
+        """Verdict references Monitor.valid field."""
+        assert "Monitor.valid" in review_content
+
+    def test_references_evaluator_overall_score(self, review_content):
+        """Verdict section references Evaluator overall_score."""
+        assert "overall_score" in review_content
+
+    def test_references_predictor_risk_assessment(self, review_content):
+        """Verdict references Predictor risk_assessment."""
+        assert "Predictor.risk_assessment" in review_content or (
+            "Predictor" in review_content and "risk_assessment" in review_content
+        )
+
+
+class TestAgentSchemaFieldsPresent:
+    """Test that agent template files contain expected schema fields."""
+
+    @pytest.fixture
+    def project_root(self):
+        return Path(__file__).parent.parent
+
+    @pytest.fixture
+    def claude_agents_dir(self, project_root):
+        """Use .claude/agents/ (development source) for field verification."""
+        return project_root / ".claude" / "agents"
+
+    def test_monitor_has_valid_field(self, claude_agents_dir):
+        """Monitor agent template contains 'valid' field."""
+        content = (claude_agents_dir / "monitor.md").read_text()
+        assert "valid" in content
+
+    def test_monitor_has_issues_field(self, claude_agents_dir):
+        """Monitor agent template contains 'issues' field."""
+        content = (claude_agents_dir / "monitor.md").read_text()
+        assert "issues" in content
+
+    def test_predictor_has_risk_assessment(self, claude_agents_dir):
+        """Predictor agent template contains 'risk_assessment' field."""
+        content = (claude_agents_dir / "predictor.md").read_text()
+        assert "risk_assessment" in content
+
+    def test_predictor_has_breaking_changes(self, claude_agents_dir):
+        """Predictor agent template contains 'breaking_changes' field."""
+        content = (claude_agents_dir / "predictor.md").read_text()
+        assert "breaking_changes" in content
+
+    def test_evaluator_has_scores(self, claude_agents_dir):
+        """Evaluator agent template contains 'scores' field."""
+        content = (claude_agents_dir / "evaluator.md").read_text()
+        assert "scores" in content
+
+    def test_evaluator_has_recommendation(self, claude_agents_dir):
+        """Evaluator agent template contains 'recommendation' field."""
+        content = (claude_agents_dir / "evaluator.md").read_text()
+        assert "recommendation" in content
diff --git a/tests/test_template_sync.py b/tests/test_template_sync.py
index f58c0d1..a5205fa 100644
--- a/tests/test_template_sync.py
+++ b/tests/test_template_sync.py
@@ -170,3 +170,85 @@ def test_agent_frontmatter_no_deleted_changelog(
                         f"{agent} has 'changelog:' in frontmatter pointing to deleted file. "
                         f"Remove the changelog field from the frontmatter."
                     )
+
+
+class TestCommandTemplateSynchronization:
+    """Test that command templates are synchronized between .claude/commands/ and templates/commands/."""
+
+    @pytest.fixture
+    def project_root(self):
+        return Path(__file__).parent.parent
+
+    @pytest.fixture
+    def claude_commands_dir(self, project_root):
+        return project_root / ".claude" / "commands"
+
+    @pytest.fixture
+    def templates_commands_dir(self, project_root):
+        return project_root / "src" / "mapify_cli" / "templates" / "commands"
+
+    @pytest.mark.parametrize(
+        "command",
+        [
+            "map-check.md",
+            "map-debate.md",
+            "map-debug.md",
+            "map-efficient.md",
+            "map-fast.md",
+            "map-learn.md",
+            "map-plan.md",
+            "map-release.md",
+            "map-resume.md",
+            "map-review.md",
+        ],
+    )
+    def test_command_content_matches(
+        self, claude_commands_dir, templates_commands_dir, command
+    ):
+        """Test that command file content is identical between directories."""
+        claude_file = claude_commands_dir / command
+        template_file = templates_commands_dir / command
+
+        if not claude_file.exists() or not template_file.exists():
+            pytest.skip(f"{command} doesn't exist in both directories")
+
+        assert filecmp.cmp(claude_file, template_file, shallow=False), (
+            f"{command} content differs between .claude/commands/ and templates/commands/. "
+            f"Run: make sync-templates"
+        )
+
+    def test_command_file_count_matches(
+        self, claude_commands_dir, templates_commands_dir
+    ):
+        """Test that both directories have the same number of command .md files."""
+        if not claude_commands_dir.exists() or not templates_commands_dir.exists():
+            pytest.skip("One or both directories don't exist")
+
+        claude_count = len(list(claude_commands_dir.glob("map-*.md")))
+        template_count = len(list(templates_commands_dir.glob("map-*.md")))
+
+        assert claude_count == template_count, (
+            f"Command file count mismatch: .claude/commands/ has {claude_count} map-*.md files, "
+            f"templates/commands/ has {template_count} map-*.md files. "
+            f"Run: make sync-templates"
+        )
+
+    def test_no_orphaned_command_templates(
+        self, claude_commands_dir, templates_commands_dir
+    ):
+        """Test that templates/commands/ doesn't have map-* files missing from .claude/commands/."""
+        if not templates_commands_dir.exists():
+            pytest.skip("Templates directory doesn't exist")
+
+        claude_files = (
+            {f.name for f in claude_commands_dir.glob("map-*.md")}
+            if claude_commands_dir.exists()
+            else set()
+        )
+        template_files = {f.name for f in templates_commands_dir.glob("map-*.md")}
+
+        orphaned = template_files - claude_files
+        assert not orphaned, (
+            f"Orphaned command files in templates/commands/ not in .claude/commands/: {orphaned}. "
+            f"Run: make sync-templates"
+        )

From a39792d71189fef9f430e8940a90a4b6044ce748 Mon Sep 17 00:00:00 2001
From: "Mikhail [azalio] Petrov" <azalio@azalio.net>
Date: Tue, 17 Feb 2026 17:03:47 +0300
Subject: [PATCH 7/7] Allow map artifact updates through workflow gate

---
 .claude/hooks/workflow-gate.py                | 71 ++++++++++++++++++-
 .../templates/hooks/workflow-gate.py          | 71 ++++++++++++++++++-
 2 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/.claude/hooks/workflow-gate.py b/.claude/hooks/workflow-gate.py
index b0e67f0..a74be23 100755
--- a/.claude/hooks/workflow-gate.py
+++ b/.claude/hooks/workflow-gate.py
@@ -10,9 +10,10 @@
   No manual invocation needed - Claude Code handles hook execution.
 
 ENFORCEMENT RULES:
-  - Blocks Edit/Write/MultiEdit if workflow_state.json is missing
+  - Allows Edit/Write/MultiEdit when workflow_state.json is missing (fail-open)
   - Blocks if current subtask hasn't completed required steps: ['actor', 'monitor']
   - Allows tools if all required steps are completed
+  - Always allows edits under .map/ (workflow artifacts/state) to prevent deadlocks
   - Allows Read, Bash, and other non-editing tools always
 
 WORKFLOW STATE FILE:
@@ -55,6 +56,56 @@
 REQUIRED_STEPS = ["actor", "monitor"]
 
 
+def extract_target_file_paths(tool_call: Dict) -> list[str]:
+    """Best-effort extraction of file paths from Claude Code tool payloads."""
+    tool_input = tool_call.get("tool_input") or {}
+    if not isinstance(tool_input, dict):
+        return []
+
+    paths: list[str] = []
+
+    direct = tool_input.get("file_path")
+    if isinstance(direct, str) and direct.strip():
+        paths.append(direct)
+
+    edits = tool_input.get("edits")
+    if isinstance(edits, list):
+        for edit in edits:
+            if not isinstance(edit, dict):
+                continue
+            fp = edit.get("file_path")
+            if isinstance(fp, str) and fp.strip():
+                paths.append(fp)
+
+    return paths
+
+
+def is_map_artifact_path(file_path: str) -> bool:
+    """
+    Return True if file_path resolves under the current working directory's .map/.
+
+    This allowlist prevents the workflow gate from deadlocking itself by blocking
+    updates to workflow artifacts (including workflow_state.json).
+    """
+    if not isinstance(file_path, str) or not file_path.strip():
+        return False
+
+    repo_root = Path.cwd().resolve()
+    candidate = Path(file_path)
+    resolved = (
+        candidate.resolve(strict=False)
+        if candidate.is_absolute()
+        else (repo_root / candidate).resolve(strict=False)
+    )
+
+    try:
+        rel = resolved.relative_to(repo_root)
+    except ValueError:
+        return False
+
+    return bool(rel.parts) and rel.parts[0] == ".map"
+
+
 def sanitize_branch_name(branch: str) -> str:
     """Sanitize branch name for safe filesystem paths."""
     sanitized = branch.replace("/", "-")
@@ -106,7 +157,17 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]:
     """
     current_subtask = state.get("current_subtask")
     if not current_subtask:
-        return False, "No current_subtask defined in workflow_state.json"
+        current_state = state.get("current_state") or "UNKNOWN"
+        return False, (
+            "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n"
+            f"current_state: {current_state}\n\n"
+            "Edits to non-.map files are blocked until current_subtask is set and "
+            "the required steps are completed.\n\n"
+            "To fix:\n"
+            "  - Update .map/<branch>/workflow_state.json to set current_subtask\n"
+            "  - Or re-run /map-resume or /map-plan to regenerate state\n"
+            "  - Or delete .map/<branch>/workflow_state.json to disable enforcement"
+        )
 
     completed = state.get("completed_steps", {}).get(current_subtask, [])
 
@@ -142,6 +203,12 @@ def main():
             print("{}")
             sys.exit(0)
 
+        # Always allow edits to MAP workflow artifacts under .map/
+        target_paths = extract_target_file_paths(tool_call)
+        if target_paths and all(is_map_artifact_path(p) for p in target_paths):
+            print("{}")
+            sys.exit(0)
+
         # Get current branch
         branch = get_branch_name()
 
diff --git a/src/mapify_cli/templates/hooks/workflow-gate.py b/src/mapify_cli/templates/hooks/workflow-gate.py
index b0e67f0..a74be23 100755
--- a/src/mapify_cli/templates/hooks/workflow-gate.py
+++ b/src/mapify_cli/templates/hooks/workflow-gate.py
@@ -10,9 +10,10 @@
   No manual invocation needed - Claude Code handles hook execution.
 
 ENFORCEMENT RULES:
-  - Blocks Edit/Write/MultiEdit if workflow_state.json is missing
+  - Allows Edit/Write/MultiEdit when workflow_state.json is missing (fail-open)
   - Blocks if current subtask hasn't completed required steps: ['actor', 'monitor']
   - Allows tools if all required steps are completed
+  - Always allows edits under .map/ (workflow artifacts/state) to prevent deadlocks
   - Allows Read, Bash, and other non-editing tools always
 
 WORKFLOW STATE FILE:
@@ -55,6 +56,56 @@
 REQUIRED_STEPS = ["actor", "monitor"]
 
 
+def extract_target_file_paths(tool_call: Dict) -> list[str]:
+    """Best-effort extraction of file paths from Claude Code tool payloads."""
+    tool_input = tool_call.get("tool_input") or {}
+    if not isinstance(tool_input, dict):
+        return []
+
+    paths: list[str] = []
+
+    direct = tool_input.get("file_path")
+    if isinstance(direct, str) and direct.strip():
+        paths.append(direct)
+
+    edits = tool_input.get("edits")
+    if isinstance(edits, list):
+        for edit in edits:
+            if not isinstance(edit, dict):
+                continue
+            fp = edit.get("file_path")
+            if isinstance(fp, str) and fp.strip():
+                paths.append(fp)
+
+    return paths
+
+
+def is_map_artifact_path(file_path: str) -> bool:
+    """
+    Return True if file_path resolves under the current working directory's .map/.
+
+    This allowlist prevents the workflow gate from deadlocking itself by blocking
+    updates to workflow artifacts (including workflow_state.json).
+    """
+    if not isinstance(file_path, str) or not file_path.strip():
+        return False
+
+    repo_root = Path.cwd().resolve()
+    candidate = Path(file_path)
+    resolved = (
+        candidate.resolve(strict=False)
+        if candidate.is_absolute()
+        else (repo_root / candidate).resolve(strict=False)
+    )
+
+    try:
+        rel = resolved.relative_to(repo_root)
+    except ValueError:
+        return False
+
+    return bool(rel.parts) and rel.parts[0] == ".map"
+
+
 def sanitize_branch_name(branch: str) -> str:
     """Sanitize branch name for safe filesystem paths."""
     sanitized = branch.replace("/", "-")
@@ -106,7 +157,17 @@ def check_workflow_compliance(state: Dict) -> tuple[bool, Optional[str]]:
     """
     current_subtask = state.get("current_subtask")
     if not current_subtask:
-        return False, "No current_subtask defined in workflow_state.json"
+        current_state = state.get("current_state") or "UNKNOWN"
+        return False, (
+            "⛔ Workflow Enforcement: No current_subtask defined in workflow_state.json\n\n"
+            f"current_state: {current_state}\n\n"
+            "Edits to non-.map files are blocked until current_subtask is set and "
+            "the required steps are completed.\n\n"
+            "To fix:\n"
+            "  - Update .map/<branch>/workflow_state.json to set current_subtask\n"
+            "  - Or re-run /map-resume or /map-plan to regenerate state\n"
+            "  - Or delete .map/<branch>/workflow_state.json to disable enforcement"
+        )
 
     completed = state.get("completed_steps", {}).get(current_subtask, [])
 
@@ -142,6 +203,12 @@ def main():
             print("{}")
             sys.exit(0)
 
+        # Always allow edits to MAP workflow artifacts under .map/
+        target_paths = extract_target_file_paths(tool_call)
+        if target_paths and all(is_map_artifact_path(p) for p in target_paths):
+            print("{}")
+            sys.exit(0)
+
         # Get current branch
         branch = get_branch_name()