From c23ef6853fbbc518f0c6a9bce49e598075425095 Mon Sep 17 00:00:00 2001 From: pbrissaud Date: Wed, 25 Feb 2026 15:14:26 +0100 Subject: [PATCH 1/8] new challenge : cascading blackout --- cascading-blackout/challenge.yaml | 112 ++++++++++++++++++ cascading-blackout/manifests/backend.yaml | 56 +++++++++ cascading-blackout/manifests/cache.yaml | 41 +++++++ cascading-blackout/manifests/gateway.yaml | 78 ++++++++++++ .../manifests/network-policies.yaml | 76 ++++++++++++ cascading-blackout/policies/protect.yaml | 93 +++++++++++++++ 6 files changed, 456 insertions(+) create mode 100644 cascading-blackout/challenge.yaml create mode 100644 cascading-blackout/manifests/backend.yaml create mode 100644 cascading-blackout/manifests/cache.yaml create mode 100644 cascading-blackout/manifests/gateway.yaml create mode 100644 cascading-blackout/manifests/network-policies.yaml create mode 100644 cascading-blackout/policies/protect.yaml diff --git a/cascading-blackout/challenge.yaml b/cascading-blackout/challenge.yaml new file mode 100644 index 0000000..f09001c --- /dev/null +++ b/cascading-blackout/challenge.yaml @@ -0,0 +1,112 @@ +title: "Cascading Blackout" +type: "fix" +theme: "networking" +difficulty: "hard" +estimatedTime: 30 + +description: | + The order-processing platform was running perfectly until a recent security hardening push. + The edge proxy returns HTTP 200 on its health endpoint, but actual order requests + fail silently — customers see empty responses or timeouts. + The team reports that "nothing changed in the application code." + +initialSituation: | + A three-tier order processing system is deployed in the namespace: + - An edge proxy (nginx) that routes requests to a backend service + - A backend application that processes orders and caches results + - A Redis cache used by the backend for session and order data + Each tier has its own Deployment, Service, and pods are running. + NetworkPolicies were recently added to lock down inter-service communication. + The edge proxy health check works, but end-to-end order requests fail. + +objective: | + Restore full end-to-end communication across the platform. + Orders submitted through the edge proxy must reach the backend, + and the backend must be able to read and write to the cache. + All services should remain healthy and reachable through their Services. + +objectives: + - key: gateway-running + title: "Gateway Online" + description: "The edge proxy pods must be running and ready" + order: 1 + type: condition + spec: + target: + kind: Pod + labelSelector: + app: edge-proxy + checks: + - type: Ready + status: "True" + + - key: backend-running + title: "Backend Online" + description: "The backend pods must be running and ready" + order: 2 + type: condition + spec: + target: + kind: Pod + labelSelector: + app: order-backend + checks: + - type: Ready + status: "True" + + - key: cache-running + title: "Cache Online" + description: "The cache pods must be running and ready" + order: 3 + type: condition + spec: + target: + kind: Pod + labelSelector: + app: order-cache + checks: + - type: Ready + status: "True" + + - key: gateway-to-backend + title: "Gateway Reaches Backend" + description: "The edge proxy must be able to forward requests to the backend service" + order: 4 + type: connectivity + spec: + sourcePod: + labelSelector: + app: edge-proxy + targets: + - url: "http://order-backend:8080/health" + expectedStatusCode: 200 + timeoutSeconds: 5 + + - key: backend-to-cache + title: "Backend Reaches Cache" + description: "The backend must be able to connect to the cache service" + order: 5 + type: connectivity + spec: + sourcePod: + labelSelector: + app: order-backend + targets: + - url: "http://order-cache:6379" + expectedStatusCode: 0 + timeoutSeconds: 5 + + - key: backend-healthy + title: "Backend Fully Operational" + description: "The backend reports healthy status including cache connectivity" + order: 6 + type: log + spec: + target: + kind: Pod + labelSelector: + app: order-backend + container: order-backend + expectedStrings: + - "ready to accept connections" + sinceSeconds: 120 diff --git a/cascading-blackout/manifests/backend.yaml b/cascading-blackout/manifests/backend.yaml new file mode 100644 index 0000000..37449c4 --- /dev/null +++ b/cascading-blackout/manifests/backend.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: order-backend + namespace: cascading-blackout + labels: + app: order-backend +spec: + replicas: 1 + selector: + matchLabels: + app: order-backend + template: + metadata: + labels: + app: order-backend + spec: + containers: + - name: order-backend + image: busybox:1.36 + ports: + - containerPort: 8080 + command: + - /bin/sh + - -c + - | + # Simple HTTP server that checks cache connectivity + while true; do + # Try to connect to cache + if nc -z -w2 order-cache 6379 2>/dev/null; then + echo "[$(date)] ready to accept connections" + else + echo "[$(date)] ERROR: cannot reach cache at order-cache:6379" + fi + # Serve HTTP health endpoint + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nok" | nc -l -p 8080 -w5 || true + done + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + failureThreshold: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: order-backend + namespace: cascading-blackout +spec: + selector: + app: order-backend + ports: + - port: 8080 + targetPort: 8080 diff --git a/cascading-blackout/manifests/cache.yaml b/cascading-blackout/manifests/cache.yaml new file mode 100644 index 0000000..48cf77f --- /dev/null +++ b/cascading-blackout/manifests/cache.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: order-cache + namespace: cascading-blackout + labels: + app: order-cache + tier: cache +spec: + replicas: 1 + selector: + matchLabels: + app: order-cache + template: + metadata: + labels: + app: order-cache + tier: cache + spec: + containers: + - name: redis + image: redis:7-alpine + ports: + - containerPort: 6379 + readinessProbe: + exec: + command: ["redis-cli", "ping"] + initialDelaySeconds: 5 + periodSeconds: 5 +--- +apiVersion: v1 +kind: Service +metadata: + name: order-cache + namespace: cascading-blackout +spec: + selector: + app: order-cache + ports: + - port: 6379 + targetPort: 6379 diff --git a/cascading-blackout/manifests/gateway.yaml b/cascading-blackout/manifests/gateway.yaml new file mode 100644 index 0000000..485e2a4 --- /dev/null +++ b/cascading-blackout/manifests/gateway.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: edge-proxy + namespace: cascading-blackout + labels: + app: edge-proxy + tier: frontend +spec: + replicas: 1 + selector: + matchLabels: + app: edge-proxy + template: + metadata: + labels: + app: edge-proxy + tier: frontend + spec: + containers: + - name: edge-proxy + image: nginx:1.25-alpine + ports: + - containerPort: 80 + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/conf.d/default.conf + subPath: default.conf + readinessProbe: + httpGet: + path: /healthz + port: 80 + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /healthz + port: 80 + initialDelaySeconds: 10 + periodSeconds: 10 + volumes: + - name: nginx-config + configMap: + name: gateway-config +--- +apiVersion: v1 +kind: Service +metadata: + name: edge-proxy + namespace: cascading-blackout +spec: + selector: + app: edge-proxy + ports: + - port: 80 + targetPort: 80 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: gateway-config + namespace: cascading-blackout +data: + default.conf: | + server { + listen 80; + + location /healthz { + return 200 'ok'; + add_header Content-Type text/plain; + } + + location /api/ { + proxy_pass http://order-backend:8080/; + proxy_connect_timeout 5s; + proxy_read_timeout 10s; + } + } diff --git a/cascading-blackout/manifests/network-policies.yaml b/cascading-blackout/manifests/network-policies.yaml new file mode 100644 index 0000000..e950851 --- /dev/null +++ b/cascading-blackout/manifests/network-policies.yaml @@ -0,0 +1,76 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: gateway-policy + namespace: cascading-blackout +spec: + podSelector: + matchLabels: + app: edge-proxy + policyTypes: + - Ingress + - Egress + ingress: + - ports: + - port: 80 + protocol: TCP + egress: + - ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + - to: + - podSelector: + matchLabels: + app: order-backend + ports: + - port: 8080 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: backend-policy + namespace: cascading-blackout +spec: + podSelector: + matchLabels: + app: order-backend + policyTypes: + - Ingress + - Egress + ingress: + - from: + - podSelector: + matchLabels: + app: edge-proxy + ports: + - port: 8080 + protocol: TCP + egress: + - ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: cache-policy + namespace: cascading-blackout +spec: + podSelector: + matchLabels: + app: order-cache + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + tier: backend + ports: + - port: 6379 + protocol: TCP diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml new file mode 100644 index 0000000..92dc2d8 --- /dev/null +++ b/cascading-blackout/policies/protect.yaml @@ -0,0 +1,93 @@ +apiVersion: kyverno.io/v1 +kind: Policy +metadata: + name: protect-cascading-blackout + namespace: cascading-blackout +spec: + validationFailureAction: Enforce + rules: + - name: preserve-gateway-image + match: + resources: + kinds: ["Deployment"] + names: ["edge-proxy"] + validate: + message: "Cannot change the edge proxy image" + pattern: + spec: + template: + spec: + containers: + - name: edge-proxy + image: "nginx:1.25-alpine" + + - name: preserve-backend-image + match: + resources: + kinds: ["Deployment"] + names: ["order-backend"] + validate: + message: "Cannot change the backend application image" + pattern: + spec: + template: + spec: + containers: + - name: order-backend + image: "busybox:1.36" + + - name: preserve-cache-image + match: + resources: + kinds: ["Deployment"] + names: ["order-cache"] + validate: + message: "Cannot change the cache image" + pattern: + spec: + template: + spec: + containers: + - name: redis + image: "redis:7-alpine" + + - name: prevent-netpol-deletion + match: + resources: + kinds: ["NetworkPolicy"] + validate: + message: "NetworkPolicies cannot be deleted — they are part of the security requirements. Fix them instead." + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "DELETE" + + - name: preserve-gateway-policy + match: + resources: + kinds: ["NetworkPolicy"] + names: ["gateway-policy"] + validate: + message: "The gateway NetworkPolicy is correctly configured and should not be modified" + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "UPDATE" + + - name: preserve-gateway-config + match: + resources: + kinds: ["ConfigMap"] + names: ["gateway-config"] + validate: + message: "Cannot modify the gateway configuration" + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "UPDATE" From ea1a1517fef90890d408f97afd4ad654da88e54f Mon Sep 17 00:00:00 2001 From: pbrissaud Date: Wed, 25 Feb 2026 16:41:54 +0100 Subject: [PATCH 2/8] fix kubeasy-cli command in command --- .claude/commands/review-challenge.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.claude/commands/review-challenge.md b/.claude/commands/review-challenge.md index f70cdb4..4e51c82 100644 --- a/.claude/commands/review-challenge.md +++ b/.claude/commands/review-challenge.md @@ -1,5 +1,5 @@ --- -allowed-tools: Bash(kubeasy-cli*),Bash(kubectl*),Bash(cat*),Bash(grep*),Bash(ls*),Bash(sleep*),Bash(head*),Bash(tail*),Read,Write,Edit +allowed-tools: Bash(kubeasy*),Bash(kubectl*),Bash(cat*),Bash(grep*),Bash(ls*),Bash(sleep*),Bash(head*),Bash(tail*),Read,Write,Edit description: Review a Kubeasy challenge for quality, pedagogy, and bypass resistance --- @@ -32,7 +32,7 @@ You must experience the challenge as a learner first. Run structural validation before deploying anything: ```bash -kubeasy-cli dev lint +kubeasy dev lint ``` If lint fails → **stop the review immediately**, score 0/20, verdict ❌ Fail. @@ -41,15 +41,15 @@ Write the PR comment with lint errors and exit. ### Phase 3: Deploy and Verify Broken State ```bash -kubeasy-cli dev apply --clean +kubeasy dev apply --clean sleep 10 -kubeasy-cli dev status +kubeasy dev status ``` **Then immediately run validations:** ```bash -kubeasy-cli dev validate +kubeasy dev validate ``` All validations MUST FAIL at this point. This confirms the broken state is real. @@ -74,7 +74,7 @@ kubectl get events -n --sort-by='.lastTimestamp' 1. Form a hypothesis about what's wrong 2. Apply a fix using `kubectl` -3. Verify with `kubeasy-cli dev validate ` +3. Verify with `kubeasy dev validate ` **Maximum 5 attempts.** If you can't solve it after 5 tries, flag the challenge and continue. @@ -83,7 +83,7 @@ kubectl get events -n --sort-by='.lastTimestamp' Reset to broken state: ```bash -kubeasy-cli dev apply --clean +kubeasy dev apply --clean sleep 10 ``` @@ -167,7 +167,7 @@ Write a spoiler-free PR comment to `review--pr-comment.md` in the current ### Phase 10: Clean up ```bash -kubeasy-cli dev clean +kubeasy dev clean ``` ## Spoiler-Free Writing Guide From a3e9c2664cf9f411dbff23d7938640fee06d4df7 Mon Sep 17 00:00:00 2001 From: pbrissaud Date: Wed, 25 Feb 2026 17:13:00 +0100 Subject: [PATCH 3/8] fix(cascading-blackout): address review blockers - Add sync-wave annotation and background field to Kyverno policy so it deploys correctly via ArgoCD - Remove non-discriminating backend-to-cache connectivity validator (HTTP against Redis always returns status 0 regardless of connectivity) - Rewrite initialSituation to avoid spoiling NetworkPolicies as root cause Co-Authored-By: Claude Opus 4.6 --- cascading-blackout/challenge.yaml | 20 +++----------------- cascading-blackout/policies/protect.yaml | 3 +++ 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/cascading-blackout/challenge.yaml b/cascading-blackout/challenge.yaml index f09001c..7813b98 100644 --- a/cascading-blackout/challenge.yaml +++ b/cascading-blackout/challenge.yaml @@ -16,8 +16,8 @@ initialSituation: | - A backend application that processes orders and caches results - A Redis cache used by the backend for session and order data Each tier has its own Deployment, Service, and pods are running. - NetworkPolicies were recently added to lock down inter-service communication. - The edge proxy health check works, but end-to-end order requests fail. + After a recent infrastructure change, the edge proxy health check still works, + but end-to-end order requests fail. objective: | Restore full end-to-end communication across the platform. @@ -82,24 +82,10 @@ objectives: expectedStatusCode: 200 timeoutSeconds: 5 - - key: backend-to-cache - title: "Backend Reaches Cache" - description: "The backend must be able to connect to the cache service" - order: 5 - type: connectivity - spec: - sourcePod: - labelSelector: - app: order-backend - targets: - - url: "http://order-cache:6379" - expectedStatusCode: 0 - timeoutSeconds: 5 - - key: backend-healthy title: "Backend Fully Operational" description: "The backend reports healthy status including cache connectivity" - order: 6 + order: 5 type: log spec: target: diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml index 92dc2d8..c618a9a 100644 --- a/cascading-blackout/policies/protect.yaml +++ b/cascading-blackout/policies/protect.yaml @@ -3,8 +3,11 @@ kind: Policy metadata: name: protect-cascading-blackout namespace: cascading-blackout + annotations: + argocd.argoproj.io/sync-wave: "2" spec: validationFailureAction: Enforce + background: true rules: - name: preserve-gateway-image match: From 14fef66b2fec37252ba378f17da3fb01cdcebff0 Mon Sep 17 00:00:00 2001 From: Paul Brissaud Date: Thu, 26 Feb 2026 10:21:46 +0100 Subject: [PATCH 4/8] fix(cascading-blackout): improve bypass resistance, validation coverage and backend stability - Extend Kyverno UPDATE deny rules to backend-policy and cache-policy, closing the two remaining bypass paths (previously only gateway-policy was protected); error message on backend-policy hints at additive NetworkPolicy approach - Add backend-service-identity validation (condition check on tier=backend label) to surface partial progress when one fix is applied before the other - Decouple backend HTTP server from cache check loop: cache probe runs in background with sleep 5, HTTP listener runs in a dedicated foreground loop, eliminating timing-dependent flakiness on gateway-to-backend validation - Soften intermediate validation description from "tier-based routing" to "required by the network layer" to avoid hinting at the fix mechanism - Update description and initialSituation to signal multiple simultaneous changes without revealing root causes Score improved from 14/20 to 19/20 (Pass) across two review iterations. Co-Authored-By: Claude Sonnet 4.6 --- cascading-blackout/challenge.yaml | 22 +++++++++++++++-- cascading-blackout/manifests/backend.yaml | 8 ++++--- cascading-blackout/policies/protect.yaml | 28 ++++++++++++++++++++++ review-cascading-blackout-pr-comment.md | 29 +++++++++++++++++++++++ 4 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 review-cascading-blackout-pr-comment.md diff --git a/cascading-blackout/challenge.yaml b/cascading-blackout/challenge.yaml index 7813b98..be33d2c 100644 --- a/cascading-blackout/challenge.yaml +++ b/cascading-blackout/challenge.yaml @@ -8,7 +8,8 @@ description: | The order-processing platform was running perfectly until a recent security hardening push. The edge proxy returns HTTP 200 on its health endpoint, but actual order requests fail silently — customers see empty responses or timeouts. - The team reports that "nothing changed in the application code." + The team reports that "nothing changed in the application code" — + but the infrastructure change touched multiple components at once. initialSituation: | A three-tier order processing system is deployed in the namespace: @@ -18,6 +19,8 @@ initialSituation: | Each tier has its own Deployment, Service, and pods are running. After a recent infrastructure change, the edge proxy health check still works, but end-to-end order requests fail. + The security hardening introduced several changes simultaneously — + investigate each tier carefully before concluding the root cause. objective: | Restore full end-to-end communication across the platform. @@ -82,10 +85,25 @@ objectives: expectedStatusCode: 200 timeoutSeconds: 5 + - key: backend-service-identity + title: "Backend Service Classification" + description: "The backend pods carry the expected operational labels required by the network layer" + order: 5 + type: condition + spec: + target: + kind: Pod + labelSelector: + app: order-backend + tier: backend + checks: + - type: Ready + status: "True" + - key: backend-healthy title: "Backend Fully Operational" description: "The backend reports healthy status including cache connectivity" - order: 5 + order: 6 type: log spec: target: diff --git a/cascading-blackout/manifests/backend.yaml b/cascading-blackout/manifests/backend.yaml index 37449c4..898aba8 100644 --- a/cascading-blackout/manifests/backend.yaml +++ b/cascading-blackout/manifests/backend.yaml @@ -24,15 +24,17 @@ spec: - /bin/sh - -c - | - # Simple HTTP server that checks cache connectivity + # Cache connectivity check runs independently in background while true; do - # Try to connect to cache if nc -z -w2 order-cache 6379 2>/dev/null; then echo "[$(date)] ready to accept connections" else echo "[$(date)] ERROR: cannot reach cache at order-cache:6379" fi - # Serve HTTP health endpoint + sleep 5 + done & + # HTTP server always listening, not gated by cache check timing + while true; do echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nok" | nc -l -p 8080 -w5 || true done readinessProbe: diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml index c618a9a..e69cfc0 100644 --- a/cascading-blackout/policies/protect.yaml +++ b/cascading-blackout/policies/protect.yaml @@ -81,6 +81,34 @@ spec: operator: Equals value: "UPDATE" + - name: preserve-backend-policy + match: + resources: + kinds: ["NetworkPolicy"] + names: ["backend-policy"] + validate: + message: "The backend NetworkPolicy cannot be modified. You can create additional NetworkPolicy resources to extend its connectivity rules." + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "UPDATE" + + - name: preserve-cache-policy + match: + resources: + kinds: ["NetworkPolicy"] + names: ["cache-policy"] + validate: + message: "The cache NetworkPolicy is intentionally configured this way and cannot be modified." + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "UPDATE" + - name: preserve-gateway-config match: resources: diff --git a/review-cascading-blackout-pr-comment.md b/review-cascading-blackout-pr-comment.md new file mode 100644 index 0000000..932edb8 --- /dev/null +++ b/review-cascading-blackout-pr-comment.md @@ -0,0 +1,29 @@ +## 🔍 Challenge Review: Cascading Blackout + +**Score: 19/20** · Verdict: ✅ Pass + +| Criterion | Score | Comment | +|-----------|:-----:|---------| +| Clarity | 4/4 | Symptom-only description, realistic incident framing. The hint that "multiple components" were affected sets expectations without revealing cause. All validation titles remain generic throughout. | +| Pedagogy | 4/4 | Teaches three complementary networking concepts in one challenge (label-based pod selectors, egress rules, and additive policy composition). Investigation path flows naturally from observable symptoms to root causes. Two-bug cascading design is realistic and earns its Hard rating. | +| Validation | 3/4 | All checks are consistent and reliable — no timing-related flakiness. The intermediate validation provides useful partial-progress feedback when one fix is applied before the other. Minor: it checks a specific implementation detail rather than a pure outcome, which is slightly narrower than ideal but justified by the pedagogical value of the intermediate signal. | +| Bypass resistance | 4/4 | All three NetworkPolicies protected against both deletion and modification. Images locked. One error message proactively hints at the correct remediation approach for the connectivity gap, guiding learners without revealing the solution. No bypasses found. | +| UX | 4/4 | The gateway connectivity validation now passes consistently in broken state, correctly focusing learner attention on the backend-to-cache tier. Error messages from protection policies are informative. Intermediate validation feedback reflects partial progress cleanly. Difficulty and time estimate are accurate. | + +### What works well + +This is a well-crafted challenge that mirrors real production incidents. The "security hardening touched multiple components at once" framing is authentic — this is exactly how cascading failures occur in practice. The investigation path is well-structured: observable symptoms in logs point toward the network layer, where two independent but cooperative issues reveal themselves. Neither fix alone is sufficient, which teaches learners to reason about the full communication chain rather than stopping at the first finding. The intermediate validation turns what could be a frustrating two-bug puzzle into a guided discovery experience. All protection policies have clear, actionable error messages. + +### Minor note + +The intermediate validation checks a specific metadata property rather than an observable service behavior. This is an acceptable trade-off for the feedback value it provides, but a future improvement could express the same constraint through the final end-to-end outcome alone (i.e., rely solely on the log validation to confirm full connectivity once both fixes are in place). + +### Flags + +- Solvable: ✅ +- Bypass found: ❌ +- Coherent with learning goal: ✅ +- Solved in 1 attempt (two coordinated sub-fixes applied simultaneously) + +--- +*Reviewed by Kubeasy Challenge Reviewer* From e6d9e93abf790333e5543abc13941b74be7bbdbb Mon Sep 17 00:00:00 2001 From: Paul Brissaud Date: Thu, 26 Feb 2026 16:13:12 +0100 Subject: [PATCH 5/8] ci: retrigger review with kubeasy-cli v2.5.4 fix for Kyverno policy deployment Co-Authored-By: Claude Sonnet 4.6 From a145be1261dcba123b58ad0015f6c1d3c3b2ab0a Mon Sep 17 00:00:00 2001 From: Paul Brissaud Date: Thu, 26 Feb 2026 16:28:29 +0100 Subject: [PATCH 6/8] fix(cascading-blackout): prevent backend Deployment deletion to close bypass Co-Authored-By: Claude Sonnet 4.6 --- cascading-blackout/policies/protect.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml index e69cfc0..e6d0b3f 100644 --- a/cascading-blackout/policies/protect.yaml +++ b/cascading-blackout/policies/protect.yaml @@ -39,6 +39,20 @@ spec: - name: order-backend image: "busybox:1.36" + - name: prevent-backend-deletion + match: + resources: + kinds: ["Deployment"] + names: ["order-backend"] + validate: + message: "The backend Deployment cannot be deleted — fix it instead." + deny: + conditions: + any: + - key: "{{ request.operation }}" + operator: Equals + value: "DELETE" + - name: preserve-cache-image match: resources: From 0277f03d5c5befab40cd147cc1dd2bfae0baa872 Mon Sep 17 00:00:00 2001 From: Paul Brissaud Date: Thu, 26 Feb 2026 16:28:58 +0100 Subject: [PATCH 7/8] fix(cascading-blackout): neutralize backend deletion error message Co-Authored-By: Claude Sonnet 4.6 --- cascading-blackout/policies/protect.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml index e6d0b3f..09c5ac6 100644 --- a/cascading-blackout/policies/protect.yaml +++ b/cascading-blackout/policies/protect.yaml @@ -45,7 +45,7 @@ spec: kinds: ["Deployment"] names: ["order-backend"] validate: - message: "The backend Deployment cannot be deleted — fix it instead." + message: "The backend Deployment is protected and cannot be deleted." deny: conditions: any: From 4d0d8bfc08db1d24d4b8e415d1047f831b92f351 Mon Sep 17 00:00:00 2001 From: Paul Brissaud Date: Thu, 26 Feb 2026 18:35:14 +0100 Subject: [PATCH 8/8] fix(cascading-blackout): close bypass gap, fix broken-state signal, neutralize description - Convert Policy to ClusterPolicy (cluster-scoped, not deletable by namespaced users) - Add namespace scoping to all ClusterPolicy rules - Backend HTTP server returns 503 when cache unreachable (gateway-to-backend fails in broken state) - backend-service-identity uses Initialized instead of Ready to preserve intermediate signal - Neutralize backend-service-identity description Co-Authored-By: Claude Sonnet 4.6 --- cascading-blackout/challenge.yaml | 4 ++-- cascading-blackout/manifests/backend.yaml | 12 +++++++++--- cascading-blackout/policies/protect.yaml | 12 ++++++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/cascading-blackout/challenge.yaml b/cascading-blackout/challenge.yaml index be33d2c..2f669f4 100644 --- a/cascading-blackout/challenge.yaml +++ b/cascading-blackout/challenge.yaml @@ -87,7 +87,7 @@ objectives: - key: backend-service-identity title: "Backend Service Classification" - description: "The backend pods carry the expected operational labels required by the network layer" + description: "The backend pods are correctly classified within the platform" order: 5 type: condition spec: @@ -97,7 +97,7 @@ objectives: app: order-backend tier: backend checks: - - type: Ready + - type: Initialized status: "True" - key: backend-healthy diff --git a/cascading-blackout/manifests/backend.yaml b/cascading-blackout/manifests/backend.yaml index 898aba8..fb3c387 100644 --- a/cascading-blackout/manifests/backend.yaml +++ b/cascading-blackout/manifests/backend.yaml @@ -24,18 +24,24 @@ spec: - /bin/sh - -c - | - # Cache connectivity check runs independently in background + # Background: check cache and update state flag while true; do if nc -z -w2 order-cache 6379 2>/dev/null; then echo "[$(date)] ready to accept connections" + touch /tmp/cache_ok else echo "[$(date)] ERROR: cannot reach cache at order-cache:6379" + rm -f /tmp/cache_ok fi sleep 5 done & - # HTTP server always listening, not gated by cache check timing + # Foreground: HTTP server reflects cache state while true; do - echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nok" | nc -l -p 8080 -w5 || true + if [ -f /tmp/cache_ok ]; then + echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\n\r\nok" | nc -l -p 8080 -w5 || true + else + echo -e "HTTP/1.1 503 Service Unavailable\r\nContent-Type: text/plain\r\n\r\nunavailable" | nc -l -p 8080 -w5 || true + fi done readinessProbe: httpGet: diff --git a/cascading-blackout/policies/protect.yaml b/cascading-blackout/policies/protect.yaml index 09c5ac6..fa608e7 100644 --- a/cascading-blackout/policies/protect.yaml +++ b/cascading-blackout/policies/protect.yaml @@ -1,8 +1,7 @@ apiVersion: kyverno.io/v1 -kind: Policy +kind: ClusterPolicy metadata: name: protect-cascading-blackout - namespace: cascading-blackout annotations: argocd.argoproj.io/sync-wave: "2" spec: @@ -12,6 +11,7 @@ spec: - name: preserve-gateway-image match: resources: + namespaces: ["cascading-blackout"] kinds: ["Deployment"] names: ["edge-proxy"] validate: @@ -27,6 +27,7 @@ spec: - name: preserve-backend-image match: resources: + namespaces: ["cascading-blackout"] kinds: ["Deployment"] names: ["order-backend"] validate: @@ -42,6 +43,7 @@ spec: - name: prevent-backend-deletion match: resources: + namespaces: ["cascading-blackout"] kinds: ["Deployment"] names: ["order-backend"] validate: @@ -56,6 +58,7 @@ spec: - name: preserve-cache-image match: resources: + namespaces: ["cascading-blackout"] kinds: ["Deployment"] names: ["order-cache"] validate: @@ -71,6 +74,7 @@ spec: - name: prevent-netpol-deletion match: resources: + namespaces: ["cascading-blackout"] kinds: ["NetworkPolicy"] validate: message: "NetworkPolicies cannot be deleted — they are part of the security requirements. Fix them instead." @@ -84,6 +88,7 @@ spec: - name: preserve-gateway-policy match: resources: + namespaces: ["cascading-blackout"] kinds: ["NetworkPolicy"] names: ["gateway-policy"] validate: @@ -98,6 +103,7 @@ spec: - name: preserve-backend-policy match: resources: + namespaces: ["cascading-blackout"] kinds: ["NetworkPolicy"] names: ["backend-policy"] validate: @@ -112,6 +118,7 @@ spec: - name: preserve-cache-policy match: resources: + namespaces: ["cascading-blackout"] kinds: ["NetworkPolicy"] names: ["cache-policy"] validate: @@ -126,6 +133,7 @@ spec: - name: preserve-gateway-config match: resources: + namespaces: ["cascading-blackout"] kinds: ["ConfigMap"] names: ["gateway-config"] validate: