diff --git a/infra/helm/cert-manager/letsencrypt-staging.yaml b/infra/helm/cert-manager/letsencrypt-staging.yaml new file mode 100644 index 0000000..2fa6a77 --- /dev/null +++ b/infra/helm/cert-manager/letsencrypt-staging.yaml @@ -0,0 +1,24 @@ +# ClusterIssuer for Let's Encrypt TLS certificates +# Generated by: uv run api-forge-cli k8s setup-tls --email pieware@gmail.com +# This is a cluster-scoped resource (not namespaced). +# Apply with: kubectl apply -f infra/helm/cert-manager/letsencrypt-staging.yaml +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-staging + labels: + app.kubernetes.io/managed-by: api-forge-cli +spec: + acme: + # Let's Encrypt ACME server + server: https://acme-staging-v02.api.letsencrypt.org/directory + # Email for certificate expiration notifications + email: pieware@gmail.com + # Secret to store the ACME account private key + privateKeySecretRef: + name: letsencrypt-staging-account-key + # HTTP-01 challenge solver using NGINX ingress + solvers: + - http01: + ingress: + class: nginx diff --git a/pyproject.toml b/pyproject.toml index b6f7748..a802980 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "temporalio>=1.18.1", "requests>=2.32.5", "ruamel.yaml>=0.18.6", + "kr8s>=0.20.14", ] [build-system] diff --git a/src/cli/__init__.py b/src/cli/__init__.py index 6be2076..b1cd3d1 100644 --- a/src/cli/__init__.py +++ b/src/cli/__init__.py @@ -1,10 +1,30 @@ -"""Main CLI application module.""" +"""Main CLI application module. + +This module provides the main entry point for the API Forge CLI. +Commands are organized by deployment target (dev, prod, k8s, fly) +rather than by operation type (up, down, status). + +Command Groups: +- dev: Development Docker Compose environment +- prod: Production Docker Compose deployment +- k8s: Kubernetes Helm deployment +- fly: Fly.io Kubernetes (coming soon) +- entity: Entity/model scaffolding +- secrets: Secret management +- users: Keycloak user management (dev) +""" import typer -from .deploy_commands import deploy_app -from .entity_commands import entity_app -from .secrets_commands import secrets_app +from .commands import ( + dev_app, + entity_app, + fly_app, + k8s_app, + prod_app, + secrets_app, + users_app, +) # Create the main CLI application app = typer.Typer( @@ -13,10 +33,16 @@ rich_markup_mode="rich", ) -# Register command groups -app.add_typer(deploy_app, name="deploy") +# Register deployment target command groups +app.add_typer(dev_app, name="dev", help="Development environment commands") +app.add_typer(prod_app, name="prod", help="Production Docker Compose commands") +app.add_typer(k8s_app, name="k8s", help="Kubernetes Helm deployment commands") +app.add_typer(fly_app, name="fly", help="Fly.io Kubernetes commands (coming soon)") + +# Register utility command groups app.add_typer(entity_app, name="entity") app.add_typer(secrets_app, name="secrets") +app.add_typer(users_app, name="users") def main() -> None: diff --git a/src/cli/commands/__init__.py b/src/cli/commands/__init__.py new file mode 100644 index 0000000..8d79da5 --- /dev/null +++ b/src/cli/commands/__init__.py @@ -0,0 +1,32 @@ +"""CLI command modules organized by deployment target. + +This package provides the restructured CLI with separate command groups +for each deployment target (dev, prod, k8s, fly) and utilities (entity, secrets, users). 
+ +Command Groups: +- dev: Development environment using Docker Compose +- prod: Production Docker Compose deployment +- k8s: Kubernetes deployment using Helm +- fly: Fly.io Kubernetes (FKS) deployment (future) +- entity: Entity/model scaffolding +- secrets: Secret management utilities +- users: Keycloak user management (dev environment) +""" + +from .dev import app as dev_app +from .entity import entity_app +from .fly import fly_app +from .k8s import k8s_app +from .prod import prod_app +from .secrets import secrets_app +from .users import users_app + +__all__ = [ + "dev_app", + "prod_app", + "k8s_app", + "fly_app", + "entity_app", + "secrets_app", + "users_app", +] diff --git a/src/cli/commands/dev.py b/src/cli/commands/dev.py new file mode 100644 index 0000000..aeb0a1d --- /dev/null +++ b/src/cli/commands/dev.py @@ -0,0 +1,264 @@ +"""Development environment CLI commands. + +This module provides commands for managing the Docker Compose +development environment including Keycloak, PostgreSQL, Redis, and Temporal. + +Commands: + up - Start the development environment + down - Stop the development environment + status - Show status of development services + logs - View logs from a service + restart - Restart a specific service +""" + +from pathlib import Path + +import typer + +from src.cli.deployment import DevDeployer +from src.cli.deployment.helm_deployer.image_builder import DeploymentError + +from .shared import ( + confirm_action, + console, + get_project_root, + handle_error, + print_header, +) + +# Create the dev command group +app = typer.Typer( + name="dev", + help="🔧 Development environment commands (Docker Compose)", + no_args_is_help=True, +) + + +def _get_deployer() -> DevDeployer: + """Create a DevDeployer instance with current project context.""" + return DevDeployer(console, Path(get_project_root())) + + +# ============================================================================= +# Commands +# ============================================================================= + + +@app.command() +def up( + force: bool = typer.Option( + False, + "--force", + "-f", + help="Force restart even if services are already running", + ), + no_wait: bool = typer.Option( + False, + "--no-wait", + help="Don't wait for services to be healthy", + ), + start_server: bool = typer.Option( + True, + "--start-server/--no-start-server", + help="Start FastAPI dev server after services are ready", + ), +) -> None: + """🚀 Start the development environment. + + Starts all development services (Keycloak, PostgreSQL, Redis, Temporal) + using Docker Compose, then optionally starts the FastAPI development server. + + Examples: + # Start everything including dev server + api-forge-cli dev up + + # Start services only, no dev server + api-forge-cli dev up --no-start-server + + # Force restart all services + api-forge-cli dev up --force + """ + print_header("Starting Development Environment") + + try: + deployer = _get_deployer() + deployer.deploy(force=force, no_wait=no_wait, start_server=start_server) + except DeploymentError as e: + handle_error(f"Deployment failed: {e.message}", e.details) + + +@app.command() +def down( + volumes: bool = typer.Option( + False, + "--volumes", + "-v", + help="Also remove data volumes (DESTROYS ALL DATA)", + ), + yes: bool = typer.Option( + False, + "--yes", + "-y", + help="Skip confirmation prompt", + ), +) -> None: + """âšī¸ Stop the development environment. + + Stops all Docker Compose services. Use --volumes to also remove + persistent data (databases, caches). 
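+    A confirmation prompt is shown before stopping; pass --yes to skip it.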
+ + Examples: + # Stop services (preserves data) + api-forge-cli dev down + + # Stop and remove all data + api-forge-cli dev down --volumes + """ + details = "This will stop all development Docker Compose services." + extra_warning = None + + if volumes: + extra_warning = ( + "âš ī¸ --volumes flag is set: ALL DATA WILL BE PERMANENTLY DELETED!\n" + " This includes databases, caches, and any persistent storage." + ) + + if not confirm_action( + action="Stop development environment", + details=details, + extra_warning=extra_warning, + force=yes, + ): + console.print("[dim]Operation cancelled.[/dim]") + raise typer.Exit(0) + + print_header("Stopping Development Environment", style="red") + + try: + deployer = _get_deployer() + deployer.teardown(volumes=volumes) + except DeploymentError as e: + handle_error(f"Teardown failed: {e.message}", e.details) + + +@app.command() +def status() -> None: + """📊 Show status of development services. + + Displays the current status of all development services including + health check results and connection information. + + Examples: + api-forge-cli dev status + """ + deployer = _get_deployer() + deployer.show_status() + + +@app.command() +def logs( + service: str = typer.Argument( + None, + help="Service name (keycloak, postgres, redis, temporal). Shows all if omitted.", + ), + follow: bool = typer.Option( + False, + "--follow", + "-f", + help="Follow log output", + ), + tail: int = typer.Option( + 100, + "--tail", + "-n", + help="Number of lines to show from the end", + ), +) -> None: + """📜 View logs from development services. + + Shows logs from Docker Compose services. Specify a service name + to view logs from a single service. + + Examples: + # View all logs + api-forge-cli dev logs + + # View PostgreSQL logs + api-forge-cli dev logs postgres + + # Follow Keycloak logs + api-forge-cli dev logs keycloak --follow + """ + import subprocess + + compose_file = "docker-compose.dev.yml" + cmd = ["docker", "compose", "-f", compose_file, "logs"] + + if tail: + cmd.extend(["--tail", str(tail)]) + + if follow: + cmd.append("--follow") + + if service: + # Map friendly names to Docker Compose service names + service_map = { + "keycloak": "keycloak", + "postgres": "postgres", + "redis": "redis", + "temporal": "temporal", + "temporal-ui": "temporal-web", + } + compose_service = service_map.get(service.lower(), service) + cmd.append(compose_service) + + try: + subprocess.run(cmd, cwd=get_project_root(), check=True) + except subprocess.CalledProcessError as e: + handle_error(f"Failed to get logs: {e}") + except KeyboardInterrupt: + pass # User cancelled with Ctrl+C + + +@app.command() +def restart( + service: str = typer.Argument( + ..., + help="Service to restart (keycloak, postgres, redis, temporal)", + ), +) -> None: + """🔄 Restart a specific development service. + + Restarts a single service without affecting other services. 
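+    Recognized service names: keycloak, postgres, redis, temporal, temporal-ui.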
+ + Examples: + # Restart PostgreSQL + api-forge-cli dev restart postgres + + # Restart Keycloak + api-forge-cli dev restart keycloak + """ + import subprocess + + compose_file = "docker-compose.dev.yml" + + # Map friendly names to Docker Compose service names + service_map = { + "keycloak": "keycloak", + "postgres": "postgres", + "redis": "redis", + "temporal": "temporal", + "temporal-ui": "temporal-web", + } + + compose_service = service_map.get(service.lower(), service) + + console.print(f"[bold]Restarting {service}...[/bold]") + + cmd = ["docker", "compose", "-f", compose_file, "restart", compose_service] + + try: + subprocess.run(cmd, cwd=get_project_root(), check=True) + console.print(f"[green]✅ {service} restarted successfully[/green]") + except subprocess.CalledProcessError as e: + handle_error(f"Failed to restart {service}: {e}") diff --git a/src/cli/entity_commands.py b/src/cli/commands/entity.py similarity index 99% rename from src/cli/entity_commands.py rename to src/cli/commands/entity.py index 77ae192..c0441ef 100644 --- a/src/cli/entity_commands.py +++ b/src/cli/commands/entity.py @@ -10,7 +10,7 @@ from rich.prompt import Prompt from rich.table import Table -from .utils import console, get_project_root +from .shared import console, get_project_root # Create the entity command group entity_app = typer.Typer(help="🎭 Entity management commands") diff --git a/src/cli/commands/fly.py b/src/cli/commands/fly.py new file mode 100644 index 0000000..666a913 --- /dev/null +++ b/src/cli/commands/fly.py @@ -0,0 +1,130 @@ +"""Fly.io Kubernetes (FKS) deployment commands. + +This module provides a placeholder for future Fly.io Kubernetes Service +deployment commands. FKS is currently in beta and not yet fully supported. + +See docs/fastapi-flyio-kubernetes.md for compatibility analysis. +""" + +from typing import Annotated + +import typer + +from .shared import console, print_header + +# --------------------------------------------------------------------------- +# Typer App +# --------------------------------------------------------------------------- + +fly_app = typer.Typer( + name="fly", + help="Fly.io Kubernetes (FKS) deployment commands (coming soon).", + no_args_is_help=True, +) + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + + +@fly_app.command() +def up( + cluster: Annotated[ + str | None, + typer.Option( + "--cluster", + "-c", + help="FKS cluster name", + ), + ] = None, +) -> None: + """Deploy to Fly.io Kubernetes Service (coming soon). + + This command is a placeholder for future FKS deployment support. + FKS is currently in beta with some limitations for our use case. + + See docs/fastapi-flyio-kubernetes.md for details. + """ + print_header("Fly.io Kubernetes Deployment") + _show_coming_soon_message() + + +@fly_app.command() +def down( + cluster: Annotated[ + str | None, + typer.Option( + "--cluster", + "-c", + help="FKS cluster name", + ), + ] = None, +) -> None: + """Remove Fly.io Kubernetes deployment (coming soon). + + This command is a placeholder for future FKS deployment support. + """ + print_header("Removing Fly.io Deployment") + _show_coming_soon_message() + + +@fly_app.command() +def status( + cluster: Annotated[ + str | None, + typer.Option( + "--cluster", + "-c", + help="FKS cluster name", + ), + ] = None, +) -> None: + """Show Fly.io Kubernetes deployment status (coming soon). 
+ + This command is a placeholder for future FKS deployment support. + """ + print_header("Fly.io Deployment Status") + _show_coming_soon_message() + + +@fly_app.command() +def clusters() -> None: + """List available FKS clusters (coming soon). + + This command is a placeholder for future FKS support. + """ + print_header("FKS Clusters") + _show_coming_soon_message() + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _show_coming_soon_message() -> None: + """Display the coming soon message with context.""" + from rich.panel import Panel + + message = """[yellow]Fly.io Kubernetes Service (FKS) support is planned but not yet implemented.[/yellow] + +[bold cyan]Current Status:[/bold cyan] +â€ĸ FKS is in public beta +â€ĸ Some features we need (e.g., Ingress) require workarounds +â€ĸ We're monitoring FKS development for GA readiness + +[bold cyan]Key Differences from Standard K8s:[/bold cyan] +â€ĸ Uses LoadBalancer instead of Ingress for external access +â€ĸ No built-in Ingress controller +â€ĸ Uses Fly.io's global edge network for routing +â€ĸ Requires flyctl for cluster management + +[bold cyan]Next Steps:[/bold cyan] +1. Review compatibility analysis: docs/fastapi-flyio-kubernetes.md +2. For standard Kubernetes, use: [green]uv run api-forge-cli k8s up[/green] +3. For Docker Compose production: [green]uv run api-forge-cli prod up[/green] + +[dim]Want to help implement FKS support? Contributions welcome![/dim]""" + + console.print(Panel(message, title="Coming Soon", border_style="yellow")) diff --git a/src/cli/commands/k8s.py b/src/cli/commands/k8s.py new file mode 100644 index 0000000..ce40909 --- /dev/null +++ b/src/cli/commands/k8s.py @@ -0,0 +1,865 @@ +"""Kubernetes Helm deployment commands. + +This module provides commands for deploying, managing, and monitoring +Kubernetes deployments via Helm. +""" + +from typing import TYPE_CHECKING, Annotated + +import typer +from rich.panel import Panel +from rich.table import Table + +from src.infra.k8s import Kr8sController, run_sync + +from .shared import ( + confirm_action, + console, + get_project_root, + print_header, + with_error_handling, +) + +if TYPE_CHECKING: + from src.cli.deployment.helm_deployer.deployer import HelmDeployer + + +# --------------------------------------------------------------------------- +# Kubernetes Controller (module-level singleton) +# --------------------------------------------------------------------------- + +_controller = Kr8sController() + + +# --------------------------------------------------------------------------- +# Deployer Factory +# --------------------------------------------------------------------------- + + +def _get_deployer() -> "HelmDeployer": + """Get the Helm deployer instance. + + Returns: + HelmDeployer instance configured for current project + """ + from src.cli.deployment.helm_deployer.deployer import HelmDeployer + + return HelmDeployer(console, get_project_root()) + + +# --------------------------------------------------------------------------- +# Helper Functions +# --------------------------------------------------------------------------- + + +def _check_cluster_issuer_ready(issuer_name: str) -> bool: + """Check if a ClusterIssuer exists and is ready. 
+ + Args: + issuer_name: Name of the ClusterIssuer to check + + Returns: + True if the ClusterIssuer exists and is ready, False otherwise + """ + status = run_sync(_controller.get_cluster_issuer_status(issuer_name)) + return status.exists and status.ready + + +def _check_cert_manager_installed() -> bool: + """Check if cert-manager is installed in the cluster. + + Returns: + True if cert-manager pods are running, False otherwise + """ + return run_sync(_controller.check_cert_manager_installed()) + + +def _install_cert_manager() -> bool: + """Install cert-manager using Helm. + + Returns: + True if installation succeeded, False otherwise + """ + import subprocess + + console.print("[cyan]Installing cert-manager via Helm...[/cyan]") + + # Add Helm repo + subprocess.run( + ["helm", "repo", "add", "jetstack", "https://charts.jetstack.io"], + capture_output=True, + check=False, + ) + subprocess.run( + ["helm", "repo", "update"], + capture_output=True, + check=False, + ) + + # Install cert-manager + result = subprocess.run( + [ + "helm", + "install", + "cert-manager", + "jetstack/cert-manager", + "--namespace", + "cert-manager", + "--create-namespace", + "--set", + "installCRDs=true", + "--wait", + "--timeout", + "5m", + ], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + console.print("[red]Failed to install cert-manager[/red]") + if result.stderr: + console.print(f"[dim]{result.stderr}[/dim]") + return False + + console.print("[green]✓[/green] cert-manager installed successfully") + return True + + +def _wait_for_cluster_issuer(issuer_name: str, timeout: int = 60) -> bool: + """Wait for a ClusterIssuer to become ready. + + Args: + issuer_name: Name of the ClusterIssuer + timeout: Maximum seconds to wait + + Returns: + True if issuer became ready, False if timeout + """ + import time + + console.print( + f"[dim]Waiting for ClusterIssuer '{issuer_name}' to be ready...[/dim]" + ) + + start = time.time() + while time.time() - start < timeout: + if _check_cluster_issuer_ready(issuer_name): + return True + time.sleep(2) + + # Check if it exists but isn't ready + yaml_output = run_sync(_controller.get_cluster_issuer_yaml(issuer_name)) + if yaml_output: + console.print("[yellow]ClusterIssuer exists but not ready yet[/yellow]") + console.print(f"[dim]{yaml_output}[/dim]") + + return False + + +# --------------------------------------------------------------------------- +# Typer App +# --------------------------------------------------------------------------- + +k8s_app = typer.Typer( + name="k8s", + help="Kubernetes Helm deployment commands.", + no_args_is_help=True, +) + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + + +@k8s_app.command() +@with_error_handling +def up( + namespace: Annotated[ + str, + typer.Option( + "--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", + registry: Annotated[ + str | None, + typer.Option( + "--registry", + "-r", + help="Container registry URL (e.g., ghcr.io/myuser)", + ), + ] = None, + ingress: Annotated[ + bool, + typer.Option( + "--ingress", + help="Enable Ingress for external access", + ), + ] = False, + ingress_host: Annotated[ + str | None, + typer.Option( + "--ingress-host", + help="Hostname for Ingress (e.g., api.example.com)", + ), + ] = None, + ingress_tls_secret: Annotated[ + str | None, + typer.Option( + "--ingress-tls-secret", + help="TLS secret name for HTTPS (manual 
certificate)", + ), + ] = None, + ingress_tls_auto: Annotated[ + bool, + typer.Option( + "--ingress-tls-auto", + help="Auto-provision TLS via cert-manager (requires setup-tls first)", + ), + ] = False, + ingress_tls_staging: Annotated[ + bool, + typer.Option( + "--ingress-tls-staging", + help="Use Let's Encrypt staging (with --ingress-tls-auto)", + ), + ] = False, +) -> None: + """Deploy to Kubernetes cluster using Helm. + + This command: + - Runs pre-deployment validation with cleanup prompts + - Builds Docker images with content-based tagging + - Loads images into target cluster (Minikube, Kind, or registry) + - Deploys Kubernetes secrets + - Syncs config.yaml to Helm values + - Deploys via Helm upgrade --install + - Waits for rollouts to complete + + Examples: + uv run api-forge-cli k8s up + uv run api-forge-cli k8s up -n my-namespace + uv run api-forge-cli k8s up --registry ghcr.io/myuser + uv run api-forge-cli k8s up --ingress --ingress-host api.example.com + uv run api-forge-cli k8s up --ingress --ingress-host api.example.com --ingress-tls-auto + """ + print_header("Deploying to Kubernetes") + + # Validate TLS options + if ingress_tls_auto and ingress_tls_secret: + console.print( + "[red]Cannot use both --ingress-tls-auto and --ingress-tls-secret[/red]" + ) + raise typer.Exit(1) + + if ingress_tls_auto and not ingress: + console.print( + "[yellow]--ingress-tls-auto implies --ingress, enabling it[/yellow]" + ) + ingress = True + + if ingress_tls_staging and not ingress_tls_auto: + console.print("[red]--ingress-tls-staging requires --ingress-tls-auto[/red]") + raise typer.Exit(1) + + # Check cert-manager is ready if using auto TLS + if ingress_tls_auto: + issuer_name = ( + "letsencrypt-staging" if ingress_tls_staging else "letsencrypt-prod" + ) + if not _check_cluster_issuer_ready(issuer_name): + console.print( + f"[red]ClusterIssuer '{issuer_name}' not found or not ready.[/red]" + ) + console.print("\n[dim]Run setup-tls first:[/dim]") + staging_flag = " --staging" if ingress_tls_staging else "" + console.print( + f" [cyan]uv run api-forge-cli k8s setup-tls --email your@email.com{staging_flag}[/cyan]" + ) + raise typer.Exit(1) + + deployer = _get_deployer() + deployer.deploy( + namespace=namespace, + registry=registry, + ingress_enabled=ingress, + ingress_host=ingress_host, + ingress_tls_secret=ingress_tls_secret, + ingress_tls_auto=ingress_tls_auto, + ingress_tls_staging=ingress_tls_staging, + ) + + +@k8s_app.command() +@with_error_handling +def down( + namespace: Annotated[ + str, + typer.Option( + "--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", + yes: Annotated[ + bool, + typer.Option( + "--yes", + "-y", + help="Skip confirmation prompt", + ), + ] = False, +) -> None: + """Remove Kubernetes deployment. + + Uninstalls the Helm release and deletes the namespace. 
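+    All resources in the namespace, including persistent volume claims,
+    are removed. Pass --yes to skip the confirmation prompt.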
+ + Examples: + uv run api-forge-cli k8s down + uv run api-forge-cli k8s down -n my-namespace + uv run api-forge-cli k8s down -y # Skip confirmation + """ + print_header("Removing Kubernetes Deployment") + + if not yes: + if not confirm_action( + "Remove Kubernetes deployment", + f"This will:\n" + f" â€ĸ Uninstall the Helm release\n" + f" â€ĸ Delete namespace '{namespace}' and all resources\n" + f" â€ĸ Remove all persistent volume claims", + ): + console.print("[dim]Operation cancelled[/dim]") + raise typer.Exit(0) + + deployer = _get_deployer() + deployer.teardown(namespace=namespace) + + +@k8s_app.command() +@with_error_handling +def status( + namespace: Annotated[ + str, + typer.Option( + "--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", +) -> None: + """Show the status of Kubernetes deployment. + + Displays the health and configuration of pods, services, and ingress. + + Examples: + uv run api-forge-cli k8s status + uv run api-forge-cli k8s status -n my-namespace + """ + print_header("Kubernetes Deployment Status") + + deployer = _get_deployer() + deployer.show_status(namespace=namespace) + + +@k8s_app.command() +@with_error_handling +def history( + namespace: Annotated[ + str, + typer.Option( + "--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", + max_revisions: Annotated[ + int, + typer.Option( + "--max", + "-m", + help="Maximum number of revisions to show", + ), + ] = 10, +) -> None: + """Show Kubernetes deployment revision history. + + Displays the Helm release history including revision numbers, + timestamps, status, and descriptions. Use this to identify + which revision to rollback to. + + Examples: + uv run api-forge-cli k8s history + uv run api-forge-cli k8s history --max 5 + """ + print_header("Release History") + + deployer = _get_deployer() + + # Get release history + history_data = deployer.commands.helm.history( + deployer.constants.HELM_RELEASE_NAME, namespace, max_revisions + ) + + if not history_data: + console.print( + f"[yellow]No release history found for '{deployer.constants.HELM_RELEASE_NAME}' " + f"in namespace '{namespace}'[/yellow]" + ) + console.print("\n[dim]Deploy first with: uv run api-forge-cli k8s up[/dim]") + return + + table = Table(show_header=True, header_style="bold") + table.add_column("Revision", justify="right") + table.add_column("Updated") + table.add_column("Status") + table.add_column("Chart") + table.add_column("Description") + + for entry in history_data: + revision = entry.get("revision", "") + updated = entry.get("updated", "")[:19] # Trim timezone + status_str = entry.get("status", "") + chart = entry.get("chart", "") + description = entry.get("description", "")[:40] + + # Color status + if status_str == "deployed": + status_display = f"[green]{status_str}[/green]" + elif status_str in ("failed", "superseded"): + status_display = f"[red]{status_str}[/red]" + elif status_str == "pending-upgrade": + status_display = f"[yellow]{status_str}[/yellow]" + else: + status_display = status_str + + table.add_row(str(revision), updated, status_display, chart, description) + + console.print(table) + + # Show rollback hint + if len(history_data) > 1: + console.print( + "\n[dim]To rollback: uv run api-forge-cli k8s rollback [/dim]" + ) + + +@k8s_app.command() +@with_error_handling +def rollback( + revision: Annotated[ + int | None, + typer.Argument( + help="Revision number to rollback to (default: previous revision)", + ), + ] = None, + namespace: Annotated[ + str, + typer.Option( + 
"--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", + yes: Annotated[ + bool, + typer.Option( + "--yes", + "-y", + help="Skip confirmation prompt", + ), + ] = False, +) -> None: + """Rollback Kubernetes deployment to a previous revision. + + Uses Helm's native rollback functionality to restore + the deployment to a previous working state. + + Examples: + uv run api-forge-cli k8s rollback # Previous revision + uv run api-forge-cli k8s rollback 3 # Specific revision + uv run api-forge-cli k8s history # View history first + """ + print_header("Rollback Deployment") + + deployer = _get_deployer() + + # Get release history + history_data = deployer.commands.helm.history( + deployer.constants.HELM_RELEASE_NAME, namespace + ) + + if not history_data: + console.print( + f"[red]No release history found for '{deployer.constants.HELM_RELEASE_NAME}' " + f"in namespace '{namespace}'[/red]" + ) + console.print("\n[dim]Make sure the release exists and you have access.[/dim]") + raise typer.Exit(1) + + # Show current state + current = history_data[-1] + current_revision = int(current.get("revision", 0)) + + if current_revision <= 1: + console.print( + "[yellow]⚠ Only one revision exists. Nothing to rollback to.[/yellow]" + ) + raise typer.Exit(0) + + # Determine target revision + target_revision = revision if revision is not None else current_revision - 1 + + if target_revision < 1 or target_revision >= current_revision: + console.print( + f"[red]Invalid revision {target_revision}. " + f"Must be between 1 and {current_revision - 1}.[/red]" + ) + raise typer.Exit(1) + + # Find target revision info + target_info = next( + (h for h in history_data if int(h.get("revision", 0)) == target_revision), None + ) + + # Show rollback plan + console.print("\n[bold cyan]📋 Rollback Plan[/bold cyan]\n") + + table = Table(show_header=True, header_style="bold") + table.add_column("", style="dim") + table.add_column("Revision") + table.add_column("Status") + table.add_column("Description") + + table.add_row( + "Current", + str(current_revision), + current.get("status", "unknown"), + current.get("description", "")[:50], + ) + + if target_info: + table.add_row( + "Target", + str(target_revision), + target_info.get("status", "unknown"), + target_info.get("description", "")[:50], + ) + + console.print(table) + + # Confirm + if not yes: + if not confirm_action( + f"Rollback to revision {target_revision}", + f"This will restore the deployment in namespace '{namespace}' " + f"to revision {target_revision}.\n" + "Active pods will be replaced with the previous configuration.", + ): + console.print("[dim]Rollback cancelled.[/dim]") + raise typer.Exit(0) + + # Perform rollback + console.print( + Panel.fit( + f"[bold yellow]âĒ Rolling back to revision {target_revision}[/bold yellow]", + border_style="yellow", + ) + ) + + result = deployer.commands.helm.rollback( + deployer.constants.HELM_RELEASE_NAME, + namespace, + target_revision, + wait=True, + timeout="5m", + ) + + if result.success: + console.print( + f"\n[bold green]✅ Successfully rolled back to revision {target_revision}![/bold green]" + ) + console.print("\n[dim]Run 'uv run api-forge-cli k8s status' to verify.[/dim]") + else: + console.print("\n[bold red]❌ Rollback failed[/bold red]") + if result.stderr: + console.print(Panel(result.stderr, title="Error", border_style="red")) + raise typer.Exit(1) + + +@k8s_app.command() +@with_error_handling +def logs( + pod: Annotated[ + str | None, + typer.Argument( + help="Pod name or label selector (e.g., 
'app=api-forge')", + ), + ] = None, + namespace: Annotated[ + str, + typer.Option( + "--namespace", + "-n", + help="Kubernetes namespace", + ), + ] = "api-forge-prod", + container: Annotated[ + str | None, + typer.Option( + "--container", + "-c", + help="Container name (if pod has multiple containers)", + ), + ] = None, + follow: Annotated[ + bool, + typer.Option( + "--follow", + "-f", + help="Follow log output", + ), + ] = False, + tail: Annotated[ + int, + typer.Option( + "--tail", + help="Number of lines to show from the end of the logs", + ), + ] = 100, + previous: Annotated[ + bool, + typer.Option( + "--previous", + "-p", + help="Show logs from previous container instance", + ), + ] = False, +) -> None: + """View logs from Kubernetes pods. + + Shows logs from pods in the deployment. If no pod is specified, + shows logs from all pods with the app label. + + Examples: + uv run api-forge-cli k8s logs # All app pods + uv run api-forge-cli k8s logs api-forge-abc123 # Specific pod + uv run api-forge-cli k8s logs -f # Follow logs + uv run api-forge-cli k8s logs --previous # Previous container + """ + console.print(f"[dim]Namespace: {namespace}[/dim]\n") + + # Determine label selector for non-specific pod requests + label_selector = "app=api-forge" if not pod else None + + try: + result = run_sync( + _controller.get_pod_logs( + namespace=namespace, + pod=pod, + container=container, + label_selector=label_selector, + follow=follow, + tail=tail, + previous=previous, + ) + ) + if result.stdout: + console.print(result.stdout) + if not result.success and result.stderr: + console.print(f"[red]{result.stderr}[/red]") + except KeyboardInterrupt: + console.print("\n[dim]Log streaming stopped[/dim]") + + +@k8s_app.command(name="setup-tls") +@with_error_handling +def setup_tls( + email: Annotated[ + str | None, + typer.Option( + "--email", + "-e", + help="Email for Let's Encrypt certificate notifications (required)", + ), + ] = None, + staging: Annotated[ + bool, + typer.Option( + "--staging", + help="Use Let's Encrypt staging server (for testing)", + ), + ] = False, + install_cert_manager: Annotated[ + bool, + typer.Option( + "--install-cert-manager", + help="Automatically install cert-manager if not present", + ), + ] = True, +) -> None: + """Set up TLS with cert-manager and Let's Encrypt. + + This command: + 1. Checks if cert-manager is installed (installs via Helm if not) + 2. Creates a ClusterIssuer for Let's Encrypt + 3. Waits for the ClusterIssuer to be ready + + After setup, use --ingress-tls-auto with 'k8s up' for automatic certificates. 
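+    The ClusterIssuer manifest is also written to
+    infra/helm/cert-manager/<issuer-name>.yaml so it can be committed to
+    version control for GitOps workflows.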
+ + Examples: + uv run api-forge-cli k8s setup-tls --email admin@example.com + uv run api-forge-cli k8s setup-tls --email admin@example.com --staging + uv run api-forge-cli k8s up --ingress --ingress-host api.example.com --ingress-tls-auto + """ + print_header("TLS Setup with cert-manager") + + if not email: + console.print("[red]Email is required for Let's Encrypt registration.[/red]") + console.print("\n[dim]Example:[/dim]") + console.print( + " [cyan]uv run api-forge-cli k8s setup-tls --email admin@example.com[/cyan]" + ) + raise typer.Exit(1) + + # Step 1: Check/install cert-manager + console.print("\n[bold]Step 1/3:[/bold] Checking cert-manager installation...") + + if _check_cert_manager_installed(): + console.print("[green]✓[/green] cert-manager is already installed") + else: + if install_cert_manager: + console.print("[yellow]cert-manager not found, installing...[/yellow]") + if not _install_cert_manager(): + raise typer.Exit(1) + else: + console.print("[red]cert-manager is not installed.[/red]") + console.print( + "\n[dim]Run with --install-cert-manager or install manually:[/dim]" + ) + console.print( + " helm install cert-manager jetstack/cert-manager " + "--namespace cert-manager --create-namespace --set installCRDs=true" + ) + raise typer.Exit(1) + + # Step 2: Create ClusterIssuer + console.print("\n[bold]Step 2/3:[/bold] Creating ClusterIssuer...") + + if staging: + server = "https://acme-staging-v02.api.letsencrypt.org/directory" + issuer_name = "letsencrypt-staging" + console.print( + "[yellow]Using Let's Encrypt staging server (for testing)[/yellow]" + ) + else: + server = "https://acme-v02.api.letsencrypt.org/directory" + issuer_name = "letsencrypt-prod" + console.print("[cyan]Using Let's Encrypt production server[/cyan]") + + # Check if issuer already exists and is ready + if _check_cluster_issuer_ready(issuer_name): + console.print( + f"[green]✓[/green] ClusterIssuer '{issuer_name}' already exists and is ready" + ) + else: + # Create ClusterIssuer manifest file (version-controlled, GitOps-friendly) + project_root = get_project_root() + cert_manager_dir = project_root / "infra" / "helm" / "cert-manager" + cert_manager_dir.mkdir(parents=True, exist_ok=True) + + issuer_file = cert_manager_dir / f"{issuer_name}.yaml" + + cluster_issuer_yaml = f"""# ClusterIssuer for Let's Encrypt TLS certificates +# Generated by: uv run api-forge-cli k8s setup-tls --email {email} +# This is a cluster-scoped resource (not namespaced). 
+# Apply with: kubectl apply -f {issuer_file.relative_to(project_root)} +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: {issuer_name} + labels: + app.kubernetes.io/managed-by: api-forge-cli +spec: + acme: + # Let's Encrypt ACME server + server: {server} + # Email for certificate expiration notifications + email: {email} + # Secret to store the ACME account private key + privateKeySecretRef: + name: {issuer_name}-account-key + # HTTP-01 challenge solver using NGINX ingress + solvers: + - http01: + ingress: + class: nginx +""" + + # Write manifest to file + issuer_file.write_text(cluster_issuer_yaml) + console.print( + f"[dim]Wrote ClusterIssuer manifest to {issuer_file.relative_to(project_root)}[/dim]" + ) + + # Apply the manifest + console.print(f"[dim]Applying ClusterIssuer '{issuer_name}'...[/dim]") + + result = run_sync(_controller.apply_manifest(issuer_file)) + + if not result.success: + console.print("[red]Failed to create ClusterIssuer[/red]") + if result.stderr: + console.print(Panel(result.stderr, title="Error", border_style="red")) + raise typer.Exit(1) + + console.print(f"[green]✓[/green] ClusterIssuer '{issuer_name}' created") + + # Step 3: Wait for ClusterIssuer to be ready + console.print("\n[bold]Step 3/3:[/bold] Waiting for ClusterIssuer to be ready...") + + if _wait_for_cluster_issuer(issuer_name, timeout=60): + console.print(f"[green]✓[/green] ClusterIssuer '{issuer_name}' is ready") + else: + console.print( + f"[yellow]⚠ ClusterIssuer '{issuer_name}' created but not ready yet[/yellow]" + ) + console.print( + "[dim]This is normal - it will become ready when you create your first certificate.[/dim]" + ) + + # Success message with next steps + console.print("\n" + "=" * 60) + console.print("[bold green]✅ TLS setup complete![/bold green]") + console.print("=" * 60) + + console.print("\n[bold cyan]Deploy with automatic TLS:[/bold cyan]") + staging_flag = " --ingress-tls-staging" if staging else "" + console.print( + f" [cyan]uv run api-forge-cli k8s up --ingress --ingress-host api.example.com --ingress-tls-auto{staging_flag}[/cyan]" + ) + + console.print("\n[bold cyan]What happens next:[/bold cyan]") + console.print(" 1. Ingress is created with cert-manager annotation") + console.print(" 2. cert-manager detects the annotation and requests a certificate") + console.print(" 3. Let's Encrypt validates domain ownership via HTTP-01 challenge") + console.print(" 4. Certificate is stored in a Kubernetes secret") + console.print(" 5. NGINX Ingress serves HTTPS automatically") + console.print(" 6. cert-manager auto-renews before expiry") + + if staging: + console.print( + "\n[yellow]⚠ Staging certificates are not trusted by browsers.[/yellow]" + ) + console.print( + "[yellow] Run without --staging for production certificates.[/yellow]" + ) + + console.print("\n[bold cyan]Manifest saved to:[/bold cyan]") + console.print(f" [dim]infra/helm/cert-manager/{issuer_name}.yaml[/dim]") + console.print( + " [dim]Commit this file to version control for GitOps workflows.[/dim]" + ) diff --git a/src/cli/commands/prod.py b/src/cli/commands/prod.py new file mode 100644 index 0000000..0e0e2a5 --- /dev/null +++ b/src/cli/commands/prod.py @@ -0,0 +1,386 @@ +"""Production Docker Compose environment commands. + +This module provides commands for managing the production Docker Compose +environment: starting services, stopping them, and checking status. 
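+
+Commands:
+    up - Start the production environment
+    down - Stop the production environment
+    status - Show status of production services
+    logs - View logs from production services
+    restart - Restart one or all production services
+    build - Build production Docker images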
+""" + +from typing import TYPE_CHECKING, Annotated + +import typer + +from .shared import ( + confirm_action, + console, + get_project_root, + handle_error, + print_header, + with_error_handling, +) + +if TYPE_CHECKING: + from src.cli.deployment.prod_deployer import ProdDeployer + + +# --------------------------------------------------------------------------- +# Deployer Factory +# --------------------------------------------------------------------------- + + +def _get_deployer() -> "ProdDeployer": + """Get the production deployer instance. + + Returns: + ProdDeployer instance configured for current project + """ + from src.cli.deployment.prod_deployer import ProdDeployer + + return ProdDeployer(console, get_project_root()) + + +# --------------------------------------------------------------------------- +# Typer App +# --------------------------------------------------------------------------- + +prod_app = typer.Typer( + name="prod", + help="Production Docker Compose environment commands.", + no_args_is_help=True, +) + + +# --------------------------------------------------------------------------- +# Commands +# --------------------------------------------------------------------------- + + +@prod_app.command() +@with_error_handling +def up( + skip_build: Annotated[ + bool, + typer.Option( + "--skip-build", + help="Skip building the application image", + ), + ] = False, + no_wait: Annotated[ + bool, + typer.Option( + "--no-wait", + help="Don't wait for health checks to complete", + ), + ] = False, + force_recreate: Annotated[ + bool, + typer.Option( + "--force-recreate", + help="Force recreate containers (useful for secret rotation)", + ), + ] = False, +) -> None: + """Start the production Docker Compose environment. + + This command: + - Ensures required data directories exist + - Validates and cleans up stale bind-mount volumes + - Builds the application Docker image (unless --skip-build) + - Starts all production services with health checks + - Monitors service health (unless --no-wait) + + Examples: + uv run api-forge-cli prod up + uv run api-forge-cli prod up --skip-build --no-wait + uv run api-forge-cli prod up --force-recreate # For secret rotation + """ + print_header("Starting Production Environment") + + deployer = _get_deployer() + deployer.deploy( + skip_build=skip_build, + no_wait=no_wait, + force_recreate=force_recreate, + ) + + +@prod_app.command() +@with_error_handling +def down( + volumes: Annotated[ + bool, + typer.Option( + "--volumes", + "-v", + help="Also remove data volumes and directories (DESTRUCTIVE)", + ), + ] = False, + yes: Annotated[ + bool, + typer.Option( + "--yes", + "-y", + help="Skip confirmation prompt for destructive operations", + ), + ] = False, +) -> None: + """Stop the production Docker Compose environment. + + By default, this preserves all data volumes so you can restart later + without losing data. Use --volumes to also remove data (requires confirmation). 
+ + Examples: + uv run api-forge-cli prod down + uv run api-forge-cli prod down --volumes # Remove data too + uv run api-forge-cli prod down -v -y # Remove data without prompt + """ + print_header("Stopping Production Environment") + + if volumes and not yes: + if not confirm_action( + "Remove data volumes", + "This will permanently delete all production data including:\n" + " â€ĸ PostgreSQL database\n" + " â€ĸ Redis cache and sessions\n" + " â€ĸ Application logs\n" + " â€ĸ SSL certificates", + ): + console.print("[dim]Operation cancelled[/dim]") + raise typer.Exit(0) + + deployer = _get_deployer() + deployer.teardown(volumes=volumes) + + +@prod_app.command() +@with_error_handling +def status() -> None: + """Show the status of production services. + + Displays the health and configuration of each production service. + + Examples: + uv run api-forge-cli prod status + """ + print_header("Production Environment Status") + + deployer = _get_deployer() + deployer.show_status() + + +@prod_app.command() +@with_error_handling +def logs( + service: Annotated[ + str | None, + typer.Argument( + help="Service name to view logs for (e.g., app, postgres, redis, temporal)", + ), + ] = None, + follow: Annotated[ + bool, + typer.Option( + "--follow", + "-f", + help="Follow log output", + ), + ] = False, + tail: Annotated[ + int, + typer.Option( + "--tail", + "-n", + help="Number of lines to show from the end of the logs", + ), + ] = 100, +) -> None: + """View logs from production services. + + Shows logs from the production Docker Compose environment. Optionally + specify a service name to filter logs. + + Examples: + uv run api-forge-cli prod logs # All services + uv run api-forge-cli prod logs app # Just the app service + uv run api-forge-cli prod logs app -f # Follow app logs + uv run api-forge-cli prod logs -n 50 # Last 50 lines + """ + import subprocess + + project_root = get_project_root() + compose_file = project_root / "docker-compose.prod.yml" + + if not compose_file.exists(): + handle_error(f"Compose file not found: {compose_file}") + raise typer.Exit(1) + + cmd = [ + "docker", + "compose", + "-p", + "api-forge-prod", + "-f", + str(compose_file), + "logs", + f"--tail={tail}", + ] + + if follow: + cmd.append("--follow") + + if service: + cmd.append(service) + console.print(f"[dim]Showing logs for service: {service}[/dim]\n") + else: + console.print("[dim]Showing logs for all production services[/dim]\n") + + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as e: + handle_error(f"Failed to retrieve logs: {e}") + raise typer.Exit(1) from e + except KeyboardInterrupt: + console.print("\n[dim]Log streaming stopped[/dim]") + + +@prod_app.command() +@with_error_handling +def restart( + service: Annotated[ + str | None, + typer.Argument( + help="Service name to restart (restarts all if not specified)", + ), + ] = None, + force_recreate: Annotated[ + bool, + typer.Option( + "--force-recreate", + help="Force recreate containers", + ), + ] = False, +) -> None: + """Restart production services. + + Restarts one or all production services. Useful for picking up + configuration changes. 
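+    Use --force-recreate to recreate containers instead of restarting them
+    in place, for example after rotating secrets.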
+ + Examples: + uv run api-forge-cli prod restart # Restart all + uv run api-forge-cli prod restart app # Just restart app + uv run api-forge-cli prod restart --force-recreate + """ + import subprocess + + project_root = get_project_root() + compose_file = project_root / "docker-compose.prod.yml" + + if not compose_file.exists(): + handle_error(f"Compose file not found: {compose_file}") + raise typer.Exit(1) + + if service: + console.print(f"[cyan]Restarting service: {service}[/cyan]") + cmd = [ + "docker", + "compose", + "-p", + "api-forge-prod", + "-f", + str(compose_file), + "restart", + service, + ] + elif force_recreate: + # Full restart with force-recreate + console.print("[cyan]Force restarting all production services...[/cyan]") + cmd = [ + "docker", + "compose", + "-p", + "api-forge-prod", + "-f", + str(compose_file), + "up", + "-d", + "--force-recreate", + ] + else: + console.print("[cyan]Restarting all production services...[/cyan]") + cmd = [ + "docker", + "compose", + "-p", + "api-forge-prod", + "-f", + str(compose_file), + "restart", + ] + + try: + subprocess.run(cmd, check=True) + console.print("[green]✓[/green] Restart complete") + except subprocess.CalledProcessError as e: + handle_error(f"Failed to restart services: {e}") + raise typer.Exit(1) from e + + +@prod_app.command() +@with_error_handling +def build( + service: Annotated[ + str | None, + typer.Argument( + help="Service name to build (builds all if not specified)", + ), + ] = None, + no_cache: Annotated[ + bool, + typer.Option( + "--no-cache", + help="Build without using cache", + ), + ] = False, +) -> None: + """Build production Docker images. + + Builds one or all production service images. Useful for rebuilding + after Dockerfile changes. + + Examples: + uv run api-forge-cli prod build # Build all + uv run api-forge-cli prod build app # Just build app + uv run api-forge-cli prod build --no-cache + """ + import subprocess + + project_root = get_project_root() + compose_file = project_root / "docker-compose.prod.yml" + + if not compose_file.exists(): + handle_error(f"Compose file not found: {compose_file}") + raise typer.Exit(1) + + cmd = [ + "docker", + "compose", + "-p", + "api-forge-prod", + "-f", + str(compose_file), + "build", + ] + + if no_cache: + cmd.append("--no-cache") + + if service: + cmd.append(service) + console.print(f"[cyan]Building service: {service}[/cyan]") + else: + console.print("[cyan]Building all production images...[/cyan]") + + try: + subprocess.run(cmd, check=True) + console.print("[green]✓[/green] Build complete") + except subprocess.CalledProcessError as e: + handle_error(f"Build failed: {e}") + raise typer.Exit(1) from e diff --git a/src/cli/secrets_commands.py b/src/cli/commands/secrets.py similarity index 99% rename from src/cli/secrets_commands.py rename to src/cli/commands/secrets.py index 34064f6..a4b8849 100644 --- a/src/cli/secrets_commands.py +++ b/src/cli/commands/secrets.py @@ -7,7 +7,7 @@ from rich.panel import Panel from rich.table import Table -from .utils import confirm_destructive_action, console, get_project_root +from .shared import confirm_destructive_action, console, get_project_root # Create the secrets command group secrets_app = typer.Typer(help="🔐 Secrets management commands") diff --git a/src/cli/commands/shared.py b/src/cli/commands/shared.py new file mode 100644 index 0000000..ae2a862 --- /dev/null +++ b/src/cli/commands/shared.py @@ -0,0 +1,143 @@ +"""Shared utilities for CLI commands. 
+ +This module provides common utilities used across all command modules, +including console output, confirmation dialogs, and path resolution. +""" + +from collections.abc import Callable +from pathlib import Path + +import typer +from rich.console import Console +from rich.panel import Panel + +# Shared console instance for consistent output +console = Console() + + +def get_project_root() -> Path: + """Get the project root directory. + + Walks up from the module location to find the project root, + identified by the presence of pyproject.toml. + + Returns: + Path to the project root directory + """ + current = Path(__file__).resolve() + + # Walk up the directory tree looking for pyproject.toml + for parent in [current, *current.parents]: + if (parent / "pyproject.toml").exists(): + return parent + + # Fallback to four levels up (src/cli/commands/shared.py -> project root) + return Path(__file__).parent.parent.parent.parent + + +def confirm_action( + action: str, + details: str | None = None, + extra_warning: str | None = None, + force: bool = False, +) -> bool: + """Prompt user to confirm a potentially destructive action. + + Args: + action: Description of the action (e.g., "Stop all services") + details: Additional details about what will be affected + extra_warning: Extra warning message (e.g., for data loss) + force: If True, skip the confirmation prompt + + Returns: + True if the user confirmed, False otherwise + """ + if force: + return True + + # Build warning message + warning_lines = [f"[bold red]âš ī¸ {action}[/bold red]"] + + if details: + warning_lines.append(f"\n{details}") + + if extra_warning: + warning_lines.append(f"\n[yellow]{extra_warning}[/yellow]") + + console.print( + Panel( + "\n".join(warning_lines), + title="Confirmation Required", + border_style="red", + ) + ) + + try: + response = console.input( + "\n[bold]Are you sure you want to proceed?[/bold] \\[y/N]: " + ) + return response.strip().lower() in ("y", "yes") + except (KeyboardInterrupt, EOFError): + console.print("\n[dim]Cancelled.[/dim]") + return False + + +# Alias for backward compatibility +confirm_destructive_action = confirm_action + + +def handle_error(message: str, details: str | None = None, exit_code: int = 1) -> None: + """Handle an error by printing a message and exiting. + + Args: + message: Error message to display + details: Optional additional details + exit_code: Exit code to use + """ + console.print(f"\n[bold red]❌ {message}[/bold red]\n") + if details: + console.print(Panel(details, title="Details", border_style="red")) + raise typer.Exit(exit_code) + + +def print_header(title: str, style: str = "blue") -> None: + """Print a styled header panel. + + Args: + title: Header title text + style: Border style color + """ + console.print( + Panel.fit( + f"[bold {style}]{title}[/bold {style}]", + border_style=style, + ) + ) + + +def with_error_handling(func: Callable[..., None]) -> Callable[..., None]: + """Decorator to wrap command functions with standard error handling. + + Catches common exceptions and formats them consistently. 
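+    DeploymentError is reported via handle_error; KeyboardInterrupt exits
+    with status 130.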
+ + Args: + func: The command function to wrap + + Returns: + Wrapped function with error handling + """ + from functools import wraps + + from src.cli.deployment.helm_deployer.image_builder import DeploymentError + + @wraps(func) + def wrapper(*args: object, **kwargs: object) -> None: + try: + func(*args, **kwargs) + except DeploymentError as e: + handle_error(e.message, e.details) + except KeyboardInterrupt: + console.print("\n[dim]Operation cancelled by user.[/dim]") + raise typer.Exit(130) from None + + return wrapper diff --git a/src/cli/user_commands.py b/src/cli/commands/users.py similarity index 99% rename from src/cli/user_commands.py rename to src/cli/commands/users.py index 640be34..a60e39a 100644 --- a/src/cli/user_commands.py +++ b/src/cli/commands/users.py @@ -1,13 +1,12 @@ """Keycloak user management CLI commands.""" import typer -from rich.console import Console from rich.prompt import Confirm from rich.table import Table from src.dev.keycloak_client import KeycloakClient -console = Console() +from .shared import console # Create the users subcommand app users_app = typer.Typer(help="Manage Keycloak users in development environment") diff --git a/src/cli/deploy_commands.py b/src/cli/deploy_commands.py deleted file mode 100644 index 2a76d83..0000000 --- a/src/cli/deploy_commands.py +++ /dev/null @@ -1,628 +0,0 @@ -"""Deployment CLI commands for dev, prod, and k8s environments.""" - -import subprocess -import sys -from enum import Enum -from pathlib import Path - -import typer -from rich.panel import Panel - -from .deployment import DevDeployer, HelmDeployer, ProdDeployer -from .deployment.helm_deployer.image_builder import DeploymentError -from .utils import confirm_destructive_action, console, get_project_root - -# Create the deploy command group -deploy_app = typer.Typer(help="🚀 Deployment commands for different environments") - - -class Environment(str, Enum): - """Deployment environment options.""" - - DEV = "dev" - PROD = "prod" - K8S = "k8s" - - -@deploy_app.command() -def up( - env: Environment = typer.Argument( - ..., help="Environment to deploy (dev, prod, or k8s)" - ), - force: bool = typer.Option( - False, "--force", help="Force restart even if services are running (dev only)" - ), - no_wait: bool = typer.Option( - False, "--no-wait", help="Don't wait for services to be ready" - ), - start_server: bool = typer.Option( - True, - "--start-server/--no-start-server", - help="Start FastAPI dev server after deploying services (dev only)", - ), - skip_build: bool = typer.Option( - False, "--skip-build", help="Skip building the app image (prod only)" - ), - force_recreate: bool = typer.Option( - False, - "--force-recreate", - help="Force recreate containers to pick up new secrets (prod/k8s only)", - ), - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace (k8s only)" - ), - registry: str = typer.Option( - None, - "--registry", - "-r", - help="Container registry for remote k8s clusters (e.g., ghcr.io/myuser)", - ), - ingress: bool = typer.Option( - False, - "--ingress/--no-ingress", - help="Enable Ingress for external access (k8s only)", - ), - ingress_host: str = typer.Option( - None, - "--ingress-host", - help="Ingress hostname (k8s only, e.g., api.example.com)", - ), - ingress_tls_secret: str = typer.Option( - None, - "--ingress-tls-secret", - help="TLS secret name for HTTPS (k8s only)", - ), -) -> None: - """ - 🚀 Deploy the application to the specified environment. 
- - Environments: - - dev: Development environment with hot reload - - prod: Production-like Docker Compose environment - - k8s: Kubernetes cluster deployment - - For k8s deployments, the cluster type is auto-detected: - - Minikube/Kind: Images loaded directly into cluster cache - - Remote clusters: Use --registry to push images to a container registry - """ - project_root = Path(get_project_root()) - - # Display header - env_name = env.value.upper() - console.print( - Panel.fit( - f"[bold blue]Deploying {env_name} Environment[/bold blue]", - border_style="blue", - ) - ) - - # Create appropriate deployer and execute deployment - try: - deployer: DevDeployer | ProdDeployer | HelmDeployer - if env == Environment.DEV: - deployer = DevDeployer(console, project_root) - deployer.deploy(force=force, no_wait=no_wait, start_server=start_server) - - elif env == Environment.PROD: - deployer = ProdDeployer(console, project_root) - deployer.deploy( - skip_build=skip_build, no_wait=no_wait, force_recreate=force_recreate - ) - - elif env == Environment.K8S: - deployer = HelmDeployer(console, project_root) - deployer.deploy( - namespace=namespace, - no_wait=no_wait, - force_recreate=force_recreate, - registry=registry, - ingress_enabled=ingress, - ingress_host=ingress_host, - ingress_tls_secret=ingress_tls_secret, - ) - - except DeploymentError as e: - console.print(f"\n[bold red]❌ Deployment failed: {e.message}[/bold red]\n") - if e.details: - console.print(Panel(e.details, title="Details", border_style="red")) - sys.exit(1) - - -@deploy_app.command() -def down( - env: Environment = typer.Argument( - ..., help="Environment to stop (dev, prod, or k8s)" - ), - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace (k8s only)" - ), - volumes: bool = typer.Option( - False, "--volumes", "-v", help="Remove volumes/PVCs along with deployment" - ), - yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"), -) -> None: - """ - âšī¸ Stop services in the specified environment. - - Environments: - - dev: Stop development Docker Compose services - - prod: Stop production Docker Compose services and optionally volumes - - k8s: Delete Kubernetes deployment and optionally PVCs - """ - project_root = Path(get_project_root()) - env_name = env.value.upper() - - # Build confirmation details - if env == Environment.K8S: - details = f"This will stop all services in namespace '{namespace}'." - else: - details = f"This will stop all {env_name} Docker Compose services." - - extra_warning = None - if volumes: - extra_warning = ( - "âš ī¸ --volumes flag is set: ALL DATA WILL BE PERMANENTLY DELETED!\n" - " This includes databases, caches, and any persistent storage." 
- ) - - # Confirm destructive action - if not confirm_destructive_action( - action=f"Stop {env_name} environment", - details=details, - extra_warning=extra_warning, - force=yes, - ): - console.print("[dim]Operation cancelled.[/dim]") - raise typer.Exit(0) - - # Display header - console.print( - Panel.fit( - f"[bold red]Stopping {env_name} Environment[/bold red]", - border_style="red", - ) - ) - - # Create appropriate deployer and execute teardown - try: - deployer: DevDeployer | ProdDeployer | HelmDeployer - if env == Environment.DEV: - deployer = DevDeployer(console, project_root) - deployer.teardown(volumes=volumes) - - elif env == Environment.PROD: - deployer = ProdDeployer(console, project_root) - deployer.teardown(volumes=volumes) - - elif env == Environment.K8S: - deployer = HelmDeployer(console, project_root) - deployer.teardown(namespace=namespace, volumes=volumes) - - except DeploymentError as e: - console.print(f"\n[bold red]❌ Teardown failed: {e.message}[/bold red]\n") - if e.details: - console.print(Panel(e.details, title="Details", border_style="red")) - sys.exit(1) - - -@deploy_app.command() -def status( - env: Environment = typer.Argument( - ..., help="Environment to check status (dev, prod, or k8s)" - ), - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace (k8s only)" - ), -) -> None: - """ - 📊 Show status of services in the specified environment. - - Environments: - - dev: Show development Docker Compose services status - - prod: Show production Docker Compose services status - - k8s: Show Kubernetes deployment status - """ - project_root = Path(get_project_root()) - - # Create appropriate deployer and show status - deployer: DevDeployer | ProdDeployer | HelmDeployer - if env == Environment.DEV: - deployer = DevDeployer(console, project_root) - deployer.show_status() - - elif env == Environment.PROD: - deployer = ProdDeployer(console, project_root) - deployer.show_status() - - elif env == Environment.K8S: - deployer = HelmDeployer(console, project_root) - deployer.show_status(namespace) - - -@deploy_app.command() -def rotate( - env: Environment = typer.Argument( - ..., help="Environment to rotate secrets for (prod or k8s)" - ), - redeploy: bool = typer.Option( - True, "--redeploy/--no-redeploy", help="Automatically redeploy after rotation" - ), - force: bool = typer.Option( - True, "--force/--no-force", help="Force overwrite existing secrets" - ), - backup: bool = typer.Option( - True, "--backup/--no-backup", help="Backup existing secrets before rotation" - ), - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace (k8s only)" - ), - yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"), -) -> None: - """ - 🔐 Rotate secrets and optionally redeploy. - - This command: - 1. Generates new cryptographically secure secrets - 2. Optionally backs up existing secrets - 3. 
Optionally redeploys the environment to pick up new secrets - - Environments: - - prod: Rotate Docker Compose production secrets - - k8s: Rotate Kubernetes secrets - - Example usage: - # Rotate and redeploy prod (default behavior) - uv run api-forge-cli deploy rotate prod - - # Rotate without redeploying - uv run api-forge-cli deploy rotate prod --no-redeploy - - # Rotate k8s secrets with backup - uv run api-forge-cli deploy rotate k8s --backup - """ - project_root = Path(get_project_root()) - secrets_script = project_root / "infra" / "secrets" / "generate_secrets.sh" - - if not secrets_script.exists(): - console.print( - f"[red]✗[/red] Secret generation script not found at {secrets_script}" - ) - raise typer.Exit(1) - - if env == Environment.DEV: - console.print( - "[yellow]⚠[/yellow] Secret rotation is not needed for dev environment" - ) - console.print(" Dev environment uses hardcoded test credentials") - raise typer.Exit(0) - - # Confirm destructive action - env_name = env.value.upper() - details = ( - "This will regenerate all production secrets including:\n" - " â€ĸ Database passwords\n" - " â€ĸ Session signing secrets\n" - " â€ĸ CSRF signing secrets\n" - " â€ĸ OIDC client secrets" - ) - extra_warning = ( - "âš ī¸ Existing secrets will be overwritten!\n" - " Running services will need to be restarted to use new secrets." - ) - if not backup: - extra_warning += "\n --no-backup: Old secrets will NOT be backed up!" - - if not confirm_destructive_action( - action=f"Rotate {env_name} secrets", - details=details, - extra_warning=extra_warning, - force=yes, - ): - console.print("[dim]Operation cancelled.[/dim]") - raise typer.Exit(0) - - # Display header - env_name = env.value.upper() - console.print( - Panel.fit( - f"[bold yellow]🔐 Rotating {env_name} Secrets[/bold yellow]", - border_style="yellow", - ) - ) - - # Step 1: Backup existing secrets (if requested) - if backup: - console.print("\n[bold]Step 1/3:[/bold] Backing up existing secrets...") - backup_cmd = [str(secrets_script), "--backup-only"] - try: - result = subprocess.run( - backup_cmd, - cwd=secrets_script.parent, - capture_output=True, - text=True, - check=True, - ) - console.print("[green]✓[/green] Backup complete") - if result.stdout: - console.print(result.stdout) - except subprocess.CalledProcessError as e: - console.print( - f"[yellow]⚠[/yellow] Backup failed (continuing anyway): {e.stderr}" - ) - - # Step 2: Generate new secrets - console.print( - f"\n[bold]Step {'2/3' if backup else '1/2'}:[/bold] Generating new secrets..." - ) - generate_cmd = [str(secrets_script)] - if force: - generate_cmd.append("--force") - - try: - subprocess.run( - generate_cmd, - cwd=secrets_script.parent, - capture_output=False, # Show output in real-time - text=True, - check=True, - ) - console.print("[green]✓[/green] New secrets generated") - except subprocess.CalledProcessError as e: - console.print(f"[red]✗[/red] Secret generation failed: {e}") - raise typer.Exit(1) from e - - # Step 3: Redeploy (if requested) - if redeploy: - console.print( - f"\n[bold]Step {'3/3' if backup else '2/2'}:[/bold] Redeploying with new secrets..." 
- ) - - deployer: DevDeployer | ProdDeployer | HelmDeployer - if env == Environment.PROD: - deployer = ProdDeployer(console, project_root) - deployer.deploy(skip_build=False, no_wait=False, force_recreate=True) - - elif env == Environment.K8S: - deployer = HelmDeployer(console, project_root) - deployer.deploy(namespace=namespace, no_wait=False, force_recreate=True) - - console.print( - "\n[bold green]🎉 Secret rotation and redeployment complete![/bold green]" - ) - else: - console.print( - "\n[bold yellow]⚠[/bold yellow] Secrets rotated but not deployed." - ) - console.print(" Run the following command to deploy with new secrets:") - if env == Environment.PROD: - console.print( - " [cyan]uv run api-forge-cli deploy up prod --force-recreate[/cyan]" - ) - elif env == Environment.K8S: - console.print( - f" [cyan]uv run api-forge-cli deploy up k8s --force-recreate -n {namespace}[/cyan]" - ) - - -@deploy_app.command() -def rollback( - revision: int = typer.Argument( - None, help="Revision number to rollback to (default: previous revision)" - ), - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace" - ), - yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"), -) -> None: - """ - âĒ Rollback Kubernetes deployment to a previous revision. - - This command uses Helm's native rollback functionality to restore - the deployment to a previous working state. - - Examples: - # Rollback to the previous revision - uv run api-forge-cli deploy rollback - - # Rollback to a specific revision - uv run api-forge-cli deploy rollback 3 - - # View revision history first - uv run api-forge-cli deploy history - """ - from rich.table import Table - - from .deployment import HelmDeployer - - project_root = Path(get_project_root()) - deployer = HelmDeployer(console, project_root) - - # Get release history - history = deployer.commands.helm.history( - deployer.constants.HELM_RELEASE_NAME, namespace - ) - - if not history: - console.print( - f"[red]No release history found for '{deployer.constants.HELM_RELEASE_NAME}' " - f"in namespace '{namespace}'[/red]" - ) - console.print("\n[dim]Make sure the release exists and you have access.[/dim]") - raise typer.Exit(1) - - # Show current state - current = history[-1] - current_revision = int(current.get("revision", 0)) - - if current_revision <= 1: - console.print( - "[yellow]⚠ Only one revision exists. Nothing to rollback to.[/yellow]" - ) - raise typer.Exit(0) - - # Determine target revision - target_revision = revision if revision is not None else current_revision - 1 - - if target_revision < 1 or target_revision >= current_revision: - console.print( - f"[red]Invalid revision {target_revision}. 
" - f"Must be between 1 and {current_revision - 1}.[/red]" - ) - raise typer.Exit(1) - - # Find target revision info - target_info = next( - (h for h in history if int(h.get("revision", 0)) == target_revision), None - ) - - # Show rollback plan - console.print("\n[bold cyan]📋 Rollback Plan[/bold cyan]\n") - - table = Table(show_header=True, header_style="bold") - table.add_column("", style="dim") - table.add_column("Revision") - table.add_column("Status") - table.add_column("Description") - - table.add_row( - "Current", - str(current_revision), - current.get("status", "unknown"), - current.get("description", "")[:50], - ) - - if target_info: - table.add_row( - "Target", - str(target_revision), - target_info.get("status", "unknown"), - target_info.get("description", "")[:50], - ) - - console.print(table) - - # Confirm - if not confirm_destructive_action( - action=f"Rollback to revision {target_revision}", - details=f"This will restore the deployment in namespace '{namespace}' to revision {target_revision}.", - extra_warning="Active pods will be replaced with the previous configuration.", - force=yes, - ): - console.print("[dim]Rollback cancelled.[/dim]") - raise typer.Exit(0) - - # Perform rollback - console.print( - Panel.fit( - f"[bold yellow]âĒ Rolling back to revision {target_revision}[/bold yellow]", - border_style="yellow", - ) - ) - - result = deployer.commands.helm.rollback( - deployer.constants.HELM_RELEASE_NAME, - namespace, - target_revision, - wait=True, - timeout="5m", - ) - - if result.success: - console.print( - f"\n[bold green]✅ Successfully rolled back to revision {target_revision}![/bold green]" - ) - console.print( - "\n[dim]Run 'uv run api-forge-cli deploy status k8s' to verify.[/dim]" - ) - else: - console.print("\n[bold red]❌ Rollback failed[/bold red]") - if result.stderr: - console.print(Panel(result.stderr, title="Error", border_style="red")) - raise typer.Exit(1) - - -@deploy_app.command() -def history( - namespace: str = typer.Option( - "api-forge-prod", "--namespace", "-n", help="Kubernetes namespace" - ), - max_revisions: int = typer.Option( - 10, "--max", "-m", help="Maximum number of revisions to show" - ), -) -> None: - """ - 📜 Show Kubernetes deployment revision history. - - Displays the Helm release history including revision numbers, - timestamps, status, and descriptions. Use this to identify - which revision to rollback to. 
- - Examples: - # Show last 10 revisions - uv run api-forge-cli deploy history - - # Show last 5 revisions - uv run api-forge-cli deploy history --max 5 - """ - from rich.table import Table - - from .deployment import HelmDeployer - - project_root = Path(get_project_root()) - deployer = HelmDeployer(console, project_root) - - # Get release history - history_data = deployer.commands.helm.history( - deployer.constants.HELM_RELEASE_NAME, namespace, max_revisions - ) - - if not history_data: - console.print( - f"[yellow]No release history found for '{deployer.constants.HELM_RELEASE_NAME}' " - f"in namespace '{namespace}'[/yellow]" - ) - console.print( - "\n[dim]Deploy first with: uv run api-forge-cli deploy up k8s[/dim]" - ) - return - - console.print( - Panel.fit( - f"[bold cyan]📜 Release History: {deployer.constants.HELM_RELEASE_NAME}[/bold cyan]", - border_style="cyan", - ) - ) - - table = Table(show_header=True, header_style="bold") - table.add_column("Revision", justify="right") - table.add_column("Updated") - table.add_column("Status") - table.add_column("Chart") - table.add_column("Description") - - for entry in history_data: - revision = entry.get("revision", "") - updated = entry.get("updated", "")[:19] # Trim timezone - status = entry.get("status", "") - chart = entry.get("chart", "") - description = entry.get("description", "")[:40] - - # Color status - if status == "deployed": - status_display = f"[green]{status}[/green]" - elif status in ("failed", "superseded"): - status_display = f"[red]{status}[/red]" - elif status == "pending-upgrade": - status_display = f"[yellow]{status}[/yellow]" - else: - status_display = status - - table.add_row(str(revision), updated, status_display, chart, description) - - console.print(table) - - # Show rollback hint - if len(history_data) > 1: - console.print( - "\n[dim]To rollback: uv run api-forge-cli deploy rollback [/dim]" - ) diff --git a/src/cli/deployment/helm_deployer/cleanup.py b/src/cli/deployment/helm_deployer/cleanup.py index 098d405..f4983f9 100644 --- a/src/cli/deployment/helm_deployer/cleanup.py +++ b/src/cli/deployment/helm_deployer/cleanup.py @@ -8,7 +8,9 @@ from typing import TYPE_CHECKING -from ..shell_commands import ReplicaSetInfo, calculate_replicaset_age_hours +from src.infra.k8s.controller import ReplicaSetInfo + +from ..shell_commands import calculate_replicaset_age_hours from .constants import DeploymentConstants if TYPE_CHECKING: diff --git a/src/cli/deployment/helm_deployer/deployer.py b/src/cli/deployment/helm_deployer/deployer.py index c35f574..39da86a 100644 --- a/src/cli/deployment/helm_deployer/deployer.py +++ b/src/cli/deployment/helm_deployer/deployer.py @@ -167,6 +167,8 @@ def deploy( ingress_enabled: bool = False, ingress_host: str | None = None, ingress_tls_secret: str | None = None, + ingress_tls_auto: bool = False, + ingress_tls_staging: bool = False, **kwargs: Any, ) -> None: """Deploy to Kubernetes cluster. 
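The `deploy()` signature above now accepts `ingress_tls_auto` and `ingress_tls_staging`. How those flags are surfaced on the command line is not part of this hunk; a hedged sketch of what the corresponding `k8s up` options might look like (the option names are assumptions), wiring straight through to `HelmDeployer.deploy()`:

```python
from pathlib import Path

import typer

from src.cli.deployment import HelmDeployer

from .shared import console, get_project_root

app = typer.Typer(name="k8s", no_args_is_help=True)


@app.command()
def up(
    namespace: str = typer.Option("api-forge-prod", "--namespace", "-n"),
    ingress_host: str = typer.Option(None, "--ingress-host"),
    tls_auto: bool = typer.Option(
        False, "--tls-auto", help="Auto-provision TLS via cert-manager"
    ),
    tls_staging: bool = typer.Option(
        False, "--tls-staging", help="Use the Let's Encrypt staging issuer"
    ),
) -> None:
    """Deploy to Kubernetes, optionally with automatic TLS (illustrative)."""
    deployer = HelmDeployer(console, Path(get_project_root()))
    deployer.deploy(
        namespace=namespace,
        ingress_enabled=ingress_host is not None,
        ingress_host=ingress_host,
        ingress_tls_auto=tls_auto,
        ingress_tls_staging=tls_staging,
    )
```

When `--tls-auto` is set, the hunks that follow show the deployer annotating the Ingress with the matching `letsencrypt-staging` or `letsencrypt-prod` ClusterIssuer and deriving the TLS secret name from the hostname.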
@@ -187,7 +189,9 @@ def deploy( registry: Container registry for remote clusters ingress_enabled: Whether to enable Ingress for external access ingress_host: Hostname for Ingress (e.g., api.example.com) - ingress_tls_secret: TLS secret name for HTTPS + ingress_tls_secret: TLS secret name for HTTPS (manual) + ingress_tls_auto: Auto-provision TLS via cert-manager + ingress_tls_staging: Use staging Let's Encrypt (with ingress_tls_auto) **kwargs: Reserved for future options """ if not self.check_env_file(): @@ -255,6 +259,8 @@ def deploy( ingress_enabled=ingress_enabled, ingress_host=ingress_host, ingress_tls_secret=ingress_tls_secret, + ingress_tls_auto=ingress_tls_auto, + ingress_tls_staging=ingress_tls_staging, ) self.helm_release.deploy_release(namespace, image_override_file) diff --git a/src/cli/deployment/helm_deployer/helm_release.py b/src/cli/deployment/helm_deployer/helm_release.py index 5a8e9b3..e99f3e3 100644 --- a/src/cli/deployment/helm_deployer/helm_release.py +++ b/src/cli/deployment/helm_deployer/helm_release.py @@ -58,6 +58,8 @@ def create_image_override_file( ingress_enabled: bool = False, ingress_host: str | None = None, ingress_tls_secret: str | None = None, + ingress_tls_auto: bool = False, + ingress_tls_staging: bool = False, ) -> Path: """Create a temporary values file to override image tags and ingress. @@ -66,7 +68,9 @@ def create_image_override_file( registry: Optional container registry prefix for remote clusters ingress_enabled: Whether to enable Ingress for external access ingress_host: Hostname for Ingress (e.g., api.example.com) - ingress_tls_secret: TLS secret name for HTTPS + ingress_tls_secret: TLS secret name for HTTPS (manual certificate) + ingress_tls_auto: Auto-provision TLS via cert-manager + ingress_tls_staging: Use staging Let's Encrypt (with ingress_tls_auto) Returns: Path to the temporary override file @@ -102,17 +106,34 @@ def create_image_override_file( {"host": host, "paths": [{"path": "/", "pathType": "Prefix"}]} ] - # Add TLS configuration if secret is provided - if ingress_tls_secret: + tls_info = "" + + # Handle automatic TLS via cert-manager + if ingress_tls_auto: + issuer_name = ( + "letsencrypt-staging" if ingress_tls_staging else "letsencrypt-prod" + ) + # Add cert-manager annotation + ingress_config["annotations"] = { + "cert-manager.io/cluster-issuer": issuer_name + } + # Generate secret name from hostname (sanitize for K8s naming) + auto_secret_name = host.replace(".", "-") + "-tls" + ingress_config["tls"] = [ + {"secretName": auto_secret_name, "hosts": [host]} + ] + tls_info = f" (TLS: auto via {issuer_name})" + # Add TLS configuration if manual secret is provided + elif ingress_tls_secret: ingress_config["tls"] = [ {"secretName": ingress_tls_secret, "hosts": [host]} ] + tls_info = f" (TLS: {ingress_tls_secret})" override_values["app"]["ingress"] = ingress_config self.console.print( - f"[bold cyan]🌐 Ingress enabled:[/bold cyan] {host}" - + (f" (TLS: {ingress_tls_secret})" if ingress_tls_secret else "") + f"[bold cyan]🌐 Ingress enabled:[/bold cyan] {host}{tls_info}" ) temp_file = Path(tempfile.mktemp(suffix=".yaml", prefix="helm-image-override-")) diff --git a/src/cli/deployment/helm_deployer/validator.py b/src/cli/deployment/helm_deployer/validator.py index d9040f8..d46c55e 100644 --- a/src/cli/deployment/helm_deployer/validator.py +++ b/src/cli/deployment/helm_deployer/validator.py @@ -16,6 +16,8 @@ from enum import Enum from typing import TYPE_CHECKING +from src.infra.k8s.controller import PodInfo + if TYPE_CHECKING: from rich.console 
import Console @@ -302,8 +304,8 @@ def _check_failed_jobs(self, namespace: str, result: ValidationResult) -> None: jobs = self.commands.kubectl.get_jobs(namespace) for job in jobs: - job_name = job["name"] - job_status = job.get("status") + job_name = job.name + job_status = job.status # If job succeeded, it's fine - ignore any previous failures if job_status == "Complete": @@ -337,9 +339,9 @@ def _check_crashloop_pods(self, namespace: str, result: ValidationResult) -> Non pods = self.commands.kubectl.get_pods(namespace) for pod in pods: - if pod.get("status") == "CrashLoopBackOff": - pod_name = str(pod["name"]) - restarts = pod.get("restarts", 0) + if pod.status == "CrashLoopBackOff": + pod_name = pod.name + restarts = pod.restarts result.issues.append( ValidationIssue( severity=ValidationSeverity.ERROR, @@ -362,8 +364,8 @@ def _check_pending_pods(self, namespace: str, result: ValidationResult) -> None: pods = self.commands.kubectl.get_pods(namespace) for pod in pods: - if pod.get("status") == "Pending": - pod_name = str(pod["name"]) + if pod.status == "Pending": + pod_name = pod.name # Check if it's been pending for a while (ignore recently created) # For now, treat all Pending as warnings result.issues.append( @@ -392,11 +394,11 @@ def _check_error_pods(self, namespace: str, result: ValidationResult) -> None: pods = self.commands.kubectl.get_pods(namespace) # Group job-owned pods by their job name - job_pods: dict[str, list[dict[str, str | int]]] = {} - non_job_pods: list[dict[str, str | int]] = [] + job_pods: dict[str, list[PodInfo]] = {} + non_job_pods: list[PodInfo] = [] for pod in pods: - job_owner = str(pod.get("jobOwner", "")) + job_owner = pod.job_owner if job_owner: if job_owner not in job_pods: job_pods[job_owner] = [] @@ -406,8 +408,8 @@ def _check_error_pods(self, namespace: str, result: ValidationResult) -> None: # Check non-job pods for errors (these are always relevant) for pod in non_job_pods: - if pod.get("status") == "Error": - pod_name = str(pod["name"]) + if pod.status == "Error": + pod_name = pod.name result.issues.append( ValidationIssue( severity=ValidationSeverity.ERROR, @@ -431,7 +433,7 @@ def _check_error_pods(self, namespace: str, result: ValidationResult) -> None: # ISO 8601 timestamps sort correctly as strings sorted_pods = sorted( pods_list, - key=lambda p: str(p.get("creationTimestamp", "")), + key=lambda p: p.creation_timestamp, reverse=True, ) @@ -439,12 +441,12 @@ def _check_error_pods(self, namespace: str, result: ValidationResult) -> None: continue most_recent_pod = sorted_pods[0] - pod_status = most_recent_pod.get("status") + pod_status = most_recent_pod.status # Only flag if the most recent pod is in Error state # Completed/Succeeded pods are fine, older failed pods are irrelevant if pod_status == "Error": - pod_name = str(most_recent_pod["name"]) + pod_name = most_recent_pod.name result.issues.append( ValidationIssue( severity=ValidationSeverity.WARNING, diff --git a/src/cli/deployment/shell_commands/kubectl.py b/src/cli/deployment/shell_commands/kubectl.py index 575e3d1..71a8070 100644 --- a/src/cli/deployment/shell_commands/kubectl.py +++ b/src/cli/deployment/shell_commands/kubectl.py @@ -1,16 +1,23 @@ """Kubectl command abstractions. This module provides commands for Kubernetes resource management via kubectl, -organized into logical groups for namespaces, deployments, ReplicaSets, and pods. +delegating to Kr8sController for the actual operations. 
+ +This is a sync wrapper around the async Kr8sController for backward +compatibility with existing code. """ from __future__ import annotations -import json -from datetime import datetime from typing import TYPE_CHECKING -from .types import CommandResult, ReplicaSetInfo +from src.infra.k8s import Kr8sController, run_sync +from src.infra.k8s.controller import ( + CommandResult, + JobInfo, + PodInfo, + ReplicaSetInfo, +) if TYPE_CHECKING: from .runner import CommandRunner @@ -19,6 +26,10 @@ class KubectlCommands: """Kubectl-related shell commands. + This is a sync wrapper around Kr8sController that provides backward + compatibility with existing code. All methods delegate to the async + controller using run_sync(). + Provides operations for: - Cluster context detection - Namespace management @@ -32,58 +43,32 @@ def __init__(self, runner: CommandRunner) -> None: """Initialize kubectl commands. Args: - runner: Command runner for executing shell commands + runner: Command runner (kept for interface compatibility, not used) """ + # Keep runner reference for interface compatibility self._runner = runner + # Delegate to the async controller + self._controller = Kr8sController() # ========================================================================= # Cluster Context # ========================================================================= def is_minikube_context(self) -> bool: - """Check if the current kubectl context is Minikube. - - Returns: - True if current context is minikube, False otherwise - """ - result = self._runner.run( - ["kubectl", "config", "current-context"], - capture_output=True, - ) - if not result.success: - return False - return "minikube" in result.stdout.strip().lower() + """Check if the current kubectl context is Minikube.""" + return run_sync(self._controller.is_minikube_context()) def get_current_context(self) -> str: - """Get the current kubectl context name. - - Returns: - Context name, or "unknown" if detection fails - """ - result = self._runner.run( - ["kubectl", "config", "current-context"], - capture_output=True, - ) - return result.stdout.strip() if result.success else "unknown" + """Get the current kubectl context name.""" + return run_sync(self._controller.get_current_context()) # ========================================================================= # Namespace Management # ========================================================================= def namespace_exists(self, namespace: str) -> bool: - """Check if a namespace exists. - - Args: - namespace: Namespace to check - - Returns: - True if the namespace exists, False otherwise - """ - result = self._runner.run( - ["kubectl", "get", "namespace", namespace], - capture_output=True, - ) - return result.success + """Check if a namespace exists.""" + return run_sync(self._controller.namespace_exists(namespace)) def delete_namespace( self, @@ -92,38 +77,14 @@ def delete_namespace( wait: bool = True, timeout: str = "120s", ) -> CommandResult: - """Delete a Kubernetes namespace and all its resources. - - Warning: This is a destructive operation that deletes all resources - in the namespace. 
- - Args: - namespace: Namespace to delete - wait: Whether to wait for deletion to complete - timeout: Maximum time to wait - - Returns: - CommandResult with deletion status - """ - cmd = ["kubectl", "delete", "namespace", namespace] - if wait: - cmd.append("--wait=true") - cmd.extend(["--timeout", timeout]) - return self._runner.run(cmd) + """Delete a Kubernetes namespace and all its resources.""" + return run_sync( + self._controller.delete_namespace(namespace, wait=wait, timeout=timeout) + ) def delete_pvcs(self, namespace: str) -> CommandResult: - """Delete all PersistentVolumeClaims in a namespace. - - Args: - namespace: Kubernetes namespace - - Returns: - CommandResult with deletion status - """ - return self._runner.run( - ["kubectl", "delete", "pvc", "--all", "-n", namespace], - capture_output=True, - ) + """Delete all PersistentVolumeClaims in a namespace.""" + return run_sync(self._controller.delete_pvcs(namespace)) # ========================================================================= # Resource Deletion @@ -137,138 +98,36 @@ def delete_resources_by_label( *, force: bool = False, ) -> CommandResult: - """Delete Kubernetes resources matching a label selector. - - Args: - resource_types: Comma-separated resource types - (e.g., "all,configmap,secret") - namespace: Kubernetes namespace - label_selector: Label selector - (e.g., "app.kubernetes.io/instance=my-app") - force: Whether to force delete (bypass graceful deletion) - - Returns: - CommandResult with deletion status - """ - cmd = [ - "kubectl", - "delete", - resource_types, - "-n", - namespace, - "-l", - label_selector, - ] - if force: - cmd.extend(["--force", "--grace-period=0"]) - return self._runner.run(cmd) + """Delete Kubernetes resources matching a label selector.""" + return run_sync( + self._controller.delete_resources_by_label( + resource_types, namespace, label_selector, force=force + ) + ) def delete_helm_secrets( self, namespace: str, release_name: str, ) -> CommandResult: - """Delete Helm release metadata secrets. - - This is useful for cleaning up stuck Helm releases that can't - be uninstalled normally. - - Args: - namespace: Kubernetes namespace - release_name: Helm release name - - Returns: - CommandResult with deletion status - """ - return self._runner.run( - [ - "kubectl", - "delete", - "secret", - "-n", - namespace, - "-l", - f"name={release_name},owner=helm", - ] - ) + """Delete Helm release metadata secrets.""" + return run_sync(self._controller.delete_helm_secrets(namespace, release_name)) # ========================================================================= # ReplicaSet Operations # ========================================================================= def get_replicasets(self, namespace: str) -> list[ReplicaSetInfo]: - """Get all ReplicaSets in a namespace. 
- - Args: - namespace: Kubernetes namespace - - Returns: - List of ReplicaSetInfo objects with parsed metadata - """ - result = self._runner.run( - ["kubectl", "get", "replicasets", "-n", namespace, "-o", "json"] - ) - if not result.success or not result.stdout: - return [] - - try: - data = json.loads(result.stdout) - replicasets = [] - - for rs in data.get("items", []): - metadata = rs.get("metadata", {}) - spec = rs.get("spec", {}) - annotations = metadata.get("annotations", {}) - owner_refs = metadata.get("ownerReferences", []) - - # Parse creation timestamp - created_at = None - if creation_ts := metadata.get("creationTimestamp"): - try: - created_at = datetime.fromisoformat( - creation_ts.replace("Z", "+00:00") - ) - except ValueError: - pass - - # Get owner deployment name - owner_deployment = None - if owner_refs: - owner_deployment = owner_refs[0].get("name") - - replicasets.append( - ReplicaSetInfo( - name=metadata.get("name", ""), - replicas=spec.get("replicas", 0), - revision=annotations.get( - "deployment.kubernetes.io/revision", "" - ), - created_at=created_at, - owner_deployment=owner_deployment, - ) - ) - - return replicasets - except json.JSONDecodeError: - return [] + """Get all ReplicaSets in a namespace.""" + return run_sync(self._controller.get_replicasets(namespace)) def delete_replicaset( self, name: str, namespace: str, ) -> CommandResult: - """Delete a specific ReplicaSet. - - Args: - name: ReplicaSet name - namespace: Kubernetes namespace - - Returns: - CommandResult with deletion status - """ - return self._runner.run( - ["kubectl", "delete", "replicaset", name, "-n", namespace] - ) + """Delete a specific ReplicaSet.""" + return run_sync(self._controller.delete_replicaset(name, namespace)) def scale_replicaset( self, @@ -276,55 +135,16 @@ def scale_replicaset( namespace: str, replicas: int, ) -> CommandResult: - """Scale a ReplicaSet to a specific number of replicas. - - Args: - name: ReplicaSet name - namespace: Kubernetes namespace - replicas: Desired number of replicas - - Returns: - CommandResult with scale status - """ - return self._runner.run( - [ - "kubectl", - "scale", - "replicaset", - name, - f"--replicas={replicas}", - "-n", - namespace, - ] - ) + """Scale a ReplicaSet to a specific number of replicas.""" + return run_sync(self._controller.scale_replicaset(name, namespace, replicas)) # ========================================================================= # Deployment Operations # ========================================================================= def get_deployments(self, namespace: str) -> list[str]: - """Get list of deployment names in a namespace. - - Args: - namespace: Kubernetes namespace - - Returns: - List of deployment names - """ - result = self._runner.run( - [ - "kubectl", - "get", - "deployments", - "-n", - namespace, - "-o", - "jsonpath={.items[*].metadata.name}", - ] - ) - if not result.success or not result.stdout: - return [] - return result.stdout.strip().split() + """Get list of deployment names in a namespace.""" + return run_sync(self._controller.get_deployments(namespace)) def rollout_restart( self, @@ -332,35 +152,10 @@ def rollout_restart( namespace: str, name: str | None = None, ) -> CommandResult: - """Trigger a rolling restart of a deployment/daemonset/statefulset. 
- - Args: - resource_type: Resource type ("deployment", "daemonset", "statefulset") - namespace: Kubernetes namespace - name: Specific resource name, or None to restart all of that type - - Returns: - CommandResult with restart status - - Example: - >>> # Restart all deployments - >>> kubectl.rollout_restart("deployment", "production") - >>> # Restart specific deployment - >>> kubectl.rollout_restart("deployment", "production", "api-server") - """ - if name: - cmd = [ - "kubectl", - "rollout", - "restart", - resource_type, - name, - "-n", - namespace, - ] - else: - cmd = ["kubectl", "rollout", "restart", resource_type, "-n", namespace] - return self._runner.run(cmd, capture_output=True) + """Trigger a rolling restart of a deployment/daemonset/statefulset.""" + return run_sync( + self._controller.rollout_restart(resource_type, namespace, name) + ) def rollout_status( self, @@ -370,75 +165,20 @@ def rollout_status( *, timeout: str = "300s", ) -> CommandResult: - """Wait for a rollout to complete. - - Blocks until the rollout finishes (all pods are ready) or times out. - - Args: - resource_type: Resource type ("deployment", "daemonset", "statefulset") - namespace: Kubernetes namespace - name: Specific resource name, or None to wait for all of that type - timeout: Maximum time to wait for rollout to complete - - Returns: - CommandResult with rollout status - - Example: - >>> # Wait for all deployments to be ready - >>> kubectl.rollout_status("deployment", "production") - >>> # Wait for specific deployment - >>> kubectl.rollout_status("deployment", "production", "api-server") - """ - if name: - cmd = [ - "kubectl", - "rollout", - "status", - resource_type, - name, - "-n", - namespace, - f"--timeout={timeout}", - ] - else: - cmd = [ - "kubectl", - "rollout", - "status", - resource_type, - "-n", - namespace, - f"--timeout={timeout}", - ] - return self._runner.run(cmd, capture_output=False) + """Wait for a rollout to complete.""" + return run_sync( + self._controller.rollout_status( + resource_type, namespace, name, timeout=timeout + ) + ) def get_deployment_revision( self, name: str, namespace: str, ) -> str | None: - """Get the current revision number of a deployment. - - Args: - name: Deployment name - namespace: Kubernetes namespace - - Returns: - Revision number as string, or None if not found - """ - result = self._runner.run( - [ - "kubectl", - "get", - "deployment", - name, - "-n", - namespace, - "-o", - "jsonpath={.metadata.annotations.deployment\\.kubernetes\\.io/revision}", - ] - ) - return result.stdout.strip() if result.success and result.stdout else None + """Get the current revision number of a deployment.""" + return run_sync(self._controller.get_deployment_revision(name, namespace)) # ========================================================================= # Pod Operations @@ -452,153 +192,29 @@ def wait_for_pods( condition: str = "ready", timeout: str = "300s", ) -> CommandResult: - """Wait for pods matching a selector to reach a condition. - - Args: - namespace: Kubernetes namespace - label_selector: Label selector for pods - condition: Condition to wait for (e.g., "ready", "delete") - timeout: Maximum time to wait - - Returns: - CommandResult with wait status - - Example: - >>> kubectl.wait_for_pods( - ... "production", - ... "app.kubernetes.io/component=application", - ... timeout="120s", - ... 
) - """ - return self._runner.run( - [ - "kubectl", - "wait", - "--for", - f"condition={condition}", - "pod", - "-l", - label_selector, - "-n", - namespace, - f"--timeout={timeout}", - ], - capture_output=False, + """Wait for pods matching a selector to reach a condition.""" + return run_sync( + self._controller.wait_for_pods( + namespace, label_selector, condition=condition, timeout=timeout + ) ) - def get_pods(self, namespace: str) -> list[dict[str, str | int]]: + def get_pods(self, namespace: str) -> list[PodInfo]: """Get all pods in a namespace with their status. - Args: - namespace: Kubernetes namespace - - Returns: - List of dicts with pod name, status, restarts, creation timestamp, - and job owner (if pod is owned by a Job) + Note: Return type changed from list[dict] to list[PodInfo]. + Access fields as attributes: pod.name, pod.status, etc. """ - result = self._runner.run( - ["kubectl", "get", "pods", "-n", namespace, "-o", "json"], - capture_output=True, - ) - if not result.success or not result.stdout: - return [] - - try: - data = json.loads(result.stdout) - pods = [] - - for pod in data.get("items", []): - metadata = pod.get("metadata", {}) - name = metadata.get("name", "") - creation_timestamp = metadata.get("creationTimestamp", "") - status = pod.get("status", {}) - - # Check if pod is owned by a Job - job_owner = "" - for owner_ref in metadata.get("ownerReferences", []): - if owner_ref.get("kind") == "Job": - job_owner = owner_ref.get("name", "") - break - - # Determine pod status - phase = status.get("phase", "Unknown") - container_statuses = status.get("containerStatuses", []) - - # Check for specific states - pod_status = phase - restarts = 0 - - for cs in container_statuses: - restarts += cs.get("restartCount", 0) - state = cs.get("state", {}) - if "waiting" in state: - reason = state["waiting"].get("reason", "") - if reason: - pod_status = reason # e.g., CrashLoopBackOff - elif "terminated" in state: - reason = state["terminated"].get("reason", "") - if reason == "Error": - pod_status = "Error" - - pods.append( - { - "name": name, - "status": pod_status, - "restarts": restarts, - "creationTimestamp": creation_timestamp, - "jobOwner": job_owner, - } - ) - - return pods - except json.JSONDecodeError: - return [] + return run_sync(self._controller.get_pods(namespace)) # ========================================================================= # Job Operations # ========================================================================= - def get_jobs(self, namespace: str) -> list[dict[str, str]]: + def get_jobs(self, namespace: str) -> list[JobInfo]: """Get all jobs in a namespace with their status. - Args: - namespace: Kubernetes namespace - - Returns: - List of dicts with job name and status (Running/Complete/Failed) + Note: Return type changed from list[dict] to list[JobInfo]. 
+ Access fields as attributes: job.name, job.status """ - result = self._runner.run( - ["kubectl", "get", "jobs", "-n", namespace, "-o", "json"], - capture_output=True, - ) - if not result.success or not result.stdout: - return [] - - try: - data = json.loads(result.stdout) - jobs = [] - - for job in data.get("items", []): - name = job.get("metadata", {}).get("name", "") - status = job.get("status", {}) - - # Determine job status - if status.get("succeeded", 0) > 0: - job_status = "Complete" - elif status.get("failed", 0) > 0: - job_status = "Failed" - elif status.get("active", 0) > 0: - job_status = "Running" - else: - job_status = "Unknown" - - jobs.append( - { - "name": name, - "status": job_status, - } - ) - - return jobs - except json.JSONDecodeError: - return [] + return run_sync(self._controller.get_jobs(namespace)) diff --git a/src/cli/deployment/shell_commands/types.py b/src/cli/deployment/shell_commands/types.py index 03391bb..1f7a5f3 100644 --- a/src/cli/deployment/shell_commands/types.py +++ b/src/cli/deployment/shell_commands/types.py @@ -2,6 +2,9 @@ This module contains all dataclasses and type definitions used across the shell command modules. + +Note: CommandResult and ReplicaSetInfo are re-exported from src.infra.k8s.controller +for backward compatibility. New code should import directly from there. """ from __future__ import annotations @@ -9,22 +12,16 @@ from dataclasses import dataclass from datetime import UTC, datetime +# Re-export Kubernetes types from canonical location +from src.infra.k8s.controller import CommandResult, ReplicaSetInfo -@dataclass -class CommandResult: - """Result of a shell command execution. - - Attributes: - success: Whether the command completed successfully (exit code 0) - stdout: Standard output from the command - stderr: Standard error from the command - returncode: The exit code of the command - """ - - success: bool - stdout: str - stderr: str - returncode: int +__all__ = [ + "CommandResult", + "ReplicaSetInfo", + "HelmRelease", + "GitStatus", + "calculate_replicaset_age_hours", +] @dataclass @@ -44,25 +41,6 @@ class HelmRelease: revision: str -@dataclass -class ReplicaSetInfo: - """Information about a Kubernetes ReplicaSet. - - Attributes: - name: ReplicaSet name - replicas: Desired replica count - revision: Deployment revision annotation - created_at: Creation timestamp - owner_deployment: Name of the owning Deployment (if any) - """ - - name: str - replicas: int - revision: str - created_at: datetime | None - owner_deployment: str | None - - @dataclass class GitStatus: """Git repository status information. 
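Every method of the rewritten `KubectlCommands` above funnels through `run_sync`, and the `types.py` re-exports keep old import paths working while the dataclasses move to `src.infra.k8s.controller`. The `run_sync` helper itself (`src/infra/k8s/utils.py`) is not shown in this diff; a minimal sketch of what it plausibly is, assuming a plain `asyncio.run` bridge, which also explains why `Kr8sController` below refuses to cache its API client:

```python
"""Sketch of src/infra/k8s/utils.py -- assumed, not shown in this diff."""

import asyncio
from collections.abc import Coroutine
from typing import Any, TypeVar

T = TypeVar("T")


def run_sync(coro: Coroutine[Any, Any, T]) -> T:
    """Run an async controller call from synchronous CLI code.

    asyncio.run() creates and closes a fresh event loop on every call,
    which is why Kr8sController cannot reuse a cached kr8s API client.
    """
    return asyncio.run(coro)
```

On the caller side the migration is mechanical: `pod["status"]` becomes `pod.status` and `job["name"]` becomes `job.name`, exactly as the validator hunks earlier in this diff show.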
diff --git a/src/cli/deployment/status_display.py b/src/cli/deployment/status_display.py index aaeb2f9..008de23 100644 --- a/src/cli/deployment/status_display.py +++ b/src/cli/deployment/status_display.py @@ -1,7 +1,6 @@ """Status display utilities for deployment environments.""" import os -import subprocess import requests # type: ignore from dotenv.main import load_dotenv @@ -15,10 +14,15 @@ check_redis_status, check_temporal_status, ) +from src.infra.k8s import Kr8sController, run_sync +from src.infra.k8s.controller import PodInfo, ServiceInfo from .health_checks import HealthChecker from .service_config import get_production_services, is_temporal_enabled +# Module-level controller singleton +_controller = Kr8sController() + class StatusDisplay: """Utility class for displaying deployment status.""" @@ -88,34 +92,27 @@ def show_k8s_status(self, namespace: str = "api-forge-prod") -> None: Args: namespace: Kubernetes namespace to check """ - self.console.print( - Panel.fit( - "[bold magenta]Kubernetes Deployment Status[/bold magenta]", - border_style="magenta", + # Note: Header is printed by the calling command, don't duplicate + + # Get and format pod status + pods = run_sync(_controller.get_pods(namespace)) + self.console.print("\n[bold cyan]Pods:[/bold cyan]") + if pods: + pods_output = self._format_pods_table(pods) + self.console.print(pods_output) + else: + self.console.print(f" [dim]No pods found in namespace {namespace}[/dim]") + + # Get and format service status + services = run_sync(_controller.get_services(namespace)) + self.console.print("\n[bold cyan]Services:[/bold cyan]") + if services: + services_output = self._format_services_table(services) + self.console.print(services_output) + else: + self.console.print( + f" [dim]No services found in namespace {namespace}[/dim]" ) - ) - - # Get pod status - result = subprocess.run( - ["kubectl", "get", "pods", "-n", namespace, "-o", "wide"], - capture_output=True, - text=True, - ) - - if result.returncode == 0: - self.console.print("\n[bold cyan]Pods:[/bold cyan]") - self.console.print(result.stdout) - - # Get service status - result = subprocess.run( - ["kubectl", "get", "svc", "-n", namespace], - capture_output=True, - text=True, - ) - - if result.returncode == 0: - self.console.print("\n[bold cyan]Services:[/bold cyan]") - self.console.print(result.stdout) self._show_k8s_access_instructions(namespace) @@ -265,3 +262,55 @@ def _show_k8s_access_instructions(self, namespace: str) -> None: self.console.print( f" └─ View logs: kubectl logs -n {namespace} -l app.kubernetes.io/name=app -f" ) + + def _format_pods_table(self, pods: list[PodInfo]) -> str: + """Format pods data into a kubectl-like table string. + + Args: + pods: List of PodInfo objects + + Returns: + Formatted table string similar to kubectl get pods -o wide + """ + if not pods: + return "" + + # Header row + header = f"{'NAME':<40} {'READY':<8} {'STATUS':<16} {'RESTARTS':<8} {'AGE':<8} {'IP':<15} {'NODE':<20}" + rows = [header] + + for pod in pods: + # Determine ready status (simplified) + ready = "1/1" if pod.status in ["Running", "Succeeded"] else "0/1" + + # Format age (simplified - just show timestamp for now) + age = pod.creation_timestamp[:10] if pod.creation_timestamp else "" + + row = f"{pod.name:<40} {ready:<8} {pod.status:<16} {pod.restarts:<8} {age:<8} {pod.ip:<15} {pod.node:<20}" + rows.append(row) + + return "\n".join(rows) + + def _format_services_table(self, services: list[ServiceInfo]) -> str: + """Format services data into a kubectl-like table string. 
+ + Args: + services: List of ServiceInfo objects + + Returns: + Formatted table string similar to kubectl get svc + """ + if not services: + return "" + + # Header row + header = f"{'NAME':<30} {'TYPE':<15} {'CLUSTER-IP':<15} {'EXTERNAL-IP':<15} {'PORT(S)':<20}" + rows = [header] + + for svc in services: + external_ip = svc.external_ip if svc.external_ip else "" + + row = f"{svc.name:<30} {svc.type:<15} {svc.cluster_ip:<15} {external_ip:<15} {svc.ports:<20}" + rows.append(row) + + return "\n".join(rows) diff --git a/src/cli/utils.py b/src/cli/utils.py deleted file mode 100644 index b6d0abb..0000000 --- a/src/cli/utils.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Shared utilities for CLI commands.""" - -import subprocess -from pathlib import Path -from typing import Any - -import typer -from rich.console import Console -from rich.panel import Panel - -# Initialize Rich console for colored output -console = Console() - - -def confirm_destructive_action( - action: str, - details: str | None = None, - extra_warning: str | None = None, - force: bool = False, -) -> bool: - """Prompt user to confirm a destructive action. - - Args: - action: Description of the action (e.g., "Stop all services") - details: Additional details about what will be affected - extra_warning: Extra warning message (e.g., for data loss) - force: If True, skip the confirmation prompt - - Returns: - True if the user confirmed, False otherwise - """ - if force: - return True - - # Build warning message - warning_lines = [f"[bold red]âš ī¸ {action}[/bold red]"] - - if details: - warning_lines.append(f"\n{details}") - - if extra_warning: - warning_lines.append(f"\n[yellow]{extra_warning}[/yellow]") - - console.print( - Panel( - "\n".join(warning_lines), - title="Confirmation Required", - border_style="red", - ) - ) - - try: - # Escape brackets with backslash for Rich markup - response = console.input( - "\n[bold]Are you sure you want to proceed?[/bold] \\[y/N]: " - ) - return response.strip().lower() in ("y", "yes") - except (KeyboardInterrupt, EOFError): - console.print("\n[dim]Cancelled.[/dim]") - return False - - -def get_project_root() -> Path: - """Get the project root directory. - - Walks up from the module location to find the project root, - identified by the presence of pyproject.toml. 
- """ - current = Path(__file__).resolve() - - # Walk up the directory tree looking for pyproject.toml - for parent in [current, *current.parents]: - if (parent / "pyproject.toml").exists(): - return parent - - # Fallback to three levels up (src/cli/utils.py -> project root) - return Path(__file__).parent.parent.parent - - -def get_dev_dir() -> Path: - """Get the dev_env directory (infrastructure and Docker files).""" - project_root = get_project_root() - return project_root / "docker" / "dev" - - -def run_command( - command: list[str], - cwd: Path | None = None, - check: bool = True, - capture_output: bool = False, -) -> subprocess.CompletedProcess[Any]: - """Run a shell command with proper error handling.""" - try: - result = subprocess.run( - command, - cwd=cwd or get_project_root(), - check=check, - capture_output=capture_output, - text=True, - ) - return result - except subprocess.CalledProcessError as e: - console.print(f"[red]Command failed: {' '.join(command)}[/red]") - console.print(f"[red]Exit code: {e.returncode}[/red]") - if e.stdout: - console.print(f"[red]stdout: {e.stdout}[/red]") - if e.stderr: - console.print(f"[red]stderr: {e.stderr}[/red]") - raise typer.Exit(1) from e diff --git a/src/infra/k8s/__init__.py b/src/infra/k8s/__init__.py new file mode 100644 index 0000000..25a7c5e --- /dev/null +++ b/src/infra/k8s/__init__.py @@ -0,0 +1,50 @@ +"""Kubernetes infrastructure abstraction layer. + +This module provides a clean abstraction over Kubernetes operations, +supporting multiple backends (kubectl subprocess, kr8s library). + +Example: + from src.infra.k8s import KubernetesController, KubectlController, run_sync + + # Create controller + controller = KubectlController() + + # Use async methods in sync context + exists = run_sync(controller.namespace_exists("my-namespace")) + pods = run_sync(controller.get_pods("my-namespace")) + + # Or use the kr8s-based controller for native async operations + from src.infra.k8s import Kr8sController + + kr8s_controller = Kr8sController() + pods = run_sync(kr8s_controller.get_pods("my-namespace")) +""" + +from .controller import ( + ClusterIssuerStatus, + CommandResult, + JobInfo, + KubernetesController, + PodInfo, + ReplicaSetInfo, + ServiceInfo, +) +from .kr8s_controller import Kr8sController +from .kubectl_controller import KubectlController +from .utils import run_sync + +__all__ = [ + # Controller classes + "KubernetesController", + "KubectlController", + "Kr8sController", + # Data classes + "CommandResult", + "PodInfo", + "ReplicaSetInfo", + "JobInfo", + "ServiceInfo", + "ClusterIssuerStatus", + # Utilities + "run_sync", +] diff --git a/src/infra/k8s/controller.py b/src/infra/k8s/controller.py new file mode 100644 index 0000000..6997eff --- /dev/null +++ b/src/infra/k8s/controller.py @@ -0,0 +1,494 @@ +"""Abstract Kubernetes controller interface. + +Defines the contract for Kubernetes operations that can be implemented +by different backends (kubectl subprocess, kr8s library, etc.). 
+""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + +# ============================================================================= +# Data Types +# ============================================================================= + + +@dataclass +class CommandResult: + """Result of a command execution.""" + + success: bool + stdout: str = "" + stderr: str = "" + returncode: int = 0 + + +@dataclass +class PodInfo: + """Information about a Kubernetes pod.""" + + name: str + status: str + restarts: int = 0 + creation_timestamp: str = "" + job_owner: str = "" + ip: str = "" + node: str = "" + + +@dataclass +class ReplicaSetInfo: + """Information about a Kubernetes ReplicaSet.""" + + name: str + replicas: int + revision: str = "" + created_at: datetime | None = None + owner_deployment: str | None = None + + +@dataclass +class JobInfo: + """Information about a Kubernetes Job.""" + + name: str + status: str # "Running", "Complete", "Failed", "Unknown" + + +@dataclass +class ServiceInfo: + """Information about a Kubernetes Service.""" + + name: str + type: str + cluster_ip: str + external_ip: str = "" + ports: str = "" + + +@dataclass +class ClusterIssuerStatus: + """Status of a cert-manager ClusterIssuer.""" + + exists: bool + ready: bool + message: str = "" + + +# ============================================================================= +# Abstract Controller +# ============================================================================= + + +class KubernetesController(ABC): + """Abstract base class for Kubernetes operations. + + All methods are async to support both sync (kubectl) and async (kr8s) + implementations. Use `run_sync()` to call from synchronous code. + + Example: + from src.infra.k8s import KubectlController, run_sync + + controller = KubectlController() + pods = run_sync(controller.get_pods("my-namespace")) + """ + + # ========================================================================= + # Cluster Context + # ========================================================================= + + @abstractmethod + async def get_current_context(self) -> str: + """Get the current kubectl context name. + + Returns: + Context name, or "unknown" if detection fails + """ + ... + + @abstractmethod + async def is_minikube_context(self) -> bool: + """Check if the current kubectl context is Minikube. + + Returns: + True if current context is minikube, False otherwise + """ + ... + + # ========================================================================= + # Namespace Operations + # ========================================================================= + + @abstractmethod + async def namespace_exists(self, namespace: str) -> bool: + """Check if a namespace exists. + + Args: + namespace: Namespace to check + + Returns: + True if the namespace exists, False otherwise + """ + ... + + @abstractmethod + async def delete_namespace( + self, + namespace: str, + *, + wait: bool = True, + timeout: str = "120s", + ) -> CommandResult: + """Delete a Kubernetes namespace and all its resources. + + Warning: This is a destructive operation. + + Args: + namespace: Namespace to delete + wait: Whether to wait for deletion to complete + timeout: Maximum time to wait + + Returns: + CommandResult with deletion status + """ + ... + + @abstractmethod + async def delete_pvcs(self, namespace: str) -> CommandResult: + """Delete all PersistentVolumeClaims in a namespace. 
+ + Args: + namespace: Kubernetes namespace + + Returns: + CommandResult with deletion status + """ + ... + + # ========================================================================= + # Resource Operations + # ========================================================================= + + @abstractmethod + async def apply_manifest(self, manifest_path: Path) -> CommandResult: + """Apply a Kubernetes manifest file. + + Args: + manifest_path: Path to the YAML manifest file + + Returns: + CommandResult with apply status + """ + ... + + @abstractmethod + async def delete_resources_by_label( + self, + resource_types: str, + namespace: str, + label_selector: str, + *, + force: bool = False, + ) -> CommandResult: + """Delete Kubernetes resources matching a label selector. + + Args: + resource_types: Comma-separated resource types + (e.g., "all,configmap,secret") + namespace: Kubernetes namespace + label_selector: Label selector + (e.g., "app.kubernetes.io/instance=my-app") + force: Whether to force delete (bypass graceful deletion) + + Returns: + CommandResult with deletion status + """ + ... + + @abstractmethod + async def delete_helm_secrets( + self, + namespace: str, + release_name: str, + ) -> CommandResult: + """Delete Helm release metadata secrets. + + Useful for cleaning up stuck Helm releases. + + Args: + namespace: Kubernetes namespace + release_name: Helm release name + + Returns: + CommandResult with deletion status + """ + ... + + # ========================================================================= + # Deployment Operations + # ========================================================================= + + @abstractmethod + async def get_deployments(self, namespace: str) -> list[str]: + """Get list of deployment names in a namespace. + + Args: + namespace: Kubernetes namespace + + Returns: + List of deployment names + """ + ... + + @abstractmethod + async def rollout_restart( + self, + resource_type: str, + namespace: str, + name: str | None = None, + ) -> CommandResult: + """Trigger a rolling restart of a deployment/daemonset/statefulset. + + Args: + resource_type: Resource type ("deployment", "daemonset", "statefulset") + namespace: Kubernetes namespace + name: Specific resource name, or None to restart all of that type + + Returns: + CommandResult with restart status + """ + ... + + @abstractmethod + async def rollout_status( + self, + resource_type: str, + namespace: str, + name: str | None = None, + *, + timeout: str = "300s", + ) -> CommandResult: + """Wait for a rollout to complete. + + Blocks until the rollout finishes (all pods are ready) or times out. + + Args: + resource_type: Resource type ("deployment", "daemonset", "statefulset") + namespace: Kubernetes namespace + name: Specific resource name, or None to wait for all of that type + timeout: Maximum time to wait for rollout to complete + + Returns: + CommandResult with rollout status + """ + ... + + @abstractmethod + async def get_deployment_revision( + self, + name: str, + namespace: str, + ) -> str | None: + """Get the current revision number of a deployment. + + Args: + name: Deployment name + namespace: Kubernetes namespace + + Returns: + Revision number as string, or None if not found + """ + ... + + # ========================================================================= + # ReplicaSet Operations + # ========================================================================= + + @abstractmethod + async def get_replicasets(self, namespace: str) -> list[ReplicaSetInfo]: + """Get all ReplicaSets in a namespace. 
+ + Args: + namespace: Kubernetes namespace + + Returns: + List of ReplicaSetInfo objects with parsed metadata + """ + ... + + @abstractmethod + async def delete_replicaset( + self, + name: str, + namespace: str, + ) -> CommandResult: + """Delete a specific ReplicaSet. + + Args: + name: ReplicaSet name + namespace: Kubernetes namespace + + Returns: + CommandResult with deletion status + """ + ... + + @abstractmethod + async def scale_replicaset( + self, + name: str, + namespace: str, + replicas: int, + ) -> CommandResult: + """Scale a ReplicaSet to a specific number of replicas. + + Args: + name: ReplicaSet name + namespace: Kubernetes namespace + replicas: Desired number of replicas + + Returns: + CommandResult with scale status + """ + ... + + # ========================================================================= + # Pod Operations + # ========================================================================= + + @abstractmethod + async def get_pods(self, namespace: str) -> list[PodInfo]: + """Get all pods in a namespace with their status. + + Args: + namespace: Kubernetes namespace + + Returns: + List of PodInfo objects with pod details + """ + ... + + @abstractmethod + async def wait_for_pods( + self, + namespace: str, + label_selector: str, + *, + condition: str = "ready", + timeout: str = "300s", + ) -> CommandResult: + """Wait for pods matching a selector to reach a condition. + + Args: + namespace: Kubernetes namespace + label_selector: Label selector for pods + condition: Condition to wait for (e.g., "ready", "delete") + timeout: Maximum time to wait + + Returns: + CommandResult with wait status + """ + ... + + @abstractmethod + async def get_pod_logs( + self, + namespace: str, + pod: str | None = None, + *, + container: str | None = None, + label_selector: str | None = None, + follow: bool = False, + tail: int = 100, + previous: bool = False, + ) -> CommandResult: + """Get logs from Kubernetes pods. + + Args: + namespace: Kubernetes namespace + pod: Specific pod name, or None to use label_selector + container: Container name (if pod has multiple containers) + label_selector: Label selector for pods (if pod is None) + follow: Whether to follow log output + tail: Number of lines to show from the end + previous: Show logs from previous container instance + + Returns: + CommandResult with logs in stdout + """ + ... + + # ========================================================================= + # Job Operations + # ========================================================================= + + @abstractmethod + async def get_jobs(self, namespace: str) -> list[JobInfo]: + """Get all jobs in a namespace with their status. + + Args: + namespace: Kubernetes namespace + + Returns: + List of JobInfo objects + """ + ... + + # ========================================================================= + # Service Operations + # ========================================================================= + + @abstractmethod + async def get_services(self, namespace: str) -> list[ServiceInfo]: + """Get all services in a namespace. + + Args: + namespace: Kubernetes namespace + + Returns: + List of ServiceInfo objects + """ + ... + + # ========================================================================= + # Cert-Manager Operations + # ========================================================================= + + @abstractmethod + async def check_cert_manager_installed(self) -> bool: + """Check if cert-manager is installed in the cluster. 
+ + Returns: + True if cert-manager pods are running, False otherwise + """ + ... + + @abstractmethod + async def get_cluster_issuer_status( + self, + issuer_name: str, + ) -> ClusterIssuerStatus: + """Get the status of a cert-manager ClusterIssuer. + + Args: + issuer_name: Name of the ClusterIssuer + + Returns: + ClusterIssuerStatus with exists, ready, and message + """ + ... + + @abstractmethod + async def get_cluster_issuer_yaml(self, issuer_name: str) -> str | None: + """Get the YAML representation of a ClusterIssuer. + + Args: + issuer_name: Name of the ClusterIssuer + + Returns: + YAML string, or None if not found + """ + ... diff --git a/src/infra/k8s/kr8s_controller.py b/src/infra/k8s/kr8s_controller.py new file mode 100644 index 0000000..5c0ff37 --- /dev/null +++ b/src/infra/k8s/kr8s_controller.py @@ -0,0 +1,764 @@ +"""Kr8s-based implementation of KubernetesController. + +Uses the kr8s library for native async Kubernetes operations. +""" + +from __future__ import annotations + +import asyncio +from datetime import datetime +from pathlib import Path +from typing import Any + +import kr8s +from kr8s.asyncio.objects import ( + Deployment, + Job, + Namespace, + PersistentVolumeClaim, + Pod, + ReplicaSet, + Secret, + Service, +) + +from .controller import ( + ClusterIssuerStatus, + CommandResult, + JobInfo, + KubernetesController, + PodInfo, + ReplicaSetInfo, + ServiceInfo, +) + + +class Kr8sController(KubernetesController): + """Kubernetes controller using kr8s library. + + All methods are natively async, leveraging kr8s's async API. + + Note: The kr8s API client is NOT cached because it's tied to the event loop + that was running when created. When using run_sync() which calls asyncio.run(), + each call creates a new event loop, making the cached API unusable. + """ + + def __init__(self) -> None: + """Initialize the kr8s controller.""" + # Note: We don't cache the API because kr8s clients are tied to + # the event loop they were created in. Since run_sync() uses + # asyncio.run() which creates/closes event loops, we need a fresh + # API client each time. + pass + + async def _get_api(self) -> Any: # Returns kr8s._api.Api + """Get or create the kr8s API client. + + Creates a new API client each call because kr8s clients are bound + to the event loop they were created in. 
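+
+        Example (illustrative; mirrors how the methods below use it):
+            api = await self._get_api()
+            names = [d.name async for d in Deployment.list(namespace="default", api=api)]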
+ """ + return await kr8s.asyncio.api() + + # ========================================================================= + # Cluster Context + # ========================================================================= + + async def get_current_context(self) -> str: + """Get the current kubectl context name.""" + try: + api = await self._get_api() + # Access context via auth object + return api.auth.active_context or "unknown" + except Exception: + return "unknown" + + async def is_minikube_context(self) -> bool: + """Check if the current kubectl context is Minikube.""" + context = await self.get_current_context() + return "minikube" in context.lower() + + # ========================================================================= + # Namespace Operations + # ========================================================================= + + async def namespace_exists(self, namespace: str) -> bool: + """Check if a namespace exists.""" + try: + api = await self._get_api() + ns = await Namespace.get(namespace, api=api) + return ns is not None + except kr8s.NotFoundError: + return False + except Exception: + return False + + async def delete_namespace( + self, + namespace: str, + *, + wait: bool = True, + timeout: str = "120s", + ) -> CommandResult: + """Delete a Kubernetes namespace and all its resources.""" + try: + api = await self._get_api() + ns = await Namespace.get(namespace, api=api) + await ns.delete() + + if wait: + # Parse timeout + timeout_seconds = self._parse_timeout(timeout) + try: + await asyncio.wait_for( + self._wait_for_namespace_deletion(namespace), + timeout=timeout_seconds, + ) + except TimeoutError: + return CommandResult( + success=False, + stderr=f"Timeout waiting for namespace {namespace} deletion", + returncode=1, + ) + + return CommandResult( + success=True, stdout=f'namespace "{namespace}" deleted' + ) + except kr8s.NotFoundError: + return CommandResult( + success=False, + stderr=f'namespace "{namespace}" not found', + returncode=1, + ) + except Exception as e: + return CommandResult(success=False, stderr=str(e), returncode=1) + + async def _wait_for_namespace_deletion(self, namespace: str) -> None: + """Wait until a namespace no longer exists.""" + while await self.namespace_exists(namespace): + await asyncio.sleep(1) + + async def delete_pvcs(self, namespace: str) -> CommandResult: + """Delete all PersistentVolumeClaims in a namespace.""" + try: + api = await self._get_api() + deleted = [] + async for pvc in PersistentVolumeClaim.list(namespace=namespace, api=api): + await pvc.delete() + deleted.append(pvc.name) + return CommandResult( + success=True, + stdout=f"Deleted PVCs: {', '.join(deleted)}" + if deleted + else "No PVCs found", + ) + except Exception as e: + return CommandResult(success=False, stderr=str(e), returncode=1) + + # ========================================================================= + # Resource Operations + # ========================================================================= + + async def apply_manifest(self, manifest_path: Path) -> CommandResult: + """Apply a Kubernetes manifest file. + + Note: kr8s doesn't have a direct 'apply' equivalent, so we use + kubectl subprocess for this operation. 
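+
+        Roughly equivalent to:
+            kubectl apply -f <manifest_path>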
+ """ + import subprocess + + def _run() -> CommandResult: + result = subprocess.run( + ["kubectl", "apply", "-f", str(manifest_path)], + capture_output=True, + text=True, + ) + return CommandResult( + success=result.returncode == 0, + stdout=result.stdout or "", + stderr=result.stderr or "", + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + async def delete_resources_by_label( + self, + resource_types: str, + namespace: str, + label_selector: str, + *, + force: bool = False, + ) -> CommandResult: + """Delete Kubernetes resources matching a label selector. + + Note: Uses kubectl for complex multi-resource deletion. + """ + import subprocess + + cmd = [ + "kubectl", + "delete", + resource_types, + "-n", + namespace, + "-l", + label_selector, + ] + if force: + cmd.extend(["--force", "--grace-period=0"]) + + def _run() -> CommandResult: + result = subprocess.run(cmd, capture_output=True, text=True) + return CommandResult( + success=result.returncode == 0, + stdout=result.stdout or "", + stderr=result.stderr or "", + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + async def delete_helm_secrets( + self, + namespace: str, + release_name: str, + ) -> CommandResult: + """Delete Helm release metadata secrets.""" + try: + api = await self._get_api() + deleted = [] + async for secret in Secret.list( + namespace=namespace, + label_selector=f"name={release_name},owner=helm", + api=api, + ): + await secret.delete() + deleted.append(secret.name) + return CommandResult( + success=True, + stdout=f"Deleted secrets: {', '.join(deleted)}" + if deleted + else "No secrets found", + ) + except Exception as e: + return CommandResult(success=False, stderr=str(e), returncode=1) + + # ========================================================================= + # Deployment Operations + # ========================================================================= + + async def get_deployments(self, namespace: str) -> list[str]: + """Get list of deployment names in a namespace.""" + try: + api = await self._get_api() + return [d.name async for d in Deployment.list(namespace=namespace, api=api)] + except Exception: + return [] + + async def rollout_restart( + self, + resource_type: str, + namespace: str, + name: str | None = None, + ) -> CommandResult: + """Trigger a rolling restart of a deployment/daemonset/statefulset. + + Note: kr8s doesn't have a direct rollout restart, using kubectl. + """ + import subprocess + + if name: + cmd = [ + "kubectl", + "rollout", + "restart", + resource_type, + name, + "-n", + namespace, + ] + else: + cmd = ["kubectl", "rollout", "restart", resource_type, "-n", namespace] + + def _run() -> CommandResult: + result = subprocess.run(cmd, capture_output=True, text=True) + return CommandResult( + success=result.returncode == 0, + stdout=result.stdout or "", + stderr=result.stderr or "", + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + async def rollout_status( + self, + resource_type: str, + namespace: str, + name: str | None = None, + *, + timeout: str = "300s", + ) -> CommandResult: + """Wait for a rollout to complete. + + Note: Uses kubectl for streaming status output. 
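+
+        Roughly equivalent to:
+            kubectl rollout status <resource_type> [<name>] -n <namespace> --timeout=<timeout>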
+ """ + import subprocess + + if name: + cmd = [ + "kubectl", + "rollout", + "status", + resource_type, + name, + "-n", + namespace, + f"--timeout={timeout}", + ] + else: + cmd = [ + "kubectl", + "rollout", + "status", + resource_type, + "-n", + namespace, + f"--timeout={timeout}", + ] + + def _run() -> CommandResult: + result = subprocess.run(cmd, capture_output=False, text=True) + return CommandResult( + success=result.returncode == 0, + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + async def get_deployment_revision( + self, + name: str, + namespace: str, + ) -> str | None: + """Get the current revision number of a deployment.""" + try: + api = await self._get_api() + deployment = await Deployment.get(name, namespace=namespace, api=api) + annotations = deployment.metadata.get("annotations", {}) + revision: str | None = annotations.get("deployment.kubernetes.io/revision") + return revision + except Exception: + return None + + # ========================================================================= + # ReplicaSet Operations + # ========================================================================= + + async def get_replicasets(self, namespace: str) -> list[ReplicaSetInfo]: + """Get all ReplicaSets in a namespace.""" + try: + api = await self._get_api() + result = [] + + async for rs in ReplicaSet.list(namespace=namespace, api=api): + metadata = rs.metadata + spec = rs.spec + annotations = metadata.get("annotations", {}) + owner_refs = metadata.get("ownerReferences", []) + + # Parse creation timestamp + created_at = None + if creation_ts := metadata.get("creationTimestamp"): + try: + created_at = datetime.fromisoformat( + creation_ts.replace("Z", "+00:00") + ) + except ValueError: + pass + + # Get owner deployment name + owner_deployment = None + if owner_refs: + owner_deployment = owner_refs[0].get("name") + + result.append( + ReplicaSetInfo( + name=metadata.get("name", ""), + replicas=spec.get("replicas", 0), + revision=annotations.get( + "deployment.kubernetes.io/revision", "" + ), + created_at=created_at, + owner_deployment=owner_deployment, + ) + ) + + return result + except Exception: + return [] + + async def delete_replicaset( + self, + name: str, + namespace: str, + ) -> CommandResult: + """Delete a specific ReplicaSet.""" + try: + api = await self._get_api() + rs = await ReplicaSet.get(name, namespace=namespace, api=api) + await rs.delete() + return CommandResult(success=True, stdout=f'replicaset "{name}" deleted') + except kr8s.NotFoundError: + return CommandResult( + success=False, + stderr=f'replicaset "{name}" not found', + returncode=1, + ) + except Exception as e: + return CommandResult(success=False, stderr=str(e), returncode=1) + + async def scale_replicaset( + self, + name: str, + namespace: str, + replicas: int, + ) -> CommandResult: + """Scale a ReplicaSet to a specific number of replicas.""" + try: + api = await self._get_api() + rs = await ReplicaSet.get(name, namespace=namespace, api=api) + await rs.scale(replicas) + return CommandResult( + success=True, + stdout=f"replicaset/{name} scaled to {replicas}", + ) + except Exception as e: + return CommandResult(success=False, stderr=str(e), returncode=1) + + # ========================================================================= + # Pod Operations + # ========================================================================= + + async def get_pods(self, namespace: str) -> list[PodInfo]: + """Get all pods in a namespace with their status.""" + try: + api = await self._get_api() + 
result = [] + + async for pod in Pod.list(namespace=namespace, api=api): + metadata = pod.metadata + spec = pod.spec + status = pod.status + + name = metadata.get("name", "") + creation_timestamp = metadata.get("creationTimestamp", "") + + # Check if pod is owned by a Job + job_owner = "" + for owner_ref in metadata.get("ownerReferences", []): + if owner_ref.get("kind") == "Job": + job_owner = owner_ref.get("name", "") + break + + # Determine pod status + phase = status.get("phase", "Unknown") + container_statuses = status.get("containerStatuses", []) + + pod_status = phase + restarts = 0 + + for cs in container_statuses: + restarts += cs.get("restartCount", 0) + state = cs.get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "") + if reason: + pod_status = reason + elif "terminated" in state: + reason = state["terminated"].get("reason", "") + if reason == "Error": + pod_status = "Error" + + result.append( + PodInfo( + name=name, + status=pod_status, + restarts=restarts, + creation_timestamp=creation_timestamp, + job_owner=job_owner, + ip=status.get("podIP", ""), + node=spec.get("nodeName", ""), + ) + ) + + return result + except Exception: + return [] + + async def wait_for_pods( + self, + namespace: str, + label_selector: str, + *, + condition: str = "ready", + timeout: str = "300s", + ) -> CommandResult: + """Wait for pods matching a selector to reach a condition. + + Note: Uses kubectl for the wait operation. + """ + import subprocess + + cmd = [ + "kubectl", + "wait", + "--for", + f"condition={condition}", + "pod", + "-l", + label_selector, + "-n", + namespace, + f"--timeout={timeout}", + ] + + def _run() -> CommandResult: + result = subprocess.run(cmd, capture_output=False, text=True) + return CommandResult( + success=result.returncode == 0, + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + async def get_pod_logs( + self, + namespace: str, + pod: str | None = None, + *, + container: str | None = None, + label_selector: str | None = None, + follow: bool = False, + tail: int = 100, + previous: bool = False, + ) -> CommandResult: + """Get logs from Kubernetes pods. + + Note: Uses kubectl for log streaming support. 
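+
+        Roughly equivalent to one of:
+            kubectl logs -n <namespace> <pod> --tail=<tail>
+            kubectl logs -n <namespace> -l <label_selector> --all-containers=true --tail=<tail>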
+ """ + import subprocess + + cmd = ["kubectl", "logs", "-n", namespace] + + if pod: + cmd.append(pod) + elif label_selector: + cmd.extend(["-l", label_selector, "--all-containers=true"]) + + if container: + cmd.extend(["-c", container]) + + if follow: + cmd.append("-f") + + cmd.append(f"--tail={tail}") + + if previous: + cmd.append("--previous") + + def _run() -> CommandResult: + result = subprocess.run( + cmd, + capture_output=not follow, + text=True, + ) + return CommandResult( + success=result.returncode == 0, + stdout=result.stdout or "" if not follow else "", + stderr=result.stderr or "" if not follow else "", + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + # ========================================================================= + # Job Operations + # ========================================================================= + + async def get_jobs(self, namespace: str) -> list[JobInfo]: + """Get all jobs in a namespace with their status.""" + try: + api = await self._get_api() + result = [] + + async for job in Job.list(namespace=namespace, api=api): + name = job.metadata.get("name", "") + status = job.status + + if status.get("succeeded", 0) > 0: + job_status = "Complete" + elif status.get("failed", 0) > 0: + job_status = "Failed" + elif status.get("active", 0) > 0: + job_status = "Running" + else: + job_status = "Unknown" + + result.append(JobInfo(name=name, status=job_status)) + + return result + except Exception: + return [] + + # ========================================================================= + # Service Operations + # ========================================================================= + + async def get_services(self, namespace: str) -> list[ServiceInfo]: + """Get all services in a namespace.""" + try: + api = await self._get_api() + result = [] + + async for svc in Service.list(namespace=namespace, api=api): + metadata = svc.metadata + spec = svc.spec + status = svc.status + + # Get external IP from LoadBalancer status + external_ip = "" + lb_ingress = status.get("loadBalancer", {}).get("ingress", []) + if lb_ingress: + external_ip = lb_ingress[0].get( + "ip", lb_ingress[0].get("hostname", "") + ) + + # Format ports + ports = [] + for port in spec.get("ports", []): + port_str = f"{port.get('port')}" + if target := port.get("targetPort"): + port_str += f":{target}" + if proto := port.get("protocol"): + port_str += f"/{proto}" + ports.append(port_str) + + result.append( + ServiceInfo( + name=metadata.get("name", ""), + type=spec.get("type", ""), + cluster_ip=spec.get("clusterIP", ""), + external_ip=external_ip, + ports=",".join(ports), + ) + ) + + return result + except Exception: + return [] + + # ========================================================================= + # Cert-Manager Operations + # ========================================================================= + + async def check_cert_manager_installed(self) -> bool: + """Check if cert-manager is installed in the cluster.""" + try: + api = await self._get_api() + pods = [pod async for pod in Pod.list(namespace="cert-manager", api=api)] + return len(pods) > 0 + except Exception: + return False + + async def get_cluster_issuer_status( + self, + issuer_name: str, + ) -> ClusterIssuerStatus: + """Get the status of a cert-manager ClusterIssuer. + + Note: Uses kubectl as ClusterIssuer is a CRD. 
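+
+        Roughly equivalent to:
+            kubectl get clusterissuer <issuer_name> -o json
+        with the Ready condition read from status.conditions.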
+ """ + import subprocess + + def _run() -> ClusterIssuerStatus: + result = subprocess.run( + [ + "kubectl", + "get", + "clusterissuer", + issuer_name, + "-o", + "json", + ], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + return ClusterIssuerStatus( + exists=False, + ready=False, + message="ClusterIssuer not found", + ) + + try: + import json + + data = json.loads(result.stdout) + conditions = data.get("status", {}).get("conditions", []) + + ready = False + message = "" + + for condition in conditions: + if condition.get("type") == "Ready": + ready = condition.get("status") == "True" + message = condition.get("message", "") + break + + return ClusterIssuerStatus( + exists=True, + ready=ready, + message=message, + ) + except Exception: + return ClusterIssuerStatus( + exists=True, + ready=False, + message="Failed to parse ClusterIssuer status", + ) + + return await asyncio.to_thread(_run) + + async def get_cluster_issuer_yaml(self, issuer_name: str) -> str | None: + """Get the YAML representation of a ClusterIssuer. + + Note: Uses kubectl as ClusterIssuer is a CRD. + """ + import subprocess + + def _run() -> str | None: + result = subprocess.run( + ["kubectl", "get", "clusterissuer", issuer_name, "-o", "yaml"], + capture_output=True, + text=True, + ) + return result.stdout if result.returncode == 0 else None + + return await asyncio.to_thread(_run) + + # ========================================================================= + # Helpers + # ========================================================================= + + def _parse_timeout(self, timeout: str) -> float: + """Parse a timeout string like '120s' or '5m' to seconds.""" + if timeout.endswith("s"): + return float(timeout[:-1]) + elif timeout.endswith("m"): + return float(timeout[:-1]) * 60 + elif timeout.endswith("h"): + return float(timeout[:-1]) * 3600 + else: + return float(timeout) diff --git a/src/infra/k8s/kubectl_controller.py b/src/infra/k8s/kubectl_controller.py new file mode 100644 index 0000000..376e366 --- /dev/null +++ b/src/infra/k8s/kubectl_controller.py @@ -0,0 +1,594 @@ +"""Kubectl-based implementation of KubernetesController. + +Uses subprocess calls to kubectl for all operations. +""" + +from __future__ import annotations + +import asyncio +import json +from datetime import datetime +from pathlib import Path + +from .controller import ( + ClusterIssuerStatus, + CommandResult, + JobInfo, + KubernetesController, + PodInfo, + ReplicaSetInfo, + ServiceInfo, +) + + +class KubectlController(KubernetesController): + """Kubernetes controller using kubectl subprocess calls. + + All methods are async but internally use asyncio.to_thread() + to run blocking subprocess calls without blocking the event loop. + """ + + async def _run_kubectl( + self, + args: list[str], + *, + capture_output: bool = True, + input_data: str | None = None, + ) -> CommandResult: + """Run a kubectl command asynchronously. 
+ + Args: + args: Command arguments (without 'kubectl' prefix) + capture_output: Whether to capture stdout/stderr + input_data: Optional input to send to stdin + + Returns: + CommandResult with execution results + """ + import subprocess + + cmd = ["kubectl", *args] + + def _run() -> CommandResult: + result = subprocess.run( + cmd, + capture_output=capture_output, + text=True, + input=input_data, + ) + return CommandResult( + success=result.returncode == 0, + stdout=result.stdout or "", + stderr=result.stderr or "", + returncode=result.returncode, + ) + + return await asyncio.to_thread(_run) + + # ========================================================================= + # Cluster Context + # ========================================================================= + + async def get_current_context(self) -> str: + """Get the current kubectl context name.""" + result = await self._run_kubectl(["config", "current-context"]) + return result.stdout.strip() if result.success else "unknown" + + async def is_minikube_context(self) -> bool: + """Check if the current kubectl context is Minikube.""" + result = await self._run_kubectl(["config", "current-context"]) + if not result.success: + return False + return "minikube" in result.stdout.strip().lower() + + # ========================================================================= + # Namespace Operations + # ========================================================================= + + async def namespace_exists(self, namespace: str) -> bool: + """Check if a namespace exists.""" + result = await self._run_kubectl(["get", "namespace", namespace]) + return result.success + + async def delete_namespace( + self, + namespace: str, + *, + wait: bool = True, + timeout: str = "120s", + ) -> CommandResult: + """Delete a Kubernetes namespace and all its resources.""" + args = ["delete", "namespace", namespace] + if wait: + args.append("--wait=true") + args.extend(["--timeout", timeout]) + return await self._run_kubectl(args) + + async def delete_pvcs(self, namespace: str) -> CommandResult: + """Delete all PersistentVolumeClaims in a namespace.""" + return await self._run_kubectl(["delete", "pvc", "--all", "-n", namespace]) + + # ========================================================================= + # Resource Operations + # ========================================================================= + + async def apply_manifest(self, manifest_path: Path) -> CommandResult: + """Apply a Kubernetes manifest file.""" + return await self._run_kubectl(["apply", "-f", str(manifest_path)]) + + async def delete_resources_by_label( + self, + resource_types: str, + namespace: str, + label_selector: str, + *, + force: bool = False, + ) -> CommandResult: + """Delete Kubernetes resources matching a label selector.""" + args = [ + "delete", + resource_types, + "-n", + namespace, + "-l", + label_selector, + ] + if force: + args.extend(["--force", "--grace-period=0"]) + return await self._run_kubectl(args) + + async def delete_helm_secrets( + self, + namespace: str, + release_name: str, + ) -> CommandResult: + """Delete Helm release metadata secrets.""" + return await self._run_kubectl( + [ + "delete", + "secret", + "-n", + namespace, + "-l", + f"name={release_name},owner=helm", + ] + ) + + # ========================================================================= + # Deployment Operations + # ========================================================================= + + async def get_deployments(self, namespace: str) -> list[str]: + """Get list of deployment names in a 
namespace.""" + result = await self._run_kubectl( + [ + "get", + "deployments", + "-n", + namespace, + "-o", + "jsonpath={.items[*].metadata.name}", + ] + ) + if not result.success or not result.stdout: + return [] + return result.stdout.strip().split() + + async def rollout_restart( + self, + resource_type: str, + namespace: str, + name: str | None = None, + ) -> CommandResult: + """Trigger a rolling restart of a deployment/daemonset/statefulset.""" + if name: + args = [ + "rollout", + "restart", + resource_type, + name, + "-n", + namespace, + ] + else: + args = ["rollout", "restart", resource_type, "-n", namespace] + return await self._run_kubectl(args) + + async def rollout_status( + self, + resource_type: str, + namespace: str, + name: str | None = None, + *, + timeout: str = "300s", + ) -> CommandResult: + """Wait for a rollout to complete.""" + if name: + args = [ + "rollout", + "status", + resource_type, + name, + "-n", + namespace, + f"--timeout={timeout}", + ] + else: + args = [ + "rollout", + "status", + resource_type, + "-n", + namespace, + f"--timeout={timeout}", + ] + return await self._run_kubectl(args, capture_output=False) + + async def get_deployment_revision( + self, + name: str, + namespace: str, + ) -> str | None: + """Get the current revision number of a deployment.""" + result = await self._run_kubectl( + [ + "get", + "deployment", + name, + "-n", + namespace, + "-o", + "jsonpath={.metadata.annotations.deployment\\.kubernetes\\.io/revision}", + ] + ) + return result.stdout.strip() if result.success and result.stdout else None + + # ========================================================================= + # ReplicaSet Operations + # ========================================================================= + + async def get_replicasets(self, namespace: str) -> list[ReplicaSetInfo]: + """Get all ReplicaSets in a namespace.""" + result = await self._run_kubectl( + ["get", "replicasets", "-n", namespace, "-o", "json"] + ) + if not result.success or not result.stdout: + return [] + + try: + data = json.loads(result.stdout) + replicasets = [] + + for rs in data.get("items", []): + metadata = rs.get("metadata", {}) + spec = rs.get("spec", {}) + annotations = metadata.get("annotations", {}) + owner_refs = metadata.get("ownerReferences", []) + + # Parse creation timestamp + created_at = None + if creation_ts := metadata.get("creationTimestamp"): + try: + created_at = datetime.fromisoformat( + creation_ts.replace("Z", "+00:00") + ) + except ValueError: + pass + + # Get owner deployment name + owner_deployment = None + if owner_refs: + owner_deployment = owner_refs[0].get("name") + + replicasets.append( + ReplicaSetInfo( + name=metadata.get("name", ""), + replicas=spec.get("replicas", 0), + revision=annotations.get( + "deployment.kubernetes.io/revision", "" + ), + created_at=created_at, + owner_deployment=owner_deployment, + ) + ) + + return replicasets + except json.JSONDecodeError: + return [] + + async def delete_replicaset( + self, + name: str, + namespace: str, + ) -> CommandResult: + """Delete a specific ReplicaSet.""" + return await self._run_kubectl(["delete", "replicaset", name, "-n", namespace]) + + async def scale_replicaset( + self, + name: str, + namespace: str, + replicas: int, + ) -> CommandResult: + """Scale a ReplicaSet to a specific number of replicas.""" + return await self._run_kubectl( + [ + "scale", + "replicaset", + name, + f"--replicas={replicas}", + "-n", + namespace, + ] + ) + + # 
========================================================================= + # Pod Operations + # ========================================================================= + + async def get_pods(self, namespace: str) -> list[PodInfo]: + """Get all pods in a namespace with their status.""" + result = await self._run_kubectl(["get", "pods", "-n", namespace, "-o", "json"]) + if not result.success or not result.stdout: + return [] + + try: + data = json.loads(result.stdout) + pods = [] + + for pod in data.get("items", []): + metadata = pod.get("metadata", {}) + name = metadata.get("name", "") + creation_timestamp = metadata.get("creationTimestamp", "") + status = pod.get("status", {}) + + # Check if pod is owned by a Job + job_owner = "" + for owner_ref in metadata.get("ownerReferences", []): + if owner_ref.get("kind") == "Job": + job_owner = owner_ref.get("name", "") + break + + # Determine pod status + phase = status.get("phase", "Unknown") + container_statuses = status.get("containerStatuses", []) + + pod_status = phase + restarts = 0 + + for cs in container_statuses: + restarts += cs.get("restartCount", 0) + state = cs.get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "") + if reason: + pod_status = reason + elif "terminated" in state: + reason = state["terminated"].get("reason", "") + if reason == "Error": + pod_status = "Error" + + pods.append( + PodInfo( + name=name, + status=pod_status, + restarts=restarts, + creation_timestamp=creation_timestamp, + job_owner=job_owner, + ip=status.get("podIP", ""), + node=spec.get("nodeName", "") + if (spec := pod.get("spec")) + else "", + ) + ) + + return pods + except json.JSONDecodeError: + return [] + + async def wait_for_pods( + self, + namespace: str, + label_selector: str, + *, + condition: str = "ready", + timeout: str = "300s", + ) -> CommandResult: + """Wait for pods matching a selector to reach a condition.""" + return await self._run_kubectl( + [ + "wait", + "--for", + f"condition={condition}", + "pod", + "-l", + label_selector, + "-n", + namespace, + f"--timeout={timeout}", + ], + capture_output=False, + ) + + async def get_pod_logs( + self, + namespace: str, + pod: str | None = None, + *, + container: str | None = None, + label_selector: str | None = None, + follow: bool = False, + tail: int = 100, + previous: bool = False, + ) -> CommandResult: + """Get logs from Kubernetes pods.""" + args = ["logs", "-n", namespace] + + if pod: + args.append(pod) + elif label_selector: + args.extend(["-l", label_selector, "--all-containers=true"]) + + if container: + args.extend(["-c", container]) + + if follow: + args.append("-f") + + args.append(f"--tail={tail}") + + if previous: + args.append("--previous") + + return await self._run_kubectl(args, capture_output=not follow) + + # ========================================================================= + # Job Operations + # ========================================================================= + + async def get_jobs(self, namespace: str) -> list[JobInfo]: + """Get all jobs in a namespace with their status.""" + result = await self._run_kubectl(["get", "jobs", "-n", namespace, "-o", "json"]) + if not result.success or not result.stdout: + return [] + + try: + data = json.loads(result.stdout) + jobs = [] + + for job in data.get("items", []): + name = job.get("metadata", {}).get("name", "") + status = job.get("status", {}) + + if status.get("succeeded", 0) > 0: + job_status = "Complete" + elif status.get("failed", 0) > 0: + job_status = "Failed" + elif 
status.get("active", 0) > 0: + job_status = "Running" + else: + job_status = "Unknown" + + jobs.append(JobInfo(name=name, status=job_status)) + + return jobs + except json.JSONDecodeError: + return [] + + # ========================================================================= + # Service Operations + # ========================================================================= + + async def get_services(self, namespace: str) -> list[ServiceInfo]: + """Get all services in a namespace.""" + result = await self._run_kubectl( + ["get", "services", "-n", namespace, "-o", "json"] + ) + if not result.success or not result.stdout: + return [] + + try: + data = json.loads(result.stdout) + services = [] + + for svc in data.get("items", []): + metadata = svc.get("metadata", {}) + spec = svc.get("spec", {}) + status = svc.get("status", {}) + + # Get external IP from LoadBalancer status + external_ip = "" + lb_ingress = status.get("loadBalancer", {}).get("ingress", []) + if lb_ingress: + external_ip = lb_ingress[0].get( + "ip", lb_ingress[0].get("hostname", "") + ) + + # Format ports + ports = [] + for port in spec.get("ports", []): + port_str = f"{port.get('port')}" + if target := port.get("targetPort"): + port_str += f":{target}" + if proto := port.get("protocol"): + port_str += f"/{proto}" + ports.append(port_str) + + services.append( + ServiceInfo( + name=metadata.get("name", ""), + type=spec.get("type", ""), + cluster_ip=spec.get("clusterIP", ""), + external_ip=external_ip, + ports=",".join(ports), + ) + ) + + return services + except json.JSONDecodeError: + return [] + + # ========================================================================= + # Cert-Manager Operations + # ========================================================================= + + async def check_cert_manager_installed(self) -> bool: + """Check if cert-manager is installed in the cluster.""" + result = await self._run_kubectl( + ["get", "pods", "-n", "cert-manager", "-o", "name"] + ) + return result.success and bool(result.stdout.strip()) + + async def get_cluster_issuer_status( + self, + issuer_name: str, + ) -> ClusterIssuerStatus: + """Get the status of a cert-manager ClusterIssuer.""" + result = await self._run_kubectl( + [ + "get", + "clusterissuer", + issuer_name, + "-o", + "json", + ] + ) + + if not result.success: + return ClusterIssuerStatus( + exists=False, + ready=False, + message="ClusterIssuer not found", + ) + + try: + data = json.loads(result.stdout) + conditions = data.get("status", {}).get("conditions", []) + + ready = False + message = "" + + for condition in conditions: + if condition.get("type") == "Ready": + ready = condition.get("status") == "True" + message = condition.get("message", "") + break + + return ClusterIssuerStatus( + exists=True, + ready=ready, + message=message, + ) + except json.JSONDecodeError: + return ClusterIssuerStatus( + exists=True, + ready=False, + message="Failed to parse ClusterIssuer status", + ) + + async def get_cluster_issuer_yaml(self, issuer_name: str) -> str | None: + """Get the YAML representation of a ClusterIssuer.""" + result = await self._run_kubectl( + ["get", "clusterissuer", issuer_name, "-o", "yaml"] + ) + return result.stdout if result.success else None diff --git a/src/infra/k8s/utils.py b/src/infra/k8s/utils.py new file mode 100644 index 0000000..a7b1960 --- /dev/null +++ b/src/infra/k8s/utils.py @@ -0,0 +1,47 @@ +"""Utility functions for the Kubernetes infrastructure layer. 
+ +Provides helper functions for running async code in sync contexts +and other common utilities. +""" + +from __future__ import annotations + +import asyncio +from collections.abc import Coroutine +from typing import Any + + +def run_sync[T](coro: Coroutine[Any, Any, T]) -> T: + """Run an async coroutine in a blocking sync context. + + This is useful for calling async KubernetesController methods + from synchronous CLI commands. + + Args: + coro: The coroutine to execute + + Returns: + The result of the coroutine + + Example: + from src.infra.k8s import KubectlController, run_sync + + controller = KubectlController() + pods = run_sync(controller.get_pods("my-namespace")) + """ + try: + loop = asyncio.get_running_loop() + except RuntimeError: + # No running loop, create a new one + return asyncio.run(coro) + else: + # We're inside an async context, use run_until_complete + # This handles nested async calls + if loop.is_running(): + # Create a new loop in a thread to avoid blocking + import concurrent.futures + + with concurrent.futures.ThreadPoolExecutor() as pool: + future = pool.submit(asyncio.run, coro) + return future.result() + return loop.run_until_complete(coro) diff --git a/tests/e2e/test_copier_to_deployment.py b/tests/e2e/test_copier_to_deployment.py index 57dda63..4aa87bb 100644 --- a/tests/e2e/test_copier_to_deployment.py +++ b/tests/e2e/test_copier_to_deployment.py @@ -536,9 +536,8 @@ def test_07_docker_compose_prod_deployment(self): "uv", "run", "api-forge-cli", - "deploy", - "down", "prod", + "down", "--volumes", "--yes", ], @@ -638,7 +637,7 @@ def test_07_docker_compose_prod_deployment(self): # Start production deployment try: result = self.run_command( - ["uv", "run", "api-forge-cli", "deploy", "up", "prod"], + ["uv", "run", "api-forge-cli", "prod", "up"], cwd=project_dir, timeout=600, # 10 minutes for building images in CI ) @@ -714,7 +713,7 @@ def test_07_docker_compose_prod_deployment(self): # Check deployment status result = self.run_command( - ["uv", "run", "api-forge-cli", "deploy", "status", "prod"], + ["uv", "run", "api-forge-cli", "prod", "status"], cwd=project_dir, ) @@ -817,9 +816,8 @@ def test_07_docker_compose_prod_deployment(self): "uv", "run", "api-forge-cli", - "deploy", - "down", "prod", + "down", "--volumes", "--yes", ], @@ -935,7 +933,7 @@ def test_08_kubernetes_deployment(self): # Deploy to Kubernetes (with real-time output streaming) print("🚀 Starting K8s deployment with real-time output...") result = self.run_command( - ["uv", "run", "api-forge-cli", "deploy", "up", "k8s"], + ["uv", "run", "api-forge-cli", "k8s", "up"], cwd=project_dir, timeout=600, stream_output=True, diff --git a/tests/unit/cli/deployment/test_validator.py b/tests/unit/cli/deployment/test_validator.py index 6dbf336..c2d2e7c 100644 --- a/tests/unit/cli/deployment/test_validator.py +++ b/tests/unit/cli/deployment/test_validator.py @@ -13,6 +13,7 @@ ValidationResult, ValidationSeverity, ) +from src.infra.k8s.controller import JobInfo, PodInfo class TestValidationResult: @@ -168,7 +169,7 @@ def test_validate_detects_failed_jobs( mock_commands.kubectl.namespace_exists.return_value = True mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [ - {"name": "postgres-verifier", "status": "Failed"}, + JobInfo(name="postgres-verifier", status="Failed"), ] mock_commands.kubectl.get_pods.return_value = [] @@ -188,7 +189,7 @@ def test_validate_any_failed_job_is_warning( mock_commands.kubectl.namespace_exists.return_value = True 
mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [ - {"name": "migration-job", "status": "Failed"}, + JobInfo(name="migration-job", status="Failed"), ] mock_commands.kubectl.get_pods.return_value = [] @@ -208,7 +209,7 @@ def test_validate_detects_crashloop_pods( mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [] mock_commands.kubectl.get_pods.return_value = [ - {"name": "api-forge-app-xyz", "status": "CrashLoopBackOff"}, + PodInfo(name="api-forge-app-xyz", status="CrashLoopBackOff"), ] result = validator.validate("api-forge-prod") @@ -227,7 +228,7 @@ def test_validate_detects_pending_pods( mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [] mock_commands.kubectl.get_pods.return_value = [ - {"name": "api-forge-app-xyz", "status": "Pending"}, + PodInfo(name="api-forge-app-xyz", status="Pending"), ] result = validator.validate("api-forge-prod") @@ -246,7 +247,7 @@ def test_validate_detects_error_pods( mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [] mock_commands.kubectl.get_pods.return_value = [ - {"name": "api-forge-app-xyz", "status": "Error"}, + PodInfo(name="api-forge-app-xyz", status="Error"), ] result = validator.validate("api-forge-prod") @@ -269,19 +270,19 @@ def test_validate_job_pods_only_checks_most_recent( mock_commands.kubectl.get_jobs.return_value = [] mock_commands.kubectl.get_pods.return_value = [ # Old pod from first attempt - failed - { - "name": "postgres-verifier-abc", - "status": "Error", - "jobOwner": "postgres-verifier", - "creationTimestamp": "2025-01-01T10:00:00Z", - }, + PodInfo( + name="postgres-verifier-abc", + status="Error", + job_owner="postgres-verifier", + creation_timestamp="2025-01-01T10:00:00Z", + ), # Newer pod from second attempt - succeeded - { - "name": "postgres-verifier-def", - "status": "Succeeded", - "jobOwner": "postgres-verifier", - "creationTimestamp": "2025-01-01T10:05:00Z", - }, + PodInfo( + name="postgres-verifier-def", + status="Succeeded", + job_owner="postgres-verifier", + creation_timestamp="2025-01-01T10:05:00Z", + ), ] result = validator.validate("api-forge-prod") @@ -299,19 +300,19 @@ def test_validate_job_pods_flags_if_most_recent_failed( mock_commands.kubectl.get_jobs.return_value = [] mock_commands.kubectl.get_pods.return_value = [ # Old pod succeeded - { - "name": "postgres-verifier-abc", - "status": "Succeeded", - "jobOwner": "postgres-verifier", - "creationTimestamp": "2025-01-01T10:00:00Z", - }, + PodInfo( + name="postgres-verifier-abc", + status="Succeeded", + job_owner="postgres-verifier", + creation_timestamp="2025-01-01T10:00:00Z", + ), # Newer pod failed - { - "name": "postgres-verifier-def", - "status": "Error", - "jobOwner": "postgres-verifier", - "creationTimestamp": "2025-01-01T10:05:00Z", - }, + PodInfo( + name="postgres-verifier-def", + status="Error", + job_owner="postgres-verifier", + creation_timestamp="2025-01-01T10:05:00Z", + ), ] result = validator.validate("api-forge-prod") @@ -329,11 +330,11 @@ def test_validate_detects_multiple_issues( mock_commands.kubectl.namespace_exists.return_value = True mock_commands.helm.list_releases.return_value = [] mock_commands.kubectl.get_jobs.return_value = [ - {"name": "postgres-verifier", "status": "Failed"}, + JobInfo(name="postgres-verifier", status="Failed"), ] mock_commands.kubectl.get_pods.return_value = [ - {"name": "api-forge-app-xyz", "status": "CrashLoopBackOff"}, - {"name": 
"api-forge-worker-abc", "status": "Pending"}, + PodInfo(name="api-forge-app-xyz", status="CrashLoopBackOff"), + PodInfo(name="api-forge-worker-abc", status="Pending"), ] result = validator.validate("api-forge-prod") diff --git a/uv.lock b/uv.lock index ddd3a2d..0d2d1de 100644 --- a/uv.lock +++ b/uv.lock @@ -49,6 +49,7 @@ dependencies = [ { name = "fastapi-limiter" }, { name = "httpx" }, { name = "jinja2" }, + { name = "kr8s" }, { name = "loguru" }, { name = "psycopg2-binary" }, { name = "pydantic", extra = ["email"] }, @@ -87,6 +88,7 @@ requires-dist = [ { name = "fastapi-limiter", specifier = ">=0.1.6" }, { name = "httpx", specifier = ">=0.27.2" }, { name = "jinja2", specifier = ">=3.1.0" }, + { name = "kr8s", specifier = ">=0.20.14" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "psycopg2-binary", specifier = ">=2.9.11" }, { name = "pydantic", extras = ["email"], specifier = ">=2.11.9" }, @@ -589,6 +591,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "httpx-ws" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpcore" }, + { name = "httpx" }, + { name = "wsproto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4a/32/6f7198f55d94063ea84487a31cdd3e149d2702dc0804fc5de06ed12ef2c2/httpx_ws-0.8.2.tar.gz", hash = "sha256:ba0d4aa76e1c8a27bd5e88984ecdcdc28f7bf30b40cb0989a4c1438d07fa52c7", size = 105734, upload-time = "2025-11-07T12:57:36.566Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/cd/2008972ddc4c2139b9813d8a097e53dcc74b2a16a85b4069294457954232/httpx_ws-0.8.2-py3-none-any.whl", hash = "sha256:f8898ddb84cbf98c562e8e796675bc68c215fa1d453d54a7fcd935aca8198cc8", size = 15404, upload-time = "2025-11-07T12:57:35.176Z" }, +] + [[package]] name = "identify" version = "2.6.15" @@ -641,6 +658,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/b9/313e8f2f2e9517ae050a692ae7b3e4b3f17cc5e6dfea0db51fe14e586580/jinja2_ansible_filters-1.3.2-py3-none-any.whl", hash = "sha256:e1082f5564917649c76fed239117820610516ec10f87735d0338688800a55b34", size = 18975, upload-time = "2022-06-30T14:08:49.571Z" }, ] +[[package]] +name = "kr8s" +version = "0.20.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "cachetools" }, + { name = "cryptography" }, + { name = "httpx" }, + { name = "httpx-ws" }, + { name = "packaging" }, + { name = "python-box" }, + { name = "python-jsonpath" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/66/90689a4d960c4fb245d9baf43d9648fe93cd957ddb7f12ce2c12f7ff9700/kr8s-0.20.14.tar.gz", hash = "sha256:e9f859359de0a9c511ee83b119bd1d2a928ee15d59daaf9fc2f11bd37c2bd67b", size = 2838630, upload-time = "2025-12-01T15:37:41.151Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/fc/7d15dd15dc6fada3cae8baa1ae65fe07f9d5ee03d20353a0fef08067cfba/kr8s-0.20.14-py3-none-any.whl", hash = "sha256:33384c0d2e261e95e8f146415dc72f9b255e6632dfe746540790332560184546", size = 86660, upload-time = "2025-12-01T15:37:39.757Z" }, +] + [[package]] name = "loguru" version = "0.7.3" @@ -1039,6 +1076,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, ] +[[package]] +name = "python-box" +version = "7.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/f7/635eed8c500adf26208e86e985bbffb6ff039cd8950e3a4749ceca904218/python_box-7.3.2.tar.gz", hash = "sha256:028b9917129e67f311932d93347b8a4f1b500d7a5a2870ee3c035f4e7b19403b", size = 45771, upload-time = "2025-01-16T19:10:05.221Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/39/8bec609e93dbc5e0d3ea26cfb5af3ca78915f7a55ef5414713462fedeb59/python_box-7.3.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1dfc3b9b073f3d7cad1fa90de98eaaa684a494d0574bbc0666f74fa8307fd6b6", size = 1804675, upload-time = "2025-01-16T19:10:23.281Z" }, + { url = "https://files.pythonhosted.org/packages/88/ae/baf3a8057d8129896a7e02619df43ea0d918fc5b2bb66eb6e2470595fbac/python_box-7.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca4685a7f764b5a71b6e08535ce2a96b7964bb63d8cb4df10f6bb7147b6c54b", size = 4265645, upload-time = "2025-01-16T19:15:34.087Z" }, + { url = "https://files.pythonhosted.org/packages/43/90/72367e03033c11a5e82676ee389b572bf136647ff4e3081557392b37e1ad/python_box-7.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e143295f74d47a9ab24562ead2375c9be10629599b57f2e86717d3fff60f82a9", size = 1206740, upload-time = "2025-01-16T19:11:30.635Z" }, + { url = "https://files.pythonhosted.org/packages/37/13/8a990c6e2b6cc12700dce16f3cb383324e6d9a30f604eca22a2fdf84c923/python_box-7.3.2-py3-none-any.whl", hash = "sha256:fd7d74d5a848623f93b5221fd9fb00b8c00ff0e130fa87f396277aa188659c92", size = 29479, upload-time = "2025-01-16T19:10:02.749Z" }, +] + [[package]] name = "python-dotenv" version = "1.1.1" @@ -1048,6 +1097,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, ] +[[package]] +name = "python-jsonpath" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/db/f1f19205b0df6eb0195de154dc6c967448802dfb573487fa8a4206a243cd/python_jsonpath-2.0.1.tar.gz", hash = "sha256:32a84ebb2dc0ec1b42a6e165b0f9174aef8310bad29154ad9aee31ac37cca18f", size = 49659, upload-time = "2025-09-13T08:01:47.82Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/d4/64d7cdc01269f5fed45e6a69f5395c30451958c299ca5cbc1442a4f3f9b9/python_jsonpath-2.0.1-py3-none-any.whl", hash = "sha256:ebd518b7c883acc5b976518d76b6c96288405edec7d9ef838641869c1e1a5eb7", size = 64060, upload-time = "2025-09-13T08:01:46.184Z" }, +] + [[package]] name = "pywin32" version = "311" @@ -1493,3 +1551,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/b3/8f/705086c9d734d3b66 wheels = [ { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, ] + +[[package]] +name = "wsproto" 
+version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116, upload-time = "2025-11-20T18:18:01.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" }, +]
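
Usage sketch (illustrative; not part of the diff above). Both controllers implement the
same KubernetesController interface, so callers can swap the kubectl-based and kr8s-based
implementations, and synchronous CLI code can bridge into the async API with run_sync.
The namespace name is an example value, and the Kr8sController import path assumes no
package-level re-export.

    from src.infra.k8s import KubectlController, run_sync
    from src.infra.k8s.kr8s_controller import Kr8sController

    # Either implementation satisfies the KubernetesController interface
    controller = KubectlController()  # or Kr8sController()

    # Bridge from a synchronous CLI command into the async controller API
    pods = run_sync(controller.get_pods("api-forge-prod"))
    for pod in pods:
        print(pod.name, pod.status, pod.restarts)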