diff --git a/.github/scripts/codebase_agent/__init__.py b/.github/scripts/codebase_agent/__init__.py new file mode 100644 index 0000000..0d6b1fd --- /dev/null +++ b/.github/scripts/codebase_agent/__init__.py @@ -0,0 +1 @@ +"""Codebase Agent - AI-powered code review assistant.""" diff --git a/.github/scripts/codebase_agent/ai_client.py b/.github/scripts/codebase_agent/ai_client.py new file mode 100644 index 0000000..fb14703 --- /dev/null +++ b/.github/scripts/codebase_agent/ai_client.py @@ -0,0 +1,166 @@ +"""AI client and GitHub API utilities.""" +import os +import sys +import requests +from anthropic import Anthropic + +try: + from anthropic import AnthropicVertex + + VERTEX_AVAILABLE = True +except ImportError: + VERTEX_AVAILABLE = False + +# Hardcoded agent context for portability (template-friendly) +AGENT_CONTEXT = """ +**Your Role**: +You are the Codebase Agent for this repository. You assist with code reviews, +technical guidance, and maintaining code quality standards. + +**Operating Principles**: + +1. **Safety First** + - Show plan before major changes + - Explain reasoning and alternatives + - Ask for clarification when requirements are ambiguous + +2. 
def _get_claude_client():
    """Create the Claude client, preferring Vertex AI over the Anthropic API.

    Vertex AI is chosen when the optional ``AnthropicVertex`` import succeeded
    and ``GCP_PROJECT_ID`` is set to a non-empty value. Otherwise the direct
    Anthropic API is used with ``ANTHROPIC_API_KEY``.

    Returns:
        Anthropic or AnthropicVertex client

    Raises:
        RuntimeError: If no credentials configured
    """
    if VERTEX_AVAILABLE:
        project_id = os.environ.get("GCP_PROJECT_ID")

        if project_id:
            # Use `or` rather than a .get() default: the workflow exports
            # GCP_REGION from a repository variable, and an undefined variable
            # arrives as an empty string, not as a missing key.
            region = os.environ.get("GCP_REGION") or "us-central1"
            try:
                return AnthropicVertex(project_id=project_id, region=region)
            except Exception as e:
                # Best effort: report why Vertex AI was skipped, then fall
                # through to the API-key path below.
                print(
                    f"⚠️ Vertex AI unavailable ({e}), falling back to Anthropic API",
                    file=sys.stderr,
                )

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No AI credentials found. Set either:\n"
            " - GCP_PROJECT_ID (for Vertex AI), or\n"
            " - ANTHROPIC_API_KEY (for Anthropic API)"
        )

    return Anthropic(api_key=api_key)


def call_claude(repo_name: str, command: str, url: str) -> str:
    """Send the agent prompt to Claude and return the reply text.

    Args:
        repo_name: Repository name (owner/repo)
        command: User command to execute
        url: GitHub issue/PR URL

    Returns:
        AI response text (all text blocks of the reply, concatenated)

    Raises:
        RuntimeError: If no credentials are configured, the API call fails,
            or the reply contains no text
    """
    client = _get_claude_client()

    # Vertex AI publishes Claude models under "name@version" identifiers,
    # while the Anthropic API uses "name-version"; pick the one that matches
    # the client that was actually constructed.
    if VERTEX_AVAILABLE and isinstance(client, AnthropicVertex):
        model = "claude-sonnet-4-5@20250929"
    else:
        model = "claude-sonnet-4-5-20250929"

    prompt = f"""You are the Codebase Agent for {repo_name}.

{AGENT_CONTEXT}

---

**Current Task**:
Command: {command}
Context: {url}

Provide a helpful, concise response following the operating principles above."""

    try:
        message = client.messages.create(
            model=model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise RuntimeError(f"AI API error: {e}") from e

    # Do not assume the first content block exists or is a text block.
    text = "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    )
    if not text:
        raise RuntimeError("AI API error: response contained no text content")
    return text


def post_github_comment(repo: str, issue_number: int, body: str) -> None:
    """Post a comment on a GitHub issue or pull request.

    Args:
        repo: Repository name (owner/repo)
        issue_number: Issue or PR number
        body: Comment body text

    Raises:
        RuntimeError: If GITHUB_TOKEN is not set, or if the request fails or
            GitHub answers with an error status
    """
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        raise RuntimeError("GITHUB_TOKEN environment variable not set")

    # Pull requests share the issues comment endpoint.
    url = f"https://api.github.com/repos/{repo}/issues/{issue_number}/comments"

    try:
        response = requests.post(
            url,
            headers={
                "Authorization": f"token {token}",
                "Accept": "application/vnd.github.v3+json",
            },
            json={"body": body},
            timeout=30,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"GitHub API error: {e}") from e
def parse_github_context(context_json: str) -> dict:
    """Parse the GitHub Actions context and locate the issue or PR it refers to.

    A ``pull_request`` entry in the event takes precedence over an ``issue``
    entry when both are present.

    Args:
        context_json: JSON string of GitHub context

    Returns:
        Dict with repository, number, url, and event

    Raises:
        ValueError: If the JSON is invalid, the context lacks ``repository``
            or ``event``, or no issue or PR is found in the event
    """
    try:
        context = json.loads(context_json)
    except TypeError as e:
        # json.JSONDecodeError is already a ValueError; only a non-string
        # input needs translating to the documented exception type.
        raise ValueError(f"GitHub context must be a JSON string: {e}") from e

    if not isinstance(context, dict):
        raise ValueError("GitHub context must be a JSON object")
    if "repository" not in context:
        raise ValueError("GitHub context has no 'repository'")

    event = context.get("event")
    if not isinstance(event, dict):
        raise ValueError("GitHub context has no 'event' object")

    for kind in ("pull_request", "issue"):
        target = event.get(kind)
        if isinstance(target, dict):
            break
    else:
        raise ValueError("No issue or PR found in context")

    try:
        number = target["number"]
        url = target["html_url"]
    except KeyError as e:
        raise ValueError(f"GitHub {kind} payload is missing {e}") from e

    return {
        "repository": context["repository"],
        "number": number,
        "url": url,
        "event": event,
    }


def extract_command(context: dict) -> str:
    """Extract the command that follows an @cba mention in a comment.

    Events without a comment (for example label-triggered runs) and comments
    without a mention yield the default command.

    Args:
        context: Parsed GitHub context from parse_github_context()

    Returns:
        Command string to execute
    """
    import re  # function-scope import: this module only imports json at top level

    default_command = "review this code"

    comment = context["event"].get("comment")
    body = comment.get("body") if isinstance(comment, dict) else None
    if not isinstance(body, str):
        return default_command

    # Case-insensitive because the workflow trigger uses GitHub's contains(),
    # which ignores case. The lookarounds keep e-mail addresses
    # ("dev@cba.example") and longer handles ("@cbatman", "@cba-bot") from
    # being read as a mention.
    mention = re.search(r"(?<![\w@])@cba(?![\w-])", body, re.IGNORECASE)
    if mention is None:
        return default_command

    # Drop separators such as "@cba: ..." or "@cba, ..." before the command.
    command = body[mention.end():].strip().lstrip(",:").strip()
    return command or default_command
def main():
    """Run the agent once for the GitHub event described by the context JSON.

    The context is taken from the first command-line argument. When no
    argument is given it is read from the ``GITHUB_CONTEXT`` environment
    variable, which lets a workflow hand over the payload without placing
    user-controlled text on a shell command line.

    Any failure is reported on stderr and ends the process with exit code 1.
    """
    import os  # function-scope import: this module's top level does not import os

    try:
        # Parse GitHub context from argument, falling back to the environment
        raw_context = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("GITHUB_CONTEXT")
        if not raw_context:
            raise ValueError(
                "GitHub context JSON not provided "
                "(pass it as the first argument or set GITHUB_CONTEXT)"
            )
        context = parse_github_context(raw_context)

        # Extract command
        command = extract_command(context)

        # Call AI
        response = call_claude(
            repo_name=context["repository"], command=command, url=context["url"]
        )

        # Post comment
        post_github_comment(
            repo=context["repository"],
            issue_number=context["number"],
            body=f"## 🤖 Codebase Agent\n\n{response}",
        )

        print(f"✅ Posted response to {context['url']}")

    except Exception as e:
        # Top-level boundary: turn any failure into a non-zero exit so the
        # workflow step is marked as failed.
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
jobs:
  codebase-agent:
    runs-on: ubuntu-latest
    # Run only for an @cba mention in a comment or for one of the cba-* labels.
    if: |
      contains(github.event.comment.body, '@cba') ||
      contains(github.event.issue.labels.*.name, 'cba-review') ||
      contains(github.event.pull_request.labels.*.name, 'cba-review') ||
      contains(github.event.issue.labels.*.name, 'cba-help') ||
      contains(github.event.pull_request.labels.*.name, 'cba-help')

    steps:
      - uses: actions/checkout@v4

      # Uncomment for Vertex AI (removes API key dependency)
      # - name: Authenticate to Google Cloud
      #   uses: google-github-actions/auth@v2
      #   with:
      #     workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
      #     service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - run: pip install anthropic requests google-cloud-aiplatform

      - name: Run Codebase Agent
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Option 1: Anthropic API (default)
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Option 2: Vertex AI (optional - remove API key dependency)
          GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
          GCP_REGION: ${{ vars.GCP_REGION }}
          # The event payload contains user-controlled text (comment bodies,
          # titles). Hand it over through the environment so it is never
          # interpolated into the script below, where a single quote in a
          # comment would end the shell string and run arbitrary commands.
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: |
          cd .github/scripts
          python3 -m codebase_agent.main "$GITHUB_CONTEXT"
machine | Claude Code CLI | Individual development workflows | +| **Deployed Agent** (below) | GitHub Actions | @mentions, labels | Team code reviews, PR automation | + +### Quick Deploy + +**1. Copy the workflow file:** + +See the [reference implementation](/.github/workflows/codebase-agent.yml) for the complete, production-ready workflow. + +**2. Add GitHub Secret:** + +- `ANTHROPIC_API_KEY`: Your Anthropic API key from <https://console.anthropic.com/> + +**3. Usage:** + +```markdown +# In any issue or PR: +@cba please review this PR for security issues +@cba help me understand this error + +# Or use labels: +cba-review → Automatic code review +cba-help → Automatic analysis +``` + +### Implementation Details + +The reference workflow uses: + +- **Modular Python code** - Extracted to `.github/scripts/codebase_agent/` for testability +- **Error handling** - Specific exceptions for API errors, timeouts, rate limits +- **Security** - Command sanitization to prevent prompt injection +- **Safe commands** - Only `review`, `help`, `summarize`, `explain`, `test`, `security` + +### Authentication Options + +The workflow supports two authentication methods with automatic fallback: + +#### Option 1: Anthropic API (Default - Recommended for Quick Start) + +**Best for**: Quick setup, any cloud provider, pay-as-you-go + +**Setup:** + +1. Get API key from <https://console.anthropic.com/> +2. Add GitHub secret: `Settings → Secrets → Actions → New secret` + - Name: `ANTHROPIC_API_KEY` + - Value: `sk-ant-...` +3. Done! + +**Pros:** + +- ✅ Simple setup (1 secret) +- ✅ Works anywhere (no GCP required) +- ✅ Pay-as-you-go pricing + +**Cons:** + +- ❌ Requires API key management +- ❌ Key rotation needed periodically + +#### Option 2: Vertex AI (Advanced - For GCP Users) + +**Best for**: GCP users, enterprise deployments, no API key management + +**Setup:** + +1. Set up GCP Workload Identity Federation (see [setup guide](#gcp-workload-identity-setup) below) +2. Uncomment GCP auth steps in workflow (lines 32-37) +3. 
Add GitHub secrets: + - `GCP_WORKLOAD_IDENTITY_PROVIDER` + - `GCP_SERVICE_ACCOUNT` +4. Add GitHub variables: + - `GCP_PROJECT_ID` + - `GCP_REGION` (optional, defaults to `us-central1`) +5. Done! + +**Pros:** + +- ✅ No API keys (uses Workload Identity) +- ✅ Automatic credential rotation +- ✅ GCP billing integration +- ✅ Audit trail in GCP logs + +**Cons:** + +- ❌ More complex setup +- ❌ Requires GCP project with billing +- ❌ GCP-specific + +#### Automatic Fallback + +The workflow automatically tries Vertex AI first (if configured), then falls back to Anthropic API: + +```text +GCP_PROJECT_ID set? → Try Vertex AI + ↓ Success? → ✅ Use Vertex AI + ↓ Failure? → ⚠️ Fall back to Anthropic API + +ANTHROPIC_API_KEY set? → ✅ Use Anthropic API + ↓ Not set? → ❌ Error (no credentials) +``` + +**Example fallback message** (in workflow logs): + +```text +⚠️ Vertex AI unavailable (Project not found), falling back to Anthropic API +``` + +### GitHub Actions Issues + +| Issue | Solution | +|-------|----------| +| Workflow doesn't trigger | Check `if:` condition matches your use case | +| Response not posted | Verify `ANTHROPIC_API_KEY` or `GCP_PROJECT_ID` is set | +| Module import error | Ensure `cd .github/scripts` before running Python | +| Rate limit errors | Add concurrency limits to workflow | +| Vertex AI fallback warning | Expected if GCP not configured - will use Anthropic API | + +--- + +### GCP Workload Identity Setup + +
+Advanced: Complete GCP Workload Identity Setup Guide + +**Prerequisites:** + +- GCP project with billing enabled +- GitHub repository admin access +- `gcloud` CLI installed + +**Setup script:** + +```bash +export PROJECT_ID="your-gcp-project" +export PROJECT_NUMBER="123456789" # Find in GCP Console → Project Info +export POOL_ID="github-actions-pool" +export PROVIDER_ID="github-provider" +export SERVICE_ACCOUNT="codebase-agent@${PROJECT_ID}.iam.gserviceaccount.com" +export GITHUB_REPO="owner/repo" # e.g., "jeremyeder/reference" + +# 1. Enable required APIs +gcloud services enable iamcredentials.googleapis.com \ + --project="$PROJECT_ID" +gcloud services enable sts.googleapis.com \ + --project="$PROJECT_ID" +gcloud services enable aiplatform.googleapis.com \ + --project="$PROJECT_ID" + +# 2. Create Workload Identity Pool +gcloud iam workload-identity-pools create "$POOL_ID" \ + --project="$PROJECT_ID" \ + --location="global" \ + --display-name="GitHub Actions Pool" + +# 3. Create OIDC Provider +gcloud iam workload-identity-pools providers create-oidc "$PROVIDER_ID" \ + --project="$PROJECT_ID" \ + --location="global" \ + --workload-identity-pool="$POOL_ID" \ + --display-name="GitHub Provider" \ + --attribute-mapping="google.subject=assertion.sub,attribute.repository=assertion.repository" \ + --attribute-condition="assertion.repository=='${GITHUB_REPO}'" \ + --issuer-uri="https://token.actions.githubusercontent.com" + +# 4. Create Service Account +gcloud iam service-accounts create codebase-agent \ + --project="$PROJECT_ID" \ + --display-name="Codebase Agent" + +# 5. Grant Vertex AI permissions +gcloud projects add-iam-policy-binding "$PROJECT_ID" \ + --member="serviceAccount:${SERVICE_ACCOUNT}" \ + --role="roles/aiplatform.user" + +# 6. 
Allow GitHub Actions to impersonate +gcloud iam service-accounts add-iam-policy-binding "$SERVICE_ACCOUNT" \ + --project="$PROJECT_ID" \ + --role="roles/iam.workloadIdentityUser" \ + --member="principalSet://iam.googleapis.com/projects/${PROJECT_NUMBER}/locations/global/workloadIdentityPools/${POOL_ID}/attribute.repository/${GITHUB_REPO}" + +# 7. Output GitHub secrets and variables +echo "" +echo "======================================" +echo "Add to GitHub Secrets (Settings → Secrets → Actions):" +echo "======================================" +echo "GCP_WORKLOAD_IDENTITY_PROVIDER=projects/${PROJECT_NUMBER}/locations/global/workloadIdentityPools/${POOL_ID}/providers/${PROVIDER_ID}" +echo "GCP_SERVICE_ACCOUNT=${SERVICE_ACCOUNT}" +echo "" +echo "======================================" +echo "Add to GitHub Variables (Settings → Secrets → Variables):" +echo "======================================" +echo "GCP_PROJECT_ID=${PROJECT_ID}" +echo "GCP_REGION=us-central1" +echo "" +echo "Then uncomment GCP auth steps in .github/workflows/codebase-agent.yml (lines 32-37)" +``` + +**Verification:** + +```bash +# Test that GitHub Actions can authenticate +gh workflow run codebase-agent.yml + +# Check workflow logs for: +# ✅ "Successfully authenticated to Google Cloud" +# ❌ "Vertex AI unavailable" (means GCP auth failed, falling back) +``` + +
+ +--- + +### Example Usage + +**Developer adds label:** +![Screenshot: User adds "cba-review" label to PR] + +**Bot posts review:** + +```markdown +## 🤖 Codebase Agent + +I've reviewed this PR. Here are my findings: + +### Security +✅ No SQL injection risks +⚠️ Consider rate limiting (line 42) + +### Performance +⚠️ DB query in loop (lines 67-73) +✅ Good caching implementation + +### Suggestions +1. Add rate limiting: `@limits(calls=100, period=60)` +2. Use bulk query: `User.objects.filter(id__in=ids)` +```