From aa1559b18d5b8daee912b357a6f48c3ccaf555bf Mon Sep 17 00:00:00 2001 From: Andrea Margiovanni Date: Sat, 29 Nov 2025 05:26:02 +0100 Subject: [PATCH 1/3] feat(cli): implement GitHub repository interactive selection (Feature 004) Add interactive menu [A/S/O/L/Q] when repos.txt is missing or empty: - [A] Analyze ALL accessible repositories - [S] Specify repository names manually (owner/repo format) - [O] Analyze organization repositories - [L] Select from list by number (e.g., 1,3,5 or 1-3) - [Q] Quit/Skip GitHub analysis Implementation includes: - GitHubClient.list_user_repos() with pagination support - GitHubClient.list_org_repos() with pagination support - validate_repo_format() and validate_org_name() helpers - format_repo_list() for numbered display - load_github_repos_from_file() for repos.txt parsing - Rate limit handling with wait time display - EOF/KeyboardInterrupt graceful handling - Non-interactive mode support (--quiet flag) Tests: 727 passed (24 new GitHub selection tests) --- CLAUDE.md | 4 +- .../checklists/comprehensive.md | 148 +++++ .../checklists/requirements.md | 37 ++ .../contracts/github-api.yaml | 189 ++++++ .../contracts/internal-api.md | 124 ++++ specs/004-github-repo-selection/data-model.md | 175 +++++ specs/004-github-repo-selection/plan.md | 77 +++ specs/004-github-repo-selection/quickstart.md | 185 ++++++ specs/004-github-repo-selection/research.md | 121 ++++ specs/004-github-repo-selection/spec.md | 219 ++++++ specs/004-github-repo-selection/tasks.md | 277 ++++++++ src/github_analyzer/api/client.py | 54 ++ src/github_analyzer/cli/main.py | 344 ++++++++++ .../integration/test_interactive_selection.py | 625 ++++++++++++++++++ tests/unit/api/test_client.py | 168 +++++ 15 files changed, 2746 insertions(+), 1 deletion(-) create mode 100644 specs/004-github-repo-selection/checklists/comprehensive.md create mode 100644 specs/004-github-repo-selection/checklists/requirements.md create mode 100644 
specs/004-github-repo-selection/contracts/github-api.yaml create mode 100644 specs/004-github-repo-selection/contracts/internal-api.md create mode 100644 specs/004-github-repo-selection/data-model.md create mode 100644 specs/004-github-repo-selection/plan.md create mode 100644 specs/004-github-repo-selection/quickstart.md create mode 100644 specs/004-github-repo-selection/research.md create mode 100644 specs/004-github-repo-selection/spec.md create mode 100644 specs/004-github-repo-selection/tasks.md diff --git a/CLAUDE.md b/CLAUDE.md index 1b6b978..291fbbb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,6 +6,8 @@ Auto-generated from all feature plans. Last updated: 2025-11-28 - Python 3.9+ (per constitution, leveraging type hints) + Standard library (urllib, json, csv, os, re); optional: requests (002-jira-integration) - CSV files for export (same as existing GitHub exports) (002-jira-integration) - Python 3.9+ (per constitution, leveraging type hints) + Standard library only (urllib, json, csv, os, re, datetime, statistics); optional: requests (already used in jira_client.py) (003-jira-quality-metrics) +- Python 3.9+ (as per constitution, leveraging type hints) + Standard library only (urllib, json); optional: requests (existing pattern) (004-github-repo-selection) +- N/A (repos.txt file is input, not storage) (004-github-repo-selection) - Python 3.9+ (as per constitution, leveraging type hints) + Standard library only (urllib, json, csv, os, re); optional: requests (001-modular-refactor) @@ -37,10 +39,10 @@ python github_analyzer.py --days 7 Python 3.9+ (as per constitution, leveraging type hints): Follow standard conventions ## Recent Changes +- 004-github-repo-selection: Added Python 3.9+ (as per constitution, leveraging type hints) + Standard library only (urllib, json); optional: requests (existing pattern) - 003-jira-quality-metrics: Added Python 3.9+ (per constitution, leveraging type hints) + Standard library only (urllib, json, csv, os, re, datetime, 
statistics); optional: requests (already used in jira_client.py) - 002-jira-integration: Added Python 3.9+ (per constitution, leveraging type hints) + Standard library (urllib, json, csv, os, re); optional: requests -- 001-modular-refactor: Added Python 3.9+ (as per constitution, leveraging type hints) + Standard library only (urllib, json, csv, os, re); optional: requests diff --git a/specs/004-github-repo-selection/checklists/comprehensive.md b/specs/004-github-repo-selection/checklists/comprehensive.md new file mode 100644 index 0000000..359dc24 --- /dev/null +++ b/specs/004-github-repo-selection/checklists/comprehensive.md @@ -0,0 +1,148 @@ +# Requirements Quality Checklist: GitHub Repository Interactive Selection + +**Purpose**: Validate specification completeness, clarity, and consistency before implementation +**Created**: 2025-11-29 +**Feature**: [spec.md](../spec.md) +**Focus**: Comprehensive (UX, API, Validation, Non-interactive) +**Depth**: Standard (PR review gate) + +--- + +## Requirement Completeness + +- [x] CHK001 - Are all five menu options [A/S/O/L/Q] explicitly defined with their behavior? [Completeness, Spec §FR-002] ✓ Defined in FR-002 and Display Format section +- [x] CHK002 - Are requirements for repos.txt file loading specified (path, format, empty handling)? [Completeness, Spec §US1] ✓ US1-AC1,2,3 cover all cases +- [x] CHK003 - Are pagination requirements defined for both user repos AND org repos? [Completeness, Spec §FR-007] ✓ FR-007 covers both explicitly +- [x] CHK004 - Are requirements specified for displaying repository descriptions in the list? [Completeness, Display Format] ✓ Added Display Format section with truncation +- [x] CHK005 - Are requirements for private repository visibility indicator defined? [Completeness, Display Format] ✓ Added [private] marker in Display Format +- [x] CHK006 - Are error message content requirements specified (what text to show for each error type)? 
[Completeness, Edge Cases] ✓ Added specific error messages to all edge cases +- [x] CHK007 - Are requirements defined for the "retry or quit" flow after org not found? [Completeness, Spec §Edge Cases] ✓ "allow retry or quit" specified + +--- + +## Requirement Clarity + +- [x] CHK008 - Is "personal repos" clarified with specific affiliation values (owner,collaborator)? [Clarity, Spec §Clarifications] ✓ Clarified in Session 2025-11-29 +- [x] CHK009 - Is the exact menu prompt text specified or left to implementation? [Clarity, Display Format] ✓ Added Menu Prompt Format section with exact text +- [x] CHK010 - Are the performance thresholds "under 10 seconds" and "under 15 seconds" clearly scoped? [Clarity, Spec §SC-002, SC-003] ✓ Scoped to specific repo counts and network conditions +- [x] CHK011 - Is "clear error message" quantified with specific content requirements? [Clarity, Edge Cases] ✓ All error messages now have exact text +- [x] CHK012 - Is "graceful exit" defined with specific behavior and message? [Clarity, Edge Cases] ✓ "GitHub analysis skipped." message specified +- [x] CHK013 - Are the exact validation patterns for repo format documented? [Clarity, Validation Patterns] ✓ Added Validation Patterns section with regex +- [x] CHK014 - Are the exact validation patterns for org name documented? [Clarity, Validation Patterns] ✓ Added with pattern and examples + +--- + +## Requirement Consistency + +- [x] CHK015 - Are menu options consistent between spec (A/S/O/L/Q) and quickstart examples? [Consistency] ✓ All use [A/S/O/L/Q] +- [x] CHK016 - Is the selection format "1,3,5" or "1-3" consistent with existing parse_project_selection()? [Consistency, Spec §FR-010] ✓ Same format documented +- [x] CHK017 - Are EOF/KeyboardInterrupt handling requirements consistent across all menu states? [Consistency, Spec §FR-004] ✓ FR-004 applies universally +- [x] CHK018 - Are error handling patterns consistent between list_user_repos and list_org_repos? 
[Consistency, contracts/internal-api.md] ✓ Same RateLimitError/APIError patterns +- [x] CHK019 - Is the "owner/repo" format requirement consistent across manual entry and API responses? [Consistency, Spec §FR-009, FR-011] ✓ full_name format used consistently + +--- + +## Jira Pattern Consistency (FR-003) + +- [x] CHK020 - Does the menu structure match select_jira_projects pattern (options display, prompt format)? [Consistency, Spec §FR-003] ✓ Same pattern documented in Display Format +- [x] CHK021 - Is the list numbering format consistent with Jira project list display? [Consistency, Spec §FR-003] ✓ Same "N. name - description" format +- [x] CHK022 - Is the selection input parsing reusing or mirroring parse_project_selection()? [Consistency, research.md §5] ✓ Documented in research.md decision +- [x] CHK023 - Is the "invalid choice retry" behavior consistent with Jira selection flow? [Consistency, Spec §FR-003] ✓ Same retry pattern per FR-003 +- [x] CHK024 - Are logging patterns (output.log) consistent with select_jira_projects implementation? [Consistency] ✓ Uses TerminalOutput per contracts + +--- + +## Acceptance Criteria Quality + +- [x] CHK025 - Can SC-001 "within 30 seconds" be objectively measured? [Measurability, Spec §SC-001] ✓ Split into menu (2s) and listing (30s) with clear scope +- [x] CHK026 - Can SC-004 "no regression" be verified with specific test criteria? [Measurability, Spec §SC-004] ✓ Testable: repos.txt loading unchanged +- [x] CHK027 - Can SC-005 "UX mirrors Jira" be verified with specific comparison points? [Measurability, Spec §SC-005] ✓ Menu format, prompts, error handling defined +- [x] CHK028 - Are acceptance scenarios in US1-US4 testable without implementation details? [Measurability] ✓ All scenarios use Given/When/Then format +- [x] CHK029 - Is "all repositories are shown" in US2-AC3 measurable (what if 1000+ repos)? 
[Clarity, Edge Cases] ✓ Partial response edge case added for large lists + +--- + +## Scenario Coverage + +### Primary Flow Coverage +- [x] CHK030 - Are requirements complete for [A] all personal repos flow? [Coverage, Spec §US2] ✓ US2-AC1 +- [x] CHK031 - Are requirements complete for [S] manual specification flow? [Coverage, Spec §US4] ✓ US4 complete +- [x] CHK032 - Are requirements complete for [O] organization repos flow? [Coverage, Spec §US3] ✓ US3 complete +- [x] CHK033 - Are requirements complete for [L] select from list flow? [Coverage, Spec §US2] ✓ US2-AC2,3,4 +- [x] CHK034 - Are requirements complete for [Q] quit/skip flow? [Coverage, Spec §FR-002] ✓ FR-004 covers exit behavior + +### Alternate Flow Coverage +- [x] CHK035 - Are requirements defined for re-prompting after invalid menu choice? [Coverage, FR-003] ✓ Jira pattern includes retry +- [x] CHK036 - Are requirements defined for re-prompting after empty manual input? [Coverage, Spec §US4-AC3] ✓ "can correct or continue" +- [x] CHK037 - Are requirements defined for selecting "all" in list mode? [Coverage, Validation Patterns] ✓ "all" documented as valid input + +### Exception Flow Coverage +- [x] CHK038 - Are requirements defined for API authentication failure? [Coverage, Edge Cases] ✓ Added with specific error message +- [x] CHK039 - Are requirements defined for network timeout during repo listing? [Coverage, Edge Cases] ✓ Added with retry option +- [x] CHK040 - Are requirements defined for partial API response (some repos fetched, then error)? [Coverage, Edge Cases] ✓ Added graceful degradation + +--- + +## Edge Case Coverage + +- [x] CHK041 - Are requirements defined for user with zero repositories? [Edge Case, Edge Cases] ✓ Added with specific message +- [x] CHK042 - Are requirements defined for organization with zero repositories? [Edge Case, Edge Cases] ✓ Added with retry option +- [x] CHK043 - Are rate limit wait time display requirements specified? 
[Edge Case, Spec §Edge Cases] ✓ "Waiting X seconds..." format +- [x] CHK044 - Are requirements for "special characters in org name" clearly defined? [Edge Case, Validation Patterns] ✓ Regex pattern with examples +- [x] CHK045 - Are requirements defined for repos.txt with invalid entries mixed with valid? [Edge Case, US1-AC3] ✓ Similar to US4-AC4 +- [x] CHK046 - Are requirements defined for selection numbers exceeding list length? [Edge Case, Edge Cases] ✓ Added ignore with warning + +--- + +## Non-Functional Requirements + +### Performance +- [x] CHK047 - Are performance requirements scoped to specific network conditions? [Clarity, NFR §Performance] ✓ Added "< 200ms latency" assumption +- [x] CHK048 - Are timeout values for API calls specified? [Coverage, NFR §Assumptions] ✓ Uses GitHubClient default 30s + +### Security +- [x] CHK049 - Are token exposure prevention requirements documented? [Coverage, NFR §Security] ✓ Added Security section +- [x] CHK050 - Is input validation for org name protecting against injection? [Coverage, NFR §Security] ✓ Regex validation + safe URL construction + +### Accessibility +- [x] CHK051 - Are requirements defined for screen reader compatibility of menu output? [Coverage, NFR §Accessibility] ✓ Added Accessibility section +- [x] CHK052 - Are requirements defined for non-ANSI terminal support? [Coverage, NFR §Accessibility] ✓ Plain text, optional formatting + +--- + +## Dependencies & Assumptions + +- [x] CHK053 - Is the GitHub token scope requirement ("repo") documented as assumption? [Assumption, Spec §Assumptions] ✓ First assumption listed +- [x] CHK054 - Is the assumption "user knows org names" validated or alternatives considered? [Assumption, Spec §Assumptions] ✓ Documented as design decision +- [x] CHK055 - Are dependencies on existing GitHubClient methods documented? [Dependency, plan.md] ✓ paginate(), rate limit handling +- [x] CHK056 - Is the dependency on TerminalOutput documented? 
[Dependency, contracts/internal-api.md] ✓ In function signature + +--- + +## Ambiguities & Conflicts + +- [x] CHK057 - Is there ambiguity between "skip" (FR-014) and "error message" (US1-AC4) for non-interactive mode? [Resolved] ✓ US1-AC4 updated to "informational log message" +- [x] CHK058 - Is there conflict between "all repos shown" (US2-AC3) and potential max_pages limit? [Resolved, Edge Cases] ✓ Partial response edge case added +- [x] CHK059 - Is the term "valid repositories" in US1-AC3 defined (format valid? exists on GitHub? accessible?)? [Clarity] ✓ Format validation per FR-011 +- [x] CHK060 - Is the behavior for [O] option after seeing org list (select vs all) fully specified? [Clarity, US3-AC5] ✓ "choose [A] after seeing the list" + +--- + +## Summary + +| Category | Items | Status | +|----------|-------|--------| +| Completeness | 7 | ✅ All resolved | +| Clarity | 7 | ✅ All resolved | +| Consistency | 5 | ✅ All resolved | +| Jira Pattern | 5 | ✅ All resolved | +| Acceptance Criteria | 5 | ✅ All resolved | +| Scenario Coverage | 11 | ✅ All resolved | +| Edge Cases | 6 | ✅ All resolved | +| Non-Functional | 6 | ✅ All resolved | +| Dependencies | 4 | ✅ All resolved | +| Ambiguities | 4 | ✅ All resolved | + +**Total Items**: 60 +**Completed**: 60 +**Status**: ✅ PASS diff --git a/specs/004-github-repo-selection/checklists/requirements.md b/specs/004-github-repo-selection/checklists/requirements.md new file mode 100644 index 0000000..0153d93 --- /dev/null +++ b/specs/004-github-repo-selection/checklists/requirements.md @@ -0,0 +1,37 @@ +# Specification Quality Checklist: GitHub Repository Interactive Selection + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2025-11-29 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory 
sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Spec follows the same pattern as 002-jira-integration FR-009a +- UX consistency with select_jira_projects is explicitly required +- API endpoints mentioned (GET /user/repos, GET /orgs/{org}/repos) are part of the functional description, not implementation details +- Ready for `/speckit.plan` or `/speckit.clarify` diff --git a/specs/004-github-repo-selection/contracts/github-api.yaml b/specs/004-github-repo-selection/contracts/github-api.yaml new file mode 100644 index 0000000..384b186 --- /dev/null +++ b/specs/004-github-repo-selection/contracts/github-api.yaml @@ -0,0 +1,189 @@ +# GitHub API Contract for Repository Listing +# Feature: 004-github-repo-selection +# These are the external GitHub API endpoints used by this feature + +openapi: 3.0.3 +info: + title: GitHub API - Repository Endpoints (Subset) + version: 2022-11-28 + description: | + External API contracts for GitHub repository listing. + This documents the subset of GitHub REST API used by the + interactive repository selection feature. + +paths: + /user/repos: + get: + operationId: listUserRepos + summary: List repositories for the authenticated user + description: | + Lists repositories that the authenticated user has explicit permission + to access. Used for FR-005 (personal repository listing). 
+ parameters: + - name: affiliation + in: query + description: | + Comma-separated list of values. Can include: owner, collaborator, + organization_member. Feature uses "owner,collaborator" per spec clarification. + schema: + type: string + default: "owner,collaborator,organization_member" + example: "owner,collaborator" + - name: visibility + in: query + description: Filter by visibility + schema: + type: string + enum: [all, public, private] + default: all + - name: sort + in: query + schema: + type: string + enum: [created, updated, pushed, full_name] + default: full_name + - name: direction + in: query + schema: + type: string + enum: [asc, desc] + default: asc + - name: per_page + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + default: 30 + - name: page + in: query + schema: + type: integer + minimum: 1 + default: 1 + responses: + '200': + description: List of repositories + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Repository' + '401': + description: Authentication required + '403': + description: Rate limit exceeded or insufficient permissions + + /orgs/{org}/repos: + get: + operationId: listOrgRepos + summary: List organization repositories + description: | + Lists repositories for the specified organization. Used for FR-006. 
+ parameters: + - name: org + in: path + required: true + description: The organization name (case-insensitive) + schema: + type: string + pattern: "^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$" + - name: type + in: query + description: Repository type filter + schema: + type: string + enum: [all, public, private, forks, sources, member] + default: all + - name: sort + in: query + schema: + type: string + enum: [created, updated, pushed, full_name] + default: created + - name: direction + in: query + schema: + type: string + enum: [asc, desc] + - name: per_page + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + default: 30 + - name: page + in: query + schema: + type: integer + minimum: 1 + default: 1 + responses: + '200': + description: List of repositories + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Repository' + '404': + description: Organization not found + +components: + schemas: + Repository: + type: object + description: GitHub repository (minimal fields used by feature) + required: + - id + - full_name + - name + - owner + - private + - fork + properties: + id: + type: integer + description: Unique repository ID + full_name: + type: string + description: Full repository name (owner/repo) + example: "octocat/Hello-World" + name: + type: string + description: Repository name + example: "Hello-World" + owner: + $ref: '#/components/schemas/Owner' + description: + type: string + nullable: true + description: Repository description + private: + type: boolean + description: Whether repository is private + fork: + type: boolean + description: Whether repository is a fork + html_url: + type: string + format: uri + description: GitHub URL for repository + + Owner: + type: object + required: + - login + properties: + login: + type: string + description: Owner username/org name + example: "octocat" + id: + type: integer + type: + type: string + enum: [User, Organization] diff --git 
a/specs/004-github-repo-selection/contracts/internal-api.md b/specs/004-github-repo-selection/contracts/internal-api.md new file mode 100644 index 0000000..9cd2a6a --- /dev/null +++ b/specs/004-github-repo-selection/contracts/internal-api.md @@ -0,0 +1,124 @@ +# Internal API Contract: GitHub Repository Selection + +**Feature**: 004-github-repo-selection +**Date**: 2025-11-29 + +## New Methods in GitHubClient + +### `list_user_repos(affiliation: str = "owner,collaborator") -> list[dict]` + +Lists repositories for the authenticated user. + +**Parameters**: +- `affiliation`: Comma-separated affiliations (default: "owner,collaborator" per spec) + +**Returns**: List of repository dicts with at least `full_name`, `name`, `owner.login`, `description`, `private`, `fork` + +**Raises**: +- `RateLimitError`: When API rate limit is exceeded +- `APIError`: On other API failures + +**Implementation Notes**: +- Uses existing `paginate()` method for automatic pagination +- Respects existing `per_page` and `max_pages` config + +--- + +### `list_org_repos(org: str) -> list[dict]` + +Lists repositories for a specific organization. + +**Parameters**: +- `org`: Organization name (validated by caller) + +**Returns**: List of repository dicts (same structure as `list_user_repos`) + +**Raises**: +- `RateLimitError`: When API rate limit is exceeded +- `APIError`: On API failures including 404 (org not found) + +**Implementation Notes**: +- Uses `type=all` to include all accessible repos +- Uses existing `paginate()` method + +--- + +## New Function in cli/main.py + +### `select_github_repos(repos_file: str, config: AnalyzerConfig, interactive: bool = True, output: TerminalOutput | None = None) -> list[str]` + +Select GitHub repositories from file or interactively. + +**Parameters**: +- `repos_file`: Path to repos.txt file +- `config`: Analyzer configuration (contains GitHub token) +- `interactive`: If True, prompt user when file missing/empty. If False, return empty list. 
+- `output`: Optional TerminalOutput for consistent logging + +**Returns**: List of repository names in `owner/repo` format + +**Behavior**: +1. If `repos_file` exists and has content → return repos from file +2. If `repos_file` missing/empty and `interactive=False` → return `[]` +3. If `repos_file` missing/empty and `interactive=True` → show menu: + - [A] All personal repos → call `list_user_repos()`, return all + - [S] Specify manually → prompt, validate format, return valid repos + - [O] Organization repos → prompt org, call `list_org_repos()`, show selection + - [L] Select from list → call `list_user_repos()`, show numbered list, return selected + - [Q] Quit → return `[]` +4. On EOF/KeyboardInterrupt → return `[]` + +--- + +## Helper Functions + +### `validate_repo_format(repo: str) -> bool` + +Validates repository name format. + +**Pattern**: `^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$` + +**Returns**: True if valid `owner/repo` format + +--- + +### `validate_org_name(org: str) -> bool` + +Validates GitHub organization name format. + +**Pattern**: `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` + +**Returns**: True if valid org name format + +--- + +### `format_repo_list(repos: list[dict]) -> str` + +Formats repository list for display. + +**Input**: List of repository dicts from API +**Output**: Numbered list string for display + +**Format**: +``` + 1. owner/repo1 - Description here (if any) + 2. owner/repo2 - Another description + 3. owner/private-repo - [private] Private repo desc +``` + +--- + +## Integration Point in main() + +```python +# In main(), before GitHub analysis: +repositories = select_github_repos( + config.repos_file, + config, + interactive=not args.quiet, # Non-interactive in quiet mode + output=output, +) + +if DataSource.GITHUB in sources and repositories: + # ... 
existing analysis code +``` diff --git a/specs/004-github-repo-selection/data-model.md b/specs/004-github-repo-selection/data-model.md new file mode 100644 index 0000000..bc3352c --- /dev/null +++ b/specs/004-github-repo-selection/data-model.md @@ -0,0 +1,175 @@ +# Data Model: GitHub Repository Interactive Selection + +**Feature**: 004-github-repo-selection +**Date**: 2025-11-29 + +## Entities + +### GitHubRepository (Response Model) + +Represents a repository returned from GitHub API. + +| Field | Type | Description | Source | +|-------|------|-------------|--------| +| `full_name` | `str` | Repository identifier (owner/repo) | `full_name` from API | +| `name` | `str` | Repository name | `name` from API | +| `owner` | `str` | Owner login | `owner.login` from API | +| `description` | `str \| None` | Repository description | `description` from API | +| `private` | `bool` | Private visibility | `private` from API | +| `fork` | `bool` | Is a fork | `fork` from API | + +**Usage**: Display in interactive list, validate user selection. + +### RepositorySelection (Internal State) + +Represents user's repository selection mode. + +| Value | Description | Action | +|-------|-------------|--------| +| `ALL_PERSONAL` | User chose [A] | List all personal repos (owner+collaborator) | +| `MANUAL` | User chose [S] | Accept comma-separated input | +| `ORGANIZATION` | User chose [O] | Prompt for org name, list org repos | +| `FROM_LIST` | User chose [L] | Show numbered list, accept selection | +| `QUIT` | User chose [Q] | Skip GitHub analysis | + +**Note**: This is a conceptual state, not necessarily a formal enum in implementation. 
+ +## Data Flow + +``` +┌─────────────────┐ +│ repos.txt │──exists──▶ Load and return repos +└─────────────────┘ + │ + │ missing/empty + ▼ +┌─────────────────┐ +│ Interactive Menu│ +│ [A/S/O/L/Q] │ +└─────────────────┘ + │ + ├──[A]──▶ GET /user/repos?affiliation=owner,collaborator + │ │ + │ ▼ + │ ┌─────────────────┐ + │ │ GitHubRepository│ (list) + │ └─────────────────┘ + │ │ + │ ▼ + │ Return [full_name, ...] + │ + ├──[S]──▶ Parse "owner/repo, owner/repo2" + │ │ + │ ▼ + │ Validate format + │ │ + │ ▼ + │ Return validated repos + │ + ├──[O]──▶ Prompt for org name + │ │ + │ ▼ + │ Validate org format + │ │ + │ ▼ + │ GET /orgs/{org}/repos?type=all + │ │ + │ ▼ + │ ┌─────────────────┐ + │ │ GitHubRepository│ (list) + │ └─────────────────┘ + │ │ + │ ▼ + │ Show list, accept selection + │ │ + │ ▼ + │ Return [full_name, ...] + │ + ├──[L]──▶ GET /user/repos?affiliation=owner,collaborator + │ │ + │ ▼ + │ Show numbered list + │ │ + │ ▼ + │ Parse "1,3,5" or "1-3" or "all" + │ │ + │ ▼ + │ Return selected [full_name, ...] 
+ │ + └──[Q]──▶ Return [] +``` + +## Validation Rules + +### Repository Name Format (FR-011) + +``` +Pattern: ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ +Examples: + ✓ owner/repo + ✓ my-org/my-repo + ✓ user123/project_v2 + ✗ just-repo (missing owner) + ✗ owner/ (missing repo) + ✗ /repo (missing owner) +``` + +### Organization Name Format (Edge Case) + +``` +Pattern: ^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$ +Rules: + - 1-39 characters + - Alphanumeric and hyphens only + - Cannot start or end with hyphen +Examples: + ✓ myorg + ✓ my-organization + ✗ -invalid- + ✗ org--double (consecutive hyphens: rejected by GitHub, but NOT by the pattern above, which accepts it) +``` + +### Selection Input Format (FR-010) + +``` +Formats supported: + - Single number: "3" + - Comma-separated: "1,3,5" + - Range: "1-3" + - Mixed: "1,3-5,7" + - All: "all" +``` + +## State Transitions + +``` +START + │ + ▼ +┌─────────┐ file exists ┌─────────┐ +│ Check │──────────────────▶│ LOADED │──▶ END (repos from file) +│ File │ └─────────┘ +└─────────┘ + │ missing/empty + ▼ +┌─────────┐ +│ PROMPT │◄────────────────┐ +└─────────┘ │ + │ │ + ├─[A]─▶ API call ─▶ END │ invalid input + ├─[S]─▶ Manual ────▶ END │ + │ │ │ + │ └──invalid───────►│ + ├─[O]─▶ Org prompt │ + │ │ │ + │ ├─valid org─▶ API │ + │ │ │ │ + │ │ └─▶ List ─▶ END + │ └─invalid───────►─┘ + ├─[L]─▶ API call ─▶ List ─▶ END + │ │ + │ └──invalid──►─┘ + └─[Q]─▶ END (empty list) + + EOF/Ctrl+C at any prompt ─▶ END (empty list) +``` diff --git a/specs/004-github-repo-selection/plan.md b/specs/004-github-repo-selection/plan.md new file mode 100644 index 0000000..772608b --- /dev/null +++ b/specs/004-github-repo-selection/plan.md @@ -0,0 +1,77 @@ +# Implementation Plan: GitHub Repository Interactive Selection + +**Branch**: `004-github-repo-selection` | **Date**: 2025-11-29 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/004-github-repo-selection/spec.md` + +## Summary + +Implement interactive repository selection for GitHub analysis when `repos.txt` is missing or empty. 
Users can choose to (A) analyze all personal repos, (S) specify manually, (O) analyze organization repos, (L) select from list, or (Q) quit. The implementation follows the established `select_jira_projects` UX pattern from Feature 002. + +## Technical Context + +**Language/Version**: Python 3.9+ (as per constitution, leveraging type hints) +**Primary Dependencies**: Standard library only (urllib, json); optional: requests (existing pattern) +**Storage**: N/A (repos.txt file is input, not storage) +**Testing**: pytest with fixtures and mocking (existing pattern) +**Target Platform**: CLI on macOS/Linux/Windows +**Project Type**: Single project (existing structure) +**Performance Goals**: Repository listing in <10s for 200 repos, <15s for 500 org repos (per SC-002, SC-003) +**Constraints**: Rate limit aware, graceful error handling, non-blocking in quiet mode +**Scale/Scope**: Support users with 200+ personal repos, orgs with 500+ repos + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +| Principle | Status | Notes | +|-----------|--------|-------| +| I. Modular Architecture | ✅ PASS | New function `select_github_repos` in `cli/main.py` following existing `select_jira_projects` pattern. API methods added to existing `GitHubClient` in `api/client.py` | +| II. Security First | ✅ PASS | Uses existing token handling via `AnalyzerConfig`. No new token exposure points | +| III. Test-Driven Development | ✅ PASS | Tests will mirror `test_interactive_selection.py` pattern for Jira | +| IV. Configuration over Hardcoding | ✅ PASS | Uses existing `repos_file` from config, affiliation parameter configurable | +| V. 
Graceful Error Handling | ✅ PASS | EOF/KeyboardInterrupt handled, API errors caught, partial failures tolerated | + +**Gate Result**: PASS - No violations + +## Project Structure + +### Documentation (this feature) + +```text +specs/004-github-repo-selection/ +├── plan.md # This file +├── research.md # Phase 0 output +├── data-model.md # Phase 1 output +├── quickstart.md # Phase 1 output +├── contracts/ # Phase 1 output +└── tasks.md # Phase 2 output (/speckit.tasks command) +``` + +### Source Code (repository root) + +```text +src/github_analyzer/ +├── api/ +│ └── client.py # Extended: list_user_repos(), list_org_repos() +├── cli/ +│ └── main.py # Extended: select_github_repos(), helpers +└── ... + +tests/ +├── unit/ +│ └── api/ +│ └── test_client.py # Extended: list_user_repos, list_org_repos tests +└── integration/ + └── test_interactive_selection.py # Extended: GitHub selection tests +``` + +**Structure Decision**: Extend existing modules following established patterns. No new modules needed - consistent with constitution's modular architecture principle. + +## Complexity Tracking + +> No violations to justify - all decisions follow existing patterns. + +| Decision | Justification | +|----------|---------------| +| Extend existing `GitHubClient` | Follows constitution's modular architecture - API client is the correct location for API calls | +| Add to `cli/main.py` | Follows `select_jira_projects` pattern for UX consistency (FR-003) | diff --git a/specs/004-github-repo-selection/quickstart.md b/specs/004-github-repo-selection/quickstart.md new file mode 100644 index 0000000..52d4a3c --- /dev/null +++ b/specs/004-github-repo-selection/quickstart.md @@ -0,0 +1,185 @@ +# Quickstart: GitHub Repository Interactive Selection + +**Feature**: 004-github-repo-selection +**Date**: 2025-11-29 + +## Overview + +This feature adds interactive repository selection when `repos.txt` is missing or empty. 
Users can: +- Analyze all personal repositories +- Specify repositories manually +- Select from an organization's repositories +- Pick from a numbered list of their repositories + +## Usage Examples + +### Scenario 1: No repos.txt, Interactive Selection + +```bash +# Ensure repos.txt doesn't exist +rm -f repos.txt + +# Run with GitHub source (will trigger interactive menu) +python github_analyzer.py --sources github + +# Output: +# repos.txt not found or empty. +# Found 23 accessible repositories: +# 1. myuser/project-alpha - Alpha project description +# 2. myuser/project-beta - [private] Beta project +# 3. otheruser/collab-repo - Collaborative work +# ... +# +# Options: +# [A] Analyze ALL accessible repositories +# [S] Specify repository names manually (owner/repo format) +# [O] Analyze organization repositories +# [L] Select from list by number (e.g., 1,3,5 or 1-3) +# [Q] Quit/Skip GitHub analysis +# +# Your choice [A/S/O/L/Q]: +``` + +### Scenario 2: Select All Personal Repos + +```bash +# At the prompt: +Your choice [A/S/O/L/Q]: A + +# Output: +# Using all 23 repositories. +# Starting GitHub analysis... +``` + +### Scenario 3: Manual Specification + +```bash +# At the prompt: +Your choice [A/S/O/L/Q]: S +Enter repository names (comma-separated, owner/repo format): myuser/project-alpha, otherorg/public-repo + +# Output: +# Selected 2 repositories: myuser/project-alpha, otherorg/public-repo +# Starting GitHub analysis... +``` + +### Scenario 4: Organization Repositories + +```bash +# At the prompt: +Your choice [A/S/O/L/Q]: O +Enter organization name: mycompany + +# Output: +# Found 47 repositories in 'mycompany': +# 1. mycompany/backend-api - Main backend service +# 2. mycompany/frontend-app - React frontend +# ... +# +# Select repositories (e.g., 1,3,5 or 1-3 or 'all'): 1-5 + +# Output: +# Selected 5 repositories. +# Starting GitHub analysis... 
+``` + +### Scenario 5: Select from Numbered List + +```bash +# At the prompt: +Your choice [A/S/O/L/Q]: L +Enter selection (e.g., 1,3,5 or 1-3 or 'all'): 1,3,7-10 + +# Output: +# Selected 7 repositories: ... +# Starting GitHub analysis... +``` + +### Scenario 6: Non-Interactive Mode (--quiet) + +```bash +# With --quiet flag and no repos.txt +python github_analyzer.py --sources github --quiet + +# Output: +# No repos.txt found. Skipping GitHub analysis in non-interactive mode. +``` + +### Scenario 7: Using repos.txt (Existing Behavior) + +```bash +# Create repos.txt +echo "myuser/project-alpha" > repos.txt +echo "myuser/project-beta" >> repos.txt + +# Run analyzer - no interactive prompt +python github_analyzer.py --sources github + +# Output: +# Loading repositories from repos.txt... +# Found 2 repositories. +# Starting GitHub analysis... +``` + +## Error Handling + +### Invalid Organization Name + +```bash +Your choice [A/S/O/L/Q]: O +Enter organization name: --invalid-- + +# Output: +# Invalid organization name format. Names must be alphanumeric with hyphens, 1-39 chars. +# Enter organization name: +``` + +### Organization Not Found + +```bash +Your choice [A/S/O/L/Q]: O +Enter organization name: nonexistent-org-xyz + +# Output: +# Could not access organization 'nonexistent-org-xyz'. Check the name and your permissions. +# Enter organization name (or 'Q' to go back): +``` + +### Invalid Repository Format + +```bash +Your choice [A/S/O/L/Q]: S +Enter repository names (comma-separated, owner/repo format): just-a-repo, valid/repo + +# Output: +# Warning: Invalid format ignored: just-a-repo (must be owner/repo) +# Selected 1 repository: valid/repo +``` + +### Rate Limit + +```bash +# If rate limited during repository listing: +# Output: +# GitHub API rate limit exceeded. Waiting 45 seconds... +# (Progress indicator) +# Resuming... +``` + +### Ctrl+C / EOF + +```bash +# At any prompt, pressing Ctrl+C or Ctrl+D: +# Output: +# GitHub analysis skipped. 
+``` + +## Testing Quick Check + +```bash +# Run unit tests for new functionality +pytest tests/unit/api/test_client.py -v -k "list_user_repos or list_org_repos" + +# Run integration tests for interactive selection +pytest tests/integration/test_interactive_selection.py -v -k "github" +``` diff --git a/specs/004-github-repo-selection/research.md b/specs/004-github-repo-selection/research.md new file mode 100644 index 0000000..26536fb --- /dev/null +++ b/specs/004-github-repo-selection/research.md @@ -0,0 +1,121 @@ +# Research: GitHub Repository Interactive Selection + +**Feature**: 004-github-repo-selection +**Date**: 2025-11-29 + +## Research Tasks Completed + +### 1. GitHub API: List Authenticated User Repositories + +**Endpoint**: `GET /user/repos` + +**Decision**: Use `affiliation=owner,collaborator` parameter to list personal repos per spec clarification. + +**Rationale**: +- The spec clarifies that "personal repos" should include repos where user is owner + repos where user is collaborator (not organization member repos) +- This aligns with user expectation of "my repos" without including all org repos they can access + +**Parameters Identified**: +| Parameter | Values | Default | Purpose | +|-----------|--------|---------|---------| +| `affiliation` | `owner`, `collaborator`, `organization_member` (comma-separated) | `owner,collaborator,organization_member` | Filter by relationship | +| `visibility` | `all`, `public`, `private` | `all` | Filter by visibility | +| `sort` | `created`, `updated`, `pushed`, `full_name` | `full_name` | Sort order | +| `direction` | `asc`, `desc` | `asc` when sort=full_name | Sort direction | +| `per_page` | 1-100 | 30 | Results per page | +| `page` | positive integer | 1 | Page number | + +**Implementation**: Use `affiliation=owner,collaborator` for FR-005 (list personal repos). + +### 2.
GitHub API: List Organization Repositories + +**Endpoint**: `GET /orgs/{org}/repos` + +**Decision**: Use `type=all` to list all accessible org repositories. + +**Rationale**: +- User wants to see all repos they can access in an organization +- The `type=all` default includes public, private, forks, sources, member repos + +**Parameters Identified**: +| Parameter | Values | Default | Purpose | +|-----------|--------|---------|---------| +| `type` | `all`, `public`, `private`, `forks`, `sources`, `member` | `all` | Repository type filter | +| `sort` | `created`, `updated`, `pushed`, `full_name` | `created` | Sort order | +| `direction` | `asc`, `desc` | `desc` (except full_name) | Sort direction | +| `per_page` | 1-100 | 30 | Results per page | +| `page` | positive integer | 1 | Page number | + +**Implementation**: Use `type=all` for FR-006 (list org repos). + +### 3. Pagination Strategy + +**Decision**: Use existing `GitHubClient.paginate()` method with automatic page handling. + +**Rationale**: +- Existing client already handles pagination efficiently +- Uses `per_page=100` (configured in AnalyzerConfig) for fewer API calls +- Respects `max_pages` configuration to prevent runaway pagination + +**Alternatives Considered**: +- Manual pagination: Rejected - would duplicate existing logic +- Link header parsing: Rejected - current simple page iteration is sufficient + +### 4. Rate Limit Handling + +**Decision**: Use existing rate limit handling from `GitHubClient`. + +**Rationale**: +- `GitHubClient` already implements rate limit detection and `RateLimitError` +- Exponential backoff retry is already implemented for transient failures +- Per spec edge case: show wait time to user when rate limited + +**Implementation**: Catch `RateLimitError` in selection flow, display remaining wait time. + +### 5. UX Pattern: Select from List + +**Decision**: Follow exact pattern from `select_jira_projects` for consistency (FR-003). 
+ +**Rationale**: +- Spec explicitly requires UX consistency with Jira project selection +- Existing `parse_project_selection()` helper can parse "1,3,5" and "1-3" syntax +- Existing `format_project_list()` pattern can be adapted for repos + +**Pattern to Follow** (from `cli/main.py:464-582`): +1. Load from file first (repos.txt) +2. If missing/empty, fetch available options from API +3. Display numbered list +4. Show menu: [A] All, [S] Specify, [O] Organization, [L] List, [Q] Quit +5. Handle EOF/KeyboardInterrupt gracefully +6. Validate input, retry on invalid + +### 6. Organization Name Validation + +**Decision**: Validate org name format before API call. + +**Rationale**: +- Per spec edge case: "Validate organization name format before API call, reject invalid names" +- GitHub org names: alphanumeric + hyphens, 1-39 chars, cannot start/end with hyphen + +**Pattern**: `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` + +### 7. Repository Name Format + +**Decision**: Use `owner/repo` format consistently. 
+ +**Rationale**: +- Existing repos.txt uses this format +- FR-009 requires comma-separated `owner/repo` for manual entry +- API returns `full_name` field in this format + +## Summary of Decisions + +| Topic | Decision | Spec Requirement | +|-------|----------|------------------| +| Personal repos API | `GET /user/repos?affiliation=owner,collaborator` | FR-005, Clarification | +| Org repos API | `GET /orgs/{org}/repos?type=all` | FR-006 | +| Pagination | Use existing `paginate()` method | FR-007 | +| Rate limits | Use existing handler, show wait time | Edge case | +| UX pattern | Follow `select_jira_projects` exactly | FR-003 | +| Org name validation | Regex pattern before API call | Edge case | +| Repo format | `owner/repo` (full_name) | FR-009, FR-011 | diff --git a/specs/004-github-repo-selection/spec.md b/specs/004-github-repo-selection/spec.md new file mode 100644 index 0000000..de53c7e --- /dev/null +++ b/specs/004-github-repo-selection/spec.md @@ -0,0 +1,219 @@ +# Feature Specification: GitHub Repository Interactive Selection + +**Feature Branch**: `004-github-repo-selection` +**Created**: 2025-11-29 +**Status**: Draft +**Input**: User description: "Selezione interattiva repository GitHub: quando repos.txt è assente o vuoto, il sistema chiede all'utente se vuole (a) specificare repo manualmente, (b) analizzare tutti i propri repo personali, (c) analizzare tutti i repo di una organization specifica. Implementare API calls per listare repo utente e repo organization. Seguire lo stesso pattern di select_jira_projects per consistenza UX." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Interactive Repository Selection Menu (Priority: P1) + +As a user without a repos.txt file, I want to be presented with options to select repositories interactively so that I can quickly choose which repositories to analyze without creating a configuration file. + +**Why this priority**: This is the core functionality. 
Without the interactive menu, users with no repos.txt cannot use the tool for GitHub analysis. + +**Independent Test**: Can be fully tested by removing repos.txt, running the analyzer with `--sources github`, and verifying the interactive menu appears with options A/S/O/L/Q. + +**Acceptance Scenarios**: + +1. **Given** repos.txt is missing, **When** I run the analyzer with GitHub source enabled, **Then** I see an interactive menu with options: [A] Analyze all my repos, [S] Specify manually, [O] Analyze organization repos, [L] Select from list, [Q] Quit +2. **Given** repos.txt exists but is empty, **When** I run the analyzer, **Then** I see the same interactive menu +3. **Given** repos.txt contains valid repositories, **When** I run the analyzer, **Then** no interactive menu appears and those repositories are used directly +4. **Given** I'm in non-interactive mode (piped input or --quiet without --repos), **When** repos.txt is missing, **Then** GitHub analysis is skipped with an informational log message (not an error) + +--- + +### User Story 2 - List and Select Personal Repositories (Priority: P2) + +As a user, I want to see a list of all my personal GitHub repositories so that I can select which ones to analyze without remembering exact names. + +**Why this priority**: Personal repos are the most common use case. Users typically want to analyze their own repositories. + +**Independent Test**: Can be tested by selecting option [A] or [L] and verifying all personal repositories are listed with correct names. + +**Acceptance Scenarios**: + +1. **Given** I select option [A] (all my repos), **When** the API call completes, **Then** all my public and private repositories are included for analysis +2. **Given** I select option [L] (select from list), **When** the list appears, **Then** I see all my repositories numbered with owner/name format +3. 
**Given** I have 50+ repositories, **When** the list appears, **Then** all repositories are shown (paginated API calls handle large accounts) +4. **Given** I select specific numbers from the list (e.g., "1,3,5" or "1-3"), **When** I confirm, **Then** only those repositories are analyzed + +--- + +### User Story 3 - List and Select Organization Repositories (Priority: P3) + +As a user who belongs to GitHub organizations, I want to select an organization and analyze its repositories so that I can generate reports for team projects. + +**Why this priority**: Organization repos are a common enterprise use case, but secondary to personal repos. + +**Independent Test**: Can be tested by selecting option [O], entering an organization name, and verifying org repos are listed. + +**Acceptance Scenarios**: + +1. **Given** I select option [O], **When** prompted for organization name, **Then** I can enter any organization name I have access to +2. **Given** I enter a valid organization name, **When** the API call completes, **Then** I see all repositories I can access in that organization +3. **Given** I enter an organization I don't have access to, **When** the API call fails, **Then** I see a clear error message and can try again or quit +4. **Given** the organization has 100+ repositories, **When** the list appears, **Then** all repositories are shown (paginated API calls) +5. **Given** I want to analyze all org repos, **When** I choose [A] after seeing the list, **Then** all listed org repos are included + +--- + +### User Story 4 - Manual Repository Specification (Priority: P4) + +As a user who knows exactly which repositories I want, I want to type repository names directly so that I can quickly specify them without browsing lists. + +**Why this priority**: Power users prefer direct input. This provides a quick path for experienced users. + +**Independent Test**: Can be tested by selecting option [S] and entering comma-separated repository names. 
+ +**Acceptance Scenarios**: + +1. **Given** I select option [S], **When** prompted, **Then** I can enter repository names in `owner/repo` format, comma-separated +2. **Given** I enter "owner/repo1, owner/repo2", **When** I confirm, **Then** those exact repositories are used for analysis +3. **Given** I enter an invalid repository format, **When** validation runs, **Then** I see a warning about invalid entries and can correct or continue +4. **Given** I enter a mix of valid and invalid repos, **When** I confirm, **Then** only valid repos are used and I'm warned about invalid ones + +--- + +### Edge Cases + +- What happens when GitHub API rate limit is exceeded during repository listing? + - Retry with exponential backoff (same pattern as existing GitHubClient), show wait time to user: "Rate limit exceeded. Waiting X seconds..." +- What happens when the user's token doesn't have repo read permissions? + - Clear error message: "GitHub token requires 'repo' scope to list repositories. Please check your token permissions." +- What happens when user presses Ctrl+C or EOF during interactive menu? + - Graceful exit with "GitHub analysis skipped." message, return empty list +- What happens when organization name contains special characters? + - Validate organization name format (alphanumeric + hyphens, 1-39 chars, no leading/trailing hyphen) before API call, show: "Invalid organization name format." +- What happens in quiet mode (--quiet) without repos.txt? + - Skip interactive prompts, log: "No repos.txt found. Skipping GitHub analysis in non-interactive mode." +- What happens when user has zero repositories? + - Display: "No repositories found for your account." and return to menu or exit gracefully +- What happens when organization has zero repositories? + - Display: "No repositories found in organization '{org}'." and allow retry or quit +- What happens when API authentication fails (invalid/expired token)? + - Display: "GitHub authentication failed. 
Please verify your GITHUB_TOKEN." and exit with code 1 +- What happens when network timeout occurs during API call? + - Display: "Network timeout while connecting to GitHub API. Please check your connection." and allow retry +- What happens when selection numbers exceed list length? + - Ignore invalid numbers, warn: "Selection '99' is out of range (1-N). Ignored.", continue with valid selections +- What happens with partial API response (some pages fetched, then error)? + - Use already-fetched repositories, warn: "Warning: Could not fetch all repositories. Showing {N} of potentially more." + +## Requirements *(mandatory)* + +### Functional Requirements + +**Interactive Selection Menu** +- **FR-001**: System MUST display interactive menu when repos.txt is missing or empty and GitHub source is enabled +- **FR-002**: System MUST offer options: [A] All personal repos, [S] Specify manually, [O] Organization repos, [L] Select from personal list, [Q] Quit/Skip +- **FR-003**: System MUST follow the same UX pattern as `select_jira_projects` for consistency +- **FR-004**: System MUST handle EOF/KeyboardInterrupt gracefully, returning empty repository list + +**GitHub API Integration** +- **FR-005**: System MUST list all repositories for authenticated user via GET /user/repos API +- **FR-006**: System MUST list organization repositories via GET /orgs/{org}/repos API +- **FR-007**: System MUST handle pagination for users/orgs with many repositories (100+ repos) +- **FR-008**: System MUST respect existing rate limit handling from GitHubClient + +**User Input Handling** +- **FR-009**: System MUST accept comma-separated repository names in `owner/repo` format for manual entry +- **FR-010**: System MUST accept number selection for list mode (e.g., "1,3,5" or "1-3" or "all") +- **FR-011**: System MUST validate repository name format before attempting analysis +- **FR-012**: System MUST warn about invalid entries but continue with valid ones + +**Non-Interactive Mode** +- 
**FR-013**: System MUST skip interactive prompts in non-interactive mode (--quiet without explicit --repos) +- **FR-014**: System MUST log clear message when GitHub analysis is skipped due to missing repos.txt in non-interactive mode + +### Key Entities + +- **Repository**: Existing entity representing a GitHub repository (owner, name) +- **GitHubClient**: Extended to support list_user_repos() and list_org_repos(org_name) methods +- **RepositorySelection**: Enum or type representing selection mode (ALL_PERSONAL, ORGANIZATION, MANUAL, FROM_LIST) + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: Users without repos.txt can see the interactive menu within 2 seconds of starting the tool; repository listing completes within 30 seconds total (including API calls) +- **SC-002**: Personal repository listing completes in under 10 seconds for users with up to 200 repositories +- **SC-003**: Organization repository listing completes in under 15 seconds for orgs with up to 500 repositories +- **SC-004**: All existing repos.txt functionality works identically (no regression) +- **SC-005**: Interactive selection UX mirrors Jira project selection (same menu patterns, keyboard shortcuts, error handling) +- **SC-006**: Non-interactive mode (--quiet) never blocks waiting for user input + +## Assumptions + +- GitHub token has `repo` scope to list private repositories +- User knows their organization names (system doesn't list user's organizations, user must enter org name) +- Repository listing returns repositories the user can access based on their token permissions +- Follow existing `select_jira_projects` pattern in `cli/main.py` for consistency +- API timeouts use existing GitHubClient configurable timeout (default: 30s per constitution) +- Performance thresholds (SC-002, SC-003) assume standard network conditions (< 200ms latency) + +## Display Format + +### Repository List Format +Each repository is displayed as: +``` + N. 
owner/repo-name - Description (truncated to 50 chars) + N. owner/private-repo - [private] Description here +``` + +### Menu Prompt Format +``` +Options: + [A] Analyze ALL accessible repositories + [S] Specify repository names manually (owner/repo format) + [O] Analyze organization repositories + [L] Select from list by number (e.g., 1,3,5 or 1-3) + [Q] Quit/Skip GitHub analysis + +Your choice [A/S/O/L/Q]: +``` + +## Validation Patterns + +### Repository Name Format (FR-011) +- Pattern: `^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$` +- Valid: `owner/repo`, `my-org/my-repo`, `user123/project_v2` +- Invalid: `just-repo`, `owner/`, `/repo` + +### Organization Name Format +- Pattern: `^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$` +- Rules: 1-39 characters, alphanumeric and hyphens only, cannot start/end with hyphen +- Valid: `myorg`, `my-organization` +- Invalid: `-invalid-`, `trailing-` (note: consecutive hyphens such as `org--double` are NOT rejected by the pattern above; GitHub itself rejects them, so the API call will fail instead) + +### Selection Input Format (FR-010) +- Single number: `3` +- Comma-separated: `1,3,5` +- Range: `1-3` +- Mixed: `1,3-5,7` +- All: `all` + +## Non-Functional Requirements + +### Performance +- Menu display: < 2 seconds from tool start (SC-001) +- Personal repo listing: < 10 seconds for up to 200 repos (SC-002) +- Org repo listing: < 15 seconds for up to 500 repos (SC-003) +- Assumes standard network conditions (< 200ms latency to GitHub API) + +### Security +- Token values MUST NOT be logged, printed, or exposed in error messages (constitution §II) +- Organization name input MUST be validated to prevent injection attacks +- API URLs MUST be constructed safely (no string concatenation with user input) + +### Accessibility +- Menu output uses plain text, compatible with screen readers +- No ANSI color codes required; uses TerminalOutput for optional formatting +- All prompts are clear and self-explanatory + +## Clarifications + +### Session 2025-11-29 + +- Q: Per "personal repos", quali repository includere?
→ A: Owner + repos dove l'utente è collaborator diretto (affiliation=owner,collaborator) diff --git a/specs/004-github-repo-selection/tasks.md b/specs/004-github-repo-selection/tasks.md new file mode 100644 index 0000000..f4dd147 --- /dev/null +++ b/specs/004-github-repo-selection/tasks.md @@ -0,0 +1,277 @@ +# Tasks: GitHub Repository Interactive Selection + +**Input**: Design documents from `/specs/004-github-repo-selection/` +**Prerequisites**: plan.md (required), spec.md (required for user stories), research.md, data-model.md, contracts/ + +**Tests**: Following constitution principle III (Test-Driven Development), tests are included for all user stories. + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing of each story. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2, US3, US4) +- Include exact file paths in descriptions + +## Path Conventions + +- **Single project**: `src/github_analyzer/`, `tests/` at repository root +- Paths based on existing project structure from plan.md + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: No new project initialization needed - extending existing codebase + +- [x] T001 Verify existing GitHubClient supports pagination in src/github_analyzer/api/client.py +- [x] T002 Verify existing TerminalOutput and error handling in src/github_analyzer/cli/output.py + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: API methods in GitHubClient that ALL user stories depend on + +**⚠️ CRITICAL**: No user story work can begin until this phase is complete + +### Tests for Foundational API Methods + +- [x] T003 [P] Unit test for list_user_repos() in tests/unit/api/test_client.py +- [x] T004 [P] Unit test for list_org_repos() in tests/unit/api/test_client.py + +### Implementation for Foundational API Methods + +- [x] T005 
Implement list_user_repos(affiliation="owner,collaborator") in src/github_analyzer/api/client.py +- [x] T006 Implement list_org_repos(org: str) in src/github_analyzer/api/client.py + +**Checkpoint**: GitHubClient API methods ready - user story implementation can now begin + +--- + +## Phase 3: User Story 1 - Interactive Repository Selection Menu (Priority: P1) 🎯 MVP + +**Goal**: Display interactive menu when repos.txt is missing/empty, offering options [A/S/O/L/Q] + +**Independent Test**: Remove repos.txt, run `python github_analyzer.py --sources github`, verify menu appears with all options + +### Tests for User Story 1 + +- [x] T007 [P] [US1] Test menu displays when repos.txt missing in tests/integration/test_interactive_selection.py +- [x] T008 [P] [US1] Test menu displays when repos.txt empty in tests/integration/test_interactive_selection.py +- [x] T009 [P] [US1] Test no menu when repos.txt has valid content in tests/integration/test_interactive_selection.py +- [x] T010 [P] [US1] Test EOF/Ctrl+C returns empty list in tests/integration/test_interactive_selection.py +- [x] T011 [P] [US1] Test non-interactive mode (--quiet) skips prompts in tests/integration/test_interactive_selection.py + +### Implementation for User Story 1 + +- [x] T012 [US1] Create select_github_repos() function skeleton in src/github_analyzer/cli/main.py +- [x] T013 [US1] Implement load_github_repos_from_file() helper to read repos.txt in src/github_analyzer/cli/main.py +- [x] T014 [US1] Implement interactive menu display with [A/S/O/L/Q] options in src/github_analyzer/cli/main.py +- [x] T015 [US1] Implement EOF/KeyboardInterrupt handling (return empty list) in src/github_analyzer/cli/main.py +- [x] T016 [US1] Implement non-interactive mode check (interactive=False returns []) in src/github_analyzer/cli/main.py +- [x] T017 [US1] Integrate select_github_repos() call in main() before GitHub analysis in src/github_analyzer/cli/main.py + +**Checkpoint**: User Story 1 complete - menu appears when 
repos.txt missing, file-based repos still work + +--- + +## Phase 4: User Story 2 - List and Select Personal Repositories (Priority: P2) + +**Goal**: Options [A] and [L] fetch and display user's personal repos using list_user_repos() API + +**Independent Test**: Select option [A] or [L], verify all personal repos listed with owner/name format + +### Tests for User Story 2 + +- [x] T018 [P] [US2] Test option [A] returns all user repos in tests/integration/test_interactive_selection.py +- [x] T019 [P] [US2] Test option [L] displays numbered list in tests/integration/test_interactive_selection.py +- [x] T020 [P] [US2] Test option [L] accepts "1,3,5" selection in tests/integration/test_interactive_selection.py +- [x] T021 [P] [US2] Test option [L] accepts "1-3" range selection in tests/integration/test_interactive_selection.py +- [x] T022 [P] [US2] Test option [L] accepts "all" selection in tests/integration/test_interactive_selection.py +- [x] T023 [P] [US2] Test pagination handles 100+ repos in tests/unit/api/test_client.py + +### Implementation for User Story 2 + +- [x] T024 [P] [US2] Implement format_repo_list() helper in src/github_analyzer/cli/main.py +- [x] T025 [P] [US2] Implement parse_project_selection() helper (reuse pattern from Jira) in src/github_analyzer/cli/main.py +- [x] T026 [US2] Implement option [A] handler - call list_user_repos(), return all full_names in src/github_analyzer/cli/main.py +- [x] T027 [US2] Implement option [L] handler - display numbered list, parse selection in src/github_analyzer/cli/main.py +- [x] T028 [US2] Handle API errors and rate limits with user feedback in src/github_analyzer/cli/main.py + +**Checkpoint**: User Story 2 complete - [A] and [L] options work, personal repos listed + +--- + +## Phase 5: User Story 3 - List and Select Organization Repositories (Priority: P3) + +**Goal**: Option [O] prompts for org name, fetches and displays org repos using list_org_repos() API + +**Independent Test**: Select option [O], enter 
valid org name, verify org repos listed + +### Tests for User Story 3 + +- [x] T029 [P] [US3] Test option [O] prompts for org name in tests/integration/test_interactive_selection.py +- [x] T030 [P] [US3] Test valid org name fetches repos in tests/integration/test_interactive_selection.py +- [x] T031 [P] [US3] Test invalid org name format shows error in tests/integration/test_interactive_selection.py +- [x] T032 [P] [US3] Test non-existent org shows error and retry option in tests/integration/test_interactive_selection.py +- [x] T033 [P] [US3] Test org with 100+ repos handles pagination in tests/unit/api/test_client.py + +### Implementation for User Story 3 + +- [x] T034 [P] [US3] Implement validate_org_name() helper in src/github_analyzer/cli/main.py +- [x] T035 [US3] Implement option [O] handler - prompt org, validate, call list_org_repos() in src/github_analyzer/cli/main.py +- [x] T036 [US3] Display org repos list and accept selection (reuse format_repo_list) in src/github_analyzer/cli/main.py +- [x] T037 [US3] Handle org not found / permission denied errors in src/github_analyzer/cli/main.py + +**Checkpoint**: User Story 3 complete - [O] option works for organization repos + +--- + +## Phase 6: User Story 4 - Manual Repository Specification (Priority: P4) + +**Goal**: Option [S] accepts comma-separated owner/repo input with validation + +**Independent Test**: Select option [S], enter "owner/repo1, owner/repo2", verify those repos used + +### Tests for User Story 4 + +- [x] T038 [P] [US4] Test option [S] prompts for manual input in tests/integration/test_interactive_selection.py +- [x] T039 [P] [US4] Test valid "owner/repo" format accepted in tests/integration/test_interactive_selection.py +- [x] T040 [P] [US4] Test invalid format shows warning in tests/integration/test_interactive_selection.py +- [x] T041 [P] [US4] Test mixed valid/invalid continues with valid only in tests/integration/test_interactive_selection.py +- [x] T042 [P] [US4] Test empty input prompts 
again in tests/integration/test_interactive_selection.py + +### Implementation for User Story 4 + +- [x] T043 [P] [US4] Implement validate_repo_format() helper in src/github_analyzer/cli/main.py +- [x] T044 [US4] Implement option [S] handler - prompt, parse comma-separated, validate in src/github_analyzer/cli/main.py +- [x] T045 [US4] Show warnings for invalid repos, continue with valid ones in src/github_analyzer/cli/main.py +- [x] T046 [US4] Handle empty input with retry prompt in src/github_analyzer/cli/main.py + +**Checkpoint**: User Story 4 complete - [S] option works for manual specification + +--- + +## Phase 7: Polish & Cross-Cutting Concerns + +**Purpose**: Final integration, edge cases, and cleanup + +- [x] T047 [P] Test option [Q] returns empty list in tests/integration/test_interactive_selection.py +- [x] T048 [P] Test invalid menu choice shows error and reprompts in tests/integration/test_interactive_selection.py +- [x] T049 Implement rate limit handling with wait time display to user in src/github_analyzer/cli/main.py (FR-008) +- [x] T050 Implement auth error handling (token missing/invalid scope) with clear message in src/github_analyzer/cli/main.py (Edge Case) +- [x] T051 Run full test suite to ensure no regressions: pytest tests/ -v (727 passed) +- [ ] T052 Validate quickstart.md scenarios work end-to-end + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: No dependencies - verification only +- **Foundational (Phase 2)**: Depends on Setup - BLOCKS all user stories (API methods required) +- **User Stories (Phase 3-6)**: All depend on Foundational phase completion + - US1 (menu) is MVP and should complete first + - US2-4 can proceed in parallel after US1 establishes select_github_repos() skeleton +- **Polish (Phase 7)**: Depends on all user stories being complete + +### User Story Dependencies + +``` +Phase 1: Setup (verify existing) + │ + ▼ +Phase 2: Foundational (list_user_repos, list_org_repos) + │ + 
├───────────────┬───────────────┬───────────────┐ + ▼ ▼ ▼ ▼ +Phase 3: US1 Phase 4: US2 Phase 5: US3 Phase 6: US4 +(Menu MVP) (Personal) (Org) (Manual) + │ │ │ │ + └───────────────┴───────────────┴───────────────┘ + │ + ▼ + Phase 7: Polish +``` + +### Within Each User Story + +1. Tests MUST be written and FAIL before implementation +2. Helper functions before main handlers +3. Core implementation before integration +4. Story complete before moving to next priority + +### Parallel Opportunities + +**Phase 2 (Foundational)**: +- T003 and T004 (tests) can run in parallel +- T005 and T006 (implementation) are sequential (same file) + +**Phase 3-6 (User Stories)**: +- All test tasks within a story can run in parallel +- Helper functions (format_repo_list, parse_repo_selection, validate_*) can run in parallel +- After US1 establishes skeleton, US2-4 can proceed in parallel + +--- + +## Parallel Example: Foundational Phase + +```bash +# Launch tests in parallel: +Task: T003 "Unit test for list_user_repos()" +Task: T004 "Unit test for list_org_repos()" + +# Then implementation (sequential - same file): +Task: T005 "Implement list_user_repos()" +Task: T006 "Implement list_org_repos()" +``` + +## Parallel Example: User Story 2 + +```bash +# Launch all tests in parallel: +Task: T018-T023 (all test tasks) + +# Launch helpers in parallel: +Task: T024 "format_repo_list()" +Task: T025 "parse_repo_selection()" + +# Then handlers (sequential): +Task: T026 "option [A] handler" +Task: T027 "option [L] handler" +Task: T028 "error handling" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 Only) + +1. Complete Phase 1: Setup (verification) +2. Complete Phase 2: Foundational (API methods) +3. Complete Phase 3: User Story 1 (interactive menu) +4. **STOP and VALIDATE**: Menu appears when repos.txt missing +5. Deploy/demo if ready - users can now see options + +### Incremental Delivery + +1. Complete Setup + Foundational → API ready +2. 
Add User Story 1 → Menu appears → (MVP!) +3. Add User Story 2 → [A] and [L] work → Personal repos +4. Add User Story 3 → [O] works → Org repos +5. Add User Story 4 → [S] works → Manual entry +6. Polish → Edge cases, rate limits → Production ready + +--- + +## Notes + +- [P] tasks = different files, no dependencies +- [Story] label maps task to specific user story for traceability +- Each user story is independently completable and testable +- Tests follow existing patterns in tests/integration/test_interactive_selection.py +- Constitution requires TDD - all tests written before implementation +- Follow select_jira_projects pattern in cli/main.py for UX consistency (FR-003) diff --git a/src/github_analyzer/api/client.py b/src/github_analyzer/api/client.py index 9bc7f80..fc15537 100644 --- a/src/github_analyzer/api/client.py +++ b/src/github_analyzer/api/client.py @@ -352,6 +352,60 @@ def paginate( return all_items + def list_user_repos( + self, + affiliation: str = "owner,collaborator", + ) -> list[dict]: + """List repositories for the authenticated user. + + Fetches all repositories the user has access to based on affiliation. + Supports pagination for accounts with many repositories. + + Args: + affiliation: Filter by owner relationship. Comma-separated list + of values: owner, collaborator, organization_member. + Defaults to "owner,collaborator" per FR-005. + + Returns: + List of repository dictionaries with full_name, private, description. + + Raises: + RateLimitError: If GitHub API rate limit exceeded. + APIError: On API errors (auth failure, network issues). + """ + return self.paginate( + "/user/repos", + params={"affiliation": affiliation}, + ) + + def list_org_repos( + self, + org: str, + repo_type: str = "all", + ) -> list[dict]: + """List repositories for an organization. + + Fetches all repositories in the specified organization that the + authenticated user can access. Supports pagination for orgs + with many repositories (100+). 
+ + Args: + org: Organization name (e.g., "facebook", "microsoft"). + repo_type: Type filter: all, public, private, forks, sources. + Defaults to "all" per FR-006. + + Returns: + List of repository dictionaries with full_name, private, description. + + Raises: + RateLimitError: If GitHub API rate limit exceeded. + APIError: On API errors (404 for non-existent org, auth issues). + """ + return self.paginate( + f"/orgs/{org}/repos", + params={"type": repo_type}, + ) + def validate_response( self, data: dict | list | None, diff --git a/src/github_analyzer/cli/main.py b/src/github_analyzer/cli/main.py index d183252..0ab9724 100644 --- a/src/github_analyzer/cli/main.py +++ b/src/github_analyzer/cli/main.py @@ -12,6 +12,7 @@ import argparse import os +import re import sys from datetime import datetime, timedelta, timezone from pathlib import Path @@ -406,6 +407,349 @@ def validate_sources(sources: list[DataSource]) -> None: ) +# GitHub repository validation patterns (per spec Validation Patterns section) +REPO_FORMAT_PATTERN = re.compile(r"^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$") +ORG_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$") + + +def validate_repo_format(repo: str) -> bool: + """Validate repository name format (owner/repo). + + Per spec FR-011 and Validation Patterns section: + - Pattern: ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ + - Valid: owner/repo, my-org/my-repo, user123/project_v2 + + Args: + repo: Repository string to validate. + + Returns: + True if valid format, False otherwise. + """ + if not repo or not repo.strip(): + return False + return bool(REPO_FORMAT_PATTERN.match(repo.strip())) + + +def validate_org_name(org: str) -> bool: + """Validate organization name format. + + Per spec Validation Patterns section: + - Pattern: ^[a-zA-Z0-9]([a-zA-Z0-9-]{0,37}[a-zA-Z0-9])?$ + - Rules: 1-39 chars, alphanumeric and hyphens, cannot start/end with hyphen + - Double hyphens (--) are not allowed + + Args: + org: Organization name to validate. 
+ + Returns: + True if valid format, False otherwise. + """ + if not org or not org.strip(): + return False + org = org.strip() + if len(org) > 39: + return False + # Double hyphens not allowed + if "--" in org: + return False + # Single character is valid (ASCII only, to agree with ORG_NAME_PATTERN) + if len(org) == 1: + return org.isascii() and org.isalnum() + return bool(ORG_NAME_PATTERN.match(org)) + + +def format_repo_list(repos: list[dict]) -> str: + """Format GitHub repositories for display. + + Per spec Display Format section: + - N. owner/repo-name - Description (truncated to 50 chars) + - N. owner/private-repo - [private] Description here + + Args: + repos: List of repository dictionaries with full_name, private, description. + + Returns: + Formatted string for terminal display. + """ + lines = [] + for idx, repo in enumerate(repos, 1): + full_name = repo.get("full_name", "unknown") + is_private = repo.get("private", False) + description = repo.get("description") or "" + + # Truncate description to 50 chars + if len(description) > 50: + description = description[:47] + "..." + + # Build display line + if is_private: + if description: + lines.append(f" {idx}. {full_name} - [private] {description}") + else: + lines.append(f" {idx}. {full_name} - [private]") + else: + if description: + lines.append(f" {idx}. {full_name} - {description}") + else: + lines.append(f" {idx}. {full_name}") + + return "\n".join(lines) + + +def load_github_repos_from_file(repos_file: str) -> list[str]: + """Load repository names from repos.txt file. + + Args: + repos_file: Path to repos.txt file. + + Returns: + List of repository names (owner/repo format), or empty if file missing/empty.
+ """ + try: + path = Path(repos_file) + if not path.exists(): + return [] + + content = path.read_text().strip() + if not content: + return [] + + repos = [] + for line in content.splitlines(): + line = line.strip() + # Skip comments and empty lines + if not line or line.startswith("#"): + continue + # Handle full URLs + if line.startswith("http"): + # Extract owner/repo from URL; drop a literal ".git" suffix (rstrip would strip a char set) + # https://github.com/owner/repo or https://github.com/owner/repo.git + parts = line.rstrip("/").removesuffix(".git").split("/") + if len(parts) >= 2: + repos.append(f"{parts[-2]}/{parts[-1]}") + else: + repos.append(line) + return repos + except OSError: + return [] + + +def _handle_rate_limit(e: RateLimitError, log) -> None: + """Handle rate limit error with wait time display (FR-008, T049). + + Per spec Edge Cases: Show wait time to user. + + Args: + e: RateLimitError with reset_time. + log: Logging function. + """ + import time as time_module + if e.reset_time: + wait_seconds = max(0, e.reset_time - int(time_module.time())) + log(f"Rate limit exceeded. Waiting {wait_seconds} seconds...", "warning") + else: + log("Rate limit exceeded. Please try again later.", "warning") + + +def select_github_repos( + repos_file: str, + github_token: str, + interactive: bool = True, + output: TerminalOutput | None = None, +) -> list[str]: + """Select GitHub repositories from file or interactively (Feature 004). + + Per spec FR-001 to FR-014: + - Display interactive menu when repos.txt is missing or empty + - Options: [A] All personal, [S] Specify manually, [O] Organization, + [L] Select from list, [Q] Quit/Skip + - Follow select_jira_projects pattern for UX consistency (FR-003) + + Args: + repos_file: Path to repos.txt file. + github_token: GitHub API token for API calls. + interactive: If True, prompt user when file is missing/empty. + If False (--quiet mode), skip prompts per FR-013. + output: Optional TerminalOutput for consistent logging.
+ + Returns: + List of repository names (owner/repo format) to analyze. + """ + # Helper for consistent output + def log(msg: str, level: str = "info") -> None: + if output: + output.log(msg, level) + else: + print(msg) + + # Try loading from file first (FR-001) + file_repos = load_github_repos_from_file(repos_file) + if file_repos: + return file_repos + + # No file or empty - need to prompt or skip + if not interactive: + # FR-013, FR-014: Non-interactive mode skips prompts + log("No repos.txt found. Skipping GitHub analysis in non-interactive mode.", "info") + return [] + + # Display menu per FR-002 + print("\nOptions:") + print(" [A] Analyze ALL accessible repositories") + print(" [S] Specify repository names manually (owner/repo format)") + print(" [O] Analyze organization repositories") + print(" [L] Select from list by number (e.g., 1,3,5 or 1-3)") + print(" [Q] Quit/Skip GitHub analysis") + + # Create client for API calls - use provided token + config = AnalyzerConfig(github_token=github_token) + client = GitHubClient(config) + + try: + while True: + try: + choice = input("\nYour choice [A/S/O/L/Q]: ").strip().upper() + except (EOFError, KeyboardInterrupt): + # FR-004: Handle EOF/KeyboardInterrupt gracefully + log("GitHub analysis skipped.", "warning") + return [] + + if choice == "A": + # FR-005: List all user repos + log("Fetching your repositories...", "info") + try: + repos = client.list_user_repos() + if not repos: + log("No repositories found for your account.", "warning") + continue + repo_names = [r["full_name"] for r in repos] + log(f"Using all {len(repo_names)} repositories.", "success") + return repo_names + except RateLimitError as e: + _handle_rate_limit(e, log) + continue + except GitHubAnalyzerError as e: + log(f"Error fetching repositories: {e.message}", "error") + continue + + elif choice == "S": + # FR-009: Manual specification + try: + manual_input = input("Enter repository names (owner/repo, comma-separated): ").strip() + except 
(EOFError, KeyboardInterrupt): + log("GitHub analysis skipped.", "warning") + return [] + + if not manual_input: + log("No repositories entered.", "warning") + continue + + # Parse and validate (FR-011, FR-012) + manual_repos = [r.strip() for r in manual_input.split(",") if r.strip()] + valid_repos = [r for r in manual_repos if validate_repo_format(r)] + invalid_repos = [r for r in manual_repos if not validate_repo_format(r)] + + if invalid_repos: + log(f"Invalid repository format ignored: {', '.join(invalid_repos)}", "warning") + + if valid_repos: + log(f"Selected {len(valid_repos)} repositories: {', '.join(valid_repos)}", "success") + return valid_repos + else: + log("No valid repository names entered. Try again.", "warning") + + elif choice == "O": + # FR-006: Organization repos + try: + org_name = input("Enter organization name: ").strip() + except (EOFError, KeyboardInterrupt): + log("GitHub analysis skipped.", "warning") + return [] + + if not validate_org_name(org_name): + log("Invalid organization name format.", "warning") + continue + + log(f"Fetching repositories for organization '{org_name}'...", "info") + try: + repos = client.list_org_repos(org_name) + if not repos: + log(f"No repositories found in organization '{org_name}'.", "warning") + continue + + # Show list and allow selection + log(f"Found {len(repos)} repositories:", "info") + print(format_repo_list(repos)) + + # Ask for selection + try: + selection_input = input("\nSelect (e.g., 1,3,5 or 1-3 or 'all'): ").strip() + except (EOFError, KeyboardInterrupt): + log("GitHub analysis skipped.", "warning") + return [] + + indices = parse_project_selection(selection_input, len(repos)) + if indices: + selected = [repos[i]["full_name"] for i in indices] + log(f"Selected {len(selected)} repositories.", "success") + return selected + else: + log("Invalid selection.", "warning") + + except RateLimitError as e: + _handle_rate_limit(e, log) + continue + except GitHubAnalyzerError as e: + if "404" in str(e): + 
log(f"Organization '{org_name}' not found or not accessible.", "warning") + else: + log(f"Error fetching organization repos: {e.message}", "error") + continue + + elif choice == "L": + # FR-010: Select from personal list + log("Fetching your repositories...", "info") + try: + repos = client.list_user_repos() + if not repos: + log("No repositories found for your account.", "warning") + continue + + log(f"Found {len(repos)} repositories:", "info") + print(format_repo_list(repos)) + + try: + selection_input = input("\nSelect (e.g., 1,3,5 or 1-3 or 'all'): ").strip() + except (EOFError, KeyboardInterrupt): + log("GitHub analysis skipped.", "warning") + return [] + + indices = parse_project_selection(selection_input, len(repos)) + if indices: + selected = [repos[i]["full_name"] for i in indices] + log(f"Selected {len(selected)} repositories.", "success") + return selected + else: + log("Invalid selection. Try again.", "warning") + + except RateLimitError as e: + _handle_rate_limit(e, log) + continue + except GitHubAnalyzerError as e: + log(f"Error fetching repositories: {e.message}", "error") + continue + + elif choice == "Q": + log("GitHub analysis skipped.", "warning") + return [] + + else: + log("Invalid choice. Please enter A, S, O, L, or Q.", "warning") + + finally: + client.close() + + def format_project_list(projects: list[JiraProject]) -> str: """Format Jira projects for display. diff --git a/tests/integration/test_interactive_selection.py b/tests/integration/test_interactive_selection.py index f87344c..6d4f529 100644 --- a/tests/integration/test_interactive_selection.py +++ b/tests/integration/test_interactive_selection.py @@ -555,3 +555,628 @@ def test_format_truncates_long_descriptions(self) -> None: # Description should be truncated assert len(result) < len(long_desc) + 50 + + +class TestGitHubInteractiveSelection: + """Integration tests for GitHub repository interactive selection (Feature 004). 
+ + Tests for User Story 1: Interactive Repository Selection Menu + """ + + @pytest.fixture + def github_env(self) -> dict: + """GitHub environment variables.""" + return { + "GITHUB_TOKEN": "ghp_test_token_12345678901234567890", + } + + @pytest.fixture + def mock_repos(self) -> list: + """Mock GitHub repository list.""" + return [ + {"full_name": "user/repo1", "private": False, "description": "First repo"}, + {"full_name": "user/repo2", "private": True, "description": "Private repo"}, + {"full_name": "user/repo3", "private": False, "description": "Third repo"}, + ] + + def test_menu_displays_when_repos_txt_missing( + self, tmp_path: Path, github_env: dict, mock_repos: list + ) -> None: + """T007: Menu displays when repos.txt is missing.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + # User selects 'Q' to quit immediately + with mock.patch("builtins.input", return_value="Q"): + with mock.patch("builtins.print") as mock_print: + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + # Verify menu was displayed + print_calls = [str(call) for call in mock_print.call_args_list] + assert any("[A]" in call for call in print_calls) + assert any("[S]" in call for call in print_calls) + assert any("[O]" in call for call in print_calls) + assert any("[L]" in call for call in print_calls) + assert any("[Q]" in call for call in print_calls) + assert result == [] + + def test_menu_displays_when_repos_txt_empty( + self, tmp_path: Path, github_env: dict + ) -> None: + """T008: Menu displays when repos.txt is empty.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + repos_file.write_text("") # Empty file + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", 
return_value="Q"): + with mock.patch("builtins.print") as mock_print: + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + # Menu should be displayed for empty file + print_calls = [str(call) for call in mock_print.call_args_list] + assert any("[A]" in call for call in print_calls) + assert result == [] + + def test_no_menu_when_repos_txt_has_content( + self, tmp_path: Path, github_env: dict + ) -> None: + """T009: No menu when repos.txt has valid content.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + repos_file.write_text("owner/repo1\nowner/repo2\n") + + with mock.patch.dict(os.environ, github_env, clear=True): + # No input mock needed - should not prompt + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + # Should use repos from file + assert result == ["owner/repo1", "owner/repo2"] + + def test_eof_returns_empty_list( + self, tmp_path: Path, github_env: dict + ) -> None: + """T010: EOF/Ctrl+C returns empty list gracefully.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", side_effect=EOFError): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == [] + + def test_keyboard_interrupt_returns_empty_list( + self, tmp_path: Path, github_env: dict + ) -> None: + """T010: KeyboardInterrupt returns empty list gracefully.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", side_effect=KeyboardInterrupt): + result = 
select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == [] + + def test_non_interactive_mode_skips_prompts( + self, tmp_path: Path, github_env: dict + ) -> None: + """T011: Non-interactive mode (--quiet) skips prompts.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=False, # Non-interactive mode + ) + + # Should return empty list without prompting + assert result == [] + + def test_option_q_returns_empty_list( + self, tmp_path: Path, github_env: dict + ) -> None: + """T047: Option [Q] returns empty list.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", return_value="Q"): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == [] + + def test_invalid_menu_choice_reprompts( + self, tmp_path: Path, github_env: dict + ) -> None: + """T048: Invalid menu choice shows error and reprompts.""" + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.dict(os.environ, github_env, clear=True): + # User enters invalid 'X', then 'Q' + with mock.patch("builtins.input", side_effect=["X", "Q"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == [] + + +class TestGitHubPersonalReposSelection: + """Integration tests for personal repos selection (Feature 004 - User Story 2).""" + + @pytest.fixture + def 
github_env(self) -> dict: + """GitHub environment variables.""" + return { + "GITHUB_TOKEN": "ghp_test_token_12345678901234567890", + } + + @pytest.fixture + def mock_repos(self) -> list: + """Mock GitHub repository list.""" + return [ + {"full_name": "user/repo1", "private": False, "description": "First repo"}, + {"full_name": "user/repo2", "private": True, "description": "Private repo"}, + {"full_name": "user/repo3", "private": False, "description": "Third repo"}, + ] + + def test_option_a_returns_all_user_repos( + self, tmp_path: Path, github_env: dict, mock_repos: list + ) -> None: + """T018: Option [A] returns all user repositories.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.list_user_repos.return_value = mock_repos + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", return_value="A"): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert len(result) == 3 + assert "user/repo1" in result + assert "user/repo2" in result + assert "user/repo3" in result + + def test_option_l_displays_numbered_list( + self, tmp_path: Path, github_env: dict, mock_repos: list + ) -> None: + """T019: Option [L] displays numbered list of repositories.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + 
mock_client.list_user_repos.return_value = mock_repos + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + # User selects 'L', then '1,3' + with mock.patch("builtins.input", side_effect=["L", "1,3"]): + with mock.patch("builtins.print") as mock_print: + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + # Verify numbered list was printed + print_calls = " ".join(str(call) for call in mock_print.call_args_list) + assert "1." in print_calls or "[1]" in print_calls + assert "user/repo1" in print_calls + + # Should return selected repos + assert "user/repo1" in result + assert "user/repo3" in result + + def test_option_l_accepts_range_selection( + self, tmp_path: Path, github_env: dict, mock_repos: list + ) -> None: + """T021: Option [L] accepts '1-3' range selection.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.list_user_repos.return_value = mock_repos + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", side_effect=["L", "1-3"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert len(result) == 3 + + def test_option_l_accepts_all_selection( + self, tmp_path: Path, github_env: dict, mock_repos: list + ) -> None: + """T022: Option [L] accepts 'all' selection.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + 
assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.list_user_repos.return_value = mock_repos + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", side_effect=["L", "all"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert len(result) == 3 + + +class TestGitHubOrgReposSelection: + """Integration tests for organization repos selection (Feature 004 - User Story 3).""" + + @pytest.fixture + def github_env(self) -> dict: + """GitHub environment variables.""" + return { + "GITHUB_TOKEN": "ghp_test_token_12345678901234567890", + } + + @pytest.fixture + def mock_org_repos(self) -> list: + """Mock organization repository list.""" + return [ + {"full_name": "myorg/project1", "private": False, "description": "Org project 1"}, + {"full_name": "myorg/project2", "private": True, "description": "Org project 2"}, + ] + + def test_option_o_prompts_for_org_name( + self, tmp_path: Path, github_env: dict, mock_org_repos: list + ) -> None: + """T029: Option [O] prompts for organization name.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.list_org_repos.return_value = mock_org_repos + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + # User selects 'O', enters org name, then 'all' + with mock.patch("builtins.input", side_effect=["O", "myorg", "all"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + 
interactive=True, + ) + + # Should have called list_org_repos with the org name + mock_client.list_org_repos.assert_called_with("myorg") + assert len(result) == 2 + assert "myorg/project1" in result + + def test_invalid_org_name_format_shows_error( + self, tmp_path: Path, github_env: dict + ) -> None: + """T031: Invalid org name format shows error.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + # User enters invalid org name (starts with hyphen), then quits + with mock.patch("builtins.input", side_effect=["O", "-invalid", "Q"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == [] + + +class TestGitHubManualReposSelection: + """Integration tests for manual repos specification (Feature 004 - User Story 4).""" + + @pytest.fixture + def github_env(self) -> dict: + """GitHub environment variables.""" + return { + "GITHUB_TOKEN": "ghp_test_token_12345678901234567890", + } + + def test_option_s_prompts_for_manual_input( + self, tmp_path: Path, github_env: dict + ) -> None: + """T038: Option [S] prompts for manual input.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + with 
mock.patch("builtins.input", side_effect=["S", "owner/repo1, owner/repo2"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == ["owner/repo1", "owner/repo2"] + + def test_valid_owner_repo_format_accepted( + self, tmp_path: Path, github_env: dict + ) -> None: + """T039: Valid 'owner/repo' format accepted.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + with mock.patch("builtins.input", side_effect=["S", "facebook/react"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == ["facebook/react"] + + def test_invalid_format_shows_warning( + self, tmp_path: Path, github_env: dict + ) -> None: + """T040: Invalid format shows warning.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + # Enter mix of valid and invalid, should continue with valid only + with mock.patch("builtins.input", side_effect=["S", "valid/repo, invalid-repo"]): + with mock.patch("builtins.print"): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + # Only 
valid repos returned + assert result == ["valid/repo"] + + def test_empty_input_prompts_again( + self, tmp_path: Path, github_env: dict + ) -> None: + """T042: Empty input prompts again.""" + import sys + import src.github_analyzer.cli.main + main_module = sys.modules["src.github_analyzer.cli.main"] + from src.github_analyzer.cli.main import select_github_repos + + repos_file = tmp_path / "repos.txt" + assert not repos_file.exists() + + with mock.patch.object(main_module, "GitHubClient") as MockClient: + mock_client = MockClient.return_value + mock_client.close = mock.Mock() + + with mock.patch.dict(os.environ, github_env, clear=True): + # Empty input, then valid + with mock.patch("builtins.input", side_effect=["S", "", "S", "owner/repo"]): + result = select_github_repos( + str(repos_file), + github_token=github_env["GITHUB_TOKEN"], + interactive=True, + ) + + assert result == ["owner/repo"] + + +class TestGitHubRepoDisplayFormat: + """Tests for GitHub repository display formatting.""" + + def test_format_repo_list(self) -> None: + """Repositories are formatted for display with numbers.""" + from src.github_analyzer.cli.main import format_repo_list + + repos = [ + {"full_name": "user/repo1", "private": False, "description": "Description 1"}, + {"full_name": "user/repo2", "private": True, "description": "Private desc"}, + ] + + result = format_repo_list(repos) + + assert "1." in result + assert "2." in result + assert "user/repo1" in result + assert "user/repo2" in result + assert "[private]" in result # Private marker for second repo + + def test_format_truncates_long_descriptions(self) -> None: + """Long descriptions are truncated to 50 chars.""" + from src.github_analyzer.cli.main import format_repo_list + + long_desc = "A" * 200 + repos = [ + {"full_name": "user/repo1", "private": False, "description": long_desc}, + ] + + result = format_repo_list(repos) + + # Description should be truncated to 50 chars total (47 chars + "...") + assert "..."
in result + assert "A" * 51 not in result + + +class TestValidationPatterns: + """Tests for validation patterns (spec Validation Patterns section).""" + + def test_validate_repo_format_valid(self) -> None: + """Valid repo formats pass validation.""" + from src.github_analyzer.cli.main import validate_repo_format + + valid_repos = [ + "owner/repo", + "my-org/my-repo", + "user123/project_v2", + "facebook/react", + "owner.name/repo.name", + ] + + for repo in valid_repos: + assert validate_repo_format(repo), f"{repo} should be valid" + + def test_validate_repo_format_invalid(self) -> None: + """Invalid repo formats fail validation.""" + from src.github_analyzer.cli.main import validate_repo_format + + invalid_repos = [ + "just-repo", + "owner/", + "/repo", + "", + "owner//repo", + ] + + for repo in invalid_repos: + assert not validate_repo_format(repo), f"{repo} should be invalid" + + def test_validate_org_name_valid(self) -> None: + """Valid org names pass validation.""" + from src.github_analyzer.cli.main import validate_org_name + + valid_orgs = [ + "myorg", + "my-organization", + "org123", + "a", + ] + + for org in valid_orgs: + assert validate_org_name(org), f"{org} should be valid" + + def test_validate_org_name_invalid(self) -> None: + """Invalid org names fail validation.""" + from src.github_analyzer.cli.main import validate_org_name + + invalid_orgs = [ + "-invalid", + "invalid-", + "org--double", + "", + "a" * 40, # Too long (max 39) + ] + + for org in invalid_orgs: + assert not validate_org_name(org), f"{org} should be invalid" diff --git a/tests/unit/api/test_client.py b/tests/unit/api/test_client.py index a688406..00c9cb5 100644 --- a/tests/unit/api/test_client.py +++ b/tests/unit/api/test_client.py @@ -677,3 +677,171 @@ def test_builds_url_with_params(self, mock_urlopen, mock_config): request = call_args[0][0] assert "page=1" in request.full_url assert "per_page=100" in request.full_url + + +class TestGitHubClientListUserRepos: + """Tests for 
list_user_repos method (T003).""" + + def test_lists_user_repos_with_owner_collaborator_affiliation(self, mock_config): + """Test list_user_repos uses owner,collaborator affiliation.""" + client = GitHubClient(mock_config) + + mock_repos = [ + {"full_name": "user/repo1", "private": False, "description": "Repo 1"}, + {"full_name": "user/repo2", "private": True, "description": "Repo 2"}, + ] + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = mock_repos + + result = client.list_user_repos() + + assert result == mock_repos + mock_paginate.assert_called_once() + call_args = mock_paginate.call_args + assert call_args[0][0] == "/user/repos" + params = call_args[1].get("params", call_args[0][1] if len(call_args[0]) > 1 else {}) + assert params.get("affiliation") == "owner,collaborator" + + def test_lists_user_repos_with_custom_affiliation(self, mock_config): + """Test list_user_repos accepts custom affiliation.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = [] + + client.list_user_repos(affiliation="owner") + + call_args = mock_paginate.call_args + params = call_args[1].get("params", call_args[0][1] if len(call_args[0]) > 1 else {}) + assert params.get("affiliation") == "owner" + + def test_lists_user_repos_returns_empty_list(self, mock_config): + """Test list_user_repos returns empty list when no repos.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = [] + + result = client.list_user_repos() + + assert result == [] + + def test_lists_user_repos_handles_rate_limit(self, mock_config): + """Test list_user_repos propagates RateLimitError.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.side_effect = RateLimitError("Rate limit exceeded") + + with pytest.raises(RateLimitError): + client.list_user_repos() + + 
def test_lists_user_repos_handles_api_error(self, mock_config): + """Test list_user_repos propagates APIError.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.side_effect = APIError("Unauthorized", status_code=401) + + with pytest.raises(APIError): + client.list_user_repos() + + +class TestGitHubClientListOrgRepos: + """Tests for list_org_repos method (T004).""" + + def test_lists_org_repos(self, mock_config): + """Test list_org_repos fetches repos for organization.""" + client = GitHubClient(mock_config) + + mock_repos = [ + {"full_name": "myorg/repo1", "private": False, "description": "Org Repo 1"}, + {"full_name": "myorg/repo2", "private": True, "description": "Org Repo 2"}, + ] + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = mock_repos + + result = client.list_org_repos("myorg") + + assert result == mock_repos + mock_paginate.assert_called_once() + call_args = mock_paginate.call_args + assert call_args[0][0] == "/orgs/myorg/repos" + + def test_lists_org_repos_uses_type_all(self, mock_config): + """Test list_org_repos uses type=all by default.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = [] + + client.list_org_repos("myorg") + + call_args = mock_paginate.call_args + params = call_args[1].get("params", call_args[0][1] if len(call_args[0]) > 1 else {}) + assert params.get("type") == "all" + + def test_lists_org_repos_with_custom_type(self, mock_config): + """Test list_org_repos accepts custom type parameter.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = [] + + client.list_org_repos("myorg", repo_type="public") + + call_args = mock_paginate.call_args + params = call_args[1].get("params", call_args[0][1] if len(call_args[0]) > 1 else {}) + assert params.get("type") == "public" + + def 
test_lists_org_repos_returns_empty_list(self, mock_config): + """Test list_org_repos returns empty list when no repos.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = [] + + result = client.list_org_repos("empty-org") + + assert result == [] + + def test_lists_org_repos_handles_org_not_found(self, mock_config): + """Test list_org_repos handles 404 for non-existent org.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.side_effect = APIError("Not Found", status_code=404) + + with pytest.raises(APIError) as exc_info: + client.list_org_repos("nonexistent-org") + + assert exc_info.value.status_code == 404 + + def test_lists_org_repos_handles_rate_limit(self, mock_config): + """Test list_org_repos propagates RateLimitError.""" + client = GitHubClient(mock_config) + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.side_effect = RateLimitError("Rate limit exceeded") + + with pytest.raises(RateLimitError): + client.list_org_repos("myorg") + + def test_lists_org_repos_handles_pagination(self, mock_config): + """Test list_org_repos handles pagination for 100+ repos.""" + mock_config.per_page = 50 + mock_config.max_pages = 10 + client = GitHubClient(mock_config) + + # Simulate 150 repos (3 pages) + mock_repos = [{"full_name": f"myorg/repo{i}"} for i in range(150)] + + with patch.object(client, "paginate") as mock_paginate: + mock_paginate.return_value = mock_repos + + result = client.list_org_repos("myorg") + + assert len(result) == 150 + mock_paginate.assert_called_once() From 61115b2f92d56f74778ab8355650c6b818ca312d Mon Sep 17 00:00:00 2001 From: Andrea Margiovanni Date: Sat, 29 Nov 2025 05:36:23 +0100 Subject: [PATCH 2/3] fix(cli): integrate select_github_repos() into main() entry point The select_github_repos() function was implemented but not integrated into main(). 
This caused the interactive menu to never appear when repos.txt was missing/empty. Changes: - Replace load_repositories() with select_github_repos() in main() - Convert returned repo names (strings) to Repository objects - Update unit tests to mock select_github_repos instead of load_repositories - Add required github_token and repos_file attributes to mock configs --- src/github_analyzer/cli/main.py | 28 ++++++++++++++++++++------- tests/unit/cli/test_main.py | 34 ++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/github_analyzer/cli/main.py b/src/github_analyzer/cli/main.py index 0ab9724..6a46e24 100644 --- a/src/github_analyzer/cli/main.py +++ b/src/github_analyzer/cli/main.py @@ -27,7 +27,7 @@ ) from src.github_analyzer.api import GitHubClient, RepositoryStats from src.github_analyzer.cli.output import TerminalOutput -from src.github_analyzer.config import AnalyzerConfig, Repository, load_repositories +from src.github_analyzer.config import AnalyzerConfig, Repository from src.github_analyzer.config.settings import DataSource, JiraConfig from src.github_analyzer.config.validation import load_jira_projects from src.github_analyzer.core.exceptions import ( @@ -996,14 +996,28 @@ def main() -> int: output.log(f"Full PR details: {'Yes' if fetch_pr_details else 'No'}", "info") # Load GitHub repositories if GitHub source is enabled - repositories = [] + repositories: list[Repository] = [] if DataSource.GITHUB in sources: - output.log(f"Loading repositories from {config.repos_file}...") - repositories = load_repositories(config.repos_file) - output.log(f"Found {len(repositories)} repositories to analyze", "success") + # Use interactive selection (Feature 004) + # select_github_repos handles: file loading, empty/missing file prompts + interactive = not args.quiet if hasattr(args, "quiet") else True + repo_names = select_github_repos( + repos_file=config.repos_file, + github_token=config.github_token, + 
interactive=interactive, + output=output, + ) + + # Convert string names to Repository objects + for name in repo_names: + repositories.append(Repository.from_string(name)) - for repo in repositories: - output.log(f" • {repo.full_name}", "info") + if repositories: + output.log(f"Found {len(repositories)} repositories to analyze", "success") + for repo in repositories: + output.log(f" • {repo.full_name}", "info") + else: + output.log("No GitHub repositories selected", "warning") # Load Jira projects if Jira source is enabled jira_config = None diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py index 06ec3ef..99931c6 100644 --- a/tests/unit/cli/test_main.py +++ b/tests/unit/cli/test_main.py @@ -427,6 +427,7 @@ def test_returns_0_when_cancelled(self, tmp_path): mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = str(tmp_path) mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = True mock_config.validate = Mock() @@ -436,7 +437,7 @@ def test_returns_0_when_cancelled(self, tmp_path): patch("sys.argv", ["prog", "--days", "7", "--quiet", "--full", "--sources", "github"]), patch.dict(os.environ, {"GITHUB_TOKEN": "ghp_test1234567890123456789012"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[]), + patch.object(main_module, "select_github_repos", return_value=[]), patch.object(main_module, "prompt_yes_no", return_value=False), ): MockConfig.from_env.return_value = mock_config @@ -824,6 +825,8 @@ def test_github_analysis_full_flow(self, tmp_path): """Test complete GitHub analysis flow in main().""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -840,7 +843,7 @@ def 
test_github_analysis_full_flow(self, tmp_path): clear=True, ), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="test", name="repo")]), + patch.object(main_module, "select_github_repos", return_value=["test/repo"]), patch.object(main_module, "prompt_yes_no", return_value=True), patch.object(main_module, "GitHubAnalyzer", return_value=mock_analyzer) as MockAnalyzer, ): @@ -858,6 +861,8 @@ def test_github_analysis_calls_close_on_success(self, tmp_path): """Test GitHub analyzer close is called after successful run.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -868,7 +873,7 @@ def test_github_analysis_calls_close_on_success(self, tmp_path): patch("sys.argv", ["prog", "--sources", "github", "--quiet", "--days", "30", "--full"]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="o", name="r")]), + patch.object(main_module, "select_github_repos", return_value=["o/r"]), patch.object(main_module, "prompt_yes_no", return_value=True), patch.object(main_module, "GitHubAnalyzer", return_value=mock_analyzer), ): @@ -883,6 +888,8 @@ def test_github_analysis_calls_close_on_exception(self, tmp_path): """Test GitHub analyzer close is called even when run() raises.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -894,7 +901,7 @@ def test_github_analysis_calls_close_on_exception(self, tmp_path): patch("sys.argv", ["prog", "--sources", "github", "--quiet", 
"--days", "30", "--full"]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="o", name="r")]), + patch.object(main_module, "select_github_repos", return_value=["o/r"]), patch.object(main_module, "prompt_yes_no", return_value=True), patch.object(main_module, "GitHubAnalyzer", return_value=mock_analyzer), ): @@ -920,6 +927,8 @@ def test_keyboard_interrupt_returns_130(self, tmp_path): """Test KeyboardInterrupt returns exit code 130.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -928,7 +937,7 @@ def test_keyboard_interrupt_returns_130(self, tmp_path): patch("sys.argv", ["prog", "--sources", "github", "--quiet", "--days", "30", "--full"]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", side_effect=KeyboardInterrupt), + patch.object(main_module, "select_github_repos", side_effect=KeyboardInterrupt), ): MockConfig.from_env.return_value = mock_config @@ -940,6 +949,8 @@ def test_unexpected_exception_returns_2(self, tmp_path): """Test unexpected exception returns exit code 2.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -948,7 +959,7 @@ def test_unexpected_exception_returns_2(self, tmp_path): patch("sys.argv", ["prog", "--sources", "github", "--quiet", "--days", "30", "--full"]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - 
patch.object(main_module, "load_repositories", side_effect=RuntimeError("Unexpected")), + patch.object(main_module, "select_github_repos", side_effect=RuntimeError("Unexpected")), ): MockConfig.from_env.return_value = mock_config @@ -986,6 +997,8 @@ def test_output_argument_overrides_config(self, tmp_path): """Test --output argument overrides config output_dir.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = "/default/output" + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -997,7 +1010,7 @@ def test_output_argument_overrides_config(self, tmp_path): "--full", "--output", custom_output]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="o", name="r")]), + patch.object(main_module, "select_github_repos", return_value=["o/r"]), patch.object(main_module, "prompt_yes_no", return_value=True), patch.object(main_module, "GitHubAnalyzer") as MockAnalyzer, ): @@ -1013,6 +1026,7 @@ def test_repos_argument_overrides_config(self, tmp_path): mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path mock_config.repos_file = "default_repos.txt" + mock_config.github_token = "test_token" mock_config.days = 30 mock_config.verbose = False mock_config.validate = Mock() @@ -1022,7 +1036,7 @@ def test_repos_argument_overrides_config(self, tmp_path): "--full", "--repos", "custom_repos.txt"]), patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="o", name="r")]), + patch.object(main_module, "select_github_repos", return_value=["o/r"]), patch.object(main_module, "prompt_yes_no", return_value=True), patch.object(main_module, 
"GitHubAnalyzer") as MockAnalyzer, ): @@ -1057,6 +1071,8 @@ def test_interactive_prompts_when_no_cli_args(self, tmp_path): """Test interactive prompts are used when CLI args not provided.""" mock_config = Mock(spec=AnalyzerConfig) mock_config.output_dir = tmp_path + mock_config.repos_file = "repos.txt" + mock_config.github_token = "test_token" mock_config.days = 7 # default mock_config.verbose = True # default mock_config.validate = Mock() @@ -1065,7 +1081,7 @@ def test_interactive_prompts_when_no_cli_args(self, tmp_path): patch("sys.argv", ["prog", "--sources", "github"]), # No --quiet, --days, --full patch.dict(os.environ, {"GITHUB_TOKEN": "test_token"}, clear=True), patch.object(main_module, "AnalyzerConfig") as MockConfig, - patch.object(main_module, "load_repositories", return_value=[Repository(owner="o", name="r")]), + patch.object(main_module, "select_github_repos", return_value=["o/r"]), patch.object(main_module, "prompt_int", return_value=14) as mock_prompt_int, patch.object(main_module, "prompt_yes_no", side_effect=[False, True, True]) as mock_prompt_yn, patch.object(main_module, "GitHubAnalyzer") as MockAnalyzer, From a37d85f874b613abd1c8d8826ad0f11f23a01a47 Mon Sep 17 00:00:00 2001 From: Andrea Margiovanni Date: Sat, 29 Nov 2025 05:41:55 +0100 Subject: [PATCH 3/3] fix(validation): allow repo names starting with period GitHub allows repository names that start with a period (e.g., .github, .dotfiles). Updated REPO_COMPONENT_PATTERN to accept names starting with alphanumeric OR period. This fixes the "Invalid repository name format" error when selecting repositories from the GitHub API. 
--- src/github_analyzer/config/validation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/github_analyzer/config/validation.py b/src/github_analyzer/config/validation.py index bbee5e6..2218386 100644 --- a/src/github_analyzer/config/validation.py +++ b/src/github_analyzer/config/validation.py @@ -37,9 +37,10 @@ # Repository name validation # GitHub allows: alphanumeric, hyphen, underscore, period +# Names can start with alphanumeric or period (e.g., .github, .dotfiles) # Max 100 characters per component -REPO_COMPONENT_PATTERN = r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,99}$" -REPO_FULL_PATTERN = r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,99}/[a-zA-Z0-9][a-zA-Z0-9._-]{0,99}$" +REPO_COMPONENT_PATTERN = r"^[a-zA-Z0-9.][a-zA-Z0-9._-]{0,99}$" +REPO_FULL_PATTERN = r"^[a-zA-Z0-9.][a-zA-Z0-9._-]{0,99}/[a-zA-Z0-9.][a-zA-Z0-9._-]{0,99}$" # Dangerous characters that could indicate injection attempts DANGEROUS_CHARS = set(";|&$`(){}[]<>\\'\"\n\r\t") @@ -207,7 +208,7 @@ def from_string(cls, repo_str: str) -> Repository: if not re.match(REPO_COMPONENT_PATTERN, owner): raise ValidationError( "Invalid repository owner format", - details="Owner must start with alphanumeric and contain only alphanumeric, hyphen, underscore, or period", + details="Owner must start with alphanumeric or period and contain only alphanumeric, hyphen, underscore, or period", ) # Validate name @@ -216,7 +217,7 @@ def from_string(cls, repo_str: str) -> Repository: if not re.match(REPO_COMPONENT_PATTERN, name): raise ValidationError( "Invalid repository name format", - details="Name must start with alphanumeric and contain only alphanumeric, hyphen, underscore, or period", + details="Name must start with alphanumeric or period and contain only alphanumeric, hyphen, underscore, or period", ) # Check for path traversal