diff --git a/AGENTS.md b/AGENTS.md index 432bf51..9c63cf8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,7 @@ This document provides essential context for LLMs performing development tasks i - Analyze composable definitions and usage across projects - Compare files across documentation versions - Count documentation pages and tested code examples +- Generate reports on testable code examples from analytics data **Target Users**: MongoDB technical writers performing maintenance, scoping work, and reporting. @@ -36,29 +37,42 @@ audit-cli/ │ │ └── composables/ # Analyze composable definitions and usage │ ├── compare/ # Compare files across versions │ │ └── file-contents/ # Compare file contents -│ └── count/ # Count documentation content -│ ├── tested-examples/ # Count tested code examples -│ └── pages/ # Count documentation pages +│ ├── count/ # Count documentation content +│ │ ├── tested-examples/ # Count tested code examples +│ │ └── pages/ # Count documentation pages +│ └── report/ # Generate reports from documentation data +│ └── testable-code/ # Analyze testable code examples from analytics ├── internal/ # Internal packages (not importable externally) │ ├── config/ # Configuration management │ │ ├── config.go # Config loading from file/env/args -│ │ └── config_test.go # Config tests +│ │ ├── config_test.go # Config tests +│ │ ├── url_mapping.go # URL-to-source-file mapping via Snooty Data API +│ │ └── url_mapping_test.go # URL mapping tests +│ ├── language/ # Programming language utilities +│ │ ├── language.go # Language normalization, extensions, products +│ │ └── language_test.go # Language tests │ ├── projectinfo/ # MongoDB docs project structure utilities -│ │ ├── pathresolver.go # Path resolution -│ │ ├── source_finder.go # Source directory detection +│ │ ├── pathresolver.go # Path resolution +│ │ ├── products.go # Content directory to product mapping +│ │ ├── source_finder.go # Source directory detection │ │ └── version_resolver.go # Version path 
resolution -│ └── rst/ # RST parsing utilities -│ ├── parser.go # Generic parsing with includes -│ ├── directive_parser.go # Directive parsing -│ ├── directive_regex.go # Regex patterns for directives -│ ├── parse_procedures.go # Procedure parsing (core logic) -│ ├── get_procedure_variations.go # Variation extraction -│ └── rstspec.go # Fetch and parse canonical rstspec.toml +│ ├── rst/ # RST parsing utilities +│ │ ├── parser.go # Generic parsing with includes +│ │ ├── directive_parser.go # Directive parsing with language resolution +│ │ ├── directive_regex.go # Regex patterns for directives +│ │ ├── parse_procedures.go # Procedure parsing (core logic) +│ │ ├── get_procedure_variations.go # Variation extraction +│ │ ├── rstspec.go # Fetch and parse canonical rstspec.toml +│ │ └── yaml_steps_parser.go # Parse YAML steps files for code examples +│ └── snooty/ # Snooty.toml parsing utilities +│ ├── snooty.go # Parse snooty.toml, find project config +│ └── snooty_test.go # Snooty tests ├── testdata/ # Test fixtures (auto-ignored by Go build) │ ├── input-files/source/ # Test RST files │ ├── expected-output/ # Expected extraction results │ ├── compare/ # Compare command test data -│ └── count-test-monorepo/ # Count command test data +│ ├── count-test-monorepo/ # Count command test data +│ └── testable-code-test/ # Testable code report test data ├── bin/ # Build output directory ├── docs/ # Additional documentation │ └── PROCEDURE_PARSING.md # Detailed procedure parsing logic @@ -393,6 +407,103 @@ func NewExtractCommand() *cobra.Command { - Support multiple output formats (text, JSON) where applicable - Use consistent formatting (headers with `=` separators, indentation) +### Network Request Caching + +All network requests to external APIs should implement caching to avoid repeated requests and support offline usage. The caching pattern is implemented in `internal/config/url_mapping.go` (for Snooty Data API) and `internal/rst/rstspec.go` (for rstspec.toml). 
+ +**Cache Location**: `~/.audit-cli/` directory +- URL mapping cache: `~/.audit-cli/url-mapping-cache.json` +- Rstspec cache: `~/.audit-cli/rstspec-cache.json` + +**Cache TTL**: 24 hours (configurable per cache type) + +**Implementation Pattern**: + +1. **Define cache constants**: +```go +const CacheTTL = 24 * time.Hour +const CacheDir = ".audit-cli" +const CacheFileName = "my-cache.json" +``` + +2. **Create cache struct** with timestamp and data: +```go +type MyCache struct { + Timestamp time.Time `json:"timestamp"` + Data MyData `json:"data"` +} +``` + +3. **Implement cache functions**: +```go +// getCachePath returns the path to the cache file +func getCachePath() (string, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + return filepath.Join(homeDir, CacheDir, CacheFileName), nil +} + +// loadCache loads from cache, returns error if missing or expired +func loadCache() (*MyData, error) { + // Read file, unmarshal JSON, check TTL +} + +// saveCache saves data to cache with current timestamp +func saveCache(data *MyData) error { + // Create directory if needed, marshal JSON, write file +} + +// fetchFromAPI fetches fresh data from the network +func fetchFromAPI() (*MyData, error) { + // HTTP request, parse response +} +``` + +4. 
**Main fetch function with fallback logic**: +```go +func FetchData() (*MyData, error) { + // Try cache first + data, err := loadCache() + if err == nil { + return data, nil + } + + // Cache miss or expired, try network + data, fetchErr := fetchFromAPI() + if fetchErr != nil { + // Network failed - try expired cache as offline fallback + // (read cache file without TTL check) + if expiredData := loadExpiredCache(); expiredData != nil { + fmt.Fprintf(os.Stderr, "Warning: Using expired cache\n") + return expiredData, nil + } + return nil, fetchErr + } + + // Save to cache for next time + if saveErr := saveCache(data); saveErr != nil { + fmt.Fprintf(os.Stderr, "Warning: Could not save cache: %v\n", saveErr) + } + + return data, nil +} +``` + +**Key Behaviors**: +- Cache is stored in user's home directory for persistence across sessions +- Expired cache is used as fallback when network is unavailable (offline support) +- Cache save failures are logged as warnings but don't fail the operation +- JSON format for easy debugging and human readability + +**When Adding New Network Calls**: +1. Follow the pattern above +2. Add cache file name constant +3. Implement the four cache functions +4. Use the same `~/.audit-cli/` directory for consistency +5. Consider appropriate TTL (24 hours is default, adjust if data changes more/less frequently) + ## Key Design Decisions ### RST Parsing Strategy diff --git a/CHANGELOG.md b/CHANGELOG.md index c31dea8..feaacdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,60 @@ All notable changes to audit-cli will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.3.0] - 2026-01-07 + +### Added + +#### Report Commands +- `report testable-code` - Analyze testable code examples on pages from analytics data + - Takes a CSV file with page rankings and URLs from analytics + - Resolves URLs to source files using the Snooty Data API + - Collects code examples (literalinclude, code, code-block, io-code-block) from each page + - Determines product context from tabs, composables, and content directories + - Identifies tested vs testable vs "maybe testable" code examples + - Supports multiple output formats: text, JSON, CSV + - Flags: + - `--format, -f` - Output format (text, json, csv) + - `--output, -o` - Output file path (default: stdout) + - `--details` - Show detailed per-product breakdown + +#### Internal Packages +- `internal/language` - Programming language utilities (refactored from code-examples) + - Language normalization (e.g., "ts" → "typescript", "py" → "python") + - File extension mapping for all supported languages + - Language-to-product mapping for MongoDB drivers + - Non-driver language detection (bash, json, yaml, etc.) 
+ - MongoDB Shell language detection + - Language resolution with priority: argument > option > file extension +- `internal/snooty` - Snooty.toml parsing utilities + - Parse snooty.toml configuration files + - Find project snooty.toml from source file paths + - Build composable ID-to-title mappings + - Extract project and version from snooty.toml paths +- `internal/config/url_mapping.go` - URL-to-source-file mapping + - Fetches project metadata from Snooty Data API + - Resolves documentation URLs to source file paths + - Caches API responses for 24 hours in `~/.audit-cli/` + - Supports offline usage with expired cache fallback +- `internal/projectinfo/products.go` - Content directory to product mapping + - Maps driver content directories to display product names + - Supports all MongoDB driver documentation projects +- `internal/rst/yaml_steps_parser.go` - YAML steps file parsing + - Parses legacy YAML-native code examples in steps files + - Extracts code blocks with language and content +- `internal/rst/directive_parser.go` - Enhanced directive parsing + - Added `ResolveLanguage()` method to Directive type + - Added `ResolveLanguage()` method to SubDirective type + - Language resolution follows priority: argument > option > file extension + +### Changed + +- Refactored language handling from `commands/extract/code-examples/language.go` to `internal/language` package + - All language-related utilities now centralized and reusable + - Added product mapping and non-driver language detection +- Enhanced `internal/rst` directive parsing with language resolution methods +- Updated `analyze usage` to use new language package for file extension handling + ## [0.2.0] - 2025-12-12 ### Added diff --git a/README.md b/README.md index b4667c8..d3f1679 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ A Go CLI tool for performing audit-related tasks in the MongoDB documentation mo - [Analyze Commands](#analyze-commands) - [Compare Commands](#compare-commands) - [Count 
Commands](#count-commands) + - [Report Commands](#report-commands) - [Development](#development) - [Project Structure](#project-structure) - [Adding New Commands](#adding-new-commands) @@ -173,9 +174,11 @@ audit-cli │ └── composables ├── compare # Compare files across versions │ └── file-contents -└── count # Count code examples and documentation pages - ├── tested-examples - └── pages +├── count # Count code examples and documentation pages +│ ├── tested-examples +│ └── pages +└── report # Generate reports from documentation data + └── testable-code ``` ### Extract Commands @@ -1504,6 +1507,149 @@ echo "Total documentation pages: $TOTAL_PAGES" # Output: 150 ``` +### Report Commands + +#### `report testable-code` + +Analyze testable code examples on documentation pages based on analytics CSV data. + +This command takes a CSV file with page rankings and URLs, resolves each URL to its source file in the monorepo, collects code examples (literalinclude, code-block, io-code-block), and generates a report with testability information. + +**Use Cases:** + +This command helps writers and maintainers: +- Identify high-traffic pages with untested code examples +- Prioritize which pages to add test coverage to +- Track the ratio of tested vs testable code examples +- Understand code example distribution by product/language +- Find "maybe testable" examples that need manual review + +**Key Concepts:** + +- **Product vs Language**: A "product" is a MongoDB driver or tool (e.g., "Python", "Node.js"). A "language" is the programming language of a code example (e.g., "python", "javascript"). The same language can map to different products depending on context. +- **Testable vs Tested**: "Testable" means the code example is for a product that has test infrastructure. "Tested" means the code example actually references tested code (literalinclude from the tested code examples directory). 
+- **Maybe Testable**: JavaScript/shell examples without clear context that may need manual review. + +**Examples:** + +```bash +# Analyze pages from a CSV file (specify monorepo path) +./audit-cli report testable-code analytics.csv /path/to/docs-monorepo + +# Use configured monorepo path (from config file or environment variable) +./audit-cli report testable-code analytics.csv + +# Output as JSON to a file +./audit-cli report testable-code analytics.csv --format json --output report.json + +# Output as CSV for spreadsheet analysis +./audit-cli report testable-code analytics.csv --format csv --details -o report.csv + +# Output to stdout (can also use shell redirection) +./audit-cli report testable-code analytics.csv --format json > report.json +``` + +**CSV Input Format:** + +The CSV file should have columns for rank and URL. The first row is treated as a header, but the tool also handles CSV +files with no header: + +```csv +rank,url +1,www.mongodb.com/docs/atlas/some-page/ +2,www.mongodb.com/docs/manual/tutorial/install/ +``` + +**Flags:** + +- `--format, -f <format>` - Output format: `text` (default), `json`, or `csv` +- `--output, -o <file>` - Output file path (default: stdout) +- `--details` - Show detailed per-product breakdown (for CSV output, includes per-product columns) +- `--filter <filter>` - Filter pages by product area (can be specified multiple times) +- `--list-drivers` - List all available driver filter options from the Snooty Data API + +**Filtering:** + +Use the `--filter` flag to focus on specific product areas. Multiple filters can be specified to include pages matching any filter. 
+ +Available filters: +- `search` - Pages with "atlas-search" or "search" in URL (excludes vector-search) +- `vector-search` - Pages with "vector-search" in URL +- `drivers` - All MongoDB driver documentation pages +- `driver:<name>` - Specific driver by project name (e.g., `driver:pymongo`, `driver:node`) +- `mongosh` - MongoDB Shell documentation pages + +```bash +# Filter to only Atlas Search pages +./audit-cli report testable-code analytics.csv --filter search + +# Filter to only PyMongo driver pages +./audit-cli report testable-code analytics.csv --filter driver:pymongo + +# Filter to multiple areas (pages matching any filter are included) +./audit-cli report testable-code analytics.csv --filter drivers --filter mongosh + +# List all available driver filter options +./audit-cli report testable-code --list-drivers +``` + +The `--list-drivers` flag queries the Snooty Data API to show all available driver project names that can be used with the `driver:` filter. Results are cached for 24 hours. + +**Testable Products:** + +Products with test infrastructure (code examples for these products are marked as "testable"): +- C# +- Go +- Java (Sync) +- Node.js +- Python +- MongoDB Shell + +To add a new testable product when test infrastructure is added: + +1. Edit `commands/report/testable-code/types.go` +2. Add entries to the `TestableProducts` map for both the display name and internal ID: + ```go + var TestableProducts = map[string]bool{ + // ... existing entries ... + "Ruby": true, // Display name + "ruby": true, // Internal ID (used in tabs/composables) + } + ``` +3. Update the tests in `commands/report/testable-code/testable_code_test.go` to include the new product +4. 
Update this README to list the new product + +**Output:** + +The text output includes a summary table and detailed per-page breakdowns: + +``` +========================================================================================== +PAGE ANALYTICS REPORT +========================================================================================== +Total pages analyzed: 3 + +SUMMARY +------------------------------------------------------------------------------------------ +Rank URL Total Tested Testable Maybe +------------------------------------------------------------------------------------------ +1 www.mongodb.com/docs/drivers/node/current/quick... 8 2 6 0 +2 www.mongodb.com/docs/manual/tutorial/install-m... 4 0 0 2 +3 www.mongodb.com/docs/atlas/getting-started/ 12 5 7 0 + +DETAILED REPORTS +========================================================================================== + +Rank 1: www.mongodb.com/docs/drivers/node/current/quick-start/ +Source: content/node/current/source/quick-start.txt +------------------------------------------------------------------------------------------ + Product Total Input Output Tested Testable Maybe + -------------------------------------------------------------------- + Node.js 8 4 4 2 6 0 + -------------------------------------------------------------------- + TOTAL 8 4 4 2 6 0 +``` + ## Development ### Project Structure @@ -1520,8 +1666,7 @@ audit-cli/ │ │ │ ├── parser.go # RST directive parsing │ │ │ ├── writer.go # File writing logic │ │ │ ├── report.go # Report generation -│ │ │ ├── types.go # Type definitions -│ │ │ └── language.go # Language normalization +│ │ │ └── types.go # Type definitions │ │ └── procedures/ # Procedures extraction subcommand │ │ ├── procedures.go # Command logic │ │ ├── procedures_test.go # Tests @@ -1573,43 +1718,63 @@ audit-cli/ │ │ ├── output.go # Output formatting │ │ ├── types.go # Type definitions │ │ └── version_resolver.go # Version path resolution -│ └── count/ # Count parent 
command -│ ├── count.go # Parent command definition -│ ├── tested-examples/ # Tested examples counting subcommand -│ │ ├── tested_examples.go # Command logic -│ │ ├── tested_examples_test.go # Tests -│ │ ├── counter.go # Counting logic -│ │ ├── output.go # Output formatting -│ │ └── types.go # Type definitions -│ └── pages/ # Pages counting subcommand -│ ├── pages.go # Command logic -│ ├── pages_test.go # Tests -│ ├── counter.go # Counting logic +│ ├── count/ # Count parent command +│ │ ├── count.go # Parent command definition +│ │ ├── tested-examples/ # Tested examples counting subcommand +│ │ │ ├── tested_examples.go # Command logic +│ │ │ ├── tested_examples_test.go # Tests +│ │ │ ├── counter.go # Counting logic +│ │ │ ├── output.go # Output formatting +│ │ │ └── types.go # Type definitions +│ │ └── pages/ # Pages counting subcommand +│ │ ├── pages.go # Command logic +│ │ ├── pages_test.go # Tests +│ │ ├── counter.go # Counting logic +│ │ ├── output.go # Output formatting +│ │ └── types.go # Type definitions +│ └── report/ # Report parent command +│ ├── report.go # Parent command definition +│ └── testable-code/ # Testable code analysis subcommand +│ ├── testable_code.go # Command logic +│ ├── testable_code_test.go # Tests +│ ├── code_collector.go # Code example collection logic +│ ├── csv_parser.go # CSV parsing │ ├── output.go # Output formatting │ └── types.go # Type definitions ├── internal/ # Internal packages │ ├── config/ # Configuration management │ │ ├── config.go # Config loading and path resolution -│ │ └── config_test.go # Config tests +│ │ ├── config_test.go # Config tests +│ │ └── url_mapping.go # URL-to-source-file mapping via Snooty Data API +│ ├── language/ # Programming language utilities +│ │ ├── language.go # Language normalization, extensions, products +│ │ └── language_test.go # Language tests │ ├── projectinfo/ # Project structure and info utilities │ │ ├── pathresolver.go # Core path resolution │ │ ├── pathresolver_test.go # Tests +│ │ 
├── products.go # Content directory to product mapping │ │ ├── source_finder.go # Source directory detection │ │ ├── version_resolver.go # Version path resolution │ │ └── types.go # Type definitions -│ └── rst/ # RST parsing utilities -│ ├── parser.go # Generic parsing with includes -│ ├── include_resolver.go # Include directive resolution -│ ├── directive_parser.go # Directive parsing -│ ├── directive_regex.go # Directive regex patterns -│ ├── parse_procedures.go # Procedure parsing (core logic) -│ ├── parse_procedures_test.go # Procedure parsing tests -│ ├── get_procedure_variations.go # Variation extraction logic -│ ├── get_procedure_variations_test.go # Variation tests -│ ├── procedure_types.go # Procedure type definitions -│ ├── rstspec.go # Rstspec.toml fetching and parsing -│ ├── rstspec_test.go # Rstspec tests -│ └── file_utils.go # File utilities +│ ├── rst/ # RST parsing utilities +│ │ ├── parser.go # Generic parsing with includes +│ │ ├── include_resolver.go # Include directive resolution +│ │ ├── directive_parser.go # Directive parsing with language resolution +│ │ ├── directive_parser_test.go # Directive parser tests +│ │ ├── directive_regex.go # Directive regex patterns +│ │ ├── parse_procedures.go # Procedure parsing (core logic) +│ │ ├── parse_procedures_test.go # Procedure parsing tests +│ │ ├── get_procedure_variations.go # Variation extraction logic +│ │ ├── get_procedure_variations_test.go # Variation tests +│ │ ├── procedure_types.go # Procedure type definitions +│ │ ├── rstspec.go # Rstspec.toml fetching and parsing +│ │ ├── rstspec_test.go # Rstspec tests +│ │ ├── yaml_steps_parser.go # Parse YAML steps files for code examples +│ │ ├── yaml_steps_parser_test.go # YAML steps parser tests +│ │ └── file_utils.go # File utilities +│ └── snooty/ # Snooty.toml parsing utilities +│ ├── snooty.go # Parse snooty.toml, find project config +│ └── snooty_test.go # Snooty tests └── testdata/ # Test fixtures ├── input-files/ # Test RST files │ └── source/ 
# Source directory (required) @@ -1627,6 +1792,8 @@ audit-cli/ │ └── *.txt # Direct comparison tests ├── count-test-monorepo/ # Count command test data │ └── content/code-examples/tested/ # Tested examples structure + ├── testable-code-test/ # Testable code report test data + │ └── content/test-project/ # Test project with code examples └── search-test-files/ # Search command test data ``` diff --git a/commands/analyze/composables/analyzer.go b/commands/analyze/composables/analyzer.go index 3ce55a7..0f05e70 100644 --- a/commands/analyze/composables/analyzer.go +++ b/commands/analyze/composables/analyzer.go @@ -3,6 +3,8 @@ package composables import ( "sort" + + "github.com/grove-platform/audit-cli/internal/snooty" ) // AnalyzeComposables analyzes composables and groups them by similarity. @@ -80,7 +82,7 @@ func areComposablesIdentical(locs []ComposableLocation) bool { } // composablesEqual checks if two composables are identical. -func composablesEqual(a, b Composable) bool { +func composablesEqual(a, b snooty.Composable) bool { // Compare basic fields if a.ID != b.ID || a.Title != b.Title || a.Default != b.Default { return false @@ -105,7 +107,7 @@ func composablesEqual(a, b Composable) bool { } // optionsToSortedStrings converts options to sorted strings for comparison. -func optionsToSortedStrings(options []ComposableOption) []string { +func optionsToSortedStrings(options []snooty.ComposableOption) []string { var strs []string for _, opt := range options { strs = append(strs, opt.ID+":"+opt.Title) @@ -195,7 +197,7 @@ func findSimilarComposables(locations []ComposableLocation, groupsByID map[strin // calculateOptionSimilarity calculates the Jaccard similarity between two composables' option sets. // Returns a value between 0 and 1, where 1 means identical option sets. 
-func calculateOptionSimilarity(a, b Composable) float64 { +func calculateOptionSimilarity(a, b snooty.Composable) float64 { // Get option IDs for both composables aOptions := make(map[string]bool) for _, opt := range a.Options { diff --git a/commands/analyze/composables/composables_test.go b/commands/analyze/composables/composables_test.go index 6722f8d..55780a6 100644 --- a/commands/analyze/composables/composables_test.go +++ b/commands/analyze/composables/composables_test.go @@ -4,6 +4,8 @@ package composables import ( "path/filepath" "testing" + + "github.com/grove-platform/audit-cli/internal/snooty" ) // TestFindSnootyTOMLFiles tests finding snooty.toml files in the test monorepo. @@ -100,7 +102,7 @@ func TestParseSnootyTOML(t *testing.T) { } // Check interface composable - var interfaceComp *Composable + var interfaceComp *snooty.Composable for i := range composables { if composables[i].ID == "interface" { interfaceComp = &composables[i] @@ -212,20 +214,20 @@ func TestSimilarComposables(t *testing.T) { // TestCalculateOptionSimilarity tests the Jaccard similarity calculation. 
func TestCalculateOptionSimilarity(t *testing.T) { // Test identical option sets - comp1 := Composable{ + comp1 := snooty.Composable{ ID: "test1", Title: "Test 1", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "a", Title: "A"}, {ID: "b", Title: "B"}, {ID: "c", Title: "C"}, }, } - comp2 := Composable{ + comp2 := snooty.Composable{ ID: "test2", Title: "Test 2", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "a", Title: "A"}, {ID: "b", Title: "B"}, {ID: "c", Title: "C"}, @@ -238,10 +240,10 @@ func TestCalculateOptionSimilarity(t *testing.T) { } // Test partial overlap - comp3 := Composable{ + comp3 := snooty.Composable{ ID: "test3", Title: "Test 3", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "a", Title: "A"}, {ID: "b", Title: "B"}, }, @@ -257,10 +259,10 @@ func TestCalculateOptionSimilarity(t *testing.T) { } // Test no overlap - comp4 := Composable{ + comp4 := snooty.Composable{ ID: "test4", Title: "Test 4", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "x", Title: "X"}, {ID: "y", Title: "Y"}, }, @@ -274,31 +276,31 @@ func TestCalculateOptionSimilarity(t *testing.T) { // TestComposablesEqual tests the composable equality function. 
func TestComposablesEqual(t *testing.T) { - comp1 := Composable{ + comp1 := snooty.Composable{ ID: "test", Title: "Test", Default: "option1", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "option1", Title: "Option 1"}, {ID: "option2", Title: "Option 2"}, }, } - comp2 := Composable{ + comp2 := snooty.Composable{ ID: "test", Title: "Test", Default: "option1", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "option1", Title: "Option 1"}, {ID: "option2", Title: "Option 2"}, }, } - comp3 := Composable{ + comp3 := snooty.Composable{ ID: "test", Title: "Test", Default: "option1", - Options: []ComposableOption{ + Options: []snooty.ComposableOption{ {ID: "option1", Title: "Option 1"}, {ID: "option3", Title: "Option 3"}, }, @@ -338,7 +340,7 @@ func TestExtractProjectAndVersion(t *testing.T) { } for _, tt := range tests { - project, version := extractProjectAndVersion(tt.path) + project, version := snooty.ExtractProjectAndVersion(tt.path) if project != tt.expectedProject { t.Errorf("For path %s, expected project '%s', got '%s'", tt.path, tt.expectedProject, project) } @@ -350,7 +352,7 @@ func TestExtractProjectAndVersion(t *testing.T) { // TestFormatOptionsAsBullets tests the bullet formatting function. func TestFormatOptionsAsBullets(t *testing.T) { - options := []ComposableOption{ + options := []snooty.ComposableOption{ {ID: "option1", Title: "Option 1"}, {ID: "option2", Title: "Option 2"}, {ID: "option3", Title: "Option 3"}, @@ -372,7 +374,7 @@ func TestFormatOptionsAsBullets(t *testing.T) { // TestFormatOptions tests the comma-separated formatting function. 
func TestFormatOptions(t *testing.T) { - options := []ComposableOption{ + options := []snooty.ComposableOption{ {ID: "option1", Title: "Option 1"}, {ID: "option2", Title: "Option 2"}, {ID: "option3", Title: "Option 3"}, diff --git a/commands/analyze/composables/output.go b/commands/analyze/composables/output.go index c5ece63..5ff57fb 100644 --- a/commands/analyze/composables/output.go +++ b/commands/analyze/composables/output.go @@ -5,6 +5,8 @@ import ( "fmt" "sort" "strings" + + "github.com/grove-platform/audit-cli/internal/snooty" ) // PrintResults prints the analysis results in a formatted table. @@ -235,7 +237,7 @@ func printAllComposablesTable(locations []ComposableLocation, verbose bool) { } // formatOptions formats options as a comma-separated list of IDs. -func formatOptions(options []ComposableOption) string { +func formatOptions(options []snooty.ComposableOption) string { var ids []string for _, opt := range options { ids = append(ids, opt.ID) @@ -244,7 +246,7 @@ func formatOptions(options []ComposableOption) string { } // formatOptionsAsBullets formats options as bullet points for table display. -func formatOptionsAsBullets(options []ComposableOption) []string { +func formatOptionsAsBullets(options []snooty.ComposableOption) []string { var lines []string for _, opt := range options { lines = append(lines, fmt.Sprintf("• %s: %s", opt.ID, opt.Title)) @@ -253,7 +255,7 @@ func formatOptionsAsBullets(options []ComposableOption) []string { } // printOptionsVerbose prints options in verbose format with wrapping. -func printOptionsVerbose(options []ComposableOption, indent string) { +func printOptionsVerbose(options []snooty.ComposableOption, indent string) { const maxWidth = 100 // Maximum width for wrapped text for _, opt := range options { @@ -309,13 +311,13 @@ func truncate(s string, maxLen int) string { } // findCommonOptions finds options that appear in all composables in the group. 
-func findCommonOptions(locations []ComposableLocation) []ComposableOption { +func findCommonOptions(locations []ComposableLocation) []snooty.ComposableOption { if len(locations) == 0 { return nil } // Start with options from the first composable - commonMap := make(map[string]ComposableOption) + commonMap := make(map[string]snooty.ComposableOption) for _, opt := range locations[0].Composable.Options { commonMap[opt.ID] = opt } @@ -336,7 +338,7 @@ func findCommonOptions(locations []ComposableLocation) []ComposableOption { } // Convert map to sorted slice - var common []ComposableOption + var common []snooty.ComposableOption for _, opt := range commonMap { common = append(common, opt) } diff --git a/commands/analyze/composables/parser.go b/commands/analyze/composables/parser.go index 7cce212..ec15e02 100644 --- a/commands/analyze/composables/parser.go +++ b/commands/analyze/composables/parser.go @@ -5,10 +5,8 @@ import ( "fmt" "os" "path/filepath" - "strings" - "github.com/BurntSushi/toml" - "github.com/grove-platform/audit-cli/internal/projectinfo" + "github.com/grove-platform/audit-cli/internal/snooty" ) // ParseSnootyTOML parses a snooty.toml file and extracts composables. 
@@ -17,13 +15,12 @@ import ( // - filePath: Path to the snooty.toml file // // Returns: -// - []Composable: Slice of composables found in the file +// - []snooty.Composable: Slice of composables found in the file // - error: Any error encountered during parsing -func ParseSnootyTOML(filePath string) ([]Composable, error) { - var config SnootyConfig - _, err := toml.DecodeFile(filePath, &config) +func ParseSnootyTOML(filePath string) ([]snooty.Composable, error) { + config, err := snooty.ParseFile(filePath) if err != nil { - return nil, fmt.Errorf("failed to parse TOML file: %w", err) + return nil, err } return config.Composables, nil @@ -76,7 +73,7 @@ func FindSnootyTOMLFiles(monorepoPath string, forProject string, currentOnly boo return err } - projectName, versionName := extractProjectAndVersion(relPath) + projectName, versionName := snooty.ExtractProjectAndVersion(relPath) if projectName == "" { return nil } @@ -88,7 +85,7 @@ func FindSnootyTOMLFiles(monorepoPath string, forProject string, currentOnly boo // Filter by current version if specified if currentOnly && versionName != "" { - if !projectinfo.IsCurrentVersion(versionName) { + if !snooty.IsCurrentVersion(versionName) { return nil } } @@ -136,32 +133,3 @@ func findContentDirectory(dirPath string) (string, error) { return "", fmt.Errorf("content directory not found in: %s", dirPath) } - -// extractProjectAndVersion extracts project and version from a relative path. -// Returns (project, version) where version is empty for non-versioned projects. 
-// -// Examples: -// - "manual/v8.0/snooty.toml" -> ("manual", "v8.0") -// - "atlas/snooty.toml" -> ("atlas", "") -func extractProjectAndVersion(relPath string) (string, string) { - parts := strings.Split(relPath, string(filepath.Separator)) - if len(parts) < 2 { - return "", "" - } - - projectName := parts[0] - - // Check if this is a versioned project - // Pattern: project/version/snooty.toml (3 parts) - // Pattern: project/snooty.toml (2 parts) - if len(parts) == 3 && parts[2] == "snooty.toml" { - // Versioned project: project/version/snooty.toml - return projectName, parts[1] - } else if len(parts) == 2 && parts[1] == "snooty.toml" { - // Non-versioned project: project/snooty.toml - return projectName, "" - } - - return "", "" -} - diff --git a/commands/analyze/composables/rstspec_adapter.go b/commands/analyze/composables/rstspec_adapter.go index 8a2c1db..c0f1321 100644 --- a/commands/analyze/composables/rstspec_adapter.go +++ b/commands/analyze/composables/rstspec_adapter.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/grove-platform/audit-cli/internal/rst" + "github.com/grove-platform/audit-cli/internal/snooty" ) // FetchRstspecComposables fetches and parses composables from the canonical rstspec.toml file. 
@@ -26,17 +27,17 @@ func FetchRstspecComposables() ([]ComposableLocation, error) { // Convert rstspec composables to ComposableLocation objects locations := make([]ComposableLocation, 0, len(config.Composables)) for _, rstspecComp := range config.Composables { - // Convert RstspecComposable to Composable - composable := Composable{ + // Convert RstspecComposable to snooty.Composable + composable := snooty.Composable{ ID: rstspecComp.ID, Title: rstspecComp.Title, Default: rstspecComp.Default, - Options: make([]ComposableOption, 0, len(rstspecComp.Options)), + Options: make([]snooty.ComposableOption, 0, len(rstspecComp.Options)), } // Convert options for _, rstspecOpt := range rstspecComp.Options { - composable.Options = append(composable.Options, ComposableOption{ + composable.Options = append(composable.Options, snooty.ComposableOption{ ID: rstspecOpt.ID, Title: rstspecOpt.Title, }) diff --git a/commands/analyze/composables/types.go b/commands/analyze/composables/types.go index 3de2a86..c771656 100644 --- a/commands/analyze/composables/types.go +++ b/commands/analyze/composables/types.go @@ -1,31 +1,15 @@ // Package composables provides functionality for analyzing composables in snooty.toml files. package composables -// Composable represents a composable definition from a snooty.toml file. -type Composable struct { - ID string `toml:"id"` - Title string `toml:"title"` - Default string `toml:"default"` - Dependencies []map[string]string `toml:"dependencies"` - Options []ComposableOption `toml:"options"` -} - -// ComposableOption represents an option within a composable. -type ComposableOption struct { - ID string `toml:"id"` - Title string `toml:"title"` -} - -// SnootyConfig represents the structure of a snooty.toml file. -type SnootyConfig struct { - Composables []Composable `toml:"composables"` -} +import ( + "github.com/grove-platform/audit-cli/internal/snooty" +) // ComposableLocation tracks where a composable was found. 
type ComposableLocation struct { Project string Version string // Empty for non-versioned projects - Composable Composable + Composable snooty.Composable FilePath string Source string // "snooty.toml" or "rstspec.toml" } diff --git a/commands/analyze/usage/analyzer.go b/commands/analyze/usage/analyzer.go index 8ecd259..9ba4bb8 100644 --- a/commands/analyze/usage/analyzer.go +++ b/commands/analyze/usage/analyzer.go @@ -188,10 +188,10 @@ func AnalyzeUsageRecursive(targetFile string, includeToctree bool, verbose bool, var allUsages []FileUsage for txtFile := range txtFilesSet { // Create a simple FileUsage entry for each .txt file - // We use "include" as a generic directive type since we're showing the final pages + // We use rst.Include as a generic directive type since we're showing the final pages allUsages = append(allUsages, FileUsage{ FilePath: txtFile, - DirectiveType: "include", + DirectiveType: rst.Include, UsagePath: txtFile, LineNumber: 0, }) @@ -352,7 +352,7 @@ func findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo if referencesTarget(refPath, targetFile, sourceDir, filePath) { usages = append(usages, FileUsage{ FilePath: filePath, - DirectiveType: "include", + DirectiveType: rst.Include, UsagePath: refPath, LineNumber: lineNum, }) @@ -366,7 +366,7 @@ func findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo if referencesTarget(refPath, targetFile, sourceDir, filePath) { usages = append(usages, FileUsage{ FilePath: filePath, - DirectiveType: "literalinclude", + DirectiveType: rst.LiteralInclude, UsagePath: refPath, LineNumber: lineNum, }) @@ -382,7 +382,7 @@ func findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo if referencesTarget(refPath, targetFile, sourceDir, filePath) { usages = append(usages, FileUsage{ FilePath: filePath, - DirectiveType: "io-code-block", + DirectiveType: rst.IoCodeBlock, UsagePath: refPath, LineNumber: ioCodeBlockStartLine, }) @@ -396,7 +396,7 @@ func 
findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo if referencesTarget(refPath, targetFile, sourceDir, filePath) { usages = append(usages, FileUsage{ FilePath: filePath, - DirectiveType: "io-code-block", + DirectiveType: rst.IoCodeBlock, UsagePath: refPath, LineNumber: ioCodeBlockStartLine, }) @@ -418,7 +418,7 @@ func findUsagesInFile(filePath, targetFile, sourceDir string, includeToctree boo if referencesToctreeTarget(docName, targetFile, sourceDir, filePath) { usages = append(usages, FileUsage{ FilePath: filePath, - DirectiveType: "toctree", + DirectiveType: rst.Toctree, UsagePath: docName, LineNumber: toctreeStartLine, }) @@ -649,7 +649,7 @@ func referencesToctreeTarget(docName, targetFile, sourceDir, currentFile string) // // Returns: // - *UsageAnalysis: A new analysis with filtered results -func FilterByDirectiveType(analysis *UsageAnalysis, directiveType string) *UsageAnalysis { +func FilterByDirectiveType(analysis *UsageAnalysis, directiveType rst.DirectiveType) *UsageAnalysis { filtered := &UsageAnalysis{ TargetFile: analysis.TargetFile, SourceDir: analysis.SourceDir, @@ -700,7 +700,7 @@ func GroupUsagesByFile(usages []FileUsage) []GroupedFileUsage { // Group by file path and directive type type groupKey struct { filePath string - directiveType string + directiveType rst.DirectiveType } groups := make(map[groupKey][]FileUsage) diff --git a/commands/analyze/usage/output.go b/commands/analyze/usage/output.go index 15cc367..051c9be 100644 --- a/commands/analyze/usage/output.go +++ b/commands/analyze/usage/output.go @@ -7,6 +7,8 @@ import ( "path/filepath" "sort" "strings" + + "github.com/grove-platform/audit-cli/internal/rst" ) // OutputFormat represents the output format for the analysis results. 
@@ -87,7 +89,7 @@ func printText(analysis *UsageAnalysis, verbose bool, recursive bool) { byDirectiveType := groupByDirectiveType(analysis.UsingFiles) // Print breakdown by directive type with file and reference counts - directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"} + directiveTypes := ValidDirectiveTypes for _, directiveType := range directiveTypes { if refs, ok := byDirectiveType[directiveType]; ok { uniqueFiles := countUniqueFiles(refs) @@ -167,8 +169,8 @@ func printJSON(analysis *UsageAnalysis) error { } // groupByDirectiveType groups usages by their directive type. -func groupByDirectiveType(usages []FileUsage) map[string][]FileUsage { - groups := make(map[string][]FileUsage) +func groupByDirectiveType(usages []FileUsage) map[rst.DirectiveType][]FileUsage { + groups := make(map[rst.DirectiveType][]FileUsage) for _, usage := range usages { groups[usage.DirectiveType] = append(groups[usage.DirectiveType], usage) @@ -197,18 +199,19 @@ func FormatReferencePath(path, sourceDir string) string { } // GetDirectiveTypeLabel returns a human-readable label for a directive type. -func GetDirectiveTypeLabel(directiveType string) string { - labels := map[string]string{ - "include": "Include", - "literalinclude": "Literal Include", - "io-code-block": "I/O Code Block", +func GetDirectiveTypeLabel(directiveType rst.DirectiveType) string { + labels := map[rst.DirectiveType]string{ + rst.Include: "Include", + rst.LiteralInclude: "Literal Include", + rst.IoCodeBlock: "I/O Code Block", + rst.Toctree: "Toctree", } if label, ok := labels[directiveType]; ok { return label } - return strings.Title(directiveType) + return strings.Title(string(directiveType)) } // PrintPathsOnly prints only the file paths, one per line. 
@@ -268,7 +271,7 @@ func PrintSummary(analysis *UsageAnalysis) error { // Print breakdown by type fmt.Println("\nBy Type:") - directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"} + directiveTypes := ValidDirectiveTypes for _, directiveType := range directiveTypes { if usages, ok := byDirectiveType[directiveType]; ok { uniqueFiles := countUniqueFiles(usages) diff --git a/commands/analyze/usage/types.go b/commands/analyze/usage/types.go index 7e36694..e44313d 100644 --- a/commands/analyze/usage/types.go +++ b/commands/analyze/usage/types.go @@ -1,5 +1,16 @@ package usage +import "github.com/grove-platform/audit-cli/internal/rst" + +// ValidDirectiveTypes is the list of directive types supported by usage analysis. +// Uses the canonical constants from internal/rst. +var ValidDirectiveTypes = []rst.DirectiveType{ + rst.Include, + rst.LiteralInclude, + rst.IoCodeBlock, + rst.Toctree, +} + // UsageAnalysis contains the results of analyzing which files use a target file. 
// // This structure holds both a flat list of files that use the target and a hierarchical @@ -33,8 +44,8 @@ type FileUsage struct { FilePath string `json:"file_path"` // DirectiveType is the type of directive used to reference the file - // Possible values: "include", "literalinclude", "io-code-block", "toctree" - DirectiveType string `json:"directive_type"` + // Uses rst.DirectiveType constants: Include, LiteralInclude, IoCodeBlock, Toctree + DirectiveType rst.DirectiveType `json:"directive_type"` // UsagePath is the path used in the directive (as written in the file) UsagePath string `json:"usage_path"` @@ -52,7 +63,8 @@ type UsageNode struct { FilePath string // DirectiveType is the type of directive used to reference the file - DirectiveType string + // Uses rst.DirectiveType constants: Include, LiteralInclude, IoCodeBlock, Toctree + DirectiveType rst.DirectiveType // UsagePath is the path used in the directive UsagePath string @@ -71,7 +83,8 @@ type GroupedFileUsage struct { FilePath string // DirectiveType is the type of directive used - DirectiveType string + // Uses rst.DirectiveType constants: Include, LiteralInclude, IoCodeBlock, Toctree + DirectiveType rst.DirectiveType // Usages is the list of all usages from this file Usages []FileUsage diff --git a/commands/analyze/usage/usage.go b/commands/analyze/usage/usage.go index fcb67ed..e2317a1 100644 --- a/commands/analyze/usage/usage.go +++ b/commands/analyze/usage/usage.go @@ -17,6 +17,7 @@ import ( "fmt" "github.com/grove-platform/audit-cli/internal/config" + "github.com/grove-platform/audit-cli/internal/rst" "github.com/spf13/cobra" ) @@ -168,14 +169,13 @@ Examples: func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryOnly bool, directiveType string, includeToctree bool, excludePattern string, recursive bool) error { // Validate directive type if specified if directiveType != "" { - validTypes := map[string]bool{ - "include": true, - "literalinclude": true, - "io-code-block": 
true, - "toctree": true, + validTypes := make(map[rst.DirectiveType]bool) + for _, dt := range ValidDirectiveTypes { + validTypes[dt] = true } - if !validTypes[directiveType] { - return fmt.Errorf("invalid directive type: %s (must be 'include', 'literalinclude', 'io-code-block', or 'toctree')", directiveType) + if !validTypes[rst.DirectiveType(directiveType)] { + return fmt.Errorf("invalid directive type: %s (must be '%s', '%s', '%s', or '%s')", + directiveType, rst.Include, rst.LiteralInclude, rst.IoCodeBlock, rst.Toctree) } } @@ -221,7 +221,7 @@ func runUsage(targetFile, format string, verbose, countOnly, pathsOnly, summaryO // Filter by directive type if specified if directiveType != "" { - analysis = FilterByDirectiveType(analysis, directiveType) + analysis = FilterByDirectiveType(analysis, rst.DirectiveType(directiveType)) } // Handle count-only output diff --git a/commands/analyze/usage/usage_test.go b/commands/analyze/usage/usage_test.go index 9a61eb4..713330e 100644 --- a/commands/analyze/usage/usage_test.go +++ b/commands/analyze/usage/usage_test.go @@ -3,6 +3,8 @@ package usage import ( "path/filepath" "testing" + + "github.com/grove-platform/audit-cli/internal/rst" ) // TestAnalyzeUsage tests the AnalyzeUsage function with various scenarios. 
@@ -14,30 +16,30 @@ func TestAnalyzeUsage(t *testing.T) { name string targetFile string expectedUsages int - expectedDirectiveType string + expectedDirectiveType rst.DirectiveType }{ { - name: "Include file with multiple usages", - targetFile: "includes/intro.rst", - expectedUsages: 5, // 4 RST files + 1 YAML file (no toctree by default) - expectedDirectiveType: "include", + name: "Include file with multiple usages", + targetFile: "includes/intro.rst", + expectedUsages: 5, // 4 RST files + 1 YAML file (no toctree by default) + expectedDirectiveType: rst.Include, }, { - name: "Code example with literalinclude", - targetFile: "code-examples/example.py", - expectedUsages: 2, // 1 RST file + 1 YAML file - expectedDirectiveType: "literalinclude", + name: "Code example with literalinclude", + targetFile: "code-examples/example.py", + expectedUsages: 2, // 1 RST file + 1 YAML file + expectedDirectiveType: rst.LiteralInclude, }, { - name: "Code example with multiple directive types", - targetFile: "code-examples/example.js", - expectedUsages: 2, // literalinclude + io-code-block + name: "Code example with multiple directive types", + targetFile: "code-examples/example.js", + expectedUsages: 2, // literalinclude + io-code-block expectedDirectiveType: "", // mixed types }, { - name: "File with no usages", - targetFile: "code-block-test.rst", - expectedUsages: 0, + name: "File with no usages", + targetFile: "code-block-test.rst", + expectedUsages: 0, expectedDirectiveType: "", }, } @@ -104,7 +106,7 @@ func TestFindUsagesInFile(t *testing.T) { searchFile string targetFile string expectedUsages int - expectedDirective string + expectedDirective rst.DirectiveType includeToctree bool }{ { @@ -112,7 +114,7 @@ func TestFindUsagesInFile(t *testing.T) { searchFile: "include-test.rst", targetFile: "includes/intro.rst", expectedUsages: 1, - expectedDirective: "include", + expectedDirective: rst.Include, includeToctree: false, }, { @@ -120,7 +122,7 @@ func TestFindUsagesInFile(t 
*testing.T) { searchFile: "literalinclude-test.rst", targetFile: "code-examples/example.py", expectedUsages: 1, - expectedDirective: "literalinclude", + expectedDirective: rst.LiteralInclude, includeToctree: false, }, { @@ -128,7 +130,7 @@ func TestFindUsagesInFile(t *testing.T) { searchFile: "io-code-block-test.rst", targetFile: "code-examples/example.js", expectedUsages: 1, - expectedDirective: "io-code-block", + expectedDirective: rst.IoCodeBlock, includeToctree: false, }, { @@ -136,7 +138,7 @@ func TestFindUsagesInFile(t *testing.T) { searchFile: "duplicate-include-test.rst", targetFile: "includes/intro.rst", expectedUsages: 2, // Same file included twice - expectedDirective: "include", + expectedDirective: rst.Include, includeToctree: false, }, { @@ -144,7 +146,7 @@ func TestFindUsagesInFile(t *testing.T) { searchFile: "index.rst", targetFile: "include-test.rst", expectedUsages: 1, - expectedDirective: "toctree", + expectedDirective: rst.Toctree, includeToctree: true, // Must enable toctree flag }, { @@ -399,11 +401,11 @@ func TestGetExtractRefs(t *testing.T) { // TestGroupByDirectiveType tests the groupByDirectiveType function. 
func TestGroupByDirectiveType(t *testing.T) { usages := []FileUsage{ - {DirectiveType: "include", FilePath: "file1.rst"}, - {DirectiveType: "include", FilePath: "file2.rst"}, - {DirectiveType: "literalinclude", FilePath: "file3.rst"}, - {DirectiveType: "io-code-block", FilePath: "file4.rst"}, - {DirectiveType: "include", FilePath: "file5.rst"}, + {DirectiveType: rst.Include, FilePath: "file1.rst"}, + {DirectiveType: rst.Include, FilePath: "file2.rst"}, + {DirectiveType: rst.LiteralInclude, FilePath: "file3.rst"}, + {DirectiveType: rst.IoCodeBlock, FilePath: "file4.rst"}, + {DirectiveType: rst.Include, FilePath: "file5.rst"}, } groups := groupByDirectiveType(usages) @@ -414,18 +416,18 @@ func TestGroupByDirectiveType(t *testing.T) { } // Check include group - if len(groups["include"]) != 3 { - t.Errorf("expected 3 include usages, got %d", len(groups["include"])) + if len(groups[rst.Include]) != 3 { + t.Errorf("expected 3 include usages, got %d", len(groups[rst.Include])) } // Check literalinclude group - if len(groups["literalinclude"]) != 1 { - t.Errorf("expected 1 literalinclude usage, got %d", len(groups["literalinclude"])) + if len(groups[rst.LiteralInclude]) != 1 { + t.Errorf("expected 1 literalinclude usage, got %d", len(groups[rst.LiteralInclude])) } // Check io-code-block group - if len(groups["io-code-block"]) != 1 { - t.Errorf("expected 1 io-code-block usage, got %d", len(groups["io-code-block"])) + if len(groups[rst.IoCodeBlock]) != 1 { + t.Errorf("expected 1 io-code-block usage, got %d", len(groups[rst.IoCodeBlock])) } } diff --git a/commands/extract/code-examples/code_examples.go b/commands/extract/code-examples/code_examples.go index 932d1f3..7f56f1b 100644 --- a/commands/extract/code-examples/code_examples.go +++ b/commands/extract/code-examples/code_examples.go @@ -4,6 +4,7 @@ // reStructuredText files and extracts code examples from various directives: // - literalinclude: External file references with optional partial extraction // - code-block: 
Inline code blocks with automatic dedenting +// - code: Shorter alias for code-block (standard reStructuredText) // - io-code-block: Input/output examples with nested directives // // The extracted code examples are written to individual files with standardized naming: @@ -44,7 +45,7 @@ func NewCodeExamplesCommand() *cobra.Command { cmd := &cobra.Command{ Use: "code-examples [filepath]", Short: "Extract code examples from reStructuredText files", - Long: `Extract code examples from reStructuredText directives (code-block, literalinclude, io-code-block) + Long: `Extract code examples from reStructuredText directives (code-block, code, literalinclude, io-code-block) and output them as individual files. File Path Resolution: diff --git a/commands/extract/code-examples/code_examples_test.go b/commands/extract/code-examples/code_examples_test.go index 5688ca4..c0545dc 100644 --- a/commands/extract/code-examples/code_examples_test.go +++ b/commands/extract/code-examples/code_examples_test.go @@ -4,6 +4,8 @@ import ( "os" "path/filepath" "testing" + + "github.com/grove-platform/audit-cli/internal/rst" ) // TestLiteralIncludeDirective tests the parsing and extraction of literalinclude directives @@ -90,7 +92,7 @@ func TestLiteralIncludeDirective(t *testing.T) { } // Verify directive counts - if count := report.DirectiveCounts[LiteralInclude]; count != 7 { + if count := report.DirectiveCounts[rst.LiteralInclude]; count != 7 { t.Errorf("Expected 7 literalinclude directives, got %d", count) } } @@ -240,8 +242,8 @@ func TestNestedCodeBlockDirective(t *testing.T) { } // Verify all are code-block directives - if report.DirectiveCounts[CodeBlock] != 11 { - t.Errorf("Expected 11 code-block directives, got %d", report.DirectiveCounts[CodeBlock]) + if report.DirectiveCounts[rst.CodeBlock] != 11 { + t.Errorf("Expected 11 code-block directives, got %d", report.DirectiveCounts[rst.CodeBlock]) } // Expected files and their languages @@ -323,8 +325,8 @@ func TestIoCodeBlockDirective(t 
*testing.T) { } // Verify all are io-code-block directives - if report.DirectiveCounts[IoCodeBlock] != 11 { - t.Errorf("Expected 11 io-code-block examples, got %d", report.DirectiveCounts[IoCodeBlock]) + if report.DirectiveCounts[rst.IoCodeBlock] != 11 { + t.Errorf("Expected 11 io-code-block examples, got %d", report.DirectiveCounts[rst.IoCodeBlock]) } // Expected files @@ -336,12 +338,12 @@ func TestIoCodeBlockDirective(t *testing.T) { // Test 3: Python inline "io-code-block-test.io-code-block.3.input.py", "io-code-block-test.io-code-block.3.output.py", - // Test 4: Shell command + // Test 4: Shell command with JSON output "io-code-block-test.io-code-block.4.input.sh", - "io-code-block-test.io-code-block.4.output.txt", - // Test 5: TypeScript + "io-code-block-test.io-code-block.4.output.json", + // Test 5: TypeScript with JSON output "io-code-block-test.io-code-block.5.input.ts", - "io-code-block-test.io-code-block.5.output.txt", + "io-code-block-test.io-code-block.5.output.json", // Test 6: Nested in procedure "io-code-block-test.io-code-block.6.input.js", "io-code-block-test.io-code-block.6.output.js", @@ -460,13 +462,13 @@ func TestRecursiveDirectoryScanning(t *testing.T) { } // Verify we have examples from different directive types - if report.DirectiveCounts[CodeBlock] == 0 { + if report.DirectiveCounts[rst.CodeBlock] == 0 { t.Error("Expected code-block directives to be found") } - if report.DirectiveCounts[LiteralInclude] == 0 { + if report.DirectiveCounts[rst.LiteralInclude] == 0 { t.Error("Expected literalinclude directives to be found") } - if report.DirectiveCounts[IoCodeBlock] == 0 { + if report.DirectiveCounts[rst.IoCodeBlock] == 0 { t.Error("Expected io-code-block directives to be found") } } @@ -506,9 +508,9 @@ func TestFollowIncludesWithoutRecursive(t *testing.T) { } // Verify the directive type - if report.DirectiveCounts[LiteralInclude] != 1 { + if report.DirectiveCounts[rst.LiteralInclude] != 1 { t.Errorf("Expected 1 literalinclude directive, got 
%d", - report.DirectiveCounts[LiteralInclude]) + report.DirectiveCounts[rst.LiteralInclude]) } } @@ -548,13 +550,13 @@ func TestRecursiveWithFollowIncludes(t *testing.T) { } // Verify we have examples from all directive types - if report.DirectiveCounts[CodeBlock] == 0 { + if report.DirectiveCounts[rst.CodeBlock] == 0 { t.Error("Expected code-block directives to be found") } - if report.DirectiveCounts[LiteralInclude] == 0 { + if report.DirectiveCounts[rst.LiteralInclude] == 0 { t.Error("Expected literalinclude directives to be found") } - if report.DirectiveCounts[IoCodeBlock] == 0 { + if report.DirectiveCounts[rst.IoCodeBlock] == 0 { t.Error("Expected io-code-block directives to be found") } } diff --git a/commands/extract/code-examples/language.go b/commands/extract/code-examples/language.go deleted file mode 100644 index 2a5df93..0000000 --- a/commands/extract/code-examples/language.go +++ /dev/null @@ -1,177 +0,0 @@ -package code_examples - -import "strings" - -// Language constants define canonical language names used throughout the tool. -// These are used for normalization and file extension mapping. -const ( - Bash = "bash" - C = "c" - CPP = "cpp" - CSharp = "csharp" - Console = "console" - Go = "go" - Java = "java" - JavaScript = "javascript" - Kotlin = "kotlin" - PHP = "php" - PowerShell = "powershell" - PS5 = "ps5" - Python = "python" - Ruby = "ruby" - Rust = "rust" - Scala = "scala" - Shell = "shell" - Swift = "swift" - Text = "text" - TypeScript = "typescript" - Undefined = "undefined" -) - -// File extension constants define the file extensions for each language. -// Used when generating output filenames for extracted code examples. 
-const ( - BashExtension = ".sh" - CExtension = ".c" - CPPExtension = ".cpp" - CSharpExtension = ".cs" - ConsoleExtension = ".sh" - GoExtension = ".go" - JavaExtension = ".java" - JavaScriptExtension = ".js" - KotlinExtension = ".kt" - PHPExtension = ".php" - PowerShellExtension = ".ps1" - PS5Extension = ".ps1" - PythonExtension = ".py" - RubyExtension = ".rb" - RustExtension = ".rs" - ScalaExtension = ".scala" - ShellExtension = ".sh" - SwiftExtension = ".swift" - TextExtension = ".txt" - TypeScriptExtension = ".ts" - UndefinedExtension = ".txt" -) - -// GetFileExtensionFromLanguage returns the appropriate file extension for a given language. -// -// This function maps language identifiers to their corresponding file extensions. -// Handles various language name variants (e.g., "ts" -> ".ts", "c++" -> ".cpp", "golang" -> ".go"). -// Returns ".txt" for unknown or undefined languages. -// -// Parameters: -// - language: The language identifier (case-insensitive) -// -// Returns: -// - string: The file extension including the leading dot (e.g., ".js", ".py") -func GetFileExtensionFromLanguage(language string) string { - lang := strings.ToLower(strings.TrimSpace(language)) - - langExtensionMap := map[string]string{ - Bash: BashExtension, - C: CExtension, - CPP: CPPExtension, - CSharp: CSharpExtension, - Console: ConsoleExtension, - Go: GoExtension, - Java: JavaExtension, - JavaScript: JavaScriptExtension, - Kotlin: KotlinExtension, - PHP: PHPExtension, - PowerShell: PowerShellExtension, - PS5: PS5Extension, - Python: PythonExtension, - Ruby: RubyExtension, - Rust: RustExtension, - Scala: ScalaExtension, - Shell: ShellExtension, - Swift: SwiftExtension, - Text: TextExtension, - TypeScript: TypeScriptExtension, - Undefined: UndefinedExtension, - "c++": CPPExtension, - "c#": CSharpExtension, - "cs": CSharpExtension, - "golang": GoExtension, - "js": JavaScriptExtension, - "kt": KotlinExtension, - "py": PythonExtension, - "rb": RubyExtension, - "rs": RustExtension, - "sh": 
ShellExtension, - "ts": TypeScriptExtension, - "txt": TextExtension, - "ps1": PowerShellExtension, - "": UndefinedExtension, - "none": UndefinedExtension, - } - - if extension, exists := langExtensionMap[lang]; exists { - return extension - } - - return UndefinedExtension -} - -// NormalizeLanguage normalizes a language string to a canonical form. -// -// This function converts various language name variants to their canonical forms: -// - "ts" -> "typescript" -// - "c++" -> "cpp" -// - "golang" -> "go" -// - "js" -> "javascript" -// - etc. -// -// Parameters: -// - language: The language identifier (case-insensitive) -// -// Returns: -// - string: The normalized language name, or the original string if no normalization is defined -func NormalizeLanguage(language string) string { - lang := strings.ToLower(strings.TrimSpace(language)) - - normalizeMap := map[string]string{ - Bash: Bash, - C: C, - CPP: CPP, - CSharp: CSharp, - Console: Console, - Go: Go, - Java: Java, - JavaScript: JavaScript, - Kotlin: Kotlin, - PHP: PHP, - PowerShell: PowerShell, - PS5: PS5, - Python: Python, - Ruby: Ruby, - Rust: Rust, - Scala: Scala, - Shell: Shell, - Swift: Swift, - Text: Text, - TypeScript: TypeScript, - "c++": CPP, - "c#": CSharp, - "cs": CSharp, - "golang": Go, - "js": JavaScript, - "kt": Kotlin, - "py": Python, - "rb": Ruby, - "rs": Rust, - "sh": Shell, - "ts": TypeScript, - "txt": Text, - "ps1": PowerShell, - "": Undefined, - "none": Undefined, - } - - if normalized, exists := normalizeMap[lang]; exists { - return normalized - } - - return lang -} diff --git a/commands/extract/code-examples/parser.go b/commands/extract/code-examples/parser.go index 816df9e..7cfb29e 100644 --- a/commands/extract/code-examples/parser.go +++ b/commands/extract/code-examples/parser.go @@ -9,8 +9,10 @@ import ( // ParseFile parses a file and extracts code examples from reStructuredText directives. 
// -// This function parses all supported RST directives (literalinclude, code-block, io-code-block) +// This function parses all supported RST directives (literalinclude, code-block, code, io-code-block) // and converts them into CodeExample structs ready for writing to files. +// Note: The "code" directive is a shorter alias for "code-block" in standard reStructuredText +// and is treated identically. // // Parameters: // - filePath: Path to the RST file to parse @@ -71,28 +73,21 @@ func parseLiteralInclude(sourceFile string, directive rst.Directive, index int) return CodeExample{}, err } - // Get the language from the :language: option - language := directive.Options["language"] - if language == "" { - language = Undefined - } - - // Normalize the language - language = NormalizeLanguage(language) - return CodeExample{ SourceFile: sourceFile, - DirectiveName: DirectiveType(directive.Type), - Language: language, + DirectiveName: directive.Type, + Language: directive.ResolveLanguage(), Content: content, Index: index, }, nil } -// parseCodeBlock parses a code-block directive and extracts the inline code content. +// parseCodeBlock parses a code-block or code directive and extracts the inline code content. // // The content is already dedented by the directive parser based on the first line's indentation. // Language can be specified either as an argument (.. code-block:: javascript) or as an option (:language: javascript). +// Note: The "code" directive is a shorter alias for "code-block" in standard reStructuredText +// and is handled identically by this function. 
func parseCodeBlock(sourceFile string, directive rst.Directive, index int) (CodeExample, error) { // The content is already parsed and dedented by the directive parser content := directive.Content @@ -100,23 +95,10 @@ func parseCodeBlock(sourceFile string, directive rst.Directive, index int) (Code return CodeExample{}, fmt.Errorf("code-block has no content") } - // Get the language from the directive argument (e.g., .. code-block:: javascript) - // or from the :language: option - language := directive.Argument - if language == "" { - language = directive.Options["language"] - } - if language == "" { - language = Undefined - } - - // Normalize the language - language = NormalizeLanguage(language) - return CodeExample{ SourceFile: sourceFile, - DirectiveName: DirectiveType(directive.Type), - Language: language, + DirectiveName: directive.Type, + Language: directive.ResolveLanguage(), Content: content, Index: index, }, nil @@ -189,7 +171,7 @@ func parseIoCodeBlock(sourceFile string, directive rst.Directive, index int) []C // Process input directive if directive.InputDirective != nil { - inputExample, err := parseSubDirective(sourceFile, directive.InputDirective, "input", index) + inputExample, err := parseSubDirective(sourceFile, directive.InputDirective, directive.Options, "input", index) if err != nil { fmt.Fprintf(os.Stderr, "Warning: failed to parse input directive at line %d in %s: %v\n", directive.LineNum, sourceFile, err) @@ -200,7 +182,7 @@ func parseIoCodeBlock(sourceFile string, directive rst.Directive, index int) []C // Process output directive if directive.OutputDirective != nil { - outputExample, err := parseSubDirective(sourceFile, directive.OutputDirective, "output", index) + outputExample, err := parseSubDirective(sourceFile, directive.OutputDirective, directive.Options, "output", index) if err != nil { fmt.Fprintf(os.Stderr, "Warning: failed to parse output directive at line %d in %s: %v\n", directive.LineNum, sourceFile, err) @@ -213,7 +195,7 @@ func 
parseIoCodeBlock(sourceFile string, directive rst.Directive, index int) []C } // parseSubDirective parses an input or output sub-directive within an io-code-block -func parseSubDirective(sourceFile string, subDir *rst.SubDirective, dirType string, index int) (CodeExample, error) { +func parseSubDirective(sourceFile string, subDir *rst.SubDirective, parentOptions map[string]string, dirType string, index int) (CodeExample, error) { var content string var err error @@ -234,18 +216,10 @@ func parseSubDirective(sourceFile string, subDir *rst.SubDirective, dirType stri } } - // Get language from options - language := subDir.Options["language"] - if language == "" { - language = Undefined - } - - language = NormalizeLanguage(language) - return CodeExample{ SourceFile: sourceFile, - DirectiveName: DirectiveType(rst.IoCodeBlock), - Language: language, + DirectiveName: rst.IoCodeBlock, + Language: subDir.ResolveLanguage(parentOptions), Content: content, Index: index, SubType: dirType, // "input" or "output" diff --git a/commands/extract/code-examples/report.go b/commands/extract/code-examples/report.go index 3a6728e..aec9ca3 100644 --- a/commands/extract/code-examples/report.go +++ b/commands/extract/code-examples/report.go @@ -4,6 +4,8 @@ import ( "fmt" "sort" "strings" + + "github.com/grove-platform/audit-cli/internal/rst" ) // PrintReport prints the extraction report to stdout. 
@@ -59,7 +61,7 @@ func PrintReport(report *Report, verbose bool) { if len(report.DirectiveCounts) > 0 { fmt.Println("\nCode Examples by Directive Type:") - directives := []DirectiveType{CodeBlock, LiteralInclude, IoCodeBlock} + directives := []rst.DirectiveType{rst.CodeBlock, rst.LiteralInclude, rst.IoCodeBlock} for _, directive := range directives { if count, exists := report.DirectiveCounts[directive]; exists { fmt.Printf(" %-20s: %d\n", directive, count) @@ -82,7 +84,7 @@ func PrintReport(report *Report, verbose bool) { if len(stats.DirectiveCounts) > 0 { fmt.Println(" Directives:") - directives := []DirectiveType{CodeBlock, LiteralInclude, IoCodeBlock} + directives := []rst.DirectiveType{rst.CodeBlock, rst.LiteralInclude, rst.IoCodeBlock} for _, directive := range directives { if count, exists := stats.DirectiveCounts[directive]; exists { fmt.Printf(" %-20s: %d\n", directive, count) diff --git a/commands/extract/code-examples/types.go b/commands/extract/code-examples/types.go index a0cda16..9d3872e 100644 --- a/commands/extract/code-examples/types.go +++ b/commands/extract/code-examples/types.go @@ -1,49 +1,39 @@ package code_examples -// DirectiveType represents the type of reStructuredText directive. -type DirectiveType string - -const ( - // CodeBlock represents inline code blocks (.. code-block::) - CodeBlock DirectiveType = "code-block" - // LiteralInclude represents external file references (.. literalinclude::) - LiteralInclude DirectiveType = "literalinclude" - // IoCodeBlock represents input/output examples (.. io-code-block::) - IoCodeBlock DirectiveType = "io-code-block" -) +import "github.com/grove-platform/audit-cli/internal/rst" // CodeExample represents a single code example extracted from an RST file. // // Each code example corresponds to one directive occurrence in the source file // and will be written to a separate output file. 
type CodeExample struct { - SourceFile string // Path to the source RST file - DirectiveName DirectiveType // Type of directive (code-block, literalinclude, io-code-block) - Language string // Programming language (normalized) - Content string // The actual code content - Index int // The occurrence index of this directive in the source file (1-based) - SubType string // For io-code-block: "input" or "output" + SourceFile string // Path to the source RST file + DirectiveName rst.DirectiveType // Type of directive (code-block, code, literalinclude, io-code-block) + Language string // Programming language (normalized) + Content string // The actual code content + Index int // The occurrence index of this directive in the source file (1-based) + SubType string // For io-code-block: "input" or "output" } // Report contains statistics about the extraction operation. // // Tracks overall statistics as well as per-source-file statistics for detailed reporting. type Report struct { - FilesTraversed int // Total number of RST files processed - TraversedFilepaths []string // List of all processed file paths - OutputFilesWritten int // Total number of code example files written - LanguageCounts map[string]int // Count of examples by language - DirectiveCounts map[DirectiveType]int // Count of examples by directive type - SourcePathStats map[string]*SourceStats // Per-file statistics + FilesTraversed int // Total number of RST files processed + TraversedFilepaths []string // List of all processed file paths + OutputFilesWritten int // Total number of code example files written + LanguageCounts map[string]int // Count of examples by language + DirectiveCounts map[rst.DirectiveType]int // Count of examples by directive type + SourcePathStats map[string]*SourceStats // Per-file statistics } // SourceStats contains statistics for a single source file. // // Used for verbose reporting to show detailed breakdown per source file. 
type SourceStats struct { - DirectiveCounts map[DirectiveType]int // Count of directives by type in this file - LanguageCounts map[string]int // Count of examples by language in this file - OutputFiles []string // List of output files generated from this source + DirectiveCounts map[rst.DirectiveType]int // Count of directives by type in this file + LanguageCounts map[string]int // Count of examples by language in this file + OutputFiles []string // List of output files generated from this source } // NewReport creates a new initialized Report with empty maps and slices. @@ -51,7 +41,7 @@ func NewReport() *Report { return &Report{ TraversedFilepaths: make([]string, 0), LanguageCounts: make(map[string]int), - DirectiveCounts: make(map[DirectiveType]int), + DirectiveCounts: make(map[rst.DirectiveType]int), SourcePathStats: make(map[string]*SourceStats), } } @@ -59,7 +49,7 @@ func NewReport() *Report { // NewSourceStats creates a new initialized SourceStats with empty maps and slices. func NewSourceStats() *SourceStats { return &SourceStats{ - DirectiveCounts: make(map[DirectiveType]int), + DirectiveCounts: make(map[rst.DirectiveType]int), LanguageCounts: make(map[string]int), OutputFiles: make([]string, 0), } diff --git a/commands/extract/code-examples/writer.go b/commands/extract/code-examples/writer.go index 15f1d52..0c07fbd 100644 --- a/commands/extract/code-examples/writer.go +++ b/commands/extract/code-examples/writer.go @@ -5,6 +5,9 @@ import ( "os" "path/filepath" "strings" + + "github.com/grove-platform/audit-cli/internal/language" + "github.com/grove-platform/audit-cli/internal/rst" ) // WriteCodeExample writes a code example to a file in the output directory. 
@@ -92,10 +95,10 @@ func GenerateOutputFilename(example CodeExample) string { sourceBase := filepath.Base(example.SourceFile) sourceBase = strings.TrimSuffix(sourceBase, filepath.Ext(sourceBase)) - extension := GetFileExtensionFromLanguage(example.Language) + extension := language.GetExtensionFromLanguage(example.Language) // For io-code-block, include the subtype (input/output) in the filename - if example.DirectiveName == IoCodeBlock && example.SubType != "" { + if example.DirectiveName == rst.IoCodeBlock && example.SubType != "" { filename := fmt.Sprintf("%s.%s.%d.%s%s", sourceBase, example.DirectiveName, diff --git a/commands/report/report.go b/commands/report/report.go new file mode 100644 index 0000000..ca59e72 --- /dev/null +++ b/commands/report/report.go @@ -0,0 +1,36 @@ +// Package report provides the parent command for generating reports. +// +// This package serves as the parent command for various reporting operations. +// Currently supports: +// - testable-code: Analyze testable code examples on pages from analytics data +// +// Future subcommands could include other report types for documentation metrics. +package report + +import ( + testablecode "github.com/grove-platform/audit-cli/commands/report/testable-code" + "github.com/spf13/cobra" +) + +// NewReportCommand creates the report parent command. +// +// This command serves as a parent for various reporting operations. +// It doesn't perform any operations itself but provides a namespace for subcommands. +func NewReportCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "report", + Short: "Generate reports from documentation data", + Long: `Generate various reports from documentation data and analytics. 
+ +Currently supports: + - testable-code: Analyze testable code examples on pages from analytics CSV data + +Future subcommands may support other report types for documentation metrics.`, + } + + // Add subcommands + cmd.AddCommand(testablecode.NewTestableCodeCommand()) + + return cmd +} + diff --git a/commands/report/testable-code/code_collector.go b/commands/report/testable-code/code_collector.go new file mode 100644 index 0000000..c435e36 --- /dev/null +++ b/commands/report/testable-code/code_collector.go @@ -0,0 +1,819 @@ +package testablecode + +import ( + "bufio" + "os" + "strings" + + "github.com/grove-platform/audit-cli/internal/config" + lang "github.com/grove-platform/audit-cli/internal/language" + "github.com/grove-platform/audit-cli/internal/projectinfo" + "github.com/grove-platform/audit-cli/internal/rst" +) + +// AnalyzePage analyzes a single page for code examples. +// +// This function resolves a URL to its source file in the monorepo, then collects +// all code examples from that file and any files it includes. The analysis includes: +// - Identifying the directive type (literalinclude, code-block, io-code-block) +// - Determining the product/language context for each example +// - Checking if the example is tested (references tested code) +// - Checking if the example is testable (based on product) +// +// The contentDir is extracted from the source path and used for product determination +// when no explicit context (tabs, composables) is available. 
+func AnalyzePage(entry PageEntry, urlMapping *config.URLMapping, mappings *ProductMappings) (*PageAnalysis, error) { + // Resolve URL to source file + sourcePath, contentDir, err := urlMapping.ResolveURL(entry.URL) + if err != nil { + return nil, err + } + + // Check if source file exists + if _, err := os.Stat(sourcePath); os.IsNotExist(err) { + return nil, err + } + + // Merge project-specific composables from snooty.toml + // This allows projects like Atlas to define custom composables that override rstspec.toml + mergedMappings := MergeProjectComposables(mappings, sourcePath) + + analysis := &PageAnalysis{ + Rank: entry.Rank, + URL: entry.URL, + SourcePath: sourcePath, + ContentDir: contentDir, + } + + // Collect code examples from the file and its includes + visited := make(map[string]bool) + examples, err := collectCodeExamples(sourcePath, contentDir, visited, mergedMappings) + if err != nil { + return nil, err + } + + analysis.CodeExamples = examples + return analysis, nil +} + +// collectCodeExamples collects all code examples from a file and its includes. +// +// This is the public entry point that starts collection with no inherited context. +// It delegates to collectCodeExamplesWithContext, which does the actual work. +// +// WHY THIS WRAPPER EXISTS: +// The collection process is recursive - when we encounter an `.. include::` directive, +// we need to collect code examples from the included file too. However, included files +// may need to inherit context from their parent (e.g., a `selected-content` block). +// +// This wrapper provides a clean entry point for external callers (like AnalyzePage) +// who just want to say "analyze this file" without worrying about context inheritance. +// The recursive collectCodeExamplesWithContext handles passing context through the +// include chain internally. 
+// +// Flow: +// +// collectCodeExamples(main.txt) ← entry point, nil context +// └── collectCodeExamplesWithContext(main.txt, nil) +// └── collectCodeExamplesWithContext(included.rst, inherited context) +// └── collectCodeExamplesWithContext(nested.rst, inherited context) +func collectCodeExamples(filePath, contentDir string, visited map[string]bool, mappings *ProductMappings) ([]CodeExample, error) { + return collectCodeExamplesWithContext(filePath, contentDir, visited, nil, mappings) +} + +// collectCodeExamplesWithContext collects code examples with inherited context from parent. +// +// CONTENT INCLUSION TYPES HANDLED: +// This function recursively follows content inclusions to find all code examples. +// The rst.FindIncludeDirectives function finds `.. include::` directives and resolves +// paths using MongoDB-specific conventions: +// +// 1. Regular RST includes: `.. include:: /includes/foo.rst` → resolved directly +// 2. Steps files: `.. include:: /includes/steps/foo.rst` → resolved to steps-foo.yaml +// 3. Extracts files: `.. include:: /includes/extracts/foo.rst` → resolved to YAML with ref +// 4. Template variables: `.. include:: {{var}}` → resolved from replacement section +// +// For YAML files (steps, extracts), rst.ParseDirectives handles two formats: +// 1. RST-in-YAML: RST directives embedded in YAML content (e.g., `.. code-block::` in `content: |` blocks) +// 2. YAML-native: Legacy `action:` blocks with `language:` and `code:` fields (added January 2026) +// +// CONTEXT INHERITANCE: +// When a file is included via `.. include::` within a `.. selected-content::` block +// or a `.. tab::` block, the code examples in that included file should inherit the +// context (language/product) from the parent block. This is critical for accurate +// product attribution because: +// +// 1. Many driver docs use composable tutorials where the main file has +// `.. 
selected-content:: :selections: python` and includes a shared file +// that contains the actual code examples. +// +// 2. Without context inheritance, those code examples would be attributed to +// their raw language (e.g., "python") rather than the proper product context +// (e.g., "Python" driver). +// +// The parentContext parameter carries this inherited context through the include chain. +func collectCodeExamplesWithContext(filePath, contentDir string, visited map[string]bool, parentContext *CodeContext, mappings *ProductMappings) ([]CodeExample, error) { + if visited[filePath] { + return nil, nil + } + visited[filePath] = true + + var examples []CodeExample + + // Parse directives from the file + directives, err := rst.ParseDirectives(filePath) + if err != nil { + return nil, err + } + + // Parse selected-content blocks to get context for includes + selectedContentMap, err := parseSelectedContentBlocks(filePath) + if err != nil { + selectedContentMap = make(map[string]string) + } + + // Parse context blocks (tabs, composables) with their line ranges + // This allows us to match each directive to its containing context block + var contextBlocks []contextBlock + var fileContext []CodeContext + if parentContext != nil { + // If we have a parent context, use it for all directives + fileContext = []CodeContext{*parentContext} + } else { + // Parse context blocks to get line-range-aware context + contextBlocks, err = parseContextBlocks(filePath) + if err != nil { + contextBlocks = nil + } + // Also get file-level context (composable-tutorial options apply to whole file) + fileContext, err = parseFileContexts(filePath) + if err != nil { + fileContext = []CodeContext{{}} + } + } + + // Process each directive with its specific context + for _, directive := range directives { + // Find the context for this directive based on its line number + contexts := findContextForLine(directive.LineNum, contextBlocks, fileContext) + exs := processDirective(directive, filePath, 
contentDir, contexts, mappings) + examples = append(examples, exs...) + } + + // Follow includes with their selected-content context + includeFiles, err := rst.FindIncludeDirectives(filePath) + if err == nil { + for _, includeFile := range includeFiles { + // Check if this include has a selected-content or tab context + var includeContext *CodeContext + if selection, ok := selectedContentMap[includeFile]; ok { + // Determine if this is a tabid or a composable language selection + // by checking which mapping contains it + if _, isTabID := mappings.DriversTabIDToProduct[selection]; isTabID { + includeContext = &CodeContext{TabID: selection} + } else { + // Treat as composable language selection + includeContext = &CodeContext{Language: selection} + } + } else if parentContext != nil { + includeContext = parentContext + } + + includedExamples, err := collectCodeExamplesWithContext(includeFile, contentDir, visited, includeContext, mappings) + if err == nil { + examples = append(examples, includedExamples...) + } + } + } + + return examples, nil +} + +// contextBlock represents a context-providing block (tab or selected-content) with its line range. +type contextBlock struct { + context CodeContext + startLine int + endLine int // -1 means extends to end of file or next block at same level +} + +// parseContextBlocks parses a file to extract tab and selected-content blocks with their line ranges. +// This allows matching code examples to their containing context block. 
+func parseContextBlocks(filePath string) ([]contextBlock, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + var blocks []contextBlock + scanner := bufio.NewScanner(file) + lineNum := 0 + + // Track open blocks by their indentation level + type openBlock struct { + context CodeContext + start int + indent int + } + var openBlocks []openBlock + + for scanner.Scan() { + lineNum++ + line := scanner.Text() + trimmed := strings.TrimLeft(line, " ") + trimmedLine := strings.TrimSpace(line) + indent := len(line) - len(trimmed) + + // Close any blocks that have ended (non-empty line at same or less indentation) + if trimmedLine != "" && !strings.HasPrefix(trimmedLine, ":") { + for i := len(openBlocks) - 1; i >= 0; i-- { + if indent <= openBlocks[i].indent { + // This block has ended + blocks = append(blocks, contextBlock{ + context: openBlocks[i].context, + startLine: openBlocks[i].start, + endLine: lineNum - 1, + }) + openBlocks = openBlocks[:i] + } + } + } + + // Check for tab directive + if rst.TabDirectiveRegex.MatchString(trimmedLine) { + openBlocks = append(openBlocks, openBlock{ + context: CodeContext{}, // TabID will be filled in when we find :tabid: + start: lineNum, + indent: indent, + }) + continue + } + + // Check for selected-content directive + if rst.SelectedContentDirectiveRegex.MatchString(trimmedLine) { + openBlocks = append(openBlocks, openBlock{ + context: CodeContext{}, // Selection will be filled in when we find :selections: + start: lineNum, + indent: indent, + }) + continue + } + + // Look for :tabid: option to fill in the most recent tab block + if len(openBlocks) > 0 { + if matches := rst.TabIDOptionRegex.FindStringSubmatch(line); len(matches) > 1 { + openBlocks[len(openBlocks)-1].context.TabID = strings.TrimSpace(matches[1]) + continue + } + // Look for :selections: option + if matches := rst.SelectionsOptionRegex.FindStringSubmatch(line); len(matches) > 1 { + 
openBlocks[len(openBlocks)-1].context.Language = strings.TrimSpace(matches[1]) + continue + } + } + } + + // Close any remaining open blocks (they extend to end of file) + for _, ob := range openBlocks { + blocks = append(blocks, contextBlock{ + context: ob.context, + startLine: ob.start, + endLine: lineNum, + }) + } + + return blocks, scanner.Err() +} + +// findContextForLine finds the context that applies to a given line number. +// It checks context blocks first (tabs, selected-content), then falls back to file-level context. +func findContextForLine(lineNum int, contextBlocks []contextBlock, fileContext []CodeContext) []CodeContext { + // Check if this line is inside any context block + for _, block := range contextBlocks { + if lineNum >= block.startLine && lineNum <= block.endLine { + // Found a containing block - use its context + if block.context.TabID != "" || block.context.Language != "" || block.context.Interface != "" { + return []CodeContext{block.context} + } + } + } + + // Fall back to file-level context + return fileContext +} + +// CodeContext represents the context in which a code example appears. +// +// MongoDB documentation uses several mechanisms to provide context for code examples: +// +// 1. Driver Tab Sets (`.. tabs-drivers::` with `.. tab::` and `:tabid:`): +// Used to show the same concept in multiple driver languages. The :tabid: +// identifies which driver (e.g., "python", "nodejs", "java-sync"). +// +// 2. Composable Tutorials (`.. composable-tutorial::` with `:options:`): +// Used for tutorials that can be customized by language and interface. +// Options like "language=python; interface=driver" specify the context. +// +// 3. Selected Content (`.. selected-content::` with `:selections:`): +// Used within composable tutorials to show content for a specific selection. +// The :selections: value identifies which option is active. 
+// +// These contexts are used to determine the product for code examples, which in turn +// determines whether the example is testable (has test infrastructure). +type CodeContext struct { + TabID string // From :tabid: option in .. tab:: directive + Composable string // From composable-tutorial options (unused, kept for future) + Interface string // From interface composable (e.g., "mongosh", "driver", "compass") + Language string // From language composable (e.g., "python", "nodejs", "java") +} + +// processDirective converts an RST directive to CodeExample(s). +// +// This function handles five types of code example directives: +// - literalinclude: Transcludes code from an external file +// - code-block: Inline code block with language specification +// - code: Shorter alias for code-block (standard reStructuredText, parsed as code-block) +// - io-code-block: Input/output code example with separate input and output blocks +// - yaml-code-block: YAML-native code examples from legacy steps files (action: blocks) +// +// For each directive, it determines the product based on the language and context, +// checks if the example is tested (references tested code), and checks if it's testable. 
+func processDirective(directive rst.Directive, sourceFile, contentDir string, contexts []CodeContext, mappings *ProductMappings) []CodeExample { + var examples []CodeExample + + switch directive.Type { + case rst.LiteralInclude: + ex := CodeExample{ + Type: string(rst.LiteralInclude), + FilePath: directive.Argument, + SourceFile: sourceFile, + } + ex.Language = directive.ResolveLanguage() + ex.IsTested = isTestedPath(directive.Argument) + ex.Product = determineProduct(ex.Language, contentDir, contexts, mappings) + ex.IsTestable = isTestable(ex.Product, contentDir) + ex.IsMaybeTestable = isMaybeTestable(ex.Product) + examples = append(examples, ex) + + case rst.CodeBlock: + ex := CodeExample{ + Type: string(rst.CodeBlock), + SourceFile: sourceFile, + } + ex.Language = getLanguage(directive, directive.Argument) + ex.Product = determineProduct(ex.Language, contentDir, contexts, mappings) + ex.IsTestable = isTestable(ex.Product, contentDir) + ex.IsMaybeTestable = isMaybeTestable(ex.Product) + examples = append(examples, ex) + + case rst.IoCodeBlock: + // Process input directive + if directive.InputDirective != nil { + ex := CodeExample{ + Type: string(rst.IoCodeBlock), + IsInput: true, + FilePath: directive.InputDirective.Argument, + SourceFile: sourceFile, + } + ex.Language = directive.InputDirective.ResolveLanguage(directive.Options) + ex.IsTested = isTestedPath(directive.InputDirective.Argument) + ex.Product = determineProduct(ex.Language, contentDir, contexts, mappings) + ex.IsTestable = isTestable(ex.Product, contentDir) + ex.IsMaybeTestable = isMaybeTestable(ex.Product) + examples = append(examples, ex) + } + + // Process output directive + if directive.OutputDirective != nil { + ex := CodeExample{ + Type: string(rst.IoCodeBlock), + IsOutput: true, + FilePath: directive.OutputDirective.Argument, + SourceFile: sourceFile, + } + ex.Language = directive.OutputDirective.ResolveLanguage(directive.Options) + ex.IsTested = 
isTestedPath(directive.OutputDirective.Argument) + ex.Product = determineProduct(ex.Language, contentDir, contexts, mappings) + ex.IsTestable = isTestable(ex.Product, contentDir) + ex.IsMaybeTestable = isMaybeTestable(ex.Product) + examples = append(examples, ex) + } + + case rst.YAMLCodeBlock: + // YAML-native code examples from legacy steps files (action: blocks) + ex := CodeExample{ + Type: string(rst.YAMLCodeBlock), + SourceFile: sourceFile, + } + ex.Language = getLanguage(directive, directive.Argument) + ex.Product = determineProduct(ex.Language, contentDir, contexts, mappings) + ex.IsTestable = isTestable(ex.Product, contentDir) + ex.IsMaybeTestable = isMaybeTestable(ex.Product) + examples = append(examples, ex) + } + + return examples +} + +// getLanguage extracts the language from a directive. +// Checks the :language: option first, then falls back to defaultLang. +// If defaultLang is empty, returns lang.Undefined. +func getLanguage(directive rst.Directive, defaultLang string) string { + if langOpt, ok := directive.Options["language"]; ok && langOpt != "" { + return langOpt + } + if defaultLang != "" { + return defaultLang + } + return lang.Undefined +} + +// isTestedPath checks if a file path references tested code. +func isTestedPath(path string) bool { + return strings.Contains(path, "/tested/") +} + +// isTestable checks if a code example is testable based on its product and content directory. +// +// A code example is considered testable if it meets one of these criteria: +// 1. It's in a testable content directory (e.g., csharp, golang, java, mongodb-shell, node, pymongo-driver) +// 2. 
Its product (after context resolution) is in the TestableProducts list +// +// Context resolution determines the product through: +// - Composable tutorial selected-content blocks (e.g., :selections: nodejs) +// - Driver tab sets with :tabid: (e.g., :tabid: python) +// - Content directory mapping (e.g., content in "node" dir → Node.js) +// +// Raw language values like "javascript" and "shell" are intentionally NOT testable +// because many code examples use these languages without being actual Driver/Shell examples. +// Only properly contextualized examples are considered testable. +// +// Note: Being in a testable content directory (like mongodb-shell) does NOT automatically +// make all code examples testable. System shell commands (sh, bash) in the MongoDB Shell +// docs are still not testable - only actual MongoDB Shell code is testable. +func isTestable(product, contentDir string) bool { + // Check if product is testable + return TestableProducts[product] +} + +// isMaybeTestable checks if a code example is in the "grey area" - it uses a language +// that COULD be testable (javascript, shell) but lacks proper context to determine definitively. +// +// This applies to: +// - "JavaScript" product: Could be Node.js driver code OR browser JavaScript +// - "Shell" product: Could be MongoDB Shell code OR bash/system commands +// +// These examples are NOT counted as testable (to avoid false positives) but are flagged +// separately so they can be reviewed and potentially re-categorized. +func isMaybeTestable(product string) bool { + return MaybeTestableProducts[product] +} + +// determineProduct determines the product from language, content dir, and context. +// +// The logic handles several special cases: +// +// 1. Non-driver languages (bash, sh, json, yaml, etc.) bypass context inheritance +// and are reported based on their actual language. This prevents shell commands +// like "npm install" from being counted as Node.js driver examples. +// +// 2. 
MongoDB Shell languages (shell, javascript, js) have special handling: +// - In MongoDB Shell context (mongosh content dir or mongosh interface) → "MongoDB Shell" +// - "shell" outside MongoDB Shell context → "Shell" (not testable) +// - "javascript/js" outside MongoDB Shell context → use driver context or "JavaScript" +func determineProduct(language, contentDir string, contexts []CodeContext, mappings *ProductMappings) string { + // Check if this is a non-driver language that should bypass context inheritance. + // These languages should be reported based on their actual language, not the + // surrounding composable/tab context. + if language != "" && lang.IsNonDriverLanguage(language) { + return lang.GetProductFromLanguage(language) + } + + // Check if we're in a MongoDB Shell context + inMongoShellContext := isMongoShellContext(contentDir, contexts) + + // Handle MongoDB Shell languages specially + if language != "" && lang.IsMongoShellLanguage(language) { + if inMongoShellContext { + return "MongoDB Shell" + } + // "shell" outside MongoDB Shell context is just a shell command + langLower := strings.ToLower(language) + if langLower == "shell" { + return "Shell" + } + // "javascript" or "js" outside MongoDB Shell context - check for driver context + // (fall through to normal context checking below) + } + + // Check if we have a context with a specific product + for _, ctx := range contexts { + if ctx.TabID != "" { + if product, ok := mappings.DriversTabIDToProduct[ctx.TabID]; ok { + return product + } + } + if ctx.Language != "" { + if product, ok := mappings.ComposableLanguageToProduct[ctx.Language]; ok { + return product + } + } + if ctx.Interface != "" { + if product, ok := mappings.ComposableInterfaceToProduct[ctx.Interface]; ok { + return product + } + } + } + + // Map content directory to product using shared mapping + if product := projectinfo.GetProductFromContentDir(contentDir); product != "" { + return product + } + + // Fall back to language + if 
language != "" { + return lang.GetProductFromLanguage(language) + } + + return "Unknown" +} + +// isMongoShellContext checks if we're in a MongoDB Shell context based on +// content directory or composable/tab context. +func isMongoShellContext(contentDir string, contexts []CodeContext) bool { + // Check content directory + if contentDir == "mongodb-shell" { + return true + } + + // Check for mongosh interface in composable context + for _, ctx := range contexts { + if ctx.Interface == "mongosh" { + return true + } + } + + return false +} + +// parseSelectedContentBlocks parses a file to map include paths to their +// selected-content or tab context. Returns a map from include path to selection/tabid. +// +// WHY THIS EXISTS: +// In composable tutorials and tabbed content, include directives often appear inside +// context-providing blocks. For example: +// +// .. selected-content:: +// :selections: python +// +// .. include:: /includes/driver-examples/insert-one.rst +// +// This function builds a map from include paths to their context, which is then +// used during include processing to pass the correct context to determineProduct. +// +// IMPORTANT: Context inheritance only applies to driver-appropriate languages. 
+// Non-driver languages (see NonDriverLanguages in internal/language) bypass context entirely: +// - A "python" code block in the included file → attributed to "Python" (from context) +// - A "text" code block in the included file → attributed to "Text" (bypasses context) +// - A "sh" code block in the included file → attributed to "Shell" (bypasses context) +// +// The function handles both: +// - selected-content blocks with :selections: option +// - tab blocks with :tabid: option +func parseSelectedContentBlocks(filePath string) (map[string]string, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + result := make(map[string]string) + scanner := bufio.NewScanner(file) + + var currentSelection string + var currentTabID string + var inSelectedContent bool + var inTab bool + var blockIndent int + + for scanner.Scan() { + line := scanner.Text() + + // Calculate indentation + trimmed := strings.TrimLeft(line, " ") + trimmedLine := strings.TrimSpace(line) + indent := len(line) - len(trimmed) + + // Check for selected-content directive + if rst.SelectedContentDirectiveRegex.MatchString(trimmedLine) { + inSelectedContent = true + inTab = false + blockIndent = indent + currentSelection = "" + continue + } + + // Check for tab directive + if matches := rst.TabDirectiveRegex.FindStringSubmatch(trimmedLine); len(matches) > 1 { + inTab = true + inSelectedContent = false + blockIndent = indent + currentTabID = "" + continue + } + + // If we're in a selected-content block, look for :selections: + if inSelectedContent && currentSelection == "" { + if matches := rst.SelectionsOptionRegex.FindStringSubmatch(line); len(matches) > 1 { + currentSelection = strings.TrimSpace(matches[1]) + continue + } + } + + // If we're in a tab block, look for :tabid: + if inTab && currentTabID == "" { + if matches := rst.TabIDOptionRegex.FindStringSubmatch(line); len(matches) > 1 { + currentTabID = strings.TrimSpace(matches[1]) + continue + } 
+ } + + // Check if we've exited the block (less or equal indentation on non-empty line) + if trimmedLine != "" && indent <= blockIndent && !strings.HasPrefix(trimmed, ":") { + // Check if this is a new directive at same level + if strings.HasPrefix(trimmed, "..") { + // Could be a new selected-content or tab, handled above + // Or could be something else that ends our block + if !rst.SelectedContentDirectiveRegex.MatchString(trimmedLine) && !rst.TabDirectiveRegex.MatchString(trimmedLine) { + inSelectedContent = false + inTab = false + currentSelection = "" + currentTabID = "" + } + } + } + + // Look for include directives within the current context + if matches := rst.IncludeDirectiveRegex.FindStringSubmatch(trimmedLine); len(matches) > 1 { + includePath := strings.TrimSpace(matches[1]) + // Resolve the include path to match what FindIncludeDirectives returns + resolvedPath, err := rst.ResolveIncludePath(filePath, includePath) + if err == nil { + if inSelectedContent && currentSelection != "" { + result[resolvedPath] = currentSelection + } else if inTab && currentTabID != "" { + result[resolvedPath] = currentTabID + } + } + } + } + + return result, scanner.Err() +} + +// parseFileContexts parses a file to extract tab and composable contexts. +// +// This function scans a file for context-providing directives and extracts their +// configuration. It looks for: +// +// 1. Tab directives with :tabid: - Used in driver tab sets to identify the driver +// 2. Composable tutorials with :options: - Used to specify language and interface +// +// KNOWN LIMITATION: +// This function extracts ALL contexts from the file into a flat list, without +// tracking which code examples are inside which context blocks. For files with +// multiple tabs (e.g., Python, Node.js, Java tabs), all contexts are collected +// and determineProduct uses the first matching one for ALL code examples. 
+// +// PRACTICAL IMPACT (audited January 2026): +// This limitation has effectively zero practical impact because: +// +// 1. Code blocks always have explicit language - An audit of ~4,000 files with +// :tabid: directives found ZERO code blocks without explicit :language: inside +// driver tabs. Writers consistently specify the language, which takes precedence +// over tab context in determineProduct. +// +// 2. Literalinclude uses file extension - The 673 literalinclude directives found +// inside driver tabs all have file extensions that correctly identify the +// language (.py, .java, .js, etc.), so tab context is not needed. +// +// 3. JSON files are correctly identified - The only "mismatches" (19 cases) are +// intentional JSON data files shown across multiple driver tabs, which should +// be attributed to "JSON", not the driver. +// +// A more accurate implementation would track line ranges for each context block +// and match code examples to their containing block, but this adds significant +// complexity for zero practical benefit given the above findings. 
+func parseFileContexts(filePath string) ([]CodeContext, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, err + } + defer file.Close() + + var contexts []CodeContext + scanner := bufio.NewScanner(file) + + for scanner.Scan() { + line := scanner.Text() + trimmedLine := strings.TrimSpace(line) + + // Check for tab directive + if rst.TabDirectiveRegex.MatchString(trimmedLine) { + // Look for :tabid: on next lines + for scanner.Scan() { + nextLine := scanner.Text() + if strings.TrimSpace(nextLine) == "" { + break + } + if tabIDMatches := rst.TabIDOptionRegex.FindStringSubmatch(nextLine); len(tabIDMatches) > 1 { + contexts = append(contexts, CodeContext{TabID: strings.TrimSpace(tabIDMatches[1])}) + break + } + // If not an option line, stop looking + if !strings.HasPrefix(strings.TrimSpace(nextLine), ":") { + break + } + } + } + + // Check for composable-tutorial + if rst.ComposableTutorialDirectiveRegex.MatchString(trimmedLine) { + // Look for :options: on next lines + for scanner.Scan() { + nextLine := scanner.Text() + if strings.TrimSpace(nextLine) == "" { + break + } + if optMatches := rst.OptionsOptionRegex.FindStringSubmatch(nextLine); len(optMatches) > 1 { + ctx := parseComposableOptions(optMatches[1]) + contexts = append(contexts, ctx) + break + } + if !strings.HasPrefix(strings.TrimSpace(nextLine), ":") { + break + } + } + } + } + + if len(contexts) == 0 { + contexts = append(contexts, CodeContext{}) + } + + return contexts, scanner.Err() +} + +// parseComposableOptions parses composable options string like "language=python; interface=driver". +// +// WHY WE CHECK MULTIPLE COMPOSABLE IDS: +// The MongoDB documentation uses several variants of language and interface composables, +// each with slightly different option sets for different contexts. Writers can add new +// composable definitions to their snooty.toml files at any time. 
+// +// PATTERN MATCHING STRATEGY: +// Rather than maintaining an exhaustive list of composable IDs, we use pattern matching: +// - Any key containing "language" or "lang" → treated as language context +// - Any key containing "interface" → treated as interface context +// +// This approach is future-proof: new composables following the naming convention +// (e.g., "language-new-variant", "my-language-selector", "interface-v2") will be +// automatically handled without code changes. +// +// KNOWN COMPOSABLES (audited January 2026, expected to grow): +// +// Language-like composables (68 total usages): +// - language, language-no-dependencies, language-mongocryptd-only, +// - language-no-interface, language-atlas-only, language-atlas-only-2, +// - language-local-only, driver-language +// +// Interface-like composables (65 total usages): +// - interface, interface-atlas-only, interface-default-atlas-cli, +// - interface-local-only +// +// All these composables use the same option values (python, nodejs, mongosh, etc.), +// which are mapped to products via ProductMappings loaded from rstspec.toml. +func parseComposableOptions(options string) CodeContext { + ctx := CodeContext{} + parts := strings.Split(options, ";") + for _, part := range parts { + kv := strings.SplitN(strings.TrimSpace(part), "=", 2) + if len(kv) != 2 { + continue + } + key := strings.TrimSpace(kv[0]) + value := strings.TrimSpace(kv[1]) + + // Handle all language-like composables using substring matching. + // This catches: language, language-*, *-language, *-language-*, driver-lang, etc. + if strings.Contains(key, "language") || strings.Contains(key, "lang") { + ctx.Language = value + } + // Handle all interface-like composables using substring matching. + // This catches: interface, interface-*, *-interface, *-interface-*, etc. 
+ if strings.Contains(key, "interface") { + ctx.Interface = value + } + } + return ctx +} diff --git a/commands/report/testable-code/csv_parser.go b/commands/report/testable-code/csv_parser.go new file mode 100644 index 0000000..0315b66 --- /dev/null +++ b/commands/report/testable-code/csv_parser.go @@ -0,0 +1,110 @@ +package testablecode + +import ( + "encoding/csv" + "fmt" + "os" + "strconv" + "strings" +) + +// ParseCSV parses a CSV file with page rankings and URLs. +// Supports both header and headerless formats: +// - With header: rank,url (first row contains column names) +// - Without header: 1,www.mongodb.com/docs/... (first row is data) +// Returns a slice of PageEntry structs. +func ParseCSV(path string) ([]PageEntry, error) { + file, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open CSV file: %w", err) + } + defer file.Close() + + reader := csv.NewReader(file) + + // Read all records + records, err := reader.ReadAll() + if err != nil { + return nil, fmt.Errorf("failed to read CSV: %w", err) + } + + if len(records) < 1 { + return nil, fmt.Errorf("CSV file is empty") + } + + // Determine if first row is a header or data + // Check if first column of first row is a number (data) or text (header) + firstRow := records[0] + if len(firstRow) < 2 { + return nil, fmt.Errorf("CSV must have at least 2 columns (rank and URL)") + } + + hasHeader := false + rankIdx := 0 + urlIdx := 1 + + // Try to parse first column as a number + _, err = strconv.Atoi(strings.TrimSpace(firstRow[0])) + if err != nil { + // First column is not a number, so this is likely a header row + hasHeader = true + + // Find column indices from header + for i, col := range firstRow { + colLower := strings.ToLower(strings.TrimSpace(col)) + switch colLower { + case "rank", "site rank", "siterank": + rankIdx = i + case "url", "page", "path": + urlIdx = i + } + } + } + + // Determine starting row index + startIdx := 0 + if hasHeader { + startIdx = 1 + } + + if 
len(records) <= startIdx { + return nil, fmt.Errorf("no data rows found in CSV") + } + + // Parse data rows + var entries []PageEntry + for i, record := range records[startIdx:] { + if len(record) <= rankIdx || len(record) <= urlIdx { + continue // Skip malformed rows + } + + rankStr := strings.TrimSpace(record[rankIdx]) + url := strings.TrimSpace(record[urlIdx]) + + if rankStr == "" || url == "" { + continue // Skip empty rows + } + + rank, err := strconv.Atoi(rankStr) + if err != nil { + // Try to parse as float and convert + rankFloat, err := strconv.ParseFloat(rankStr, 64) + if err != nil { + return nil, fmt.Errorf("invalid rank value on row %d: %s", i+startIdx+1, rankStr) + } + rank = int(rankFloat) + } + + entries = append(entries, PageEntry{ + Rank: rank, + URL: url, + }) + } + + if len(entries) == 0 { + return nil, fmt.Errorf("no valid data rows found in CSV") + } + + return entries, nil +} + diff --git a/commands/report/testable-code/output.go b/commands/report/testable-code/output.go new file mode 100644 index 0000000..e1b8753 --- /dev/null +++ b/commands/report/testable-code/output.go @@ -0,0 +1,263 @@ +package testablecode + +import ( + "encoding/json" + "fmt" + "io" + "sort" + "strings" +) + +// BuildPageReport builds a PageReport from a PageAnalysis. 
+func BuildPageReport(analysis *PageAnalysis) PageReport { + report := PageReport{ + Rank: analysis.Rank, + URL: analysis.URL, + SourcePath: analysis.SourcePath, + ContentDir: analysis.ContentDir, + Error: analysis.Error, + ByProduct: make(map[string]*ProductStats), + } + + for _, ex := range analysis.CodeExamples { + report.TotalExamples++ + if ex.IsInput { + report.TotalInput++ + } + if ex.IsOutput { + report.TotalOutput++ + } + if ex.IsTested { + report.TotalTested++ + } + if ex.IsTestable { + report.TotalTestable++ + } + if ex.IsMaybeTestable { + report.TotalMaybeTestable++ + } + + // Aggregate by product + product := ex.Product + if product == "" { + product = "Unknown" + } + stats, ok := report.ByProduct[product] + if !ok { + stats = &ProductStats{Product: product} + report.ByProduct[product] = stats + } + stats.TotalCount++ + if ex.IsInput { + stats.InputCount++ + } + if ex.IsOutput { + stats.OutputCount++ + } + if ex.IsTested { + stats.TestedCount++ + } + if ex.IsTestable { + stats.TestableCount++ + } + if ex.IsMaybeTestable { + stats.MaybeTestableCount++ + } + } + + return report +} + +// OutputText outputs the reports in text format. +func OutputText(w io.Writer, reports []PageReport) error { + fmt.Fprintln(w, "="+strings.Repeat("=", 89)) + fmt.Fprintln(w, "PAGE ANALYTICS REPORT") + fmt.Fprintln(w, "="+strings.Repeat("=", 89)) + fmt.Fprintf(w, "Total pages analyzed: %d\n\n", len(reports)) + + // Summary table + fmt.Fprintln(w, "SUMMARY") + fmt.Fprintln(w, "-"+strings.Repeat("-", 89)) + fmt.Fprintf(w, "%-5s %-50s %6s %6s %8s %6s\n", "Rank", "URL", "Total", "Tested", "Testable", "Maybe") + fmt.Fprintln(w, "-"+strings.Repeat("-", 89)) + + for _, report := range reports { + url := report.URL + if len(url) > 50 { + url = url[:47] + "..." 
+ } + if report.Error != "" { + fmt.Fprintf(w, "%-5d %-50s %s\n", report.Rank, url, "ERROR: "+report.Error) + } else { + fmt.Fprintf(w, "%-5d %-50s %6d %6d %8d %6d\n", + report.Rank, url, report.TotalExamples, report.TotalTested, + report.TotalTestable, report.TotalMaybeTestable) + } + } + fmt.Fprintln(w) + + // Detailed per-page reports + fmt.Fprintln(w, "DETAILED REPORTS") + fmt.Fprintln(w, "="+strings.Repeat("=", 89)) + + for _, report := range reports { + if report.Error != "" { + continue + } + + fmt.Fprintf(w, "\nRank %d: %s\n", report.Rank, report.URL) + fmt.Fprintf(w, "Source: %s\n", report.SourcePath) + fmt.Fprintln(w, "-"+strings.Repeat("-", 89)) + + if len(report.ByProduct) == 0 { + fmt.Fprintln(w, " No code examples found") + continue + } + + // Sort products for consistent output + products := make([]string, 0, len(report.ByProduct)) + for p := range report.ByProduct { + products = append(products, p) + } + sort.Strings(products) + + fmt.Fprintf(w, " %-20s %6s %6s %6s %6s %8s %6s\n", + "Product", "Total", "Input", "Output", "Tested", "Testable", "Maybe") + fmt.Fprintln(w, " "+strings.Repeat("-", 68)) + + for _, product := range products { + stats := report.ByProduct[product] + fmt.Fprintf(w, " %-20s %6d %6d %6d %6d %8d %6d\n", + product, stats.TotalCount, stats.InputCount, stats.OutputCount, + stats.TestedCount, stats.TestableCount, stats.MaybeTestableCount) + } + + fmt.Fprintf(w, " %s\n", strings.Repeat("-", 68)) + fmt.Fprintf(w, " %-20s %6d %6d %6d %6d %8d %6d\n", + "TOTAL", report.TotalExamples, report.TotalInput, report.TotalOutput, + report.TotalTested, report.TotalTestable, report.TotalMaybeTestable) + } + + return nil +} + +// OutputJSON outputs the reports in JSON format. +func OutputJSON(w io.Writer, reports []PageReport) error { + encoder := json.NewEncoder(w) + encoder.SetIndent("", " ") + return encoder.Encode(reports) +} + +// OutputCSV outputs the reports in CSV format. +// If showDetails is false, outputs one row per page (summary). 
+// If showDetails is true, outputs one row per product per page (only products with non-zero values). +func OutputCSV(w io.Writer, reports []PageReport, showDetails bool) error { + if showDetails { + return outputCSVDetails(w, reports) + } + return outputCSVSummary(w, reports) +} + +// outputCSVSummary outputs one row per page with aggregate stats. +func outputCSVSummary(w io.Writer, reports []PageReport) error { + // Header + fmt.Fprintln(w, "Rank,URL,SourcePath,ContentDir,Total,Input,Output,Tested,Testable,Maybe,Error") + + for _, report := range reports { + // Escape fields that might contain commas or quotes + url := escapeCSV(report.URL) + sourcePath := escapeCSV(report.SourcePath) + contentDir := escapeCSV(report.ContentDir) + errorMsg := escapeCSV(report.Error) + + fmt.Fprintf(w, "%d,%s,%s,%s,%d,%d,%d,%d,%d,%d,%s\n", + report.Rank, url, sourcePath, contentDir, + report.TotalExamples, report.TotalInput, report.TotalOutput, + report.TotalTested, report.TotalTestable, report.TotalMaybeTestable, + errorMsg) + } + + return nil +} + +// outputCSVDetails outputs one row per product per page. +// Only includes products where at least one column has a non-zero value. 
+func outputCSVDetails(w io.Writer, reports []PageReport) error { + // Header + fmt.Fprintln(w, "Rank,URL,SourcePath,ContentDir,Product,Total,Input,Output,Tested,Testable,Maybe,Error") + + for _, report := range reports { + // Escape fields that might contain commas or quotes + url := escapeCSV(report.URL) + sourcePath := escapeCSV(report.SourcePath) + contentDir := escapeCSV(report.ContentDir) + errorMsg := escapeCSV(report.Error) + + if report.Error != "" { + // For error rows, output a single row with the error + fmt.Fprintf(w, "%d,%s,%s,%s,,%d,%d,%d,%d,%d,%d,%s\n", + report.Rank, url, sourcePath, contentDir, + report.TotalExamples, report.TotalInput, report.TotalOutput, + report.TotalTested, report.TotalTestable, report.TotalMaybeTestable, + errorMsg) + continue + } + + if len(report.ByProduct) == 0 { + // No code examples - output a single row with zeros + fmt.Fprintf(w, "%d,%s,%s,%s,,%d,%d,%d,%d,%d,%d,\n", + report.Rank, url, sourcePath, contentDir, + 0, 0, 0, 0, 0, 0) + continue + } + + // Sort products for consistent output + products := make([]string, 0, len(report.ByProduct)) + for p := range report.ByProduct { + products = append(products, p) + } + sort.Strings(products) + + for _, product := range products { + stats := report.ByProduct[product] + + // Skip products where all columns are zero + if stats.TotalCount == 0 && stats.InputCount == 0 && stats.OutputCount == 0 && + stats.TestedCount == 0 && stats.TestableCount == 0 && stats.MaybeTestableCount == 0 { + continue + } + + productEscaped := escapeCSV(product) + fmt.Fprintf(w, "%d,%s,%s,%s,%s,%d,%d,%d,%d,%d,%d,\n", + report.Rank, url, sourcePath, contentDir, productEscaped, + stats.TotalCount, stats.InputCount, stats.OutputCount, + stats.TestedCount, stats.TestableCount, stats.MaybeTestableCount) + } + } + + return nil +} + +// escapeCSV escapes a string for CSV output. +// If the string contains commas, quotes, or newlines, it wraps in quotes and escapes internal quotes. 
// escapeCSV prepares s for use as a single CSV field.
//
// A value containing a comma, a double quote, a line feed or a carriage return
// is wrapped in double quotes, with every embedded double quote doubled. Any
// other value, including the empty string, is returned unchanged.
func escapeCSV(s string) string {
	if !strings.ContainsAny(s, ",\"\n\r") {
		return s
	}
	return `"` + strings.ReplaceAll(s, `"`, `""`) + `"`
}
// NewTestableCodeCommand creates the testable-code subcommand.
//
// The command accepts up to two positional arguments: the analytics CSV file
// and, optionally, the monorepo path (resolved through config.GetMonorepoPath
// when omitted). With --list-drivers no positional argument is needed and the
// command only prints the available driver filters.
//
// Flags:
//   - --format / -f: output format, "text" (default), "json" or "csv"
//   - --details: per-product breakdown
//   - --output / -o: output file path; stdout when empty
//   - --filter: restrict pages by product area; may be given several times
//   - --list-drivers: list drivers known to the Snooty Data API and exit
func NewTestableCodeCommand() *cobra.Command {
	var outputFormat string
	var showDetails bool
	var outputFile string
	var filters []string
	var listDrivers bool

	cmd := &cobra.Command{
		// NOTE(review): RunE reads the CSV path from args[0], but the usage
		// line and the "driver:" entries in the help text below name no
		// placeholder for it / for the driver name — confirm against the
		// original file that nothing was lost here.
		Use:   "testable-code [monorepo-path]",
		Short: "Analyze testable code examples on pages from analytics data",
		// NOTE(review): the help text says the first row is treated as a
		// header, while ParseCSV also accepts files without a header row.
		Long: `Analyze testable code examples on documentation pages based on analytics CSV data.

Takes a CSV file with page rankings and URLs, resolves each URL to its source file
in the monorepo, collects code examples (literalinclude, code-block, io-code-block),
and generates a report with:
 - Total code examples per page
 - Breakdown by product/language
 - Input vs output counts (for io-code-block)
 - Tested vs untested counts
 - Testable count (examples that could be tested based on product)
 - Maybe testable count (javascript/shell examples without clear context)

The CSV file should have columns for rank and URL. The first row is treated as a header.

Example CSV format:
 rank,url
 1,www.mongodb.com/docs/atlas/some-page/
 2,www.mongodb.com/docs/manual/tutorial/install/

Testable products (have test infrastructure):
 - C#, Go, Java (Sync), Node.js, Python, MongoDB Shell

Filters (use --filter to focus on specific product areas):
 - search: Pages with "atlas-search" or "search" in URL (excludes vector-search)
 - vector-search: Pages with "vector-search" in URL
 - drivers: All MongoDB driver documentation pages
 - driver:: Specific driver. Testable values include:
 csharp, golang, java, node, pymongo
 For the full list of options, use the --list-drivers flag.
 - mongosh: MongoDB Shell documentation pages

Multiple filters can be specified to include pages matching any filter.

Use --list-drivers to see available Driver filter options

Output formats:
 - text: Human-readable report with summary and detailed sections
 - json: Machine-readable JSON output
 - csv: Comma-separated values (summary by default, use --details for per-product breakdown)`,
		// Zero arguments are allowed so that --list-drivers works on its own;
		// the CSV argument is enforced manually inside RunE.
		Args: cobra.RangeArgs(0, 2),
		RunE: func(cmd *cobra.Command, args []string) error {
			// Handle --list-drivers flag
			if listDrivers {
				return runListDrivers()
			}

			// Require CSV file if not listing drivers
			if len(args) < 1 {
				return fmt.Errorf("requires at least 1 arg(s), only received 0")
			}

			csvPath := args[0]

			// Get monorepo path; an empty cmdLineArg leaves the lookup to
			// config.GetMonorepoPath.
			var cmdLineArg string
			if len(args) > 1 {
				cmdLineArg = args[1]
			}
			monorepoPath, err := config.GetMonorepoPath(cmdLineArg)
			if err != nil {
				return err
			}

			return runTestableCode(csvPath, monorepoPath, outputFormat, showDetails, outputFile, filters)
		},
	}

	cmd.Flags().StringVarP(&outputFormat, "format", "f", "text", "Output format: text, json, or csv")
	cmd.Flags().BoolVar(&showDetails, "details", false, "Show detailed per-product breakdown (for csv: one row per product per page)")
	cmd.Flags().StringVarP(&outputFile, "output", "o", "", "Output file path (default: stdout)")
	cmd.Flags().StringSliceVar(&filters, "filter", nil, "Filter pages by product area (search, vector-search, drivers, driver:, mongosh)")
	cmd.Flags().BoolVar(&listDrivers, "list-drivers", false, "List all drivers from the Snooty Data API")

	return cmd
}
cmd.Flags().StringSliceVar(&filters, "filter", nil, "Filter pages by product area (search, vector-search, drivers, driver:, mongosh)") + cmd.Flags().BoolVar(&listDrivers, "list-drivers", false, "List all drivers from the Snooty Data API") + + return cmd +} + +// runListDrivers lists all drivers from the Snooty Data API. +func runListDrivers() error { + // Use the version that doesn't require a monorepo path + urlMapping, err := config.GetURLMappingWithoutMonorepo() + if err != nil { + return fmt.Errorf("failed to get URL mapping: %w", err) + } + + driverSlugs := urlMapping.GetDriverSlugs() + if len(driverSlugs) == 0 { + fmt.Println("No drivers found in the Snooty Data API.") + return nil + } + + // Build a list of driver info and sort by project name (the filter value) + type driverInfo struct { + projectName string + slug string + hasTestInfra bool + } + drivers := make([]driverInfo, 0, len(driverSlugs)) + for _, slug := range driverSlugs { + projectName := urlMapping.URLSlugToProject[slug] + drivers = append(drivers, driverInfo{ + projectName: projectName, + slug: slug, + hasTestInfra: TestableDrivers[projectName], + }) + } + // Sort alphabetically by project name + sort.Slice(drivers, func(i, j int) bool { + return drivers[i].projectName < drivers[j].projectName + }) + + fmt.Println("Available driver filters:") + fmt.Println("=========================") + fmt.Println() + fmt.Println("Use --filter driver: with any of these values:") + fmt.Println() + for _, d := range drivers { + testableMarker := "" + if d.hasTestInfra { + testableMarker = " (has test infrastructure)" + } + fmt.Printf(" --filter driver:%-20s (URL slug: %s)%s\n", d.projectName, d.slug, testableMarker) + } + fmt.Println() + fmt.Println("Drivers with test infrastructure:") + fmt.Printf(" %s\n", strings.Join(getTestableDriverNames(), ", ")) + fmt.Println() + fmt.Println("Note: mongodb-shell is not a driver. 
Use --filter mongosh instead.") + + return nil +} + +// runTestableCode is the main entry point for the testable-code command. +func runTestableCode(csvPath, monorepoPath, outputFormat string, showDetails bool, outputFile string, filters []string) error { + // Parse CSV file + entries, err := ParseCSV(csvPath) + if err != nil { + return fmt.Errorf("failed to parse CSV: %w", err) + } + + fmt.Fprintf(os.Stderr, "Parsed %d pages from CSV\n", len(entries)) + + // Get URL mapping early - needed for driver filters + urlMapping, err := config.GetURLMapping(monorepoPath) + if err != nil { + return fmt.Errorf("failed to get URL mapping: %w", err) + } + + // Validate filters before applying + if err := validateFilters(filters); err != nil { + return err + } + + // Apply URL filters if specified + if len(filters) > 0 { + originalCount := len(entries) + entries = filterEntries(entries, filters, urlMapping) + fmt.Fprintf(os.Stderr, "Filtered to %d pages matching filter(s): %v\n", len(entries), filters) + if len(entries) == 0 { + fmt.Fprintf(os.Stderr, "Warning: No pages matched the specified filter(s). 
Original count: %d\n", originalCount) + } + } + + // Load product mappings from rstspec.toml + fmt.Fprintf(os.Stderr, "Loading product mappings from rstspec.toml...\n") + mappings, err := LoadProductMappings() + if err != nil { + return fmt.Errorf("failed to load product mappings: %w", err) + } + + // Analyze each page + var reports []PageReport + for i, entry := range entries { + fmt.Fprintf(os.Stderr, "Analyzing page %d/%d: %s\n", i+1, len(entries), entry.URL) + + analysis, err := AnalyzePage(entry, urlMapping, mappings) + if err != nil { + // Log error but continue with other pages + fmt.Fprintf(os.Stderr, " Warning: %v\n", err) + reports = append(reports, PageReport{ + Rank: entry.Rank, + URL: entry.URL, + Error: err.Error(), + }) + continue + } + + report := BuildPageReport(analysis) + reports = append(reports, report) + } + + // Determine output writer + var writer *os.File + if outputFile != "" { + f, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer f.Close() + writer = f + fmt.Fprintf(os.Stderr, "Writing output to %s\n", outputFile) + } else { + writer = os.Stdout + } + + // Output report + switch outputFormat { + case "json": + return OutputJSON(writer, reports) + case "csv": + return OutputCSV(writer, reports, showDetails) + default: + return OutputText(writer, reports) + } +} + +// filterEntries filters page entries based on the specified filters. +// Returns entries that match any of the specified filters. +func filterEntries(entries []PageEntry, filters []string, urlMapping *config.URLMapping) []PageEntry { + var filtered []PageEntry + for _, entry := range entries { + if matchesAnyFilter(entry.URL, filters, urlMapping) { + filtered = append(filtered, entry) + } + } + return filtered +} + +// matchesAnyFilter checks if a URL matches any of the specified filters. 
+func matchesAnyFilter(url string, filters []string, urlMapping *config.URLMapping) bool { + for _, filter := range filters { + if matchesFilter(url, filter, urlMapping) { + return true + } + } + return false +} + +// validateFilters validates that all specified filters are valid. +// Returns an error if any filter is invalid. +func validateFilters(filters []string) error { + for _, filter := range filters { + filterLower := strings.ToLower(filter) + + // Check for driver: pattern - any driver name is valid + if strings.HasPrefix(filterLower, "driver:") { + driverName := strings.TrimPrefix(filterLower, "driver:") + // mongodb-shell should use mongosh filter since it's not a driver + if driverName == "mongodb-shell" { + return fmt.Errorf("invalid filter %q: mongodb-shell is not a driver, use --filter mongosh instead", filter) + } + // Any other driver name is valid - will just return no results if not found + continue + } + + // Check known filters + switch filterLower { + case "search", "vector-search", "drivers", "mongosh": + // Valid filters + default: + return fmt.Errorf("unknown filter %q.\nValid filters: search, vector-search, drivers, driver:, mongosh\nUse --list-drivers to see available driver names", filter) + } + } + return nil +} + +// getTestableDriverNames returns a sorted list of driver names with test infrastructure. +func getTestableDriverNames() []string { + var names []string + for name := range TestableDrivers { + names = append(names, name) + } + sort.Strings(names) + return names +} + +// matchesFilter checks if a URL matches a specific filter. +// Matching is case-insensitive. 
+// +// Supported filters: +// - "search": matches URLs containing "atlas-search" or "search" but NOT "vector-search" +// - "vector-search": matches URLs containing "vector-search" +// - "drivers": matches all driver documentation URLs (excludes mongodb-shell) +// - "driver:": matches a specific driver by project name (e.g., driver:pymongo) +// - "mongosh": matches MongoDB Shell documentation URLs +func matchesFilter(url string, filter string, urlMapping *config.URLMapping) bool { + urlLower := strings.ToLower(url) + filterLower := strings.ToLower(filter) + + // Check for driver: pattern + if strings.HasPrefix(filterLower, "driver:") { + driverName := strings.TrimPrefix(filterLower, "driver:") + return urlMapping.IsSpecificDriverURL(url, driverName) + } + + switch filterLower { + case "search": + // Match "atlas-search" or "search" but exclude "vector-search" + if strings.Contains(urlLower, "vector-search") { + return false + } + return strings.Contains(urlLower, "atlas-search") || strings.Contains(urlLower, "search") + case "vector-search": + return strings.Contains(urlLower, "vector-search") + case "drivers": + return urlMapping.IsDriverURL(url) + case "mongosh": + return urlMapping.IsMongoshURL(url) + default: + // This shouldn't happen if validateFilters was called first + return false + } +} diff --git a/commands/report/testable-code/testable_code_test.go b/commands/report/testable-code/testable_code_test.go new file mode 100644 index 0000000..a4b0b99 --- /dev/null +++ b/commands/report/testable-code/testable_code_test.go @@ -0,0 +1,1174 @@ +// Package testablecode provides tests for the testable-code subcommand. +package testablecode + +import ( + "os" + "path/filepath" + "testing" + + "github.com/grove-platform/audit-cli/internal/config" + "github.com/grove-platform/audit-cli/internal/rst" +) + +// createMockURLMapping creates a mock URLMapping for testing filter functions. 
+func createMockURLMapping() *config.URLMapping { + return &config.URLMapping{ + URLSlugToProject: map[string]string{ + "drivers/go": "golang", + "drivers/node": "node", + "drivers/csharp": "csharp", + "languages/python/pymongo-driver": "pymongo", + "drivers/java/sync": "java", + "mongodb-shell": "mongodb-shell", + "mongoid": "mongoid", + "ruby-driver": "ruby-driver", + }, + DriverSlugs: []string{ + "drivers/csharp", + "drivers/go", + "drivers/java/sync", + "drivers/node", + "languages/python/pymongo-driver", + "mongoid", + "ruby-driver", + }, + } +} + +// TestParseCSV tests the CSV parsing functionality. +func TestParseCSV(t *testing.T) { + // Create a temporary CSV file with header + tempDir := t.TempDir() + csvPath := filepath.Join(tempDir, "test.csv") + + csvContent := `rank,url +1,www.mongodb.com/docs/atlas/page1/ +2,www.mongodb.com/docs/manual/page2/ +3,www.mongodb.com/docs/drivers/page3/` + + if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil { + t.Fatalf("Failed to write test CSV: %v", err) + } + + entries, err := ParseCSV(csvPath) + if err != nil { + t.Fatalf("ParseCSV failed: %v", err) + } + + if len(entries) != 3 { + t.Errorf("Expected 3 entries, got %d", len(entries)) + } + + // Check first entry + if entries[0].Rank != 1 { + t.Errorf("Expected rank 1, got %d", entries[0].Rank) + } + if entries[0].URL != "www.mongodb.com/docs/atlas/page1/" { + t.Errorf("Expected URL 'www.mongodb.com/docs/atlas/page1/', got '%s'", entries[0].URL) + } +} + +// TestParseCSVWithoutHeader tests CSV parsing without a header row. 
+func TestParseCSVWithoutHeader(t *testing.T) { + tempDir := t.TempDir() + csvPath := filepath.Join(tempDir, "test.csv") + + csvContent := `1,www.mongodb.com/docs/atlas/page1/ +2,www.mongodb.com/docs/manual/page2/` + + if err := os.WriteFile(csvPath, []byte(csvContent), 0644); err != nil { + t.Fatalf("Failed to write test CSV: %v", err) + } + + entries, err := ParseCSV(csvPath) + if err != nil { + t.Fatalf("ParseCSV failed: %v", err) + } + + if len(entries) != 2 { + t.Errorf("Expected 2 entries, got %d", len(entries)) + } + + if entries[0].Rank != 1 { + t.Errorf("Expected rank 1, got %d", entries[0].Rank) + } +} + +// TestParseCSVEmptyFile tests error handling for empty CSV. +func TestParseCSVEmptyFile(t *testing.T) { + tempDir := t.TempDir() + csvPath := filepath.Join(tempDir, "empty.csv") + + if err := os.WriteFile(csvPath, []byte(""), 0644); err != nil { + t.Fatalf("Failed to write test CSV: %v", err) + } + + _, err := ParseCSV(csvPath) + if err == nil { + t.Error("Expected error for empty CSV, got nil") + } +} + +// TestParseCSVMissingFile tests error handling for missing file. +func TestParseCSVMissingFile(t *testing.T) { + _, err := ParseCSV("/nonexistent/path/file.csv") + if err == nil { + t.Error("Expected error for missing file, got nil") + } +} + +// TestMatchesFilter tests the matchesFilter function. 
+func TestMatchesFilter(t *testing.T) { + urlMapping := createMockURLMapping() + + testCases := []struct { + name string + url string + filter string + expected bool + }{ + // Search filter tests + {"search matches atlas-search", "www.mongodb.com/docs/atlas/atlas-search/tutorial/", "search", true}, + {"search matches search in path", "www.mongodb.com/docs/manual/search/text/", "search", true}, + {"search excludes vector-search", "www.mongodb.com/docs/atlas/atlas-vector-search/tutorial/", "search", false}, + {"search case insensitive", "www.mongodb.com/docs/atlas/Atlas-Search/tutorial/", "search", true}, + {"search no match", "www.mongodb.com/docs/atlas/triggers/", "search", false}, + + // Vector-search filter tests + {"vector-search matches", "www.mongodb.com/docs/atlas/atlas-vector-search/tutorial/", "vector-search", true}, + {"vector-search case insensitive", "www.mongodb.com/docs/atlas/Vector-Search/tutorial/", "vector-search", true}, + {"vector-search no match on regular search", "www.mongodb.com/docs/atlas/atlas-search/tutorial/", "vector-search", false}, + {"vector-search no match", "www.mongodb.com/docs/atlas/triggers/", "vector-search", false}, + + // Drivers filter tests + {"drivers matches go driver", "www.mongodb.com/docs/drivers/go/current/", "drivers", true}, + {"drivers matches node driver", "www.mongodb.com/docs/drivers/node/current/", "drivers", true}, + {"drivers matches pymongo", "www.mongodb.com/docs/languages/python/pymongo-driver/current/", "drivers", true}, + {"drivers excludes mongodb-shell", "www.mongodb.com/docs/mongodb-shell/current/", "drivers", false}, + {"drivers no match", "www.mongodb.com/docs/atlas/triggers/", "drivers", false}, + + // Specific driver filter tests + {"driver:golang matches", "www.mongodb.com/docs/drivers/go/current/page/", "driver:golang", true}, + {"driver:golang no match", "www.mongodb.com/docs/drivers/node/current/", "driver:golang", false}, + {"driver:pymongo matches", 
"www.mongodb.com/docs/languages/python/pymongo-driver/current/", "driver:pymongo", true}, + + // Mongosh filter tests + {"mongosh matches", "www.mongodb.com/docs/mongodb-shell/current/", "mongosh", true}, + {"mongosh no match", "www.mongodb.com/docs/drivers/go/current/", "mongosh", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := matchesFilter(tc.url, tc.filter, urlMapping) + if result != tc.expected { + t.Errorf("matchesFilter(%q, %q) = %v, expected %v", tc.url, tc.filter, result, tc.expected) + } + }) + } +} + +// TestMatchesAnyFilter tests the matchesAnyFilter function. +func TestMatchesAnyFilter(t *testing.T) { + urlMapping := createMockURLMapping() + + testCases := []struct { + name string + url string + filters []string + expected bool + }{ + {"matches first filter", "www.mongodb.com/docs/atlas/atlas-search/", []string{"search", "vector-search"}, true}, + {"matches second filter", "www.mongodb.com/docs/atlas/vector-search/", []string{"search", "vector-search"}, true}, + {"matches no filter", "www.mongodb.com/docs/atlas/triggers/", []string{"search", "vector-search"}, false}, + {"empty filters", "www.mongodb.com/docs/atlas/atlas-search/", []string{}, false}, + {"matches drivers filter", "www.mongodb.com/docs/drivers/go/current/", []string{"drivers"}, true}, + {"matches mongosh filter", "www.mongodb.com/docs/mongodb-shell/current/", []string{"mongosh"}, true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := matchesAnyFilter(tc.url, tc.filters, urlMapping) + if result != tc.expected { + t.Errorf("matchesAnyFilter(%q, %v) = %v, expected %v", tc.url, tc.filters, result, tc.expected) + } + }) + } +} + +// TestFilterEntries tests the filterEntries function. 
+func TestFilterEntries(t *testing.T) { + urlMapping := createMockURLMapping() + + entries := []PageEntry{ + {Rank: 1, URL: "www.mongodb.com/docs/atlas/atlas-search/tutorial/"}, + {Rank: 2, URL: "www.mongodb.com/docs/atlas/atlas-vector-search/tutorial/"}, + {Rank: 3, URL: "www.mongodb.com/docs/atlas/triggers/"}, + {Rank: 4, URL: "www.mongodb.com/docs/manual/text-search/"}, + {Rank: 5, URL: "www.mongodb.com/docs/drivers/go/current/"}, + {Rank: 6, URL: "www.mongodb.com/docs/mongodb-shell/current/"}, + } + + t.Run("filter by search", func(t *testing.T) { + filtered := filterEntries(entries, []string{"search"}, urlMapping) + if len(filtered) != 2 { + t.Errorf("Expected 2 entries, got %d", len(filtered)) + } + // Should include atlas-search and text-search, but not vector-search + for _, e := range filtered { + if e.URL == "www.mongodb.com/docs/atlas/atlas-vector-search/tutorial/" { + t.Error("Should not include vector-search URL in search filter") + } + } + }) + + t.Run("filter by vector-search", func(t *testing.T) { + filtered := filterEntries(entries, []string{"vector-search"}, urlMapping) + if len(filtered) != 1 { + t.Errorf("Expected 1 entry, got %d", len(filtered)) + } + if filtered[0].Rank != 2 { + t.Errorf("Expected rank 2, got %d", filtered[0].Rank) + } + }) + + t.Run("filter by both search filters", func(t *testing.T) { + filtered := filterEntries(entries, []string{"search", "vector-search"}, urlMapping) + if len(filtered) != 3 { + t.Errorf("Expected 3 entries, got %d", len(filtered)) + } + }) + + t.Run("no filters returns empty", func(t *testing.T) { + filtered := filterEntries(entries, []string{}, urlMapping) + if len(filtered) != 0 { + t.Errorf("Expected 0 entries with empty filter, got %d", len(filtered)) + } + }) + + t.Run("filter by drivers", func(t *testing.T) { + filtered := filterEntries(entries, []string{"drivers"}, urlMapping) + if len(filtered) != 1 { + t.Errorf("Expected 1 entry (go driver), got %d", len(filtered)) + } + if filtered[0].Rank != 5 { 
+ t.Errorf("Expected rank 5 (go driver), got %d", filtered[0].Rank) + } + }) + + t.Run("filter by mongosh", func(t *testing.T) { + filtered := filterEntries(entries, []string{"mongosh"}, urlMapping) + if len(filtered) != 1 { + t.Errorf("Expected 1 entry (mongodb-shell), got %d", len(filtered)) + } + if filtered[0].Rank != 6 { + t.Errorf("Expected rank 6 (mongodb-shell), got %d", filtered[0].Rank) + } + }) +} + +// TestValidateFilters tests the validateFilters function. +func TestValidateFilters(t *testing.T) { + testCases := []struct { + name string + filters []string + expectError bool + errorContains string + }{ + {"valid search filter", []string{"search"}, false, ""}, + {"valid vector-search filter", []string{"vector-search"}, false, ""}, + {"valid drivers filter", []string{"drivers"}, false, ""}, + {"valid mongosh filter", []string{"mongosh"}, false, ""}, + {"valid driver:golang filter", []string{"driver:golang"}, false, ""}, + {"valid driver:pymongo filter", []string{"driver:pymongo"}, false, ""}, + {"valid driver:ruby-driver filter", []string{"driver:ruby-driver"}, false, ""}, // Any driver is valid + {"valid driver:scala filter", []string{"driver:scala"}, false, ""}, // Any driver is valid + {"valid multiple filters", []string{"search", "drivers", "mongosh"}, false, ""}, + {"invalid unknown filter", []string{"unknown"}, true, "unknown filter"}, + {"invalid mongodb-shell as driver", []string{"driver:mongodb-shell"}, true, "use --filter mongosh"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + err := validateFilters(tc.filters) + if tc.expectError { + if err == nil { + t.Errorf("Expected error containing %q, got nil", tc.errorContains) + } else if tc.errorContains != "" && !contains(err.Error(), tc.errorContains) { + t.Errorf("Expected error containing %q, got %q", tc.errorContains, err.Error()) + } + } else { + if err != nil { + t.Errorf("Expected no error, got %v", err) + } + } + }) + } +} + +// contains checks if a string 
contains a substring (case-insensitive). +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(substr) == 0 || + (len(s) > 0 && len(substr) > 0 && findSubstring(s, substr))) +} + +func findSubstring(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +// TestTestableProducts tests the TestableProducts map. +func TestTestableProducts(t *testing.T) { + testCases := []struct { + product string + expected bool + }{ + {"Python", true}, + {"python", true}, + {"Node.js", true}, + {"nodejs", true}, + {"Go", true}, + {"go", true}, + {"Java", true}, + {"java", true}, + {"Java (Sync)", true}, + {"java-sync", true}, + {"C#", true}, + {"csharp", true}, + {"MongoDB Shell", true}, + {"mongosh", true}, + {"JavaScript", false}, // Not testable without context + {"Shell", false}, // Not testable without context + {"Ruby", false}, + {"PHP", false}, + {"Unknown", false}, + } + + for _, tc := range testCases { + result := TestableProducts[tc.product] + if result != tc.expected { + t.Errorf("TestableProducts[%q] = %v, expected %v", tc.product, result, tc.expected) + } + } +} + +// TestMaybeTestableProducts tests the MaybeTestableProducts map. +func TestMaybeTestableProducts(t *testing.T) { + testCases := []struct { + product string + expected bool + }{ + {"JavaScript", true}, + {"Shell", true}, + {"Python", false}, + {"Node.js", false}, + } + + for _, tc := range testCases { + result := MaybeTestableProducts[tc.product] + if result != tc.expected { + t.Errorf("MaybeTestableProducts[%q] = %v, expected %v", tc.product, result, tc.expected) + } + } +} + +// TestIsTestedPath tests the isTestedPath function. 
+func TestIsTestedPath(t *testing.T) { + testCases := []struct { + path string + expected bool + }{ + {"/code-examples/tested/python/example.py", true}, + {"/includes/tested/driver-examples/insert.py", true}, + {"/code-examples/untested/example.py", false}, + {"/includes/examples/insert.py", false}, + {"", false}, + } + + for _, tc := range testCases { + result := isTestedPath(tc.path) + if result != tc.expected { + t.Errorf("isTestedPath(%q) = %v, expected %v", tc.path, result, tc.expected) + } + } +} + +// TestIsTestable tests the isTestable function. +func TestIsTestable(t *testing.T) { + testCases := []struct { + product string + contentDir string + expected bool + }{ + {"Python", "pymongo-driver", true}, + {"Node.js", "node", true}, + {"Go", "golang", true}, + {"MongoDB Shell", "mongodb-shell", true}, + {"JavaScript", "node", false}, // JavaScript without context is not testable + {"Shell", "mongodb-shell", false}, + {"Ruby", "ruby-driver", false}, + {"Unknown", "", false}, + } + + for _, tc := range testCases { + result := isTestable(tc.product, tc.contentDir) + if result != tc.expected { + t.Errorf("isTestable(%q, %q) = %v, expected %v", tc.product, tc.contentDir, result, tc.expected) + } + } +} + +// TestIsMaybeTestable tests the isMaybeTestable function. +func TestIsMaybeTestable(t *testing.T) { + testCases := []struct { + product string + expected bool + }{ + {"JavaScript", true}, + {"Shell", true}, + {"Python", false}, + {"Node.js", false}, + {"MongoDB Shell", false}, + {"Unknown", false}, + } + + for _, tc := range testCases { + result := isMaybeTestable(tc.product) + if result != tc.expected { + t.Errorf("isMaybeTestable(%q) = %v, expected %v", tc.product, result, tc.expected) + } + } +} + +// TestParseComposableOptions tests the parseComposableOptions function. 
+func TestParseComposableOptions(t *testing.T) { + testCases := []struct { + options string + expectedLanguage string + expectedInterface string + }{ + {"language=python; interface=driver", "python", "driver"}, + {"language=nodejs", "nodejs", ""}, + {"interface=mongosh", "", "mongosh"}, + {"language=java; interface=compass", "java", "compass"}, + {"language-atlas-only=python", "python", ""}, + {"driver-lang=go", "go", ""}, + {"interface-local-only=mongosh", "", "mongosh"}, + {"", "", ""}, + {"invalid", "", ""}, + } + + for _, tc := range testCases { + ctx := parseComposableOptions(tc.options) + if ctx.Language != tc.expectedLanguage { + t.Errorf("parseComposableOptions(%q).Language = %q, expected %q", + tc.options, ctx.Language, tc.expectedLanguage) + } + if ctx.Interface != tc.expectedInterface { + t.Errorf("parseComposableOptions(%q).Interface = %q, expected %q", + tc.options, ctx.Interface, tc.expectedInterface) + } + } +} + +// TestBuildPageReport tests the BuildPageReport function. 
+func TestBuildPageReport(t *testing.T) { + analysis := &PageAnalysis{ + Rank: 1, + URL: "www.mongodb.com/docs/test/", + SourcePath: "/path/to/source.rst", + ContentDir: "pymongo-driver", + CodeExamples: []CodeExample{ + {Type: "literalinclude", Language: "python", Product: "Python", IsTestable: true, IsTested: true}, + {Type: "code-block", Language: "python", Product: "Python", IsTestable: true, IsTested: false}, + {Type: "io-code-block", Language: "javascript", Product: "Node.js", IsInput: true, IsTestable: true}, + {Type: "io-code-block", Language: "javascript", Product: "Node.js", IsOutput: true, IsTestable: true}, + {Type: "code-block", Language: "json", Product: "JSON", IsTestable: false}, + }, + } + + report := BuildPageReport(analysis) + + if report.Rank != 1 { + t.Errorf("Expected Rank 1, got %d", report.Rank) + } + if report.TotalExamples != 5 { + t.Errorf("Expected TotalExamples 5, got %d", report.TotalExamples) + } + if report.TotalInput != 1 { + t.Errorf("Expected TotalInput 1, got %d", report.TotalInput) + } + if report.TotalOutput != 1 { + t.Errorf("Expected TotalOutput 1, got %d", report.TotalOutput) + } + if report.TotalTested != 1 { + t.Errorf("Expected TotalTested 1, got %d", report.TotalTested) + } + if report.TotalTestable != 4 { + t.Errorf("Expected TotalTestable 4, got %d", report.TotalTestable) + } + + // Check Python stats + pythonStats, ok := report.ByProduct["Python"] + if !ok { + t.Fatal("Expected Python in ByProduct") + } + if pythonStats.TotalCount != 2 { + t.Errorf("Expected Python TotalCount 2, got %d", pythonStats.TotalCount) + } + if pythonStats.TestedCount != 1 { + t.Errorf("Expected Python TestedCount 1, got %d", pythonStats.TestedCount) + } + + // Check Node.js stats + nodeStats, ok := report.ByProduct["Node.js"] + if !ok { + t.Fatal("Expected Node.js in ByProduct") + } + if nodeStats.TotalCount != 2 { + t.Errorf("Expected Node.js TotalCount 2, got %d", nodeStats.TotalCount) + } + if nodeStats.InputCount != 1 { + 
t.Errorf("Expected Node.js InputCount 1, got %d", nodeStats.InputCount) + } + if nodeStats.OutputCount != 1 { + t.Errorf("Expected Node.js OutputCount 1, got %d", nodeStats.OutputCount) + } +} + +// TestEscapeCSV tests the escapeCSV function. +func TestEscapeCSV(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + {"simple", "simple"}, + {"with,comma", `"with,comma"`}, + {`with"quote`, `"with""quote"`}, + {"with\nnewline", `"with` + "\n" + `newline"`}, + {"", ""}, + {"normal text", "normal text"}, + } + + for _, tc := range testCases { + result := escapeCSV(tc.input) + if result != tc.expected { + t.Errorf("escapeCSV(%q) = %q, expected %q", tc.input, result, tc.expected) + } + } +} + +// TestIsMongoShellContext tests the isMongoShellContext function. +func TestIsMongoShellContext(t *testing.T) { + testCases := []struct { + contentDir string + contexts []CodeContext + expected bool + }{ + {"mongodb-shell", nil, true}, + {"mongodb-shell", []CodeContext{}, true}, + {"pymongo-driver", []CodeContext{{Interface: "mongosh"}}, true}, + {"node", []CodeContext{{Interface: "driver"}}, false}, + {"manual", []CodeContext{}, false}, + {"", nil, false}, + } + + for _, tc := range testCases { + result := isMongoShellContext(tc.contentDir, tc.contexts) + if result != tc.expected { + t.Errorf("isMongoShellContext(%q, %v) = %v, expected %v", + tc.contentDir, tc.contexts, result, tc.expected) + } + } +} + +// TestDetermineProduct tests the determineProduct function. 
+func TestDetermineProduct(t *testing.T) { + // Create mock mappings + mappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{ + "python": "Python", + "nodejs": "Node.js", + "java-sync": "Java (Sync)", + }, + ComposableLanguageToProduct: map[string]string{ + "python": "Python", + "nodejs": "Node.js", + "go": "Go", + }, + ComposableInterfaceToProduct: map[string]string{ + "mongosh": "MongoDB Shell", + "driver": "Driver", + "compass": "Compass", + }, + } + + testCases := []struct { + name string + language string + contentDir string + contexts []CodeContext + expected string + }{ + // Non-driver languages bypass context + {"bash bypasses context", "bash", "pymongo-driver", []CodeContext{{Language: "python"}}, "Shell"}, + {"json bypasses context", "json", "node", []CodeContext{{TabID: "nodejs"}}, "JSON"}, + {"yaml bypasses context", "yaml", "golang", nil, "YAML"}, + {"text bypasses context", "text", "manual", nil, "Text"}, + + // MongoDB Shell context + {"shell in mongosh dir", "shell", "mongodb-shell", nil, "MongoDB Shell"}, + {"javascript in mongosh context", "javascript", "", []CodeContext{{Interface: "mongosh"}}, "MongoDB Shell"}, + {"shell outside mongosh", "shell", "manual", nil, "Shell"}, + + // Tab context + {"python tab", "python", "", []CodeContext{{TabID: "python"}}, "Python"}, + {"nodejs tab", "javascript", "", []CodeContext{{TabID: "nodejs"}}, "Node.js"}, + + // Composable language context + {"go composable", "go", "", []CodeContext{{Language: "go"}}, "Go"}, + + // Content directory fallback + {"pymongo content dir", "python", "pymongo-driver", nil, "Python"}, + {"node content dir", "javascript", "node", nil, "Node.js"}, + + // Language fallback + {"python language", "python", "", nil, "Python"}, + {"ruby language", "ruby", "", nil, "Ruby"}, + + // Unknown + {"empty language", "", "", nil, "Unknown"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := determineProduct(tc.language, tc.contentDir, 
tc.contexts, mappings) + if result != tc.expected { + t.Errorf("determineProduct(%q, %q, %v) = %q, expected %q", + tc.language, tc.contentDir, tc.contexts, result, tc.expected) + } + }) + } +} + +// TestGetLanguage tests the getLanguage function. +func TestGetLanguage(t *testing.T) { + testCases := []struct { + name string + options map[string]string + defaultLang string + expected string + }{ + {"language option", map[string]string{"language": "python"}, "javascript", "python"}, + {"default lang", map[string]string{}, "javascript", "javascript"}, + {"empty default", map[string]string{}, "", "undefined"}, + {"empty language option", map[string]string{"language": ""}, "go", "go"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + directive := rst.Directive{Options: tc.options} + result := getLanguage(directive, tc.defaultLang) + if result != tc.expected { + t.Errorf("getLanguage(%v, %q) = %q, expected %q", + tc.options, tc.defaultLang, result, tc.expected) + } + }) + } +} + +// TestProcessDirective tests the processDirective function. 
+func TestProcessDirective(t *testing.T) { + mappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{"python": "Python", "nodejs": "Node.js"}, + ComposableLanguageToProduct: map[string]string{"python": "Python", "nodejs": "Node.js"}, + ComposableInterfaceToProduct: map[string]string{"mongosh": "MongoDB Shell"}, + } + + testCases := []struct { + name string + directive rst.Directive + contentDir string + contexts []CodeContext + expectedCount int + expectedType string + expectedLang string + expectedProduct string + }{ + { + name: "literalinclude with tested path", + directive: rst.Directive{ + Type: rst.LiteralInclude, + Argument: "/code-examples/tested/python/example.py", + Options: map[string]string{"language": "python"}, + }, + contentDir: "pymongo-driver", + contexts: nil, + expectedCount: 1, + expectedType: "literalinclude", + expectedLang: "python", + expectedProduct: "Python", + }, + { + name: "code-block with language argument", + directive: rst.Directive{ + Type: rst.CodeBlock, + Argument: "javascript", + Options: map[string]string{}, + }, + contentDir: "node", + contexts: nil, + expectedCount: 1, + expectedType: "code-block", + expectedLang: "javascript", + expectedProduct: "Node.js", + }, + { + name: "code-block with json bypasses context", + directive: rst.Directive{ + Type: rst.CodeBlock, + Argument: "json", + Options: map[string]string{}, + }, + contentDir: "pymongo-driver", + contexts: []CodeContext{{Language: "python"}}, + expectedCount: 1, + expectedType: "code-block", + expectedLang: "json", + expectedProduct: "JSON", + }, + { + name: "io-code-block with input and output", + directive: rst.Directive{ + Type: rst.IoCodeBlock, + Options: map[string]string{"language": "python"}, + InputDirective: &rst.SubDirective{ + Argument: "/code-examples/input.py", + Options: map[string]string{"language": "python"}, + }, + OutputDirective: &rst.SubDirective{ + Argument: "/code-examples/output.txt", + Options: map[string]string{"language": 
"text"}, + }, + }, + contentDir: "pymongo-driver", + contexts: nil, + expectedCount: 2, // input + output + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + examples := processDirective(tc.directive, "/test/source.rst", tc.contentDir, tc.contexts, mappings) + + if len(examples) != tc.expectedCount { + t.Errorf("Expected %d examples, got %d", tc.expectedCount, len(examples)) + return + } + + if tc.expectedCount > 0 && tc.expectedType != "" { + if examples[0].Type != tc.expectedType { + t.Errorf("Expected type %q, got %q", tc.expectedType, examples[0].Type) + } + } + + if tc.expectedLang != "" && examples[0].Language != tc.expectedLang { + t.Errorf("Expected language %q, got %q", tc.expectedLang, examples[0].Language) + } + + if tc.expectedProduct != "" && examples[0].Product != tc.expectedProduct { + t.Errorf("Expected product %q, got %q", tc.expectedProduct, examples[0].Product) + } + }) + } +} + +// TestProcessDirectiveIOCodeBlock tests io-code-block processing in detail. 
+func TestProcessDirectiveIOCodeBlock(t *testing.T) { + mappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{}, + ComposableLanguageToProduct: map[string]string{}, + ComposableInterfaceToProduct: map[string]string{}, + } + + directive := rst.Directive{ + Type: rst.IoCodeBlock, + Options: map[string]string{}, + InputDirective: &rst.SubDirective{ + Argument: "/code-examples/tested/python/input.py", + Options: map[string]string{"language": "python"}, + }, + OutputDirective: &rst.SubDirective{ + Argument: "/code-examples/output.json", + Options: map[string]string{"language": "json"}, + }, + } + + examples := processDirective(directive, "/test/source.rst", "pymongo-driver", nil, mappings) + + if len(examples) != 2 { + t.Fatalf("Expected 2 examples, got %d", len(examples)) + } + + // Check input + input := examples[0] + if !input.IsInput { + t.Error("Expected first example to be input") + } + if input.IsOutput { + t.Error("Expected first example to not be output") + } + if !input.IsTested { + t.Error("Expected input to be tested (path contains /tested/)") + } + if input.Language != "python" { + t.Errorf("Expected input language 'python', got %q", input.Language) + } + + // Check output + output := examples[1] + if !output.IsOutput { + t.Error("Expected second example to be output") + } + if output.IsInput { + t.Error("Expected second example to not be input") + } + if output.IsTested { + t.Error("Expected output to not be tested") + } + if output.Language != "json" { + t.Errorf("Expected output language 'json', got %q", output.Language) + } +} + +// TestParseFileContexts tests the parseFileContexts function. 
+func TestParseFileContexts(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "testable-code-test", "content", "test-project", "source") + + t.Run("file with tabs", func(t *testing.T) { + filePath := filepath.Join(testDataDir, "with-tabs.rst") + contexts, err := parseFileContexts(filePath) + if err != nil { + t.Fatalf("parseFileContexts failed: %v", err) + } + + // Should find tab contexts + if len(contexts) == 0 { + t.Error("Expected to find contexts") + } + + // Check that we found at least one tabid + foundTabID := false + for _, ctx := range contexts { + if ctx.TabID != "" { + foundTabID = true + break + } + } + if !foundTabID { + t.Error("Expected to find at least one TabID context") + } + }) + + t.Run("file with composable tutorial", func(t *testing.T) { + filePath := filepath.Join(testDataDir, "with-selected-content.rst") + contexts, err := parseFileContexts(filePath) + if err != nil { + t.Fatalf("parseFileContexts failed: %v", err) + } + + // Should find composable context + if len(contexts) == 0 { + t.Error("Expected to find contexts") + } + + // Check that we found language or interface from composable options + foundComposable := false + for _, ctx := range contexts { + if ctx.Language != "" || ctx.Interface != "" { + foundComposable = true + break + } + } + if !foundComposable { + t.Error("Expected to find composable context with language or interface") + } + }) + + t.Run("simple file without context", func(t *testing.T) { + filePath := filepath.Join(testDataDir, "simple-code.rst") + contexts, err := parseFileContexts(filePath) + if err != nil { + t.Fatalf("parseFileContexts failed: %v", err) + } + + // Should return at least one empty context + if len(contexts) == 0 { + t.Error("Expected at least one context (even if empty)") + } + }) +} + +// TestParseSelectedContentBlocks tests the parseSelectedContentBlocks function. 
+func TestParseSelectedContentBlocks(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "testable-code-test", "content", "test-project", "source") + filePath := filepath.Join(testDataDir, "with-selected-content.rst") + + result, err := parseSelectedContentBlocks(filePath) + if err != nil { + t.Fatalf("parseSelectedContentBlocks failed: %v", err) + } + + // The function should map include paths to their selections + // We expect to find mappings for the includes in selected-content blocks + if len(result) == 0 { + t.Log("No selected-content mappings found (this may be expected if includes don't resolve)") + } + + // Check that any found mappings have valid selection values + for path, selection := range result { + if selection == "" { + t.Errorf("Empty selection for path %q", path) + } + t.Logf("Found mapping: %s -> %s", path, selection) + } +} + +// TestCollectCodeExamples tests the collectCodeExamples function. +func TestCollectCodeExamples(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "testable-code-test", "content", "test-project", "source") + + mappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{"python": "Python", "nodejs": "Node.js", "java-sync": "Java (Sync)"}, + ComposableLanguageToProduct: map[string]string{"python": "Python", "nodejs": "Node.js", "go": "Go"}, + ComposableInterfaceToProduct: map[string]string{"mongosh": "MongoDB Shell", "driver": "Driver"}, + } + + t.Run("simple code file", func(t *testing.T) { + filePath := filepath.Join(testDataDir, "simple-code.rst") + visited := make(map[string]bool) + + examples, err := collectCodeExamples(filePath, "test-project", visited, mappings) + if err != nil { + t.Fatalf("collectCodeExamples failed: %v", err) + } + + // Should find 4 code blocks: python, javascript, json, sh + if len(examples) != 4 { + t.Errorf("Expected 4 examples, got %d", len(examples)) + } + + // Check that we found the expected languages + languages := 
make(map[string]bool) + for _, ex := range examples { + languages[ex.Language] = true + } + + expectedLangs := []string{"python", "javascript", "json", "sh"} + for _, lang := range expectedLangs { + if !languages[lang] { + t.Errorf("Expected to find language %q", lang) + } + } + }) + + t.Run("file with tabs", func(t *testing.T) { + filePath := filepath.Join(testDataDir, "with-tabs.rst") + visited := make(map[string]bool) + + examples, err := collectCodeExamples(filePath, "test-project", visited, mappings) + if err != nil { + t.Fatalf("collectCodeExamples failed: %v", err) + } + + // Should find 3 code blocks: python, javascript, java + if len(examples) < 3 { + t.Errorf("Expected at least 3 examples, got %d", len(examples)) + } + }) +} + +// TestMergeProjectComposables tests the MergeProjectComposables function. +func TestMergeProjectComposables(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "testable-code-test", "content", "test-project", "source") + + baseMappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{"python": "Python"}, + ComposableLanguageToProduct: map[string]string{"python": "Python"}, + ComposableInterfaceToProduct: map[string]string{"driver": "Driver"}, + } + + t.Run("merges project composables", func(t *testing.T) { + sourcePath := filepath.Join(testDataDir, "simple-code.rst") + absPath, _ := filepath.Abs(sourcePath) + + merged := MergeProjectComposables(baseMappings, absPath) + + // Should have base mappings + if merged.DriversTabIDToProduct["python"] != "Python" { + t.Error("Expected base mapping for python tab") + } + + // Should have project-specific composables merged in + // The test project defines nodejs and go in language composable + if merged.ComposableLanguageToProduct["nodejs"] != "Node.js" { + t.Error("Expected project composable for nodejs") + } + if merged.ComposableLanguageToProduct["go"] != "Go" { + t.Error("Expected project composable for go") + } + + // Should have interface 
composables + if merged.ComposableInterfaceToProduct["mongosh"] != "MongoDB Shell" { + t.Error("Expected project composable for mongosh interface") + } + }) + + t.Run("returns base mappings for nonexistent path", func(t *testing.T) { + merged := MergeProjectComposables(baseMappings, "/nonexistent/path/file.rst") + + // Should return base mappings unchanged + if merged.DriversTabIDToProduct["python"] != "Python" { + t.Error("Expected base mapping to be preserved") + } + }) +} + +// TestAnalyzePage tests the AnalyzePage function. +// Note: AnalyzePage requires a URLMapping which involves URL resolution. +// The URL resolution expects .txt files (MongoDB docs monorepo format). +// We create .txt copies of our test .rst files to test the full integration. +func TestAnalyzePage(t *testing.T) { + testDataDir := filepath.Join("..", "..", "..", "testdata", "testable-code-test", "content", "test-project", "source") + absTestDataDir, _ := filepath.Abs(testDataDir) + monorepoPath := filepath.Join(absTestDataDir, "..", "..", "..") + + // Create .txt copies of our .rst test files for URL resolution + rstFiles := []string{"simple-code.rst", "with-tabs.rst", "with-selected-content.rst"} + for _, rstFile := range rstFiles { + rstPath := filepath.Join(absTestDataDir, rstFile) + txtPath := filepath.Join(absTestDataDir, rstFile[:len(rstFile)-4]+".txt") + content, err := os.ReadFile(rstPath) + if err != nil { + t.Fatalf("Failed to read %s: %v", rstPath, err) + } + if err := os.WriteFile(txtPath, content, 0644); err != nil { + t.Fatalf("Failed to write %s: %v", txtPath, err) + } + defer os.Remove(txtPath) + } + + mappings := &ProductMappings{ + DriversTabIDToProduct: map[string]string{"python": "Python", "nodejs": "Node.js", "java-sync": "Java (Sync)"}, + ComposableLanguageToProduct: map[string]string{"python": "Python", "nodejs": "Node.js", "go": "Go"}, + ComposableInterfaceToProduct: map[string]string{"mongosh": "MongoDB Shell", "driver": "Driver"}, + } + + // Create a mock 
URLMapping that maps test URLs to our test files + urlMapping := &config.URLMapping{ + URLSlugToProject: map[string]string{ + "test-project": "test-project", + }, + ProjectToContentDir: map[string]string{ + "test-project": "test-project", + }, + ProjectBranches: map[string][]string{ + "test-project": {"current"}, + }, + MonorepoPath: monorepoPath, + } + + t.Run("analyzes simple code file", func(t *testing.T) { + entry := PageEntry{ + Rank: 1, + URL: "https://www.mongodb.com/docs/test-project/current/simple-code/", + } + + analysis, err := AnalyzePage(entry, urlMapping, mappings) + if err != nil { + t.Fatalf("AnalyzePage failed: %v", err) + } + + // Should find 4 code examples + if len(analysis.CodeExamples) != 4 { + t.Errorf("Expected 4 code examples, got %d", len(analysis.CodeExamples)) + } + + // Check that products are assigned + for _, ex := range analysis.CodeExamples { + if ex.Product == "" || ex.Product == "Unknown" { + t.Errorf("Expected product to be assigned for language %q, got %q", ex.Language, ex.Product) + } + } + }) + + t.Run("analyzes file with tabs", func(t *testing.T) { + entry := PageEntry{ + Rank: 2, + URL: "https://www.mongodb.com/docs/test-project/current/with-tabs/", + } + + analysis, err := AnalyzePage(entry, urlMapping, mappings) + if err != nil { + t.Fatalf("AnalyzePage failed: %v", err) + } + + // Should find at least 3 code examples (one per tab) + if len(analysis.CodeExamples) < 3 { + t.Errorf("Expected at least 3 code examples, got %d", len(analysis.CodeExamples)) + } + + // Check that products are assigned based on tab context + products := make(map[string]bool) + for _, ex := range analysis.CodeExamples { + products[ex.Product] = true + } + + // Should have Python, Node.js, and Java products + expectedProducts := []string{"Python", "Node.js", "Java (Sync)"} + for _, prod := range expectedProducts { + if !products[prod] { + t.Errorf("Expected to find product %q", prod) + } + } + }) + + t.Run("returns error for nonexistent URL", func(t 
*testing.T) { + entry := PageEntry{ + Rank: 99, + URL: "https://www.mongodb.com/docs/nonexistent-project/current/page/", + } + + _, err := AnalyzePage(entry, urlMapping, mappings) + if err == nil { + t.Error("Expected error for nonexistent URL") + } + }) + + t.Run("analyzes file with composable tutorial", func(t *testing.T) { + entry := PageEntry{ + Rank: 3, + URL: "https://www.mongodb.com/docs/test-project/current/with-selected-content/", + } + + analysis, err := AnalyzePage(entry, urlMapping, mappings) + if err != nil { + t.Fatalf("AnalyzePage failed: %v", err) + } + + // Should find code examples from selected-content blocks + if len(analysis.CodeExamples) == 0 { + t.Error("Expected to find code examples in composable tutorial") + } + + // Log what we found for debugging + for _, ex := range analysis.CodeExamples { + t.Logf("Found: type=%s, lang=%s, product=%s, source=%s", + ex.Type, ex.Language, ex.Product, ex.SourceFile) + } + }) +} + diff --git a/commands/report/testable-code/types.go b/commands/report/testable-code/types.go new file mode 100644 index 0000000..801e2d2 --- /dev/null +++ b/commands/report/testable-code/types.go @@ -0,0 +1,289 @@ +// Package testablecode provides the testable-code subcommand for the report command. +package testablecode + +import ( + "fmt" + "sync" + + "github.com/grove-platform/audit-cli/internal/rst" + "github.com/grove-platform/audit-cli/internal/snooty" +) + +// PageEntry represents a single page from the analytics CSV. +type PageEntry struct { + Rank int + URL string +} + +// CodeExample represents a single code example found in a page. 
type CodeExample struct {
	// Type is the directive type: literalinclude, code-block, code, io-code-block
	Type string
	// Language is the programming language (from :language: option or argument, or file extension)
	Language string
	// Product is the MongoDB product context (from tabs, composables, or content directory)
	Product string
	// IsInput indicates if this is an input block (for io-code-block)
	IsInput bool
	// IsOutput indicates if this is an output block (for io-code-block)
	IsOutput bool
	// IsTested indicates if the code example references tested code
	IsTested bool
	// IsTestable indicates if the code example could be tested (based on product)
	IsTestable bool
	// IsMaybeTestable indicates the example uses a language that COULD be testable
	// (javascript, shell) but lacks proper context to determine definitively.
	// These are grey-area examples that may need manual review.
	IsMaybeTestable bool
	// FilePath is the path to the included file (for literalinclude or io-code-block)
	FilePath string
	// SourceFile is the RST file containing this code example
	SourceFile string
}

// PageAnalysis represents the analysis results for a single page.
//
// Rank and URL share their names with the PageEntry fields; presumably they
// are copied from the entry being analyzed (confirm against AnalyzePage).
type PageAnalysis struct {
	Rank int
	URL  string
	// SourcePath and ContentDir presumably hold the values returned by
	// URLMapping.ResolveURL for URL — TODO confirm against the caller.
	SourcePath string
	ContentDir string
	Error      string // Non-empty if page could not be analyzed
	// CodeExamples lists the code examples found for the page.
	CodeExamples []CodeExample
}

// ProductStats holds statistics for a single product/language.
//
// NOTE(review): the aggregation code is not part of this declaration; the
// counters presumably count CodeExample values whose matching Is* flag is
// set (InputCount ↔ IsInput, TestedCount ↔ IsTested, and so on) — confirm.
type ProductStats struct {
	Product            string
	TotalCount         int
	InputCount         int
	OutputCount        int
	TestedCount        int
	TestableCount      int
	MaybeTestableCount int
}

// PageReport holds the complete analysis for a page with aggregated stats.
type PageReport struct {
	// Rank, URL, SourcePath, ContentDir and Error share their names with the
	// PageAnalysis fields; presumably they are carried over unchanged — confirm.
	Rank       int
	URL        string
	SourcePath string
	ContentDir string
	Error      string
	// Total* fields are page-wide totals; presumably the sums of the
	// corresponding per-product counters in ByProduct — TODO confirm.
	TotalExamples      int
	TotalInput         int
	TotalOutput        int
	TotalTested        int
	TotalTestable      int
	TotalMaybeTestable int
	// ByProduct holds one ProductStats per product (presumably keyed by
	// ProductStats.Product — confirm against the code that fills it).
	ByProduct map[string]*ProductStats
}

// TestableProducts lists the products that have test infrastructure.
//
// WHY THIS EXISTS:
// MongoDB has automated testing infrastructure for code examples in certain driver
// documentation sets. This map identifies which products have that infrastructure,
// so we can report on how many code examples on a page COULD be tested.
//
// WHY RAW LANGUAGES ARE EXCLUDED:
// Raw language values like "javascript" and "shell" are intentionally excluded because
// many code examples use these languages without being actual Driver/Shell examples.
// For example:
// - A "javascript" code block might be a browser snippet, not a Node.js driver example
// - A "shell" code block might be a bash command, not a MongoDB Shell example
//
// Only properly contextualized examples are considered testable:
// - Examples in driver content directories (e.g., content/pymongo-driver)
// - Examples within driver tab sets (.. tabs-drivers:: with :tabid:)
// - Examples within composable tutorials with language/interface options
//
// The map includes both human-readable names (e.g., "Python") and internal IDs
// (e.g., "python") to handle both display names and raw values from rstspec.toml.
//
// Lookups are plain map lookups, so keys must match exactly (including case):
// "Python" and "python" are both listed, but a spelling such as "PYTHON" is not.
var TestableProducts = map[string]bool{
	"C#":            true,
	"csharp":        true,
	"Go":            true,
	"go":            true,
	"Java":          true,
	"Java (Sync)":   true,
	"java":          true,
	"java-sync":     true,
	"Node.js":       true,
	"nodejs":        true,
	"Python":        true,
	"python":        true,
	"MongoDB Shell": true,
	"mongosh":       true,
}

// MaybeTestableProducts lists products that COULD be testable but lack proper context.
//
// These are "grey area" examples where the language (javascript, shell) could represent
// testable code (Node.js driver, MongoDB Shell) but could also be non-testable content
// (other JavaScript, bash commands, output examples).
//
// Examples are marked as "maybe testable" when:
// - Language is "javascript" or "js" but not in a Node.js driver or MongoDB Shell context
// - Language is "shell" but not in a MongoDB Shell context
//
// These examples need manual review to determine if they should be tested.
//
// The keys are the capitalized product names ("JavaScript", "Shell"), not the
// raw language values; lookups are exact, case-sensitive map lookups.
var MaybeTestableProducts = map[string]bool{
	"JavaScript": true,
	"Shell":      true,
}

// TestableDrivers lists the driver project names that have test infrastructure.
// Used to highlight which drivers have test infrastructure in --list-drivers output.
// The keys are the Snooty project names (e.g. "golang"). Note that a project
// name can differ from the project's URL slug (the static URL mapping in
// internal/config maps the slug "drivers/go" to the project "golang").
var TestableDrivers = map[string]bool{
	"csharp":  true, // C# Driver
	"golang":  true, // Go Driver
	"java":    true, // Java Sync Driver
	"node":    true, // Node.js Driver
	"pymongo": true, // Python Driver
	// Note: mongodb-shell has test infrastructure but is not a driver (use --filter mongosh)
}

// ProductMappings holds the mappings from rstspec.toml for resolving
// tab IDs and composable options to human-readable product names.
//
// WHY RUNTIME LOADING FROM rstspec.toml:
// The rstspec.toml file in the snooty-parser repository is the canonical source
// of truth for all RST directive definitions, including tabs and composables.
// By loading these mappings at runtime (with caching), we ensure:
// 1. Mappings stay in sync with the actual documentation build system
// 2. New drivers/languages are automatically supported without code changes
// 3. We don't have to maintain duplicate hardcoded mappings
//
// The mappings are cached for 24 hours to avoid repeated network requests.
// See internal/rst/rstspec.go for the caching implementation.
+type ProductMappings struct { + // DriversTabIDToProduct maps driver tab IDs to product names. + // Example: "python" → "Python", "java-sync" → "Java (Sync)" + // Loaded from [tabs.drivers] in rstspec.toml. + DriversTabIDToProduct map[string]string + + // ComposableLanguageToProduct maps composable language IDs to product names. + // Example: "nodejs" → "Node.js", "csharp" → "C#" + // Loaded from [[composables]] where id="language" in rstspec.toml. + ComposableLanguageToProduct map[string]string + + // ComposableInterfaceToProduct maps composable interface IDs to product names. + // Example: "mongosh" → "MongoDB Shell", "compass" → "Compass" + // Loaded from [[composables]] where id="interface" in rstspec.toml. + ComposableInterfaceToProduct map[string]string +} + +// LoadProductMappings fetches rstspec.toml and builds the product mappings. +// +// This function fetches the canonical rstspec.toml from the snooty-parser repository +// (with 24-hour caching) and extracts the mappings for: +// - Driver tabs: [tabs.drivers] section +// - Language composables: [[composables]] where id="language" +// - Interface composables: [[composables]] where id="interface" +// +// If the network is unavailable, it falls back to an expired cache if available. +func LoadProductMappings() (*ProductMappings, error) { + rstspec, err := rst.FetchRstspec() + if err != nil { + return nil, fmt.Errorf("failed to fetch rstspec.toml: %w", err) + } + + mappings := &ProductMappings{ + DriversTabIDToProduct: rstspec.BuildTabIDToTitleMap("drivers"), + ComposableLanguageToProduct: rstspec.BuildComposableIDToTitleMap("language"), + ComposableInterfaceToProduct: rstspec.BuildComposableIDToTitleMap("interface"), + } + + return mappings, nil +} + +// snootyCache caches parsed snooty.toml files by their path to avoid re-parsing. 
+var snootyCache = struct { + sync.RWMutex + configs map[string]*snooty.Config +}{configs: make(map[string]*snooty.Config)} + +// MergeProjectComposables creates a copy of the base mappings and merges in +// composables from the project's snooty.toml file. +// +// This function: +// 1. Finds the project's snooty.toml by walking up from the source file path +// 2. Parses the snooty.toml (with caching to avoid re-parsing for each page) +// 3. Merges any "language" or "interface" composables into the mappings +// +// Project-specific composables take precedence over rstspec.toml definitions, +// allowing projects like Atlas to define custom composables that override defaults. +// +// Parameters: +// - baseMappings: The base mappings loaded from rstspec.toml +// - sourcePath: Absolute path to the source file being analyzed +// +// Returns: +// - *ProductMappings: A new ProductMappings with project composables merged in +func MergeProjectComposables(baseMappings *ProductMappings, sourcePath string) *ProductMappings { + // Find the project's snooty.toml + snootyPath, err := snooty.FindProjectSnootyTOML(sourcePath) + if err != nil || snootyPath == "" { + // No snooty.toml found, return base mappings as-is + return baseMappings + } + + // Check cache first + snootyCache.RLock() + cachedConfig, found := snootyCache.configs[snootyPath] + snootyCache.RUnlock() + + var config *snooty.Config + if found { + config = cachedConfig + } else { + // Parse the snooty.toml + config, err = snooty.ParseFile(snootyPath) + if err != nil { + // Failed to parse, return base mappings + return baseMappings + } + + // Cache the parsed config + snootyCache.Lock() + snootyCache.configs[snootyPath] = config + snootyCache.Unlock() + } + + // If no composables defined, return base mappings + if len(config.Composables) == 0 { + return baseMappings + } + + // Create a copy of the base mappings + merged := &ProductMappings{ + DriversTabIDToProduct: make(map[string]string), + 
ComposableLanguageToProduct: make(map[string]string), + ComposableInterfaceToProduct: make(map[string]string), + } + + // Copy base mappings + for k, v := range baseMappings.DriversTabIDToProduct { + merged.DriversTabIDToProduct[k] = v + } + for k, v := range baseMappings.ComposableLanguageToProduct { + merged.ComposableLanguageToProduct[k] = v + } + for k, v := range baseMappings.ComposableInterfaceToProduct { + merged.ComposableInterfaceToProduct[k] = v + } + + // Merge project-specific composables (project takes precedence) + projectLanguage := snooty.BuildComposableIDToTitleMap(config.Composables, "language") + for k, v := range projectLanguage { + merged.ComposableLanguageToProduct[k] = v + } + + projectInterface := snooty.BuildComposableIDToTitleMap(config.Composables, "interface") + for k, v := range projectInterface { + merged.ComposableInterfaceToProduct[k] = v + } + + return merged +} diff --git a/internal/config/url_mapping.go b/internal/config/url_mapping.go new file mode 100644 index 0000000..41290a2 --- /dev/null +++ b/internal/config/url_mapping.go @@ -0,0 +1,761 @@ +// Package config provides configuration management for audit-cli. +// This file handles URL-to-source-file mapping for MongoDB documentation. + +package config + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/BurntSushi/toml" +) + +// SnootyDataAPIURL is the endpoint for fetching project metadata. +const SnootyDataAPIURL = "https://snooty-data-api.mongodb.com/prod/projects" + +// CacheTTL is the time-to-live for the cached URL mapping (24 hours). +const CacheTTL = 24 * time.Hour + +// CacheDir is the directory for storing cache files. +const CacheDir = ".audit-cli" + +// CacheFileName is the name of the URL mapping cache file. +const CacheFileName = "url-mapping-cache.json" + +// URLMappingCache represents the cached URL mapping data. 
type URLMappingCache struct {
	// Timestamp records when the data was produced; loadCache compares it
	// against CacheTTL to decide whether the cache file is still usable.
	Timestamp   time.Time           `json:"timestamp"`
	Mapping     map[string]string   `json:"mapping"`      // URL slug -> snooty project name
	Branches    map[string][]string `json:"branches"`     // project name -> list of version slugs
	DriverSlugs []string            `json:"driver_slugs"` // URL slugs for driver documentation
}

// SnootyAPIResponse represents the response from the Snooty Data API.
// Only the top-level "data" array is decoded.
type SnootyAPIResponse struct {
	Data []SnootyProject `json:"data"`
}

// SnootyProject represents a project in the Snooty Data API response.
//
// fetchFromAPI reads Project (stored as the mapping value), DisplayName
// (passed to isDriverSlug) and Branches; RepoName is decoded but not read
// by fetchFromAPI.
type SnootyProject struct {
	Project     string         `json:"project"`
	DisplayName string         `json:"displayName"`
	RepoName    string         `json:"repoName"`
	Branches    []SnootyBranch `json:"branches"`
}

// SnootyBranch represents a branch in a Snooty project.
//
// fetchFromAPI reads Active (via isActive) and FullURL (the URL slug is
// extracted from the part after "/docs/"); the remaining fields are decoded
// but not read by fetchFromAPI.
type SnootyBranch struct {
	GitBranchName  string `json:"gitBranchName"`
	Label          string `json:"label"`
	Active         any    `json:"active"` // Can be bool or string "true"
	FullURL        string `json:"fullUrl"`
	IsStableBranch bool   `json:"isStableBranch"`
}

// SnootyToml represents the relevant fields from a snooty.toml file.
// Only the project name is decoded; parseSnootyName rejects files where it is empty.
type SnootyToml struct {
	Name string `toml:"name"`
}

// URLMapping provides URL-to-source-file resolution.
type URLMapping struct {
	// URLSlugToProject maps URL slugs to snooty project names
	URLSlugToProject map[string]string
	// ProjectToContentDir maps snooty project names to content directories
	// (directory names directly under <MonorepoPath>/content)
	ProjectToContentDir map[string]string
	// ProjectBranches maps project names to available version slugs
	ProjectBranches map[string][]string
	// DriverSlugs contains URL slugs for driver documentation (excludes mongodb-shell)
	DriverSlugs []string
	// MonorepoPath is the path to the docs monorepo
	// (empty when built by GetURLMappingWithoutMonorepo)
	MonorepoPath string
}

// getCachePath returns the path to the cache file.
+func getCachePath() (string, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + return filepath.Join(homeDir, CacheDir, CacheFileName), nil +} + +// loadCache loads the URL mapping from the cache file. +func loadCache() (*URLMappingCache, error) { + cachePath, err := getCachePath() + if err != nil { + return nil, err + } + + data, err := os.ReadFile(cachePath) + if err != nil { + return nil, err + } + + var cache URLMappingCache + if err := json.Unmarshal(data, &cache); err != nil { + return nil, fmt.Errorf("failed to parse cache: %w", err) + } + + // Check if cache is expired + if time.Since(cache.Timestamp) > CacheTTL { + return nil, fmt.Errorf("cache expired") + } + + return &cache, nil +} + +// saveCache saves the URL mapping to the cache file. +func saveCache(cache *URLMappingCache) error { + cachePath, err := getCachePath() + if err != nil { + return err + } + + // Ensure cache directory exists + cacheDir := filepath.Dir(cachePath) + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return fmt.Errorf("failed to create cache directory: %w", err) + } + + data, err := json.MarshalIndent(cache, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal cache: %w", err) + } + + if err := os.WriteFile(cachePath, data, 0644); err != nil { + return fmt.Errorf("failed to write cache: %w", err) + } + + return nil +} + +// isActive checks if a branch is active (handles both bool and string "true"). +func isActive(active any) bool { + switch v := active.(type) { + case bool: + return v + case string: + return v == "true" + default: + return false + } +} + +// fetchFromAPI fetches URL mapping from the Snooty Data API. 
+func fetchFromAPI() (*URLMappingCache, error) { + resp, err := http.Get(SnootyDataAPIURL) + if err != nil { + return nil, fmt.Errorf("failed to fetch from API: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API returned status %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read API response: %w", err) + } + + var apiResp SnootyAPIResponse + if err := json.Unmarshal(body, &apiResp); err != nil { + return nil, fmt.Errorf("failed to parse API response: %w", err) + } + + cache := &URLMappingCache{ + Timestamp: time.Now(), + Mapping: make(map[string]string), + Branches: make(map[string][]string), + DriverSlugs: []string{}, + } + + // Regex to extract URL slug from fullUrl + slugRegex := regexp.MustCompile(`/docs/(.+?)/?$`) + + // Track driver slugs using a map to avoid duplicates + driverSlugSet := make(map[string]bool) + + for _, project := range apiResp.Data { + var versionSlugs []string + var baseSlugForProject string + + for _, branch := range project.Branches { + if !isActive(branch.Active) { + continue + } + + match := slugRegex.FindStringSubmatch(branch.FullURL) + if match == nil { + continue + } + + fullPath := match[1] + // Extract base slug (without version) and version + // e.g., "drivers/go/current" -> base="drivers/go", version="current" + parts := strings.Split(fullPath, "/") + + // Check if last part looks like a version + lastPart := parts[len(parts)-1] + if isVersionSlug(lastPart) { + versionSlugs = append(versionSlugs, lastPart) + // Use base path (without version) as the slug + baseSlug := strings.Join(parts[:len(parts)-1], "/") + if baseSlug != "" { + cache.Mapping[baseSlug] = project.Project + baseSlugForProject = baseSlug + } + } + // Also map the full path + cache.Mapping[fullPath] = project.Project + } + if len(versionSlugs) > 0 { + cache.Branches[project.Project] = versionSlugs + } + + // Identify driver projects 
by URL pattern or displayName + // Exclude mongodb-shell as it's not a driver + if baseSlugForProject != "" && project.Project != "mongodb-shell" { + if isDriverSlug(baseSlugForProject, project.DisplayName) { + driverSlugSet[baseSlugForProject] = true + } + } + } + + // Convert driver slug set to sorted slice + for slug := range driverSlugSet { + cache.DriverSlugs = append(cache.DriverSlugs, slug) + } + // Sort for deterministic output + sortStrings(cache.DriverSlugs) + + return cache, nil +} + +// isDriverSlug determines if a URL slug represents driver documentation. +// A slug is considered a driver if: +// - It starts with "drivers/" or "languages/" +// - OR the displayName contains "Driver" (case-insensitive) +// - OR it's in the standaloneDriverSlugs list (for edge cases) +// +// Excludes mongodb-shell which is handled separately (use --filter mongosh). +// Excludes ODMs (Mongoid, Entity Framework), connectors (Spark, Kafka), and other +// non-driver projects - we only want actual MongoDB drivers. +func isDriverSlug(slug, displayName string) bool { + // Check URL patterns - most drivers use "drivers/" or "languages/" prefixes + if strings.HasPrefix(slug, "drivers/") || strings.HasPrefix(slug, "languages/") { + return true + } + + // Check displayName for "Driver" (handles standalone drivers like ruby-driver + // which has URL slug "ruby-driver" and displayName "Ruby Driver") + if strings.Contains(strings.ToLower(displayName), "driver") { + return true + } + + // Standalone driver slugs that don't match the above patterns. + // These are edge cases where the URL slug doesn't start with "drivers/" or + // "languages/" AND the displayName doesn't contain "Driver". 
+ // + // As of 2026-01-08, the only such case is: + // - php-library: displayName is "PHP Library", URL is "php-library" + // + // NOT included (these are ODMs/connectors, not drivers): + // - mongoid: ODM for Ruby (displayName: "Mongoid") + // - entity-framework: ORM for C# (displayName: "Entity Framework") + // - spark-connector, kafka-connector: data connectors + standaloneDriverSlugs := map[string]bool{ + "php-library": true, + } + return standaloneDriverSlugs[slug] +} + +// sortStrings sorts a slice of strings in place - used to display the list of filters in alphabetical order. +func sortStrings(s []string) { + for i := 0; i < len(s); i++ { + for j := i + 1; j < len(s); j++ { + if s[i] > s[j] { + s[i], s[j] = s[j], s[i] + } + } + } +} + +// isVersionSlug checks if a string looks like a version slug. +func isVersionSlug(s string) bool { + versionPatterns := []string{ + "current", "upcoming", "stable", "master", "latest", + "manual", // MongoDB Manual uses "manual" as the current version directory + } + for _, p := range versionPatterns { + if s == p { + return true + } + } + // Check for version patterns like v8.0, v1.13, etc. + matched, _ := regexp.MatchString(`^v?\d+(\.\d+)*$`, s) + return matched +} + +// scanSnootyTomlFiles scans the monorepo for snooty.toml files and builds +// a mapping from snooty project name to content directory. 
+func scanSnootyTomlFiles(monorepoPath string) (map[string]string, error) { + projectToDir := make(map[string]string) + contentDir := filepath.Join(monorepoPath, "content") + + entries, err := os.ReadDir(contentDir) + if err != nil { + return nil, fmt.Errorf("failed to read content directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + dirName := entry.Name() + dirPath := filepath.Join(contentDir, dirName) + + // Check for snooty.toml directly in the project directory + snootyPath := filepath.Join(dirPath, "snooty.toml") + if name, err := parseSnootyName(snootyPath); err == nil { + projectToDir[name] = dirName + } + + // Check for versioned subdirectories + subEntries, err := os.ReadDir(dirPath) + if err != nil { + continue + } + + for _, subEntry := range subEntries { + if !subEntry.IsDir() { + continue + } + subDirName := subEntry.Name() + subSnootyPath := filepath.Join(dirPath, subDirName, "snooty.toml") + if name, err := parseSnootyName(subSnootyPath); err == nil { + // For versioned projects, store just the base directory name + // The version will be added from the URL during resolution + // Only set if not already set (prefer non-versioned snooty.toml) + if _, exists := projectToDir[name]; !exists { + projectToDir[name] = dirName + } + } + } + } + + return projectToDir, nil +} + +// parseSnootyName extracts the name field from a snooty.toml file. +func parseSnootyName(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + + var snootyToml SnootyToml + if _, err := toml.Decode(string(data), &snootyToml); err != nil { + return "", err + } + + if snootyToml.Name == "" { + return "", fmt.Errorf("no name field in snooty.toml") + } + + return snootyToml.Name, nil +} + +// GetURLMapping returns a URLMapping instance for resolving URLs to source files. +// It uses cached data if available and not expired, otherwise fetches from the API. 
+// Falls back to static mapping if API is unavailable. +func GetURLMapping(monorepoPath string) (*URLMapping, error) { + var cache *URLMappingCache + var err error + + // Try to load from cache first + cache, err = loadCache() + if err != nil { + // Cache miss or expired, try to fetch from API + cache, err = fetchFromAPI() + if err != nil { + // API failed, use static fallback + fmt.Fprintf(os.Stderr, "Warning: Could not fetch URL mapping from API (%v), using static fallback\n", err) + cache = getStaticFallback() + } else { + // Save to cache for next time + if saveErr := saveCache(cache); saveErr != nil { + fmt.Fprintf(os.Stderr, "Warning: Could not save URL mapping cache: %v\n", saveErr) + } + } + } + + // Merge special cases that aren't in the API data + mergeSpecialCases(cache) + + // Scan snooty.toml files to build project -> content dir mapping + projectToDir, err := scanSnootyTomlFiles(monorepoPath) + if err != nil { + return nil, fmt.Errorf("failed to scan snooty.toml files: %w", err) + } + + return &URLMapping{ + URLSlugToProject: cache.Mapping, + ProjectToContentDir: projectToDir, + ProjectBranches: cache.Branches, + DriverSlugs: cache.DriverSlugs, + MonorepoPath: monorepoPath, + }, nil +} + +// GetURLMappingWithoutMonorepo returns a URLMapping instance without requiring a monorepo path. +// This is useful for operations that only need API data (like listing drivers) and don't need +// to resolve local file paths. 
+func GetURLMappingWithoutMonorepo() (*URLMapping, error) { + var cache *URLMappingCache + var err error + + // Try to load from cache first + cache, err = loadCache() + if err != nil { + // Cache miss or expired, try to fetch from API + cache, err = fetchFromAPI() + if err != nil { + // API failed, use static fallback + fmt.Fprintf(os.Stderr, "Warning: Could not fetch URL mapping from API (%v), using static fallback\n", err) + cache = getStaticFallback() + } else { + // Save to cache for next time + if saveErr := saveCache(cache); saveErr != nil { + fmt.Fprintf(os.Stderr, "Warning: Could not save URL mapping cache: %v\n", saveErr) + } + } + } + + // Merge special cases that aren't in the API data + mergeSpecialCases(cache) + + return &URLMapping{ + URLSlugToProject: cache.Mapping, + ProjectToContentDir: make(map[string]string), // Empty - no monorepo scanning + ProjectBranches: cache.Branches, + DriverSlugs: cache.DriverSlugs, + MonorepoPath: "", + }, nil +} + +// mergeSpecialCases adds special URL mappings that aren't in the API data. +// These are edge cases where the URL slug doesn't follow the standard pattern. +func mergeSpecialCases(cache *URLMappingCache) { + specialCases := map[string]string{ + // Note: get-started is handled specially in ResolveURL because + // the slug itself is the page path (get-started.txt, not index.txt) + } + + for slug, project := range specialCases { + if _, exists := cache.Mapping[slug]; !exists { + cache.Mapping[slug] = project + } + } +} + +// specialPagePaths maps URL slugs to their actual page paths when the slug +// itself should be used as the page path instead of defaulting to "index". +var specialPagePaths = map[string]string{ + "get-started": "get-started", // /docs/get-started/ -> get-started.txt (not index.txt) +} + +// specialSlugToProject maps special URL slugs to their project names. +// These are cases not covered by the API data. 
+var specialSlugToProject = map[string]string{ + "get-started": "landing", +} + +// getStaticFallback returns a static URL mapping as a fallback when API is unavailable. +func getStaticFallback() *URLMappingCache { + return &URLMappingCache{ + Timestamp: time.Now(), + Mapping: map[string]string{ + "atlas": "cloud-docs", + "atlas/app-services": "atlas-app-services", + "atlas/architecture": "atlas-architecture", + "atlas/cli": "atlas-cli", + "atlas/device-sdks": "realm", + "atlas/government": "cloudgov", + "atlas/operator": "atlas-operator", + "bi-connector": "bi-connector", + "charts": "charts", + "cloud-manager": "cloud-manager", + "compass": "compass", + "database-tools": "database-tools", + "drivers": "drivers", + "drivers/csharp": "csharp", + "drivers/go": "golang", + "drivers/java/sync": "java", + "drivers/kotlin/coroutine": "kotlin", + "drivers/node": "node", + "drivers/php/laravel-mongodb": "laravel", + "drivers/rust": "rust", + "entity-framework": "entity-framework", + "get-started": "landing", + "kafka-connector": "kafka-connector", + "kubernetes": "mck", + "kubernetes-operator": "docs-k8s-operator", + "languages/c/c-driver": "c", + "languages/cpp/cpp-driver": "cpp-driver", + "languages/java/mongodb-hibernate": "hibernate", + "languages/java/reactive-streams-driver": "java-rs", + "languages/kotlin/kotlin-sync-driver": "kotlin-sync", + "languages/python/django-mongodb": "django", + "languages/python/pymongo-arrow-driver": "pymongo-arrow", + "languages/python/pymongo-driver": "pymongo", + "languages/scala/scala-driver": "scala", + "manual": "docs", + "mcp-server": "mcp-server", + "mongocli": "mongocli", + "mongodb-analyzer": "visual-studio-extension", + "mongodb-intellij": "intellij", + "mongodb-shell": "mongodb-shell", + "mongodb-voyage": "voyage", + "mongodb-vscode": "mongodb-vscode", + "mongoid": "mongoid", + "mongosync": "mongosync", + "ops-manager": "ops-manager", + "php-library": "php-library", + "relational-migrator": "docs-relational-migrator", + 
"ruby-driver": "ruby-driver", + "spark-connector": "spark-connector", + }, + Branches: map[string][]string{ + "docs": {"manual", "upcoming", "v8.0", "v7.0", "v6.0", "v5.0", "v4.4"}, + }, + DriverSlugs: []string{ + "drivers/csharp", + "drivers/go", + "drivers/java/sync", + "drivers/kotlin/coroutine", + "drivers/node", + "drivers/php/laravel-mongodb", + "drivers/rust", + "languages/c/c-driver", + "languages/cpp/cpp-driver", + "languages/java/mongodb-hibernate", + "languages/java/reactive-streams-driver", + "languages/kotlin/kotlin-sync-driver", + "languages/python/django-mongodb", + "languages/python/pymongo-arrow-driver", + "languages/python/pymongo-driver", + "languages/scala/scala-driver", + "mongoid", + "php-library", + "ruby-driver", + }, + } +} + +// ResolveURL resolves a documentation URL to a source file path. +// Returns the absolute path to the source file and the content directory. +// +// URL format: www.mongodb.com/docs/{slug}/{version?}/{page-path} +// Examples: +// - www.mongodb.com/docs/atlas/some-page/ -> content/atlas/source/some-page.txt +// - www.mongodb.com/docs/v8.0/tutorial/install/ -> content/manual/v8.0/source/tutorial/install.txt +// - www.mongodb.com/docs/drivers/go/current/usage/ -> content/golang/current/source/usage.txt +func (m *URLMapping) ResolveURL(url string) (sourcePath string, contentDir string, err error) { + // Parse the URL to extract the path after /docs/ + urlPath := extractDocsPath(url) + if urlPath == "" { + return "", "", fmt.Errorf("invalid URL format: %s", url) + } + + parts := strings.Split(urlPath, "/") + if len(parts) == 0 { + return "", "", fmt.Errorf("empty URL path") + } + + // Try to find the longest matching slug + var projectName string + var pagePath string + var version string + + for i := len(parts); i > 0; i-- { + candidateSlug := strings.Join(parts[:i], "/") + if proj, ok := m.URLSlugToProject[candidateSlug]; ok { + projectName = proj + remaining := parts[i:] + + // Check if the matched slug ends with a 
version + // e.g., "drivers/go/current" matched, extract "current" as version + slugParts := strings.Split(candidateSlug, "/") + lastSlugPart := slugParts[len(slugParts)-1] + if isVersionSlug(lastSlugPart) { + version = lastSlugPart + pagePath = strings.Join(remaining, "/") + } else if len(remaining) > 0 && isVersionSlug(remaining[0]) { + // Check if first remaining part is a version + version = remaining[0] + pagePath = strings.Join(remaining[1:], "/") + } else { + pagePath = strings.Join(remaining, "/") + } + break + } + } + + // Special handling for MongoDB Manual (docs project) + // URLs like /docs/manual/... or /docs/v8.0/... map to the "docs" project + if projectName == "" { + if len(parts) > 0 && (parts[0] == "manual" || isVersionSlug(parts[0])) { + projectName = "docs" + version = parts[0] + pagePath = strings.Join(parts[1:], "/") + } + } + + // Check for special slug mappings not in the API data + if projectName == "" { + if len(parts) > 0 { + if proj, ok := specialSlugToProject[parts[0]]; ok { + projectName = proj + // For special slugs, the slug itself may be the page path + if specialPath, ok := specialPagePaths[parts[0]]; ok { + pagePath = specialPath + } else { + pagePath = strings.Join(parts[1:], "/") + } + } + } + } + + if projectName == "" { + return "", "", fmt.Errorf("could not resolve URL slug: %s", urlPath) + } + + // Get content directory for this project + contentDir, ok := m.ProjectToContentDir[projectName] + if !ok { + return "", "", fmt.Errorf("no content directory found for project: %s", projectName) + } + + // Build the source file path + // For versioned projects, the content dir already includes the version + // For non-versioned projects with a version in URL, we need to add it + sourceDir := filepath.Join(m.MonorepoPath, "content", contentDir) + + // Check if this is a versioned project by looking for version subdirectories + // If the content directory has version subdirectories and URL has a version, use it + if version != "" { + 
versionedPath := filepath.Join(m.MonorepoPath, "content", contentDir, version) + if _, err := os.Stat(versionedPath); err == nil { + sourceDir = versionedPath + } + } + + // Add source directory and page path + if pagePath == "" { + pagePath = "index" + } + sourcePath = filepath.Join(sourceDir, "source", pagePath+".txt") + + return sourcePath, contentDir, nil +} + +// extractDocsPath extracts the path after /docs/ from a URL. +func extractDocsPath(url string) string { + // Remove protocol and domain + url = strings.TrimPrefix(url, "https://") + url = strings.TrimPrefix(url, "http://") + url = strings.TrimPrefix(url, "www.") + + // Find /docs/ in the path + idx := strings.Index(url, "/docs/") + if idx == -1 { + // Try without leading slash + idx = strings.Index(url, "docs/") + if idx == -1 { + return "" + } + url = url[idx+5:] + } else { + url = url[idx+6:] + } + + // Remove trailing slash + url = strings.TrimSuffix(url, "/") + + return url +} + +// IsDriverURL checks if a URL is for driver documentation. +// Returns true if the URL matches any known driver slug pattern. +// Excludes mongodb-shell which is handled separately. +func (m *URLMapping) IsDriverURL(url string) bool { + urlPath := extractDocsPath(url) + if urlPath == "" { + return false + } + urlPathLower := strings.ToLower(urlPath) + + // Check against known driver slugs + for _, slug := range m.DriverSlugs { + slugLower := strings.ToLower(slug) + if strings.HasPrefix(urlPathLower, slugLower+"/") || urlPathLower == slugLower { + return true + } + } + + // Also check for the generic drivers/ and languages/ prefixes + // in case a new driver was added that's not in our cached list + if strings.HasPrefix(urlPathLower, "drivers/") || strings.HasPrefix(urlPathLower, "languages/") { + return true + } + + return false +} + +// IsSpecificDriverURL checks if a URL is for a specific driver by project name. +// The driverName should be the Snooty project name (e.g., "golang", "pymongo", "node"). 
+func (m *URLMapping) IsSpecificDriverURL(url, driverName string) bool { + urlPath := extractDocsPath(url) + if urlPath == "" { + return false + } + + // Find the slug for this driver + for slug, project := range m.URLSlugToProject { + if strings.EqualFold(project, driverName) { + slugLower := strings.ToLower(slug) + urlPathLower := strings.ToLower(urlPath) + if strings.HasPrefix(urlPathLower, slugLower+"/") || urlPathLower == slugLower { + return true + } + } + } + + return false +} + +// IsMongoshURL checks if a URL is for MongoDB Shell documentation. +func (m *URLMapping) IsMongoshURL(url string) bool { + urlPath := extractDocsPath(url) + if urlPath == "" { + return false + } + urlPathLower := strings.ToLower(urlPath) + + return strings.HasPrefix(urlPathLower, "mongodb-shell/") || urlPathLower == "mongodb-shell" +} + +// GetDriverSlugs returns the list of known driver URL slugs. +func (m *URLMapping) GetDriverSlugs() []string { + return m.DriverSlugs +} diff --git a/internal/config/url_mapping_test.go b/internal/config/url_mapping_test.go new file mode 100644 index 0000000..be48755 --- /dev/null +++ b/internal/config/url_mapping_test.go @@ -0,0 +1,376 @@ +package config + +import ( + "testing" +) + +// TestIsActive tests the isActive helper function. +func TestIsActive(t *testing.T) { + testCases := []struct { + name string + input any + expected bool + }{ + {"bool true", true, true}, + {"bool false", false, false}, + {"string true", "true", true}, + {"string false", "false", false}, + {"string other", "yes", false}, + {"nil", nil, false}, + {"int", 1, false}, + {"empty string", "", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := isActive(tc.input) + if result != tc.expected { + t.Errorf("isActive(%v) = %v, expected %v", tc.input, result, tc.expected) + } + }) + } +} + +// TestIsDriverSlug tests the isDriverSlug function. 
+func TestIsDriverSlug(t *testing.T) { + testCases := []struct { + name string + slug string + displayName string + expected bool + }{ + // Drivers with drivers/ prefix + {"drivers/csharp", "drivers/csharp", "C#/.NET Driver", true}, + {"drivers/go", "drivers/go", "Go Driver", true}, + {"drivers/node", "drivers/node", "Node.js Driver", true}, + {"drivers/java/sync", "drivers/java/sync", "Java Sync Driver", true}, + {"drivers/kotlin/coroutine", "drivers/kotlin/coroutine", "Kotlin Coroutine", true}, + + // Drivers with languages/ prefix + {"languages/python/pymongo-driver", "languages/python/pymongo-driver", "PyMongo", true}, + {"languages/c/c-driver", "languages/c/c-driver", "C Driver", true}, + {"languages/scala/scala-driver", "languages/scala/scala-driver", "Scala", true}, + + // Drivers detected by displayName containing "Driver" + {"ruby-driver by displayName", "ruby-driver", "Ruby Driver", true}, + + // Standalone driver slugs (edge cases) + {"php-library", "php-library", "PHP Library", true}, + + // Non-drivers (should return false) + {"mongoid ODM", "mongoid", "Mongoid", false}, + {"entity-framework ORM", "entity-framework", "Entity Framework", false}, + {"atlas", "atlas", "MongoDB Atlas", false}, + {"compass", "compass", "MongoDB Compass", false}, + {"mongodb-shell", "mongodb-shell", "MongoDB Shell", false}, + {"kafka-connector", "kafka-connector", "Kafka Connector", false}, + {"spark-connector", "spark-connector", "Spark Connector", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := isDriverSlug(tc.slug, tc.displayName) + if result != tc.expected { + t.Errorf("isDriverSlug(%q, %q) = %v, expected %v", + tc.slug, tc.displayName, result, tc.expected) + } + }) + } +} + +// TestIsVersionSlug tests the isVersionSlug function. 
+func TestIsVersionSlug(t *testing.T) { + testCases := []struct { + name string + input string + expected bool + }{ + // Named versions + {"current", "current", true}, + {"upcoming", "upcoming", true}, + {"stable", "stable", true}, + {"master", "master", true}, + {"latest", "latest", true}, + {"manual", "manual", true}, + + // Numeric versions + {"v8.0", "v8.0", true}, + {"v7.0", "v7.0", true}, + {"v1.13", "v1.13", true}, + {"v2.30", "v2.30", true}, + {"8.0 without v", "8.0", true}, + {"1.0.0 semver", "1.0.0", true}, + {"v1.0.0 semver with v", "v1.0.0", true}, + + // Non-versions + {"project name", "pymongo", false}, + {"drivers prefix", "drivers", false}, + {"random string", "hello", false}, + {"empty string", "", false}, + {"partial version", "v", false}, + {"invalid version", "vX.Y", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := isVersionSlug(tc.input) + if result != tc.expected { + t.Errorf("isVersionSlug(%q) = %v, expected %v", tc.input, result, tc.expected) + } + }) + } +} + +// TestExtractDocsPath tests the extractDocsPath function. 
+func TestExtractDocsPath(t *testing.T) { + testCases := []struct { + name string + url string + expected string + }{ + // Full URLs with https + {"full URL with https", "https://www.mongodb.com/docs/drivers/go/current/", "drivers/go/current"}, + {"full URL without www", "https://mongodb.com/docs/atlas/search/", "atlas/search"}, + + // URLs with http + {"http URL", "http://www.mongodb.com/docs/manual/tutorial/", "manual/tutorial"}, + + // URLs without protocol + {"no protocol with www", "www.mongodb.com/docs/compass/current/", "compass/current"}, + {"no protocol no www", "mongodb.com/docs/pymongo/", "pymongo"}, + + // Edge cases + {"trailing slash removed", "https://mongodb.com/docs/atlas/", "atlas"}, + {"no trailing slash", "https://mongodb.com/docs/atlas", "atlas"}, + {"deep path", "https://mongodb.com/docs/drivers/node/current/fundamentals/crud/", "drivers/node/current/fundamentals/crud"}, + + // Invalid URLs + {"no docs path", "https://mongodb.com/products/atlas", ""}, + {"empty string", "", ""}, + {"just domain", "mongodb.com", ""}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := extractDocsPath(tc.url) + if result != tc.expected { + t.Errorf("extractDocsPath(%q) = %q, expected %q", tc.url, result, tc.expected) + } + }) + } +} + +// createTestURLMapping creates a URLMapping for testing with sample driver data. 
+func createTestURLMapping() *URLMapping { + return &URLMapping{ + URLSlugToProject: map[string]string{ + "drivers/go": "golang", + "drivers/node": "node", + "drivers/csharp": "csharp", + "languages/python/pymongo-driver": "pymongo", + "ruby-driver": "ruby-driver", + "php-library": "php-library", + "mongodb-shell": "mongodb-shell", + "atlas": "cloud-docs", + }, + DriverSlugs: []string{ + "drivers/go", + "drivers/node", + "drivers/csharp", + "languages/python/pymongo-driver", + "ruby-driver", + "php-library", + }, + ProjectToContentDir: map[string]string{}, + ProjectBranches: map[string][]string{}, + MonorepoPath: "", + } +} + +// TestIsDriverURL tests the IsDriverURL method. +func TestIsDriverURL(t *testing.T) { + m := createTestURLMapping() + + testCases := []struct { + name string + url string + expected bool + }{ + // Known driver URLs + {"go driver", "https://mongodb.com/docs/drivers/go/current/", true}, + {"node driver", "https://mongodb.com/docs/drivers/node/current/fundamentals/", true}, + {"csharp driver", "https://mongodb.com/docs/drivers/csharp/current/", true}, + {"pymongo driver", "https://mongodb.com/docs/languages/python/pymongo-driver/current/", true}, + {"ruby driver", "https://mongodb.com/docs/ruby-driver/current/", true}, + {"php library", "https://mongodb.com/docs/php-library/current/", true}, + + // Generic drivers/ and languages/ prefixes (for new drivers not in cache) + {"unknown driver in drivers/", "https://mongodb.com/docs/drivers/unknown/current/", true}, + {"unknown driver in languages/", "https://mongodb.com/docs/languages/java/new-driver/", true}, + + // Non-driver URLs + {"mongodb shell", "https://mongodb.com/docs/mongodb-shell/current/", false}, + {"atlas", "https://mongodb.com/docs/atlas/search/", false}, + {"manual", "https://mongodb.com/docs/manual/tutorial/", false}, + {"compass", "https://mongodb.com/docs/compass/current/", false}, + + // Edge cases + {"empty URL", "", false}, + {"invalid URL", "not-a-url", false}, + {"exact slug 
match", "https://mongodb.com/docs/drivers/go", true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := m.IsDriverURL(tc.url) + if result != tc.expected { + t.Errorf("IsDriverURL(%q) = %v, expected %v", tc.url, result, tc.expected) + } + }) + } +} + +// TestIsSpecificDriverURL tests the IsSpecificDriverURL method. +func TestIsSpecificDriverURL(t *testing.T) { + m := createTestURLMapping() + + testCases := []struct { + name string + url string + driverName string + expected bool + }{ + // Matching driver URLs + {"golang match", "https://mongodb.com/docs/drivers/go/current/", "golang", true}, + {"node match", "https://mongodb.com/docs/drivers/node/current/", "node", true}, + {"pymongo match", "https://mongodb.com/docs/languages/python/pymongo-driver/current/", "pymongo", true}, + {"ruby-driver match", "https://mongodb.com/docs/ruby-driver/current/", "ruby-driver", true}, + + // Case insensitive matching + {"golang uppercase", "https://mongodb.com/docs/drivers/go/current/", "GOLANG", true}, + {"node mixed case", "https://mongodb.com/docs/drivers/node/current/", "Node", true}, + + // Non-matching + {"wrong driver", "https://mongodb.com/docs/drivers/go/current/", "node", false}, + {"non-driver URL", "https://mongodb.com/docs/atlas/search/", "golang", false}, + + // Edge cases + {"empty URL", "", "golang", false}, + {"empty driver name", "https://mongodb.com/docs/drivers/go/current/", "", false}, + {"unknown driver name", "https://mongodb.com/docs/drivers/go/current/", "unknown", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := m.IsSpecificDriverURL(tc.url, tc.driverName) + if result != tc.expected { + t.Errorf("IsSpecificDriverURL(%q, %q) = %v, expected %v", + tc.url, tc.driverName, result, tc.expected) + } + }) + } +} + +// TestIsMongoshURL tests the IsMongoshURL method. 
+func TestIsMongoshURL(t *testing.T) { + m := createTestURLMapping() + + testCases := []struct { + name string + url string + expected bool + }{ + // MongoDB Shell URLs + {"mongosh with path", "https://mongodb.com/docs/mongodb-shell/current/", true}, + {"mongosh root", "https://mongodb.com/docs/mongodb-shell/", true}, + {"mongosh exact", "https://mongodb.com/docs/mongodb-shell", true}, + {"mongosh deep path", "https://mongodb.com/docs/mongodb-shell/reference/methods/", true}, + + // Case insensitive + {"mongosh uppercase", "https://mongodb.com/docs/MONGODB-SHELL/current/", true}, + {"mongosh mixed case", "https://mongodb.com/docs/MongoDB-Shell/current/", true}, + + // Non-mongosh URLs + {"driver URL", "https://mongodb.com/docs/drivers/go/current/", false}, + {"atlas URL", "https://mongodb.com/docs/atlas/", false}, + {"manual URL", "https://mongodb.com/docs/manual/", false}, + + // Edge cases + {"empty URL", "", false}, + {"partial match", "https://mongodb.com/docs/mongodb-shell-like/", false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := m.IsMongoshURL(tc.url) + if result != tc.expected { + t.Errorf("IsMongoshURL(%q) = %v, expected %v", tc.url, result, tc.expected) + } + }) + } +} + +// TestGetDriverSlugs tests the GetDriverSlugs method. +func TestGetDriverSlugs(t *testing.T) { + m := createTestURLMapping() + + slugs := m.GetDriverSlugs() + + if len(slugs) != 6 { + t.Errorf("Expected 6 driver slugs, got %d", len(slugs)) + } + + // Check that expected slugs are present + expectedSlugs := map[string]bool{ + "drivers/go": true, + "drivers/node": true, + "drivers/csharp": true, + "languages/python/pymongo-driver": true, + "ruby-driver": true, + "php-library": true, + } + + for _, slug := range slugs { + if !expectedSlugs[slug] { + t.Errorf("Unexpected slug in result: %q", slug) + } + } +} + +// TestSortStrings tests the sortStrings helper function. 
+func TestSortStrings(t *testing.T) { + testCases := []struct { + name string + input []string + expected []string + }{ + {"already sorted", []string{"a", "b", "c"}, []string{"a", "b", "c"}}, + {"reverse order", []string{"c", "b", "a"}, []string{"a", "b", "c"}}, + {"mixed order", []string{"banana", "apple", "cherry"}, []string{"apple", "banana", "cherry"}}, + {"empty slice", []string{}, []string{}}, + {"single element", []string{"only"}, []string{"only"}}, + {"duplicates", []string{"b", "a", "b", "a"}, []string{"a", "a", "b", "b"}}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Make a copy to avoid modifying the test case + input := make([]string, len(tc.input)) + copy(input, tc.input) + + sortStrings(input) + + if len(input) != len(tc.expected) { + t.Errorf("Length mismatch: got %d, expected %d", len(input), len(tc.expected)) + return + } + + for i, v := range input { + if v != tc.expected[i] { + t.Errorf("At index %d: got %q, expected %q", i, v, tc.expected[i]) + } + } + }) + } +} + diff --git a/internal/language/language.go b/internal/language/language.go new file mode 100644 index 0000000..80df94b --- /dev/null +++ b/internal/language/language.go @@ -0,0 +1,448 @@ +// Package language provides utilities for working with programming language identifiers. +// +// This package provides: +// - Canonical language name constants +// - File extension constants and mappings +// - Language normalization (e.g., "ts" -> "typescript") +// - Language inference from file extensions +package language + +import ( + "path/filepath" + "strings" +) + +// Language constants define canonical language names used throughout the tool. +// These are used for normalization and file extension mapping. 
// Canonical language names and the file extension paired with each one.
//
// The name constants are the canonical identifiers that language
// normalization produces; each XxxExtension constant is the file extension
// (including the leading dot) associated with that language. Several
// languages share an extension: Bash, Console, and Shell use ".sh",
// PowerShell and PS5 use ".ps1", and Text and Undefined use ".txt".
const (
	Bash          = "bash"
	BashExtension = ".sh"

	C          = "c"
	CExtension = ".c"

	CPP          = "cpp"
	CPPExtension = ".cpp"

	CSharp          = "csharp"
	CSharpExtension = ".cs"

	Console          = "console"
	ConsoleExtension = ".sh"

	CSS          = "css"
	CSSExtension = ".css"

	Go          = "go"
	GoExtension = ".go"

	HTML          = "html"
	HTMLExtension = ".html"

	Java          = "java"
	JavaExtension = ".java"

	JavaScript          = "javascript"
	JavaScriptExtension = ".js"

	JSON          = "json"
	JSONExtension = ".json"

	Kotlin          = "kotlin"
	KotlinExtension = ".kt"

	PHP          = "php"
	PHPExtension = ".php"

	PowerShell          = "powershell"
	PowerShellExtension = ".ps1"

	PS5          = "ps5"
	PS5Extension = ".ps1"

	Python          = "python"
	PythonExtension = ".py"

	Ruby          = "ruby"
	RubyExtension = ".rb"

	Rust          = "rust"
	RustExtension = ".rs"

	Scala          = "scala"
	ScalaExtension = ".scala"

	Shell          = "shell"
	ShellExtension = ".sh"

	SQL          = "sql"
	SQLExtension = ".sql"

	Swift          = "swift"
	SwiftExtension = ".swift"

	Text          = "text"
	TextExtension = ".txt"

	TypeScript          = "typescript"
	TypeScriptExtension = ".ts"

	Undefined          = "undefined"
	UndefinedExtension = ".txt"

	XML          = "xml"
	XMLExtension = ".xml"

	YAML          = "yaml"
	YAMLExtension = ".yaml"
)
+// +// Parameters: +// - language: The language identifier (case-insensitive) +// +// Returns: +// - string: The file extension including the leading dot (e.g., ".js", ".py") +func GetExtensionFromLanguage(language string) string { + lang := strings.ToLower(strings.TrimSpace(language)) + + langExtensionMap := map[string]string{ + Bash: BashExtension, + C: CExtension, + CPP: CPPExtension, + CSharp: CSharpExtension, + Console: ConsoleExtension, + CSS: CSSExtension, + Go: GoExtension, + HTML: HTMLExtension, + Java: JavaExtension, + JavaScript: JavaScriptExtension, + JSON: JSONExtension, + Kotlin: KotlinExtension, + PHP: PHPExtension, + PowerShell: PowerShellExtension, + PS5: PS5Extension, + Python: PythonExtension, + Ruby: RubyExtension, + Rust: RustExtension, + Scala: ScalaExtension, + Shell: ShellExtension, + SQL: SQLExtension, + Swift: SwiftExtension, + Text: TextExtension, + TypeScript: TypeScriptExtension, + Undefined: UndefinedExtension, + XML: XMLExtension, + YAML: YAMLExtension, + "c++": CPPExtension, + "c#": CSharpExtension, + "cs": CSharpExtension, + "golang": GoExtension, + "js": JavaScriptExtension, + "kt": KotlinExtension, + "py": PythonExtension, + "rb": RubyExtension, + "rs": RustExtension, + "sh": ShellExtension, + "ts": TypeScriptExtension, + "txt": TextExtension, + "ps1": PowerShellExtension, + "yml": YAMLExtension, + "": UndefinedExtension, + "none": UndefinedExtension, + } + + if extension, exists := langExtensionMap[lang]; exists { + return extension + } + + return UndefinedExtension +} + +// GetLanguageFromExtension infers the language from a file extension. +// +// This function maps file extensions to their corresponding language names. +// Returns empty string if the extension is not recognized. 
+// +// Parameters: +// - filePath: The file path to extract the extension from +// +// Returns: +// - string: The language name, or empty string if not recognized +func GetLanguageFromExtension(filePath string) string { + ext := strings.ToLower(filepath.Ext(filePath)) + extensionMap := map[string]string{ + ".py": Python, + ".js": JavaScript, + ".ts": TypeScript, + ".go": Go, + ".java": Java, + ".cs": CSharp, + ".cpp": CPP, + ".c": C, + ".rb": Ruby, + ".rs": Rust, + ".swift": Swift, + ".kt": Kotlin, + ".scala": Scala, + ".sh": Shell, + ".bash": Shell, + ".ps1": PowerShell, + ".json": JSON, + ".yaml": YAML, + ".yml": YAML, + ".xml": XML, + ".html": HTML, + ".css": CSS, + ".sql": SQL, + ".txt": Text, + ".php": PHP, + } + if lang, ok := extensionMap[ext]; ok { + return lang + } + return "" +} + +// Resolve determines the language for a code example using a fallback chain. +// +// This function implements a priority-based language detection: +// 1. languageArg - explicit language from directive argument (e.g., .. code-block:: python) +// 2. languageOption - explicit language from :language: option +// 3. filePath - infer from file extension (for literalinclude, io-code-block) +// 4. "undefined" as final fallback +// +// The result is normalized before being returned. 
+// +// Parameters: +// - languageArg: Language from directive argument (empty if argument is a filepath) +// - languageOption: The value of the :language: option (may be empty) +// - filePath: The filepath to infer language from extension (may be empty) +// +// Returns: +// - string: The normalized language name +func Resolve(languageArg, languageOption, filePath string) string { + // Priority 1: explicit language argument + lang := languageArg + + // Priority 2: :language: option + if lang == "" { + lang = languageOption + } + + // Priority 3: infer from file extension + if lang == "" && filePath != "" { + lang = GetLanguageFromExtension(filePath) + } + + // Final fallback to undefined + if lang == "" { + lang = Undefined + } + + return Normalize(lang) +} + +// Normalize normalizes a language string to a canonical form. +// +// This function converts various language name variants to their canonical forms: +// - "ts" -> "typescript" +// - "c++" -> "cpp" +// - "golang" -> "go" +// - "js" -> "javascript" +// - etc. 
+// +// Parameters: +// - language: The language identifier (case-insensitive) +// +// Returns: +// - string: The normalized language name, or the original string if no normalization is defined +func Normalize(language string) string { + lang := strings.ToLower(strings.TrimSpace(language)) + + normalizeMap := map[string]string{ + Bash: Bash, + C: C, + CPP: CPP, + CSharp: CSharp, + Console: Console, + CSS: CSS, + Go: Go, + HTML: HTML, + Java: Java, + JavaScript: JavaScript, + JSON: JSON, + Kotlin: Kotlin, + PHP: PHP, + PowerShell: PowerShell, + PS5: PS5, + Python: Python, + Ruby: Ruby, + Rust: Rust, + Scala: Scala, + Shell: Shell, + SQL: SQL, + Swift: Swift, + Text: Text, + TypeScript: TypeScript, + XML: XML, + YAML: YAML, + "c++": CPP, + "c#": CSharp, + "cs": CSharp, + "golang": Go, + "js": JavaScript, + "kt": Kotlin, + "py": Python, + "rb": Ruby, + "rs": Rust, + "sh": Shell, + "ts": TypeScript, + "txt": Text, + "ps1": PowerShell, + "yml": YAML, + "": Undefined, + "none": Undefined, + } + + if normalized, exists := normalizeMap[lang]; exists { + return normalized + } + + return lang +} + +// LanguageToProduct maps language identifiers to their display product names. +// This is used for reporting purposes when a language needs to be displayed +// as a product category. 
// LanguageToProduct maps language identifiers to their display product names.
// This is used for reporting purposes when a language needs to be displayed
// as a product category. Keys are lowercase and include both canonical names
// and common shorthands.
var LanguageToProduct = map[string]string{
	"python":     "Python",
	"py":         "Python", // shorthand, alongside "js", "rb", "rs", "kt", ...
	"javascript": "JavaScript",
	"js":         "JavaScript",
	"typescript": "TypeScript",
	"ts":         "TypeScript",
	"go":         "Go",
	"golang":     "Go",
	"java":       "Java",
	"csharp":     "C#",
	"c#":         "C#",
	"cs":         "C#",
	"cpp":        "C++",
	"c++":        "C++",
	"c":          "C",
	"ruby":       "Ruby",
	"rb":         "Ruby",
	"rust":       "Rust",
	"rs":         "Rust",
	"swift":      "Swift",
	"kotlin":     "Kotlin",
	"kt":         "Kotlin",
	"scala":      "Scala",
	"php":        "PHP",
	"mongosh":    "MongoDB Shell",
	"bash":       "Shell",
	"sh":         "Shell",
	"shell":      "Shell",
	"console":    "Shell",
	"powershell": "PowerShell",
	"ps1":        "PowerShell",
	"json":       "JSON",
	"yaml":       "YAML",
	"yml":        "YAML",
	"xml":        "XML",
	"html":       "HTML",
	"css":        "CSS",
	"sql":        "SQL",
	"ini":        "INI",
	"toml":       "TOML",
	"properties": "Properties",
	"text":       "Text",
	"txt":        "Text",
	"none":       "Text",
}

// GetProductFromLanguage maps a language string to a display product name.
//
// The identifier is trimmed of surrounding whitespace and lowercased before
// it is looked up in LanguageToProduct:
//   - "python" / "py" -> "Python"
//   - "js" -> "JavaScript"
//   - "mongosh" -> "MongoDB Shell"
//   - etc.
//
// Parameters:
//   - lang: The language identifier (case-insensitive)
//
// Returns:
//   - string: The display product name, or the original (unmodified) lang
//     argument if no mapping exists
func GetProductFromLanguage(lang string) string {
	langLower := strings.ToLower(strings.TrimSpace(lang))
	if product, ok := LanguageToProduct[langLower]; ok {
		return product
	}
	return lang
}
// NonDriverLanguages lists languages that should NOT inherit context from
// composables or tabs.
//
// WHY THIS EXISTS:
// Code examples inside driver documentation are not always driver code. They
// can also be shell commands that install packages, configuration files,
// SQL queries shown for comparison, or HTTP requests. Languages in this set
// are meant to be reported under their own language rather than under the
// surrounding driver context.
//
// "shell" and "javascript" are intentionally absent: they can be valid
// MongoDB Shell examples and are covered by MongoShellLanguages instead.
var NonDriverLanguages = map[string]bool{
	// Shell and console output
	"bash":    true,
	"sh":      true,
	"console": true,

	// Plain text and configuration formats
	"text":       true,
	"json":       true,
	"yaml":       true,
	"xml":        true,
	"ini":        true,
	"toml":       true,
	"properties": true,

	// Other non-driver content
	"sql":  true,
	"none": true,
	"http": true,
}

// IsNonDriverLanguage checks if a language should NOT inherit context from
// composables or tabs.
//
// The identifier is trimmed of surrounding whitespace and lowercased before
// it is looked up in NonDriverLanguages.
//
// Parameters:
//   - language: The language identifier (case-insensitive)
//
// Returns:
//   - bool: true if the language is a non-driver language
func IsNonDriverLanguage(language string) bool {
	key := strings.TrimSpace(language)
	key = strings.ToLower(key)
	return NonDriverLanguages[key]
}
// MongoShellLanguages lists languages that are valid for MongoDB Shell examples.
//
// WHY THIS EXISTS:
// MongoDB Shell (mongosh) examples are written with "shell", "javascript", or
// "js" as their language, but the same identifiers are also used for system
// shell commands and for browser or Node.js JavaScript. Membership in this
// set therefore only says that an example COULD be a MongoDB Shell example;
// the caller still has to establish separately that it is in a MongoDB Shell
// context.
var MongoShellLanguages = map[string]bool{
	"shell":      true,
	"javascript": true,
	"js":         true,
}

// IsMongoShellLanguage checks if a language is valid for MongoDB Shell examples.
//
// The identifier is trimmed of surrounding whitespace and lowercased before
// it is looked up in MongoShellLanguages.
//
// Parameters:
//   - language: The language identifier (case-insensitive)
//
// Returns:
//   - bool: true if the language could be a MongoDB Shell language
func IsMongoShellLanguage(language string) bool {
	normalized := strings.ToLower(strings.TrimSpace(language))
	if listed, found := MongoShellLanguages[normalized]; found {
		return listed
	}
	return false
}
{"empty string", "", ".txt"}, + {"none", "none", ".txt"}, + {"unknown language", "unknownlang", ".txt"}, + {"whitespace", " python ", ".py"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GetExtensionFromLanguage(tt.language) + if got != tt.want { + t.Errorf("GetExtensionFromLanguage(%q) = %q, want %q", tt.language, got, tt.want) + } + }) + } +} + +func TestGetLanguageFromExtension(t *testing.T) { + tests := []struct { + name string + filePath string + want string + }{ + {"python file", "example.py", Python}, + {"javascript file", "script.js", JavaScript}, + {"typescript file", "app.ts", TypeScript}, + {"go file", "main.go", Go}, + {"java file", "Main.java", Java}, + {"csharp file", "Program.cs", CSharp}, + {"cpp file", "main.cpp", CPP}, + {"c file", "main.c", C}, + {"ruby file", "script.rb", Ruby}, + {"rust file", "main.rs", Rust}, + {"shell file", "script.sh", Shell}, + {"bash file", "script.bash", Shell}, + {"json file", "config.json", JSON}, + {"yaml file", "config.yaml", YAML}, + {"yml file", "config.yml", YAML}, + {"xml file", "data.xml", XML}, + {"html file", "index.html", HTML}, + {"css file", "styles.css", CSS}, + {"sql file", "query.sql", SQL}, + {"text file", "readme.txt", Text}, + {"php file", "index.php", PHP}, + {"full path", "/path/to/file.py", Python}, + {"unknown extension", "file.xyz", ""}, + {"no extension", "Makefile", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GetLanguageFromExtension(tt.filePath) + if got != tt.want { + t.Errorf("GetLanguageFromExtension(%q) = %q, want %q", tt.filePath, got, tt.want) + } + }) + } +} + +func TestNormalize(t *testing.T) { + tests := []struct { + name string + language string + want string + }{ + {"python", "python", Python}, + {"Python uppercase", "Python", Python}, + {"py shorthand", "py", Python}, + {"javascript", "javascript", JavaScript}, + {"js shorthand", "js", JavaScript}, + {"typescript", "typescript", TypeScript}, + {"ts 
shorthand", "ts", TypeScript}, + {"go", "go", Go}, + {"golang alias", "golang", Go}, + {"csharp", "csharp", CSharp}, + {"c# alias", "c#", CSharp}, + {"cs alias", "cs", CSharp}, + {"cpp", "cpp", CPP}, + {"c++ alias", "c++", CPP}, + {"shell", "shell", Shell}, + {"sh shorthand", "sh", Shell}, + {"yaml", "yaml", YAML}, + {"yml alias", "yml", YAML}, + {"empty string", "", Undefined}, + {"none", "none", Undefined}, + {"unknown language", "unknownlang", "unknownlang"}, + {"whitespace", " python ", Python}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := Normalize(tt.language) + if got != tt.want { + t.Errorf("Normalize(%q) = %q, want %q", tt.language, got, tt.want) + } + }) + } +} + +func TestGetProductFromLanguage(t *testing.T) { + tests := []struct { + name string + language string + want string + }{ + {"python", "python", "Python"}, + {"Python uppercase", "Python", "Python"}, + {"javascript", "javascript", "JavaScript"}, + {"js shorthand", "js", "JavaScript"}, + {"typescript", "typescript", "TypeScript"}, + {"ts shorthand", "ts", "TypeScript"}, + {"go", "go", "Go"}, + {"golang alias", "golang", "Go"}, + {"java", "java", "Java"}, + {"csharp", "csharp", "C#"}, + {"c# alias", "c#", "C#"}, + {"mongosh", "mongosh", "MongoDB Shell"}, + {"bash", "bash", "Shell"}, + {"sh", "sh", "Shell"}, + {"shell", "shell", "Shell"}, + {"console", "console", "Shell"}, + {"json", "json", "JSON"}, + {"yaml", "yaml", "YAML"}, + {"yml alias", "yml", "YAML"}, + {"unknown returns original", "unknownlang", "unknownlang"}, + {"whitespace trimmed", " python ", "Python"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GetProductFromLanguage(tt.language) + if got != tt.want { + t.Errorf("GetProductFromLanguage(%q) = %q, want %q", tt.language, got, tt.want) + } + }) + } +} + +func TestIsNonDriverLanguage(t *testing.T) { + tests := []struct { + name string + language string + want bool + }{ + {"bash is non-driver", "bash", true}, + {"sh 
is non-driver", "sh", true}, + {"console is non-driver", "console", true}, + {"text is non-driver", "text", true}, + {"json is non-driver", "json", true}, + {"yaml is non-driver", "yaml", true}, + {"xml is non-driver", "xml", true}, + {"ini is non-driver", "ini", true}, + {"toml is non-driver", "toml", true}, + {"properties is non-driver", "properties", true}, + {"sql is non-driver", "sql", true}, + {"none is non-driver", "none", true}, + {"http is non-driver", "http", true}, + {"python is driver", "python", false}, + {"javascript is driver", "javascript", false}, + {"go is driver", "go", false}, + {"java is driver", "java", false}, + {"shell is driver", "shell", false}, // shell has special handling + {"case insensitive", "BASH", true}, + {"whitespace trimmed", " json ", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsNonDriverLanguage(tt.language) + if got != tt.want { + t.Errorf("IsNonDriverLanguage(%q) = %v, want %v", tt.language, got, tt.want) + } + }) + } +} + +func TestResolve(t *testing.T) { + tests := []struct { + name string + languageArg string + languageOption string + filePath string + want string + }{ + { + name: "explicit language argument takes priority", + languageArg: "python", + languageOption: "javascript", + filePath: "/path/to/file.go", + want: "python", + }, + { + name: "language option when no argument", + languageArg: "", + languageOption: "javascript", + filePath: "/path/to/file.py", + want: "javascript", + }, + { + name: "infer from filepath when no explicit language", + languageArg: "", + languageOption: "", + filePath: "/path/to/example.py", + want: "python", + }, + { + name: "infer from .js extension", + languageArg: "", + languageOption: "", + filePath: "code/snippet.js", + want: "javascript", + }, + { + name: "infer from .go extension", + languageArg: "", + languageOption: "", + filePath: "main.go", + want: "go", + }, + { + name: "infer from .java extension", + languageArg: "", + 
languageOption: "", + filePath: "Example.java", + want: "java", + }, + { + name: "unknown extension returns undefined", + languageArg: "", + languageOption: "", + filePath: "/path/to/file.xyz", + want: "undefined", + }, + { + name: "no inputs returns undefined", + languageArg: "", + languageOption: "", + filePath: "", + want: "undefined", + }, + { + name: "language argument normalized", + languageArg: "ts", + languageOption: "", + filePath: "", + want: "typescript", + }, + { + name: "language option normalized", + languageArg: "", + languageOption: "ts", + filePath: "", + want: "typescript", + }, + { + name: "filepath extension normalized", + languageArg: "", + languageOption: "", + filePath: "/path/to/file.yml", + want: "yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := Resolve(tt.languageArg, tt.languageOption, tt.filePath) + if got != tt.want { + t.Errorf("Resolve(%q, %q, %q) = %q, want %q", + tt.languageArg, tt.languageOption, tt.filePath, got, tt.want) + } + }) + } +} + +func TestIsMongoShellLanguage(t *testing.T) { + tests := []struct { + name string + language string + want bool + }{ + {"shell is mongo shell", "shell", true}, + {"javascript is mongo shell", "javascript", true}, + {"js is mongo shell", "js", true}, + {"python is not mongo shell", "python", false}, + {"bash is not mongo shell", "bash", false}, + {"mongosh is not in list", "mongosh", false}, // mongosh is handled separately + {"case insensitive", "SHELL", true}, + {"case insensitive js", "JavaScript", true}, + {"whitespace trimmed", " shell ", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsMongoShellLanguage(tt.language) + if got != tt.want { + t.Errorf("IsMongoShellLanguage(%q) = %v, want %v", tt.language, got, tt.want) + } + }) + } +} + diff --git a/internal/projectinfo/products.go b/internal/projectinfo/products.go new file mode 100644 index 0000000..6a1f789 --- /dev/null +++ 
// ContentDirToProduct associates a content directory name with the product
// name shown in reports and analysis output.
//
// A content directory is a top-level directory under content/ in the docs
// monorepo that holds driver or product documentation (for example
// "pymongo-driver", "node", or "golang").
//
// Note: the map is meant to list every driver/product content directory, not
// only those with tested code examples. Testability is determined separately.
var ContentDirToProduct = map[string]string{
	"c-driver":        "C",
	"cpp-driver":      "C++",
	"csharp":          "C#",
	"golang":          "Go",
	"java":            "Java (Sync)",
	"java-rs":         "Java (Reactive Streams)",
	"kotlin":          "Kotlin (Coroutine)",
	"kotlin-sync":     "Kotlin (Sync)",
	"laravel-mongodb": "Laravel",
	"mongodb-shell":   "MongoDB Shell",
	"node":            "Node.js",
	"php-library":     "PHP",
	"pymongo-arrow":   "PyMongo Arrow",
	"pymongo-driver":  "Python",
	"ruby-driver":     "Ruby",
	"rust":            "Rust",
	"scala-driver":    "Scala",
	"swift":           "Swift",
}

// GetProductFromContentDir looks up the display product name for contentDir.
// It returns the empty string when contentDir is not a key of
// ContentDirToProduct.
func GetProductFromContentDir(contentDir string) string {
	// Indexing a map with an absent key yields the zero value, which for a
	// string is exactly the "not recognized" result this function promises.
	return ContentDirToProduct[contentDir]
}

// GetAllContentDirs returns every key of ContentDirToProduct as a new slice.
// The order follows Go map iteration and is therefore unspecified; callers
// that need stable output should sort the result.
func GetAllContentDirs() []string {
	names := make([]string, len(ContentDirToProduct))
	next := 0
	for name := range ContentDirToProduct {
		names[next] = name
		next++
	}
	return names
}

// GetAllProducts returns a slice of all known product display names.
// Useful for validation or reporting.
+func GetAllProducts() []string { + // Use a map to deduplicate (in case multiple dirs map to same product) + seen := make(map[string]bool) + products := make([]string, 0, len(ContentDirToProduct)) + for _, product := range ContentDirToProduct { + if !seen[product] { + seen[product] = true + products = append(products, product) + } + } + return products +} + diff --git a/internal/rst/directive_parser.go b/internal/rst/directive_parser.go index 618257c..dc1d804 100644 --- a/internal/rst/directive_parser.go +++ b/internal/rst/directive_parser.go @@ -16,18 +16,24 @@ import ( "os" "regexp" "strings" + + "github.com/grove-platform/audit-cli/internal/language" ) // DirectiveType represents the type of reStructuredText directive. type DirectiveType string const ( - // CodeBlock represents inline code blocks (.. code-block::) + // CodeBlock represents inline code blocks (.. code-block:: and .. code::) CodeBlock DirectiveType = "code-block" // LiteralInclude represents external file references (.. literalinclude::) LiteralInclude DirectiveType = "literalinclude" // IoCodeBlock represents input/output examples (.. io-code-block::) IoCodeBlock DirectiveType = "io-code-block" + // Include represents content inclusion (.. include::) + Include DirectiveType = "include" + // Toctree represents table of contents entries (.. toctree::) + Toctree DirectiveType = "toctree" ) // Directive represents a parsed reStructuredText directive. @@ -56,6 +62,66 @@ type SubDirective struct { Content string // Inline content (if no filepath) } +// ResolveLanguage determines the language for a code example directive. +// +// The resolution strategy depends on the directive type: +// - CodeBlock: Argument is the language (e.g., .. code-block:: python) +// - LiteralInclude: Argument is a filepath, infer language from extension +// - IoCodeBlock: Use :language: option only (sub-directives handle their own) +// +// Returns the normalized language name, or "undefined" if not determinable. 
+func (d Directive) ResolveLanguage() string { + switch d.Type { + case CodeBlock: + // For code-block, the argument IS the language + return language.Resolve(d.Argument, d.Options["language"], "") + case LiteralInclude: + // For literalinclude, the argument is a filepath + return language.Resolve("", d.Options["language"], d.Argument) + case IoCodeBlock: + // For io-code-block parent, only check the :language: option + // Sub-directives handle their own language resolution + return language.Resolve("", d.Options["language"], "") + default: + return language.Undefined + } +} + +// ResolveLanguage determines the language for a sub-directive (input/output). +// +// The resolution strategy: +// 1. Check the sub-directive's :language: option +// 2. If the sub-directive has a filepath argument, infer from extension +// 3. Fall back to the parent directive's :language: option +// 4. Return "undefined" if not determinable +// +// Parameters: +// - parentOptions: The parent io-code-block's options map (for fallback) +// +// Returns the normalized language name, or "undefined" if not determinable. +func (s SubDirective) ResolveLanguage(parentOptions map[string]string) string { + // First try sub-directive's own language option + if lang := s.Options["language"]; lang != "" { + return language.Resolve("", lang, "") + } + + // Then try to infer from filepath if present + if s.Argument != "" { + if lang := language.GetLanguageFromExtension(s.Argument); lang != "" { + return language.Normalize(lang) + } + } + + // Fall back to parent's language option + if parentOptions != nil { + if lang := parentOptions["language"]; lang != "" { + return language.Resolve("", lang, "") + } + } + + return language.Undefined +} + // Regular expressions for directive parsing // // Note: literalIncludeRegex is imported from directive_regex.go (LiteralIncludeDirectiveRegex) @@ -67,6 +133,9 @@ var ( // Matches: .. 
code-block:: python (language is optional) codeBlockRegex = regexp.MustCompile(`^\.\.\s+code-block::\s*(.*)$`) + // Alias for the shared code directive regex (shorter form of code-block) + codeDirectiveRegex = CodeDirectiveRegex + // Matches: .. io-code-block:: (strict - must end after directive) // This is different from IOCodeBlockDirectiveRegex which is more permissive ioCodeBlockRegex = regexp.MustCompile(`^\.\.\s+io-code-block::\s*$`) @@ -83,17 +152,20 @@ var ( optionRegex = regexp.MustCompile(`^\s+:([^:]+):\s*(.*)$`) ) -// ParseDirectives parses all directives from an RST file. +// ParseDirectives parses all directives from an RST or YAML file. +// +// This function extracts all supported code example directives: +// - literalinclude: External file references +// - code-block: Inline code blocks +// - code: Shorter alias for code-block (standard reStructuredText) +// - io-code-block: Input/output examples with nested directives +// - yaml-code-block: YAML-native code examples (from action: blocks in steps files) // -// This function scans the file line-by-line and extracts all supported directives -// (literalinclude, code-block, io-code-block). For each directive, it parses: -// - The directive type and argument -// - All directive options (e.g., :language:, :start-after:) -// - The directive content (for code-block and io-code-block) -// - Nested directives (for io-code-block) +// For RST files, it scans line-by-line for RST directives. +// For YAML files, it also parses the legacy action: format used in some steps files. 
// // Parameters: -// - filePath: Path to the RST file to parse +// - filePath: Path to the RST or YAML file to parse // // Returns: // - []Directive: Slice of all parsed directives in order of appearance @@ -145,6 +217,22 @@ func ParseDirectives(filePath string) ([]Directive, error) { continue } + // Check for code directive (shorter alias for code-block in standard RST) + if matches := codeDirectiveRegex.FindStringSubmatch(trimmedLine); len(matches) > 1 { + directive := Directive{ + Type: CodeBlock, // Treat as code-block since they're functionally equivalent + Argument: strings.TrimSpace(matches[1]), + Options: make(map[string]string), + LineNum: lineNum, + } + + // Parse options and content on following lines + firstContentLine := parseDirectiveOptions(scanner, &directive, &lineNum) + parseDirectiveContent(scanner, &directive, &lineNum, firstContentLine) + directives = append(directives, directive) + continue + } + // Check for io-code-block directive if ioCodeBlockRegex.MatchString(trimmedLine) { directive := Directive{ @@ -164,6 +252,13 @@ func ParseDirectives(filePath string) ([]Directive, error) { return nil, err } + // For YAML files, also parse YAML-native code examples (action: blocks) + // This handles the legacy format used in some steps files + yamlDirectives, err := ParseYAMLStepsFile(filePath) + if err == nil && len(yamlDirectives) > 0 { + directives = append(directives, yamlDirectives...) 
+ } + return directives, nil } diff --git a/internal/rst/directive_parser_test.go b/internal/rst/directive_parser_test.go new file mode 100644 index 0000000..86ccd7e --- /dev/null +++ b/internal/rst/directive_parser_test.go @@ -0,0 +1,224 @@ +package rst + +import ( + "testing" + + "github.com/grove-platform/audit-cli/internal/language" +) + +func TestDirective_ResolveLanguage(t *testing.T) { + tests := []struct { + name string + directive Directive + want string + }{ + { + name: "code-block with language argument", + directive: Directive{ + Type: CodeBlock, + Argument: "python", + Options: map[string]string{}, + }, + want: "python", + }, + { + name: "code-block with language option", + directive: Directive{ + Type: CodeBlock, + Argument: "", + Options: map[string]string{"language": "javascript"}, + }, + want: "javascript", + }, + { + name: "code-block argument takes priority over option", + directive: Directive{ + Type: CodeBlock, + Argument: "python", + Options: map[string]string{"language": "javascript"}, + }, + want: "python", + }, + { + name: "code-block with no language returns undefined", + directive: Directive{ + Type: CodeBlock, + Argument: "", + Options: map[string]string{}, + }, + want: language.Undefined, + }, + { + name: "literalinclude with language option", + directive: Directive{ + Type: LiteralInclude, + Argument: "/path/to/file.txt", + Options: map[string]string{"language": "python"}, + }, + want: "python", + }, + { + name: "literalinclude infers from file extension", + directive: Directive{ + Type: LiteralInclude, + Argument: "/path/to/example.py", + Options: map[string]string{}, + }, + want: "python", + }, + { + name: "literalinclude language option takes priority over extension", + directive: Directive{ + Type: LiteralInclude, + Argument: "/path/to/example.py", + Options: map[string]string{"language": "javascript"}, + }, + want: "javascript", + }, + { + name: "literalinclude with unknown extension returns undefined", + directive: Directive{ + 
Type: LiteralInclude, + Argument: "/path/to/file.xyz", + Options: map[string]string{}, + }, + want: language.Undefined, + }, + { + name: "io-code-block with language option", + directive: Directive{ + Type: IoCodeBlock, + Argument: "", + Options: map[string]string{"language": "go"}, + }, + want: "go", + }, + { + name: "io-code-block with no language returns undefined", + directive: Directive{ + Type: IoCodeBlock, + Argument: "", + Options: map[string]string{}, + }, + want: language.Undefined, + }, + { + name: "code-block normalizes language", + directive: Directive{ + Type: CodeBlock, + Argument: "ts", + Options: map[string]string{}, + }, + want: "typescript", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.directive.ResolveLanguage() + if got != tt.want { + t.Errorf("Directive.ResolveLanguage() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestSubDirective_ResolveLanguage(t *testing.T) { + tests := []struct { + name string + subDir SubDirective + parentOptions map[string]string + want string + }{ + { + name: "sub-directive with own language option", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{"language": "python"}, + Content: "print('hello')", + }, + parentOptions: map[string]string{}, + want: "python", + }, + { + name: "sub-directive infers from filepath", + subDir: SubDirective{ + Argument: "/path/to/example.js", + Options: map[string]string{}, + Content: "", + }, + parentOptions: map[string]string{}, + want: "javascript", + }, + { + name: "sub-directive language option takes priority over filepath", + subDir: SubDirective{ + Argument: "/path/to/example.py", + Options: map[string]string{"language": "javascript"}, + Content: "", + }, + parentOptions: map[string]string{}, + want: "javascript", + }, + { + name: "sub-directive falls back to parent language", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{}, + Content: "some code", + }, + parentOptions: 
map[string]string{"language": "go"}, + want: "go", + }, + { + name: "sub-directive own language takes priority over parent", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{"language": "python"}, + Content: "print('hello')", + }, + parentOptions: map[string]string{"language": "go"}, + want: "python", + }, + { + name: "sub-directive with no language returns undefined", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{}, + Content: "some code", + }, + parentOptions: map[string]string{}, + want: language.Undefined, + }, + { + name: "sub-directive with nil parent options", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{}, + Content: "some code", + }, + parentOptions: nil, + want: language.Undefined, + }, + { + name: "sub-directive normalizes language", + subDir: SubDirective{ + Argument: "", + Options: map[string]string{"language": "ts"}, + Content: "const x = 1", + }, + parentOptions: map[string]string{}, + want: "typescript", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.subDir.ResolveLanguage(tt.parentOptions) + if got != tt.want { + t.Errorf("SubDirective.ResolveLanguage() = %q, want %q", got, tt.want) + } + }) + } +} + diff --git a/internal/rst/directive_regex.go b/internal/rst/directive_regex.go index 4ea7249..2535c7d 100644 --- a/internal/rst/directive_regex.go +++ b/internal/rst/directive_regex.go @@ -26,6 +26,11 @@ var IncludeDirectiveRegex = regexp.MustCompile(`^\.\.\s+include::\s+(.+)$`) // Example: .. literalinclude:: /path/to/file.py var LiteralIncludeDirectiveRegex = regexp.MustCompile(`^\.\.\s+literalinclude::\s+(.+)$`) +// CodeDirectiveRegex matches .. code:: directives in RST files. +// This is a shorter alias for code-block in standard reStructuredText. +// Example: .. code:: python +var CodeDirectiveRegex = regexp.MustCompile(`^\.\.\s+code::\s*(.*)$`) + // IOCodeBlockDirectiveRegex matches .. io-code-block:: directives in RST files. 
// RST Directive Option Regular Expressions
//
// Each pattern below matches one directive option line: leading whitespace,
// the option name wrapped in colons, then the option value. Capture group 1
// holds the value, which may be empty.
var (
	// TabIDOptionRegex matches :tabid: option lines.
	// Example: :tabid: python
	TabIDOptionRegex = regexp.MustCompile(`^\s+:tabid:\s*(.*)$`)

	// OptionsOptionRegex matches :options: option lines (used in composable-tutorial).
	// Example: :options: language
	OptionsOptionRegex = regexp.MustCompile(`^\s+:options:\s*(.*)$`)

	// SelectionsOptionRegex matches :selections: option lines (used in selected-content).
	// Example: :selections: python
	SelectionsOptionRegex = regexp.MustCompile(`^\s+:selections:\s*(.*)$`)

	// LanguageOptionRegex matches :language: option lines.
	// Example: :language: python
	LanguageOptionRegex = regexp.MustCompile(`^\s+:language:\s*(.*)$`)
)
+const RstspecCacheDir = ".audit-cli" + +// RstspecCacheFileName is the name of the rstspec cache file. +const RstspecCacheFileName = "rstspec-cache.json" + // RstspecComposable represents a composable definition from rstspec.toml. type RstspecComposable struct { ID string `toml:"id"` @@ -27,36 +40,156 @@ type RstspecComposableOption struct { Title string `toml:"title"` } +// RstspecTabOption represents a tab option within a tabset. +type RstspecTabOption struct { + ID string `toml:"id"` + Title string `toml:"title"` +} + // RstspecConfig represents the structure of the rstspec.toml file. // This includes all sections, though most commands will only use specific parts. type RstspecConfig struct { Composables []RstspecComposable `toml:"composables"` + // Tabs contains tabset definitions (e.g., drivers, platforms, cloud-providers) + Tabs map[string][]RstspecTabOption `toml:"tabs"` // Additional sections can be added here as needed: // Directives map[string]interface{} `toml:"directive"` // Roles map[string]interface{} `toml:"role"` // etc. } -// FetchRstspec fetches and parses the canonical rstspec.toml file. -// -// This function downloads the rstspec.toml file from the snooty-parser repository -// and parses it into an RstspecConfig structure. This file contains canonical -// definitions for RST directives, roles, composables, and other configuration -// that may be duplicated or extended in local project files. -// -// Returns: -// - *RstspecConfig: The parsed rstspec configuration -// - error: Any error encountered during fetch or parse -// -// Example: -// -// config, err := rst.FetchRstspec() -// if err != nil { -// return fmt.Errorf("failed to fetch rstspec: %w", err) -// } -// fmt.Printf("Found %d composables\n", len(config.Composables)) -func FetchRstspec() (*RstspecConfig, error) { - // Fetch the rstspec.toml file +// GetComposableOptionTitle returns the human-readable title for a composable option. 
+// For example, GetComposableOptionTitle("language", "nodejs") returns "Node.js". +func (c *RstspecConfig) GetComposableOptionTitle(composableID, optionID string) (string, bool) { + for _, comp := range c.Composables { + if comp.ID == composableID { + for _, opt := range comp.Options { + if opt.ID == optionID { + return opt.Title, true + } + } + } + } + return "", false +} + +// GetTabOptionTitle returns the human-readable title for a tab option. +// For example, GetTabOptionTitle("drivers", "nodejs") returns "Node.js". +func (c *RstspecConfig) GetTabOptionTitle(tabsetID, optionID string) (string, bool) { + if tabset, ok := c.Tabs[tabsetID]; ok { + for _, opt := range tabset { + if opt.ID == optionID { + return opt.Title, true + } + } + } + return "", false +} + +// BuildComposableIDToTitleMap builds a map from option ID to title for a specific composable. +// For example, BuildComposableIDToTitleMap("language") returns {"nodejs": "Node.js", "python": "Python", ...}. +func (c *RstspecConfig) BuildComposableIDToTitleMap(composableID string) map[string]string { + result := make(map[string]string) + for _, comp := range c.Composables { + if comp.ID == composableID { + for _, opt := range comp.Options { + result[opt.ID] = opt.Title + } + break + } + } + return result +} + +// BuildTabIDToTitleMap builds a map from tab ID to title for a specific tabset. +// For example, BuildTabIDToTitleMap("drivers") returns {"nodejs": "Node.js", "python": "Python", ...}. +func (c *RstspecConfig) BuildTabIDToTitleMap(tabsetID string) map[string]string { + result := make(map[string]string) + if tabset, ok := c.Tabs[tabsetID]; ok { + for _, opt := range tabset { + result[opt.ID] = opt.Title + } + } + return result +} + +// RstspecCache represents the cached rstspec.toml data. 
+type RstspecCache struct { + Timestamp time.Time `json:"timestamp"` + Composables []RstspecComposable `json:"composables"` + Tabs map[string][]RstspecTabOption `json:"tabs"` +} + +// getRstspecCachePath returns the path to the rstspec cache file. +func getRstspecCachePath() (string, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + return filepath.Join(homeDir, RstspecCacheDir, RstspecCacheFileName), nil +} + +// loadRstspecCache loads the rstspec from the cache file. +func loadRstspecCache() (*RstspecConfig, error) { + cachePath, err := getRstspecCachePath() + if err != nil { + return nil, err + } + + data, err := os.ReadFile(cachePath) + if err != nil { + return nil, err + } + + var cache RstspecCache + if err := json.Unmarshal(data, &cache); err != nil { + return nil, fmt.Errorf("failed to parse rstspec cache: %w", err) + } + + // Check if cache is expired + if time.Since(cache.Timestamp) > RstspecCacheTTL { + return nil, fmt.Errorf("rstspec cache expired") + } + + return &RstspecConfig{ + Composables: cache.Composables, + Tabs: cache.Tabs, + }, nil +} + +// saveRstspecCache saves the rstspec to the cache file. 
+func saveRstspecCache(config *RstspecConfig) error { + cachePath, err := getRstspecCachePath() + if err != nil { + return err + } + + // Ensure cache directory exists + cacheDir := filepath.Dir(cachePath) + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return fmt.Errorf("failed to create cache directory: %w", err) + } + + cache := RstspecCache{ + Timestamp: time.Now(), + Composables: config.Composables, + Tabs: config.Tabs, + } + + data, err := json.MarshalIndent(cache, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal rstspec cache: %w", err) + } + + if err := os.WriteFile(cachePath, data, 0644); err != nil { + return fmt.Errorf("failed to write rstspec cache: %w", err) + } + + return nil +} + +// fetchRstspecFromURL fetches and parses rstspec.toml from the remote URL. +func fetchRstspecFromURL() (*RstspecConfig, error) { resp, err := http.Get(RstspecURL) if err != nil { return nil, fmt.Errorf("failed to fetch rstspec.toml: %w", err) @@ -67,13 +200,11 @@ func FetchRstspec() (*RstspecConfig, error) { return nil, fmt.Errorf("failed to fetch rstspec.toml: HTTP %d", resp.StatusCode) } - // Read the response body body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("failed to read rstspec.toml: %w", err) } - // Parse the TOML var config RstspecConfig if err := toml.Unmarshal(body, &config); err != nil { return nil, fmt.Errorf("failed to parse rstspec.toml: %w", err) @@ -82,3 +213,57 @@ func FetchRstspec() (*RstspecConfig, error) { return &config, nil } +// FetchRstspec fetches and parses the canonical rstspec.toml file. +// +// This function uses a local cache (stored in ~/.audit-cli/rstspec-cache.json) +// to avoid repeated network requests. The cache has a 24-hour TTL. +// If the cache is missing or expired, it fetches from the snooty-parser repository. +// If the network request fails and a cached version exists (even if expired), +// it falls back to the cached version for offline support. 
+// +// Returns: +// - *RstspecConfig: The parsed rstspec configuration +// - error: Any error encountered during fetch or parse +// +// Example: +// +// config, err := rst.FetchRstspec() +// if err != nil { +// return fmt.Errorf("failed to fetch rstspec: %w", err) +// } +// fmt.Printf("Found %d composables\n", len(config.Composables)) +func FetchRstspec() (*RstspecConfig, error) { + // Try to load from cache first + config, err := loadRstspecCache() + if err == nil { + return config, nil + } + + // Cache miss or expired, try to fetch from URL + config, fetchErr := fetchRstspecFromURL() + if fetchErr != nil { + // Network failed - try to use expired cache as fallback for offline support + cachePath, pathErr := getRstspecCachePath() + if pathErr == nil { + if data, readErr := os.ReadFile(cachePath); readErr == nil { + var cache RstspecCache + if jsonErr := json.Unmarshal(data, &cache); jsonErr == nil { + // Return expired cache with a warning + fmt.Fprintf(os.Stderr, "Warning: Could not fetch rstspec.toml (%v), using expired cache\n", fetchErr) + return &RstspecConfig{ + Composables: cache.Composables, + Tabs: cache.Tabs, + }, nil + } + } + } + return nil, fetchErr + } + + // Save to cache for next time + if saveErr := saveRstspecCache(config); saveErr != nil { + fmt.Fprintf(os.Stderr, "Warning: Could not save rstspec cache: %v\n", saveErr) + } + + return config, nil +} diff --git a/internal/rst/yaml_steps_parser.go b/internal/rst/yaml_steps_parser.go new file mode 100644 index 0000000..6226a3a --- /dev/null +++ b/internal/rst/yaml_steps_parser.go @@ -0,0 +1,150 @@ +package rst + +import ( + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +// YAMLCodeBlock represents a code example in YAML-native format. +// This is the legacy format used in some steps files with action: blocks. +const YAMLCodeBlock DirectiveType = "yaml-code-block" + +// YAMLActionItem represents an action item in a YAML steps file. 
+// This is the structure used in legacy steps files for code examples. +type YAMLActionItem struct { + Pre string `yaml:"pre"` + Language string `yaml:"language"` + Code string `yaml:"code"` + Copyable bool `yaml:"copyable"` +} + +// ParseYAMLStepsFile parses a YAML steps file and extracts code examples. +// +// This function handles the legacy YAML-native format where code examples +// are defined using action: blocks with language: and code: fields, rather +// than RST directives like .. code-block::. +// +// Example YAML format: +// +// title: Download the file +// stepnum: 1 +// action: +// - pre: "Run this command:" +// language: sh +// code: | +// curl -LO https://example.com/file.tgz +// +// Parameters: +// - filePath: Path to the YAML steps file +// +// Returns: +// - []Directive: Slice of directives representing code examples +// - error: Any error encountered during parsing +func ParseYAMLStepsFile(filePath string) ([]Directive, error) { + // Only process YAML files + ext := strings.ToLower(filepath.Ext(filePath)) + if ext != ".yaml" && ext != ".yml" { + return nil, nil + } + + content, err := os.ReadFile(filePath) + if err != nil { + return nil, err + } + + var directives []Directive + + // Split by YAML document separator and parse each document + documents := strings.Split(string(content), "\n---") + lineNum := 1 + + for _, doc := range documents { + if strings.TrimSpace(doc) == "" || strings.TrimSpace(doc) == "..." 
{ + // Count lines in empty/end documents + lineNum += strings.Count(doc, "\n") + continue + } + + var step YAMLStep + if err := yaml.Unmarshal([]byte(doc), &step); err != nil { + // Skip documents that don't parse as steps + lineNum += strings.Count(doc, "\n") + continue + } + + // Extract code examples from action blocks + // Action can be a single item or a list of items + actions := extractActionsFromStep(step) + for _, action := range actions { + if action.Code != "" && action.Language != "" { + directive := Directive{ + Type: YAMLCodeBlock, + Argument: action.Language, // Language goes in Argument like code-block + Options: make(map[string]string), + Content: strings.TrimSpace(action.Code), + LineNum: lineNum, + } + // Store language in options too for consistency + directive.Options["language"] = action.Language + directives = append(directives, directive) + } + } + + lineNum += strings.Count(doc, "\n") + 1 // +1 for the --- separator + } + + return directives, nil +} + +// extractActionsFromStep extracts action items from a YAMLStep. +// The Action field can be either a single map or a list of maps. +func extractActionsFromStep(step YAMLStep) []YAMLActionItem { + if step.Action == nil { + return nil + } + + var actions []YAMLActionItem + + // Try as a list of maps first (most common) + if actionList, ok := step.Action.([]interface{}); ok { + for _, item := range actionList { + if actionMap, ok := item.(map[string]interface{}); ok { + action := parseActionMap(actionMap) + actions = append(actions, action) + } + } + return actions + } + + // Try as a single map + if actionMap, ok := step.Action.(map[string]interface{}); ok { + action := parseActionMap(actionMap) + actions = append(actions, action) + } + + return actions +} + +// parseActionMap converts a map[string]interface{} to a YAMLActionItem. 
+func parseActionMap(m map[string]interface{}) YAMLActionItem { + var action YAMLActionItem + + if pre, ok := m["pre"].(string); ok { + action.Pre = pre + } + if lang, ok := m["language"].(string); ok { + action.Language = lang + } + if code, ok := m["code"].(string); ok { + action.Code = code + } + if copyable, ok := m["copyable"].(bool); ok { + action.Copyable = copyable + } + + return action +} + diff --git a/internal/rst/yaml_steps_parser_test.go b/internal/rst/yaml_steps_parser_test.go new file mode 100644 index 0000000..4cd912b --- /dev/null +++ b/internal/rst/yaml_steps_parser_test.go @@ -0,0 +1,141 @@ +package rst + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseYAMLStepsFile(t *testing.T) { + // Create a temporary YAML steps file + tempDir, err := os.MkdirTemp("", "yaml-steps-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + yamlContent := `title: Download the file +stepnum: 1 +ref: download-file +action: + - pre: | + Run this command: + language: sh + copyable: true + code: | + curl -LO https://example.com/file.tgz + - pre: | + For Linux: + language: bash + code: | + wget https://example.com/file.tgz +--- +title: Verify the file +stepnum: 2 +ref: verify-file +action: + - pre: | + Check the signature: + language: sh + code: | + gpg --verify file.tgz.sig file.tgz +... 
+` + + testFile := filepath.Join(tempDir, "steps-test.yaml") + if err := os.WriteFile(testFile, []byte(yamlContent), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + // Parse the file + directives, err := ParseYAMLStepsFile(testFile) + if err != nil { + t.Fatalf("ParseYAMLStepsFile failed: %v", err) + } + + // Should find 3 code examples + if len(directives) != 3 { + t.Errorf("Expected 3 directives, got %d", len(directives)) + } + + // Verify directive types and languages + expectedLangs := []string{"sh", "bash", "sh"} + for i, d := range directives { + if d.Type != YAMLCodeBlock { + t.Errorf("Directive %d: expected type %s, got %s", i, YAMLCodeBlock, d.Type) + } + if d.Argument != expectedLangs[i] { + t.Errorf("Directive %d: expected language %s, got %s", i, expectedLangs[i], d.Argument) + } + if d.Content == "" { + t.Errorf("Directive %d: expected non-empty content", i) + } + } +} + +func TestParseYAMLStepsFile_NonYAMLFile(t *testing.T) { + // Create a temporary RST file + tempDir, err := os.MkdirTemp("", "yaml-steps-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + testFile := filepath.Join(tempDir, "test.rst") + if err := os.WriteFile(testFile, []byte(".. 
code-block:: python\n\n print('hello')"), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + // Parse the file - should return nil for non-YAML files + directives, err := ParseYAMLStepsFile(testFile) + if err != nil { + t.Fatalf("ParseYAMLStepsFile failed: %v", err) + } + + if directives != nil { + t.Errorf("Expected nil directives for non-YAML file, got %d", len(directives)) + } +} + +func TestParseDirectives_IncludesYAMLCodeBlocks(t *testing.T) { + // Create a temporary YAML steps file + tempDir, err := os.MkdirTemp("", "yaml-steps-test") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tempDir) + + yamlContent := `title: Run command +stepnum: 1 +action: + - language: python + code: | + print("hello") +` + + testFile := filepath.Join(tempDir, "steps-test.yaml") + if err := os.WriteFile(testFile, []byte(yamlContent), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + // Use ParseDirectives (the main entry point) + directives, err := ParseDirectives(testFile) + if err != nil { + t.Fatalf("ParseDirectives failed: %v", err) + } + + // Should find 1 code example + if len(directives) != 1 { + t.Errorf("Expected 1 directive, got %d", len(directives)) + } + + if len(directives) > 0 { + if directives[0].Type != YAMLCodeBlock { + t.Errorf("Expected type %s, got %s", YAMLCodeBlock, directives[0].Type) + } + if directives[0].Argument != "python" { + t.Errorf("Expected language python, got %s", directives[0].Argument) + } + } +} + diff --git a/internal/snooty/snooty.go b/internal/snooty/snooty.go new file mode 100644 index 0000000..7adc0b8 --- /dev/null +++ b/internal/snooty/snooty.go @@ -0,0 +1,163 @@ +// Package snooty provides utilities for parsing snooty.toml configuration files. 
+// +// This package provides: +// - Types for representing snooty.toml structure (composables, options) +// - Functions for parsing snooty.toml files +// - Functions for finding snooty.toml files in the monorepo +// - Functions for finding a project's snooty.toml from a source file path +package snooty + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/BurntSushi/toml" + "github.com/grove-platform/audit-cli/internal/projectinfo" +) + +// Composable represents a composable definition from a snooty.toml file. +type Composable struct { + ID string `toml:"id"` + Title string `toml:"title"` + Default string `toml:"default"` + Dependencies []map[string]string `toml:"dependencies"` + Options []ComposableOption `toml:"options"` +} + +// ComposableOption represents an option within a composable. +type ComposableOption struct { + ID string `toml:"id"` + Title string `toml:"title"` +} + +// Config represents the structure of a snooty.toml file. +type Config struct { + Name string `toml:"name"` + Title string `toml:"title"` + Composables []Composable `toml:"composables"` +} + +// ParseFile parses a snooty.toml file and returns its configuration. +// +// Parameters: +// - filePath: Path to the snooty.toml file +// +// Returns: +// - *Config: The parsed configuration +// - error: Any error encountered during parsing +func ParseFile(filePath string) (*Config, error) { + var config Config + _, err := toml.DecodeFile(filePath, &config) + if err != nil { + return nil, fmt.Errorf("failed to parse snooty.toml: %w", err) + } + return &config, nil +} + +// FindProjectSnootyTOML finds the snooty.toml file for a project based on a source file path. 
//
// Given a source file path like:
//   - /path/to/monorepo/content/atlas/source/foo.rst
//   - /path/to/monorepo/content/manual/v8.0/source/bar.rst
//
// This function walks up the directory tree to find the snooty.toml file at:
//   - /path/to/monorepo/content/atlas/snooty.toml
//   - /path/to/monorepo/content/manual/v8.0/snooty.toml
//
// The search starts in the directory that contains sourcePath and moves to the
// parent directory on every miss. It ends after a directory named "content"
// has been checked, or when the filesystem root is reached. Only a
// non-directory entry named snooty.toml counts as a match, so a directory that
// happens to carry that name does not end the search.
//
// Parameters:
//   - sourcePath: Path to a source file within a project; a relative path is
//     made absolute with filepath.Abs before the search begins
//
// Returns:
//   - string: Path to the snooty.toml file, or empty string if not found
//   - error: Non-nil only when sourcePath cannot be made absolute; a missing
//     snooty.toml is reported as ("", nil)
func FindProjectSnootyTOML(sourcePath string) (string, error) {
	absPath, err := filepath.Abs(sourcePath)
	if err != nil {
		return "", fmt.Errorf("failed to get absolute path: %w", err)
	}

	// Begin in the directory that holds the source file.
	dir := filepath.Dir(absPath)
	for {
		candidate := filepath.Join(dir, "snooty.toml")
		// A directory named snooty.toml is not a config file; keep walking.
		if info, statErr := os.Stat(candidate); statErr == nil && !info.IsDir() {
			return candidate, nil
		}

		// The content directory is the upper bound of the search.
		if filepath.Base(dir) == "content" {
			break
		}

		parent := filepath.Dir(dir)
		if parent == dir {
			// filepath.Dir is a fixed point at the filesystem root.
			break
		}
		dir = parent
	}

	return "", nil // Not found, but not an error
}

// BuildComposableIDToTitleMap builds a map from composable option IDs to titles
// for a specific composable type (e.g., "language", "interface").
+// +// Parameters: +// - composables: Slice of composables from a snooty.toml file +// - composableID: The ID of the composable to extract (e.g., "language", "interface") +// +// Returns: +// - map[string]string: Map from option ID to option title +func BuildComposableIDToTitleMap(composables []Composable, composableID string) map[string]string { + result := make(map[string]string) + for _, comp := range composables { + if comp.ID == composableID { + for _, opt := range comp.Options { + result[opt.ID] = opt.Title + } + } + } + return result +} + +// IsCurrentVersion checks if a version string represents a "current" version. +// This is a convenience wrapper around projectinfo.IsCurrentVersion. +func IsCurrentVersion(version string) bool { + return projectinfo.IsCurrentVersion(version) +} + +// ExtractProjectAndVersion extracts project and version from a relative path. +// Returns (project, version) where version is empty for non-versioned projects. +// +// Examples: +// - "manual/v8.0/snooty.toml" -> ("manual", "v8.0") +// - "atlas/snooty.toml" -> ("atlas", "") +func ExtractProjectAndVersion(relPath string) (string, string) { + parts := strings.Split(relPath, string(filepath.Separator)) + if len(parts) < 2 { + return "", "" + } + + projectName := parts[0] + + // Check if this is a versioned project + // Pattern: project/version/snooty.toml (3 parts) + // Pattern: project/snooty.toml (2 parts) + if len(parts) == 3 && parts[2] == "snooty.toml" { + return projectName, parts[1] + } else if len(parts) == 2 && parts[1] == "snooty.toml" { + return projectName, "" + } + + return "", "" +} + diff --git a/internal/snooty/snooty_test.go b/internal/snooty/snooty_test.go new file mode 100644 index 0000000..9f78969 --- /dev/null +++ b/internal/snooty/snooty_test.go @@ -0,0 +1,316 @@ +package snooty + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseFile(t *testing.T) { + // Create a temporary snooty.toml file + tempDir := t.TempDir() + snootyPath := 
filepath.Join(tempDir, "snooty.toml") + + content := ` +name = "test-project" +title = "Test Project" + +[[composables]] +id = "language" +title = "Language" +default = "python" + +[[composables.options]] +id = "python" +title = "Python" + +[[composables.options]] +id = "javascript" +title = "JavaScript" + +[[composables]] +id = "interface" +title = "Interface" +default = "atlas" + +[[composables.options]] +id = "atlas" +title = "Atlas" + +[[composables.options]] +id = "mongosh" +title = "MongoDB Shell" +` + if err := os.WriteFile(snootyPath, []byte(content), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + config, err := ParseFile(snootyPath) + if err != nil { + t.Fatalf("ParseFile() error = %v", err) + } + + if config.Name != "test-project" { + t.Errorf("config.Name = %q, want %q", config.Name, "test-project") + } + + if config.Title != "Test Project" { + t.Errorf("config.Title = %q, want %q", config.Title, "Test Project") + } + + if len(config.Composables) != 2 { + t.Fatalf("len(config.Composables) = %d, want 2", len(config.Composables)) + } + + // Check first composable + lang := config.Composables[0] + if lang.ID != "language" { + t.Errorf("Composables[0].ID = %q, want %q", lang.ID, "language") + } + if len(lang.Options) != 2 { + t.Errorf("len(Composables[0].Options) = %d, want 2", len(lang.Options)) + } + if lang.Options[0].ID != "python" || lang.Options[0].Title != "Python" { + t.Errorf("Composables[0].Options[0] = {%q, %q}, want {python, Python}", + lang.Options[0].ID, lang.Options[0].Title) + } +} + +func TestParseFile_InvalidFile(t *testing.T) { + _, err := ParseFile("/nonexistent/path/snooty.toml") + if err == nil { + t.Error("ParseFile() expected error for nonexistent file, got nil") + } +} + +func TestParseFile_InvalidTOML(t *testing.T) { + tempDir := t.TempDir() + snootyPath := filepath.Join(tempDir, "snooty.toml") + + // Write invalid TOML + if err := os.WriteFile(snootyPath, []byte("invalid = [toml"), 0644); err != nil { 
+ t.Fatalf("Failed to write test file: %v", err) + } + + _, err := ParseFile(snootyPath) + if err == nil { + t.Error("ParseFile() expected error for invalid TOML, got nil") + } +} + +func TestFindProjectSnootyTOML(t *testing.T) { + // Create a mock project structure + tempDir := t.TempDir() + + // Create: content/atlas/snooty.toml + atlasDir := filepath.Join(tempDir, "content", "atlas") + if err := os.MkdirAll(atlasDir, 0755); err != nil { + t.Fatalf("Failed to create atlas dir: %v", err) + } + atlasSnootyPath := filepath.Join(atlasDir, "snooty.toml") + if err := os.WriteFile(atlasSnootyPath, []byte("name = \"atlas\""), 0644); err != nil { + t.Fatalf("Failed to write snooty.toml: %v", err) + } + + // Create: content/atlas/source/getting-started.txt + atlasSourceDir := filepath.Join(atlasDir, "source") + if err := os.MkdirAll(atlasSourceDir, 0755); err != nil { + t.Fatalf("Failed to create source dir: %v", err) + } + sourceFile := filepath.Join(atlasSourceDir, "getting-started.txt") + if err := os.WriteFile(sourceFile, []byte("test"), 0644); err != nil { + t.Fatalf("Failed to write source file: %v", err) + } + + // Test finding snooty.toml from source file + found, err := FindProjectSnootyTOML(sourceFile) + if err != nil { + t.Fatalf("FindProjectSnootyTOML() error = %v", err) + } + if found != atlasSnootyPath { + t.Errorf("FindProjectSnootyTOML() = %q, want %q", found, atlasSnootyPath) + } +} + +func TestFindProjectSnootyTOML_VersionedProject(t *testing.T) { + // Create a mock versioned project structure + tempDir := t.TempDir() + + // Create: content/manual/v8.0/snooty.toml + versionDir := filepath.Join(tempDir, "content", "manual", "v8.0") + if err := os.MkdirAll(versionDir, 0755); err != nil { + t.Fatalf("Failed to create version dir: %v", err) + } + snootyPath := filepath.Join(versionDir, "snooty.toml") + if err := os.WriteFile(snootyPath, []byte("name = \"manual\""), 0644); err != nil { + t.Fatalf("Failed to write snooty.toml: %v", err) + } + + // Create: 
content/manual/v8.0/source/tutorial/install.txt + sourceDir := filepath.Join(versionDir, "source", "tutorial") + if err := os.MkdirAll(sourceDir, 0755); err != nil { + t.Fatalf("Failed to create source dir: %v", err) + } + sourceFile := filepath.Join(sourceDir, "install.txt") + if err := os.WriteFile(sourceFile, []byte("test"), 0644); err != nil { + t.Fatalf("Failed to write source file: %v", err) + } + + // Test finding snooty.toml from nested source file + found, err := FindProjectSnootyTOML(sourceFile) + if err != nil { + t.Fatalf("FindProjectSnootyTOML() error = %v", err) + } + if found != snootyPath { + t.Errorf("FindProjectSnootyTOML() = %q, want %q", found, snootyPath) + } +} + +func TestFindProjectSnootyTOML_NotFound(t *testing.T) { + // Create a directory structure without snooty.toml + tempDir := t.TempDir() + contentDir := filepath.Join(tempDir, "content", "project", "source") + if err := os.MkdirAll(contentDir, 0755); err != nil { + t.Fatalf("Failed to create dir: %v", err) + } + sourceFile := filepath.Join(contentDir, "test.txt") + if err := os.WriteFile(sourceFile, []byte("test"), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + + found, err := FindProjectSnootyTOML(sourceFile) + if err != nil { + t.Fatalf("FindProjectSnootyTOML() error = %v", err) + } + if found != "" { + t.Errorf("FindProjectSnootyTOML() = %q, want empty string", found) + } +} + +func TestBuildComposableIDToTitleMap(t *testing.T) { + composables := []Composable{ + { + ID: "language", + Title: "Language", + Options: []ComposableOption{ + {ID: "python", Title: "Python"}, + {ID: "javascript", Title: "JavaScript"}, + {ID: "go", Title: "Go"}, + }, + }, + { + ID: "interface", + Title: "Interface", + Options: []ComposableOption{ + {ID: "atlas", Title: "Atlas"}, + {ID: "mongosh", Title: "MongoDB Shell"}, + }, + }, + } + + // Test extracting language composable + langMap := BuildComposableIDToTitleMap(composables, "language") + if len(langMap) != 3 { + 
t.Errorf("len(langMap) = %d, want 3", len(langMap)) + } + if langMap["python"] != "Python" { + t.Errorf("langMap[python] = %q, want %q", langMap["python"], "Python") + } + if langMap["javascript"] != "JavaScript" { + t.Errorf("langMap[javascript] = %q, want %q", langMap["javascript"], "JavaScript") + } + + // Test extracting interface composable + ifaceMap := BuildComposableIDToTitleMap(composables, "interface") + if len(ifaceMap) != 2 { + t.Errorf("len(ifaceMap) = %d, want 2", len(ifaceMap)) + } + if ifaceMap["mongosh"] != "MongoDB Shell" { + t.Errorf("ifaceMap[mongosh] = %q, want %q", ifaceMap["mongosh"], "MongoDB Shell") + } + + // Test non-existent composable + emptyMap := BuildComposableIDToTitleMap(composables, "nonexistent") + if len(emptyMap) != 0 { + t.Errorf("len(emptyMap) = %d, want 0", len(emptyMap)) + } +} + +func TestExtractProjectAndVersion(t *testing.T) { + tests := []struct { + name string + relPath string + wantProject string + wantVersion string + }{ + { + name: "versioned project", + relPath: "manual/v8.0/snooty.toml", + wantProject: "manual", + wantVersion: "v8.0", + }, + { + name: "non-versioned project", + relPath: "atlas/snooty.toml", + wantProject: "atlas", + wantVersion: "", + }, + { + name: "current version", + relPath: "node/current/snooty.toml", + wantProject: "node", + wantVersion: "current", + }, + { + name: "too short path", + relPath: "snooty.toml", + wantProject: "", + wantVersion: "", + }, + { + name: "not snooty.toml", + relPath: "manual/v8.0/source/index.txt", + wantProject: "", + wantVersion: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotProject, gotVersion := ExtractProjectAndVersion(tt.relPath) + if gotProject != tt.wantProject { + t.Errorf("ExtractProjectAndVersion(%q) project = %q, want %q", + tt.relPath, gotProject, tt.wantProject) + } + if gotVersion != tt.wantVersion { + t.Errorf("ExtractProjectAndVersion(%q) version = %q, want %q", + tt.relPath, gotVersion, tt.wantVersion) + } + 
}) + } +} + +func TestIsCurrentVersion(t *testing.T) { + tests := []struct { + version string + want bool + }{ + {"current", true}, + {"manual", true}, + {"v8.0", false}, + {"v7.0", false}, + {"master", false}, + {"upcoming", false}, + {"latest", false}, + } + + for _, tt := range tests { + t.Run(tt.version, func(t *testing.T) { + got := IsCurrentVersion(tt.version) + if got != tt.want { + t.Errorf("IsCurrentVersion(%q) = %v, want %v", tt.version, got, tt.want) + } + }) + } +} + diff --git a/main.go b/main.go index 966d293..d8cbd03 100644 --- a/main.go +++ b/main.go @@ -19,13 +19,14 @@ import ( "github.com/grove-platform/audit-cli/commands/compare" "github.com/grove-platform/audit-cli/commands/count" "github.com/grove-platform/audit-cli/commands/extract" + "github.com/grove-platform/audit-cli/commands/report" "github.com/grove-platform/audit-cli/commands/search" "github.com/spf13/cobra" ) // version is the current version of audit-cli. // Update this when releasing new versions following semantic versioning. 
-const version = "0.2.0" +const version = "0.3.0" func main() { var rootCmd = &cobra.Command{ @@ -53,6 +54,7 @@ Designed for maintenance tasks, scoping work, and reporting to stakeholders.`, rootCmd.AddCommand(analyze.NewAnalyzeCommand()) rootCmd.AddCommand(compare.NewCompareCommand()) rootCmd.AddCommand(count.NewCountCommand()) + rootCmd.AddCommand(report.NewReportCommand()) err := rootCmd.Execute() if err != nil { diff --git a/testdata/expected-output/io-code-block-test.io-code-block.4.output.txt b/testdata/expected-output/io-code-block-test.io-code-block.4.output.json similarity index 100% rename from testdata/expected-output/io-code-block-test.io-code-block.4.output.txt rename to testdata/expected-output/io-code-block-test.io-code-block.4.output.json diff --git a/testdata/expected-output/io-code-block-test.io-code-block.5.output.txt b/testdata/expected-output/io-code-block-test.io-code-block.5.output.json similarity index 100% rename from testdata/expected-output/io-code-block-test.io-code-block.5.output.txt rename to testdata/expected-output/io-code-block-test.io-code-block.5.output.json diff --git a/testdata/testable-code-test/content/test-project/snooty.toml b/testdata/testable-code-test/content/test-project/snooty.toml new file mode 100644 index 0000000..60271bc --- /dev/null +++ b/testdata/testable-code-test/content/test-project/snooty.toml @@ -0,0 +1,22 @@ +name = "test-project" +title = "Test Project" + +[[composables]] +id = "language" +title = "Language" +default = "python" +options = [ + {id = "python", title = "Python"}, + {id = "nodejs", title = "Node.js"}, + {id = "go", title = "Go"}, +] + +[[composables]] +id = "interface" +title = "Interface" +default = "driver" +options = [ + {id = "driver", title = "Driver"}, + {id = "mongosh", title = "MongoDB Shell"}, +] + diff --git a/testdata/testable-code-test/content/test-project/source/includes/nodejs-example.rst b/testdata/testable-code-test/content/test-project/source/includes/nodejs-example.rst new 
file mode 100644 index 0000000..2c9abec --- /dev/null +++ b/testdata/testable-code-test/content/test-project/source/includes/nodejs-example.rst @@ -0,0 +1,11 @@ +Node.js Include Example +----------------------- + +This is an included file with Node.js code. + +.. code-block:: javascript + + // Included Node.js code + const cursor = collection.find(); + await cursor.forEach(doc => console.log(doc)); + diff --git a/testdata/testable-code-test/content/test-project/source/includes/python-example.rst b/testdata/testable-code-test/content/test-project/source/includes/python-example.rst new file mode 100644 index 0000000..0cb5d64 --- /dev/null +++ b/testdata/testable-code-test/content/test-project/source/includes/python-example.rst @@ -0,0 +1,11 @@ +Python Include Example +---------------------- + +This is an included file with Python code. + +.. code-block:: python + + # Included Python code + for doc in collection.find(): + print(doc) + diff --git a/testdata/testable-code-test/content/test-project/source/simple-code.rst b/testdata/testable-code-test/content/test-project/source/simple-code.rst new file mode 100644 index 0000000..83f5b27 --- /dev/null +++ b/testdata/testable-code-test/content/test-project/source/simple-code.rst @@ -0,0 +1,33 @@ +Simple Code Examples +==================== + +This file contains simple code examples for testing. + +Python Example +-------------- + +.. code-block:: python + + print("Hello, World!") + +JavaScript Example +------------------ + +.. code-block:: javascript + + console.log("Hello, World!"); + +JSON Data +--------- + +.. code-block:: json + + {"name": "test", "value": 123} + +Shell Command +------------- + +.. 
code-block:: sh + + echo "Hello" + diff --git a/testdata/testable-code-test/content/test-project/source/with-selected-content.rst b/testdata/testable-code-test/content/test-project/source/with-selected-content.rst new file mode 100644 index 0000000..0660993 --- /dev/null +++ b/testdata/testable-code-test/content/test-project/source/with-selected-content.rst @@ -0,0 +1,34 @@ +Composable Tutorial Example +=========================== + +This file uses composable tutorials with selected-content blocks. + +.. composable-tutorial:: + :options: language=python; interface=driver + + Introduction to the tutorial. + + .. selected-content:: + :selections: python + + Python-specific content: + + .. code-block:: python + + # This is Python code in a selected-content block + result = collection.find_one() + + .. include:: /includes/python-example.rst + + .. selected-content:: + :selections: nodejs + + Node.js-specific content: + + .. code-block:: javascript + + // This is Node.js code in a selected-content block + const result = await collection.findOne(); + + .. include:: /includes/nodejs-example.rst + diff --git a/testdata/testable-code-test/content/test-project/source/with-tabs.rst b/testdata/testable-code-test/content/test-project/source/with-tabs.rst new file mode 100644 index 0000000..d4c760c --- /dev/null +++ b/testdata/testable-code-test/content/test-project/source/with-tabs.rst @@ -0,0 +1,36 @@ +Driver Tabs Example +=================== + +This file contains code examples within driver tabs. + +.. tabs-drivers:: + + .. tab:: + :tabid: python + + Python driver example: + + .. code-block:: python + + from pymongo import MongoClient + client = MongoClient() + + .. tab:: + :tabid: nodejs + + Node.js driver example: + + .. code-block:: javascript + + const { MongoClient } = require('mongodb'); + const client = new MongoClient(uri); + + .. tab:: + :tabid: java-sync + + Java driver example: + + .. code-block:: java + + MongoClient client = MongoClients.create(); +