From 70a6c5263900ac11da1a9e9fdaa759a6513533e6 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Feb 2026 09:23:55 -0500 Subject: [PATCH] Add debug utils to fix CSV issues --- create-url-list/.gitignore | 2 + create-url-list/README.md | 81 +++++++++++++++ create-url-list/go.mod | 5 +- create-url-list/go.sum | 2 + create-url-list/utils/README.md | 142 +++++++++++++++++++++++++++ create-url-list/utils/convert-csv.go | 69 +++++++++++++ create-url-list/utils/debug-csv.go | 107 ++++++++++++++++++++ 7 files changed, 407 insertions(+), 1 deletion(-) create mode 100644 create-url-list/utils/README.md create mode 100644 create-url-list/utils/convert-csv.go create mode 100644 create-url-list/utils/debug-csv.go diff --git a/create-url-list/.gitignore b/create-url-list/.gitignore index 15c097f..b2f9178 100644 --- a/create-url-list/.gitignore +++ b/create-url-list/.gitignore @@ -2,3 +2,5 @@ output/ test-output/ create-url-list config.yml +utils/convert-csv +utils/debug-csv diff --git a/create-url-list/README.md b/create-url-list/README.md index 4dd7e22..c2dcc11 100644 --- a/create-url-list/README.md +++ b/create-url-list/README.md @@ -117,3 +117,84 @@ The tool exits with code 1 and displays an error message if: - Required columns are missing from the CSV - URL structure doesn't match expected format (must start with `www.`) - Range format is invalid + +## Troubleshooting Utilities + +The `utils/` directory contains helper tools for diagnosing and fixing CSV format issues. + +### CSV Format Debugger + +If you're getting a "missing required columns" error, use the debug tool to inspect your CSV file: + +```bash +# Build the debug tool +cd utils +go build -o debug-csv debug-csv.go + +# Run it on your CSV file +./debug-csv /path/to/your/file.csv +``` + +The debug tool will show you: +- How many columns were detected +- The exact name of each column (with quotes to reveal whitespace) +- Byte representation to reveal hidden characters (BOM, special encoding, etc.) 
+- Whether each required column was found +- Warnings about common issues (BOM, extra whitespace, etc.) + +**Example output:** +``` +Found 5 columns in header: + +Column 0: "Page" + Bytes: [80 97 103 101] + Length: 4 + ✓ Matches required column 'Page' + +Column 2: "Measure Names" + Bytes: [77 101 97 115 117 114 101 32 78 97 109 101 115] + Length: 13 + ✓ Matches required column 'Measure Names' +... +``` + +### CSV Format Converter + +If your CSV file is in UTF-16 encoding or tab-delimited format (common with Excel/Tableau exports), use the converter tool: + +```bash +# Build the converter tool +cd utils +go build -o convert-csv convert-csv.go + +# Convert your file +./convert-csv /path/to/input.csv /path/to/output.csv +``` + +This tool converts: +- **From:** UTF-16 encoding with tab delimiters +- **To:** UTF-8 encoding with comma delimiters (standard CSV) + +**Example:** +```bash +# Convert a Tableau export +./convert-csv ~/Downloads/tableau-export.csv ~/temp/converted.csv + +# Then use the converted file +cd .. +./create-url-list ~/temp/converted.csv 1-250 output.csv +``` + +### Common CSV Issues + +1. **UTF-16 encoding with BOM** - File starts with byte order mark (bytes `255 254`) + - **Solution:** Use `convert-csv` tool + +2. **Tab-delimited instead of comma-delimited** - Columns separated by tabs + - **Solution:** Use `convert-csv` tool + +3. **Extra whitespace in column names** - Column named `" Page "` instead of `"Page"` + - **Solution:** Edit the CSV header row to remove extra spaces + +4. 
**Wrong column names** - Different capitalization or spelling + - **Solution:** Rename columns to exactly match: `Page`, `Measure Names`, `Measure Values` diff --git a/create-url-list/go.mod b/create-url-list/go.mod index d9d28dc..683c2ec 100644 --- a/create-url-list/go.mod +++ b/create-url-list/go.mod @@ -2,4 +2,7 @@ module create-url-list go 1.25.4 -require gopkg.in/yaml.v3 v3.0.1 // indirect +require ( + golang.org/x/text v0.33.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/create-url-list/go.sum b/create-url-list/go.sum index 4bc0337..485385a 100644 --- a/create-url-list/go.sum +++ b/create-url-list/go.sum @@ -1,3 +1,5 @@ +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/create-url-list/utils/README.md b/create-url-list/utils/README.md new file mode 100644 index 0000000..fc6a472 --- /dev/null +++ b/create-url-list/utils/README.md @@ -0,0 +1,142 @@ +# Utility Tools for create-url-list + +This directory contains diagnostic and conversion tools for troubleshooting CSV format issues with `create-url-list`. + +## Tools + +### debug-csv - CSV Format Inspector + +Inspects a CSV file to diagnose format issues and verify column names. 
+ +**Build:** +```bash +go build -o debug-csv debug-csv.go +``` + +**Usage:** +```bash +./debug-csv <path-to-csv-file> +``` + +**What it shows:** +- Number of columns detected +- Exact column names (with quotes to reveal whitespace) +- Byte representation of each column name (to detect encoding issues) +- Column length +- Whether required columns (`Page`, `Measure Names`, `Measure Values`) are present +- Warnings about common issues: + - BOM (Byte Order Mark) at start of file + - Leading/trailing whitespace in column names + +**Example:** +```bash +./debug-csv ~/Downloads/analytics-data.csv +``` + +**Sample output:** +``` +Found 4 columns in header: + +Column 0: "Page" + Bytes: [80 97 103 101] + Length: 4 + ✓ Matches required column 'Page' + +Column 1: "Page Subsite" + Bytes: [80 97 103 101 32 83 117 98 115 105 116 101] + Length: 12 + +Column 2: "Measure Names" + Bytes: [77 101 97 115 117 114 101 32 78 97 109 101 115] + Length: 13 + ✓ Matches required column 'Measure Names' + +Column 3: "Measure Values" + Bytes: [77 101 97 115 117 114 101 32 86 97 108 117 101 115] + Length: 14 + ✓ Matches required column 'Measure Values' + +Required columns check: + ✓ 'Page' found + ✓ 'Measure Names' found + ✓ 'Measure Values' found + +✓ All required columns present! +``` + +--- + +### convert-csv - CSV Format Converter + +Converts UTF-16 tab-delimited CSV files to UTF-8 comma-delimited format (standard CSV). + +**Build:** +```bash +go build -o convert-csv convert-csv.go +``` + +**Usage:** +```bash +./convert-csv <input-file> <output-file> +``` + +**What it does:** +- Reads UTF-16 encoded files (with or without BOM) +- Handles tab-delimited data +- Outputs standard UTF-8 comma-delimited CSV + +**Example:** +```bash +# Convert a Tableau or Excel export +./convert-csv ~/Downloads/tableau-export.csv ~/temp/converted.csv + +# Then use with create-url-list +cd .. 
+./create-url-list ~/temp/converted.csv 1-250 output.csv +``` + +**Sample output:** +``` +Successfully converted 51396 rows from /path/to/input.csv to /path/to/output.csv +Input format: UTF-16 tab-delimited +Output format: UTF-8 comma-delimited +``` + +--- + +## Common Workflow + +When you encounter a "missing required columns" error: + +1. **Diagnose the issue:** + ```bash + ./debug-csv /path/to/problematic-file.csv + ``` + +2. **If the file is UTF-16 or tab-delimited, convert it:** + ```bash + ./convert-csv /path/to/problematic-file.csv /path/to/fixed-file.csv + ``` + +3. **Verify the conversion worked:** + ```bash + ./debug-csv /path/to/fixed-file.csv + ``` + +4. **Use the fixed file with create-url-list:** + ```bash + cd .. + ./create-url-list /path/to/fixed-file.csv 1-250 output.csv + ``` + +## Dependencies + +The `convert-csv` tool requires the `golang.org/x/text` package: + +```bash +go get golang.org/x/text/encoding/unicode +go get golang.org/x/text/transform +``` + +This dependency is automatically downloaded when you build the tool. 
+ diff --git a/create-url-list/utils/convert-csv.go b/create-url-list/utils/convert-csv.go new file mode 100644 index 0000000..ee30194 --- /dev/null +++ b/create-url-list/utils/convert-csv.go @@ -0,0 +1,69 @@ +package main + +import ( + "encoding/csv" + "fmt" + "os" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" +) + +func main() { + if len(os.Args) < 3 { + fmt.Fprintf(os.Stderr, "Usage: %s \n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Converts UTF-16 tab-delimited CSV to UTF-8 comma-delimited CSV\n") + os.Exit(1) + } + + inputPath := os.Args[1] + outputPath := os.Args[2] + + // Open input file + inputFile, err := os.Open(inputPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening input file: %v\n", err) + os.Exit(1) + } + defer inputFile.Close() + + // Create UTF-16 decoder + decoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder() + reader := transform.NewReader(inputFile, decoder) + + // Create CSV reader with tab delimiter + csvReader := csv.NewReader(reader) + csvReader.Comma = '\t' + csvReader.LazyQuotes = true + + // Read all records + records, err := csvReader.ReadAll() + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading CSV: %v\n", err) + os.Exit(1) + } + + // Create output file + outputFile, err := os.Create(outputPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) + os.Exit(1) + } + defer outputFile.Close() + + // Create CSV writer (defaults to comma delimiter) + csvWriter := csv.NewWriter(outputFile) + defer csvWriter.Flush() + + // Write all records + for _, record := range records { + if err := csvWriter.Write(record); err != nil { + fmt.Fprintf(os.Stderr, "Error writing record: %v\n", err) + os.Exit(1) + } + } + + fmt.Printf("Successfully converted %d rows from %s to %s\n", len(records), inputPath, outputPath) + fmt.Printf("Input format: UTF-16 tab-delimited\n") + fmt.Printf("Output format: UTF-8 comma-delimited\n") +} diff --git 
a/create-url-list/utils/debug-csv.go b/create-url-list/utils/debug-csv.go new file mode 100644 index 0000000..12fa6cf --- /dev/null +++ b/create-url-list/utils/debug-csv.go @@ -0,0 +1,107 @@ +package main + +import ( + "encoding/csv" + "fmt" + "os" + "strings" +) + +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "Usage: %s \n", os.Args[0]) + os.Exit(1) + } + + file, err := os.Open(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) + os.Exit(1) + } + defer file.Close() + + reader := csv.NewReader(file) + + // Read header + header, err := reader.Read() + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading header: %v\n", err) + os.Exit(1) + } + + fmt.Printf("Found %d columns in header:\n\n", len(header)) + + for i, col := range header { + // Show the column with quotes to reveal whitespace + fmt.Printf("Column %d: \"%s\"\n", i, col) + + // Show byte representation to reveal hidden characters + fmt.Printf(" Bytes: %v\n", []byte(col)) + + // Show length + fmt.Printf(" Length: %d\n", len(col)) + + // Check for BOM or other special characters + if strings.HasPrefix(col, "\ufeff") { + fmt.Printf(" ⚠️ Contains BOM (Byte Order Mark) at start\n") + } + if strings.TrimSpace(col) != col { + fmt.Printf(" ⚠️ Contains leading/trailing whitespace\n") + } + + // Check if it matches expected columns + switch col { + case "Page": + fmt.Printf(" ✓ Matches required column 'Page'\n") + case "Measure Names": + fmt.Printf(" ✓ Matches required column 'Measure Names'\n") + case "Measure Values": + fmt.Printf(" ✓ Matches required column 'Measure Values'\n") + } + + fmt.Println() + } + + // Check for required columns + fmt.Println("Required columns check:") + hasPage := false + hasMeasureNames := false + hasMeasureValues := false + + for _, col := range header { + if col == "Page" { + hasPage = true + } + if col == "Measure Names" { + hasMeasureNames = true + } + if col == "Measure Values" { + hasMeasureValues = true + } + } + + if 
hasPage { + fmt.Println(" ✓ 'Page' found") + } else { + fmt.Println(" ✗ 'Page' NOT found") + } + + if hasMeasureNames { + fmt.Println(" ✓ 'Measure Names' found") + } else { + fmt.Println(" ✗ 'Measure Names' NOT found") + } + + if hasMeasureValues { + fmt.Println(" ✓ 'Measure Values' found") + } else { + fmt.Println(" ✗ 'Measure Values' NOT found") + } + + if hasPage && hasMeasureNames && hasMeasureValues { + fmt.Println("\n✓ All required columns present!") + } else { + fmt.Println("\n✗ Missing required columns") + } +} +