diff --git a/go.mod b/go.mod index 5860905..63b198c 100644 --- a/go.mod +++ b/go.mod @@ -6,8 +6,8 @@ toolchain go1.24.7 require ( github.com/atotto/clipboard v0.1.4 - github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/google/generative-ai-go v0.19.0 + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/manifoldco/promptui v0.9.0 github.com/openai/openai-go/v3 v3.0.1 github.com/pterm/pterm v0.12.80 diff --git a/internal/git/operations.go b/internal/git/operations.go index 8b26fc3..a8947ee 100644 --- a/internal/git/operations.go +++ b/internal/git/operations.go @@ -22,6 +22,107 @@ func IsRepository(path string) bool { return strings.TrimSpace(string(output)) == "true" } +// parseGitStatusLine represents a parsed git status line +type parseGitStatusLine struct { + status string + filenames []string +} + +// parseGitNameStatus parses a single line from git diff --name-status output +// Handles various git status codes including rename (R) and copy (C) operations +func parseGitNameStatus(line string) parseGitStatusLine { + if line == "" { + return parseGitStatusLine{} + } + + // Git uses tabs to separate fields in --name-status output + parts := strings.Split(line, "\t") + if len(parts) < 2 { + return parseGitStatusLine{} + } + + status := parts[0] + + // Handle rename/copy status codes (e.g., "R100", "C75") + if len(status) > 1 && (status[0] == 'R' || status[0] == 'C') { + // For rename/copy, we expect: "R100\toldname\tnewname" or "C75\toldname\tnewname" + if len(parts) >= 3 { + // For renames/copies, both old and new filenames need to be checked + oldFile := parts[1] + newFile := parts[2] + return parseGitStatusLine{ + status: status, + filenames: []string{oldFile, newFile}, + } + } + } + + // Handle regular status codes (M, A, D, etc.) + filename := parts[1] + return parseGitStatusLine{ + status: status, + filenames: []string{filename}, + } +} + +// processGitStatusOutput processes git diff --name-status output and returns filtered results +func processGitStatusOutput(nameStatusOutput string, returnFilenames bool) ([]string, []string) { + if nameStatusOutput == "" { + return nil, nil + } + + lines := strings.Split(strings.TrimSpace(nameStatusOutput), "\n") + var filteredLines []string + var nonBinaryFiles []string + + for _, line := range lines { + if line == "" { + continue + } + + parsed := parseGitNameStatus(line) + if len(parsed.filenames) == 0 { + continue + } + + // Check if any of the filenames are binary + hasBinaryFile := false + for _, filename := range parsed.filenames { + if utils.IsBinaryFile(filename) { + hasBinaryFile = true + break + } + } + + // If no binary files found, include this line/files + if !hasBinaryFile { + filteredLines = append(filteredLines, line) + if returnFilenames { + nonBinaryFiles = append(nonBinaryFiles, parsed.filenames...) + } + } + } + + return filteredLines, nonBinaryFiles +} + +// filterBinaryFiles filters out binary files from git diff --name-status output +func filterBinaryFiles(nameStatusOutput string) string { + filteredLines, _ := processGitStatusOutput(nameStatusOutput, false) + + if len(filteredLines) == 0 { + return "" + } + + return strings.Join(filteredLines, "\n") +} + +// extractNonBinaryFiles extracts non-binary filenames from git diff --name-status output +func extractNonBinaryFiles(nameStatusOutput string) []string { + _, nonBinaryFiles := processGitStatusOutput(nameStatusOutput, true) + return nonBinaryFiles +} + // GetChanges retrieves all Git changes including staged, unstaged, and untracked files func GetChanges(config *types.RepoConfig) (string, error) { var changes strings.Builder @@ -34,20 +135,29 @@ func GetChanges(config *types.RepoConfig) (string, error) { } if len(output) > 0 { - changes.WriteString("Unstaged changes:\n") - changes.WriteString(string(output)) - changes.WriteString("\n\n") - - // Get the content of these changes - diffCmd := exec.Command("git", "-C", config.Path, "diff") - diffOutput, err := diffCmd.Output() - if err != nil { - return "", fmt.Errorf("git diff content failed: %v", err) - } + // Filter out binary files from the name-status output + filteredOutput := filterBinaryFiles(string(output)) + + if filteredOutput != "" { + changes.WriteString("Unstaged changes:\n") + changes.WriteString(filteredOutput) + changes.WriteString("\n\n") + + // Get the content of these changes (only for non-binary files) + nonBinaryFiles := extractNonBinaryFiles(string(output)) + if len(nonBinaryFiles) > 0 { + diffCmd := exec.Command("git", "-C", config.Path, "diff", "--") + diffCmd.Args = append(diffCmd.Args, nonBinaryFiles...) + diffOutput, err := diffCmd.Output() + if err != nil { + return "", fmt.Errorf("git diff content failed: %v", err) + } - changes.WriteString("Unstaged diff content:\n") - changes.WriteString(string(diffOutput)) - changes.WriteString("\n\n") + changes.WriteString("Unstaged diff content:\n") + changes.WriteString(string(diffOutput)) + changes.WriteString("\n\n") + } + } } // 2. Check for staged changes @@ -58,20 +168,29 @@ func GetChanges(config *types.RepoConfig) (string, error) { } if len(stagedOutput) > 0 { - changes.WriteString("Staged changes:\n") - changes.WriteString(string(stagedOutput)) - changes.WriteString("\n\n") - - // Get the content of these changes - stagedDiffCmd := exec.Command("git", "-C", config.Path, "diff", "--cached") - stagedDiffOutput, err := stagedDiffCmd.Output() - if err != nil { - return "", fmt.Errorf("git diff --cached content failed: %v", err) - } + // Filter out binary files from the staged changes + filteredStagedOutput := filterBinaryFiles(string(stagedOutput)) + + if filteredStagedOutput != "" { + changes.WriteString("Staged changes:\n") + changes.WriteString(filteredStagedOutput) + changes.WriteString("\n\n") - changes.WriteString("Staged diff content:\n") - changes.WriteString(string(stagedDiffOutput)) - changes.WriteString("\n\n") + // Get the content of these changes (only for non-binary files) + nonBinaryStagedFiles := extractNonBinaryFiles(string(stagedOutput)) + if len(nonBinaryStagedFiles) > 0 { + stagedDiffCmd := exec.Command("git", "-C", config.Path, "diff", "--cached", "--") + stagedDiffCmd.Args = append(stagedDiffCmd.Args, nonBinaryStagedFiles...) + stagedDiffOutput, err := stagedDiffCmd.Output() + if err != nil { + return "", fmt.Errorf("git diff --cached content failed: %v", err) + } + + changes.WriteString("Staged diff content:\n") + changes.WriteString(string(stagedDiffOutput)) + changes.WriteString("\n\n") + } + } } // 3. Check for untracked files @@ -82,34 +201,44 @@ func GetChanges(config *types.RepoConfig) (string, error) { } if len(untrackedOutput) > 0 { - changes.WriteString("Untracked files:\n") - changes.WriteString(string(untrackedOutput)) - changes.WriteString("\n\n") - - // Try to get content of untracked files (limited to text files and smaller size) + // Filter out binary files from untracked files untrackedFiles := strings.Split(strings.TrimSpace(string(untrackedOutput)), "\n") + var nonBinaryUntrackedFiles []string + for _, file := range untrackedFiles { if file == "" { continue } + if !utils.IsBinaryFile(file) { + nonBinaryUntrackedFiles = append(nonBinaryUntrackedFiles, file) + } + } + + if len(nonBinaryUntrackedFiles) > 0 { + changes.WriteString("Untracked files:\n") + changes.WriteString(strings.Join(nonBinaryUntrackedFiles, "\n")) + changes.WriteString("\n\n") - fullPath := filepath.Join(config.Path, file) - if utils.IsTextFile(fullPath) && utils.IsSmallFile(fullPath) { - fileContent, err := os.ReadFile(fullPath) - if err != nil { - // Log but don't fail - untracked file may have been deleted or is inaccessible - continue - } - changes.WriteString(fmt.Sprintf("Content of new file %s:\n", file)) - - // Use special scrubbing for .env files - if strings.HasSuffix(strings.ToLower(file), ".env") || - strings.Contains(strings.ToLower(file), ".env.") { - changes.WriteString(scrubber.ScrubEnvFile(string(fileContent))) - } else { - changes.WriteString(string(fileContent)) + // Try to get content of untracked files (limited to text files and smaller size) + for _, file := range nonBinaryUntrackedFiles { + fullPath := filepath.Join(config.Path, file) + if utils.IsTextFile(fullPath) && utils.IsSmallFile(fullPath) { + fileContent, err := os.ReadFile(fullPath) + if err != nil { + // Log but don't fail - untracked file may have been deleted or is inaccessible + continue + } + changes.WriteString(fmt.Sprintf("Content of new file %s:\n", file)) + + // Use special scrubbing for .env files + if strings.HasSuffix(strings.ToLower(file), ".env") || + strings.Contains(strings.ToLower(file), ".env.") { + changes.WriteString(scrubber.ScrubEnvFile(string(fileContent))) + } else { + changes.WriteString(string(fileContent)) + } + changes.WriteString("\n\n") } - changes.WriteString("\n\n") } } } diff --git a/internal/utils/utils.go b/internal/utils/utils.go index bc485fb..12a7177 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -21,7 +21,10 @@ func IsTextFile(filename string) bool { textExtensions := []string{ ".txt", ".md", ".go", ".js", ".py", ".java", ".c", ".cpp", ".h", ".html", ".css", ".json", ".xml", ".yaml", ".yml", ".sh", ".bash", - ".ts", ".tsx", ".jsx", ".php", ".rb", ".rs", ".dart", + ".ts", ".tsx", ".jsx", ".php", ".rb", ".rs", ".dart", ".sql", ".r", + ".scala", ".kt", ".swift", ".m", ".pl", ".lua", ".vim", ".csv", + ".log", ".cfg", ".conf", ".ini", ".toml", ".lock", ".gitignore", + ".dockerfile", ".makefile", ".cmake", ".pro", ".pri", ".svg", } ext := strings.ToLower(filepath.Ext(filename)) @@ -31,6 +34,55 @@ func IsTextFile(filename string) bool { } } + // Common extensionless files that are typically text + if ext == "" { + baseName := strings.ToLower(filepath.Base(filename)) + commonTextFiles := []string{ + "readme", "dockerfile", "makefile", "rakefile", "gemfile", + "procfile", "jenkinsfile", "vagrantfile", "changelog", "authors", + "contributors", "copying", "install", "news", "todo", + } + + for _, textFile := range commonTextFiles { + if baseName == textFile { + return true + } + } + } + + return false +} + +// IsBinaryFile checks if a file is likely to be a binary file that should be excluded from diffs +func IsBinaryFile(filename string) bool { + // List of common binary file extensions + binaryExtensions := []string{ + // Images (excluding SVG which is XML text) + ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif", ".ico", ".webp", + // Audio/Video + ".mp3", ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".wav", ".ogg", ".m4a", + // Archives/Compressed + ".zip", ".tar", ".gz", ".7z", ".rar", ".bz2", ".xz", ".lz", ".lzma", + // Executables/Libraries + ".exe", ".dll", ".so", ".dylib", ".a", ".lib", ".bin", ".deb", ".rpm", ".dmg", ".msi", + // Documents + ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods", ".odp", + // Fonts + ".ttf", ".otf", ".woff", ".woff2", ".eot", + // Other binary formats + ".db", ".sqlite", ".sqlite3", ".mdb", ".accdb", ".pickle", ".pkl", ".pyc", ".pyo", + ".class", ".jar", ".war", ".ear", ".apk", ".ipa", + } + + ext := strings.ToLower(filepath.Ext(filename)) + for _, binExt := range binaryExtensions { + if ext == binExt { + return true + } + } + + // Note: Files with unknown extensions are not considered binary by default + // This allows them to be processed as text files for diff analysis return false }