Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ toolchain go1.24.7

require (
github.com/atotto/clipboard v0.1.4
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/google/generative-ai-go v0.19.0
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/manifoldco/promptui v0.9.0
github.com/openai/openai-go/v3 v3.0.1
github.com/pterm/pterm v0.12.80
Expand Down
223 changes: 176 additions & 47 deletions internal/git/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,107 @@ func IsRepository(path string) bool {
return strings.TrimSpace(string(output)) == "true"
}

// parseGitStatusLine represents a parsed git status line
type parseGitStatusLine struct {
status string
filenames []string
}

// parseGitNameStatus parses a single line from git diff --name-status output
// Handles various git status codes including rename (R) and copy (C) operations
func parseGitNameStatus(line string) parseGitStatusLine {
if line == "" {
return parseGitStatusLine{}
}

// Git uses tabs to separate fields in --name-status output
parts := strings.Split(line, "\t")
if len(parts) < 2 {
return parseGitStatusLine{}
}

status := parts[0]

// Handle rename/copy status codes (e.g., "R100", "C75")
if len(status) > 1 && (status[0] == 'R' || status[0] == 'C') {
// For rename/copy, we expect: "R100\toldname\tnewname" or "C75\toldname\tnewname"
if len(parts) >= 3 {
// For renames/copies, both old and new filenames need to be checked
oldFile := parts[1]
newFile := parts[2]
return parseGitStatusLine{
status: status,
filenames: []string{oldFile, newFile},
}
}
}

// Handle regular status codes (M, A, D, etc.)
filename := parts[1]
return parseGitStatusLine{
status: status,
filenames: []string{filename},
}
Comment on lines +39 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Unquote name-status paths before classification/diffing

git diff --name-status emits C-quoted paths whenever core.quotepath is left at its (true) default—for example a binary file image file.png shows up as M\t"image file.png". We forward that quoted string straight into utils.IsBinaryFile and the later git diff -- … call. Two bad things happen:

  • .Ext("\"image file.png\"") yields .png", so we fail to recognise it as binary and end up sending the very data we meant to block.
  • exec.Command passes the quotes verbatim, so git diff never matches the real path and the diff content silently disappears.

Please strip the C-style quoting (and unescape sequences) before returning filenames. A tiny helper around strconv.Unquote for both the rename/copy and regular branches fixes it:

-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
 )
@@
 func parseGitNameStatus(line string) parseGitStatusLine {
@@
-			oldFile := parts[1]
-			newFile := parts[2]
+			oldFile := unquoteGitPath(parts[1])
+			newFile := unquoteGitPath(parts[2])
@@
-	filename := parts[1]
+	filename := unquoteGitPath(parts[1])
@@
 }
+
+func unquoteGitPath(path string) string {
+	if len(path) >= 2 && path[0] == '"' && path[len(path)-1] == '"' {
+		if unquoted, err := strconv.Unquote(path); err == nil {
+			return unquoted
+		}
+	}
+	return path
+}

Without this, any path that needs quoting (spaces, non-ASCII, etc.) will either bypass the binary filter or vanish from the diff, defeating the main goal of the PR. -->

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
parts := strings.Split(line, "\t")
if len(parts) < 2 {
return parseGitStatusLine{}
}
status := parts[0]
// Handle rename/copy status codes (e.g., "R100", "C75")
if len(status) > 1 && (status[0] == 'R' || status[0] == 'C') {
// For rename/copy, we expect: "R100\toldname\tnewname" or "C75\toldname\tnewname"
if len(parts) >= 3 {
// For renames/copies, both old and new filenames need to be checked
oldFile := parts[1]
newFile := parts[2]
return parseGitStatusLine{
status: status,
filenames: []string{oldFile, newFile},
}
}
}
// Handle regular status codes (M, A, D, etc.)
filename := parts[1]
return parseGitStatusLine{
status: status,
filenames: []string{filename},
}
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
)
func parseGitNameStatus(line string) parseGitStatusLine {
parts := strings.Split(line, "\t")
if len(parts) < 2 {
return parseGitStatusLine{}
}
status := parts[0]
// Handle rename/copy status codes (e.g., "R100", "C75")
if len(status) > 1 && (status[0] == 'R' || status[0] == 'C') {
// For rename/copy, we expect: "R100\toldname\tnewname" or "C75\toldname\tnewname"
if len(parts) >= 3 {
// For renames/copies, both old and new filenames need to be checked
oldFile := unquoteGitPath(parts[1])
newFile := unquoteGitPath(parts[2])
return parseGitStatusLine{
status: status,
filenames: []string{oldFile, newFile},
}
}
}
// Handle regular status codes (M, A, D, etc.)
filename := unquoteGitPath(parts[1])
return parseGitStatusLine{
status: status,
filenames: []string{filename},
}
}
func unquoteGitPath(path string) string {
if len(path) >= 2 && path[0] == '"' && path[len(path)-1] == '"' {
if unquoted, err := strconv.Unquote(path); err == nil {
return unquoted
}
}
return path
}

}

// processGitStatusOutput processes git diff --name-status output and returns filtered results
func processGitStatusOutput(nameStatusOutput string, returnFilenames bool) ([]string, []string) {
if nameStatusOutput == "" {
return nil, nil
}

lines := strings.Split(strings.TrimSpace(nameStatusOutput), "\n")
var filteredLines []string
var nonBinaryFiles []string

for _, line := range lines {
if line == "" {
continue
}

parsed := parseGitNameStatus(line)
if len(parsed.filenames) == 0 {
continue
}

// Check if any of the filenames are binary
hasBinaryFile := false
for _, filename := range parsed.filenames {
if utils.IsBinaryFile(filename) {
hasBinaryFile = true
break
}
}

// If no binary files found, include this line/files
if !hasBinaryFile {
filteredLines = append(filteredLines, line)
if returnFilenames {
nonBinaryFiles = append(nonBinaryFiles, parsed.filenames...)
}
}
}

return filteredLines, nonBinaryFiles
}

// filterBinaryFiles filters out binary files from git diff --name-status output
func filterBinaryFiles(nameStatusOutput string) string {
filteredLines, _ := processGitStatusOutput(nameStatusOutput, false)

if len(filteredLines) == 0 {
return ""
}

return strings.Join(filteredLines, "\n")
}

// extractNonBinaryFiles extracts non-binary filenames from git diff --name-status output
func extractNonBinaryFiles(nameStatusOutput string) []string {
_, nonBinaryFiles := processGitStatusOutput(nameStatusOutput, true)
return nonBinaryFiles
}

// GetChanges retrieves all Git changes including staged, unstaged, and untracked files
func GetChanges(config *types.RepoConfig) (string, error) {
var changes strings.Builder
Expand All @@ -34,20 +135,29 @@ func GetChanges(config *types.RepoConfig) (string, error) {
}

if len(output) > 0 {
changes.WriteString("Unstaged changes:\n")
changes.WriteString(string(output))
changes.WriteString("\n\n")

// Get the content of these changes
diffCmd := exec.Command("git", "-C", config.Path, "diff")
diffOutput, err := diffCmd.Output()
if err != nil {
return "", fmt.Errorf("git diff content failed: %v", err)
}
// Filter out binary files from the name-status output
filteredOutput := filterBinaryFiles(string(output))

if filteredOutput != "" {
changes.WriteString("Unstaged changes:\n")
changes.WriteString(filteredOutput)
changes.WriteString("\n\n")

// Get the content of these changes (only for non-binary files)
nonBinaryFiles := extractNonBinaryFiles(string(output))
if len(nonBinaryFiles) > 0 {
diffCmd := exec.Command("git", "-C", config.Path, "diff", "--")
diffCmd.Args = append(diffCmd.Args, nonBinaryFiles...)
diffOutput, err := diffCmd.Output()
if err != nil {
return "", fmt.Errorf("git diff content failed: %v", err)
}

changes.WriteString("Unstaged diff content:\n")
changes.WriteString(string(diffOutput))
changes.WriteString("\n\n")
changes.WriteString("Unstaged diff content:\n")
changes.WriteString(string(diffOutput))
changes.WriteString("\n\n")
}
}
}

// 2. Check for staged changes
Expand All @@ -58,20 +168,29 @@ func GetChanges(config *types.RepoConfig) (string, error) {
}

if len(stagedOutput) > 0 {
changes.WriteString("Staged changes:\n")
changes.WriteString(string(stagedOutput))
changes.WriteString("\n\n")

// Get the content of these changes
stagedDiffCmd := exec.Command("git", "-C", config.Path, "diff", "--cached")
stagedDiffOutput, err := stagedDiffCmd.Output()
if err != nil {
return "", fmt.Errorf("git diff --cached content failed: %v", err)
}
// Filter out binary files from the staged changes
filteredStagedOutput := filterBinaryFiles(string(stagedOutput))

if filteredStagedOutput != "" {
changes.WriteString("Staged changes:\n")
changes.WriteString(filteredStagedOutput)
changes.WriteString("\n\n")

changes.WriteString("Staged diff content:\n")
changes.WriteString(string(stagedDiffOutput))
changes.WriteString("\n\n")
// Get the content of these changes (only for non-binary files)
nonBinaryStagedFiles := extractNonBinaryFiles(string(stagedOutput))
if len(nonBinaryStagedFiles) > 0 {
stagedDiffCmd := exec.Command("git", "-C", config.Path, "diff", "--cached", "--")
stagedDiffCmd.Args = append(stagedDiffCmd.Args, nonBinaryStagedFiles...)
stagedDiffOutput, err := stagedDiffCmd.Output()
if err != nil {
return "", fmt.Errorf("git diff --cached content failed: %v", err)
}

changes.WriteString("Staged diff content:\n")
changes.WriteString(string(stagedDiffOutput))
changes.WriteString("\n\n")
}
}
}

// 3. Check for untracked files
Expand All @@ -82,34 +201,44 @@ func GetChanges(config *types.RepoConfig) (string, error) {
}

if len(untrackedOutput) > 0 {
changes.WriteString("Untracked files:\n")
changes.WriteString(string(untrackedOutput))
changes.WriteString("\n\n")

// Try to get content of untracked files (limited to text files and smaller size)
// Filter out binary files from untracked files
untrackedFiles := strings.Split(strings.TrimSpace(string(untrackedOutput)), "\n")
var nonBinaryUntrackedFiles []string

for _, file := range untrackedFiles {
if file == "" {
continue
}
if !utils.IsBinaryFile(file) {
nonBinaryUntrackedFiles = append(nonBinaryUntrackedFiles, file)
}
}

if len(nonBinaryUntrackedFiles) > 0 {
changes.WriteString("Untracked files:\n")
changes.WriteString(strings.Join(nonBinaryUntrackedFiles, "\n"))
changes.WriteString("\n\n")

fullPath := filepath.Join(config.Path, file)
if utils.IsTextFile(fullPath) && utils.IsSmallFile(fullPath) {
fileContent, err := os.ReadFile(fullPath)
if err != nil {
// Log but don't fail - untracked file may have been deleted or is inaccessible
continue
}
changes.WriteString(fmt.Sprintf("Content of new file %s:\n", file))

// Use special scrubbing for .env files
if strings.HasSuffix(strings.ToLower(file), ".env") ||
strings.Contains(strings.ToLower(file), ".env.") {
changes.WriteString(scrubber.ScrubEnvFile(string(fileContent)))
} else {
changes.WriteString(string(fileContent))
// Try to get content of untracked files (limited to text files and smaller size)
for _, file := range nonBinaryUntrackedFiles {
fullPath := filepath.Join(config.Path, file)
if utils.IsTextFile(fullPath) && utils.IsSmallFile(fullPath) {
fileContent, err := os.ReadFile(fullPath)
if err != nil {
// Log but don't fail - untracked file may have been deleted or is inaccessible
continue
}
changes.WriteString(fmt.Sprintf("Content of new file %s:\n", file))

// Use special scrubbing for .env files
if strings.HasSuffix(strings.ToLower(file), ".env") ||
strings.Contains(strings.ToLower(file), ".env.") {
changes.WriteString(scrubber.ScrubEnvFile(string(fileContent)))
} else {
changes.WriteString(string(fileContent))
}
changes.WriteString("\n\n")
}
changes.WriteString("\n\n")
}
}
}
Expand Down
54 changes: 53 additions & 1 deletion internal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ func IsTextFile(filename string) bool {
textExtensions := []string{
".txt", ".md", ".go", ".js", ".py", ".java", ".c", ".cpp", ".h",
".html", ".css", ".json", ".xml", ".yaml", ".yml", ".sh", ".bash",
".ts", ".tsx", ".jsx", ".php", ".rb", ".rs", ".dart",
".ts", ".tsx", ".jsx", ".php", ".rb", ".rs", ".dart", ".sql", ".r",
".scala", ".kt", ".swift", ".m", ".pl", ".lua", ".vim", ".csv",
".log", ".cfg", ".conf", ".ini", ".toml", ".lock", ".gitignore",
".dockerfile", ".makefile", ".cmake", ".pro", ".pri", ".svg",
}

ext := strings.ToLower(filepath.Ext(filename))
Expand All @@ -31,6 +34,55 @@ func IsTextFile(filename string) bool {
}
}

// Common extensionless files that are typically text
if ext == "" {
baseName := strings.ToLower(filepath.Base(filename))
commonTextFiles := []string{
"readme", "dockerfile", "makefile", "rakefile", "gemfile",
"procfile", "jenkinsfile", "vagrantfile", "changelog", "authors",
"contributors", "copying", "install", "news", "todo",
}

for _, textFile := range commonTextFiles {
if baseName == textFile {
return true
}
}
}

return false
}

// IsBinaryFile checks if a file is likely to be a binary file that should be excluded from diffs
func IsBinaryFile(filename string) bool {
// List of common binary file extensions
binaryExtensions := []string{
// Images (excluding SVG which is XML text)
".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif", ".ico", ".webp",
// Audio/Video
".mp3", ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".wav", ".ogg", ".m4a",
// Archives/Compressed
".zip", ".tar", ".gz", ".7z", ".rar", ".bz2", ".xz", ".lz", ".lzma",
// Executables/Libraries
".exe", ".dll", ".so", ".dylib", ".a", ".lib", ".bin", ".deb", ".rpm", ".dmg", ".msi",
// Documents
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods", ".odp",
// Fonts
".ttf", ".otf", ".woff", ".woff2", ".eot",
// Other binary formats
".db", ".sqlite", ".sqlite3", ".mdb", ".accdb", ".pickle", ".pkl", ".pyc", ".pyo",
".class", ".jar", ".war", ".ear", ".apk", ".ipa",
}

ext := strings.ToLower(filepath.Ext(filename))
for _, binExt := range binaryExtensions {
if ext == binExt {
return true
}
}

// Note: Files with unknown extensions are not considered binary by default
// This allows them to be processed as text files for diff analysis
return false
}

Expand Down
Loading