diff --git a/cmd/entire/cli/strategy/manual_commit_condensation.go b/cmd/entire/cli/strategy/manual_commit_condensation.go
index 741f64a8c..0c409a138 100644
--- a/cmd/entire/cli/strategy/manual_commit_condensation.go
+++ b/cmd/entire/cli/strategy/manual_commit_condensation.go
@@ -19,6 +19,7 @@ import (
 	"github.com/entireio/cli/cmd/entire/cli/paths"
 	"github.com/entireio/cli/cmd/entire/cli/session"
 	"github.com/entireio/cli/cmd/entire/cli/settings"
+	"github.com/entireio/cli/cmd/entire/cli/stringutil"
 	"github.com/entireio/cli/cmd/entire/cli/summarize"
 	"github.com/entireio/cli/cmd/entire/cli/textutil"
 	"github.com/entireio/cli/cmd/entire/cli/transcript"
@@ -679,11 +680,9 @@ func generateContextFromPrompts(prompts []string) []byte {
 	buf.WriteString("## User Prompts\n\n")
 
 	for i, prompt := range prompts {
-		// Truncate very long prompts for readability
-		displayPrompt := prompt
-		if len(displayPrompt) > 500 {
-			displayPrompt = displayPrompt[:500] + "..."
-		}
+		// Truncate very long prompts for readability.
+		// Use rune-based truncation to avoid splitting multi-byte UTF-8 characters (e.g. CJK).
+		displayPrompt := stringutil.TruncateRunes(prompt, 500, "...")
 		buf.WriteString(fmt.Sprintf("### Prompt %d\n\n", i+1))
 		buf.WriteString(displayPrompt)
 		buf.WriteString("\n\n")
diff --git a/cmd/entire/cli/strategy/manual_commit_condensation_test.go b/cmd/entire/cli/strategy/manual_commit_condensation_test.go
new file mode 100644
index 000000000..ad150973b
--- /dev/null
+++ b/cmd/entire/cli/strategy/manual_commit_condensation_test.go
@@ -0,0 +1,87 @@
+package strategy
+
+import (
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+func TestGenerateContextFromPrompts_CJKTruncation(t *testing.T) {
+	t.Parallel()
+
+	// 600 CJK characters exceeds the 500-rune truncation limit.
+	prompt := strings.Repeat("あ", 600)
+
+	result := generateContextFromPrompts([]string{prompt})
+
+	if !utf8.Valid(result) {
+		t.Error("generateContextFromPrompts produced invalid UTF-8 when truncating a CJK prompt")
+	}
+
+	resultStr := string(result)
+	if !strings.Contains(resultStr, "...") {
+		t.Error("expected truncated CJK prompt to contain '...' suffix")
+	}
+	// Should not contain more than 500 CJK characters
+	if strings.Contains(resultStr, strings.Repeat("あ", 501)) {
+		t.Error("CJK prompt was not truncated")
+	}
+}
+
+func TestGenerateContextFromPrompts_EmojiTruncation(t *testing.T) {
+	t.Parallel()
+
+	// 600 emoji exceeds the 500-rune truncation limit.
+	prompt := strings.Repeat("🎉", 600)
+
+	result := generateContextFromPrompts([]string{prompt})
+
+	if !utf8.Valid(result) {
+		t.Error("generateContextFromPrompts produced invalid UTF-8 when truncating an emoji prompt")
+	}
+
+	resultStr := string(result)
+	if !strings.Contains(resultStr, "...") {
+		t.Error("expected truncated emoji prompt to contain '...' suffix")
+	}
+}
+
+func TestGenerateContextFromPrompts_ASCIITruncation(t *testing.T) {
+	t.Parallel()
+
+	// Pure ASCII: should truncate at 500 runes with "..." suffix.
+	prompt := strings.Repeat("a", 600)
+
+	result := generateContextFromPrompts([]string{prompt})
+
+	if !utf8.Valid(result) {
+		t.Error("generateContextFromPrompts produced invalid UTF-8 when truncating an ASCII prompt")
+	}
+
+	resultStr := string(result)
+	if !strings.Contains(resultStr, "...") {
+		t.Error("expected truncated prompt to contain '...' suffix")
+	}
+
+	if strings.Contains(resultStr, strings.Repeat("a", 501)) {
+		t.Error("prompt was not truncated")
+	}
+}
+
+func TestGenerateContextFromPrompts_ShortCJKNotTruncated(t *testing.T) {
+	t.Parallel()
+
+	// 200 CJK characters is under the 500-rune limit, should not be truncated.
+	prompt := strings.Repeat("あ", 200)
+
+	result := generateContextFromPrompts([]string{prompt})
+
+	if !utf8.Valid(result) {
+		t.Error("generateContextFromPrompts produced invalid UTF-8")
+	}
+
+	resultStr := string(result)
+	if strings.Contains(resultStr, "...") {
+		t.Error("short CJK prompt should not be truncated")
+	}
+}
diff --git a/cmd/entire/cli/strategy/messages.go b/cmd/entire/cli/strategy/messages.go
index 33357d30e..3b1eba789 100644
--- a/cmd/entire/cli/strategy/messages.go
+++ b/cmd/entire/cli/strategy/messages.go
@@ -3,22 +3,26 @@ package strategy
 import (
 	"encoding/json"
 	"fmt"
+	"unicode/utf8"
+
+	"github.com/entireio/cli/cmd/entire/cli/stringutil"
 )
 
 // MaxDescriptionLength is the maximum length for descriptions in commit messages
 // before truncation occurs.
 const MaxDescriptionLength = 60
 
-// TruncateDescription truncates a string to maxLen characters, adding "..." if truncated.
-// If maxLen is less than 3, returns a string of dots up to maxLen.
+// TruncateDescription truncates a string to maxLen runes, adding "..." if truncated.
+// Uses rune-based slicing to avoid splitting multi-byte UTF-8 characters.
+// If maxLen is less than 3, truncates without ellipsis.
 func TruncateDescription(s string, maxLen int) string {
-	if len(s) <= maxLen {
+	if utf8.RuneCountInString(s) <= maxLen {
 		return s
 	}
 	if maxLen < 3 {
-		return s[:maxLen]
+		return stringutil.TruncateRunes(s, maxLen, "")
 	}
-	return s[:maxLen-3] + "..."
+	return stringutil.TruncateRunes(s, maxLen, "...")
 }
 
 // FormatSubagentEndMessage formats a commit message for when a subagent completes.
diff --git a/cmd/entire/cli/stringutil/stringutil.go b/cmd/entire/cli/stringutil/stringutil.go
index e26589d3e..dcd737b01 100644
--- a/cmd/entire/cli/stringutil/stringutil.go
+++ b/cmd/entire/cli/stringutil/stringutil.go
@@ -21,11 +21,10 @@ func CollapseWhitespace(s string) string {
 // TruncateRunes truncates a string to at most maxRunes runes, appending suffix if truncated.
 // This is safe for multi-byte UTF-8 characters unlike byte-based slicing.
 func TruncateRunes(s string, maxRunes int, suffix string) string {
-	runes := []rune(s)
-	if len(runes) <= maxRunes {
+	if utf8.RuneCountInString(s) <= maxRunes {
 		return s
 	}
-	// Leave room for the suffix
+	runes := []rune(s)
 	suffixRunes := []rune(suffix)
 	truncateAt := maxRunes - len(suffixRunes)
 	if truncateAt < 0 {