From 9f9d7398a5adfff615923c74779de8ccf56baf72 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Wed, 21 Jan 2026 22:30:16 -0500 Subject: [PATCH 1/6] Simpler ASCII printable length Don't need the SWAR. Overall, both mixed and ASCII-only benchmarks are much faster. Surprising! Presumably because the new function does not require 8-byte boundaries and therefore applies to more texts. --- README.md | 59 +++++------ comparison/README.md | 57 ++++++----- width.go | 127 +++++------------------- width_test.go | 229 +++++++++++++++++-------------------------- 4 files changed, 169 insertions(+), 303 deletions(-) diff --git a/README.md b/README.md index e4a7f4f..745878d 100644 --- a/README.md +++ b/README.md @@ -97,19 +97,14 @@ This package implements the Unicode East Asian Width standard ([UAX #11](https://www.unicode.org/reports/tr11/tr11-43.html)), and handles [version selectors](https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)), and [regional indicator pairs](https://en.wikipedia.org/wiki/Regional_indicator_symbol) -(flags). We implement [Unicode TR51](https://www.unicode.org/reports/tr51/tr51-27.html). We are keeping -an eye on [emerging standards](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/). - +(flags). We implement [Unicode TR51](https://www.unicode.org/reports/tr51/tr51-27.html) +for emojis. We are keeping an eye on +[emerging standards](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/). `clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg` will give the same outputs for most real-world text. Extensive details are in the [compatibility analysis](comparison/COMPATIBILITY_ANALYSIS.md). -If you wish to investigate the core logic, see the `lookupProperties` and `width` -functions in [width.go](width.go#L139). The essential trie generation logic is in -`buildPropertyBitmap` in [unicode.go](internal/gen/unicode.go#L316). 
- - ## Prior Art [mattn/go-runewidth](https://github.com/mattn/go-runewidth) @@ -133,39 +128,39 @@ goarch: arm64 pkg: github.com/clipperhouse/displaywidth/comparison cpu: Apple M2 -BenchmarkString_Mixed/clipperhouse/displaywidth-8 6685 ns/op 252.36 MB/s 0 B/op 0 allocs/op -BenchmarkString_Mixed/mattn/go-runewidth-8 13952 ns/op 120.92 MB/s 0 B/op 0 allocs/op -BenchmarkString_Mixed/rivo/uniseg-8 19415 ns/op 86.89 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/clipperhouse/displaywidth-8 5460 ns/op 308.96 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/mattn/go-runewidth-8 14301 ns/op 117.96 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/rivo/uniseg-8 19562 ns/op 86.24 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/clipperhouse/displaywidth-8 6857 ns/op 246.02 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/mattn/go-runewidth-8 23316 ns/op 72.35 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/rivo/uniseg-8 19272 ns/op 87.54 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/clipperhouse/displaywidth-8 5546 ns/op 304.20 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/mattn/go-runewidth-8 23801 ns/op 70.88 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/rivo/uniseg-8 19768 ns/op 85.34 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/clipperhouse/displaywidth-8 178.6 ns/op 716.77 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/mattn/go-runewidth-8 1164 ns/op 110.01 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/rivo/uniseg-8 1578 ns/op 81.13 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/clipperhouse/displaywidth-8 54.58 ns/op 2345.21 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/mattn/go-runewidth-8 1167 ns/op 109.73 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/rivo/uniseg-8 1577 ns/op 81.17 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/clipperhouse/displaywidth-8 3169 ns/op 228.43 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/mattn/go-runewidth-8 4664 ns/op 155.23 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/rivo/uniseg-8 6525 ns/op 110.95 MB/s 0 
B/op 0 allocs/op +BenchmarkString_Emoji/clipperhouse/displaywidth-8 3127 ns/op 231.51 MB/s 0 B/op 0 allocs/op +BenchmarkString_Emoji/mattn/go-runewidth-8 4722 ns/op 153.31 MB/s 0 B/op 0 allocs/op +BenchmarkString_Emoji/rivo/uniseg-8 6562 ns/op 110.34 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3328 ns/op 506.96 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Mixed/mattn/go-runewidth-8 5327 ns/op 316.66 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3452 ns/op 488.68 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Mixed/mattn/go-runewidth-8 5367 ns/op 314.33 MB/s 0 B/op 0 allocs/op -BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3371 ns/op 500.37 MB/s 0 B/op 0 allocs/op -BenchmarkRune_EastAsian/mattn/go-runewidth-8 15306 ns/op 110.22 MB/s 0 B/op 0 allocs/op +BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3757 ns/op 449.06 MB/s 0 B/op 0 allocs/op +BenchmarkRune_EastAsian/mattn/go-runewidth-8 15390 ns/op 109.62 MB/s 0 B/op 0 allocs/op -BenchmarkRune_ASCII/clipperhouse/displaywidth-8 256.7 ns/op 498.66 MB/s 0 B/op 0 allocs/op -BenchmarkRune_ASCII/mattn/go-runewidth-8 262.5 ns/op 487.58 MB/s 0 B/op 0 allocs/op +BenchmarkRune_ASCII/clipperhouse/displaywidth-8 256.3 ns/op 499.40 MB/s 0 B/op 0 allocs/op +BenchmarkRune_ASCII/mattn/go-runewidth-8 262.3 ns/op 487.91 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1327 ns/op 545.59 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Emoji/mattn/go-runewidth-8 2212 ns/op 327.26 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1436 ns/op 504.16 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Emoji/mattn/go-runewidth-8 2267 ns/op 319.32 MB/s 0 B/op 0 allocs/op -BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3804 ns/op 46.53 MB/s 192 B/op 14 allocs/op -BenchmarkTruncateWithTail/mattn/go-runewidth-8 8097 ns/op 21.86 MB/s 192 B/op 14 allocs/op +BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3120 ns/op 56.73 MB/s 192 B/op 14 allocs/op 
+BenchmarkTruncateWithTail/mattn/go-runewidth-8 8134 ns/op 21.76 MB/s 192 B/op 14 allocs/op -BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3426 ns/op 66.84 MB/s 0 B/op 0 allocs/op -BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 10441 ns/op 21.93 MB/s 0 B/op 0 allocs/op +BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3427 ns/op 66.82 MB/s 0 B/op 0 allocs/op +BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 10410 ns/op 22.00 MB/s 0 B/op 0 allocs/op ``` Here are some notes on [how to make Unicode things fast](https://clipperhouse.com/go-unicode/). diff --git a/comparison/README.md b/comparison/README.md index d21b970..0b228c3 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -1,11 +1,10 @@ ## Compatibility -In real-world text, you mostly should see the same outputs from -`clipperhouse/displaywidth`,`mattn/go-runewidth`, and `rivo/uniseg`. It's -mostly the same data and logic. +In real-world text, you should see the same outputs from +`clipperhouse/displaywidth`,`mattn/go-runewidth`, and `rivo/uniseg`. -The tests in this package exercise the behaviors of the three libraries. -Extensive details are available in the +The tests in this `comparison` package exercise the behaviors of the three +libraries. Extensive details are available in the [compatibility analysis](COMPATIBILITY_ANALYSIS.md). 
## Benchmarks @@ -20,37 +19,37 @@ goarch: arm64 pkg: github.com/clipperhouse/displaywidth/comparison cpu: Apple M2 -BenchmarkString_Mixed/clipperhouse/displaywidth-8 10400 ns/op 162.21 MB/s 0 B/op 0 allocs/op -BenchmarkString_Mixed/mattn/go-runewidth-8 14296 ns/op 118.00 MB/s 0 B/op 0 allocs/op -BenchmarkString_Mixed/rivo/uniseg-8 19770 ns/op 85.33 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/clipperhouse/displaywidth-8 5460 ns/op 308.96 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/mattn/go-runewidth-8 14301 ns/op 117.96 MB/s 0 B/op 0 allocs/op +BenchmarkString_Mixed/rivo/uniseg-8 19562 ns/op 86.24 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/clipperhouse/displaywidth-8 10593 ns/op 159.26 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/mattn/go-runewidth-8 23980 ns/op 70.35 MB/s 0 B/op 0 allocs/op -BenchmarkString_EastAsian/rivo/uniseg-8 19777 ns/op 85.30 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/clipperhouse/displaywidth-8 5546 ns/op 304.20 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/mattn/go-runewidth-8 23801 ns/op 70.88 MB/s 0 B/op 0 allocs/op +BenchmarkString_EastAsian/rivo/uniseg-8 19768 ns/op 85.34 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/clipperhouse/displaywidth-8 1032 ns/op 124.09 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/mattn/go-runewidth-8 1162 ns/op 110.16 MB/s 0 B/op 0 allocs/op -BenchmarkString_ASCII/rivo/uniseg-8 1586 ns/op 80.69 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/clipperhouse/displaywidth-8 54.58 ns/op 2345.21 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/mattn/go-runewidth-8 1167 ns/op 109.73 MB/s 0 B/op 0 allocs/op +BenchmarkString_ASCII/rivo/uniseg-8 1577 ns/op 81.17 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/clipperhouse/displaywidth-8 3017 ns/op 240.01 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/mattn/go-runewidth-8 4745 ns/op 152.58 MB/s 0 B/op 0 allocs/op -BenchmarkString_Emoji/rivo/uniseg-8 6745 ns/op 107.34 MB/s 0 B/op 0 allocs/op +BenchmarkString_Emoji/clipperhouse/displaywidth-8 
3127 ns/op 231.51 MB/s 0 B/op 0 allocs/op +BenchmarkString_Emoji/mattn/go-runewidth-8 4722 ns/op 153.31 MB/s 0 B/op 0 allocs/op +BenchmarkString_Emoji/rivo/uniseg-8 6562 ns/op 110.34 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3381 ns/op 498.90 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Mixed/mattn/go-runewidth-8 5383 ns/op 313.41 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3452 ns/op 488.68 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Mixed/mattn/go-runewidth-8 5367 ns/op 314.33 MB/s 0 B/op 0 allocs/op -BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3395 ns/op 496.96 MB/s 0 B/op 0 allocs/op -BenchmarkRune_EastAsian/mattn/go-runewidth-8 15645 ns/op 107.83 MB/s 0 B/op 0 allocs/op +BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3757 ns/op 449.06 MB/s 0 B/op 0 allocs/op +BenchmarkRune_EastAsian/mattn/go-runewidth-8 15390 ns/op 109.62 MB/s 0 B/op 0 allocs/op -BenchmarkRune_ASCII/clipperhouse/displaywidth-8 257.8 ns/op 496.57 MB/s 0 B/op 0 allocs/op -BenchmarkRune_ASCII/mattn/go-runewidth-8 267.3 ns/op 478.89 MB/s 0 B/op 0 allocs/op +BenchmarkRune_ASCII/clipperhouse/displaywidth-8 256.3 ns/op 499.40 MB/s 0 B/op 0 allocs/op +BenchmarkRune_ASCII/mattn/go-runewidth-8 262.3 ns/op 487.91 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1338 ns/op 541.24 MB/s 0 B/op 0 allocs/op -BenchmarkRune_Emoji/mattn/go-runewidth-8 2287 ns/op 316.58 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1436 ns/op 504.16 MB/s 0 B/op 0 allocs/op +BenchmarkRune_Emoji/mattn/go-runewidth-8 2267 ns/op 319.32 MB/s 0 B/op 0 allocs/op -BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3689 ns/op 47.98 MB/s 192 B/op 14 allocs/op -BenchmarkTruncateWithTail/mattn/go-runewidth-8 8069 ns/op 21.93 MB/s 192 B/op 14 allocs/op +BenchmarkTruncateWithTail/clipperhouse/displaywidth-8 3120 ns/op 56.73 MB/s 192 B/op 14 allocs/op +BenchmarkTruncateWithTail/mattn/go-runewidth-8 8134 ns/op 21.76 MB/s 192 B/op 
14 allocs/op -BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3457 ns/op 66.24 MB/s 0 B/op 0 allocs/op -BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 10441 ns/op 21.93 MB/s 0 B/op 0 allocs/op +BenchmarkTruncateWithoutTail/clipperhouse/displaywidth-8 3427 ns/op 66.82 MB/s 0 B/op 0 allocs/op +BenchmarkTruncateWithoutTail/mattn/go-runewidth-8 10410 ns/op 22.00 MB/s 0 B/op 0 allocs/op ``` diff --git a/width.go b/width.go index 78ce0e9..833948f 100644 --- a/width.go +++ b/width.go @@ -2,7 +2,6 @@ package displaywidth import ( "unicode/utf8" - "unsafe" "github.com/clipperhouse/stringish" "github.com/clipperhouse/uax29/v2/graphemes" @@ -34,7 +33,7 @@ func (options Options) String(s string) int { pos := 0 for pos < len(s) { - // Try ASCII optimization (need >= 8 bytes for it to be worth it) + // Try ASCII optimization asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { width += asciiLen @@ -42,26 +41,19 @@ func (options Options) String(s string) int { continue } - // Not ASCII (or < 8 bytes), use grapheme parsing + // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) - hitASCII := false for g.Next() { - width += graphemeWidth(g.Value(), options) - absEnd := pos + g.End() + v := g.Value() + width += graphemeWidth(v, options) + pos += len(v) - // Quick check: if remaining might be an ASCII run, break to outer loop - if len(s)-absEnd >= 8 && s[absEnd] >= 0x20 && s[absEnd] <= 0x7E { - pos = absEnd - hitASCII = true + // Quick check: if remaining might have printable ASCII, break to outer loop + if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } } - - if !hitASCII { - // Consumed all remaining via graphemes - break - } } return width @@ -81,34 +73,27 @@ func (options Options) Bytes(s []byte) int { pos := 0 for pos < len(s) { - // Try ASCII optimization (need >= 8 bytes for it to be worth it) - asciiLen := printableASCIILengthBytes(s[pos:]) + // Try ASCII optimization + asciiLen := printableASCIILength(s[pos:]) if asciiLen > 0 { 
width += asciiLen pos += asciiLen continue } - // Not ASCII (or < 8 bytes), use grapheme parsing + // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) - hitASCII := false for g.Next() { - width += graphemeWidth(g.Value(), options) - absEnd := pos + g.End() + v := g.Value() + width += graphemeWidth(v, options) + pos += len(v) - // Quick check: if remaining might be an ASCII run, break to outer loop - if len(s)-absEnd >= 8 && s[absEnd] >= 0x20 && s[absEnd] <= 0x7E { - pos = absEnd - hitASCII = true + // Quick check: if remaining might have printable ASCII, break to outer loop + if pos < len(s) && s[pos] >= 0x20 && s[pos] <= 0x7E { break } } - - if !hitASCII { - // Consumed all remaining via graphemes - break - } } return width @@ -263,90 +248,22 @@ func asciiWidth(b byte) int { } // printableASCIILength returns the length of consecutive printable ASCII bytes -// starting at the beginning of s. Returns -1 if fewer than 8 consecutive -// printable ASCII bytes are found (not worth optimizing). Uses SWAR to check -// 8 bytes at a time. -func printableASCIILength(s string) int { - if len(s) < 8 { - return -1 - } - +// starting at the beginning of s. 
+func printableASCIILength[T string | []byte](s T) int { i := 0 - for ; i+8 <= len(s); i += 8 { - x := *(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.StringData(s)), i)) - // Check for non-ASCII (high bit set) - if x&0x8080808080808080 != 0 { - break - } - // Check for control chars (< 0x20): add 0x60, printable bytes overflow to set high bit - if (x+0x6060606060606060)&0x8080808080808080 != 0x8080808080808080 { - break - } - // Check for DEL (0x7F) using zero-byte detection - xored := x ^ 0x7F7F7F7F7F7F7F7F - if ((xored - 0x0101010101010101) & ^xored & 0x8080808080808080) != 0 { + for ; i < len(s); i++ { + b := s[i] + // Printable ASCII is 0x20-0x7E (space through tilde) + if b < 0x20 || b > 0x7E { break } } - // If we didn't get at least 8 bytes, not worth optimizing - if i == 0 { - return -1 - } - // If the next byte is non-ASCII (>= 0x80), back off by 1. The grapheme // parser may group the last ASCII byte with subsequent non-ASCII bytes, // such as combining marks. - if i < len(s) && s[i] >= 0x80 { + if i > 0 && i < len(s) && s[i] >= 0x80 { i-- - if i < 8 { - return -1 - } - } - - return i -} - -// printableASCIILengthBytes returns the length of consecutive printable ASCII bytes -// starting at the beginning of s. Returns -1 if fewer than 8 consecutive -// printable ASCII bytes are found (not worth optimizing). Uses SWAR to check -// 8 bytes at a time. 
-func printableASCIILengthBytes(s []byte) int { - if len(s) < 8 { - return -1 - } - - i := 0 - for ; i+8 <= len(s); i += 8 { - x := *(*uint64)(unsafe.Pointer(&s[i])) - // Check for non-ASCII (high bit set) - if x&0x8080808080808080 != 0 { - break - } - // Check for control chars (< 0x20): add 0x60, printable bytes overflow to set high bit - if (x+0x6060606060606060)&0x8080808080808080 != 0x8080808080808080 { - break - } - // Check for DEL (0x7F) using zero-byte detection - xored := x ^ 0x7F7F7F7F7F7F7F7F - if ((xored - 0x0101010101010101) & ^xored & 0x8080808080808080) != 0 { - break - } - } - - // If we didn't get at least 8 bytes, not worth optimizing - if i == 0 { - return -1 - } - - // If the next byte is non-ASCII (>= 0x80), back off by 1. The grapheme - // parser may group the last ASCII byte with subsequent non-ASCII bytes, - // such as combining marks. - if i < len(s) && s[i] >= 0x80 { - i-- - if i < 8 { - return -1 - } } return i diff --git a/width_test.go b/width_test.go index c68fae5..3eafea7 100644 --- a/width_test.go +++ b/width_test.go @@ -1023,79 +1023,55 @@ func TestPrintableASCIILength(t *testing.T) { expected int desc string }{ - // Too short to optimize (< 8 bytes) -> returns -1 - {"empty string", "", -1, "Empty string is too short"}, - {"single space", " ", -1, "Single byte is too short"}, - {"single char", "a", -1, "Single byte is too short"}, - {"7 bytes", "1234567", -1, "7 bytes is too short"}, - {"7 bytes all printable", "abcdefg", -1, "7 bytes is too short even if printable"}, - - // Exactly 8 bytes (SWAR boundary) - {"8 bytes all printable", "12345678", 8, "8 bytes all printable"}, - {"8 bytes with space", "hello wo", 8, "8 bytes with space"}, - {"8 bytes all spaces", " ", 8, "8 spaces"}, - {"8 bytes all tildes", "~~~~~~~~", 8, "8 tildes"}, - {"8 bytes boundary low", "\x20\x20\x20\x20\x20\x20\x20\x20", 8, "8 spaces (0x20)"}, - {"8 bytes boundary high", "\x7E\x7E\x7E\x7E\x7E\x7E\x7E\x7E", 8, "8 tildes (0x7E)"}, - - // 8 bytes with 
non-printable in first chunk -> -1 - {"8 bytes with control at start", "\x00hello12", -1, "Control char breaks first chunk"}, - {"8 bytes with DEL at start", "\x7Fhello12", -1, "DEL breaks first chunk"}, - {"8 bytes with non-ASCII at start", "\x80hello12", -1, "Non-ASCII breaks first chunk"}, - {"8 bytes with control in middle", "hel\x00o123", -1, "Control char in first 8 bytes"}, - {"8 bytes with DEL in middle", "hel\x7Fo123", -1, "DEL in first 8 bytes"}, - - // Just after 8 bytes (9 bytes) - block-aligned, returns 8 - {"9 bytes all printable", "123456789", 8, "Block-aligned: 9 bytes returns 8"}, - {"9 bytes with control at end", "12345678\x00", 8, "Returns 8, stops at control"}, - {"9 bytes with DEL at end", "12345678\x7F", 8, "Returns 8, stops at DEL"}, - {"9 bytes with non-ASCII at end", "12345678\x80", -1, "Backs off before non-ASCII, leaving <8 bytes"}, - - // Exactly 16 bytes (two SWAR chunks) - {"16 bytes all printable", "1234567890123456", 16, "16 bytes all printable"}, - {"16 bytes control at pos 15", "123456789012345\x00", 8, "Block-aligned: control at 15 means second chunk fails, returns 8"}, - {"16 bytes control at pos 8", "12345678\x00234567", 8, "Stops at control after first chunk"}, - - // Just before 16 bytes (15 bytes) - block-aligned, returns 8 - {"15 bytes all printable", "123456789012345", 8, "Block-aligned: 15 bytes returns 8"}, - - // Just after 16 bytes (17 bytes) - block-aligned, returns 16 - {"17 bytes all printable", "12345678901234567", 16, "Block-aligned: 17 bytes returns 16"}, - {"17 bytes control at end", "1234567890123456\x00", 16, "Stops at control in tail"}, - - // Exactly 24 bytes (three SWAR chunks) - {"24 bytes all printable", "123456789012345678901234", 24, "24 bytes all printable"}, - - // Long strings - block-aligned - {"long all printable", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()", 72, "72 bytes (multiple of 8) all printable"}, - {"all printable range", " 
!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 88, "Block-aligned: 95 bytes returns 88 (11*8)"}, - - // ASCII followed by non-ASCII (partial runs) - backs off before non-ASCII - {"ASCII then emoji", "12345678\xF0\x9F\x98\x80", -1, "Backs off before emoji, leaving <8 bytes"}, + // Any length works - returns exact count + {"empty string", "", 0, "Empty string has 0 printable bytes"}, + {"single char", "a", 1, "Single printable byte"}, + {"single space", " ", 1, "Space is printable"}, + {"7 bytes", "1234567", 7, "7 printable bytes"}, + {"8 bytes", "12345678", 8, "8 printable bytes"}, + {"9 bytes", "123456789", 9, "9 printable bytes"}, + {"15 bytes", "123456789012345", 15, "15 printable bytes"}, + {"16 bytes", "1234567890123456", 16, "16 printable bytes"}, + {"17 bytes", "12345678901234567", 17, "17 printable bytes"}, + {"24 bytes", "123456789012345678901234", 24, "24 printable bytes"}, + {"long ASCII", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", 62, "All 62 printable bytes"}, + {"all printable range", " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 95, "All 95 printable ASCII chars"}, + + // Non-printable at start -> 0 + {"control at start", "\x00hello world", 0, "Control char at start"}, + {"DEL at start", "\x7Fhello world", 0, "DEL at start"}, + {"non-ASCII at start", "\x80hello world", 0, "Non-ASCII at start"}, + {"UTF-8 at start", "\xC2\xA0hello world", 0, "UTF-8 at start"}, + {"emoji at start", "\xF0\x9F\x98\x80hello123", 0, "Emoji at start"}, + + // Non-printable in middle - stops before it + {"control in middle", "hello\x00world123", 5, "Control at pos 5, returns 5"}, + {"DEL in middle", "hello\x7Fworld123", 5, "DEL at pos 5, returns 5"}, + {"control after 8", "12345678\x00world", 8, "Control at pos 8, returns 8"}, + {"DEL after 8", "12345678\x7Fworld", 8, "DEL at pos 8, returns 8"}, + {"control at pos 15", "123456789012345\x00", 15, 
"Control at pos 15, returns 15"}, + + // Non-ASCII at end - backs off by 1 + {"non-ASCII at end of 9", "12345678\x80", 7, "Backs off 1 before non-ASCII"}, + {"non-ASCII at end of 17", "1234567890123456\x80", 15, "Backs off 1 before non-ASCII"}, + {"combining after 16", "1234567890123456\u0301", 15, "Backs off 1 before combining mark"}, + {"non-ASCII after 1", "a\x80", 0, "Backs off 1 from 1, returns 0"}, {"16 ASCII then emoji", "1234567890123456\xF0\x9F\x98\x80", 15, "Backs off 1 before emoji"}, - {"ASCII then UTF-8", "hello world!\xC2\xA0", 8, "No backoff needed: next byte after block 8 is 'r' (ASCII)"}, - - // ASCII followed by combining marks - backoff needed because combining mark modifies preceding char - {"8 ASCII then combining acute", "12345678\u0301", -1, "Backs off before combining mark, leaving <8 bytes"}, - {"16 ASCII then combining acute", "1234567890123456\u0301", 15, "Backs off 1 before combining mark"}, - {"16 ASCII then combining grave", "1234567890123456\u0300", 15, "Backs off 1 before combining mark"}, - - // Non-printable at start -> -1 - {"control at start", "\x00hello world", -1, "Control char at start"}, - {"DEL at start", "\x7Fhello world", -1, "DEL at start"}, - {"non-ASCII at start", "\x80hello world", -1, "Non-ASCII at start"}, - {"UTF-8 at start", "\xC2\xA0hello world", -1, "UTF-8 at start"}, - {"emoji at start", "\xF0\x9F\x98\x80hello123", -1, "Emoji at start"}, - - // Control characters at various positions in first 8 bytes - {"control at pos 0", "\x00234567890", -1, "Control at position 0"}, - {"control at pos 3", "123\x00567890", -1, "Control at position 3"}, - {"control at pos 7", "1234567\x000", -1, "Control at position 7"}, - - // DEL at various positions in first 8 bytes - {"DEL at pos 0", "\x7F234567890", -1, "DEL at position 0"}, - {"DEL at pos 3", "123\x7F567890", -1, "DEL at position 3"}, - {"DEL at pos 7", "1234567\x7F0", -1, "DEL at position 7"}, + + // Printable boundaries + {"8 spaces", " ", 8, "Space (0x20) is first 
printable"}, + {"8 tildes", "~~~~~~~~", 8, "Tilde (0x7E) is last printable"}, + {"mixed printable", "Hello, World! 123", 17, "All 17 are printable"}, + + // Control characters at various positions + {"control at pos 0", "\x00234567890", 0, "Control at position 0"}, + {"control at pos 3", "123\x00567890", 3, "Control at position 3"}, + {"control at pos 7", "1234567\x000", 7, "Control at position 7"}, + + // DEL at various positions + {"DEL at pos 0", "\x7F234567890", 0, "DEL at position 0"}, + {"DEL at pos 3", "123\x7F567890", 3, "DEL at position 3"}, + {"DEL at pos 7", "1234567\x7F0", 7, "DEL at position 7"}, } for _, tt := range tests { @@ -1116,6 +1092,7 @@ func TestPrintableASCIILength(t *testing.T) { } } + func TestPrintableASCIILengthBytes(t *testing.T) { tests := []struct { name string @@ -1123,86 +1100,64 @@ func TestPrintableASCIILengthBytes(t *testing.T) { expected int desc string }{ - // Too short to optimize (< 8 bytes) -> returns -1 - {"empty slice", []byte{}, -1, "Empty slice is too short"}, - {"single space", []byte{0x20}, -1, "Single byte is too short"}, - {"single char", []byte("a"), -1, "Single byte is too short"}, - {"7 bytes", []byte("1234567"), -1, "7 bytes is too short"}, - {"7 bytes all printable", []byte("abcdefg"), -1, "7 bytes is too short even if printable"}, - - // Exactly 8 bytes (SWAR boundary) + // Any length works - returns exact count + {"empty slice", []byte{}, 0, "Empty slice has 0 printable bytes"}, + {"single space", []byte{0x20}, 1, "Single space"}, + {"single char", []byte("a"), 1, "Single printable byte"}, + {"7 bytes", []byte("1234567"), 7, "7 printable bytes"}, {"8 bytes all printable", []byte("12345678"), 8, "8 bytes all printable"}, - {"8 bytes with space", []byte("hello wo"), 8, "8 bytes with space"}, + {"9 bytes all printable", []byte("123456789"), 9, "9 printable bytes"}, + {"15 bytes all printable", []byte("123456789012345"), 15, "15 printable bytes"}, + {"16 bytes all printable", []byte("1234567890123456"), 16, "16 
printable bytes"}, + {"17 bytes all printable", []byte("12345678901234567"), 17, "17 printable bytes"}, + {"24 bytes all printable", []byte("123456789012345678901234"), 24, "24 printable bytes"}, + {"long all printable", []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()"), 72, "72 printable bytes"}, + {"all printable range", []byte(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"), 95, "All 95 printable ASCII chars"}, + + // Printable boundaries {"8 bytes all spaces", []byte(" "), 8, "8 spaces"}, {"8 bytes all tildes", []byte("~~~~~~~~"), 8, "8 tildes"}, {"8 bytes boundary low", []byte{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}, 8, "8 spaces (0x20)"}, {"8 bytes boundary high", []byte{0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E, 0x7E}, 8, "8 tildes (0x7E)"}, - // 8 bytes with non-printable in first chunk -> -1 - {"8 bytes with control at start", []byte("\x00hello12"), -1, "Control char breaks first chunk"}, - {"8 bytes with DEL at start", []byte("\x7Fhello12"), -1, "DEL breaks first chunk"}, - {"8 bytes with non-ASCII at start", []byte("\x80hello12"), -1, "Non-ASCII breaks first chunk"}, - {"8 bytes with control in middle", []byte("hel\x00o123"), -1, "Control char in first 8 bytes"}, - {"8 bytes with DEL in middle", []byte("hel\x7Fo123"), -1, "DEL in first 8 bytes"}, - - // Just after 8 bytes (9 bytes) - block-aligned, returns 8 - {"9 bytes all printable", []byte("123456789"), 8, "Block-aligned: 9 bytes returns 8"}, - {"9 bytes with control at end", []byte("12345678\x00"), 8, "Returns 8, stops at control"}, - {"9 bytes with DEL at end", []byte("12345678\x7F"), 8, "Returns 8, stops at DEL"}, - {"9 bytes with non-ASCII at end", []byte("12345678\x80"), -1, "Backs off before non-ASCII, leaving <8 bytes"}, - - // Exactly 16 bytes (two SWAR chunks) - {"16 bytes all printable", []byte("1234567890123456"), 16, "16 bytes all printable"}, - {"16 bytes control at pos 15", 
[]byte("123456789012345\x00"), 8, "Block-aligned: control at 15 means second chunk fails, returns 8"}, - {"16 bytes control at pos 8", []byte("12345678\x00234567"), 8, "Stops at control after first chunk"}, - - // Just before 16 bytes (15 bytes) - block-aligned, returns 8 - {"15 bytes all printable", []byte("123456789012345"), 8, "Block-aligned: 15 bytes returns 8"}, - - // Just after 16 bytes (17 bytes) - block-aligned, returns 16 - {"17 bytes all printable", []byte("12345678901234567"), 16, "Block-aligned: 17 bytes returns 16"}, - {"17 bytes control at end", []byte("1234567890123456\x00"), 16, "Stops at control in tail"}, - - // Exactly 24 bytes (three SWAR chunks) - {"24 bytes all printable", []byte("123456789012345678901234"), 24, "24 bytes all printable"}, - - // Long slices - block-aligned - {"long all printable", []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()"), 72, "72 bytes (multiple of 8) all printable"}, - {"all printable range", []byte(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"), 88, "Block-aligned: 95 bytes returns 88 (11*8)"}, - - // ASCII followed by non-ASCII (partial runs) - backs off before non-ASCII - {"ASCII then emoji", []byte("12345678\xF0\x9F\x98\x80"), -1, "Backs off before emoji, leaving <8 bytes"}, + // Non-printable at start -> 0 + {"control at start", []byte("\x00hello world"), 0, "Control char at start"}, + {"DEL at start", []byte("\x7Fhello world"), 0, "DEL at start"}, + {"non-ASCII at start", []byte("\x80hello world"), 0, "Non-ASCII at start"}, + {"UTF-8 at start", []byte("\xC2\xA0hello world"), 0, "UTF-8 at start"}, + {"emoji at start", []byte("\xF0\x9F\x98\x80hello123"), 0, "Emoji at start"}, + + // Non-printable in middle - stops before it + {"control in middle", []byte("hel\x00o123"), 3, "Control at pos 3, returns 3"}, + {"DEL in middle", []byte("hel\x7Fo123"), 3, "DEL at pos 3, returns 3"}, + {"control at pos 8", 
[]byte("12345678\x00world"), 8, "Control at pos 8, returns 8"}, + {"DEL at pos 8", []byte("12345678\x7Fworld"), 8, "DEL at pos 8, returns 8"}, + {"control at pos 15", []byte("123456789012345\x00"), 15, "Control at pos 15, returns 15"}, + + // Non-ASCII at end - backs off by 1 + {"non-ASCII at end of 9", []byte("12345678\x80"), 7, "Backs off 1 before non-ASCII"}, + {"non-ASCII at end of 17", []byte("1234567890123456\x80"), 15, "Backs off 1 before non-ASCII"}, {"16 ASCII then emoji", []byte("1234567890123456\xF0\x9F\x98\x80"), 15, "Backs off 1 before emoji"}, - {"ASCII then UTF-8", []byte("hello world!\xC2\xA0"), 8, "No backoff needed: next byte after block 8 is 'r' (ASCII)"}, - - // ASCII followed by combining marks - backoff needed because combining mark modifies preceding char - {"8 ASCII then combining acute", []byte("12345678\u0301"), -1, "Backs off before combining mark, leaving <8 bytes"}, {"16 ASCII then combining acute", []byte("1234567890123456\u0301"), 15, "Backs off 1 before combining mark"}, {"16 ASCII then combining grave", []byte("1234567890123456\u0300"), 15, "Backs off 1 before combining mark"}, + {"non-ASCII after 1", []byte("a\x80"), 0, "Backs off 1 from 1, returns 0"}, + + // Control characters at various positions + {"control at pos 0", []byte("\x00234567890"), 0, "Control at position 0"}, + {"control at pos 3", []byte("123\x00567890"), 3, "Control at position 3"}, + {"control at pos 7", []byte("1234567\x000"), 7, "Control at position 7"}, - // Non-printable at start -> -1 - {"control at start", []byte("\x00hello world"), -1, "Control char at start"}, - {"DEL at start", []byte("\x7Fhello world"), -1, "DEL at start"}, - {"non-ASCII at start", []byte("\x80hello world"), -1, "Non-ASCII at start"}, - {"UTF-8 at start", []byte("\xC2\xA0hello world"), -1, "UTF-8 at start"}, - {"emoji at start", []byte("\xF0\x9F\x98\x80hello123"), -1, "Emoji at start"}, - - // Control characters at various positions in first 8 bytes - {"control at pos 0", 
[]byte("\x00234567890"), -1, "Control at position 0"}, - {"control at pos 3", []byte("123\x00567890"), -1, "Control at position 3"}, - {"control at pos 7", []byte("1234567\x000"), -1, "Control at position 7"}, - - // DEL at various positions in first 8 bytes - {"DEL at pos 0", []byte("\x7F234567890"), -1, "DEL at position 0"}, - {"DEL at pos 3", []byte("123\x7F567890"), -1, "DEL at position 3"}, - {"DEL at pos 7", []byte("1234567\x7F0"), -1, "DEL at position 7"}, + // DEL at various positions + {"DEL at pos 0", []byte("\x7F234567890"), 0, "DEL at position 0"}, + {"DEL at pos 3", []byte("123\x7F567890"), 3, "DEL at position 3"}, + {"DEL at pos 7", []byte("1234567\x7F0"), 7, "DEL at position 7"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := printableASCIILengthBytes(tt.input) + got := printableASCIILength(tt.input) if got != tt.expected { - t.Errorf("printableASCIILengthBytes(%v) = %d, want %d (%s)", + t.Errorf("printableASCIILength(%v) = %d, want %d (%s)", tt.input, got, tt.expected, tt.desc) if len(tt.input) > 0 { t.Logf(" Slice length: %d bytes", len(tt.input)) From 63eb885bced9285936cd48d4c4cb694a9234018f Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Thu, 22 Jan 2026 19:20:39 -0500 Subject: [PATCH 2/6] coupla more tests --- width_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/width_test.go b/width_test.go index 3eafea7..f4115a6 100644 --- a/width_test.go +++ b/width_test.go @@ -74,6 +74,17 @@ func TestStringWidth(t *testing.T) { {"short ASCII then emoji", "hello😀", defaultOptions, 5 + 2}, // < 8 bytes, no optimization {"emoji-short ASCII-emoji", "😀abc😀", defaultOptions, 2 + 3 + 2}, // < 8 bytes in middle {"long mixed", "Hello World! 
你好世界 12345678 emoji: 🎉🎊", defaultOptions, 42}, // 13 + 9 + 9 + 7 + 4 + + // ASCII with embedded control characters + {"ASCII with null in middle", "hello\x00world", defaultOptions, 10}, // 5 + 0 + 5 + {"ASCII with DEL in middle", "hello\x7Fworld", defaultOptions, 10}, // 5 + 0 + 5 + {"ASCII with multiple controls", "a\x00b\tc\nd", defaultOptions, 4}, // 1 + 0 + 1 + 0 + 1 + 0 + 1 + + // Alternating short ASCII/non-ASCII sequences + {"alternating ASCII-CJK", "a中b文c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 + {"alternating CJK-ASCII", "中a文b字c", defaultOptions, 9}, // 2 + 1 + 2 + 1 + 2 + 1 + {"single char alternating", "a😀b🎉c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 + {"rapid alternation", "aあbいcうd", defaultOptions, 10}, // 1 + 2 + 1 + 2 + 1 + 2 + 1 } for _, tt := range tests { From 1c9606cba22ee7dad61313705025003b6de2dff8 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 25 Jan 2026 14:07:04 -0500 Subject: [PATCH 3/6] remove references to 8 byte boundaries --- width_test.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/width_test.go b/width_test.go index f4115a6..52b3403 100644 --- a/width_test.go +++ b/width_test.go @@ -71,20 +71,20 @@ func TestStringWidth(t *testing.T) { {"emoji then ASCII 8 bytes", "😀12345678", defaultOptions, 2 + 8}, {"CJK then ASCII 16 bytes", "中1234567890abcdef", defaultOptions, 2 + 16}, {"ASCII-emoji-ASCII sandwich", "12345678😀abcdefgh", defaultOptions, 8 + 2 + 8}, - {"short ASCII then emoji", "hello😀", defaultOptions, 5 + 2}, // < 8 bytes, no optimization - {"emoji-short ASCII-emoji", "😀abc😀", defaultOptions, 2 + 3 + 2}, // < 8 bytes in middle + {"short ASCII then emoji", "hello😀", defaultOptions, 5 + 2}, + {"emoji-short ASCII-emoji", "😀abc😀", defaultOptions, 2 + 3 + 2}, {"long mixed", "Hello World! 
你好世界 12345678 emoji: 🎉🎊", defaultOptions, 42}, // 13 + 9 + 9 + 7 + 4 // ASCII with embedded control characters - {"ASCII with null in middle", "hello\x00world", defaultOptions, 10}, // 5 + 0 + 5 - {"ASCII with DEL in middle", "hello\x7Fworld", defaultOptions, 10}, // 5 + 0 + 5 - {"ASCII with multiple controls", "a\x00b\tc\nd", defaultOptions, 4}, // 1 + 0 + 1 + 0 + 1 + 0 + 1 + {"ASCII with null in middle", "hello\x00world", defaultOptions, 10}, // 5 + 0 + 5 + {"ASCII with DEL in middle", "hello\x7Fworld", defaultOptions, 10}, // 5 + 0 + 5 + {"ASCII with multiple controls", "a\x00b\tc\nd", defaultOptions, 4}, // 1 + 0 + 1 + 0 + 1 + 0 + 1 // Alternating short ASCII/non-ASCII sequences - {"alternating ASCII-CJK", "a中b文c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 - {"alternating CJK-ASCII", "中a文b字c", defaultOptions, 9}, // 2 + 1 + 2 + 1 + 2 + 1 - {"single char alternating", "a😀b🎉c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 - {"rapid alternation", "aあbいcうd", defaultOptions, 10}, // 1 + 2 + 1 + 2 + 1 + 2 + 1 + {"alternating ASCII-CJK", "a中b文c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 + {"alternating CJK-ASCII", "中a文b字c", defaultOptions, 9}, // 2 + 1 + 2 + 1 + 2 + 1 + {"single char alternating", "a😀b🎉c", defaultOptions, 7}, // 1 + 2 + 1 + 2 + 1 + {"rapid alternation", "aあbいcうd", defaultOptions, 10}, // 1 + 2 + 1 + 2 + 1 + 2 + 1 } for _, tt := range tests { @@ -1034,7 +1034,8 @@ func TestPrintableASCIILength(t *testing.T) { expected int desc string }{ - // Any length works - returns exact count + // Some of these test are left over from a SWAR implementation, + // which cared about 8 byte boundaries. 
{"empty string", "", 0, "Empty string has 0 printable bytes"}, {"single char", "a", 1, "Single printable byte"}, {"single space", " ", 1, "Space is printable"}, @@ -1103,7 +1104,6 @@ func TestPrintableASCIILength(t *testing.T) { } } - func TestPrintableASCIILengthBytes(t *testing.T) { tests := []struct { name string From 4ed112d8297b278d3a553933ed4ac379c8f22a1a Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 25 Jan 2026 14:23:19 -0500 Subject: [PATCH 4/6] defensive check --- width.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/width.go b/width.go index 833948f..1f8cdb1 100644 --- a/width.go +++ b/width.go @@ -43,6 +43,7 @@ func (options Options) String(s string) int { // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) + start := pos for g.Next() { v := g.Value() @@ -54,6 +55,13 @@ func (options Options) String(s string) int { break } } + + // Defensive, should not happen: if no progress was made, + // skip a byte to prevent infinite loop. Only applies if + // the grapheme parser misbehaves. + if pos == start { + pos++ + } } return width @@ -83,6 +91,7 @@ func (options Options) Bytes(s []byte) int { // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) + start := pos for g.Next() { v := g.Value() @@ -94,6 +103,13 @@ func (options Options) Bytes(s []byte) int { break } } + + // Defensive, should not happen: if no progress was made, + // skip a byte to prevent infinite loop. Only applies if + // the grapheme parser misbehaves. 
+ if pos == start { + pos++ + } } return width From 515dd8a155f5bf48581a708849a22e9ef4d35d72 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 25 Jan 2026 14:23:25 -0500 Subject: [PATCH 5/6] typo --- width_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/width_test.go b/width_test.go index 52b3403..d008c05 100644 --- a/width_test.go +++ b/width_test.go @@ -1034,7 +1034,7 @@ func TestPrintableASCIILength(t *testing.T) { expected int desc string }{ - // Some of these test are left over from a SWAR implementation, + // Some of these tests are left over from a SWAR implementation, // which cared about 8 byte boundaries. {"empty string", "", 0, "Empty string has 0 printable bytes"}, {"single char", "a", 1, "Single printable byte"}, From 79443a7664cc041131b133d3724d43c079f1217f Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 25 Jan 2026 14:34:48 -0500 Subject: [PATCH 6/6] typo Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- comparison/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comparison/README.md b/comparison/README.md index 0b228c3..7fd3750 100644 --- a/comparison/README.md +++ b/comparison/README.md @@ -1,7 +1,7 @@ ## Compatibility In real-world text, you should see the same outputs from -`clipperhouse/displaywidth`,`mattn/go-runewidth`, and `rivo/uniseg`. +`clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg`. The tests in this `comparison` package exercise the behaviors of the three libraries. Extensive details are available in the