diff --git a/.changeset/two-ideas-lay.md b/.changeset/two-ideas-lay.md
new file mode 100644
index 0000000..a8b981e
--- /dev/null
+++ b/.changeset/two-ideas-lay.md
@@ -0,0 +1,5 @@
+---
+"unicode-segmenter": patch
+---
+
+Inlined the InCB=Linker checking for Indic scripts
diff --git a/README.md b/README.md
index d80ff19..6343225 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name | Unicode® | ESM? | Size | Size (min) | Size (min+gzip) | Size (min+br) | Size (min+zstd) |
 |------------------------------|----------|------|----------:|-----------:|----------------:|--------------:|----------------:|
-| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 10,774 | 6,675 | 3,368 | 2,755 | 3,497 |
+| `unicode-segmenter/grapheme` | 16.0.0 | ✔️ | 10,708 | 6,659 | 3,363 | 2,739 | 3,490 |
 | `graphemer` | 15.0.0 | ✖️ ️| 410,435 | 95,104 | 15,752 | 10,660 | 15,911 |
 | `grapheme-splitter` | 10.0.0 | ✖️ | 122,254 | 23,682 | 7,852 | 4,802 | 6,753 |
 | `@formatjs/intl-segmenter`* | 15.0.0 | ✖️ | 603,510 | 369,673 | 72,273 | 49,530 | 68,027 |
@@ -231,7 +231,7 @@ Since [Hermes doesn't support the `Intl.Segmenter` API](https://github.com/faceb
 
 | Name | Bytecode size | Bytecode size (gzip)* |
 |------------------------------|--------------:|----------------------:|
-| `unicode-segmenter/grapheme` | 20,295 | 11,420 |
+| `unicode-segmenter/grapheme` | 20,259 | 11,417 |
 | `graphemer` | 134,089 | 31,766 |
 | `grapheme-splitter` | 63,946 | 19,162 |
 
diff --git a/src/grapheme.js b/src/grapheme.js
index f7b247f..22ec79b 100644
--- a/src/grapheme.js
+++ b/src/grapheme.js
@@ -167,7 +167,14 @@ export function* graphemeSegments(input) {
         consonant = isIndicConjunctConsonant(_hd);
       }
       if (consonant && catAfter === 3) {
-        linker = linker || isIndicConjunctLinker(cp);
+        // Inlined InCB=Linker check: the same six code points the removed isIndicConjunctLinker() matched.
+        linker = linker
+          || cp === 0x094D
+          || cp === 0x09CD
+          || cp === 0x0ACD
+          || cp === 0x0B4D
+          || cp === 0x0C4D
+          || cp === 0x0D4D;
       } else {
         linker = false;
       }
@@ -339,18 +346,3 @@ function cat(cp) {
 function isIndicConjunctConsonant(cp) {
   return findUnicodeRangeIndex(cp, consonant_ranges) >= 0;
 }
-
-/**
- * @param {number} cp
- * @return {boolean}
- */
-function isIndicConjunctLinker(cp) {
-  return (
-    cp === 2381 /* 0x094D */ ||
-    cp === 2509 /* 0x09CD */ ||
-    cp === 2765 /* 0x0ACD */ ||
-    cp === 2893 /* 0x0B4D */ ||
-    cp === 3149 /* 0x0C4D */ ||
-    cp === 3405 /* 0x0D4D */
-  );
-}