From eb521cd4b92b6d14fa48ba16b628c03b7753f264 Mon Sep 17 00:00:00 2001 From: DasProffi <67233923+DasProffi@users.noreply.github.com> Date: Wed, 26 Nov 2025 20:49:33 +0100 Subject: [PATCH] fix: fix plurals and do type conversions --- CHANGELOG.md | 16 + README.md | 41 +- examples/locales/de-DE.arb | 11 +- examples/translations/translations.ts | 29 +- package.json | 2 +- src/compile-to-code.ts | 105 +++++ src/compile-to-string.ts | 83 ++++ src/icu.ts | 427 +----------------- src/index.ts | 5 +- src/lex.ts | 61 +++ src/parse.ts | 277 ++++++++++++ src/scripts/compile-arb.ts | 173 +++---- src/translationGeneration.ts | 6 +- tests/compiler-code.test.ts | 55 +++ ...mpiler.test.ts => compiler-string.test.ts} | 2 +- tests/typing.test.ts | 1 + 16 files changed, 736 insertions(+), 558 deletions(-) create mode 100644 src/compile-to-code.ts create mode 100644 src/compile-to-string.ts create mode 100644 src/lex.ts create mode 100644 src/parse.ts create mode 100644 tests/compiler-code.test.ts rename tests/{compiler.test.ts => compiler-string.test.ts} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index e732517..f7915dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.4.0] - 2025-11-26 + +### Added +- Added tests for code compiler +- Added more examples +- Added type parsing corrections based on variable names and translate types from other languages like int, or float + +### Changed +- Split `lex`, `parse`, and `compile` into different files +- Moved compile function out of compile-arb-script +- Limit exports in index file +- Updated [README.md](README.md) + +### Security +- Prevent code injections through typing variables + ## [0.3.0] - 2025-11-24 ### Added diff --git a/README.md b/README.md index c30c47a..1bab1d4 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,21 @@ Create a `.arb` file with your translations: And get a translation: ```typescript -import {combineTranslation} from "./combineTranslation"; +import {combineTranslation} from "@helpwave/internationalization"; +import {Translation} from "@helpwave/internationalization"; -translations["en-US"].priceInfo(price, currency) +translations["en-US"].priceInfo?.({price, currency}) -const t = combineTranslation([translation1, translation2], "en-US") -// v still typesafe on both function parameters +type ExtensionType = { name: string } +const extension: Translation<"fr-FR", ExtensionType> = { + "fr-FR": { + name: "Charlemagne" + } +} + +const t = combineTranslation([translations, extension], "en-US") +// typesafe on both function parameters +// and handles errors automatically -> return = {{${locale}:${String(key)}}} t("priceInfo", { price, currency }) ``` @@ -72,4 +81,28 @@ Rebuild the examples: ```bash npm run build node dist/scripts/compile-arb.js --force -i ./examples/locales -o ./examples/translations/translations.ts -n "exampleTranslation" +``` + +React hook example: +```typescript +type UseHidetideTranslationOverwrites = { + locale?: HightideTranslationLocales, +} + +type HidetideTranslationExtension + = PartialTranslationExtension + +export function useHightideTranslation( + extensions?: SingleOrArray>, + overwrites?: UseHidetideTranslationOverwrites +) { 
+ const { locale: inferredLocale } = useLocale() + const locale = overwrites?.locale ?? inferredLocale + const translationExtensions = ArrayUtil.resolveSingleOrArray(extensions) + + return combineTranslation([ + ...translationExtensions, + hightideTranslation as HidetideTranslationExtension + ], locale) +} ``` \ No newline at end of file diff --git a/examples/locales/de-DE.arb b/examples/locales/de-DE.arb index 57e6d96..d42356a 100644 --- a/examples/locales/de-DE.arb +++ b/examples/locales/de-DE.arb @@ -65,5 +65,14 @@ } } }, - "escapeCharacters": "Folgende Zeichen werden mit '\\' im resultiernden string ergänzt '`', '\\' und '$' $'{'" + "escapeCharacters": "Folgende Zeichen werden mit '\\' im resultiernden string ergänzt '`', '\\' und '$' $'{'", + "nWard": "{count, plural, =1{# Station} other{# Stationen}}", + "@nWard": { + "placeholders": { + "count": { + "type": "number" + } + } + }, + "templateJSEscape": "` '${}'" } diff --git a/examples/translations/translations.ts b/examples/translations/translations.ts index 862180b..2eecb12 100644 --- a/examples/translations/translations.ts +++ b/examples/translations/translations.ts @@ -17,9 +17,11 @@ export type ExampleTranslationEntries = { 'nested.itemCount': (values: { count: number }) => string, 'nested.nested': string, 'nestedSelectPlural': (values: { gender: string, count: number }) => string, + 'nWard': (values: { count: number }) => string, 'passwordStrength': (values: { strength: string }) => string, 'priceInfo': (values: { price: number, currency: string }) => string, 'taskDeadline': (values: { deadline: string }) => string, + 'templateJSEscape': string, 'userGreeting': (values: { gender: string, name: string }) => string, 'welcomeMessage': (values: { gender: string, name: string, count: number }) => string, 'goodbye': string, @@ -49,7 +51,7 @@ export const exampleTranslation: Translation { - return TranslationGen.resolveSelect(count, { + return TranslationGen.resolvePlural(count, { '=0': `Keine Elemente`, '=1': 
`Ein Element`, 'other': `${count} Elemente`, @@ -58,23 +60,29 @@ export const exampleTranslation: Translation { return TranslationGen.resolveSelect(gender, { - 'male': TranslationGen.resolveSelect(count, { + 'male': TranslationGen.resolvePlural(count, { '=0': `Keine Nachrichten`, '=1': `Eine Nachricht`, 'other': `${count} Nachrichten`, }), - 'female': TranslationGen.resolveSelect(count, { + 'female': TranslationGen.resolvePlural(count, { '=0': `Keine Nachrichten`, '=1': `Eine Nachricht`, 'other': `${count} Nachrichten`, }), - 'other': TranslationGen.resolveSelect(count, { + 'other': TranslationGen.resolvePlural(count, { '=0': `Keine Nachrichten`, '=1': `Eine Nachricht`, 'other': `${count} Nachrichten`, }), }) }, + 'nWard': ({ count }): string => { + return TranslationGen.resolvePlural(count, { + '=1': `${count} Station`, + 'other': `${count} Stationen`, + }) + }, 'passwordStrength': ({ strength }): string => { return TranslationGen.resolveSelect(strength, { 'weak': `Schwach`, @@ -96,6 +104,7 @@ export const exampleTranslation: Translation { return `Die Aufgabe muss bis ${deadline} erledigt sein.` }, + 'templateJSEscape': `\` \${}`, 'userGreeting': ({ gender, name }): string => { return TranslationGen.resolveSelect(gender, { 'male': `Hallo, ${name}!`, @@ -111,7 +120,7 @@ export const exampleTranslation: Translation { - return TranslationGen.resolveSelect(count, { + return TranslationGen.resolvePlural(count, { '=0': `No items`, '=1': `One item`, 'other': `${count} items`, @@ -147,17 +156,17 @@ export const exampleTranslation: Translation { return TranslationGen.resolveSelect(gender, { - 'male': TranslationGen.resolveSelect(count, { + 'male': TranslationGen.resolvePlural(count, { '=0': `No messages`, '=1': `One message`, 'other': `${count} messages`, }), - 'female': TranslationGen.resolveSelect(count, { + 'female': TranslationGen.resolvePlural(count, { '=0': `No messages`, '=1': `One message`, 'other': `${count} messages`, }), - 'other': 
TranslationGen.resolveSelect(count, { + 'other': TranslationGen.resolvePlural(count, { '=0': `No messages`, '=1': `One message`, 'other': `${count} messages`, @@ -200,7 +209,7 @@ export const exampleTranslation: Translation + +const defaultCompileContext: CompileContextResult = { + indentLevel: 0, + inNode: false, + isOnlyText: false, +} + +export function compileToCode( + node: ICUASTNode, + initialContext?: CompileContext +): string[] { + const context: CompileContextResult = { ...defaultCompileContext, ...initialContext } + const lines: string[] = [] + let currentLine = '' + + function indent(level: number = context.indentLevel) { + return ' '.repeat(level * 2) + } + + function flushCurrent() { + if (currentLine) { + if (context.inNode) { + lines.push(currentLine) + } else { + const nextLine = `${indent()}\`${currentLine}\`` + lines.push(nextLine) + } + } + currentLine = '' + } + + switch (node.type) { + case 'Text': + currentLine += escapeForTemplateJS(node.value) + break + case 'NumberField': + if (context.numberParam) { + currentLine += `$\{${context.numberParam}}` + } else { + currentLine += `{${context.numberParam}}` + } + break + case 'SimpleReplace': + currentLine += `$\{${node.variableName}}` + break + case 'Node': { + for (const partNode of node.parts) { + const compiled = compileToCode(partNode, { ...context, inNode: true }) + if (partNode.type === 'OptionReplace' || partNode.type === 'Node') { + flushCurrent() + lines.push(...compiled) + } else { + currentLine += compiled[0] + } + } + break + } + case 'OptionReplace': { + if (context.isOnlyText) { + currentLine += `{${node.variableName}, ${node.operatorName}, {options}}` + break + } + flushCurrent() + const resolver = node.operatorName === 'plural' ? 
+ 'TranslationGen.resolvePlural': 'TranslationGen.resolveSelect' + lines.push(`${resolver}(${node.variableName}, {`) + + const entries = Object.entries(node.options) + + for (const [key, entryNode] of entries) { + const expr = compileToCode(entryNode, { + ...context, + numberParam: node.operatorName === 'plural' ? node.variableName : context.numberParam, + indentLevel: context.indentLevel + 1, + inNode: false, + }) + if (expr.length === 0) continue + lines.push(indent(context.indentLevel + 1) + `'${key}': ${expr[0].trimStart()}`, ...expr.slice(1)) + lines[lines.length - 1] += ',' + } + + lines.push(indent() + `})`) + return lines + } + } + flushCurrent() + return lines +} \ No newline at end of file diff --git a/src/compile-to-string.ts b/src/compile-to-string.ts new file mode 100644 index 0000000..7f433b3 --- /dev/null +++ b/src/compile-to-string.ts @@ -0,0 +1,83 @@ +import type { ICUASTNode } from '@/src/parse' + +export function getPluralKey(num: number) { + return num === 0 ? '=0' : + num === 1 ? '=1' : + num === 2 ? '=2' : + num > 2 && num < 5 ? 'few' : + num >= 5 ? 
'many' : 'other' +} + +type CompilerContext = { + hashtagReplacer?: number, +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export type ICUCompilerValues = Record + +/** + * Compiles a ICUASTNode tree to a string + * @param node Tree to compile + * @param values The values that replace the variable in the tree + * @param context The context values of previous compile iterations + */ +export function compileToString(node: ICUASTNode, values: ICUCompilerValues, context: CompilerContext = {}): string { + switch (node.type) { + case 'Node': { + return node.parts.map(p => compileToString(p, values, context)).join('') + } + case 'Text': + return node.value + case 'SimpleReplace': { + const name = node.variableName + if (values && values[name] !== undefined) return String(values[name]) + console.warn(`ICU Compile: missing value for ${name}`) + return `{${name}}` + } + case 'OptionReplace': { + const name = node.variableName + const operation = node.operatorName + const val = values ? values[name] : undefined + switch (operation) { + case 'plural': { + const num = Number(val) + if (isNaN(num)) { + console.warn(`ICU Compile: plural expected numeric value for ${name}, got ${val}`) + return `{${name}}` + } + const pluralKey = getPluralKey(num) + + const chosen = node.options[pluralKey] ?? node.options['other'] + if (!chosen) { + console.warn(`ICU Compile: plural for ${name} could not find key ${pluralKey} and no other`) + return `{${name}}` + } + return compileToString(chosen, values, { ...context, hashtagReplacer: num }) + } + case 'select': { + if (val === undefined) { + console.warn(`ICU Compile: missing value for select ${name}`) + const other = node.options['other'] + return other ? compileToString(other, values, context) : `{${name}}` + } + const chosen = node.options[String(val)] ?? 
node.options['other'] + if (!chosen) { + console.warn(`ICU Compile: select ${name} chose undefined option "${val}" and no "other" provided`) + return `{${name}}` + } + return compileToString(chosen, values, context) + } + default: { + return `{${name}, ${operation}}` + } + } + } + case 'NumberField': { + if (context.hashtagReplacer !== undefined) { + return `${context.hashtagReplacer}` + } else { + return '{#}' + } + } + } +} \ No newline at end of file diff --git a/src/icu.ts b/src/icu.ts index 9d73a9b..1e27d78 100644 --- a/src/icu.ts +++ b/src/icu.ts @@ -1,423 +1,13 @@ -const escapeCharacter = "'" +import { lex } from './lex' +import { parse } from './parse' +import { compileToString } from './compile-to-string' +import type { ICUCompilerValues } from './compile-to-string' -///////////// -// Lexer -///////////// - -export type ICUToken = - | { type: 'LBRACE' } - | { type: 'RBRACE' } - | { type: 'COMMA' } - | { type: 'HASHTAG' } - | { type: 'ESCAPE' } - | { type: 'WHITESPACE', value: string } - | { type: 'TEXT', value: string } - -/** - * ICU uses single quotes to quote literal text. This means: - * '' -> ' - * '...anything...' 
-> literal anything (but two single quotes inside become one) - */ -function lex(input: string): ICUToken[] { - const tokens: ICUToken[] = [] - - function pushAppend(text: string, type: 'TEXT' | 'WHITESPACE') { - if (tokens.length > 0) { - const last = tokens[tokens.length - 1] - if (last.type === type) { - last.value += text - return - } - } - tokens.push({ type, value: text }) - } - - for (let index = 0; index < input.length; index++) { - const character = input[index] - switch (character) { - case '{': - tokens.push({ type: 'LBRACE' }) - break - case '}': - tokens.push({ type: 'RBRACE' }) - break - case '#': - tokens.push({ type: 'HASHTAG' }) - break - case ',': - tokens.push({ type: 'COMMA' }) - break - case escapeCharacter: - tokens.push({ type: 'ESCAPE' }) - break - case ' ': - pushAppend(character, 'WHITESPACE') - break - default: - pushAppend(character, 'TEXT') - break - } - } - - return tokens -} - -///////////// -// Parser -> AST -///////////// - -const replaceOperations = ['plural', 'select'] as const -type ReplaceOperation = typeof replaceOperations[number] - -export type ICUASTNode = - | { type: 'Node', parts: ICUASTNode[] } - | { type: 'Text', value: string } - | { type: 'NumberField' } - | { type: 'SimpleReplace', variableName: string } // {name} - | { type: 'OptionReplace', variableName: string, operatorName: ReplaceOperation, options: Record } // {var, select, key{msg} ...} - -type ParserState = { name: 'escape' } | - { name: 'normal' } | - { - name: 'replaceFunction', - expect: ReplaceExpectState, - variableName: string, - subtree: ICUASTNode[], - operatorName?: ReplaceOperation, - optionName: string, - options: Record, - } - -type ReplaceExpectState = - 'variableName' - | 'variableNameCommaOrSimpleReplaceClose' - | 'operatorName' - | 'operatorNameComma' - | 'optionNameOrReplaceClose' - | 'optionOpen' - | 'optionContentOrClose' - -type ParserContext = { - state: ParserState[], - last?: ICUToken, -} - -function parse(tokens: ICUToken[]): ICUASTNode 
{ - const result: ICUASTNode[] = [] - - const context: ParserContext = { - state: [{ name: 'normal' }], - } - - function getState() { - const state = context.state[context.state.length - 1] - if (!state) { - throw new Error('ICU Parser: Reached invalid state') - } - return state - } - - function getStateName() { - return getState().name - } - - function pushText(text: string, target: ICUASTNode[] = result) { - if (target.length > 0) { - const last = target[target.length - 1] - if (last.type === 'Text') { - last.value += text - return - } - } - target.push({ type: 'Text', value: text }) - } - - function inNormal(token: ICUToken) { - switch (token.type) { - case 'RBRACE': - throw Error('ICU Parser: Read an unescaped "}" before reading a "{"') - case 'LBRACE': - context.state.push({ - name: 'replaceFunction', - expect: 'variableName', - variableName: '', - optionName: '', - options: {}, - subtree: [], - }) - break - case 'ESCAPE': - context.state.push({ name: 'escape' }) - break - case 'COMMA': - pushText(',') - break - case 'HASHTAG': - pushText('#') - break - case 'TEXT': - pushText(token.value) - break - case 'WHITESPACE': - pushText(token.value) - break - } - } - - function inEscape(token: ICUToken) { - const prevState = context.state[context.state.length - 1] - let pushFunction: (value: string) => void = pushText - if (prevState && prevState.name === 'replaceFunction' && prevState.expect === 'operatorName') { - pushFunction = (value: string) => pushText(value, prevState.subtree) - } - - switch (token.type) { - case 'ESCAPE': - if (context.last?.type === 'ESCAPE') { - pushFunction(escapeCharacter) - } - context.state.pop() - break - case 'COMMA': - pushFunction(',') - break - case 'HASHTAG': - pushFunction('#') - break - case 'LBRACE': - pushFunction('{') - break - case 'RBRACE': - pushFunction('}') - break - default: - pushFunction(token.value) - } - } - - // Closing and opening brackets are already removed - function inReplaceFunction(token: ICUToken) { - const 
state = getState() - if (state.name !== 'replaceFunction') { - throw Error(`ICU Parser: Invalid State of Parser. Contact Package developer.`) - } - switch (token.type) { - case 'ESCAPE': - if (state.expect !== 'optionContentOrClose') { - throw Error(`ICU Parser: Invalid Escape character "'". Escape characters are only valid outside of replacement functions or in the option content.`) - } - context.state.push({ name: 'escape' }) - break - case 'LBRACE': - if (state.expect === 'optionOpen') { - state.expect = 'optionContentOrClose' - } else if (state.expect === 'optionContentOrClose') { - context.state.push({ - name: 'replaceFunction', - expect: 'variableName', - variableName: '', - optionName: '', - options: {}, - subtree: [] - }) - } else { - throw Error(`ICU Parser: Invalid placement of "{" in replacement function.`) - } - break - case 'RBRACE': - if (state.expect === 'variableNameCommaOrSimpleReplaceClose') { - context.state.pop() - const prevState = getState() - const node: ICUASTNode = { - type: 'SimpleReplace', - variableName: state.variableName - } - if (prevState.name === 'replaceFunction') { - prevState.subtree.push(node) - } else { - result.push(node) - } - } else if (state.expect === 'optionContentOrClose') { - const subTree = state.subtree - state.options[state.optionName] = subTree.length === 1 ? subTree[0] : { type: 'Node', parts: subTree } - state.expect = 'optionNameOrReplaceClose' - state.subtree = [] - } else if (state.expect === 'optionNameOrReplaceClose') { - context.state.pop() - const prevState = getState() - if(!state.operatorName) { - throw Error(`ICU Parser: Internal Parser Error. 
Operator name undefined in state.`) - } - const node: ICUASTNode = { - type: 'OptionReplace', - variableName: state.variableName, - operatorName: state.operatorName, - options: state.options, - } - if (prevState.name === 'replaceFunction') { - prevState.subtree.push(node) - } else { - result.push(node) - } - } else { - throw Error(`ICU Parser: Invalid placement of "}" in replacement function.`) - } - break - case 'HASHTAG': { - if (state.expect === 'optionContentOrClose') { - if (state.operatorName === 'plural') { - state.subtree.push({ type: 'NumberField' }) - } else { - pushText('#', state.subtree) - } - } else { - throw Error(`ICU Parser: Invalid placement of "#". "#" are only valid outside of replacement functions or in the option content.`) - } - break - } - case 'COMMA': - if (state.expect === 'operatorNameComma') { - state.expect = 'optionNameOrReplaceClose' - } else if (state.expect === 'variableNameCommaOrSimpleReplaceClose') { - state.expect = 'operatorName' - } else if (state.expect === 'optionContentOrClose') { - pushText(',', state.subtree) - } else { - throw Error(`ICU Parser: Invalid placement of "," in replacement function.`) - } - break - case 'WHITESPACE': - if (state.expect === 'optionContentOrClose') { - pushText(token.value, state.subtree) - } - break - case 'TEXT': - if (state.expect === 'variableName') { - state.variableName = token.value - state.expect = 'variableNameCommaOrSimpleReplaceClose' - } else if (state.expect === 'operatorName') { - if (replaceOperations.some(value => value === token.value)) { - state.operatorName = token.value as ReplaceOperation - } else { - throw Error(`ICU Parser: ${token.value} is an invalid replacement function operator. 
Allowed are ${replaceOperations.map(value => `"${value}"`).join(', ')}`) - } - state.expect = 'operatorNameComma' - } else if (state.expect === 'optionNameOrReplaceClose') { - state.optionName = token.value - state.expect = 'optionOpen' - } else if (state.expect === 'optionContentOrClose') { - pushText(token.value, state.subtree) - } else { - throw Error('ICU Parser: Invalid position of a Text block in a replacement function.') - } - break - } - } - - for (let index = 0; index < tokens.length; index++) { - const token = tokens[index] - const state = getStateName() - - if (state === 'normal') { - inNormal(token) - } else if (state === 'replaceFunction') { - inReplaceFunction(token) - } else if (state === 'escape') { - inEscape(token) - } - context.last = token - } - - const state = getStateName() - - if (state === 'replaceFunction') { - throw Error(`ICU Parse: Encountered unclosed "{"`) - } else if (state === 'escape') { - throw Error(`ICU Parse: Encountered unclosed escape "'"`) - } - return result.length !== 1 ? { type: 'Node', parts: result } : result[0] -} - -///////////// -// Compiler -///////////// - -type CompilerContext = { - hashtagReplacer?: number, -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type ICUCompilerValues = Record - -function compile(node: ICUASTNode, values: ICUCompilerValues, context: CompilerContext = {}): string { - switch (node.type) { - case 'Node': { - return node.parts.map(p => compile(p, values, context)).join('') - } - case 'Text': - return node.value - case 'SimpleReplace': { - const name = node.variableName - if (values && values[name] !== undefined) return String(values[name]) - console.warn(`ICU Compile: missing value for ${name}`) - return `{${name}}` - } - case 'OptionReplace': { - const name = node.variableName - const operation = node.operatorName - const val = values ? 
values[name] : undefined - switch (operation) { - case 'plural': { - const num = Number(val) - if (isNaN(num)) { - console.warn(`ICU Compile: plural expected numeric value for ${name}, got ${val}`) - return `{${name}}` - } - const pluralKey = - num === 0 ? '=0' : - num === 1 ? '=1' : - num === 2 ? '=2' : - num > 2 && num < 5 ? 'few' : - num >= 5 ? 'many' : 'other' - - const chosen = node.options[pluralKey] ?? node.options['other'] - if (!chosen) { - console.warn(`ICU Compile: plural for ${name} could not find key ${pluralKey} and no other`) - return `{${name}}` - } - return compile(chosen, values, { ...context, hashtagReplacer: num }) - } - case 'select': { - if (val === undefined) { - console.warn(`ICU Compile: missing value for select ${name}`) - const other = node.options['other'] - return other ? compile(other, values, context) : `{${name}}` - } - const chosen = node.options[String(val)] ?? node.options['other'] - if (!chosen) { - console.warn(`ICU Compile: select ${name} chose undefined option "${val}" and no "other" provided`) - return `{${name}}` - } - return compile(chosen, values, context) - } - default: { - return `{${name}, ${operation}}` - } - } - } - case 'NumberField': { - if (context.hashtagReplacer !== undefined) { - return `${context.hashtagReplacer}` - } else { - return '{#}' - } - } - } -} +export const escapeCharacter = "'" function interpret(message: string, values: ICUCompilerValues): string { try { - return compile(parse(lex(message)), values) + return compileToString(parse(lex(message)), values) } catch (e) { console.error(`Failed to interpret message: ${message}`, e) return message @@ -427,6 +17,7 @@ function interpret(message: string, values: ICUCompilerValues): string { export const ICUUtil = { lex, parse, - compile, - interpret + compile: compileToString, + interpret, + escapeCharacter } diff --git a/src/index.ts b/src/index.ts index 8f66b30..8129d22 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,7 @@ -export * from './icu' 
+export { ICUUtil } from './icu' +export { ICUToken } from './lex' +export { ICUASTNode } from './parse' +export { ICUCompilerValues } from './compile-to-string' export * from './combineTranslation' export * from './types' export * from './translationGeneration' diff --git a/src/lex.ts b/src/lex.ts new file mode 100644 index 0000000..6b36244 --- /dev/null +++ b/src/lex.ts @@ -0,0 +1,61 @@ +import { escapeCharacter } from './icu' + +export type ICUToken = + | { type: 'LBRACE' } + | { type: 'RBRACE' } + | { type: 'COMMA' } + | { type: 'HASHTAG' } + | { type: 'ESCAPE' } + | { type: 'WHITESPACE', value: string } + | { type: 'TEXT', value: string } + +/** + * Takes in a sting and converts it to a list of ICUToken's + * ICU uses single quotes to quote literal text. This means: + * '' -> ' + * '...anything...' -> literal anything (but two single quotes inside become one) + * @param input The string to tokenize + */ +export function lex(input: string): ICUToken[] { + const tokens: ICUToken[] = [] + + function pushAppend(text: string, type: 'TEXT' | 'WHITESPACE') { + if (tokens.length > 0) { + const last = tokens[tokens.length - 1] + if (last.type === type) { + last.value += text + return + } + } + tokens.push({ type, value: text }) + } + + for (let index = 0; index < input.length; index++) { + const character = input[index] + switch (character) { + case '{': + tokens.push({ type: 'LBRACE' }) + break + case '}': + tokens.push({ type: 'RBRACE' }) + break + case '#': + tokens.push({ type: 'HASHTAG' }) + break + case ',': + tokens.push({ type: 'COMMA' }) + break + case escapeCharacter: + tokens.push({ type: 'ESCAPE' }) + break + case ' ': + pushAppend(character, 'WHITESPACE') + break + default: + pushAppend(character, 'TEXT') + break + } + } + + return tokens +} \ No newline at end of file diff --git a/src/parse.ts b/src/parse.ts new file mode 100644 index 0000000..cb202e2 --- /dev/null +++ b/src/parse.ts @@ -0,0 +1,277 @@ +import type { ICUToken } from './lex' +import { 
escapeCharacter } from './icu' +const replaceOperations = ['plural', 'select'] as const +type ReplaceOperation = typeof replaceOperations[number] + +export type ICUASTNode = + | { type: 'Node', parts: ICUASTNode[] } + | { type: 'Text', value: string } + | { type: 'NumberField' } + | { type: 'SimpleReplace', variableName: string } // {name} + | { type: 'OptionReplace', variableName: string, operatorName: ReplaceOperation, options: Record } // {var, select, key{msg} ...} + +type ParserState = { name: 'escape' } | + { name: 'normal' } | + { + name: 'replaceFunction', + expect: ReplaceExpectState, + variableName: string, + subtree: ICUASTNode[], + operatorName?: ReplaceOperation, + optionName: string, + options: Record, + } + +type ReplaceExpectState = + 'variableName' + | 'variableNameCommaOrSimpleReplaceClose' + | 'operatorName' + | 'operatorNameComma' + | 'optionNameOrReplaceClose' + | 'optionOpen' + | 'optionContentOrClose' + +type ParserContext = { + state: ParserState[], + last?: ICUToken, +} + +/** + * Converts a list of ICUToken's to a tree of ICUSAstNode's + * @param tokens + */ +export function parse(tokens: ICUToken[]): ICUASTNode { + const result: ICUASTNode[] = [] + + const context: ParserContext = { + state: [{ name: 'normal' }], + } + + function getState() { + const state = context.state[context.state.length - 1] + if (!state) { + throw new Error('ICU Parser: Reached invalid state') + } + return state + } + + function getStateName() { + return getState().name + } + + function pushText(text: string, target: ICUASTNode[] = result) { + if (target.length > 0) { + const last = target[target.length - 1] + if (last.type === 'Text') { + last.value += text + return + } + } + target.push({ type: 'Text', value: text }) + } + + function inNormal(token: ICUToken) { + switch (token.type) { + case 'RBRACE': + throw Error('ICU Parser: Read an unescaped "}" before reading a "{"') + case 'LBRACE': + context.state.push({ + name: 'replaceFunction', + expect: 'variableName', 
+ variableName: '', + optionName: '', + options: {}, + subtree: [], + }) + break + case 'ESCAPE': + context.state.push({ name: 'escape' }) + break + case 'COMMA': + pushText(',') + break + case 'HASHTAG': + pushText('#') + break + case 'TEXT': + pushText(token.value) + break + case 'WHITESPACE': + pushText(token.value) + break + } + } + + function inEscape(token: ICUToken) { + const prevState = context.state[context.state.length - 1] + let pushFunction: (value: string) => void = pushText + if (prevState && prevState.name === 'replaceFunction' && prevState.expect === 'operatorName') { + pushFunction = (value: string) => pushText(value, prevState.subtree) + } + + switch (token.type) { + case 'ESCAPE': + if (context.last?.type === 'ESCAPE') { + pushFunction(escapeCharacter) + } + context.state.pop() + break + case 'COMMA': + pushFunction(',') + break + case 'HASHTAG': + pushFunction('#') + break + case 'LBRACE': + pushFunction('{') + break + case 'RBRACE': + pushFunction('}') + break + default: + pushFunction(token.value) + } + } + + // Closing and opening brackets are already removed + function inReplaceFunction(token: ICUToken) { + const state = getState() + if (state.name !== 'replaceFunction') { + throw Error(`ICU Parser: Invalid State of Parser. Contact Package developer.`) + } + switch (token.type) { + case 'ESCAPE': + if (state.expect !== 'optionContentOrClose') { + throw Error(`ICU Parser: Invalid Escape character "'". 
Escape characters are only valid outside of replacement functions or in the option content.`) + } + context.state.push({ name: 'escape' }) + break + case 'LBRACE': + if (state.expect === 'optionOpen') { + state.expect = 'optionContentOrClose' + } else if (state.expect === 'optionContentOrClose') { + context.state.push({ + name: 'replaceFunction', + expect: 'variableName', + variableName: '', + optionName: '', + options: {}, + subtree: [] + }) + } else { + throw Error(`ICU Parser: Invalid placement of "{" in replacement function.`) + } + break + case 'RBRACE': + if (state.expect === 'variableNameCommaOrSimpleReplaceClose') { + context.state.pop() + const prevState = getState() + const node: ICUASTNode = { + type: 'SimpleReplace', + variableName: state.variableName + } + if (prevState.name === 'replaceFunction') { + prevState.subtree.push(node) + } else { + result.push(node) + } + } else if (state.expect === 'optionContentOrClose') { + const subTree = state.subtree + state.options[state.optionName] = subTree.length === 1 ? subTree[0] : { type: 'Node', parts: subTree } + state.expect = 'optionNameOrReplaceClose' + state.subtree = [] + } else if (state.expect === 'optionNameOrReplaceClose') { + context.state.pop() + const prevState = getState() + if(!state.operatorName) { + throw Error(`ICU Parser: Internal Parser Error. 
Operator name undefined in state.`) + } + const node: ICUASTNode = { + type: 'OptionReplace', + variableName: state.variableName, + operatorName: state.operatorName, + options: state.options, + } + if (prevState.name === 'replaceFunction') { + prevState.subtree.push(node) + } else { + result.push(node) + } + } else { + throw Error(`ICU Parser: Invalid placement of "}" in replacement function.`) + } + break + case 'HASHTAG': { + if (state.expect === 'optionContentOrClose') { + if (state.operatorName === 'plural') { + state.subtree.push({ type: 'NumberField' }) + } else { + pushText('#', state.subtree) + } + } else { + throw Error(`ICU Parser: Invalid placement of "#". "#" are only valid outside of replacement functions or in the option content.`) + } + break + } + case 'COMMA': + if (state.expect === 'operatorNameComma') { + state.expect = 'optionNameOrReplaceClose' + } else if (state.expect === 'variableNameCommaOrSimpleReplaceClose') { + state.expect = 'operatorName' + } else if (state.expect === 'optionContentOrClose') { + pushText(',', state.subtree) + } else { + throw Error(`ICU Parser: Invalid placement of "," in replacement function.`) + } + break + case 'WHITESPACE': + if (state.expect === 'optionContentOrClose') { + pushText(token.value, state.subtree) + } + break + case 'TEXT': + if (state.expect === 'variableName') { + state.variableName = token.value + state.expect = 'variableNameCommaOrSimpleReplaceClose' + } else if (state.expect === 'operatorName') { + if (replaceOperations.some(value => value === token.value)) { + state.operatorName = token.value as ReplaceOperation + } else { + throw Error(`ICU Parser: ${token.value} is an invalid replacement function operator. 
Allowed are ${replaceOperations.map(value => `"${value}"`).join(', ')}`) + } + state.expect = 'operatorNameComma' + } else if (state.expect === 'optionNameOrReplaceClose') { + state.optionName = token.value + state.expect = 'optionOpen' + } else if (state.expect === 'optionContentOrClose') { + pushText(token.value, state.subtree) + } else { + throw Error('ICU Parser: Invalid position of a Text block in a replacement function.') + } + break + } + } + + for (let index = 0; index < tokens.length; index++) { + const token = tokens[index] + const state = getStateName() + + if (state === 'normal') { + inNormal(token) + } else if (state === 'replaceFunction') { + inReplaceFunction(token) + } else if (state === 'escape') { + inEscape(token) + } + context.last = token + } + + const state = getStateName() + + if (state === 'replaceFunction') { + throw Error(`ICU Parse: Encountered unclosed "{"`) + } else if (state === 'escape') { + throw Error(`ICU Parse: Encountered unclosed escape "'"`) + } + return result.length !== 1 ? 
{ type: 'Node', parts: result } : result[0] +} \ No newline at end of file diff --git a/src/scripts/compile-arb.ts b/src/scripts/compile-arb.ts index 55b744d..a056536 100644 --- a/src/scripts/compile-arb.ts +++ b/src/scripts/compile-arb.ts @@ -2,8 +2,8 @@ import fs from 'fs' import path from 'path' import readline from 'readline' -import type { ICUASTNode } from '@/src' -import { ICUUtil } from '@/src' +import { ICUUtil } from '../' +import { compileToCode } from '@/src/compile-to-code' /* ------------------ types ------------------ */ @@ -23,7 +23,7 @@ interface ARBFile { [key: string]: string | ARBMeta, } -interface FuncParam { +export interface FuncParam { name: string, typing: string, } @@ -199,19 +199,49 @@ function readARBDir( let entryObj: TranslationEntry + const nameOverwrites: { type: string, names: string[] }[] = [ + { type: 'number', names: ['count', 'amount', 'length', 'number'] }, + { type: 'Date', names: ['date', 'dateTime'] }, + { type: 'string', names: ['name'] }, + ] + const typeOverwrites: { type: string, types: string[] }[] = [ + { type: 'number', types: ['int', 'float', 'double', 'Int', 'Float', 'Double'] }, + { type: 'Date', types: ['date', 'DateTime', 'dateTime'] }, + { type: 'string', types: ['String'] }, + ] + try { if (meta?.placeholders) { // ICU function const params: FuncParam[] = Object.entries(meta.placeholders).map( ([name, def]) => { let typing = def.type - if (!typing) { - if (['count', 'amount', 'length', 'number'].includes(name)) { - typing = 'number' - } else if (['date', 'dateTime'].includes(name)) { - typing = 'Date' - } else { + if (typing) { + const legalCharacters = /^[a-zA-Z]+$/.test(typing) + if(!legalCharacters) { + console.error(`Type "${typing}" contained illegal characters for variable [${name}] of [${key}] in file ${fullPath}`) typing = 'string' + } else { + // map the declared type (not the variable name) from other languages' .arb type names onto TS types + for (const typeOverwrite of typeOverwrites) { + if (typeOverwrite.types.includes(typing)) { + typing = 
typeOverwrite.type + break + } + } + } + } else { + typing = 'string' + for (const nameOverwrite of nameOverwrites) { + if (nameOverwrite.names.includes(name)) { + console.warn([ + `Variable [${name}] of [${key}] in file ${fullPath}`, + `Was considered a ${nameOverwrite.type} due to the name being in ${nameOverwrite.names}.`, + `To prevent this warning set the type explicitly or consider renaming it` + ].join('\n')) + typing = nameOverwrite.type + break + } } } return { name, typing } @@ -231,7 +261,7 @@ function readARBDir( } result[locale][flatKey] = entryObj - }catch (e) { + } catch (e) { console.error(`Failed to load [${key}] in file ${fullPath}`, e) } } @@ -242,113 +272,7 @@ function readARBDir( /* ------------------ code generator: values ------------------ */ -function escapeForTemplateJS(s: string): string { - return s - .replace(/\\/g, `\\\\`) - .replace(/`/g, `\\\``) - .replace(/\$/g, `\\$`) -} - -type CompileContext = { - numberParam?: string, - inNode: boolean, - indentLevel: number, - isOnlyText: boolean, -} - -const defaultCompileContext: CompileContext = { - indentLevel: 0, - inNode: false, - isOnlyText: false, -} - -function compile( - node: ICUASTNode, - context: CompileContext = defaultCompileContext -): string[] { - const lines: string[] = [] - let currentLine = '' - const isTopLevel = context.indentLevel === 0 - - function indent(level: number = context.indentLevel) { - return ' '.repeat(level * 2) - } - - function flushCurrent() { - if (currentLine) { - if (context.inNode) { - lines.push(currentLine) - } else { - const prefix = - context.isOnlyText ? '' : - !isTopLevel ? 
indent() - : '_out += ' - const nextLine = `${prefix}\`${currentLine}\`` - lines.push(nextLine) - } - } - currentLine = '' - } - - switch (node.type) { - case 'Text': - currentLine += escapeForTemplateJS(node.value) - break - case 'NumberField': - if (context.numberParam) { - currentLine += `$\{${context.numberParam}}` - } else { - currentLine += `{${context.numberParam}}` - } - break - case 'SimpleReplace': - currentLine += `$\{${node.variableName}}` - break - case 'Node': { - for (const partNode of node.parts) { - const compiled = compile(partNode, { ...context, inNode: true }) - if (partNode.type === 'OptionReplace' || partNode.type === 'Node') { - flushCurrent() - lines.push(...compiled) - } else { - currentLine += compiled[0] - } - } - break - } - case 'OptionReplace': { - if (context.isOnlyText) { - currentLine += `{${node.variableName}, ${node.operatorName}, {options}}` - break - } - flushCurrent() - lines.push(`${isTopLevel ? '_out += ' : ''}TranslationGen.resolveSelect(${node.variableName}, {`) - - const entries = Object.entries(node.options) - - for (const [key, entryNode] of entries) { - const numberParamUpdate = node.operatorName === 'plural' ? key : undefined - const expr = compile(entryNode, { - ...context, - numberParam: numberParamUpdate ?? 
context.numberParam, - indentLevel: context.indentLevel + 1, - inNode: false, - }) - if (expr.length === 0) continue - lines.push(indent(context.indentLevel + 1) + `'${key}': ${expr[0].trimStart()}`, ...expr.slice(1)) - lines[lines.length - 1] += ',' - } - - lines.push(indent() + `})`) - return lines - } - } - flushCurrent() - return lines -} - - -function generateCode( +export function generateCode( obj: Record, indentLevel = 1 ): string { @@ -366,7 +290,16 @@ function generateCode( try { if (entry.type === 'func') { const ast = ICUUtil.parse(ICUUtil.lex(entry.value)) - let compiled = compile(ast) + let compiled = compileToCode(ast).map((value, index, array) => { + if (value.startsWith(' ')) { + return value + } else { + if (index - 1 < 0 || !array[index - 1].startsWith(' ')) { + return `_out += ${value}` + } + } + return value + }) if (compiled.filter(value => value.startsWith('_out +=')).length === 1) { const first = compiled.findIndex(value => value.startsWith('_out +=')) compiled[first] = 'return ' + compiled[first].slice(8) @@ -385,8 +318,8 @@ function generateCode( str += `${indent}${quotedKey}: ${functionLines.join(`\n${indent}`)}${comma}\n` } else if (entry.type === 'text') { const ast = ICUUtil.parse(ICUUtil.lex(entry.value)) - const compiled = compile(ast, { ...defaultCompileContext, isOnlyText: true }) - const text = compiled.length === 1 ? compiled[0] : `\`${escapeForTemplateJS(entry.value)}\`` + const compiled = compileToCode(ast, { isOnlyText: true }) + const text = compiled.length === 1 ? 
compiled[0] : `\`${(entry.value)}\`` str += `${indent}${quotedKey}: ${text}${comma}\n` } else { // nested object diff --git a/src/translationGeneration.ts b/src/translationGeneration.ts index 9e2d1e6..7ecc706 100644 --- a/src/translationGeneration.ts +++ b/src/translationGeneration.ts @@ -1,3 +1,5 @@ +import { getPluralKey } from './compile-to-string' + function resolveSelect( value: string | number | undefined | null, options: Record string)> @@ -13,8 +15,8 @@ function resolvePlural( value: number, options: Record string)> ): string { - const v = String(value) - const handler = options[v] ?? options['other'] + const key = getPluralKey(value) + const handler = options[key] ?? options['other'] if (handler == null) return '' return typeof handler === 'function' ? handler() : handler } diff --git a/tests/compiler-code.test.ts b/tests/compiler-code.test.ts new file mode 100644 index 0000000..d447fca --- /dev/null +++ b/tests/compiler-code.test.ts @@ -0,0 +1,55 @@ +import { ICUUtil } from '../src' +import { compileToCode } from '../src/compile-to-code' + +type ExampleValues = { + name: string, + input: string, + result: string[], +} + +const examples: ExampleValues[] = [ + { + name: 'Plural with number insertion', + input: 'You have {count, plural, =1{# Cat} other{# Cats}}', + result: [ + '`You have `', + 'TranslationGen.resolvePlural(count, {', + " '=1': `${count} Cat`,", + " 'other': `${count} Cats`,", + '})', + ], + }, + { + name: 'Select with nested replacement', + input: '{gender, select, male{Hello Mr.} female{Hello Ms.} other{Hello}} {name}', + result: [ + 'TranslationGen.resolveSelect(gender, {', + " 'male': `Hello Mr.`,", + " 'female': `Hello Ms.`,", + " 'other': `Hello`,", + '})', + '` ${name}`' + ], + }, + { + name: 'ICU Escape sequence', + input: "'''''' '{}'", + result: ["`''' {}`"], + }, + { + name: 'Template JS escape sequence', + input: `\` '$\{}'`, + // eslint-disable-next-line no-useless-escape + result: ['`\\\` \\${}`'], + } +] + + +describe('ICU 
Code-Compiler', () => { + for (const example of examples) { + test(`${example.name}`, () => { + const result = compileToCode(ICUUtil.parse(ICUUtil.lex(example.input))) + expect(result).toEqual(example.result) + }) + } +}) diff --git a/tests/compiler.test.ts b/tests/compiler-string.test.ts similarity index 99% rename from tests/compiler.test.ts rename to tests/compiler-string.test.ts index 8dd06c9..4984332 100644 --- a/tests/compiler.test.ts +++ b/tests/compiler-string.test.ts @@ -208,7 +208,7 @@ const examples: ExampleValues[] = [ ] -describe('ICU Compiler', () => { +describe('ICU String-Compiler', () => { for (const example of examples) { test(`${example.name}`, () => { const result = ICUUtil.compile(example.input, example.values) diff --git a/tests/typing.test.ts b/tests/typing.test.ts index c8a1f8f..868bd87 100644 --- a/tests/typing.test.ts +++ b/tests/typing.test.ts @@ -44,4 +44,5 @@ const translationCandidate: T = { test('Typing and type shape', () => { expect(isValidTranslation(translationCandidate)).toBe(true) + expect(translationCandidate['en'].function1({ name: 'Dune', author: 'Frank Herbert' })).toBe('Dune by Frank Herbert') }) \ No newline at end of file