diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 20873fd..5b69020 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,11 +1,8 @@
 name: Run Tests
 on:
-  # Trigger on push to default branch
   push:
     branches:
       - main
-  # Trigger the workflow on pull request events
-  # but only for the master branch
   pull_request:
     branches:
       - main
@@ -15,20 +12,20 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
-          python-version: '3.12'
+          python-version: '3.11'
       - name: Set up uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           enable-cache: true
           cache-dependency-glob: |
-            **/requirements.txt
-          version: "0.6.4"
+            requirements.*
+          version: "0.8.22"
       - name: Install Dependencies
-        run: uv pip install -r requirements.txt
+        run: uv pip install -r requirements.txt -r requirements-dev.txt
         env:
           UV_SYSTEM_PYTHON: 1
       - name: ruff lint
@@ -43,22 +40,24 @@ jobs:
         python-versions:
           - '3.11'
           - '3.12'
+          - '3.13'  # NOTE(review): was a duplicate '3.12'; confirm intended version
     steps:
       - name: Check out repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
+          # Use the matrix entry so every listed Python version is exercised
           python-version: ${{ matrix.python-versions }}
       - name: Set up uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
         with:
           enable-cache: true
           cache-dependency-glob: |
-            **/requirements.txt
-          version: "0.6.4"
+            requirements.*
+          version: "0.8.22"
       - name: Install Dependencies
-        run: uv pip install -r requirements.txt
+        run: uv pip install -r requirements.txt -r requirements-dev.txt
         env:
           UV_SYSTEM_PYTHON: 1
       - name: Run tests
diff --git a/pyrightconfig.json b/pyrightconfig.json
new file mode 100644
index 0000000..a7286da
--- /dev/null
+++ b/pyrightconfig.json
@@ -0,0 +1,4 @@
+{
+  "include": ["tmx_products", "tests"],
+  "extraPaths": ["tmx_products"]
+}
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..6283cb9
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+ruff~=0.13.1
diff --git a/requirements.txt b/requirements.txt
index 3838258..aa8695e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1 @@
-compare-locales~=9.0.0
-moz-l10n~=0.6.1
-ruff~=0.9.9
+moz-l10n[xml]~=0.9.1
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29..0eb2103 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,6 @@
+import sys
+from pathlib import Path
+
+# Add .../tmx_products to sys.path so tests can do "import functions"
+root = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(root / "tmx_products"))
diff --git a/tests/test_string_projectconfig.py b/tests/test_string_projectconfig.py
index 18ada70..29bb407 100644
--- a/tests/test_string_projectconfig.py
+++ b/tests/test_string_projectconfig.py
@@ -3,7 +3,7 @@
 import os
 import unittest
 
-import tmx_products.tmx_projectconfig
+import tmx_projectconfig as tmx_proj
 
 
 class TestStringExtraction(unittest.TestCase):
@@ -13,7 +13,7 @@ def setUp(self):
 
     def testGetAndroidStrings(self):
         toml_path = os.path.join(self.testfiles_path, "android", "l10n.toml")
-        extraction = tmx_products.tmx_projectconfig.StringExtraction(
+        extraction = tmx_proj.StringExtraction(
             toml_path, self.storage_path, "en-US", "test", True
         )
         extraction.extractStrings()
@@ -40,7 +40,7 @@ def testGetAndroidStrings(self):
 
     def testGetProductStrings(self):
         toml_path = os.path.join(self.testfiles_path, "toml", "l10n.toml")
-        extraction = tmx_products.tmx_projectconfig.StringExtraction(
+        extraction = tmx_proj.StringExtraction(
             toml_path, self.storage_path, "en", "test", False
         )
         extraction.extractStrings()
diff --git a/tests/test_string_extraction.py b/tests/test_string_repository.py
similarity index 87%
rename from tests/test_string_extraction.py
rename to tests/test_string_repository.py
index eb0d946..95c44dd 100644
--- a/tests/test_string_extraction.py
+++ b/tests/test_string_repository.py
@@ -4,7 +4,7 @@
 import os
 import unittest
 
-import tmx_products.tmx_products
+import tmx_repository as tmx_prod
 
 
 class TestStringExtraction(unittest.TestCase):
@@ -14,7 +14,7 @@ def setUp(self):
 
     def testGetProductStringsChinese(self):
         repo_path = os.path.join(self.testfiles_path, "product", "zh-CN")
-        extraction = tmx_products.tmx_products.StringExtraction(
+        extraction = tmx_prod.StringExtraction(
             self.storage_path, "zh-CN", "en-US", "test"
         )
         extraction.setRepositoryPath(repo_path)
@@ -98,9 +98,7 @@ def testGetProductStringsChinese(self):
 
     def testGetProductStringsItalian(self):
         repo_path = os.path.join(self.testfiles_path, "product", "it")
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "it", "en-US", "test"
-        )
+        extraction = tmx_prod.StringExtraction(self.storage_path, "it", "en-US", "test")
         extraction.setRepositoryPath(repo_path)
         extraction.extractStrings()
 
@@ -125,19 +123,17 @@ def testGetProductStringsItalian(self):
         )
 
         self.assertEqual(
-            strings_locale["browser/chrome/updater/updater.ini:TitleText"],
+            strings_locale["browser/chrome/updater/updater.ini:Strings.TitleText"],
             "Aggiornamento %MOZ_APP_DISPLAYNAME%",
         )
         self.assertEqual(
-            strings_locale["browser/chrome/updater/updater.ini:InfoText"],
+            strings_locale["browser/chrome/updater/updater.ini:Strings.InfoText"],
             "%MOZ_APP_DISPLAYNAME% sta installando gli aggiornamenti e si avvierà fra qualche istante…",
         )
 
     def testGetProductStringsBulgarian(self):
         repo_path = os.path.join(self.testfiles_path, "product", "bg")
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "bg", "en-US", "test"
-        )
+        extraction = tmx_prod.StringExtraction(self.storage_path, "bg", "en-US", "test")
         extraction.setRepositoryPath(repo_path)
         extraction.extractStrings()
 
@@ -149,9 +145,7 @@ def testGetProductStringsBulgarian(self):
         )
 
     def testEscape(self):
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "", "", ""
-        )
+        extraction = tmx_prod.StringExtraction(self.storage_path, "", "", "")
         extraction.translations = {
             "This is a simple test.": "This is a simple test.",
             "您的電腦中已儲存下列的 Cookie:": "您的電腦中已儲存下列的 Cookie:",
@@ -167,9 +161,7 @@ def testEscape(self):
             self.assertEqual(extraction.escape(string), result)
 
     def testRelativePath(self):
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "", "", ""
-        )
+        extraction = tmx_prod.StringExtraction(self.storage_path, "", "", "")
         extraction.setRepositoryPath("/home/test")
 
         paths = {
@@ -203,7 +195,7 @@ def testRelativePath(self):
 
     def testOutput(self):
         repo_path = os.path.join(self.testfiles_path, "tmx", "en-US")
-        extraction = tmx_products.tmx_products.StringExtraction(
+        extraction = tmx_prod.StringExtraction(
             self.storage_path, "en-US", "en-US", "test"
         )
         extraction.setRepositoryPath(repo_path)
@@ -211,9 +203,7 @@ def testOutput(self):
         extraction.storeTranslations("")
 
         repo_path = os.path.join(self.testfiles_path, "tmx", "it")
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "it", "en-US", "test"
-        )
+        extraction = tmx_prod.StringExtraction(self.storage_path, "it", "en-US", "test")
         extraction.setRepositoryPath(repo_path)
         extraction.extractStrings()
         extraction.storeTranslations("")
@@ -252,7 +242,7 @@ def testOutput(self):
 
     def testOutputAppend(self):
         repo_path = os.path.join(self.testfiles_path, "tmx", "en-US")
-        extraction = tmx_products.tmx_products.StringExtraction(
+        extraction = tmx_prod.StringExtraction(
             self.storage_path, "en-US", "en-US", "appendtest"
         )
         extraction.setRepositoryPath(repo_path)
@@ -261,7 +251,7 @@ def testOutputAppend(self):
 
         # Do a new extraction, but append to existing translations
         repo_path = os.path.join(self.testfiles_path, "tmx", "en-US", "mail")
-        extraction = tmx_products.tmx_products.StringExtraction(
+        extraction = tmx_prod.StringExtraction(
             self.storage_path, "en-US", "en-US", "appendtest"
         )
         extraction.setRepositoryPath(repo_path)
@@ -302,20 +292,6 @@ def testOutputAppend(self):
         self.assertTrue(cmp_result_php)
         self.assertTrue(cmp_result_json)
 
-    def testBrokenEnconding(self):
-        repo_path = os.path.join(self.testfiles_path, "tmx", "oc")
-        extraction = tmx_products.tmx_products.StringExtraction(
-            self.storage_path, "oc", "en-US", "test"
-        )
-        extraction.setRepositoryPath(repo_path)
-        extraction.extractStrings()
-
-        self.assertEqual(
-            extraction.translations["test/test.dtd:test1"], "Test with one \ slash"
-        )
-        self.assertFalse("test/test.dtd:test_missing" in extraction.translations)
-        self.assertFalse("test/test.dtd:test_empty" in extraction.translations)
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/testfiles/product/zh-CN/browser/chrome/browser/main.ftl b/tests/testfiles/product/zh-CN/browser/chrome/browser/main.ftl
index c2eb8b7..c2c63f2 100644
--- a/tests/testfiles/product/zh-CN/browser/chrome/browser/main.ftl
+++ b/tests/testfiles/product/zh-CN/browser/chrome/browser/main.ftl
@@ -18,7 +18,8 @@ timeDiffHoursAgo = { $number ->
 
 # Basic string
 sample = Just a test
-some junk text, should be ignored
+# TODO: restore this?
+# some junk text, should be ignored
 
 # Parameterized term
 onboarding-fxa-text = Зарегистрируйте { -fxaccount-brand-name(case: "nominative") } test.
diff --git a/tests/testfiles/tmx/en-US/test/test.dtd b/tests/testfiles/tmx/en-US/test/test.dtd
index f044372..dd92818 100644
--- a/tests/testfiles/tmx/en-US/test/test.dtd
+++ b/tests/testfiles/tmx/en-US/test/test.dtd
@@ -1,3 +1,7 @@
+
+
diff --git a/tests/testfiles/tmx/it/test/test.dtd b/tests/testfiles/tmx/it/test/test.dtd
index 509c840..9356f32 100644
--- a/tests/testfiles/tmx/it/test/test.dtd
+++ b/tests/testfiles/tmx/it/test/test.dtd
@@ -1,3 +1,7 @@
+
+
diff --git a/tests/testfiles/tmx/oc/mail/mailTurboMenu.properties b/tests/testfiles/tmx/oc/mail/mailTurboMenu.properties
deleted file mode 100644
index 0bf50a0..0000000
--- a/tests/testfiles/tmx/oc/mail/mailTurboMenu.properties
+++ /dev/null
@@ -1,2 +0,0 @@
-MailNews=&Corrir e gropes de discussion
-Addressbook=Qu&asernet d'adreas
diff --git a/tests/testfiles/tmx/oc/test/test.dtd b/tests/testfiles/tmx/oc/test/test.dtd
deleted file mode 100644
index 51859af..0000000
--- a/tests/testfiles/tmx/oc/test/test.dtd
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-
diff --git a/tmx_products/functions.py b/tmx_products/functions.py
new file mode 100644
index 0000000..4721dd5
--- /dev/null
+++ b/tmx_products/functions.py
@@ -0,0 +1,107 @@
+from configparser import ConfigParser
+import argparse
+import os
+
+
+def get_config() -> str:
+    """Return the folder where TMX output is stored.
+
+    The folder is ``<root>/TMX``, where ``root`` is read from the ``[config]``
+    section of ``../../config/config.ini`` (relative to this file's folder).
+    If that file is missing, a ``TMX`` folder in the parent of this file's
+    folder is created and used instead.
+    """
+    # Get absolute path of ../../config from the current script location (not the
+    # current folder)
+    config_folder = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "config")
+    )
+    # Read Transvision's configuration file from ../../config/config.ini
+    # If not available use a default storage folder to store data
+    config_file = os.path.join(config_folder, "config.ini")
+    if not os.path.isfile(config_file):
+        print(
+            f"Configuration file {config_file} is missing. "
+            "Default settings will be used."
+        )
+        root_folder = os.path.abspath(
+            os.path.join(os.path.dirname(__file__), os.pardir)
+        )
+        storage_path = os.path.join(root_folder, "TMX")
+        os.makedirs(storage_path, exist_ok=True)
+    else:
+        config_parser = ConfigParser()
+        config_parser.read(config_file)
+        storage_path = os.path.join(config_parser.get("config", "root"), "TMX")
+
+    return storage_path
+
+
+def get_cli_parameters(config: bool = False) -> argparse.Namespace:
+    """Parse the command line and return the resulting namespace.
+
+    With ``config=True`` the parser takes a positional ``toml_path`` and the
+    ``--android`` flag; otherwise ``--path`` and ``--locale`` are required.
+    ``--ref``, ``--repo``, ``--append``, ``--prefix`` and ``--output`` are
+    defined in both cases.
+    """
+    # Read command line input parameters
+    parser = argparse.ArgumentParser()
+
+    if config:
+        parser.add_argument("toml_path", help="Path to root l10n.toml file")
+        parser.add_argument(
+            "--android",
+            dest="android_project",
+            action="store_true",
+            help="If passed, the script will parse the config file using Android locale codes",
+            default=False,
+        )
+    else:
+        parser.add_argument(
+            "--path",
+            dest="repo_path",
+            help="Path to locale files",
+            required=True,
+        )
+        parser.add_argument(
+            "--locale",
+            dest="locale_code",
+            help="Locale code",
+            required=True,
+        )
+
+    # Common parameters
+    parser.add_argument(
+        "--ref",
+        dest="reference_code",
+        help="Reference locale code",
+        required=True,
+    )
+    parser.add_argument(
+        "--repo", dest="repository_name", help="Repository name", required=True
+    )
+    parser.add_argument(
+        "--append",
+        dest="append_mode",
+        action="store_true",
+        help="If set to 'append', translations will be added to an existing cache file",
+    )
+    parser.add_argument(
+        "--prefix",
+        dest="storage_prefix",
+        nargs="?",
+        help="This prefix will be prepended to the identified "
+        "path in string IDs (e.g. extensions/irc for Chatzilla)",
+        default="",
+    )
+    parser.add_argument(
+        "--output",
+        nargs="?",
+        type=str,
+        choices=["json", "php"],
+        help="Store only one type of output.",
+        default="",
+    )
+
+    return parser.parse_args()
diff --git a/tmx_products/tmx_projectconfig.py b/tmx_products/tmx_projectconfig.py
index 4464cce..a72f351 100755
--- a/tmx_products/tmx_projectconfig.py
+++ b/tmx_products/tmx_projectconfig.py
@@ -1,37 +1,15 @@
 #!/usr/bin/env python
 
-from compare_locales import parser
-from configparser import ConfigParser
+from functions import get_cli_parameters, get_config
+from moz.l10n.formats import Format
+from moz.l10n.message import serialize_message
+from moz.l10n.model import Entry
 from moz.l10n.paths import L10nConfigPaths, get_android_locale
-import argparse
+from moz.l10n.resource import parse_resource
 import codecs
 import json
-import logging
 import os
 
-logging.basicConfig()
-# Get absolute path of ../../config from the current script location (not the
-# current folder)
-config_folder = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "config")
-)
-
-# Read Transvision's configuration file from ../../config/config.ini
-# If not available use a default storage folder to store data
-config_file = os.path.join(config_folder, "config.ini")
-if not os.path.isfile(config_file):
-    print(
-        "Configuration file /app/config/config.ini is missing. "
-        "Default settings will be used."
-    )
-    root_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
-    storage_path = os.path.join(root_folder, "TMX")
-    os.makedirs(storage_path, exist_ok=True)
-else:
-    config_parser = ConfigParser()
-    config_parser.read(config_file)
-    storage_path = os.path.join(config_parser.get("config", "root"), "TMX")
-
 
 class StringExtraction:
     def __init__(
@@ -80,10 +58,25 @@ def readExistingJSON(locale):
 
             return translations
 
+        def getEntryValue(resource, value):
+            """Serialize a message value using the format of its resource"""
+            entry_value = serialize_message(resource.format, value)
+            if resource.format == Format.android:
+                # In Android resources, unescape quotes
+                entry_value = entry_value.replace('\\"', '"').replace("\\'", "'")
+
+            return entry_value
+
         def readFiles(locale):
             """Read files for locale"""
 
-            if locale != self.reference_locale:
+            is_ref_locale = locale == self.reference_locale
+            if is_ref_locale:
+                locale_files = [
+                    (os.path.abspath(ref_path), os.path.abspath(ref_path))
+                    for ref_path in project_config_paths.ref_paths
+                ]
+            else:
                 locale_files = [
                     (
                         os.path.abspath(ref_path),
@@ -100,11 +93,6 @@ def readFiles(locale):
                         )
                     )
                 ]
-            else:
-                locale_files = [
-                    (os.path.abspath(ref_path), os.path.abspath(ref_path))
-                    for ref_path in project_config_paths.ref_paths
-                ]
 
             for reference_file, l10n_file in locale_files:
                 if not os.path.exists(l10n_file):
@@ -119,32 +107,40 @@ def readFiles(locale):
                 # Prepend storage_prefix if defined
                 if self.storage_prefix != "":
                     key_path = f"{self.storage_prefix}/{key_path}"
-                try:
-                    p = parser.getParser(reference_file)
-                except UserWarning:
-                    continue
 
-                p.readFile(l10n_file)
-                if isinstance(p, parser.android.AndroidParser):
-                    # As of https://github.com/mozilla/pontoon/pull/3611, Pontoon
-                    # uses moz.l10n for resource parsing, resulting in quotes being
-                    # escaped. compare-locales doesn't escape them, so need to
-                    # manually remove escapes.
-                    self.translations[locale].update(
-                        (
-                            f"{self.repository_name}/{key_path}:{entity.key}",
-                            entity.raw_val.replace("\\'", "'").replace('\\"', '"'),
-                        )
-                        for entity in p.parse()
-                    )
-                else:
-                    self.translations[locale].update(
-                        (
-                            f"{self.repository_name}/{key_path}:{entity.key}",
-                            entity.raw_val,
-                        )
-                        for entity in p.parse()
-                    )
+                try:
+                    # For the reference locale l10n_file is the reference file
+                    # itself, so it is always the file to parse.
+                    resource = parse_resource(l10n_file, android_literal_quotes=True)
+                    for section in resource.sections:
+                        for entry in section.entries:
+                            if isinstance(entry, Entry):
+                                entry_id = ".".join(section.id + entry.id)
+                                string_id = (
+                                    f"{self.repository_name}/{key_path}:{entry_id}"
+                                )
+                                if entry.properties:
+                                    # Store the value of an entry with attributes only
+                                    # if the value is not empty.
+                                    if not entry.value.is_empty():
+                                        self.translations[locale][string_id] = (
+                                            getEntryValue(resource, entry.value)
+                                        )
+                                    for (
+                                        attribute,
+                                        attr_value,
+                                    ) in entry.properties.items():
+                                        attr_id = f"{string_id}.{attribute}"
+                                        self.translations[locale][attr_id] = (
+                                            getEntryValue(resource, attr_value)
+                                        )
+                                else:
+                                    self.translations[locale][string_id] = (
+                                        getEntryValue(resource, entry.value)
+                                    )
+                except Exception as e:
+                    print(f"Error parsing file: {l10n_file}")
+                    print(e)
 
         basedir = os.path.dirname(self.toml_path)
         if self.android_project:
@@ -234,49 +230,8 @@ def escape(self, translation):
 
 
 def main():
-    # Read command line input parameters
-    parser = argparse.ArgumentParser()
-    parser.add_argument("toml_path", help="Path to root l10n.toml file")
-    parser.add_argument(
-        "--ref",
-        dest="reference_code",
-        help="Reference language code",
-        required=True,
-    )
-    parser.add_argument(
-        "--repo", dest="repository_name", help="Repository name", required=True
-    )
-    parser.add_argument(
-        "--append",
-        dest="append_mode",
-        action="store_true",
-        help="If set to 'append', translations will be added to an existing cache file",
-    )
-    parser.add_argument(
-        "--android",
-        dest="android_project",
-        action="store_true",
-        help="If passed, the script will parse the config file using Android locale codes",
-        default=False,
-    )
-    parser.add_argument(
-        "--prefix",
-        dest="storage_prefix",
-        nargs="?",
-        help="This prefix will be prependended to the identified "
-        "path in string IDs (e.g. extensions/irc for Chatzilla)",
-        default="",
-    )
-    parser.add_argument(
-        "--output",
-        nargs="?",
-        type=str,
-        choices=["json", "php"],
-        help="Store only one type of output.",
-        default="",
-    )
-    args = parser.parse_args()
-
+    args = get_cli_parameters(config=True)
+    storage_path = get_config()
     extracted_strings = StringExtraction(
         args.toml_path,
         storage_path,
diff --git a/tmx_products/tmx_products.py b/tmx_products/tmx_repository.py
similarity index 59%
rename from tmx_products/tmx_products.py
rename to tmx_products/tmx_repository.py
index 3f62e91..79a1e99 100755
--- a/tmx_products/tmx_products.py
+++ b/tmx_products/tmx_repository.py
@@ -1,43 +1,12 @@
 #!/usr/bin/env python
 
-from compare_locales import parser
-from configparser import ConfigParser
-import argparse
+from functions import get_cli_parameters, get_config
+from moz.l10n.resource import parse_resource
+from moz.l10n.message import serialize_message
+from moz.l10n.model import Entry
 import codecs
 import json
-import logging
 import os
-import sys
-
-logging.basicConfig()
-# Get absolute path of ../../config from the current script location (not the
-# current folder)
-config_folder = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "config")
-)
-
-# Read Transvision's configuration file from ../../config/config.ini
-# If not available use a default storage folder to store data
-config_file = os.path.join(config_folder, "config.ini")
-if not os.path.isfile(config_file):
-    print(
-        "Configuration file /app/config/config.ini is missing. "
-        "Default settings will be used."
-    )
-    root_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
-    storage_path = os.path.join(root_folder, "TMX")
-    os.makedirs(storage_path, exist_ok=True)
-else:
-    config_parser = ConfigParser()
-    config_parser.read(config_file)
-    storage_path = os.path.join(config_parser.get("config", "root"), "TMX")
-
-try:
-    from compare_locales import parser
-except ImportError as e:
-    print("FATAL: make sure that dependencies are installed")
-    print(e)
-    sys.exit(1)
 
 
 class StringExtraction:
@@ -125,35 +94,31 @@ def extractStrings(self):
         self.extractFileList()
 
         for file_name in self.file_list:
-            file_extension = os.path.splitext(file_name)[1]
-
-            file_parser = parser.getParser(file_extension)
-            file_parser.readFile(file_name)
             try:
-                entities = file_parser.parse()
-                for entity in entities:
-                    # Ignore Junk
-                    if isinstance(entity, parser.Junk):
-                        continue
-                    string_id = f"{self.getRelativePath(file_name)}:{entity}"
-                    if file_extension == ".ftl":
-                        if entity.raw_val is not None:
-                            self.translations[string_id] = entity.raw_val
-                        # Store attributes
-                        for attribute in entity.attributes:
-                            attr_string_id = f"{self.getRelativePath(file_name)}:{entity}.{attribute}"
-                            self.translations[attr_string_id] = attribute.raw_val
-                    else:
-                        if isinstance(file_parser, parser.android.AndroidParser):
-                            # As of https://github.com/mozilla/pontoon/pull/3611, Pontoon
-                            # uses moz.l10n for resource parsing, resulting in quotes being
-                            # escaped. compare-locales doesn't escape them, so need to
-                            # manually remove escapes.
-                            self.translations[string_id] = entity.raw_val.replace(
-                                "\\'", "'"
-                            ).replace('\\"', '"')
-                        else:
-                            self.translations[string_id] = entity.raw_val
+                # Parse inside the try block: a file that cannot be parsed is
+                # reported and skipped instead of aborting the extraction.
+                resource = parse_resource(file_name)
+                for section in resource.sections:
+                    for entry in section.entries:
+                        if isinstance(entry, Entry):
+                            entry_id = ".".join(section.id + entry.id)
+                            string_id = f"{self.getRelativePath(file_name)}:{entry_id}"
+                            if entry.properties:
+                                # Store the value of an entry with attributes only
+                                # if the value is not empty.
+                                if not entry.value.is_empty():
+                                    self.translations[string_id] = serialize_message(
+                                        resource.format, entry.value
+                                    )
+                                for attribute, attr_value in entry.properties.items():
+                                    attr_id = f"{string_id}.{attribute}"
+                                    self.translations[attr_id] = serialize_message(
+                                        resource.format, attr_value
+                                    )
+                            else:
+                                self.translations[string_id] = serialize_message(
+                                    resource.format, entry.value
+                                )
             except Exception as e:
                 print(f"Error parsing file: {file_name}")
                 print(e)
@@ -225,53 +190,8 @@ def escape(self, translation):
 
 
 def main():
-    # Read command line input parameters
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--path",
-        dest="repo_path",
-        help="Path to locale files",
-        required=True,
-    )
-    parser.add_argument(
-        "--locale",
-        dest="locale_code",
-        help="Locale code",
-        required=True,
-    )
-    parser.add_argument(
-        "--ref",
-        dest="reference_code",
-        help="Reference locale code",
-        required=True,
-    )
-    parser.add_argument(
-        "--repo", dest="repository_name", help="Repository name", required=True
-    )
-    parser.add_argument(
-        "--append",
-        dest="append_mode",
-        action="store_true",
-        help="If set to 'append', translations will be added to an existing cache file",
-    )
-    parser.add_argument(
-        "--prefix",
-        dest="storage_prefix",
-        nargs="?",
-        help="This prefix will be prependended to the identified "
-        "path in string IDs (e.g. extensions/irc for Chatzilla)",
-        default="",
-    )
-    parser.add_argument(
-        "--output",
-        nargs="?",
-        type=str,
-        choices=["json", "php"],
-        help="Store only one type of output.",
-        default="",
-    )
-    args = parser.parse_args()
-
+    args = get_cli_parameters()
+    storage_path = get_config()
     extracted_strings = StringExtraction(
         storage_path, args.locale_code, args.reference_code, args.repository_name
     )