From 836b26aa1bd44c3354c686062328816e0834bb05 Mon Sep 17 00:00:00 2001 From: gray Date: Sat, 11 Mar 2023 11:26:24 -0500 Subject: [PATCH 1/5] Upped max_tokens to 1024, it was failing on some things with the lower value. Chunk the input into chunks of 16. Divide those chunks if they fail. Write successful chunks immediately so it'll process as much as possible. --- .github/scripts/translations.py | 183 +++++++++++++++++++------------- 1 file changed, 110 insertions(+), 73 deletions(-) diff --git a/.github/scripts/translations.py b/.github/scripts/translations.py index b28af9d..62f7e4f 100644 --- a/.github/scripts/translations.py +++ b/.github/scripts/translations.py @@ -6,6 +6,7 @@ import requests from xml.dom import minidom +from itertools import islice # Env Args GITHUB_WORKSPACE = os.environ.get('GITHUB_WORKSPACE') OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') @@ -17,11 +18,108 @@ # Associative Array which is the source of our languages qualifier_language = { - "pl": "Polish", - "en-rGB": "British English", +# "pl": "Polish", +# "en-rGB": "British English", "uk": "Ukrainian", + "ar": "Arabic", + "bg": "Bulgarian", + "da": "Danish", + "de": "German", + "el": "Greek", + "es": "Spanish", + "fi": "Finnish", + "fr": "French", + "hr": "Croatian", + "it": "Italian", + "ja": "Japanese", + "ko": "Korean", + "nl": "Dutch", + "pl": "Polish", + "pt-rBR" : "Brazilian Portuguese", + "pt-rPT": "Portuguese", + "ro": "Romanian", + "ru": "Russian", + "tr": "Turkish", + "zh": "Chinese" } +def chunks(data, SIZE=10000): + it = iter(data) + for i in range(0, len(data), SIZE): + yield {k:data[k] for k in islice(it, SIZE)} +def tryChunk(strings_needed, language): + # First we need our prompt, which will fetch a response for each language. 
+ prompt = "Translate each of these phrases, excluding punctuation unless present, into " + \ + language + for qualified_string_needed_key in strings_needed: + prompt += "\n" + strings_needed[qualified_string_needed_key].text + + url = "https://api.openai.com/v1/completions" + headers = { + "Content-Type": "application/json; charset=utf-8", + "Authorization": "Bearer " + OPENAI_API_KEY, + } + data = { + "model": "text-davinci-003", + "prompt": prompt, + "temperature": 0, + "max_tokens": 1024, + "top_p": 1, + "frequency_penalty": 0.5, + "presence_penalty": 0, + } + print(f"...Fetching {len(strings_needed)} {language} translation(s)") + json_response = requests.post(url, headers=headers, json=data) + response_text = json_response.json()["choices"][0]["text"] + response_strings = response_text.replace('\n\n', "").split('\n') + filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) + if len(filtered_response_strings) != len(strings_needed): + print(f"Miss, {len(filtered_response_strings)} != {len(strings_needed)}, found {json_response.json()}") + if len(strings_needed) > 1: + in1 = dict(list(strings_needed.items())[len(strings_needed)//2:]) + in2 = dict(list(strings_needed.items())[:len(strings_needed)//2]) + out1 = tryChunk(in1, language) + out2 = tryChunk(in2, language) + return out1 + out2 + else: + return filtered_response_strings + else: + insertString(filtered_response_strings, strings_needed) + return filtered_response_strings + + +def insertString(strings_to_add, strings_needed): + print(f"{strings_to_add}, {strings_needed}") + index = 0 + qualified_strings_to_add = list() + + for qualified_string_needed_key in strings_needed: + qualified_string_needed = strings_needed[qualified_string_needed_key] + qualified_string_copy = copy.deepcopy(qualified_string_needed) + qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'') + print( + f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> 
{qualified_string_copy.text}") + qualified_strings_to_add.append(qualified_string_copy) + index += 1 + # Now lets move onto modifying the XML file. + if len(strings_needed) > 0: + qualified_strings_tree = ET.parse(qualified_strings_file_path) + qualified_strings_root = qualified_strings_tree.getroot() + + # Next lets add the elements we do want + for qualified_string in qualified_strings_to_add: + qualified_strings_root.append(qualified_string) + + # Lastly, we write the changes to the file + print(f"...Writing changes to {str(qualified_strings_file_path)}") + qualified_strings_tree.write( + qualified_strings_file_path, + encoding="utf-8", + xml_declaration="utf-8", + method="xml" + ) + # Iterate through each source strings.xml file so the case where source_paths = pathlib.Path(GITHUB_WORKSPACE).glob('**/src/*/res/values/strings.xml') @@ -84,78 +182,17 @@ new_strings_file.close() # It's time to request from OpenAI and get our translations! - qualified_strings_to_add = list() + filtered_response_strings = list() if len(qualified_strings_needed) != 0: - # First we need our prompt, which will fetch a response for each language. 
- prompt = "Translate each of these phrases, excluding punctuation unless present, into " + \ - qualifier_language[qualifier] - for qualified_string_needed_key in qualified_strings_needed: - prompt += "\n" + qualified_strings_needed[qualified_string_needed_key].text - - url = "https://api.openai.com/v1/completions" - headers = { - "Content-Type": "application/json; charset=utf-8", - "Authorization": "Bearer " + OPENAI_API_KEY, - } - data = { - "model": "text-davinci-003", - "prompt": prompt, - "temperature": 0, - "max_tokens": 60, - "top_p": 1, - "frequency_penalty": 0.5, - "presence_penalty": 0, - } - print(f"...Fetching {len(qualified_strings_needed)} {qualifier_language[qualifier]} translation(s)") - json_response = requests.post(url, headers=headers, json=data) - response_text = json_response.json()["choices"][0]["text"] - response_strings = response_text.replace('\n\n', "").split('\n') - filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) - - # The count isn't the best way of doing this, but sometimes life is like that. - if len(filtered_response_strings) != len(qualified_strings_needed): - print( - "...Stopping translations for {qualifier}, OpenAI response returned {oai_count} item(s) but we " - "expected {local_count}".format( - qualifier=qualifier, - oai_count=len(filtered_response_strings), - local_count=len(qualified_strings_needed) - )) - continue - - index = 0 - for qualified_string_needed_key in qualified_strings_needed: - qualified_string_needed = qualified_strings_needed[qualified_string_needed_key] - qualified_string_copy = copy.deepcopy(qualified_string_needed) - qualified_string_copy.text = filtered_response_strings[index] - print( - f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}") - qualified_strings_to_add.append(qualified_string_copy) - index += 1 - - # Now lets move onto modifying the XML file. 
- if len(qualified_strings_remove) > 0 or len(qualified_strings_needed) > 0: - qualified_strings_tree = ET.parse(qualified_strings_file_path) - qualified_strings_root = qualified_strings_tree.getroot() - - # First lets remove the elements we dont need - for qualified_string_to_remove in qualified_strings_remove: - for qualified_string in qualified_strings_root: - if qualified_string.attrib.get(XML_ATTR_NAME) == qualified_string_to_remove: - qualified_strings_root.remove(qualified_string) - - # Next lets add the elements we do want - for qualified_string in qualified_strings_to_add: - qualified_strings_root.append(qualified_string) - - # Lastly, we write the changes to the file - print(f"...Writing changes to {str(qualified_strings_file_path)}") - qualified_strings_tree.write( - qualified_strings_file_path, - encoding="utf-8", - xml_declaration="utf-8", - method="xml" - ) + for chunk in chunks(qualified_strings_needed, 16): + tryChunk(chunk, qualifier_language[qualifier]) + + # First lets remove the elements we don't need + for qualified_string_to_remove in qualified_strings_remove: + for qualified_string in qualified_strings_root: + if qualified_string.attrib.get(XML_ATTR_NAME) == qualified_string_to_remove: + qualified_strings_root.remove(qualified_string) + print(f"...Translations for {qualifier_language[qualifier]} completed") print("-------------------------------") print("Translation Script Complete!") From 51c3b6ead35adb2ee814b3978ccafaf67f4a49f8 Mon Sep 17 00:00:00 2001 From: gray Date: Sun, 12 Mar 2023 14:06:29 -0400 Subject: [PATCH 2/5] Google Translate Support Command Line Arguments --- .github/scripts/translations.py | 78 ++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/.github/scripts/translations.py b/.github/scripts/translations.py index 62f7e4f..63f1c53 100644 --- a/.github/scripts/translations.py +++ b/.github/scripts/translations.py @@ -1,16 +1,19 @@ +import argparse import pathlib import json import 
xml.etree.ElementTree as ET import os import copy import requests +import re from xml.dom import minidom from itertools import islice -# Env Args -GITHUB_WORKSPACE = os.environ.get('GITHUB_WORKSPACE') -OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') +# if you don't want to import the dependencies and don't plan to use google translate +# comment out the following line +from google.cloud import translate + # Parsing Args XML_ATTR_TRANSLATABLE = "translatable" @@ -20,25 +23,33 @@ qualifier_language = { # "pl": "Polish", # "en-rGB": "British English", - "uk": "Ukrainian", "ar": "Arabic", "bg": "Bulgarian", + "bn": "Bengali", + "ca": "Catalan", + "cs": "Czech", "da": "Danish", "de": "German", "el": "Greek", "es": "Spanish", "fi": "Finnish", + "fa": "Persian", "fr": "French", + "he": "Hebrew", + "hi": "Hindi", "hr": "Croatian", "it": "Italian", "ja": "Japanese", "ko": "Korean", + "nb": "Norwegian Bokmål", "nl": "Dutch", "pl": "Polish", "pt-rBR" : "Brazilian Portuguese", "pt-rPT": "Portuguese", "ro": "Romanian", "ru": "Russian", + "sv": "Swedish", + "uk": "Ukrainian", "tr": "Turkish", "zh": "Chinese" } @@ -48,7 +59,7 @@ def chunks(data, SIZE=10000): for i in range(0, len(data), SIZE): yield {k:data[k] for k in islice(it, SIZE)} -def tryChunk(strings_needed, language): +def fetchOpenAI(strings_needed, language): # First we need our prompt, which will fetch a response for each language. 
prompt = "Translate each of these phrases, excluding punctuation unless present, into " + \ language @@ -73,9 +84,35 @@ def tryChunk(strings_needed, language): json_response = requests.post(url, headers=headers, json=data) response_text = json_response.json()["choices"][0]["text"] response_strings = response_text.replace('\n\n', "").split('\n') + +def fetchGoogleTranslate(strings_needed, language_code): + client = translate.TranslationServiceClient() + location = "global" + parent = f"projects/{PROJECT_ID}/locations/{location}" + + request_strings = list(map(lambda x: x.text, strings_needed.values())) + request={ + "parent": parent, + "contents": request_strings, + "mime_type": "text/plain", + "source_language_code": "en-US", + "target_language_code": language_code.replace("-r", "-"), # pt-rBR -> pt-BR + } +# print(f"request={request}") + response = client.translate_text( + request=request + ) + +# print(f"response={response}") + return map(lambda x: x.translated_text, response.translations) + +def tryChunk(strings_needed, language, language_code): + if config['google_translate']: + response_strings = fetchGoogleTranslate(strings_needed, language_code) + else: + response_strings = fetchOpenAI(strings_needed, language) filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) if len(filtered_response_strings) != len(strings_needed): - print(f"Miss, {len(filtered_response_strings)} != {len(strings_needed)}, found {json_response.json()}") if len(strings_needed) > 1: in1 = dict(list(strings_needed.items())[len(strings_needed)//2:]) in2 = dict(list(strings_needed.items())[:len(strings_needed)//2]) @@ -85,19 +122,18 @@ def tryChunk(strings_needed, language): else: return filtered_response_strings else: - insertString(filtered_response_strings, strings_needed) + insertStrings(filtered_response_strings, strings_needed) return filtered_response_strings -def insertString(strings_to_add, strings_needed): - print(f"{strings_to_add}, 
{strings_needed}") +def insertStrings(strings_to_add, strings_needed): index = 0 qualified_strings_to_add = list() for qualified_string_needed_key in strings_needed: qualified_string_needed = strings_needed[qualified_string_needed_key] qualified_string_copy = copy.deepcopy(qualified_string_needed) - qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'') + qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…") print( f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}") qualified_strings_to_add.append(qualified_string_copy) @@ -120,8 +156,23 @@ def insertString(strings_to_add, strings_needed): method="xml" ) +parser = argparse.ArgumentParser(description="Android String Translator", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-r", "--root", help="root directory of the android app, or library") +parser.add_argument("-O", "--openai-key", help="OpenAI Key") +parser.add_argument("-p", "--project_id", help="The Project Id for Google Translate") +parser.add_argument("-g", "--google-translate", action="store_true", help="use Google Translate instead of OpenAI") +args = parser.parse_args() +config = vars(args) + +rootDir = config['root'] if config['root'] != None else os.environ.get('GITHUB_WORKSPACE') +if config['google_translate']: + PROJECT_ID = config['project_id'] if config['project_id'] != None else os.environ.get('GOOGLE_PROJECT_ID') +else: + OPENAI_API_KEY = config['openai_key'] if config['openai_key'] != None else os.environ.get('OPENAI_API_KEY') + # Iterate through each source strings.xml file so the case where -source_paths = pathlib.Path(GITHUB_WORKSPACE).glob('**/src/*/res/values/strings.xml') +source_paths = pathlib.Path(rootDir).glob('**/src/*/res/values/strings.xml') print("Starting Translations Script!") print("-------------------------------") @@ -185,9 +236,12 @@ def insertString(strings_to_add, strings_needed): 
filtered_response_strings = list() if len(qualified_strings_needed) != 0: for chunk in chunks(qualified_strings_needed, 16): - tryChunk(chunk, qualifier_language[qualifier]) + tryChunk(chunk, qualifier_language[qualifier], qualifier) # First lets remove the elements we don't need + qualified_strings_tree = ET.parse(qualified_strings_file_path) + qualified_strings_root = qualified_strings_tree.getroot() + for qualified_string_to_remove in qualified_strings_remove: for qualified_string in qualified_strings_root: if qualified_string.attrib.get(XML_ATTR_NAME) == qualified_string_to_remove: From 30200cba5cd0c2e6c2866975f9d2de1385a7aea3 Mon Sep 17 00:00:00 2001 From: gray Date: Sun, 12 Mar 2023 15:37:31 -0400 Subject: [PATCH 3/5] Added a quiet mode --- .github/scripts/translations.py | 41 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/.github/scripts/translations.py b/.github/scripts/translations.py index 63f1c53..739203e 100644 --- a/.github/scripts/translations.py +++ b/.github/scripts/translations.py @@ -80,7 +80,9 @@ def fetchOpenAI(strings_needed, language): "frequency_penalty": 0.5, "presence_penalty": 0, } - print(f"...Fetching {len(strings_needed)} {language} translation(s)") + + if not config['quiet']: + print(f"...Fetching {len(strings_needed)} {language} translation(s)") json_response = requests.post(url, headers=headers, json=data) response_text = json_response.json()["choices"][0]["text"] response_strings = response_text.replace('\n\n', "").split('\n') @@ -133,9 +135,11 @@ def insertStrings(strings_to_add, strings_needed): for qualified_string_needed_key in strings_needed: qualified_string_needed = strings_needed[qualified_string_needed_key] qualified_string_copy = copy.deepcopy(qualified_string_needed) - qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…") - print( - f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> 
{qualified_string_copy.text}") + qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…:") + + if not config['quiet']: + print( + f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}") qualified_strings_to_add.append(qualified_string_copy) index += 1 # Now lets move onto modifying the XML file. @@ -148,7 +152,8 @@ def insertStrings(strings_to_add, strings_needed): qualified_strings_root.append(qualified_string) # Lastly, we write the changes to the file - print(f"...Writing changes to {str(qualified_strings_file_path)}") + if not config['quiet']: + print(f"...Writing changes to {str(qualified_strings_file_path)}") qualified_strings_tree.write( qualified_strings_file_path, encoding="utf-8", @@ -159,6 +164,7 @@ def insertStrings(strings_to_add, strings_needed): parser = argparse.ArgumentParser(description="Android String Translator", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-r", "--root", help="root directory of the android app, or library") +parser.add_argument("-q", "--quiet", action="store_true", help="decrease verbosity") parser.add_argument("-O", "--openai-key", help="OpenAI Key") parser.add_argument("-p", "--project_id", help="The Project Id for Google Translate") parser.add_argument("-g", "--google-translate", action="store_true", help="use Google Translate instead of OpenAI") @@ -174,12 +180,14 @@ def insertStrings(strings_to_add, strings_needed): # Iterate through each source strings.xml file so the case where source_paths = pathlib.Path(rootDir).glob('**/src/*/res/values/strings.xml') -print("Starting Translations Script!") -print("-------------------------------") +if not config['quiet']: + print("Starting Translations Script!") + print("-------------------------------") for source_path in source_paths: # Generate a map of source strings - print("Parsing " + str(source_path)) + if not config['quiet']: + print("Parsing " + 
str(source_path)) source_tree = ET.parse(source_path) source_strings = dict() @@ -187,11 +195,13 @@ def insertStrings(strings_to_add, strings_needed): for child in source_tree.getroot(): # Let's ignore the strings that are marked with translatable=false if child.attrib.get(XML_ATTR_TRANSLATABLE) == "false": - print(f"⚠️ Ignoring {child.attrib.get(XML_ATTR_NAME)} because it wasn't marked as translatable") + if not config['quiet']: + print(f"⚠️ Ignoring {child.attrib.get(XML_ATTR_NAME)} because it wasn't marked as translatable") continue source_strings[child.attrib.get(XML_ATTR_NAME)] = child - print("-------------------------------") + if not config['quiet']: + print("-------------------------------") # Next, we check to see if each language exists res_directory = source_path.parent.parent @@ -212,7 +222,8 @@ def insertStrings(strings_to_add, strings_needed): for qualified_string in strings_tree.getroot(): # Let's ignore the strings that are marked with translatable=false if qualified_string.attrib.get(XML_ATTR_TRANSLATABLE) == "false": - print(f"...⚠️ Ignoring values-{qualifier}/{child.attrib.get(XML_ATTR_NAME)} because it wasn't marked as translatable") + if not config['quiet']: + print(f"...⚠️ Ignoring values-{qualifier}/{child.attrib.get(XML_ATTR_NAME)} because it wasn't marked as translatable") continue # Now we check to see if this qualified file has the translation @@ -247,6 +258,8 @@ def insertStrings(strings_to_add, strings_needed): if qualified_string.attrib.get(XML_ATTR_NAME) == qualified_string_to_remove: qualified_strings_root.remove(qualified_string) - print(f"...Translations for {qualifier_language[qualifier]} completed") - print("-------------------------------") - print("Translation Script Complete!") + if not config['quiet']: + print(f"...Translations for {qualifier_language[qualifier]} completed") + print("-------------------------------") + if not config['quiet']: + print("Translation Script Complete!") From 
7740e5ac0155729cda2e717693d6f10fa5cfab82 Mon Sep 17 00:00:00 2001 From: gray Date: Tue, 14 Mar 2023 09:45:06 -0400 Subject: [PATCH 4/5] Created classes, Translator is the top class that has the common code GoogleTranslate and OpenAITranslator implement fetch to get the translated string. Added Documentation. Cleaned up python code a bit so it uses python common style. --- .github/scripts/translations.py | 267 ++++++++++++++++++++------------ 1 file changed, 166 insertions(+), 101 deletions(-) diff --git a/.github/scripts/translations.py b/.github/scripts/translations.py index 739203e..f06a0cc 100644 --- a/.github/scripts/translations.py +++ b/.github/scripts/translations.py @@ -6,6 +6,7 @@ import copy import requests import re +import pydoc from xml.dom import minidom from itertools import islice @@ -35,10 +36,11 @@ "fi": "Finnish", "fa": "Persian", "fr": "French", - "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", + "id": "Indonesian", "it": "Italian", + "iw": "Hebrew", "ja": "Japanese", "ko": "Korean", "nb": "Norwegian Bokmål", @@ -49,118 +51,180 @@ "ro": "Romanian", "ru": "Russian", "sv": "Swedish", - "uk": "Ukrainian", "tr": "Turkish", + "uk": "Ukrainian", + "ur": "Urdu", "zh": "Chinese" } -def chunks(data, SIZE=10000): - it = iter(data) - for i in range(0, len(data), SIZE): - yield {k:data[k] for k in islice(it, SIZE)} - -def fetchOpenAI(strings_needed, language): - # First we need our prompt, which will fetch a response for each language. 
- prompt = "Translate each of these phrases, excluding punctuation unless present, into " + \ - language - for qualified_string_needed_key in strings_needed: - prompt += "\n" + strings_needed[qualified_string_needed_key].text - - url = "https://api.openai.com/v1/completions" - headers = { - "Content-Type": "application/json; charset=utf-8", - "Authorization": "Bearer " + OPENAI_API_KEY, - } - data = { - "model": "text-davinci-003", - "prompt": prompt, - "temperature": 0, - "max_tokens": 1024, - "top_p": 1, - "frequency_penalty": 0.5, - "presence_penalty": 0, - } - - if not config['quiet']: - print(f"...Fetching {len(strings_needed)} {language} translation(s)") - json_response = requests.post(url, headers=headers, json=data) - response_text = json_response.json()["choices"][0]["text"] - response_strings = response_text.replace('\n\n', "").split('\n') - -def fetchGoogleTranslate(strings_needed, language_code): - client = translate.TranslationServiceClient() - location = "global" - parent = f"projects/{PROJECT_ID}/locations/{location}" - - request_strings = list(map(lambda x: x.text, strings_needed.values())) - request={ - "parent": parent, - "contents": request_strings, - "mime_type": "text/plain", - "source_language_code": "en-US", - "target_language_code": language_code.replace("-r", "-"), # pt-rBR -> pt-BR - } -# print(f"request={request}") - response = client.translate_text( - request=request - ) - -# print(f"response={response}") - return map(lambda x: x.translated_text, response.translations) - -def tryChunk(strings_needed, language, language_code): - if config['google_translate']: - response_strings = fetchGoogleTranslate(strings_needed, language_code) - else: - response_strings = fetchOpenAI(strings_needed, language) - filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) - if len(filtered_response_strings) != len(strings_needed): - if len(strings_needed) > 1: - in1 = dict(list(strings_needed.items())[len(strings_needed)//2:]) - in2 = dict(list(strings_needed.items())[:len(strings_needed)//2]) - out1 = tryChunk(in1, language) - out2 = tryChunk(in2, language) - return out1 + out2 +class Translator: + """ + Class Translator + This is the base class for OpenAI and Google Translator with common methods. + """ + + def fetch(self, strings_needed, language, language_code): + """ + fetch is implemented by OpenAI and Google Translator class. + + :param strings_needed: The strings to be fetched + :param language: The language name for the language to fetch + :param language_code: The language code for the language to fetch + """ + pass + + def try_chunk(self, strings_needed, language, language_code): + """ + try_chunk tries to fetch a chunk of strings for one language. + It'll divide the chunk in two if its chunk fails. + + btw, for OpenAI we often have to divide the chunk. + with Google Translation, it never happens. + + :param strings_needed: The strings to be fetched + :param language: The language name for the language to fetch + :param language_code: The language code for the language to fetch + :return: the list of strings fetched + + """ + response_strings = translator.fetch(strings_needed, language, language_code) + + filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) + if len(filtered_response_strings) != len(strings_needed): + if len(strings_needed) > 1: + in1 = 
dict(list(strings_needed.items())[len(strings_needed)//2:]) - in2 = dict(list(strings_needed.items())[:len(strings_needed)//2]) - out1 = tryChunk(in1, language) - out2 = tryChunk(in2, language) - return out1 + out2 +class Translator: + """ + Class Translator + This is the base class for OpenAI and Google Translator with common methods. + """ + + def fetch(self, strings_needed, language, language_code): + """ + fetch is implemented by OpenAI and Google Translator class. + + :param strings_needed: The strings to be fetched + :param language: The language name for the langauge to fetch + :param language_code: The language code for the langauge to fetch + """ + pass + + def try_chunk(self, strings_needed, language, language_code): + """ + try_chunk tries to fetch a chunk of strings for one language. + It'll divide the chunk in two if its chunk fails. + + btw, for OpenAI we often have to divide the chunk. + with Google Translation, it never happens. + + :param strings_needed: The strings to be fetched + :param language: The language name for the langauge to fetch + :param language_code: The language code for the langauge to fetch + :return: the list of strings fetched + + """ + response_strings = translator.fetch(strings_needed, language, language_code) + + filtered_response_strings = list(filter(lambda string: len(string) > 0, response_strings)) + if len(filtered_response_strings) != len(strings_needed): + if len(strings_needed) > 1: + in1 = dict(list(strings_needed.items())[len(strings_needed)//2:]) + in2 = dict(list(strings_needed.items())[:len(strings_needed)//2]) + out1 = self.try_chunk(in1, language, language_code) + out2 = self.try_chunk(in2, language, language_code) + return out1 + out2 + else: + return filtered_response_strings else: + self.insert_strings(filtered_response_strings, strings_needed) return filtered_response_strings - else: - insertStrings(filtered_response_strings, strings_needed) - return filtered_response_strings -def 
insertStrings(strings_to_add, strings_needed): - index = 0 - qualified_strings_to_add = list() + def insert_strings(self, strings_to_add, strings_needed): + """ + insert_strings inserts a set of strings - for qualified_string_needed_key in strings_needed: - qualified_string_needed = strings_needed[qualified_string_needed_key] - qualified_string_copy = copy.deepcopy(qualified_string_needed) - qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…:") + :param strings_to_add: The strings we fetched, that'll be inserted. + :param strings_needed: The strings that were needed + :return: returns nothing + """ - if not config['quiet']: - print( - f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}") - qualified_strings_to_add.append(qualified_string_copy) - index += 1 - # Now lets move onto modifying the XML file. - if len(strings_needed) > 0: - qualified_strings_tree = ET.parse(qualified_strings_file_path) - qualified_strings_root = qualified_strings_tree.getroot() + index = 0 + qualified_strings_to_add = list() - # Next lets add the elements we do want - for qualified_string in qualified_strings_to_add: - qualified_strings_root.append(qualified_string) + for qualified_string_needed_key in strings_needed: + qualified_string_needed = strings_needed[qualified_string_needed_key] + qualified_string_copy = copy.deepcopy(qualified_string_needed) + qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…:") - # Lastly, we write the changes to the file - if not config['quiet']: - print(f"...Writing changes to {str(qualified_strings_file_path)}") - qualified_strings_tree.write( - qualified_strings_file_path, - encoding="utf-8", - xml_declaration="utf-8", - method="xml" + if not config['quiet']: + print( + f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}") + 
qualified_strings_to_add.append(qualified_string_copy) + index += 1 + + # Now lets move onto modifying the XML file. + if len(strings_needed) > 0: + qualified_strings_tree = ET.parse(qualified_strings_file_path) + qualified_strings_root = qualified_strings_tree.getroot() + + # Next lets add the elements we do want + for qualified_string in qualified_strings_to_add: + qualified_strings_root.append(qualified_string) + + # Lastly, we write the changes to the file + if not config['quiet']: + print(f"...Writing changes to {str(qualified_strings_file_path)}") + qualified_strings_tree.write( + qualified_strings_file_path, + encoding="utf-8", + xml_declaration="utf-8", + method="xml" + ) + +class GoogleTranslate (Translator): + """ + Class GoogleTranslate implement Translator::fetch to fetch translations from Google Translation + """ + + def __init__(self, project_id) -> None: + """ + Construct a new 'GoogleTranslate' object. + + :project_id: The project id of Google Cloud project + :return: returns nothing + """ + super().__init__() + self.project_id = project_id + self.client = translate.TranslationServiceClient() + location = "global" + self.parent = f"projects/{PROJECT_ID}/locations/{location}" + + + def fetch(self, strings_needed, language, language_code): + + request_strings = list(map(lambda x: x.text, strings_needed.values())) + request={ + "parent": self.parent, + "contents": request_strings, + "mime_type": "text/plain", + "source_language_code": "en-US", + "target_language_code": language_code.replace("-r", "-"), # pt-rBR -> pt-BR + } + # print(f"request={request}") + response = self.client.translate_text( + request=request ) + # print(f"response={response}") + return map(lambda x: x.translated_text, response.translations) + +class OpenAITranslator (Translator): + """ + Class OpenAITranslator implement Translator::fetch to fetch translations from OpenAI (ChatGPT) + """ + + def __init__(self, api_key) -> None: + super().__init__() + self.url = 
"https://api.openai.com/v1/completions" + self.headers = { + "Content-Type": "application/json; charset=utf-8", + "Authorization": "Bearer " + api_key, + } + + def fetch(self, strings_needed, language, language_code): + # First we need our prompt, which will fetch a response for each language. + prompt = "Translate each of these phrases, excluding punctuation unless present, into " + \ + language + "\n" + "\n".join([x.text for x in strings_needed.values()]) + + data = { + "model": "text-davinci-003", + "prompt": prompt, + "temperature": 0, + "max_tokens": 1024, + "top_p": 1, + "frequency_penalty": 0.5, + "presence_penalty": 0, + } + + if not config['quiet']: + print(f"...Fetching {len(strings_needed)} {language} translation(s)") + json_response = requests.post(self.url, headers=self.headers, json=data) + response_text = json_response.json()["choices"][0]["text"] + response_strings = response_text.replace('\n\n', "").split('\n') + return response_strings + + +def chunks(data, SIZE=10000): + it = iter(data) + for i in range(0, len(data), SIZE): + yield {k:data[k] for k in islice(it, SIZE)} + + parser = argparse.ArgumentParser(description="Android String Translator", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-r", "--root", help="root directory of the android app, or library") @@ -177,6 +241,7 @@ def insertStrings(strings_to_add, strings_needed): else: OPENAI_API_KEY = config['openai_key'] if config['openai_key'] != None else os.environ.get('OPENAI_API_KEY') +translator = GoogleTranslate(PROJECT_ID) if config['google_translate'] else OpenAITranslator(OPENAI_API_KEY) # Iterate through each source strings.xml file so the case where source_paths = pathlib.Path(rootDir).glob('**/src/*/res/values/strings.xml') @@ -206,7 +271,7 @@ def insertStrings(strings_to_add, strings_needed): # Next, we check to see if each language exists res_directory = source_path.parent.parent for qualifier in qualifier_language.keys(): - 
qualified_values_folder_name = 'values-{qualifier}'.format(qualifier=qualifier) + qualified_values_folder_name = f"values-{qualifier}" qualified_values_folder_path = os.path.join(res_directory, qualified_values_folder_name) qualified_values_folder_exists = os.path.exists(qualified_values_folder_path) qualified_strings_file_path = os.path.join(qualified_values_folder_path, "strings.xml") @@ -247,7 +312,7 @@ def insertStrings(strings_to_add, strings_needed): filtered_response_strings = list() if len(qualified_strings_needed) != 0: for chunk in chunks(qualified_strings_needed, 16): - tryChunk(chunk, qualifier_language[qualifier], qualifier) + translator.try_chunk(chunk, qualifier_language[qualifier], qualifier) # First lets remove the elements we don't need qualified_strings_tree = ET.parse(qualified_strings_file_path) From e6d95d839d79123678da952ee1072de827fc081f Mon Sep 17 00:00:00 2001 From: gray Date: Sun, 25 Jun 2023 11:00:23 -0400 Subject: [PATCH 5/5] Fixed the code for Indonesian, add support for fixing Arabic strings that were coming back with the placeholders munged. --- .github/scripts/translations.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/scripts/translations.py b/.github/scripts/translations.py index f06a0cc..71e0a39 100644 --- a/.github/scripts/translations.py +++ b/.github/scripts/translations.py @@ -38,7 +38,7 @@ "fr": "French", "hi": "Hindi", "hr": "Croatian", - "id": "Indonesian", + "in": "Indonesian", "it": "Italian", "iw": "Hebrew", "ja": "Japanese", @@ -121,6 +121,11 @@ def insert_strings(self, strings_to_add, strings_needed): qualified_string_copy = copy.deepcopy(qualified_string_needed) qualified_string_copy.text = strings_to_add[index].replace('\'', r'\'').replace("...", "…:") + # Arabic strings are coming back with some unicode % and added spaces in placeholders + # Look for that pattern, and fix it. 
+ pattern = re.compile(r'٪\s+(\d)\s+\$\s+(\w)') + qualified_string_copy.text = re.sub(pattern, r'%\1$\2', qualified_string_copy.text) + if not config['quiet']: print( f"...Adding {qualified_strings_needed[qualified_string_needed_key].text} -> {qualified_string_copy.text}")