Handling Untranslatable Strings (#1133)

2024-04-28 00:26:12 +02:00 · 2024-04-28 00:26:12 +02:00 · 318076254d
commit 318076254d
parent 4fea8d10f8
4 changed files with 279 additions and 39 deletions
--- a/.github/workflows/sync_files.yml
+++ b/.github/workflows/sync_files.yml
@ -7,6 +7,7 @@ on:
    paths:
      - "build.gradle"
      - "src/main/resources/messages_*.properties"
      - "scripts/translation_status.toml"
 permissions:
  contents: write
@ -58,6 +59,8 @@ jobs:
        uses: actions/setup-python@v5.1.0
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: pip install tomlkit
      - name: Sync README
        run: python scripts/counter_translation.py
      - name: Set up git config
--- a/HowToAddNewLanguage.md
+++ b/HowToAddNewLanguage.md
@ -34,5 +34,18 @@ Then simply translate all property entries within that file and make a PR into m
 If you do not have a Java IDE, I am happy to verify that the changes work once you raise a PR (but I won't be able to verify the translations themselves).
 ## Handling Untranslatable Strings
 Sometimes, certain strings in the properties file do not require translation because they are identical in the target language or are universal (such as protocol names or certain technical terms). To keep the language progress statistics accurate, add these strings to the `translation_status.toml` file located in the `scripts` directory. This excludes them from the translation progress calculations.
 For example, if the English string `error=Error` does not need translation in Polish, add its key to `translation_status.toml` under the Polish section:
 ```toml
 [pl_PL]
 ignore = [
    "language.direction",  # Existing entries
    "error"                # Add new entries here
 ]
 ```
 Make sure to place the entry under the correct language section. This helps maintain the accuracy of translation progress statistics and ensures that the translation tool or scripts do not misinterpret the completion rate.
--- a/scripts/counter_translation.py
+++ b/scripts/counter_translation.py
@ -10,49 +10,77 @@ Author: Ludy87
 Example:
    To use this script, simply run it from command line:
        $ python counter_translation.py
-"""
+"""  # noqa: D205
-import os
+
 import glob
 import os
 import re
-from typing import List, Tuple
+
 import tomlkit
 import tomlkit.toml_file
-def write_readme(progress_list: List[Tuple[str, int]]) -> None:
+def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
-    """
+    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
-    Updates the progress status in the README.md file based
+    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
    Parameters:
        data (tomlkit.TOMLDocument): The original TOML document containing the data.
    Returns:
        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
    """  # noqa: D205
    sorted_data = tomlkit.document()
    for key in sorted(data.keys()):
        value = data[key]
        if isinstance(value, dict):
            new_table = tomlkit.table()
            for subkey in ("ignore", "missing"):
                if subkey in value:
                    # Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
                    unique_sorted_array = sorted(set(value[subkey]))
                    array = tomlkit.array()
                    array.multiline(True)
                    for item in unique_sorted_array:
                        array.append(item)
                    new_table[subkey] = array
            sorted_data[key] = new_table
        else:
            # Add other types of data unchanged
            sorted_data[key] = value
    return sorted_data
 def write_readme(progress_list: list[tuple[str, int]]) -> None:
    """Updates the progress status in the README.md file based
    on the provided progress list.
    Parameters:
-        progress_list (List[Tuple[str, int]]): A list of tuples containing
+        progress_list (list[tuple[str, int]]): A list of tuples containing
        language and progress percentage.
    Returns:
        None
-    """
+    """  # noqa: D205
-    with open("README.md", "r", encoding="utf-8") as file:
+    with open("README.md", encoding="utf-8") as file:
-        content = file.read()
+        content = file.readlines()
-    lines = content.split("\n")
+    for i, line in enumerate(content[2:], start=2):
    for i, line in enumerate(lines[2:], start=2):
        for progress in progress_list:
            language, value = progress
            if language in line:
-                match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
+                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
-                if match:
+                    content[i] = line.replace(
                    lines[i] = line.replace(
                        match.group(0),
                        f"![{value}%](https://geps.dev/progress/{value})",
                    )
    new_content = "\n".join(lines)
    with open("README.md", "w", encoding="utf-8") as file:
-        file.write(new_content)
+        file.writelines(content)
-def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
+def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
-    """
+    """Compares the default properties file with other
    Compares the default properties file with other
    properties files in the directory.
    Parameters:
@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
        files_directory (str): The directory containing other properties files.
    Returns:
-        List[Tuple[str, int]]: A list of tuples containing
+        list[tuple[str, int]]: A list of tuples containing
        language and progress percentage.
-    """
+    """  # noqa: D205
-    file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
+    num_lines = sum(
-    num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
+        1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
    )
    result_list = []
    sort_translation_status: tomlkit.TOMLDocument
    # read toml
    with open(translation_status_file, encoding="utf-8") as f:
        sort_translation_status = tomlkit.parse(f.read())
    for file_path in file_paths:
-        language = (
+        language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]
            os.path.basename(file_path)
            .split("messages_", 1)[1]
            .split(".properties", 1)[0]
        )
        fails = 0
        if "en_GB" in language or "en_US" in language:
@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
            result_list.append(("en_US", 100))
            continue
-        with open(default_file_path, "r", encoding="utf-8") as default_file, open(
+        if language not in sort_translation_status:
-            file_path, "r", encoding="utf-8"
+            sort_translation_status[language] = tomlkit.table()
-        ) as file:
+
        if (
            "ignore" not in sort_translation_status[language]
            or len(sort_translation_status[language].get("ignore", [])) < 1
        ):
            sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
        # if "missing" not in sort_translation_status[language]:
        #     sort_translation_status[language]["missing"] = tomlkit.array()
        # elif "language.direction" in sort_translation_status[language]["missing"]:
        #     sort_translation_status[language]["missing"].remove("language.direction")
        with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
            for _ in range(5):
                next(default_file)
                try:
@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
                except StopIteration:
                    fails = num_lines
-            for _, (line_default, line_file) in enumerate(
+            for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
                zip(default_file, file), start=6
            ):
                try:
                    # Ignoring empty lines and lines start with #
                    if line_default.strip() == "" or line_default.startswith("#"):
                        continue
                    default_key, default_value = line_default.split("=", 1)
                    file_key, file_value = line_file.split("=", 1)
                    if (
-                        line_default.split("=", 1)[1].strip()
+                        default_value.strip() == file_value.strip()
-                        == line_file.split("=", 1)[1].strip()
+                        and default_key.strip() not in sort_translation_status[language]["ignore"]
                    ):
                        print(f"{language}: Line {line_num} is missing the translation.")
                        # if default_key.strip() not in sort_translation_status[language]["missing"]:
                        #     missing_array = tomlkit.array()
                        #     missing_array.append(default_key.strip())
                        #     missing_array.multiline(True)
                        #     sort_translation_status[language]["missing"].extend(missing_array)
                        fails += 1
                    # elif default_key.strip() in sort_translation_status[language]["ignore"]:
                    #     if default_key.strip() in sort_translation_status[language]["missing"]:
                    #         sort_translation_status[language]["missing"].remove(default_key.strip())
                    if default_value.strip() != file_value.strip():
                        # if default_key.strip() in sort_translation_status[language]["missing"]:
                        #     sort_translation_status[language]["missing"].remove(default_key.strip())
                        if default_key.strip() in sort_translation_status[language]["ignore"]:
                            sort_translation_status[language]["ignore"].remove(default_key.strip())
                except IndexError:
                    pass
        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
        result_list.append(
            (
                language,
                int((num_lines - fails) * 100 / num_lines),
            )
        )
    translation_status = convert_to_multiline(sort_translation_status)
    with open(translation_status_file, "w", encoding="utf-8") as file:
        file.write(tomlkit.dumps(translation_status))
    unique_data = list(set(result_list))
    unique_data.sort(key=lambda x: x[1], reverse=True)
@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
 if __name__ == "__main__":
    directory = os.path.join(os.getcwd(), "src", "main", "resources")
    messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
    reference_file = os.path.join(directory, "messages_en_GB.properties")
-    write_readme(compare_files(reference_file, directory))
+
    scripts_directory = os.path.join(os.getcwd(), "scripts")
    translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
    write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))
--- a/scripts/translation_status.toml
+++ b/scripts/translation_status.toml
@ -0,0 +1,154 @@
 [ar_AR]
 ignore = [
    'language.direction',
 ]
 [bg_BG]
 ignore = [
    'language.direction',
 ]
 [ca_CA]
 ignore = [
    'language.direction',
 ]
 [de_DE]
 ignore = [
    'AddStampRequest.alphabet',
    'AddStampRequest.position',
    'PDFToBook.selectText.1',
    'PDFToText.tags',
    'addPageNumbers.selectText.3',
    'alphabet',
    'certSign.name',
    'language.direction',
    'licenses.version',
    'pipeline.title',
    'pipelineOptions.pipelineHeader',
    'sponsor',
    'text',
    'watermark.type.1',
 ]
 [el_GR]
 ignore = [
    'language.direction',
 ]
 [es_ES]
 ignore = [
    'adminUserSettings.roles',
    'color',
    'language.direction',
    'no',
    'showJS.tags',
 ]
 [eu_ES]
 ignore = [
    'language.direction',
 ]
 [fr_FR]
 ignore = [
    'language.direction',
 ]
 [hi_IN]
 ignore = [
    'language.direction',
 ]
 [hu_HU]
 ignore = [
    'language.direction',
 ]
 [id_ID]
 ignore = [
    'language.direction',
 ]
 [it_IT]
 ignore = [
    'font',
    'language.direction',
    'no',
    'password',
    'pipeline.title',
    'pipelineOptions.pipelineHeader',
    'removePassword.selectText.2',
    'showJS.tags',
    'sponsor',
 ]
 [ja_JP]
 ignore = [
    'language.direction',
 ]
 [ko_KR]
 ignore = [
    'language.direction',
 ]
 [nl_NL]
 ignore = [
    'language.direction',
 ]
 [pl_PL]
 ignore = [
    'language.direction',
 ]
 [pt_BR]
 ignore = [
    'language.direction',
 ]
 [pt_PT]
 ignore = [
    'language.direction',
 ]
 [ro_RO]
 ignore = [
    'language.direction',
 ]
 [ru_RU]
 ignore = [
    'language.direction',
 ]
 [sr_LATN_RS]
 ignore = [
    'language.direction',
 ]
 [sv_SE]
 ignore = [
    'language.direction',
 ]
 [tr_TR]
 ignore = [
    'language.direction',
 ]
 [uk_UA]
 ignore = [
    'language.direction',
 ]
 [zh_CN]
 ignore = [
    'language.direction',
 ]
 [zh_TW]
 ignore = [
    'language.direction',
 ]