Handling Untranslatable Strings (#1133)
parent 4fea8d10f8
commit 318076254d
4 changed files with 279 additions and 39 deletions
.github/workflows/sync_files.yml (vendored, 3 changes)
@@ -7,6 +7,7 @@ on:
     paths:
       - "build.gradle"
       - "src/main/resources/messages_*.properties"
+      - "scripts/translation_status.toml"

 permissions:
   contents: write
@@ -58,6 +59,8 @@ jobs:
         uses: actions/setup-python@v5.1.0
         with:
           python-version: "3.x"
+      - name: Install dependencies
+        run: pip install tomlkit
       - name: Sync README
         run: python scripts/counter_translation.py
       - name: Set up git config
@@ -34,5 +34,18 @@ Then simply translate all property entries within that file and make a PR into m

If you do not have a Java IDE, I am happy to verify that the changes work once you raise a PR (but I won't be able to verify the translations themselves).

## Handling Untranslatable Strings

Some strings in the properties file may not require translation because they are identical in the target language or are universal (names of protocols, certain technical terms, and so on). To keep the per-language statistics accurate, add these strings to the `ignore_translation.toml` file located in the `scripts` directory; they will then be excluded from the translation progress calculation.

For example, if the English entry `error=Error` does not need translation in Polish, add its key to `ignore_translation.toml` under the Polish section:

```toml
[pl_PL]
ignore = [
    "language.direction", # Existing entries
    "error" # Add new entries here
]
```

Make sure to place the entry under the correct language section. This keeps the translation progress statistics accurate and ensures the translation scripts do not misreport the completion rate.
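
For orientation, here is a minimal sketch of how such an ignore list feeds into the progress count, modelled on the comparison logic in `scripts/counter_translation.py` below; the inline TOML and the sample keys and values are illustrative, not taken from the real messages files:

```python
import tomlkit

# A per-language section like the one above, inlined here for illustration.
status = tomlkit.parse('[pl_PL]\nignore = ["language.direction", "error"]\n')
ignored = {str(item) for item in status["pl_PL"]["ignore"]}


def counts_as_untranslated(key: str, default_value: str, translated_value: str) -> bool:
    """An entry counts as untranslated only if its value still matches the
    default (en_GB) value and its key is not listed under 'ignore'."""
    return default_value.strip() == translated_value.strip() and key.strip() not in ignored


print(counts_as_untranslated("error", "Error", "Error"))  # False: ignored, so not counted
print(counts_as_untranslated("someKey", "Some text", "Some text"))  # True: still untranslated
```

Keys listed under `ignore` therefore never lower a language's percentage, even when their values match the English defaults.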
scripts/counter_translation.py

@@ -10,49 +10,77 @@ Author: Ludy87
 Example:
     To use this script, simply run it from command line:
     $ python counter_translation.py
-"""
-import os
+""" # noqa: D205
+
 import glob
+import os
 import re
-from typing import List, Tuple

 import tomlkit
+import tomlkit.toml_file


-def write_readme(progress_list: List[Tuple[str, int]]) -> None:
-    """
-    Updates the progress status in the README.md file based
+def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
+    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
+    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
+
+    Parameters:
+        data (tomlkit.TOMLDocument): The original TOML document containing the data.
+
+    Returns:
+        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
+    """ # noqa: D205
+    sorted_data = tomlkit.document()
+    for key in sorted(data.keys()):
+        value = data[key]
+        if isinstance(value, dict):
+            new_table = tomlkit.table()
+            for subkey in ("ignore", "missing"):
+                if subkey in value:
+                    # Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
+                    unique_sorted_array = sorted(set(value[subkey]))
+                    array = tomlkit.array()
+                    array.multiline(True)
+                    for item in unique_sorted_array:
+                        array.append(item)
+                    new_table[subkey] = array
+            sorted_data[key] = new_table
+        else:
+            # Add other types of data unchanged
+            sorted_data[key] = value
+    return sorted_data
+
+
+def write_readme(progress_list: list[tuple[str, int]]) -> None:
+    """Updates the progress status in the README.md file based
     on the provided progress list.

     Parameters:
-        progress_list (List[Tuple[str, int]]): A list of tuples containing
+        progress_list (list[tuple[str, int]]): A list of tuples containing
             language and progress percentage.

     Returns:
         None
-    """
-    with open("README.md", "r", encoding="utf-8") as file:
-        content = file.read()
+    """ # noqa: D205
+    with open("README.md", encoding="utf-8") as file:
+        content = file.readlines()

-    lines = content.split("\n")
-    for i, line in enumerate(lines[2:], start=2):
+    for i, line in enumerate(content[2:], start=2):
         for progress in progress_list:
             language, value = progress
             if language in line:
-                match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
-                if match:
-                    lines[i] = line.replace(
+                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
+                    content[i] = line.replace(
                         match.group(0),
                         f"![{value}%](https://geps.dev/progress/{value})",
                     )

-    new_content = "\n".join(lines)
-
     with open("README.md", "w", encoding="utf-8") as file:
-        file.write(new_content)
+        file.writelines(content)


-def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
-    """
-    Compares the default properties file with other
+def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
+    """Compares the default properties file with other
     properties files in the directory.

     Parameters:
@@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
         files_directory (str): The directory containing other properties files.

     Returns:
-        List[Tuple[str, int]]: A list of tuples containing
+        list[tuple[str, int]]: A list of tuples containing
             language and progress percentage.
-    """
-    file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
-    num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
+    """ # noqa: D205
+    num_lines = sum(
+        1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
+    )

     result_list = []
+    sort_translation_status: tomlkit.TOMLDocument
+
+    # read toml
+    with open(translation_status_file, encoding="utf-8") as f:
+        sort_translation_status = tomlkit.parse(f.read())
+
     for file_path in file_paths:
-        language = (
-            os.path.basename(file_path)
-            .split("messages_", 1)[1]
-            .split(".properties", 1)[0]
-        )
+        language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]

         fails = 0
         if "en_GB" in language or "en_US" in language:
@@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
             result_list.append(("en_US", 100))
             continue

-        with open(default_file_path, "r", encoding="utf-8") as default_file, open(
-            file_path, "r", encoding="utf-8"
-        ) as file:
+        if language not in sort_translation_status:
+            sort_translation_status[language] = tomlkit.table()
+
+        if (
+            "ignore" not in sort_translation_status[language]
+            or len(sort_translation_status[language].get("ignore", [])) < 1
+        ):
+            sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
+
+        # if "missing" not in sort_translation_status[language]:
+        #     sort_translation_status[language]["missing"] = tomlkit.array()
+        # elif "language.direction" in sort_translation_status[language]["missing"]:
+        #     sort_translation_status[language]["missing"].remove("language.direction")
+
+        with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
             for _ in range(5):
                 next(default_file)
             try:
@@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
             except StopIteration:
                 fails = num_lines

-            for _, (line_default, line_file) in enumerate(
-                zip(default_file, file), start=6
-            ):
+            for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
                 try:
                     # Ignoring empty lines and lines start with #
                     if line_default.strip() == "" or line_default.startswith("#"):
                         continue

+                    default_key, default_value = line_default.split("=", 1)
+                    file_key, file_value = line_file.split("=", 1)
                     if (
-                        line_default.split("=", 1)[1].strip()
-                        == line_file.split("=", 1)[1].strip()
+                        default_value.strip() == file_value.strip()
+                        and default_key.strip() not in sort_translation_status[language]["ignore"]
                     ):
+                        print(f"{language}: Line {line_num} is missing the translation.")
+                        # if default_key.strip() not in sort_translation_status[language]["missing"]:
+                        #     missing_array = tomlkit.array()
+                        #     missing_array.append(default_key.strip())
+                        #     missing_array.multiline(True)
+                        #     sort_translation_status[language]["missing"].extend(missing_array)
                         fails += 1
+                    # elif default_key.strip() in sort_translation_status[language]["ignore"]:
+                    #     if default_key.strip() in sort_translation_status[language]["missing"]:
+                    #         sort_translation_status[language]["missing"].remove(default_key.strip())
+                    if default_value.strip() != file_value.strip():
+                        # if default_key.strip() in sort_translation_status[language]["missing"]:
+                        #     sort_translation_status[language]["missing"].remove(default_key.strip())
+                        if default_key.strip() in sort_translation_status[language]["ignore"]:
+                            sort_translation_status[language]["ignore"].remove(default_key.strip())

                 except IndexError:
                     pass

+        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
         result_list.append(
             (
                 language,
                 int((num_lines - fails) * 100 / num_lines),
             )
         )
+    translation_status = convert_to_multiline(sort_translation_status)
+    with open(translation_status_file, "w", encoding="utf-8") as file:
+        file.write(tomlkit.dumps(translation_status))

     unique_data = list(set(result_list))
     unique_data.sort(key=lambda x: x[1], reverse=True)
@@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:

 if __name__ == "__main__":
     directory = os.path.join(os.getcwd(), "src", "main", "resources")
+    messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
     reference_file = os.path.join(directory, "messages_en_GB.properties")
-    write_readme(compare_files(reference_file, directory))
+
+    scripts_directory = os.path.join(os.getcwd(), "scripts")
+    translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
+
+    write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))
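
For reference, a minimal sketch of the badge substitution that `write_readme` performs, reusing the pattern and replacement string from the diff above; the README line shown here is illustrative, not the actual README layout:

```python
import re

# Illustrative README table row containing a language code and its progress badge.
line = "| Polski (Polish) | pl_PL | ![85%](https://geps.dev/progress/85) |"
value = 92  # newly computed percentage for this language

# Same regex and replacement as in write_readme.
if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
    line = line.replace(match.group(0), f"![{value}%](https://geps.dev/progress/{value})")

print(line)  # | Polski (Polish) | pl_PL | ![92%](https://geps.dev/progress/92) |
```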
scripts/translation_status.toml (new file, 154 lines)

@@ -0,0 +1,154 @@
[ar_AR]
ignore = [
    'language.direction',
]

[bg_BG]
ignore = [
    'language.direction',
]

[ca_CA]
ignore = [
    'language.direction',
]

[de_DE]
ignore = [
    'AddStampRequest.alphabet',
    'AddStampRequest.position',
    'PDFToBook.selectText.1',
    'PDFToText.tags',
    'addPageNumbers.selectText.3',
    'alphabet',
    'certSign.name',
    'language.direction',
    'licenses.version',
    'pipeline.title',
    'pipelineOptions.pipelineHeader',
    'sponsor',
    'text',
    'watermark.type.1',
]

[el_GR]
ignore = [
    'language.direction',
]

[es_ES]
ignore = [
    'adminUserSettings.roles',
    'color',
    'language.direction',
    'no',
    'showJS.tags',
]

[eu_ES]
ignore = [
    'language.direction',
]

[fr_FR]
ignore = [
    'language.direction',
]

[hi_IN]
ignore = [
    'language.direction',
]

[hu_HU]
ignore = [
    'language.direction',
]

[id_ID]
ignore = [
    'language.direction',
]

[it_IT]
ignore = [
    'font',
    'language.direction',
    'no',
    'password',
    'pipeline.title',
    'pipelineOptions.pipelineHeader',
    'removePassword.selectText.2',
    'showJS.tags',
    'sponsor',
]

[ja_JP]
ignore = [
    'language.direction',
]

[ko_KR]
ignore = [
    'language.direction',
]

[nl_NL]
ignore = [
    'language.direction',
]

[pl_PL]
ignore = [
    'language.direction',
]

[pt_BR]
ignore = [
    'language.direction',
]

[pt_PT]
ignore = [
    'language.direction',
]

[ro_RO]
ignore = [
    'language.direction',
]

[ru_RU]
ignore = [
    'language.direction',
]

[sr_LATN_RS]
ignore = [
    'language.direction',
]

[sv_SE]
ignore = [
    'language.direction',
]

[tr_TR]
ignore = [
    'language.direction',
]

[uk_UA]
ignore = [
    'language.direction',
]

[zh_CN]
ignore = [
    'language.direction',
]

[zh_TW]
ignore = [
    'language.direction',
]