Handling Untranslatable Strings (#1133)

This commit is contained in:
Ludy 2024-04-28 00:26:12 +02:00 committed by GitHub
parent 4fea8d10f8
commit 318076254d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 279 additions and 39 deletions

View file

@ -7,6 +7,7 @@ on:
paths:
- "build.gradle"
- "src/main/resources/messages_*.properties"
- "scripts/translation_status.toml"
permissions:
contents: write
@ -58,6 +59,8 @@ jobs:
uses: actions/setup-python@v5.1.0
with:
python-version: "3.x"
- name: Install dependencies
run: pip install tomlkit
- name: Sync README
run: python scripts/counter_translation.py
- name: Set up git config

View file

@ -34,5 +34,18 @@ Then simply translate all property entries within that file and make a PR into m
If you do not have a Java IDE, I am happy to verify that the changes work once you raise a PR (but I won't be able to verify the translations themselves).
## Handling Untranslatable Strings
Sometimes, certain strings in the properties file may not require translation because they are the same in the target language or are universal (like names of protocols, certain terminologies, etc.). To ensure accurate statistics for language progress, these strings should be added to the `translation_status.toml` file located in the `scripts` directory. This will exclude them from the translation progress calculations.
For example, if the English string `error=Error` does not need translation in Polish, add its key to `translation_status.toml` under the Polish section:
```toml
[pl_PL]
ignore = [
"language.direction", # Existing entries
"error" # Add new entries here
]
```
Make sure to place the entry under the correct language section. This helps maintain the accuracy of translation progress statistics and ensures that the translation tool or scripts do not misinterpret the completion rate.

View file

@ -10,49 +10,77 @@ Author: Ludy87
Example:
To use this script, simply run it from command line:
$ python counter_translation.py
"""
import os
""" # noqa: D205
import glob
import os
import re
from typing import List, Tuple
import tomlkit
import tomlkit.toml_file
def write_readme(progress_list: List[Tuple[str, int]]) -> None:
"""
Updates the progress status in the README.md file based
def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
    """Build a new TOML document with sorted top-level keys and tidy arrays.

    For every top-level value that is a table (a ``dict`` instance), a fresh
    table is created that carries over only its ``ignore`` and ``missing``
    arrays. Each array is de-duplicated, sorted, and marked as multiline.
    Any other key inside such a table is not copied. Top-level values that
    are not tables are carried over as they are.

    Parameters:
        data (tomlkit.TOMLDocument): The TOML document to normalise.

    Returns:
        tomlkit.TOMLDocument: A new document with the keys in sorted order.
    """
    normalized = tomlkit.document()
    for section in sorted(data.keys()):
        entry = data[section]

        # Non-table values need no reformatting; copy them across untouched.
        if not isinstance(entry, dict):
            normalized[section] = entry
            continue

        table = tomlkit.table()
        for field in ("ignore", "missing"):
            if field not in entry:
                continue
            multiline_array = tomlkit.array()
            # Switch to multiline before filling the array.
            multiline_array.multiline(True)
            # Going through a set drops duplicates; sorting keeps output stable.
            for item in sorted(set(entry[field])):
                multiline_array.append(item)
            table[field] = multiline_array
        normalized[section] = table
    return normalized
def write_readme(progress_list: list[tuple[str, int]]) -> None:
"""Updates the progress status in the README.md file based
on the provided progress list.
Parameters:
progress_list (List[Tuple[str, int]]): A list of tuples containing
progress_list (list[tuple[str, int]]): A list of tuples containing
language and progress percentage.
Returns:
None
"""
with open("README.md", "r", encoding="utf-8") as file:
content = file.read()
""" # noqa: D205
with open("README.md", encoding="utf-8") as file:
content = file.readlines()
lines = content.split("\n")
for i, line in enumerate(lines[2:], start=2):
for i, line in enumerate(content[2:], start=2):
for progress in progress_list:
language, value = progress
if language in line:
match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
if match:
lines[i] = line.replace(
if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
content[i] = line.replace(
match.group(0),
f"![{value}%](https://geps.dev/progress/{value})",
)
new_content = "\n".join(lines)
with open("README.md", "w", encoding="utf-8") as file:
file.write(new_content)
file.writelines(content)
def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
"""
Compares the default properties file with other
def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
"""Compares the default properties file with other
properties files in the directory.
Parameters:
@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
files_directory (str): The directory containing other properties files.
Returns:
List[Tuple[str, int]]: A list of tuples containing
list[tuple[str, int]]: A list of tuples containing
language and progress percentage.
"""
file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
""" # noqa: D205
num_lines = sum(
1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
)
result_list = []
sort_translation_status: tomlkit.TOMLDocument
# read toml
with open(translation_status_file, encoding="utf-8") as f:
sort_translation_status = tomlkit.parse(f.read())
for file_path in file_paths:
language = (
os.path.basename(file_path)
.split("messages_", 1)[1]
.split(".properties", 1)[0]
)
language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]
fails = 0
if "en_GB" in language or "en_US" in language:
@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
result_list.append(("en_US", 100))
continue
with open(default_file_path, "r", encoding="utf-8") as default_file, open(
file_path, "r", encoding="utf-8"
) as file:
if language not in sort_translation_status:
sort_translation_status[language] = tomlkit.table()
if (
"ignore" not in sort_translation_status[language]
or len(sort_translation_status[language].get("ignore", [])) < 1
):
sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
# if "missing" not in sort_translation_status[language]:
# sort_translation_status[language]["missing"] = tomlkit.array()
# elif "language.direction" in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove("language.direction")
with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
for _ in range(5):
next(default_file)
try:
@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
except StopIteration:
fails = num_lines
for _, (line_default, line_file) in enumerate(
zip(default_file, file), start=6
):
for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
try:
# Ignoring empty lines and lines start with #
if line_default.strip() == "" or line_default.startswith("#"):
continue
default_key, default_value = line_default.split("=", 1)
file_key, file_value = line_file.split("=", 1)
if (
line_default.split("=", 1)[1].strip()
== line_file.split("=", 1)[1].strip()
default_value.strip() == file_value.strip()
and default_key.strip() not in sort_translation_status[language]["ignore"]
):
print(f"{language}: Line {line_num} is missing the translation.")
# if default_key.strip() not in sort_translation_status[language]["missing"]:
# missing_array = tomlkit.array()
# missing_array.append(default_key.strip())
# missing_array.multiline(True)
# sort_translation_status[language]["missing"].extend(missing_array)
fails += 1
# elif default_key.strip() in sort_translation_status[language]["ignore"]:
# if default_key.strip() in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove(default_key.strip())
if default_value.strip() != file_value.strip():
# if default_key.strip() in sort_translation_status[language]["missing"]:
# sort_translation_status[language]["missing"].remove(default_key.strip())
if default_key.strip() in sort_translation_status[language]["ignore"]:
sort_translation_status[language]["ignore"].remove(default_key.strip())
except IndexError:
pass
print(f"{language}: {fails} out of {num_lines} lines are not translated.")
result_list.append(
(
language,
int((num_lines - fails) * 100 / num_lines),
)
)
translation_status = convert_to_multiline(sort_translation_status)
with open(translation_status_file, "w", encoding="utf-8") as file:
file.write(tomlkit.dumps(translation_status))
unique_data = list(set(result_list))
unique_data.sort(key=lambda x: x[1], reverse=True)
@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
if __name__ == "__main__":
directory = os.path.join(os.getcwd(), "src", "main", "resources")
messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
reference_file = os.path.join(directory, "messages_en_GB.properties")
write_readme(compare_files(reference_file, directory))
scripts_directory = os.path.join(os.getcwd(), "scripts")
translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))

View file

@ -0,0 +1,154 @@
[ar_AR]
ignore = [
'language.direction',
]
[bg_BG]
ignore = [
'language.direction',
]
[ca_CA]
ignore = [
'language.direction',
]
[de_DE]
ignore = [
'AddStampRequest.alphabet',
'AddStampRequest.position',
'PDFToBook.selectText.1',
'PDFToText.tags',
'addPageNumbers.selectText.3',
'alphabet',
'certSign.name',
'language.direction',
'licenses.version',
'pipeline.title',
'pipelineOptions.pipelineHeader',
'sponsor',
'text',
'watermark.type.1',
]
[el_GR]
ignore = [
'language.direction',
]
[es_ES]
ignore = [
'adminUserSettings.roles',
'color',
'language.direction',
'no',
'showJS.tags',
]
[eu_ES]
ignore = [
'language.direction',
]
[fr_FR]
ignore = [
'language.direction',
]
[hi_IN]
ignore = [
'language.direction',
]
[hu_HU]
ignore = [
'language.direction',
]
[id_ID]
ignore = [
'language.direction',
]
[it_IT]
ignore = [
'font',
'language.direction',
'no',
'password',
'pipeline.title',
'pipelineOptions.pipelineHeader',
'removePassword.selectText.2',
'showJS.tags',
'sponsor',
]
[ja_JP]
ignore = [
'language.direction',
]
[ko_KR]
ignore = [
'language.direction',
]
[nl_NL]
ignore = [
'language.direction',
]
[pl_PL]
ignore = [
'language.direction',
]
[pt_BR]
ignore = [
'language.direction',
]
[pt_PT]
ignore = [
'language.direction',
]
[ro_RO]
ignore = [
'language.direction',
]
[ru_RU]
ignore = [
'language.direction',
]
[sr_LATN_RS]
ignore = [
'language.direction',
]
[sv_SE]
ignore = [
'language.direction',
]
[tr_TR]
ignore = [
'language.direction',
]
[uk_UA]
ignore = [
'language.direction',
]
[zh_CN]
ignore = [
'language.direction',
]
[zh_TW]
ignore = [
'language.direction',
]