Handling Untranslatable Strings (#1133)
Parent: 4fea8d10f8
Commit: 318076254d
4 changed files with 279 additions and 39 deletions
.github/workflows/sync_files.yml (3 changes)

@@ -7,6 +7,7 @@ on:
     paths:
       - "build.gradle"
       - "src/main/resources/messages_*.properties"
+      - "scripts/translation_status.toml"

 permissions:
   contents: write

@@ -58,6 +59,8 @@ jobs:
         uses: actions/setup-python@v5.1.0
         with:
           python-version: "3.x"
+      - name: Install dependencies
+        run: pip install tomlkit
       - name: Sync README
         run: python scripts/counter_translation.py
       - name: Set up git config
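The workflow now also triggers on changes to `scripts/translation_status.toml` and installs `tomlkit` before running the sync script. A minimal sketch of the tomlkit round-trip that dependency enables (a standalone illustration, not part of the workflow; the sample document is invented):

```python
import tomlkit

# Invented two-entry status document, just to show the parse/edit/dump round-trip.
doc = tomlkit.parse("[pl_PL]\nignore = ['language.direction']\n")
doc["pl_PL"]["ignore"].append("error")  # programmatic edits keep the TOML formatting
print(tomlkit.dumps(doc))
```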
@@ -34,5 +34,18 @@ Then simply translate all property entries within that file and make a PR into m

 If you do not have a java IDE i am happy to verify the changes worked once you raise PR (but won't be able to verify the translations themselves)

+## Handling Untranslatable Strings
+
+Sometimes, certain strings in the properties file may not require translation because they are the same in the target language or are universal (like names of protocols, certain terminologies, etc.). To ensure accurate statistics for language progress, these strings should be added to the `ignore_translation.toml` file located in the `scripts` directory. This will exclude them from the translation progress calculations.
+
+For example, if the English string error=Error does not need translation in Polish, add it to the ignore_translation.toml under the Polish section:
+
+```toml
+[pl_PL]
+ignore = [
+    "language.direction", # Existing entries
+    "error" # Add new entries here
+]
+```
+
+Make sure to place the entry under the correct language section. This helps maintain the accuracy of translation progress statistics and ensures that the translation tool or scripts do not misinterpret the completion rate.
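To make the effect of an ignore entry concrete, here is a small sketch of the kind of check a counting script can perform (illustrative only: the helper, sample keys, and values are invented; the path follows the `scripts/translation_status.toml` file added later in this commit):

```python
import tomlkit

with open("scripts/translation_status.toml", encoding="utf-8") as f:
    status = tomlkit.parse(f.read())

ignored = set(status.get("pl_PL", {}).get("ignore", []))


def is_untranslated(key: str, default_value: str, translated_value: str) -> bool:
    """A value identical to the English default only counts as untranslated if the key is not ignored."""
    return default_value.strip() == translated_value.strip() and key not in ignored


print(is_untranslated("error", "Error", "Error"))      # False once "error" is in the ignore list
print(is_untranslated("someKey", "Submit", "Submit"))  # True: identical and not ignored (invented key)
```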
scripts/counter_translation.py

@@ -10,49 +10,77 @@ Author: Ludy87
 Example:
     To use this script, simply run it from command line:
         $ python counter_translation.py
-"""
+"""  # noqa: D205
-import os
 import glob
+import os
 import re
-from typing import List, Tuple
+
+import tomlkit
+import tomlkit.toml_file


-def write_readme(progress_list: List[Tuple[str, int]]) -> None:
-    """
-    Updates the progress status in the README.md file based
+def convert_to_multiline(data: tomlkit.TOMLDocument) -> tomlkit.TOMLDocument:
+    """Converts 'ignore' and 'missing' arrays to multiline arrays and sorts the first-level keys of the TOML document.
+
+    Enhances readability and consistency in the TOML file by ensuring arrays contain unique and sorted entries.
+
+    Parameters:
+        data (tomlkit.TOMLDocument): The original TOML document containing the data.
+
+    Returns:
+        tomlkit.TOMLDocument: A new TOML document with sorted keys and properly formatted arrays.
+    """  # noqa: D205
+    sorted_data = tomlkit.document()
+    for key in sorted(data.keys()):
+        value = data[key]
+        if isinstance(value, dict):
+            new_table = tomlkit.table()
+            for subkey in ("ignore", "missing"):
+                if subkey in value:
+                    # Convert the list to a set to remove duplicates, sort it, and convert to multiline for readability
+                    unique_sorted_array = sorted(set(value[subkey]))
+                    array = tomlkit.array()
+                    array.multiline(True)
+                    for item in unique_sorted_array:
+                        array.append(item)
+                    new_table[subkey] = array
+            sorted_data[key] = new_table
+        else:
+            # Add other types of data unchanged
+            sorted_data[key] = value
+    return sorted_data
+
+
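A quick way to see what `convert_to_multiline` does (a sketch that assumes the function from the hunk above is in scope; the input document is invented):

```python
import tomlkit

# Invented, unsorted input with a duplicate entry.
raw = """
[pl_PL]
ignore = ['error', 'language.direction', 'error']

[de_DE]
ignore = ['language.direction']
"""

doc = tomlkit.parse(raw)
print(tomlkit.dumps(convert_to_multiline(doc)))
# de_DE now comes first; pl_PL's ignore array is deduplicated, sorted,
# and written one entry per line.
```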
+def write_readme(progress_list: list[tuple[str, int]]) -> None:
+    """Updates the progress status in the README.md file based
     on the provided progress list.

     Parameters:
-        progress_list (List[Tuple[str, int]]): A list of tuples containing
+        progress_list (list[tuple[str, int]]): A list of tuples containing
         language and progress percentage.

     Returns:
         None
-    """
+    """  # noqa: D205
-    with open("README.md", "r", encoding="utf-8") as file:
-        content = file.read()
+    with open("README.md", encoding="utf-8") as file:
+        content = file.readlines()

-    lines = content.split("\n")
-    for i, line in enumerate(lines[2:], start=2):
+    for i, line in enumerate(content[2:], start=2):
         for progress in progress_list:
             language, value = progress
             if language in line:
-                match = re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line)
-                if match:
-                    lines[i] = line.replace(
+                if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
+                    content[i] = line.replace(
                         match.group(0),
                         f"![{value}%](https://geps.dev/progress/{value})",
                     )

-    new_content = "\n".join(lines)

     with open("README.md", "w", encoding="utf-8") as file:
-        file.write(new_content)
+        file.writelines(content)


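For reference, this is what the badge regex and its new walrus-operator form do to a single README row (the sample row is invented for the example):

```python
import re

line = "| Polish (Polski) (pl_PL) | ![92%](https://geps.dev/progress/92) |"  # made-up sample row
value = 95

if match := re.search(r"\!\[(\d+(\.\d+)?)%\]\(.*\)", line):
    line = line.replace(match.group(0), f"![{value}%](https://geps.dev/progress/{value})")

print(line)  # the badge now reads 95%
```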
-def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
-    """
-    Compares the default properties file with other
+def compare_files(default_file_path, file_paths, translation_status_file) -> list[tuple[str, int]]:
+    """Compares the default properties file with other
     properties files in the directory.

     Parameters:
@@ -60,20 +88,22 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
         files_directory (str): The directory containing other properties files.

     Returns:
-        List[Tuple[str, int]]: A list of tuples containing
+        list[tuple[str, int]]: A list of tuples containing
         language and progress percentage.
-    """
+    """  # noqa: D205
-    file_paths = glob.glob(os.path.join(files_directory, "messages_*.properties"))
-    num_lines = sum(1 for _ in open(default_file_path, encoding="utf-8"))
+    num_lines = sum(
+        1 for line in open(default_file_path, encoding="utf-8") if line.strip() and not line.strip().startswith("#")
+    )

     result_list = []
+    sort_translation_status: tomlkit.TOMLDocument
+
+    # read toml
+    with open(translation_status_file, encoding="utf-8") as f:
+        sort_translation_status = tomlkit.parse(f.read())
+
     for file_path in file_paths:
-        language = (
-            os.path.basename(file_path)
-            .split("messages_", 1)[1]
-            .split(".properties", 1)[0]
-        )
+        language = os.path.basename(file_path).split("messages_", 1)[1].split(".properties", 1)[0]

         fails = 0
         if "en_GB" in language or "en_US" in language:
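One consequence of the new `num_lines` expression: blank lines and `#` comment lines in the reference properties file no longer count toward the total. A toy comparison (sample lines invented):

```python
# Invented snippet of a messages_*.properties file.
sample = """# General
title=Stirling PDF

error=Error
"""

all_lines = sample.splitlines()
real_keys = [line for line in all_lines if line.strip() and not line.strip().startswith("#")]

print(len(all_lines), len(real_keys))  # 4 2 -> only the two real keys are counted
```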
@@ -81,9 +111,21 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
             result_list.append(("en_US", 100))
             continue

-        with open(default_file_path, "r", encoding="utf-8") as default_file, open(
-            file_path, "r", encoding="utf-8"
-        ) as file:
+        if language not in sort_translation_status:
+            sort_translation_status[language] = tomlkit.table()
+
+        if (
+            "ignore" not in sort_translation_status[language]
+            or len(sort_translation_status[language].get("ignore", [])) < 1
+        ):
+            sort_translation_status[language]["ignore"] = tomlkit.array(["language.direction"])
+
+        # if "missing" not in sort_translation_status[language]:
+        #     sort_translation_status[language]["missing"] = tomlkit.array()
+        # elif "language.direction" in sort_translation_status[language]["missing"]:
+        #     sort_translation_status[language]["missing"].remove("language.direction")
+
+        with open(default_file_path, encoding="utf-8") as default_file, open(file_path, encoding="utf-8") as file:
             for _ in range(5):
                 next(default_file)
             try:
@@ -91,24 +133,47 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:
             except StopIteration:
                 fails = num_lines

-            for _, (line_default, line_file) in enumerate(
-                zip(default_file, file), start=6
-            ):
+            for line_num, (line_default, line_file) in enumerate(zip(default_file, file), start=6):
                 try:
+                    # Ignoring empty lines and lines start with #
+                    if line_default.strip() == "" or line_default.startswith("#"):
+                        continue
+
+                    default_key, default_value = line_default.split("=", 1)
+                    file_key, file_value = line_file.split("=", 1)
                     if (
-                        line_default.split("=", 1)[1].strip()
-                        == line_file.split("=", 1)[1].strip()
+                        default_value.strip() == file_value.strip()
+                        and default_key.strip() not in sort_translation_status[language]["ignore"]
                     ):
+                        print(f"{language}: Line {line_num} is missing the translation.")
+                        # if default_key.strip() not in sort_translation_status[language]["missing"]:
+                        #     missing_array = tomlkit.array()
+                        #     missing_array.append(default_key.strip())
+                        #     missing_array.multiline(True)
+                        #     sort_translation_status[language]["missing"].extend(missing_array)
                         fails += 1
+                    # elif default_key.strip() in sort_translation_status[language]["ignore"]:
+                    #     if default_key.strip() in sort_translation_status[language]["missing"]:
+                    #         sort_translation_status[language]["missing"].remove(default_key.strip())
+                    if default_value.strip() != file_value.strip():
+                        # if default_key.strip() in sort_translation_status[language]["missing"]:
+                        #     sort_translation_status[language]["missing"].remove(default_key.strip())
+                        if default_key.strip() in sort_translation_status[language]["ignore"]:
+                            sort_translation_status[language]["ignore"].remove(default_key.strip())
+
                 except IndexError:
                     pass

+        print(f"{language}: {fails} out of {num_lines} lines are not translated.")
         result_list.append(
             (
                 language,
                 int((num_lines - fails) * 100 / num_lines),
             )
         )
+    translation_status = convert_to_multiline(sort_translation_status)
+    with open(translation_status_file, "w", encoding="utf-8") as file:
+        file.write(tomlkit.dumps(translation_status))
+
     unique_data = list(set(result_list))
     unique_data.sort(key=lambda x: x[1], reverse=True)
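The percentage written to the README comes from the integer arithmetic in the `result_list.append` call above; a quick check of the truncation behaviour (counts invented):

```python
num_lines, fails = 220, 37  # invented counts
progress = int((num_lines - fails) * 100 / num_lines)
print(progress)  # 83 -> 83.18% truncates to 83
```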
@@ -118,5 +183,10 @@ def compare_files(default_file_path, files_directory) -> List[Tuple[str, int]]:

 if __name__ == "__main__":
     directory = os.path.join(os.getcwd(), "src", "main", "resources")
+    messages_file_paths = glob.glob(os.path.join(directory, "messages_*.properties"))
     reference_file = os.path.join(directory, "messages_en_GB.properties")
-    write_readme(compare_files(reference_file, directory))
+
+    scripts_directory = os.path.join(os.getcwd(), "scripts")
+    translation_state_file = os.path.join(scripts_directory, "translation_status.toml")
+
+    write_readme(compare_files(reference_file, messages_file_paths, translation_state_file))
scripts/translation_status.toml (new file, 154 lines)

@@ -0,0 +1,154 @@
+[ar_AR]
+ignore = [
+    'language.direction',
+]
+
+[bg_BG]
+ignore = [
+    'language.direction',
+]
+
+[ca_CA]
+ignore = [
+    'language.direction',
+]
+
+[de_DE]
+ignore = [
+    'AddStampRequest.alphabet',
+    'AddStampRequest.position',
+    'PDFToBook.selectText.1',
+    'PDFToText.tags',
+    'addPageNumbers.selectText.3',
+    'alphabet',
+    'certSign.name',
+    'language.direction',
+    'licenses.version',
+    'pipeline.title',
+    'pipelineOptions.pipelineHeader',
+    'sponsor',
+    'text',
+    'watermark.type.1',
+]
+
+[el_GR]
+ignore = [
+    'language.direction',
+]
+
+[es_ES]
+ignore = [
+    'adminUserSettings.roles',
+    'color',
+    'language.direction',
+    'no',
+    'showJS.tags',
+]
+
+[eu_ES]
+ignore = [
+    'language.direction',
+]
+
+[fr_FR]
+ignore = [
+    'language.direction',
+]
+
+[hi_IN]
+ignore = [
+    'language.direction',
+]
+
+[hu_HU]
+ignore = [
+    'language.direction',
+]
+
+[id_ID]
+ignore = [
+    'language.direction',
+]
+
+[it_IT]
+ignore = [
+    'font',
+    'language.direction',
+    'no',
+    'password',
+    'pipeline.title',
+    'pipelineOptions.pipelineHeader',
+    'removePassword.selectText.2',
+    'showJS.tags',
+    'sponsor',
+]
+
+[ja_JP]
+ignore = [
+    'language.direction',
+]
+
+[ko_KR]
+ignore = [
+    'language.direction',
+]
+
+[nl_NL]
+ignore = [
+    'language.direction',
+]
+
+[pl_PL]
+ignore = [
+    'language.direction',
+]
+
+[pt_BR]
+ignore = [
+    'language.direction',
+]
+
+[pt_PT]
+ignore = [
+    'language.direction',
+]
+
+[ro_RO]
+ignore = [
+    'language.direction',
+]
+
+[ru_RU]
+ignore = [
+    'language.direction',
+]
+
+[sr_LATN_RS]
+ignore = [
+    'language.direction',
+]
+
+[sv_SE]
+ignore = [
+    'language.direction',
+]
+
+[tr_TR]
+ignore = [
+    'language.direction',
+]
+
+[uk_UA]
+ignore = [
+    'language.direction',
+]
+
+[zh_CN]
+ignore = [
+    'language.direction',
+]
+
+[zh_TW]
+ignore = [
+    'language.direction',
+]