yunohost/maintenance/autofix_locale_format.py
2024-05-21 23:16:55 +02:00

167 lines
5.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import json
import glob
from collections import OrderedDict
# Repository root, derived from this script's own location. The path is made
# absolute first: on interpreters where __file__ is relative, dirname(__file__)
# can be "" and `"" + "/../"` would silently resolve to the filesystem root.
ROOT = os.path.dirname(os.path.abspath(__file__)) + "/../"

# Trailing slash is kept on purpose: the rest of the file builds paths by plain
# string concatenation (LOCALE_FOLDER + "<lang>.json").
LOCALE_FOLDER = ROOT + "locales/"
print(LOCALE_FOLDER)

# en.json is the reference catalog every other locale is compared against.
REFERENCE_FILE = LOCALE_FOLDER + "en.json"

# Base names of all locale files (except en.json being the ref).
# Filtering instead of list.remove() avoids a ValueError at import time when
# the glob does not find en.json; the missing reference is then reported by
# the first function that actually tries to open it.
TRANSLATION_FILES = [
    os.path.basename(path)
    for path in glob.glob(LOCALE_FOLDER + "*.json")
    if os.path.basename(path) != "en.json"
]
def autofix_i18n_placeholders():
    """
    Try to automatically fix placeholder mismatches between en.json and the
    other locale files.

    e.g. an i18n string with:

    source: "Lorem ipsum {some_var}"
    fr: "Lorem ipsum {une_variable}"

    (ie the keyword in {} was translated but shouldnt have been)

    When a translation has the same number of placeholders as the reference
    string but a different set of names, its placeholders are renamed
    positionally to the reference names. A locale file is rewritten only if
    at least one of its strings was fixed.

    Raises:
        Exception: if, after the fix attempt, a translation still contains a
            placeholder name that the reference string does not have.
    """
    # Matches "{name}" and "{name:x}" (name plus an optional one-char spec).
    placeholder_re = re.compile(r"{(\w+)(:\w)?}")

    error_template = (
        "\n\n"
        "==========================\n"
        'Format inconsistency for string {key} in {locale_file}:"\n'
        "en.json -> {string}\n"
        "{locale_file} -> {translated_string}\n"
        "Please fix it manually !\n"
    )

    def _placeholder_names(text):
        """Return the placeholder names found in `text`, in order of appearance."""
        return [match.group(1) for match in placeholder_re.finditer(text)]

    def _autofix_i18n_placeholders(locale_file, reference):
        """Fix one locale file in place against the parsed `reference` catalog."""
        locale_path = LOCALE_FOLDER + locale_file
        with open(locale_path, encoding="utf-8") as f:
            this_locale = json.load(f)

        fixed_stuff = False

        # We iterate over all keys/string in en.json
        for key, string in reference.items():
            # Ignore check if there's no translation yet for this key
            if key not in this_locale:
                continue

            # Every "{stuff}" (for python's .format()) of the reference must
            # also be in the translated string, otherwise the .format will
            # trigger an exception!
            subkeys_in_ref = _placeholder_names(string)
            subkeys_in_this_locale = _placeholder_names(this_locale[key])

            if set(subkeys_in_ref) != set(subkeys_in_this_locale) and (
                len(subkeys_in_ref) == len(subkeys_in_this_locale)
            ):
                # Rename in a single pass, by position. Chained str.replace()
                # calls could rewrite a placeholder that an earlier step had
                # just produced (ref [a, b] vs locale [x, a] ended up as
                # "{b} {b}") and never matched "{name:x}" placeholders.
                # sub() visits the same matches, in the same order, as the
                # scan above, so the iterator cannot run out.
                ref_names = iter(subkeys_in_ref)
                this_locale[key] = placeholder_re.sub(
                    lambda match: "{%s%s}" % (next(ref_names), match.group(2) or ""),
                    this_locale[key],
                )
                fixed_stuff = True

            # Validate that now it's okay ?
            allowed = set(subkeys_in_ref)
            if any(k not in allowed for k in _placeholder_names(this_locale[key])):
                # Strings are passed as-is: encoding them to bytes made the
                # message show b'...' reprs on Python 3.
                raise Exception(
                    error_template.format(
                        key=key,
                        string=string,
                        locale_file=locale_file,
                        translated_string=this_locale[key],
                    )
                )

        if fixed_stuff:
            # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII characters.
            with open(locale_path, "w", encoding="utf-8") as f:
                json.dump(this_locale, f, indent=4, ensure_ascii=False)

    # The reference catalog is the same for every locale: parse it once.
    with open(REFERENCE_FILE, encoding="utf-8") as f:
        reference = json.load(f)

    for locale_file in TRANSLATION_FILES:
        _autofix_i18n_placeholders(locale_file, reference)
def autofix_orthotypography_and_standardized_words():
    """
    Normalize typography and wording directly in the raw text of en.json and
    fr.json.

    Both files get exotic Unicode spaces replaced by a plain space, the
    three-dot sequence replaced, and "https ://" repaired. fr.json additionally
    gets "courriel"/"e-mail" standardized to "email" and french quotation
    marks replaced by a straight apostrophe.

    Every key of the transformation tables is used as a regular expression.
    """

    def reformat(lang, transformations):
        """Apply each regex -> replacement pair to <lang>.json and write it back."""
        locale_path = f"{LOCALE_FOLDER}{lang}.json"
        with open(locale_path, encoding="utf-8") as f:
            locale = f.read()

        for pattern, replace in transformations.items():
            locale = re.sub(pattern, replace, locale)

        with open(locale_path, "w", encoding="utf-8") as f:
            f.write(locale)

    ######################################################

    godamn_spaces_of_hell = [
        "\u00a0",
        "\u2000",
        "\u2001",
        "\u2002",
        "\u2003",
        "\u2004",
        "\u2005",
        "\u2006",
        "\u2007",
        "\u2008",
        "\u2009",
        "\u200A",
        # "\u202f" / "\u202F" entries are left disabled: that space is not replaced.
        # "\u202f",
        # "\u202F",
        "\u3000",
    ]

    transformations = {s: " " for s in godamn_spaces_of_hell}
    transformations.update(
        {
            # NOTE(review): the replacement showed up as an empty string in
            # the reviewed copy, presumably a lost non-ASCII character;
            # restored as U+2026 (horizontal ellipsis) - confirm.
            r"\.\.\.": "\u2026",
            "https ://": "https://",
        }
    )

    reformat("en", transformations)

    ######################################################

    # The french table extends the common one (same dict, updated in place).
    transformations.update(
        {
            "courriel": "email",
            "e-mail": "email",
            "Courriel": "Email",
            "E-mail": "Email",
            "« ": "'",
            "«": "'",
            " »": "'",
            "»": "'",
            # NOTE(review): this key showed up as an empty string in the
            # reviewed copy. An empty regex matches at every position and
            # would insert "'" between all characters of fr.json. Restored as
            # U+2019 (right single quotation mark), presumably the lost
            # character - confirm.
            "\u2019": "'",
            # r"$(\w{1,2})'|( \w{1,2})'": r"\1\2",
        }
    )

    reformat("fr", transformations)
def remove_stale_translated_strings():
    """
    Remove from every translation file the keys that no longer exist in
    en.json.

    Each file listed in TRANSLATION_FILES is rewritten (indent=4, non-ASCII
    characters kept as-is), with the surviving keys in their original order.
    """
    with open(LOCALE_FOLDER + "en.json", encoding="utf-8") as f:
        reference = json.load(f)

    for locale_file in TRANSLATION_FILES:
        print(locale_file)
        locale_path = LOCALE_FOLDER + locale_file

        # Read fully and close before reopening the same path for writing.
        with open(locale_path, encoding="utf-8") as f:
            this_locale = json.load(f, object_pairs_hook=OrderedDict)

        this_locale_fixed = {k: v for k, v in this_locale.items() if k in reference}

        # Context manager guarantees the file is flushed and closed; explicit
        # UTF-8 because ensure_ascii=False emits raw non-ASCII characters.
        with open(locale_path, "w", encoding="utf-8") as f:
            json.dump(this_locale_fixed, f, indent=4, ensure_ascii=False)
# Script entry: run the three maintenance passes, in this order, each one
# rewriting the locale files on disk.
for _maintenance_pass in (
    autofix_orthotypography_and_standardized_words,
    remove_stale_translated_strings,
    autofix_i18n_placeholders,
):
    _maintenance_pass()