From e585f5b82b47b8d8367844f3cb3aff5a82807a17 Mon Sep 17 00:00:00 2001 From: Sab Pyrope Date: Sun, 10 Nov 2024 14:55:18 +0800 Subject: [PATCH] Restruct translation template --- .../monsters/nightmares_and_chimeras.json | 2 +- lang/extract_json_strings.py | 27 ++-- lang/merge_po.sh | 4 +- lang/string_extractor/message.py | 1 + lang/string_extractor/pot_export.py | 119 ++++++++++-------- lang/string_extractor/write_text.py | 8 +- lang/update_pot.sh | 12 +- 7 files changed, 102 insertions(+), 71 deletions(-) diff --git a/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json b/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json index c831ee9ce654d..adc24b4a700a5 100644 --- a/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json +++ b/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json @@ -113,7 +113,7 @@ "copy-from": "mon_hologram", "looks_like": "player", "type": "MONSTER", - "name": { "str": "you" }, + "name": { "str_sp": "you" }, "description": "It is you. It looks like you, it is equipped like you, it is even has a weapon like you." } ] diff --git a/lang/extract_json_strings.py b/lang/extract_json_strings.py index bdb05fd13f81b..e57ea31ec65ab 100755 --- a/lang/extract_json_strings.py +++ b/lang/extract_json_strings.py @@ -4,9 +4,8 @@ from optparse import OptionParser from sys import exit, version_info - from string_extractor.parse import parse_json_file -from string_extractor.pot_export import write_to_pot +from string_extractor.pot_export import write_to_pot, sanitize parser = OptionParser() @@ -14,9 +13,9 @@ action="append", type="str", help="include directories") parser.add_option("-n", "--name", dest="name", help="POT package name") -parser.add_option("-o", "--output", dest="output", help="output file path") parser.add_option("-r", "--reference", dest="reference", - help="reference POT for plural collision avoidance") + help="reference POT for plural collision avoidance, " + "also strings from JSON are appended to this file") parser.add_option("-v", "--verbose", dest="verbose", help="be verbose") parser.add_option("-X", "--exclude", dest="exclude", action="append", type="str", @@ -34,12 +33,13 @@ print("Requires Python 3.7 or higher.") exit(1) -if not options.output: - print("Have to specify output file path.") +if not options.reference: + print("Have to specify reference file path.") exit(1) if not options.include_dir: print("Have to specify at least one search path.") + exit(1) include_dir = [os.path.normpath(i) for i in options.include_dir] @@ -61,6 +61,11 @@ def extract_all_from_dir(json_dir): skiplist = [os.path.normpath(".gitkeep")] for f in allfiles: full_name = os.path.join(json_dir, f) + if full_name in [i for i in include_dir if i != json_dir]: + # Skip other included directories; + # They will be extracted later and appended to + # the end of the shared list of strings; + continue if os.path.isdir(full_name): dirs.append(f) elif f in skiplist or full_name in exclude: @@ -74,13 +79,13 @@ def extract_all_from_dir(json_dir): def main(): - for i in sorted(include_dir): + for i in include_dir: extract_all_from_dir(i) - with open(options.output, mode="w", encoding="utf-8") as fp: - write_to_pot(fp, True, options.name, - sanitize=options.reference, - obsolete_paths=obsolete_paths) + sanitize(options.reference, options.name) + + with open(options.reference, mode="a", encoding="utf-8") as fp: + write_to_pot(fp, obsolete_paths=obsolete_paths) main() diff --git a/lang/merge_po.sh b/lang/merge_po.sh index e981422b13205..2def64cff666d 100755 --- a/lang/merge_po.sh +++ b/lang/merge_po.sh @@ -18,7 +18,7 @@ function merge_lang if [ -f ${o} ] then echo "merging ${f}" - msgcat -F --use-first ${f} ${o} -o ${o} && rm ${f} + msgcat --use-first ${f} ${o} -o ${o} && rm ${f} else echo "importing ${f}" mv ${f} ${o} @@ -26,7 +26,7 @@ function merge_lang # merge lang/po/cataclysm-dda.pot with .po file echo "updating $o" - msgmerge --sort-by-file --no-fuzzy-matching $o lang/po/cataclysm-dda.pot | msgattrib --sort-by-file --no-obsolete -o $o + msgmerge --no-fuzzy-matching $o lang/po/cataclysm-dda.pot | msgattrib --clear-fuzzy --no-obsolete -o $o } # merge incoming translations for each language specified on the commandline diff --git a/lang/string_extractor/message.py b/lang/string_extractor/message.py index b6383a3f40324..c1909cdd9f7d7 100644 --- a/lang/string_extractor/message.py +++ b/lang/string_extractor/message.py @@ -9,6 +9,7 @@ class Message: context: str text: str text_plural: str + explicit_plural: bool messages = dict() diff --git a/lang/string_extractor/pot_export.py b/lang/string_extractor/pot_export.py index 58f979a15deb0..4df56dc1f4060 100644 --- a/lang/string_extractor/pot_export.py +++ b/lang/string_extractor/pot_export.py @@ -46,6 +46,56 @@ def process_comments(comments, origins, obsolete_paths): return result +def sanitize(reference, pkg_name="Cataclysm-DDA"): + if not os.path.isfile(reference): + raise Exception(f"Cannot read {reference}") + pofile = polib.pofile(reference) + + # sanitize plural entries + # Multiple objects may define slightly different plurals for strings, + # but without the specified context only one such string can be stored. + # Adds a plural form to those matching strings that do not have it. + for entry in pofile.untranslated_entries(): + pair = (entry.msgctxt if entry.msgctxt else "", entry.msgid) + if pair in messages: + # first, check the messages against the reference + for m in messages[pair]: + if not m.text_plural and entry.msgid_plural: + m.text_plural = entry.msgid_plural + break + # then check the reference against the messages + # prioritize plurals that are explicitly specified in JSON + temp_plural = "" + for m in messages[pair]: + if m.text_plural and not entry.msgid_plural: + entry.msgstr_plural = {0: "", 1: ""} + if m.explicit_plural: + entry.msgid_plural = m.text_plural + break + temp_plural = m.text_plural + if temp_plural: + entry.msgid_plural = temp_plural + + # write the correct header. + tzinfo = datetime.now(timezone.utc).astimezone().tzinfo + tztime = datetime.now(tzinfo).strftime('%Y-%m-%d %H:%M%z') + pofile.metadata = { + "Project-Id-Version": pkg_name, + "POT-Creation-Date": f"{tztime}", + "PO-Revision-Date": f"{tztime}", + "Last-Translator": "None", + "Language-Team": "None", + "Language": "en", + "MIME-Version": "1.0", + "Content-Type": "text/plain; charset=UTF-8", + "Content-Transfer-Encoding": "8bit", + "Plural-Forms": "nplurals=2; plural=(n > 1);" + } + pofile.metadata_is_fuzzy = 0 + + pofile.save() + + def is_unicode(sequence): hex = "0123456789abcdef" return sequence[0] == "\\" and sequence[1] == "u" and \ @@ -64,47 +114,12 @@ def restore_unicode(string): return string -def format_msg(prefix, text): - return "{0} {1}".format(prefix, restore_unicode(json.dumps(text))) +def format_msg(text): + return restore_unicode(json.dumps(text)) -def write_pot_header(fp, pkg_name="Cataclysm-DDA"): - tzinfo = datetime.now(timezone.utc).astimezone().tzinfo - time = datetime.now(tzinfo).strftime('%Y-%m-%d %H:%M%z') - print("msgid \"\"", file=fp) - print("msgstr \"\"", file=fp) - print("\"Project-Id-Version: {}\\n\"".format(pkg_name), file=fp) - print("\"POT-Creation-Date: {}\\n\"".format(time), file=fp) - print("\"PO-Revision-Date: {}\\n\"".format(time), file=fp) - print("\"Last-Translator: None\\n\"", file=fp) - print("\"Language-Team: None\\n\"", file=fp) - print("\"Language: en\\n\"", file=fp) - print("\"MIME-Version: 1.0\\n\"", file=fp) - print("\"Content-Type: text/plain; charset=UTF-8\\n\"", file=fp) - print("\"Content-Transfer-Encoding: 8bit\\n\"", file=fp) - print("\"Plural-Forms: nplurals=2; plural=(n > 1);\\n\"", file=fp) - print("", file=fp) - - -def sanitize_plural_colissions(reference): - if not os.path.isfile(reference): - raise Exception("cannot read {}".format(reference)) - pofile = polib.pofile(reference) - for entry in pofile.untranslated_entries(): - if entry.msgid_plural: - pair = (entry.msgctxt if entry.msgctxt else "", entry.msgid) - if pair in messages: - if len(messages[pair]) == 1: - if messages[pair][0].text_plural == "": - messages[pair][0].text_plural = entry.msgid_plural - - -def write_to_pot(fp, with_header=True, pkg_name=None, - sanitize=None, obsolete_paths=[]): - if sanitize: - sanitize_plural_colissions(sanitize) - if with_header: - write_pot_header(fp, pkg_name) +def write_to_pot(fp, obsolete_paths=[]): + entries = [] for (context, text) in occurrences: if (context, text) not in messages: continue @@ -120,32 +135,34 @@ def write_to_pot(fp, with_header=True, pkg_name=None, if message.text_plural: text_plural = message.text_plural origin = " ".join(sorted(origins)) + entry = [] # translator comments for line in process_comments(comments, origins, obsolete_paths): - print("#. ~ {}".format(line), file=fp) + entry.append(f"#. ~ {line}") # reference - print("#: {}".format(origin), file=fp) + entry.append(f"#: {origin}") # c-format if format_tag: - print("#, {}".format(format_tag), file=fp) + entry.append(f"#, {format_tag}") # context if context: - print("msgctxt \"{}\"".format(context), file=fp) + entry.append(f"msgctxt \"{context}\"") # text if text_plural: - print(format_msg("msgid", text), file=fp) - print(format_msg("msgid_plural", text_plural), file=fp) - print("msgstr[0] \"\"", file=fp) - print("msgstr[1] \"\"", file=fp) + entry.append(f"msgid {format_msg(text)}\n" + f"msgid_plural {format_msg(text_plural)}\n" + "msgstr[0] \"\"\n" + "msgstr[1] \"\"") else: - print(format_msg("msgid", text), file=fp) - print("msgstr \"\"", file=fp) - - print("", file=fp) + entry.append(f"msgid {format_msg(text)}\n" + "msgstr \"\"") + entries.append("\n".join(entry)) del messages[(context, text)] + + fp.write("\n\n".join(entries)) diff --git a/lang/string_extractor/write_text.py b/lang/string_extractor/write_text.py index a0277cc36d803..409b5d0aea27b 100644 --- a/lang/string_extractor/write_text.py +++ b/lang/string_extractor/write_text.py @@ -23,6 +23,8 @@ def write_text(json, origin, context="", comment="", comment: Translation comments in either string form or list of strings plural (bool): Whether the text should be pluralized c_format (bool): Whether the text contains C-style format string + explicit_plural (bool): Whether the plural is specified + explicitly in JSON """ if json is None or json == "": return @@ -30,6 +32,7 @@ def write_text(json, origin, context="", comment="", comments = append_comment([], comment) text = "" text_plural = "" + explicit_plural = False if type(json) is str: text = json @@ -48,8 +51,10 @@ def write_text(json, origin, context="", comment="", if "str_sp" in json: text = json["str_sp"] text_plural = json["str_sp"] + explicit_plural = True elif "str_pl" in json: text_plural = json["str_pl"] + explicit_plural = True else: text_plural = "{}s".format(text) @@ -67,7 +72,8 @@ def write_text(json, origin, context="", comment="", messages[(context, text)] = list() messages[(context, text)].append( - Message(comments, origin, format_tag, context, text, text_plural)) + Message(comments, origin, format_tag, context, + text, text_plural, explicit_plural)) occurrences.append((context, text)) diff --git a/lang/update_pot.sh b/lang/update_pot.sh index 21a47a3d5f999..cd77bf7ea2755 100755 --- a/lang/update_pot.sh +++ b/lang/update_pot.sh @@ -16,7 +16,7 @@ echo "> Extracting strings from C++ code" xgettext --default-domain="cataclysm-dda" \ --add-comments="~" \ --sort-by-file \ - --output="lang/po/gui.pot" \ + --output="lang/po/base.pot" \ --keyword="_" \ --keyword="pgettext:1c,2" \ --keyword="n_gettext:1,2" \ @@ -40,6 +40,8 @@ version=$(grep '^VERSION *= *' Makefile | tr -d [:space:] | cut -f 2 -d '=') echo "> Extracting strings from JSON" if ! lang/extract_json_strings.py \ -i data \ + -i data/json \ + -i data/mods \ -x data/mods/TEST_DATA \ -X data/json/furniture_and_terrain/terrain-regional-pseudo.json \ -X data/json/furniture_and_terrain/furniture-regional-pseudo.json \ @@ -50,19 +52,19 @@ if ! lang/extract_json_strings.py \ -D data/mods/BlazeIndustries \ -D data/mods/desert_region \ -n "$package $version" \ - -r lang/po/gui.pot \ - -o lang/po/json.pot + -r lang/po/base.pot then echo "Error in extracting strings from JSON. Aborting." exit 1 fi -echo "> Merging translation templates" -msgcat -o lang/po/cataclysm-dda.pot --use-first lang/po/json.pot lang/po/gui.pot +echo "> Unification of translation template" +msguniq -o lang/po/cataclysm-dda.pot lang/po/base.pot if [ ! -f lang/po/cataclysm-dda.pot ]; then echo "Error in merging translation templates. Aborting." exit 1 fi +sed -i "/^#\. #-#-#-#-# [a-zA-Z0-9(). -]*#-#-#-#-#$/d" lang/po/cataclysm-dda.pot # convert line endings to unix os="$(uname -s)"