CleverRaven · Night-Pryanik · Nov 21, 2024 · Nov 10, 2024
diff --git a/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json b/data/mods/Xedra_Evolved/monsters/nightmares_and_chimeras.json
@@ -113,7 +113,7 @@
     "copy-from": "mon_hologram",
     "looks_like": "player",
     "type": "MONSTER",
-    "name": { "str": "you" },
+    "name": { "str_sp": "you" },
     "description": "It is you.  It looks like you, it is equipped like you, it is even has a weapon like you."
   }
 ]
diff --git a/lang/extract_json_strings.py b/lang/extract_json_strings.py
@@ -4,19 +4,18 @@
 from optparse import OptionParser
 from sys import exit, version_info
 
-
 from string_extractor.parse import parse_json_file
-from string_extractor.pot_export import write_to_pot
+from string_extractor.pot_export import write_to_pot, sanitize
 
 
 parser = OptionParser()
 parser.add_option("-i", "--include_dir", dest="include_dir",
                   action="append", type="str",
                   help="include directories")
 parser.add_option("-n", "--name", dest="name", help="POT package name")
-parser.add_option("-o", "--output", dest="output", help="output file path")
 parser.add_option("-r", "--reference", dest="reference",
-                  help="reference POT for plural collision avoidance")
+                  help="reference POT for plural collision avoidance, "
+                  "also strings from JSON are appended to this file")
 parser.add_option("-v", "--verbose", dest="verbose", help="be verbose")
 parser.add_option("-X", "--exclude", dest="exclude",
                   action="append", type="str",
@@ -34,12 +33,13 @@
     print("Requires Python 3.7 or higher.")
     exit(1)
 
-if not options.output:
-    print("Have to specify output file path.")
+if not options.reference:
+    print("Have to specify reference file path.")
     exit(1)
 
 if not options.include_dir:
     print("Have to specify at least one search path.")
+    exit(1)
 
 include_dir = [os.path.normpath(i) for i in options.include_dir]
 
@@ -61,6 +61,11 @@ def extract_all_from_dir(json_dir):
     skiplist = [os.path.normpath(".gitkeep")]
     for f in allfiles:
         full_name = os.path.join(json_dir, f)
+        if full_name in [i for i in include_dir if i != json_dir]:
+            # Skip entries that are themselves other include directories:
+            # each of those is extracted by its own pass in main(), and its
+            # strings are appended to the shared list of strings then.
+            continue
         if os.path.isdir(full_name):
             dirs.append(f)
         elif f in skiplist or full_name in exclude:
@@ -74,13 +79,13 @@ def extract_all_from_dir(json_dir):
 
 
 def main():
-    for i in sorted(include_dir):
+    for i in include_dir:
         extract_all_from_dir(i)
 
-    with open(options.output, mode="w", encoding="utf-8") as fp:
-        write_to_pot(fp, True, options.name,
-                     sanitize=options.reference,
-                     obsolete_paths=obsolete_paths)
+    sanitize(options.reference, options.name)
+
+    with open(options.reference, mode="a", encoding="utf-8") as fp:
+        write_to_pot(fp, obsolete_paths=obsolete_paths)
 
 
 main()
diff --git a/lang/merge_po.sh b/lang/merge_po.sh
@@ -18,15 +18,15 @@ function merge_lang
     if [ -f ${o} ]
     then
         echo "merging ${f}"
-        msgcat -F --use-first ${f} ${o} -o ${o} && rm ${f}
+        msgcat --use-first ${f} ${o} -o ${o} && rm ${f}
     else
         echo "importing ${f}"
         mv ${f} ${o}
     fi
 
     # merge lang/po/cataclysm-dda.pot with .po file
     echo "updating $o"
-    msgmerge --sort-by-file --no-fuzzy-matching $o lang/po/cataclysm-dda.pot | msgattrib --sort-by-file --no-obsolete -o $o
+    msgmerge --no-fuzzy-matching $o lang/po/cataclysm-dda.pot | msgattrib --clear-fuzzy --no-obsolete -o $o
 }
 
 # merge incoming translations for each language specified on the commandline

diff --git a/lang/string_extractor/message.py b/lang/string_extractor/message.py
@@ -9,6 +9,7 @@ class Message:
     context: str
     text: str
     text_plural: str
+    explicit_plural: bool
 
 
 messages = dict()

diff --git a/lang/string_extractor/pot_export.py b/lang/string_extractor/pot_export.py
@@ -46,6 +46,56 @@ def process_comments(comments, origins, obsolete_paths):
     return result
 
 
+def sanitize(reference, pkg_name="Cataclysm-DDA"):
+    """Sync plurals between `messages` and the reference POT, fix its header.
+
+    A (context, msgid) pair stores one plural, so missing ones are copied both
+    ways (explicit JSON plurals win).  Saves in place; raises if not a file.
+    """
+    if not os.path.isfile(reference):
+        raise Exception(f"Cannot read {reference}")
+    pofile = polib.pofile(reference)
+
+    for entry in pofile.untranslated_entries():
+        pair = (entry.msgctxt if entry.msgctxt else "", entry.msgid)
+        if pair in messages:
+            # first, check the messages against the reference
+            for m in messages[pair]:
+                if not m.text_plural and entry.msgid_plural:
+                    m.text_plural = entry.msgid_plural
+                    break
+            # then check the reference against the messages
+            temp_plural = ""
+            for m in messages[pair]:
+                if m.text_plural and not entry.msgid_plural:
+                    entry.msgstr_plural = {0: "", 1: ""}
+                    if m.explicit_plural:
+                        entry.msgid_plural = m.text_plural
+                        break
+                    temp_plural = m.text_plural
+            # non-explicit plural is a fallback; never clobber an explicit one
+            if temp_plural and not entry.msgid_plural:
+                entry.msgid_plural = temp_plural
+
+    # write the correct header.
+    tzinfo = datetime.now(timezone.utc).astimezone().tzinfo
+    tztime = datetime.now(tzinfo).strftime('%Y-%m-%d %H:%M%z')
+    pofile.metadata = {
+        "Project-Id-Version": pkg_name,
+        "POT-Creation-Date": f"{tztime}",
+        "PO-Revision-Date": f"{tztime}",
+        "Last-Translator": "None",
+        "Language-Team": "None",
+        "Language": "en",
+        "MIME-Version": "1.0",
+        "Content-Type": "text/plain; charset=UTF-8",
+        "Content-Transfer-Encoding": "8bit",
+        "Plural-Forms": "nplurals=2; plural=(n > 1);"
+    }
+    pofile.metadata_is_fuzzy = 0
+    pofile.save()
+
+
 def is_unicode(sequence):
     hex = "0123456789abcdef"
     return sequence[0] == "\\" and sequence[1] == "u" and \
@@ -64,47 +114,12 @@ def restore_unicode(string):
     return string
 
 
-def format_msg(prefix, text):
-    return "{0} {1}".format(prefix, restore_unicode(json.dumps(text)))
+def format_msg(text):
+    return restore_unicode(json.dumps(text))
 
 
-def write_pot_header(fp, pkg_name="Cataclysm-DDA"):
-    tzinfo = datetime.now(timezone.utc).astimezone().tzinfo
-    time = datetime.now(tzinfo).strftime('%Y-%m-%d %H:%M%z')
-    print("msgid \"\"", file=fp)
-    print("msgstr \"\"", file=fp)
-    print("\"Project-Id-Version: {}\\n\"".format(pkg_name), file=fp)
-    print("\"POT-Creation-Date: {}\\n\"".format(time), file=fp)
-    print("\"PO-Revision-Date: {}\\n\"".format(time), file=fp)
-    print("\"Last-Translator: None\\n\"", file=fp)
-    print("\"Language-Team: None\\n\"", file=fp)
-    print("\"Language: en\\n\"", file=fp)
-    print("\"MIME-Version: 1.0\\n\"", file=fp)
-    print("\"Content-Type: text/plain; charset=UTF-8\\n\"", file=fp)
-    print("\"Content-Transfer-Encoding: 8bit\\n\"", file=fp)
-    print("\"Plural-Forms: nplurals=2; plural=(n > 1);\\n\"", file=fp)
-    print("", file=fp)
-
-
-def sanitize_plural_colissions(reference):
-    if not os.path.isfile(reference):
-        raise Exception("cannot read {}".format(reference))
-    pofile = polib.pofile(reference)
-    for entry in pofile.untranslated_entries():
-        if entry.msgid_plural:
-            pair = (entry.msgctxt if entry.msgctxt else "", entry.msgid)
-            if pair in messages:
-                if len(messages[pair]) == 1:
-                    if messages[pair][0].text_plural == "":
-                        messages[pair][0].text_plural = entry.msgid_plural
-
-
-def write_to_pot(fp, with_header=True, pkg_name=None,
-                 sanitize=None, obsolete_paths=[]):
-    if sanitize:
-        sanitize_plural_colissions(sanitize)
-    if with_header:
-        write_pot_header(fp, pkg_name)
+def write_to_pot(fp, obsolete_paths=[]):
+    entries = []
     for (context, text) in occurrences:
         if (context, text) not in messages:
             continue
@@ -120,32 +135,34 @@ def write_to_pot(fp, with_header=True, pkg_name=None,
             if message.text_plural:
                 text_plural = message.text_plural
         origin = " ".join(sorted(origins))
+        entry = []
 
         # translator comments
         for line in process_comments(comments, origins, obsolete_paths):
-            print("#. ~ {}".format(line), file=fp)
+            entry.append(f"#. ~ {line}")
 
         # reference
-        print("#: {}".format(origin), file=fp)
+        entry.append(f"#: {origin}")
 
         # c-format
         if format_tag:
-            print("#, {}".format(format_tag), file=fp)
+            entry.append(f"#, {format_tag}")
 
         # context
         if context:
-            print("msgctxt \"{}\"".format(context), file=fp)
+            entry.append(f"msgctxt \"{context}\"")
 
         # text
         if text_plural:
-            print(format_msg("msgid", text), file=fp)
-            print(format_msg("msgid_plural", text_plural), file=fp)
-            print("msgstr[0] \"\"", file=fp)
-            print("msgstr[1] \"\"", file=fp)
+            entry.append(f"msgid {format_msg(text)}\n"
+                         f"msgid_plural {format_msg(text_plural)}\n"
+                         "msgstr[0] \"\"\n"
+                         "msgstr[1] \"\"")
         else:
-            print(format_msg("msgid", text), file=fp)
-            print("msgstr \"\"", file=fp)
-
-        print("", file=fp)
+            entry.append(f"msgid {format_msg(text)}\n"
+                         "msgstr \"\"")
 
+        entries.append("\n".join(entry))
         del messages[(context, text)]
+
+    fp.write("\n\n".join(entries))
diff --git a/lang/string_extractor/write_text.py b/lang/string_extractor/write_text.py
@@ -23,13 +23,16 @@ def write_text(json, origin, context="", comment="",
         comment: Translation comments in either string form or list of strings
         plural (bool): Whether the text should be pluralized
         c_format (bool): Whether the text contains C-style format string
+        explicit_plural (bool): Whether the plural is specified
+                                explicitly in JSON
     """
     if json is None or json == "":
         return
 
     comments = append_comment([], comment)
     text = ""
     text_plural = ""
+    explicit_plural = False
 
     if type(json) is str:
         text = json
@@ -48,8 +51,10 @@ def write_text(json, origin, context="", comment="",
             if "str_sp" in json:
                 text = json["str_sp"]
                 text_plural = json["str_sp"]
+                explicit_plural = True
             elif "str_pl" in json:
                 text_plural = json["str_pl"]
+                explicit_plural = True
             else:
                 text_plural = "{}s".format(text)
 
@@ -67,7 +72,8 @@ def write_text(json, origin, context="", comment="",
         messages[(context, text)] = list()
 
     messages[(context, text)].append(
-        Message(comments, origin, format_tag, context, text, text_plural))
+        Message(comments, origin, format_tag, context,
+                text, text_plural, explicit_plural))
     occurrences.append((context, text))
 
 

diff --git a/lang/update_pot.sh b/lang/update_pot.sh
@@ -16,7 +16,7 @@ echo "> Extracting strings from C++ code"
 xgettext --default-domain="cataclysm-dda" \
          --add-comments="~" \
          --sort-by-file \
-         --output="lang/po/gui.pot" \
+         --output="lang/po/base.pot" \
          --keyword="_" \
          --keyword="pgettext:1c,2" \
          --keyword="n_gettext:1,2" \
@@ -40,6 +40,8 @@ version=$(grep '^VERSION *= *' Makefile | tr -d [:space:] | cut -f 2 -d '=')
 echo "> Extracting strings from JSON"
 if ! lang/extract_json_strings.py \
         -i data \
+        -i data/json \
+        -i data/mods \
         -x data/mods/TEST_DATA \
         -X data/json/furniture_and_terrain/terrain-regional-pseudo.json \
         -X data/json/furniture_and_terrain/furniture-regional-pseudo.json \
@@ -50,19 +52,19 @@ if ! lang/extract_json_strings.py \
         -D data/mods/BlazeIndustries \
         -D data/mods/desert_region \
         -n "$package $version" \
-        -r lang/po/gui.pot \
-        -o lang/po/json.pot
+        -r lang/po/base.pot
 then
     echo "Error in extracting strings from JSON. Aborting."
     exit 1
 fi
 
-echo "> Merging translation templates"
-msgcat -o lang/po/cataclysm-dda.pot --use-first lang/po/json.pot lang/po/gui.pot
+echo "> Unification of translation template"
+msguniq -o lang/po/cataclysm-dda.pot lang/po/base.pot
 if [ ! -f lang/po/cataclysm-dda.pot ]; then
     echo "Error in merging translation templates. Aborting."
     exit 1
 fi
+sed -i "/^#\. #-#-#-#-#  [a-zA-Z0-9(). -]*#-#-#-#-#$/d" lang/po/cataclysm-dda.pot
 
 # convert line endings to unix
 os="$(uname -s)"