From 8b902d42b9ed879a34bb297b906f60b674182afd Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 2 Apr 2024 11:38:25 +0200 Subject: [PATCH 1/6] introduce new topics, overly specific --- followthemoney/types/topic.py | 5 ++++- js/src/defaultModel.json | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/followthemoney/types/topic.py b/followthemoney/types/topic.py index 2df7f653e..0822ff76e 100644 --- a/followthemoney/types/topic.py +++ b/followthemoney/types/topic.py @@ -56,7 +56,9 @@ class TopicType(EnumType): "fin.bank": _("Bank"), "fin.fund": _("Fund"), "fin.adivsor": _("Financial advisor"), - "role.pep": _("Political"), + "reg.action": _("Regulatory action"), + "role.pep": _("Politican"), + "role.pol": _("Non-PEP"), "role.rca": _("Close Associate"), "role.judge": _("Judge"), "role.civil": _("Civil servant"), @@ -74,6 +76,7 @@ class TopicType(EnumType): "asset.frozen": _("Frozen asset"), "sanction": _("Sanctioned entity"), "sanction.linked": _("Sanction-linked entity"), + "sanction.counter": _("Counter-sanctioned entity"), "export.control": _("Export controlled"), "debarment": _("Debarred entity"), "poi": _("Person of interest"), diff --git a/js/src/defaultModel.json b/js/src/defaultModel.json index 4b1a7cc75..15019c301 100644 --- a/js/src/defaultModel.json +++ b/js/src/defaultModel.json @@ -6942,6 +6942,7 @@ "poi": "Person of interest", "pol.party": "Political party", "pol.union": "Union", + "reg.action": "Regulatory action", "rel": "Religion", "role.acct": "Accountant", "role.act": "Activist", @@ -6951,10 +6952,12 @@ "role.judge": "Judge", "role.lawyer": "Lawyer", "role.oligarch": "Oligarch", - "role.pep": "Political", + "role.pep": "Politican", + "role.pol": "Non-PEP", "role.rca": "Close Associate", "role.spy": "Spy", "sanction": "Sanctioned entity", + "sanction.counter": "Counter-sanctioned entity", "sanction.linked": "Sanction-linked entity" } }, From dfa07f80a6677d27d040bac03ff58392999acdf8 Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 2 Apr 2024 12:25:09 +0200 Subject: [PATCH 2/6] Use orjson to dump default model in the hopes of making this more stable --- followthemoney/cli/cli.py | 32 ++++++++++++++++++-------------- js/src/defaultModel.json | 32 ++++++++++++++++---------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/followthemoney/cli/cli.py b/followthemoney/cli/cli.py index d66842187..f233c1c54 100644 --- a/followthemoney/cli/cli.py +++ b/followthemoney/cli/cli.py @@ -1,14 +1,14 @@ import sys -import json import click +import orjson import logging from pathlib import Path -from typing import Optional, TextIO +from typing import Optional, BinaryIO, List, Any, Dict from banal import ensure_list from followthemoney import model from followthemoney.namespace import Namespace -from followthemoney.cli.util import InPath, OutPath, path_entities, read_entities +from followthemoney.cli.util import InPath, OutPath, path_entities from followthemoney.cli.util import path_writer, write_entity from followthemoney.proxy import EntityProxy @@ -20,9 +20,10 @@ def cli() -> None: @cli.command("dump-model", help="Export the current schema model") -@click.option("-o", "--outfile", type=click.File("w"), default="-") -def dump_model(outfile: TextIO) -> None: - outfile.write(json.dumps(model.to_dict(), indent=2, sort_keys=True)) +@click.option("-o", "--outfile", type=click.File("wb"), default="-") +def dump_model(outfile: BinaryIO) -> None: + f = orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS + outfile.write(orjson.dumps(model.to_dict(), option=f)) @cli.command("validate", help="Re-parse and validate the given data") @@ -34,7 +35,7 @@ def validate(infile: Path, outfile: Path) -> None: for entity in path_entities(infile, EntityProxy, cleaned=False): clean = model.make_entity(entity.schema) clean.id = entity.id - for (prop, value) in entity.itervalues(): + for prop, value in entity.itervalues(): clean.add(prop, value) write_entity(outfh, clean) except BrokenPipeError: @@ -46,12 +47,14 @@ def validate(infile: Path, outfile: Path) -> None: @click.option("-o", "--outfile", type=OutPath, default="-") # noqa def import_vis(infile: Path, outfile: Path) -> None: with path_writer(outfile) as outfh: - with open(infile, "r") as infh: - data = json.load(infh) + with open(infile, "rb") as infh: + data: Dict[str, Any] = orjson.loads(infh.read()) if "entities" in data: - entities = data.get("entities", data) - if "layout" in data: + entities: List[Dict[str, Any]] = data.get("entities", data) + elif "layout" in data: entities = data.get("layout", {}).get("entities", data) + else: + raise click.ClickException("No entities found in VIS file") for entity_data in ensure_list(entities): entity = EntityProxy.from_dict(model, entity_data) write_entity(outfh, entity) @@ -75,10 +78,11 @@ def sign(infile: Path, outfile: Path, signature: Optional[str]) -> None: @cli.command(help="Format a stream of entities to make it readable") @click.option("-i", "--infile", type=InPath, default="-") # noqa def pretty(infile: Path) -> None: - stdout = click.get_text_stream("stdout") + stdout = click.get_binary_stream("stdout") try: + f = orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE for entity in path_entities(infile, EntityProxy): - data = json.dumps(entity.to_dict(), indent=2) - stdout.write(data + "\n") + data = orjson.dumps(entity.to_dict(), option=f) + stdout.write(data) except BrokenPipeError: raise click.Abort() diff --git a/js/src/defaultModel.json b/js/src/defaultModel.json index 15019c301..a3d19430c 100644 --- a/js/src/defaultModel.json +++ b/js/src/defaultModel.json @@ -1042,14 +1042,14 @@ "type": "identifier" }, "fnsCode": { - "description": "(RU, \u0424\u041d\u0421) Federal Tax Service related info", + "description": "(RU, ФНС) Federal Tax Service related info", "label": "Federal tax service code", "name": "fnsCode", "qname": "Company:fnsCode", "type": "identifier" }, "fssCode": { - "description": "(RU, \u0424\u0421\u0421) Social Security", + "description": "(RU, ФСС) Social Security", "label": "FSS", "name": "fssCode", "qname": "Company:fssCode", @@ -1100,7 +1100,7 @@ "type": "country" }, "kppCode": { - "description": "(RU, \u041a\u041f\u041f) in addition to INN for orgs; reason for registration at FNS", + "description": "(RU, КПП) in addition to INN for orgs; reason for registration at FNS", "label": "KPP", "name": "kppCode", "qname": "Company:kppCode", @@ -1114,21 +1114,21 @@ "type": "identifier" }, "okopfCode": { - "description": "(RU, \u041e\u041a\u041e\u041f\u0424) What kind of business entity", + "description": "(RU, ОКОПФ) What kind of business entity", "label": "OKOPF", "name": "okopfCode", "qname": "Company:okopfCode", "type": "string" }, "oksmCode": { - "description": "Russian (\u041e\u041a\u0421\u041c) countries classifier", + "description": "Russian (ОКСМ) countries classifier", "label": "OKSM", "name": "oksmCode", "qname": "Company:oksmCode", "type": "string" }, "okvedCode": { - "description": "(RU, \u041e\u041a\u0412\u042d\u0414) Economical activity classifier. OKVED2 is the same but newer", + "description": "(RU, ОКВЭД) Economical activity classifier. OKVED2 is the same but newer", "label": "OKVED(2) Classifier", "name": "okvedCode", "qname": "Company:okvedCode", @@ -1143,7 +1143,7 @@ "type": "identifier" }, "pfrNumber": { - "description": "(RU, \u041f\u0424\u0420) Pension Fund Registration number. AAA-BBB-CCCCCC, where AAA is organisation region, BBB is district, CCCCCC number at a specific branch", + "description": "(RU, ПФР) Pension Fund Registration number. AAA-BBB-CCCCCC, where AAA is organisation region, BBB is district, CCCCCC number at a specific branch", "label": "PFR Number", "matchable": true, "name": "pfrNumber", @@ -2184,7 +2184,7 @@ "type": "string" }, "customsProcedure": { - "description": "Customs Procedure \u2014 type of customs clearance", + "description": "Customs Procedure — type of customs clearance", "label": "Customs Procedure", "name": "customsProcedure", "qname": "EconomicActivity:customsProcedure", @@ -2290,7 +2290,7 @@ "type": "entity" }, "vedCode": { - "description": "(\u041a\u043e\u0434 \u0422\u041d \u0412\u042d\u0414) Foreign Economic Activity Commodity Code", + "description": "(Код ТН ВЭД) Foreign Economic Activity Commodity Code", "label": "FEAC Code", "matchable": true, "name": "vedCode", @@ -2298,7 +2298,7 @@ "type": "identifier" }, "vedCodeDescription": { - "description": "(\u041e\u043f\u0438\u0441\u0430\u043d\u0438\u0435 \u043a\u043e\u0434\u0430 \u0422\u041d \u0412\u042d\u0414) Foreign Economic Activity Commodity Code description", + "description": "(Описание кода ТН ВЭД) Foreign Economic Activity Commodity Code description", "label": "FEAC Code description", "name": "vedCodeDescription", "qname": "EconomicActivity:vedCodeDescription", @@ -6451,7 +6451,7 @@ "at": "Austria", "au": "Australia", "aw": "Aruba", - "ax": "\u00c5land Islands", + "ax": "Åland Islands", "az": "Azerbaijan", "az-nk": "Nagorno-Karabakh", "ba": "Bosnia & Herzegovina", @@ -6463,7 +6463,7 @@ "bh": "Bahrain", "bi": "Burundi", "bj": "Benin", - "bl": "St. Barth\u00e9lemy", + "bl": "St. Barthélemy", "bm": "Bermuda", "bn": "Brunei", "bo": "Bolivia", @@ -6481,7 +6481,7 @@ "cf": "Central African Republic", "cg": "Congo - Brazzaville", "ch": "Switzerland", - "ci": "C\u00f4te d\u2019Ivoire", + "ci": "Côte d’Ivoire", "ck": "Cook Islands", "cl": "Chile", "cm": "Cameroon", @@ -6494,7 +6494,7 @@ "csxx": "Serbia and Montenegro", "cu": "Cuba", "cv": "Cape Verde", - "cw": "Cura\u00e7ao", + "cw": "Curaçao", "cx": "Christmas Island", "cy": "Cyprus", "cy-trnc": "Northern Cyprus", @@ -6643,7 +6643,7 @@ "py": "Paraguay", "qa": "Qatar", "qo": "Outlying Oceania", - "re": "R\u00e9union", + "re": "Réunion", "ro": "Romania", "rs": "Serbia", "ru": "Russia", @@ -6665,7 +6665,7 @@ "so-som": "Somaliland", "sr": "Suriname", "ss": "South Sudan", - "st": "S\u00e3o Tom\u00e9 & Pr\u00edncipe", + "st": "São Tomé & Príncipe", "suhh": "Soviet Union", "sv": "El Salvador", "sx": "Sint Maarten", From 9b8ebb5fa462c42fd129d5d774145b81fbe9c2d4 Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 2 Apr 2024 12:33:18 +0200 Subject: [PATCH 3/6] Try outputting git diff --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8f64a8b80..5321759b2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,6 +33,7 @@ jobs: run: | python contrib/check_model.py make default-model + git diff if [[ -z "$(git status --porcelain)" ]]; then echo "Default model is up to date 👌" From 7f75f1a6e93141427a6de96de03b9abed616095c Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 2 Apr 2024 12:37:19 +0200 Subject: [PATCH 4/6] Local babel was outdated --- js/src/defaultModel.json | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/js/src/defaultModel.json b/js/src/defaultModel.json index a3d19430c..b048095bd 100644 --- a/js/src/defaultModel.json +++ b/js/src/defaultModel.json @@ -6489,6 +6489,7 @@ "cn-xz": "Tibet", "co": "Colombia", "cp": "Clipperton Island", + "cq": "Sark", "cr": "Costa Rica", "cshh": "Czechoslovakia", "csxx": "Serbia and Montenegro", @@ -6683,7 +6684,7 @@ "tm": "Turkmenistan", "tn": "Tunisia", "to": "Tonga", - "tr": "Turkey", + "tr": "Türkiye", "tt": "Trinidad & Tobago", "tv": "Tuvalu", "tw": "Taiwan", diff --git a/setup.py b/setup.py index fa9a4580e..14d90eb32 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ }, zip_safe=False, install_requires=[ - "babel >= 2.9.1, < 3.0.0", + "babel >= 2.14.0, < 3.0.0", "pyyaml >= 5.0.0, < 7.0.0", "types-PyYAML", "sqlalchemy2-stubs", From 2283a5ac6100f9282548b2906ce54c72e546022e Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Wed, 8 May 2024 10:40:45 +0200 Subject: [PATCH 5/6] add "regulator warning" --- followthemoney/types/topic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/followthemoney/types/topic.py b/followthemoney/types/topic.py index 0822ff76e..144c97ee5 100644 --- a/followthemoney/types/topic.py +++ b/followthemoney/types/topic.py @@ -56,7 +56,8 @@ class TopicType(EnumType): "fin.bank": _("Bank"), "fin.fund": _("Fund"), "fin.adivsor": _("Financial advisor"), - "reg.action": _("Regulatory action"), + "reg.action": _("Regulator action"), + "reg.warn": _("Regulator warning"), "role.pep": _("Politican"), "role.pol": _("Non-PEP"), "role.rca": _("Close Associate"), From 3dbf6dc9dae71c3a4383a1372645b55df63c8bfc Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Wed, 8 May 2024 11:01:38 +0200 Subject: [PATCH 6/6] update the model --- js/src/defaultModel.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/src/defaultModel.json b/js/src/defaultModel.json index b048095bd..dc293e83d 100644 --- a/js/src/defaultModel.json +++ b/js/src/defaultModel.json @@ -6943,7 +6943,8 @@ "poi": "Person of interest", "pol.party": "Political party", "pol.union": "Union", - "reg.action": "Regulatory action", + "reg.action": "Regulator action", + "reg.warn": "Regulator warning", "rel": "Religion", "role.acct": "Accountant", "role.act": "Activist",