def is_prefixed_value(value: str) -> bool:
    """Return True if the given value has a language prefix (en:, fr:,...),
    False otherwise.

    A value is considered prefixed when it starts with two ASCII letters
    followed by a colon and at least one more character (ex: `en:nestle`).
    A value that merely has a colon in third position (ex: `12:30-cafe`) is
    not considered prefixed.

    :param value: the tag value to check
    :return: True if the value starts with a 2-letter language prefix
    """
    if len(value) <= 3 or value[2] != ":":
        return False
    lang = value[:2]
    # Guard against values such as "12:..." or "a :..." where the colon is
    # in the right place but the first two characters are not a language code.
    return lang.isascii() and lang.isalpha()


def create_brand_taxonomy_mapping(taxonomy: Taxonomy) -> Dict[str, str]:
    """From a brand taxonomy, create a mapping of tags to taxonomy brand names.

    The mapping generated is different than the mapping generated by the
    `create_taxonomy_mapping` function, as it maps an unprefixed value
    (ex: `nestle`) to a brand name, with capitalization and accents
    (ex: `Nestlé`).

    The taxonomy mapping has the following format:
    {
        "alva": "Alva",
        "benecop": "Bénécop",
        ...
    }

    The English name of each node is used when present. If a node has no
    English name, the first name available in another language is used
    instead. Nodes without any name are left out of the mapping.

    :param taxonomy: the taxonomy to use (brand taxonomy)
    :return: a dict mapping tags (*without* language prefix) to brand values
        (capitalized)
    """
    mapping: Dict[str, str] = {}
    for node in taxonomy.iter_nodes():
        names = node.names
        if "en" in names:
            brand_name = names["en"]
        elif names:
            # No English name: fall back to the first name available rather
            # than failing with a KeyError on the whole taxonomy.
            brand_name = next(iter(names.values()))
        else:
            # Nothing to map this tag to.
            continue

        # Strip the 3-character language prefix (ex: `en:`) when present.
        unprefixed_key = node.id[3:] if is_prefixed_value(node.id) else node.id
        mapping[unprefixed_key] = brand_name
    return mapping
def test_create_brand_taxonomy_mapping(self):
    """Check that brand tags are mapped, without their language prefix, to
    the English brand name (capitalization and accents preserved)."""
    brand_nodes = {
        "en:5th-season": {"name": {"en": "5th Season"}},
        "en:arev": {"name": {"en": "Arèv"}},
        "en:arrighi": {"name": {"en": "Arrighi"}},
        "en:voiles-au-vent": {"name": {"en": "Voiles au Vent"}},
    }
    expected_mapping = {
        "5th-season": "5th Season",
        "arev": "Arèv",
        "arrighi": "Arrighi",
        "voiles-au-vent": "Voiles au Vent",
    }

    brand_taxonomy = Taxonomy.from_dict(brand_nodes)
    actual_mapping = create_brand_taxonomy_mapping(brand_taxonomy)

    assert actual_mapping == expected_mapping