From 89813d8c6b74855492e05b08206dc17cf5cda5b0 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@pudo.org>
Date: Fri, 6 Oct 2023 12:38:20 +0200
Subject: [PATCH] Deduplicate name keys; add name parts to the text field

---
 yente/data/util.py      | 8 ++++----
 yente/search/indexer.py | 7 +++++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/yente/data/util.py b/yente/data/util.py
index 3fcd385d..e8cd7b0e 100644
--- a/yente/data/util.py
+++ b/yente/data/util.py
@@ -5,7 +5,7 @@
 from prefixdate.precision import Precision
 from contextlib import asynccontextmanager
 from aiohttp import ClientSession, ClientTimeout, TCPConnector
-from typing import AsyncGenerator, Dict, List, Union, Iterable, Optional
+from typing import AsyncGenerator, Dict, List, Union, Iterable, Optional, Set
 from followthemoney.types import registry
 from normality.cleaning import decompose_nfkd, category_replace
 from fingerprints import remove_types, clean_name_light, clean_entity_prefix
@@ -67,12 +67,12 @@ def index_name_parts(names: List[str]) -> List[str]:
 
 def index_name_keys(names: List[str]) -> List[str]:
     """Generate a indexable name keys from the given names."""
-    keys: List[str] = []
+    keys: Set[str] = set()
     for name in names:
         for key in (fingerprint_name(name), clean_name_light(name)):
             if key is not None:
-                keys.append(key)
-    return keys
+                keys.add(key)
+    return sorted(keys)  # set order varies per run; sort for stable output
 
 
 def pick_names(names: List[str], limit: int = 3) -> List[str]:
diff --git a/yente/search/indexer.py b/yente/search/indexer.py
index ef981c5c..936f4de6 100644
--- a/yente/search/indexer.py
+++ b/yente/search/indexer.py
@@ -47,13 +47,16 @@ async def iter_entity_docs(
 
             texts = entity.pop("indexText")
             doc = entity.to_full_dict(matchable=True)
-            doc["text"] = texts
             names: List[str] = doc.get(NAMES_FIELD, [])
             names.extend(entity.get("weakAlias", quiet=True))
-            doc[NAME_PART_FIELD] = index_name_parts(names)
+            name_parts = index_name_parts(names)
+            texts.extend(name_parts)
+            doc[NAME_PART_FIELD] = name_parts
             doc[NAME_KEY_FIELD] = index_name_keys(names)
             doc[NAME_PHONETIC_FIELD] = phonetic_names(names)
             doc[DateType.group] = expand_dates(doc.pop(DateType.group, []))
+            doc["text"] = texts
+
             entity_id = doc.pop("id")
             yield {"_index": index, "_id": entity_id, "_source": doc}
         except FollowTheMoneyException as exc: