Skip to content

Commit

Permalink
set the name keys
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed Oct 6, 2023
1 parent 4824131 commit 89813d8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
8 changes: 4 additions & 4 deletions yente/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from prefixdate.precision import Precision
from contextlib import asynccontextmanager
from aiohttp import ClientSession, ClientTimeout, TCPConnector
from typing import AsyncGenerator, Dict, List, Union, Iterable, Optional
from typing import AsyncGenerator, Dict, List, Union, Iterable, Optional, Set
from followthemoney.types import registry
from normality.cleaning import decompose_nfkd, category_replace
from fingerprints import remove_types, clean_name_light, clean_entity_prefix
Expand Down Expand Up @@ -67,12 +67,12 @@ def index_name_parts(names: List[str]) -> List[str]:

def index_name_keys(names: List[str]) -> List[str]:
"""Generate a indexable name keys from the given names."""
keys: List[str] = []
keys: Set[str] = set()
for name in names:
for key in (fingerprint_name(name), clean_name_light(name)):
if key is not None:
keys.append(key)
return keys
keys.add(key)
return list(keys)


def pick_names(names: List[str], limit: int = 3) -> List[str]:
Expand Down
7 changes: 5 additions & 2 deletions yente/search/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,16 @@ async def iter_entity_docs(

texts = entity.pop("indexText")
doc = entity.to_full_dict(matchable=True)
doc["text"] = texts
names: List[str] = doc.get(NAMES_FIELD, [])
names.extend(entity.get("weakAlias", quiet=True))
doc[NAME_PART_FIELD] = index_name_parts(names)
name_parts = index_name_parts(names)
texts.extend(name_parts)
doc[NAME_PART_FIELD] = name_parts
doc[NAME_KEY_FIELD] = index_name_keys(names)
doc[NAME_PHONETIC_FIELD] = phonetic_names(names)
doc[DateType.group] = expand_dates(doc.pop(DateType.group, []))
doc["text"] = texts

entity_id = doc.pop("id")
yield {"_index": index, "_id": entity_id, "_source": doc}
except FollowTheMoneyException as exc:
Expand Down

0 comments on commit 89813d8

Please sign in to comment.