Skip to content

Commit

Permalink
re-work old index deletion
Browse files (browse the repository at this point in the history)
  • Loading branch information
pudo committed Sep 19, 2023
1 parent b4222e8 commit 5811bed
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
4 changes: 2 additions & 2 deletions yente/data/util.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from pathlib import Path
from jellyfish import metaphone
from urllib.parse import urlparse
from prefixdate.precision import Precision
from contextlib import asynccontextmanager
from aiohttp import ClientSession, ClientTimeout, TCPConnector
from typing import AsyncGenerator, Dict, List, Set, Union
from followthemoney.types import registry
from nomenklatura.util import fingerprint_name, name_words, levenshtein
from nomenklatura.util import phonetic_token


def expand_dates(dates: List[str]) -> List[str]:
Expand All @@ -24,7 +24,7 @@ def phonetic_names(names: List[str]) -> List[str]:
phonemes: Set[str] = set()
for word in name_words(names):
if len(word) > 2:
phonemes.add(metaphone(word))
phonemes.add(phonetic_token(word))
return list(phonemes)


Expand Down
26 changes: 17 additions & 9 deletions yente/search/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ async def index_entities(es: AsyncElasticsearch, dataset: Dataset, force: bool)
url=dataset.entities_url,
version=version,
)
dataset_prefix = f"{settings.ENTITY_INDEX}-{dataset.name}"
next_index = f"{dataset_prefix}-{version}"
dataset_prefix = f"{settings.ENTITY_INDEX}-{dataset.name}-"
next_index = f"{dataset_prefix}{version}"
exists = await es.indices.exists_alias(name=settings.ENTITY_INDEX, index=next_index)
if exists.body and not force:
log.info("Index is up to date.", index=next_index)
Expand Down Expand Up @@ -148,14 +148,22 @@ async def index_entities(es: AsyncElasticsearch, dataset: Dataset, force: bool)
return False
log.info("Index is now aliased to: %s" % settings.ENTITY_INDEX, index=next_index)

indices: Any = await es.cat.indices(format="json")
for index_data in indices:
index_name: str = index_data.get("index")
if not index_name.startswith(f"{dataset_prefix}-"):
res = await es.indices.get_alias(name=settings.ENTITY_INDEX)
for aliased_index in res.body.keys():
if aliased_index == next_index:
continue
if index_name < next_index:
log.info("Delete old index", index=index_name)
await es.indices.delete(index=index_name)
if aliased_index.startswith(dataset_prefix):
log.info("Delete old index", index=aliased_index)
res = await es.indices.delete(index=aliased_index)

# indices: Any = await es.cat.indices(format="json")
# for index_data in indices:
# index_name: str = index_data.get("index")
# if not index_name.startswith(dataset_prefix):
# continue
# if index_name < next_index:
# log.info("Delete old index", index=index_name)
# res = await es.indices.delete(index=index_name)
return True


Expand Down

0 comments on commit 5811bed

Please sign in to comment.