From d1d03d5c6aa35b0d9d66d4a1944e0a6cf49618ea Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Sun, 8 Oct 2023 23:04:24 +0200 Subject: [PATCH] add an option to immediately cancel the indexer if a specific index already exists --- yente/search/indexer.py | 16 ++++++++++++++-- yente/settings.py | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/yente/search/indexer.py b/yente/search/indexer.py index 936f4de6..081e09f8 100644 --- a/yente/search/indexer.py +++ b/yente/search/indexer.py @@ -92,7 +92,13 @@ async def index_entities(es: AsyncElasticsearch, dataset: Dataset, force: bool) ) dataset_prefix = f"{settings.ENTITY_INDEX}-{dataset.name}-" next_index = f"{dataset_prefix}{version}" - exists = await es.indices.exists_alias(name=settings.ENTITY_INDEX, index=next_index) + if settings.INDEX_EXISTS_ABORT: + exists = await es.indices.exists(index=next_index) + else: + exists = await es.indices.exists_alias( + name=settings.ENTITY_INDEX, + index=next_index, + ) if exists.body and not force: log.info("Index is up to date.", index=next_index) return False @@ -134,7 +140,13 @@ async def index_entities(es: AsyncElasticsearch, dataset: Dataset, force: bool) errors=errors, entities_url=dataset.entities_url, ) - await es.indices.delete(index=next_index) + if settings.INDEX_EXISTS_ABORT: + is_linked = await es.indices.exists_alias( + name=settings.ENTITY_INDEX, + index=next_index, + ) + if not is_linked.body: + await es.indices.delete(index=next_index) return False await es.indices.refresh(index=next_index) diff --git a/yente/settings.py b/yente/settings.py index 73f36849..c58387f7 100644 --- a/yente/settings.py +++ b/yente/settings.py @@ -150,6 +150,7 @@ def env_str(name: str, default: Optional[str] = None) -> Optional[str]: ES_SHARDS = int(env_str("YENTE_ELASTICSEARCH_SHARDS") or "1") ENTITY_INDEX = f"{ES_INDEX}-entities" INDEX_VERSION = env_str("YENTE_INDEX_VERSION", "008") +INDEX_EXISTS_ABORT = as_bool(env_str("YENTE_INDEX_EXISTS_ABORT") or "false") # Log output can be formatted as JSON: LOG_JSON = as_bool(env_str("YENTE_LOG_JSON", "false"))