diff --git a/yente/search/indexer.py b/yente/search/indexer.py index d3112687..8db6b05c 100644 --- a/yente/search/indexer.py +++ b/yente/search/indexer.py @@ -20,8 +20,8 @@ NAME_PHONETIC_FIELD, ) from yente.provider import SearchProvider, with_provider -from yente.search.util import parse_index_name -from yente.search.util import construct_index_name +from yente.search.versions import parse_index_name +from yente.search.versions import construct_index_name from yente.data.util import expand_dates, phonetic_names from yente.data.util import index_name_parts, index_name_keys diff --git a/yente/search/status.py b/yente/search/status.py index 002655b5..6ec4d67f 100644 --- a/yente/search/status.py +++ b/yente/search/status.py @@ -1,7 +1,7 @@ from yente import settings from yente.logs import get_logger from yente.provider import SearchProvider -from yente.search.util import parse_index_name +from yente.search.versions import parse_index_name from yente.data.manifest import Catalog log = get_logger(__name__) diff --git a/yente/search/util.py b/yente/search/versions.py similarity index 61% rename from yente/search/util.py rename to yente/search/versions.py index 76406879..8589f29a 100644 --- a/yente/search/util.py +++ b/yente/search/versions.py @@ -1,9 +1,19 @@ +from functools import cache from typing import Tuple from normality import slugify +import followthemoney from yente import settings +@cache +def system_version() -> str: + """Return the version prefix: INDEX_VERSION, zero-padded FtM version, and a dash.""" + parts = [v.rjust(2, "0") for v in followthemoney.__version__.split(".")] + ftm_version = "".join(parts)[:6] + return f"{settings.INDEX_VERSION}{ftm_version}-" + + def parse_index_name(index: str) -> Tuple[str, str]: """ Parse a given index name. 
@@ -19,31 +29,33 @@ def parse_index_name(index: str) -> Tuple[str, str]: if "-" not in index_end: raise ValueError("Index name does not contain a version.") dataset, index_version = index_end.split("-", 1) - if not index_version.startswith(settings.INDEX_VERSION): + sys_version = system_version() + if not index_version.startswith(sys_version): raise ValueError("Index version does not start with the correct prefix.") - dataset_version = index_version[len(settings.INDEX_VERSION) :] + dataset_version = index_version[len(sys_version) :] if len(dataset_version) < 1: raise ValueError("Index version must be at least one character long.") return (dataset, dataset_version) -def construct_index_name(ds_name: str, ds_version: str | None = None) -> str: +def construct_index_name(dataset: str, version: str | None = None) -> str: """ Given a dataset and optionally a version construct a properly versioned index name. """ - if len(str(ds_name)) < 1: + if len(str(dataset)) < 1: raise ValueError("Dataset name must be at least one character long.") - base = f"{settings.ENTITY_INDEX}-{ds_name}" - if ds_version is None: + base = f"{settings.ENTITY_INDEX}-{dataset}" + if version is None: return base - return f"{base}-{construct_index_version(ds_version)}" + return f"{base}-{construct_index_version(version)}" def construct_index_version(version: str) -> str: """Given a version ID, return a version string with the version prefix.""" if len(version) < 1: raise ValueError("Version must be at least one character long.") - combined = slugify(f"{settings.INDEX_VERSION}{version}", "-") - if combined is None or len(combined) < len(settings.INDEX_VERSION) + 1: - raise ValueError("Invalid version: %s%s." % (settings.INDEX_VERSION, version)) + sys_version = system_version() + combined = slugify(f"{sys_version}{version}", "-") + if combined is None or len(combined) < len(sys_version) + 1: + raise ValueError("Invalid version: %s%s." 
% (sys_version, version)) return combined diff --git a/yente/settings.py b/yente/settings.py index 03ebe65c..5eac5f47 100644 --- a/yente/settings.py +++ b/yente/settings.py @@ -181,7 +181,7 @@ def random_cron() -> str: INDEX_SHARDS = int(env_legacy("YENTE_INDEX_SHARDS", "YENTE_ELASTICSEARCH_SHARDS", "1")) INDEX_NAME = env_legacy("YENTE_INDEX_NAME", "YENTE_ELASTICSEARCH_INDEX", "yente") ENTITY_INDEX = f"{INDEX_NAME}-entities" -INDEX_VERSION = env_str("YENTE_INDEX_VERSION", "009") +INDEX_VERSION = env_str("YENTE_INDEX_VERSION", "011") assert len(INDEX_VERSION) == 3, "Index version must be 3 characters long." # ElasticSearch-only options: