From e36a833bce094657eef8dcee3d14370aee5d6ec5 Mon Sep 17 00:00:00 2001 From: Willi Date: Fri, 29 Nov 2024 10:17:08 +0100 Subject: [PATCH] Add no-index option to avoid indexing a field --- arlas/cli/index.py | 3 ++- arlas/cli/model_infering.py | 37 +++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/arlas/cli/index.py b/arlas/cli/index.py index 31cc16d..65d1ac7 100644 --- a/arlas/cli/index.py +++ b/arlas/cli/index.py @@ -119,6 +119,7 @@ def mapping( nb_lines: int = typer.Option(default=2, help="Number of line to consider for generating the mapping. Avoid going over 10."), field_mapping: list[str] = typer.Option(default=[], help="Override the mapping with the provided field path/type. Example: fragment.location:geo_point. Important: the full field path must be provided."), no_fulltext: list[str] = typer.Option(default=[], help="List of keyword or text fields that should not be in the fulltext search. Important: the field name only must be provided."), + no_index: list[str] = typer.Option(default=[], help="List of fields that should not be indexed."), push_on: str = typer.Option(default=None, help="Push the generated mapping for the provided index name"), ): config = variables["arlas"] @@ -138,7 +139,7 @@ def mapping( else: print(f"Error: invalid field_mapping \"{fm}\". The format is \"field:type\" like \"fragment.location:geo_point\"", file=sys.stderr) exit(1) - mapping = make_mapping(file=file, nb_lines=nb_lines, types=types, no_fulltext=no_fulltext) + mapping = make_mapping(file=file, nb_lines=nb_lines, types=types, no_fulltext=no_fulltext, no_index=no_index) if push_on and config: Service.create_index( config, diff --git a/arlas/cli/model_infering.py b/arlas/cli/model_infering.py index 6658f1e..d8d2221 100644 --- a/arlas/cli/model_infering.py +++ b/arlas/cli/model_infering.py @@ -141,29 +141,34 @@ def __type_node__(n, name: str = None) -> str: return "UNDEFINED" # from the typed tree, generate the mapping. -def __generate_mapping__(tree, mapping, no_fulltext: list[str]): +def __generate_mapping__(tree, mapping, no_fulltext: list[str], no_index: list[str]): if type(tree) is dict: - for (k, v) in tree.items(): - if k not in ["__type__", "__values__"]: - t: str = v.get("__type__") - if t == "object": - mapping[k] = {"properties": {}} - __generate_mapping__(v, mapping[k]["properties"], no_fulltext) + for (field_name, v) in tree.items(): + if field_name not in ["__type__", "__values__"]: + field_type: str = v.get("__type__") + if field_type == "object": + mapping[field_name] = {"properties": {}} + __generate_mapping__(tree=v, mapping=mapping[field_name]["properties"], no_fulltext=no_fulltext, + no_index=no_index) else: - if t.startswith("date-"): + if field_type.startswith("date-"): # Dates can have format patterns containing '-' - mapping[k] = {"type": "date", "format": t.split("-", 1)[1]} + mapping[field_name] = {"type": "date", "format": field_type.split("-", 1)[1]} else: - mapping[k] = {"type": t} - if t in ["keyword", "text"]: - print("-->{}".format(k)) - if k not in no_fulltext: - mapping[k]["copy_to"] = ["internal.fulltext", "internal.autocomplete"] + mapping[field_name] = {"type": field_type} + if field_type in ["keyword", "text"]: + if field_name not in no_fulltext: + mapping[field_name]["copy_to"] = ["internal.fulltext", "internal.autocomplete"] + # Avoid indexing field if field in --no-index + if field_name in no_index: + mapping[field_name]["index"] = "false" + print(f"-->{field_name}: {mapping[field_name]['type']}") else: raise Exception("Unexpected state") -def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}, no_fulltext: list[str] = []): +def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}, no_fulltext: list[str] = [], + no_index: list[str] = []): tree = {} mapping = {} with open(file) as f: @@ -176,7 +181,7 @@ def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}, no_fu hit = json.loads(line) __build_tree__(tree, hit) __type_tree__("", tree, types) - __generate_mapping__(tree, mapping, no_fulltext) + __generate_mapping__(tree, mapping, no_fulltext, no_index) mapping["internal"] = { "properties": { "autocomplete": {