From 4d7fe0a01753df38d960f1d4a25e9c9b41feb19c Mon Sep 17 00:00:00 2001 From: Sylvain Gaudan Date: Wed, 31 Jul 2024 08:13:43 +0200 Subject: [PATCH] add --no-fulltext option --- arlas/cli/index.py | 5 +++-- arlas/cli/model_infering.py | 12 +++++++----- scripts/tests.sh | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arlas/cli/index.py b/arlas/cli/index.py index 4acea86..835595a 100644 --- a/arlas/cli/index.py +++ b/arlas/cli/index.py @@ -89,7 +89,8 @@ def data( def mapping( file: str = typer.Argument(help="Path to the file conaining the data. Format: NDJSON"), nb_lines: int = typer.Option(default=2, help="Number of line to consider for generating the mapping. Avoid going over 10."), - field_mapping: list[str] = typer.Option(default=[], help="Overide the mapping with the provided field/type. Example: fragment.location:geo_point"), + field_mapping: list[str] = typer.Option(default=[], help="Overide the mapping with the provided field path/type. Example: fragment.location:geo_point. Important: the full field path must be provided."), + no_fulltext: list[str] = typer.Option(default=[], help="List of keyword or text fields that should not be in the fulltext search. Important: the field name only must be provided."), push_on: str = typer.Option(default=None, help="Push the generated mapping for the provided index name"), ): config = variables["arlas"] @@ -104,7 +105,7 @@ def mapping( else: print("Error: invalid field_mapping \"{}\". 
The format is \"field:type\" like \"fragment.location:geo_point\"".format(fm), file=sys.stderr) exit(1) - mapping = make_mapping(file=file, nb_lines=nb_lines, types=types) + mapping = make_mapping(file=file, nb_lines=nb_lines, types=types, no_fulltext=no_fulltext) if push_on and config: Service.create_index( config, diff --git a/arlas/cli/model_infering.py b/arlas/cli/model_infering.py index ea7ab57..a334da8 100644 --- a/arlas/cli/model_infering.py +++ b/arlas/cli/model_infering.py @@ -141,26 +141,28 @@ def __type_node__(n, name: str = None) -> str: return "UNDEFINED" # from the typed tree, generate the mapping. -def __generate_mapping__(tree, mapping): +def __generate_mapping__(tree, mapping, no_fulltext: list[str]): if type(tree) is dict: for (k, v) in tree.items(): if k not in ["__type__", "__values__"]: t: str = v.get("__type__") if t == "object": mapping[k] = {"properties": {}} - __generate_mapping__(v, mapping[k]["properties"]) + __generate_mapping__(v, mapping[k]["properties"], no_fulltext) else: if t.startswith("date-"): mapping[k] = {"type": "date", "format": t.split("-")[1]} else: mapping[k] = {"type": t} if t in ["keyword", "text"]: - mapping[k]["copy_to"] = ["internal.fulltext", "internal.autocomplete"] + print("-->{}".format(k)) + if k not in no_fulltext: + mapping[k]["copy_to"] = ["internal.fulltext", "internal.autocomplete"] else: raise Exception("Unexpected state") -def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}): +def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}, no_fulltext: list[str] = []): tree = {} mapping = {} with open(file) as f: @@ -173,7 +175,7 @@ def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}): hit = json.loads(line) __build_tree__(tree, hit) __type_tree__("", tree, types) - __generate_mapping__(tree, mapping) + __generate_mapping__(tree, mapping, no_fulltext) mapping["internal"] = { "properties": { "autocomplete": { diff --git a/scripts/tests.sh 
b/scripts/tests.sh index bc58cbc..69134af 100755 --- a/scripts/tests.sh +++ b/scripts/tests.sh @@ -32,7 +32,7 @@ fi # ---------------------------------------------------------- echo "TEST add direct mapping on ES" -python3 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests create direct_mappping_index --mapping tests/mapping.json +python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests create direct_mappping_index --mapping tests/mapping.json if [ "$? -eq 0" ] ; then echo "OK: Mapping added" else @@ -42,7 +42,7 @@ fi # ---------------------------------------------------------- echo "TEST retrieve direct mapping from ES" -if python3 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests list | grep direct_mappping_index ; then +if python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests list | grep direct_mappping_index ; then echo "OK: direct mapping found" else echo "ERROR: direct mapping not found" @@ -51,7 +51,7 @@ fi # ---------------------------------------------------------- echo "TEST infer mapping and add mapping on ES" -python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests mapping tests/sample.json --nb-lines 200 --field-mapping track.timestamps.center:date-epoch_second --field-mapping track.timestamps.start:date-epoch_second --field-mapping track.timestamps.end:date-epoch_second --push-on courses +python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests mapping tests/sample.json --nb-lines 200 --field-mapping track.timestamps.center:date-epoch_second --field-mapping track.timestamps.start:date-epoch_second --field-mapping track.timestamps.end:date-epoch_second --no-fulltext cargo_type --push-on courses if [ "$? -eq 0" ] ; then echo "OK: Mapping inferred and added" else