Skip to content

Commit

Permalink
add -no-fulltext option
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvaingaudan committed Jul 31, 2024
1 parent e8d26ea commit 4d7fe0a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
5 changes: 3 additions & 2 deletions arlas/cli/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def data(
def mapping(
file: str = typer.Argument(help="Path to the file conaining the data. Format: NDJSON"),
nb_lines: int = typer.Option(default=2, help="Number of line to consider for generating the mapping. Avoid going over 10."),
field_mapping: list[str] = typer.Option(default=[], help="Overide the mapping with the provided field/type. Example: fragment.location:geo_point"),
field_mapping: list[str] = typer.Option(default=[], help="Overide the mapping with the provided field path/type. Example: fragment.location:geo_point. Important: the full field path must be provided."),
no_fulltext: list[str] = typer.Option(default=[], help="List of keyword or text fields that should not be in the fulltext search. Important: the field name only must be provided."),
push_on: str = typer.Option(default=None, help="Push the generated mapping for the provided index name"),
):
config = variables["arlas"]
Expand All @@ -104,7 +105,7 @@ def mapping(
else:
print("Error: invalid field_mapping \"{}\". The format is \"field:type\" like \"fragment.location:geo_point\"".format(fm), file=sys.stderr)
exit(1)
mapping = make_mapping(file=file, nb_lines=nb_lines, types=types)
mapping = make_mapping(file=file, nb_lines=nb_lines, types=types, no_fulltext=no_fulltext)
if push_on and config:
Service.create_index(
config,
Expand Down
12 changes: 7 additions & 5 deletions arlas/cli/model_infering.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,26 +141,28 @@ def __type_node__(n, name: str = None) -> str:
return "UNDEFINED"

# from the typed tree, generate the mapping.
def __generate_mapping__(tree, mapping, no_fulltext: list[str] = None):
    """Fill ``mapping`` with the field definitions derived from the typed ``tree``.

    ``tree`` is a dict whose entries (other than the bookkeeping keys
    ``__type__`` and ``__values__``) are child nodes; each child node is a
    dict carrying its own ``__type__`` string.

    * ``object`` nodes become ``{"properties": {...}}`` and are processed
      recursively.
    * ``date-<format>`` nodes become ``{"type": "date", "format": <format>}``.
    * Any other type is emitted as ``{"type": <type>}``; ``keyword`` and
      ``text`` fields are additionally copied to ``internal.fulltext`` and
      ``internal.autocomplete`` unless their name is listed in
      ``no_fulltext``.

    Args:
        tree: typed tree node (a dict) to convert.
        mapping: dict updated in place with the generated field definitions.
        no_fulltext: field names (the key only, not the full path) that must
            not get a ``copy_to``. ``None`` or empty means no exclusion.

    Raises:
        Exception: if ``tree`` is not a dict.
    """
    if not isinstance(tree, dict):
        raise Exception("Unexpected state")
    excluded = no_fulltext or ()
    for (k, v) in tree.items():
        # Bookkeeping entries describe the current node, they are not fields.
        if k in ("__type__", "__values__"):
            continue
        t: str = v.get("__type__")
        if t == "object":
            mapping[k] = {"properties": {}}
            __generate_mapping__(v, mapping[k]["properties"], no_fulltext)
        elif t.startswith("date-"):
            # Split on the first hyphen only: the format itself may contain
            # hyphens (e.g. "date-yyyy-MM-dd" must yield "yyyy-MM-dd").
            mapping[k] = {"type": "date", "format": t.split("-", 1)[1]}
        else:
            mapping[k] = {"type": t}
            if t in ("keyword", "text") and k not in excluded:
                mapping[k]["copy_to"] = ["internal.fulltext", "internal.autocomplete"]


def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}):
def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}, no_fulltext: list[str] = []):
tree = {}
mapping = {}
with open(file) as f:
Expand All @@ -173,7 +175,7 @@ def make_mapping(file: str, nb_lines: int = 2, types: dict[str, str] = {}):
hit = json.loads(line)
__build_tree__(tree, hit)
__type_tree__("", tree, types)
__generate_mapping__(tree, mapping)
__generate_mapping__(tree, mapping, no_fulltext)
mapping["internal"] = {
"properties": {
"autocomplete": {
Expand Down
6 changes: 3 additions & 3 deletions scripts/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fi

# ----------------------------------------------------------
echo "TEST add direct mapping on ES"
python3 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests create direct_mappping_index --mapping tests/mapping.json
python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests create direct_mappping_index --mapping tests/mapping.json
if [ "$? -eq 0" ] ; then
echo "OK: Mapping added"
else
Expand All @@ -42,7 +42,7 @@ fi

# ----------------------------------------------------------
echo "TEST retrieve direct mapping from ES"
if python3 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests list | grep direct_mappping_index ; then
if python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests list | grep direct_mappping_index ; then
echo "OK: direct mapping found"
else
echo "ERROR: direct mapping not found"
Expand All @@ -51,7 +51,7 @@ fi

# ----------------------------------------------------------
echo "TEST infer mapping and add mapping on ES"
python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests mapping tests/sample.json --nb-lines 200 --field-mapping track.timestamps.center:date-epoch_second --field-mapping track.timestamps.start:date-epoch_second --field-mapping track.timestamps.end:date-epoch_second --push-on courses
python3.10 -m arlas.cli.cli --config-file /tmp/arlas_cli.yaml indices --config tests mapping tests/sample.json --nb-lines 200 --field-mapping track.timestamps.center:date-epoch_second --field-mapping track.timestamps.start:date-epoch_second --field-mapping track.timestamps.end:date-epoch_second --no-fulltext cargo_type --push-on courses
if [ "$? -eq 0" ] ; then
echo "OK: Mapping inferred and added"
else
Expand Down

0 comments on commit 4d7fe0a

Please sign in to comment.