diff --git a/run-elasticsearch.ipynb b/run-elasticsearch.ipynb index c8472ba..a4e53f2 100644 --- a/run-elasticsearch.ipynb +++ b/run-elasticsearch.ipynb @@ -949,7 +949,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size)\n", " stop_update()\n", - " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}.jsonl.gz\")" + " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")" ] }, { @@ -977,7 +977,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, explain=False, track_total_hits=False, offset=dataset_config.index_size, pre_filter=pre_filter_generator())\n", " stop_update()\n", - " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}_filtered.jsonl.gz\")" + " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")" ] }, { diff --git a/run-qdrant.ipynb b/run-qdrant.ipynb index a8b53f4..2548e41 100644 --- a/run-qdrant.ipynb +++ b/run-qdrant.ipynb @@ -813,7 +813,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, exact=dataset_config.exact)\n", " stop_update()\n", - " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}.jsonl.gz\")" + " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")" ] }, { @@ -846,7 +846,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator(), exact=dataset_config.exact)\n", " stop_update()\n", - " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}_filtered.jsonl.gz\")" + " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")" ] }, { diff --git a/run-vespa.ipynb b/run-vespa.ipynb index 93a3d08..3386d68 100644 --- a/run-vespa.ipynb +++ b/run-vespa.ipynb @@ -901,7 +901,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, exact=dataset_config.exact)\n", " stop_update()\n", - " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}.jsonl.gz\")" + " results[f\"top_{page_size}\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}.jsonl.gz\")" ] }, { @@ -925,7 +925,7 @@ " search_with_knn_queries(dataset_config, filename, page_size=page_size, max_size=1000, pre_filter=pre_filter_generator()) # warmup\n", " search_with_knn_queries(dataset_config, filename, page_size=page_size, offset=dataset_config.index_size, pre_filter=pre_filter_generator(), exact=dataset_config.exact)\n", " stop_update()\n", - " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/knn_{page_size}_filtered.jsonl.gz\")" + " results[f\"top_{page_size}_filtered\"] = print_took_and_total_hits(page_size, filename, f\"dataset/ground_truth/{target_config}/knn_{page_size}_filtered.jsonl.gz\")" ] }, { diff --git a/scripts/setup.sh b/scripts/setup.sh index d6b5682..6f4d7f4 100644 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -26,10 +26,11 @@ fi # wikipedia contents data_type=passages-c400-jawiki-20230403 model_type=multilingual-e5-base-passage +setting_type=100k-768-m32-ef100-ip data_dir="${base_dir}/../dataset/${data_type}" output_dir="${base_dir}/../output" -truth_dir="${base_dir}/../dataset/ground_truth" +truth_dir="${base_dir}/../dataset/ground_truth/${setting_type}" mkdir -p "${data_dir}" "${output_dir}" @@ -71,7 +72,7 @@ for truth_file in ${truth_files} ; do if [[ ! -f "${truth_dir}/${truth_file}" ]] ; then echo -n "Downloading ${truth_file}... " curl -sL -o "${truth_dir}/${truth_file}" \ - "https://codelibs.co/download/ann/benchmark/${truth_file}" || exit 1 + "https://codelibs.co/download/ann/benchmark/${setting_type}/${truth_file}" || exit 1 echo "[OK]" fi done