From da918cfe287893159447d46d00ec7201020d7543 Mon Sep 17 00:00:00 2001 From: Shinsuke Sugaya Date: Sun, 28 Apr 2024 22:15:02 +0900 Subject: [PATCH] add quantization --- run-elasticsearch.ipynb | 16 ++++-- run-qdrant.ipynb | 42 ++++++++------- run-vespa.ipynb | 13 +++-- run-weaviate.ipynb | 117 ++++++++++++++++++++++++++-------------- 4 files changed, 123 insertions(+), 65 deletions(-) diff --git a/run-elasticsearch.ipynb b/run-elasticsearch.ipynb index 8a2caf5..3ad0c31 100644 --- a/run-elasticsearch.ipynb +++ b/run-elasticsearch.ipynb @@ -45,6 +45,7 @@ " hnsw_ef_construction: int\n", " hnsw_ef: int\n", " update_docs_per_sec: int\n", + " quantization: str\n", "\n", " elasticsearch_name: str = \"benchmark_es\"\n", " elasticsearch_host: str = \"localhost\"\n", @@ -69,6 +70,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " \"1m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -84,6 +86,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " \"5m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -99,6 +102,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " }\n", " return DataSetConfig(**setting.get(target_name))\n" @@ -249,7 +253,13 @@ "outputs": [], "source": [ "def create_index(config, number_of_shards=1, number_of_replicas=0):\n", - " print(F\"Creating {config.index_name}... \", end=\"\")\n", + " if config.exact:\n", + " knn_type = \"flat\"\n", + " if config.quantization == \"int8\":\n", + " knn_type = \"int8_hnsw\"\n", + " else:\n", + " knn_type = \"hnsw\"\n", + " print(F\"Creating {config.index_name} with {knn_type}... \", end=\"\")\n", " response = requests.put(f\"http://{config.elasticsearch_host}:{config.elasticsearch_port}/{config.index_name}\",\n", " headers={\"Content-Type\": \"application/json\"},\n", " json={\n", @@ -278,10 +288,10 @@ " \"embedding\": {\n", " \"type\": \"dense_vector\",\n", " \"dims\": config.dimension,\n", - " \"index\": not config.exact,\n", + " \"index\": True,\n", " \"similarity\": config.distance,\n", " \"index_options\": {\n", - " \"type\": \"int8_hnsw\",\n", + " \"type\": knn_type,\n", " \"m\" : config.hnsw_m,\n", " \"ef_construction\" : config.hnsw_ef_construction\n", " }\n", diff --git a/run-qdrant.ipynb b/run-qdrant.ipynb index d05b33a..9997868 100644 --- a/run-qdrant.ipynb +++ b/run-qdrant.ipynb @@ -45,6 +45,7 @@ " hnsw_ef_construction: int\n", " hnsw_ef: int\n", " update_docs_per_sec: int\n", + " quantization: str\n", "\n", " qdrant_name: str = \"benchmark_qdrant\"\n", " qdrant_host: str = \"localhost\"\n", @@ -68,6 +69,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " \"1m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -83,6 +85,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " \"5m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -98,6 +101,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"int8\",\n", " },\n", " }\n", " return DataSetConfig(**setting.get(target_name))\n" @@ -244,26 +248,28 @@ "outputs": [], "source": [ "def create_index(config):\n", - " print(F\"Creating Collection {config.index_name}... \", end=\"\")\n", + " print(F\"Creating Collection {config.index_name} with {config.quantization}... \", end=\"config.quantization\")\n", + " schema = {\n", + " \"vectors\": {\n", + " \"size\": config.dimension,\n", + " \"distance\": config.distance,\n", + " \"hnsw_config\": {\n", + " \"m\": config.hnsw_m,\n", + " \"ef_construction\": config.hnsw_ef_construction,\n", + " }\n", + " }\n", + " }\n", + " if config.quantization == \"int8\":\n", + " schema[\"quantization_config\"] = {\n", + " \"scalar\": {\n", + " \"type\": \"int8\",\n", + " \"quantile\": 0.99,\n", + " \"always_ram\": True\n", + " }\n", + " }\n", " response = requests.put(f\"http://{config.qdrant_host}:{config.qdrant_port}/collections/{config.index_name}\",\n", " headers={\"Content-Type\": \"application/json\"},\n", - " json={\n", - " \"vectors\": {\n", - " \"size\": config.dimension,\n", - " \"distance\": config.distance,\n", - " \"hnsw_config\": {\n", - " \"m\": config.hnsw_m,\n", - " \"ef_construction\": config.hnsw_ef_construction,\n", - " }\n", - " },\n", - " \"quantization_config\": {\n", - " \"scalar\": {\n", - " \"type\": \"int8\",\n", - " \"quantile\": 0.99,\n", - " \"always_ram\": True\n", - " }\n", - " }\n", - " })\n", + " json=schema)\n", " if response.status_code == 200:\n", " print(\"[OK]\")\n", " else:\n", diff --git a/run-vespa.ipynb b/run-vespa.ipynb index 3236588..01df7d7 100644 --- a/run-vespa.ipynb +++ b/run-vespa.ipynb @@ -56,6 +56,7 @@ " hnsw_ef_construction: int\n", " hnsw_ef: int\n", " update_docs_per_sec: int\n", + " quantization: str\n", "\n", " vespa_name: str = \"benchmark_vespa\"\n", " vespa_host: str = \"localhost\"\n", @@ -80,6 +81,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"bfloat16\",\n", " },\n", " \"1m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -95,6 +97,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"bfloat16\",\n", " },\n", " \"5m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -110,6 +113,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"bfloat16\",\n", " },\n", " }\n", " return DataSetConfig(**setting.get(target_name))\n" @@ -305,7 +309,7 @@ " indexing: index | summary\n", " index: enable-bm25\n", " }\n", - " field embedding type tensor(x[{dimension}]) {\n", + " field embedding type tensor<{float_type}>(x[{dimension}]) {\n", " indexing: attribute | index\n", " attribute {\n", " distance-metric: {distance}\n", @@ -334,8 +338,8 @@ " match-features: distance(field, embedding)\n", "\n", " inputs {\n", - " query(q) tensor(x[{dimension}])\n", - " query(qa) tensor(x[{dimension}])\n", + " query(q) tensor<{float_type}>(x[{dimension}])\n", + " query(qa) tensor<{float_type}>(x[{dimension}])\n", " }\n", "\n", " first-phase {\n", @@ -347,7 +351,8 @@ " .replace(\"{distance}\", str(config.distance))\\\n", " .replace(\"{dimension}\", str(config.dimension))\\\n", " .replace(\"{hnsw_m}\", str(config.hnsw_m))\\\n", - " .replace(\"{hnsw_ef_construction}\", str(config.hnsw_ef_construction))\n", + " .replace(\"{hnsw_ef_construction}\", str(config.hnsw_ef_construction))\\\n", + " .replace(\"{float_type}\", config.quantization)\n", "\n", " query_profile_str = \"\"\"\n", "\n", diff --git a/run-weaviate.ipynb b/run-weaviate.ipynb index 9fb185b..74ef594 100644 --- a/run-weaviate.ipynb +++ b/run-weaviate.ipynb @@ -44,6 +44,7 @@ " hnsw_ef_construction: int\n", " hnsw_ef: int\n", " update_docs_per_sec: int\n", + " quantization: str\n", "\n", " weaviate_name: str = \"benchmark_weaviate\"\n", " weaviate_host: str = \"localhost\"\n", @@ -66,6 +67,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"none\", # \"pq\",\n", " },\n", " \"1m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -80,6 +82,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"none\", # \"pq\",\n", " },\n", " \"5m-768-m49-ef100-ip\": {\n", " \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n", @@ -94,6 +97,7 @@ " \"hnsw_ef_construction\": 200,\n", " \"hnsw_ef\": 100,\n", " \"update_docs_per_sec\": 0,\n", + " \"quantization\": \"none\", # \"pq\",\n", " },\n", " }\n", " return DataSetConfig(**setting.get(target_name))\n" @@ -115,6 +119,7 @@ " \"--name\", config.weaviate_name,\n", " \"-p\", f\"{config.weaviate_port}:8080\",\n", " # \"-v\", f\"{volume_dir}:/data/content\",\n", + " \"-e\", \"ASYNC_INDEXING=true\", # to enable AutoPQ\n", " f\"cr.weaviate.io/semitechnologies/weaviate:{config.weaviate_version}\"\n", " ]\n", " result = subprocess.run(docker_cmd, capture_output=True, text=True)\n", @@ -240,48 +245,54 @@ "outputs": [], "source": [ "def create_index(config):\n", - " print(F\"Creating {config.index_name}... \", end=\"\")\n", + " print(F\"Creating {config.index_name} with {config.quantization}... \", end=\"\")\n", + " schema = {\n", + " \"class\": config.index_name,\n", + " \"vectorIndexType\": \"hnsw\",\n", + " \"vectorIndexConfig\": {\n", + " \"distance\": config.distance,\n", + " \"maxConnections\": config.hnsw_m,\n", + " \"ef\": config.hnsw_ef,\n", + " \"efConstruction\": config.hnsw_ef_construction,\n", + " },\n", + " \"properties\": [\n", + " {\n", + " \"name\": \"doc_id\",\n", + " \"dataType\": [\"int\"]\n", + " },\n", + " {\n", + " \"name\": \"page_id\",\n", + " \"dataType\": [\"int\"]\n", + " },\n", + " {\n", + " \"name\": \"rev_id\",\n", + " \"dataType\": [\"int\"]\n", + " },\n", + " {\n", + " \"name\": \"section\",\n", + " \"dataType\": [\"string\"],\n", + " \"indexInverted\": True\n", + " },\n", + " {\n", + " \"name\": \"text\",\n", + " \"dataType\": [\"text\"],\n", + " \"indexInverted\": True\n", + " },\n", + " {\n", + " \"name\": \"title\",\n", + " \"dataType\": [\"text\"],\n", + " \"indexInverted\": True\n", + " }\n", + " ]\n", + " }\n", + " if config.quantization == \"pq\":\n", + " schema[\"vectorIndexConfig\"][\"pq\"] = {\n", + " \"enabled\": True,\n", + " \"trainingLimit\": 100000 - 10000,\n", + " }\n", " response = requests.post(f\"http://{config.weaviate_host}:{config.weaviate_port}/v1/schema\",\n", " headers={\"Content-Type\": \"application/json\"},\n", - " json={\n", - " \"class\": config.index_name,\n", - " \"vectorIndexType\": \"hnsw\",\n", - " \"vectorIndexConfig\": {\n", - " \"distance\": config.distance,\n", - " \"maxConnections\": config.hnsw_m,\n", - " \"ef\": config.hnsw_ef,\n", - " \"efConstruction\": config.hnsw_ef_construction,\n", - " },\n", - " \"properties\": [\n", - " {\n", - " \"name\": \"doc_id\",\n", - " \"dataType\": [\"int\"]\n", - " },\n", - " {\n", - " \"name\": \"pageId\",\n", - " \"dataType\": [\"int\"]\n", - " },\n", - " {\n", - " \"name\": \"revId\",\n", - " \"dataType\": [\"int\"]\n", - " },\n", - " {\n", - " \"name\": \"section\",\n", - " \"dataType\": [\"string\"],\n", - " \"indexInverted\": True\n", - " },\n", - " {\n", - " \"name\": \"text\",\n", - " \"dataType\": [\"text\"],\n", - " \"indexInverted\": True\n", - " },\n", - " {\n", - " \"name\": \"title\",\n", - " \"dataType\": [\"text\"],\n", - " \"indexInverted\": True\n", - " }\n", - " ]\n", - " })\n", + " json=schema)\n", " if response.status_code == 200:\n", " print(\"[OK]\")\n", " else:\n", @@ -352,6 +363,30 @@ " print(\" [FAIL]\")\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a6c35f9-e61d-402a-b83e-38059dd1d1d9", + "metadata": {}, + "outputs": [], + "source": [ + "def wait_for_indexing(config, retry_count=60):\n", + " print(f\"Waiting for {config.index_name}\", end=\"\")\n", + " for i in range(retry_count):\n", + " try:\n", + " response = requests.get(f\"http://{config.weaviate_host}:{config.weaviate_port}/v1/schema/{config.index_name}/shards\")\n", + " if response.status_code == 200:\n", + " obj = json.loads(response.text)\n", + " if obj is not None and len(obj) > 0 and obj[0].get(\"status\") == \"READY\":\n", + " print(\" [OK]\") \n", + " return\n", + " except:\n", + " pass\n", + " print(\".\", end=\"\")\n", + " time.sleep(1)\n", + " print(\" [FAIL]\")\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -432,6 +467,8 @@ " if len(docs) > 0:\n", " total_time += send_data(count)\n", "\n", + " wait_for_indexing(config)\n", + "\n", " execution_time = time.time() - start_time\n", " hours, remainder = divmod(execution_time, 3600)\n", " minutes, seconds = divmod(remainder, 60)\n",