From cae84d9c82d7283ac2045a56974eb05d1b93a2ae Mon Sep 17 00:00:00 2001
From: Pamela Fox
Date: Tue, 7 Jan 2025 20:19:50 +0000
Subject: [PATCH] Update operator used in index

---
 src/backend/fastapi_app/postgres_models.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/backend/fastapi_app/postgres_models.py b/src/backend/fastapi_app/postgres_models.py
index 54bc1f6..0ed9e99 100644
--- a/src/backend/fastapi_app/postgres_models.py
+++ b/src/backend/fastapi_app/postgres_models.py
@@ -39,23 +39,30 @@ def to_str_for_embedding(self):
         return f"Name: {self.name} Description: {self.description} Type: {self.type}"
 
 
-# Define HNSW index to support vector similarity search
-# Use the vector_ip_ops access method (inner product) since these embeddings are normalized
+"""
+**Define HNSW index to support vector similarity search**
+
+We use the vector_cosine_ops access method (cosine distance)
+ since it works for both normalized and non-normalized vector embeddings.
+If you know your embeddings are normalized,
+ you can switch to inner product for potentially better performance.
+The index operator should match the operator used in queries.
+"""
 
 table_name = Item.__tablename__
 
 index_ada002 = Index(
-    "hnsw_index_for_innerproduct_{table_name}_embedding_ada002",
+    f"hnsw_index_for_cosine_{table_name}_embedding_ada002",
     Item.embedding_ada002,
     postgresql_using="hnsw",
     postgresql_with={"m": 16, "ef_construction": 64},
-    postgresql_ops={"embedding_ada002": "vector_ip_ops"},
+    postgresql_ops={"embedding_ada002": "vector_cosine_ops"},
 )
 
 index_nomic = Index(
-    f"hnsw_index_for_innerproduct_{table_name}_embedding_nomic",
+    f"hnsw_index_for_cosine_{table_name}_embedding_nomic",
     Item.embedding_nomic,
     postgresql_using="hnsw",
     postgresql_with={"m": 16, "ef_construction": 64},
-    postgresql_ops={"embedding_nomic": "vector_ip_ops"},
+    postgresql_ops={"embedding_nomic": "vector_cosine_ops"},
 )