Skip to content

Commit

Permalink
Merge pull request #155 from Azure-Samples/cosineoperator
Browse files Browse the repository at this point in the history
Update HNSW indexes to use cosine operator
  • Loading branch information
pamelafox authored Jan 7, 2025
2 parents 3b938d6 + 909b833 commit 9a6f6dd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/app-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
if: matrix.os == 'ubuntu-latest'
run: |
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
sudo apt-get install postgresql-14-pgvector
sudo apt-get install postgresql-16-pgvector
sudo systemctl start postgresql
sudo -u postgres psql -c "ALTER USER ${{ env.POSTGRES_USERNAME }} PASSWORD '${{ env.POSTGRES_PASSWORD }}'"
sudo -u postgres psql -c 'CREATE EXTENSION vector'
Expand Down
19 changes: 13 additions & 6 deletions src/backend/fastapi_app/postgres_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,30 @@ def to_str_for_embedding(self):
return f"Name: {self.name} Description: {self.description} Type: {self.type}"


# Define HNSW index to support vector similarity search
# Use the vector_ip_ops access method (inner product) since these embeddings are normalized
"""
**Define HNSW index to support vector similarity search**
We use the vector_cosine_ops access method (cosine distance)
since it works for both normalized and non-normalized vector embeddings.
If you know your embeddings are normalized,
you can switch to inner product for potentially better performance.
The index operator should match the operator used in queries.
"""

table_name = Item.__tablename__

# HNSW index over the ada-002 embedding column, using the cosine-distance
# operator class. The name is an f-string so that {table_name} is interpolated
# into the index name instead of being stored literally.
# NOTE(review): a database that already holds an index created under the old
# literal name will see this as a new index name -- confirm the migration
# story before deploying.
index_ada002 = Index(
    f"hnsw_index_for_cosine_{table_name}_embedding_ada002",
    Item.embedding_ada002,
    postgresql_using="hnsw",
    postgresql_with={"m": 16, "ef_construction": 64},
    postgresql_ops={"embedding_ada002": "vector_cosine_ops"},
)

# HNSW index over the nomic embedding column, using the cosine-distance
# operator class. Only the post-change arguments are kept: passing both the
# inner-product and cosine variants would repeat the postgresql_ops keyword,
# which is a syntax error.
index_nomic = Index(
    f"hnsw_index_for_cosine_{table_name}_embedding_nomic",
    Item.embedding_nomic,
    postgresql_using="hnsw",
    postgresql_with={"m": 16, "ef_construction": 64},
    postgresql_ops={"embedding_nomic": "vector_cosine_ops"},
)

0 comments on commit 9a6f6dd

Please sign in to comment.