From b528b455e13edc95a933ba63a0bba0764d2f29f9 Mon Sep 17 00:00:00 2001 From: "xunjian.sl" Date: Thu, 19 Dec 2024 12:53:07 +0800 Subject: [PATCH 1/3] TablestoreVectorStore check the Dimension of the embedding when writing it to the vector store. --- .../vector_stores/tablestore/base.py | 7 ++++++ .../tests/test_vector_stores_tablestore.py | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/llama_index/vector_stores/tablestore/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/llama_index/vector_stores/tablestore/base.py index e8eac6f35d030..e058a82235bc3 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/llama_index/vector_stores/tablestore/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/llama_index/vector_stores/tablestore/base.py @@ -54,6 +54,7 @@ class TablestoreVectorStore(BasePydanticVectorStore): is_embedding_query: bool = True stores_text: bool = True + _vector_dimension: int = PrivateAttr(default=512) _logger: Any = PrivateAttr(default=None) _tablestore_client: tablestore.OTSClient = PrivateAttr(default=None) _table_name: str = PrivateAttr(default="llama_index_vector_store_ots_v1") @@ -91,6 +92,7 @@ def __init__( ) else: self._tablestore_client = tablestore_client + self._vector_dimension = vector_dimension self._table_name = table_name self._index_name = index_name self._text_field = text_field @@ -632,6 +634,11 @@ def add(self, nodes: List[BaseNode], **kwargs: Any) -> List[str]: return [] ids = [] for node in nodes: + if len(node.get_embedding()) != self._vector_dimension: + raise RuntimeError( + "node embedding size:%d is not the same as vector store dim:%d" + % (len(node.get_embedding()), self._vector_dimension) + ) self._write_row( row_id=node.node_id, content=node.text, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py index 7e3abe2d70365..e49830f8cde81 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py @@ -165,3 +165,27 @@ def test_tablestore() -> None: assert query_result.ids is not None assert query_result.similarities is not None assert query_result.similarities is not None + + +def test_tablestore_add_dim() -> None: + store = TablestoreVectorStore( + endpoint=os.getenv("end_point"), + instance_name=os.getenv("instance_name"), + access_key_id=os.getenv("access_key_id"), + access_key_secret=os.getenv("access_key_secret"), + vector_dimension=512, + vector_metric_type=tablestore.VectorMetricType.VM_COSINE, + ) + embedder = MockEmbedding(128) + node = TextNode( + id_="1", + text="hello world", + metadata={"type": "a", "time": 1995}, + ) + node.embedding = embedder.get_text_embedding(node.get_text()) + + try: + store.add([node]) + raise RuntimeError("should failed") + except Exception as e: + assert "not the same as" in e.args[0] From 8f8596a05441e4a4e4217fc3376add3edb78d406 Mon Sep 17 00:00:00 2001 From: "xunjian.sl" Date: Thu, 19 Dec 2024 13:06:22 +0800 Subject: [PATCH 2/3] fix case --- .../tests/test_vector_stores_tablestore.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py index e49830f8cde81..c01d401607e78 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/tests/test_vector_stores_tablestore.py @@ -169,10 +169,10 @@ def test_tablestore() -> None: def test_tablestore_add_dim() -> None: store = TablestoreVectorStore( - endpoint=os.getenv("end_point"), - instance_name=os.getenv("instance_name"), - access_key_id=os.getenv("access_key_id"), - access_key_secret=os.getenv("access_key_secret"), + endpoint="http://test.a.com", + instance_name="test", + access_key_id="test", + access_key_secret="test", vector_dimension=512, vector_metric_type=tablestore.VectorMetricType.VM_COSINE, ) From 9ffff008da0e85f223f03aedfc33e381012185fa Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Mon, 23 Dec 2024 11:01:48 -0600 Subject: [PATCH 3/3] vbump --- .../llama-index-vector-stores-tablestore/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/pyproject.toml index bf8665ed25973..e107754490e11 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-tablestore/pyproject.toml @@ -27,7 +27,7 @@ license = "MIT" name = "llama-index-vector-stores-tablestore" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.0" +version = "0.2.1" [tool.poetry.dependencies] python = ">=3.9,<4.0"