"""Milvus-backed vector store holding question/answer pairs and documents."""
import logging
import uuid
from typing import Dict, Iterable, List, Optional

from pydantic import Field
from pymilvus import DataType, MilvusClient, model

from pandasai.helpers.logger import Logger
from pandasai.vectorstores.vectorstore import VectorStore

DEFAULT_COLLECTION_NAME = "pandasai"
UUID_NAMESPACE = "f55f1395-e097-4f35-8c20-90fdea7baa14"
ID = "id"
EMBEDDING = "vector"
DOCUMENT = "document"
URI = "milvus_demo.db"

# Key under which optional per-entity metadata is stored (dynamic field).
_METADATA = "metadata"
# Upper bound for the VARCHAR primary key; converted IDs are UUID strings.
_MAX_ID_LENGTH = 1000
# Largest VARCHAR length Milvus documents as valid; formatted Q/A text that
# embeds generated code easily exceeds the previous limit of 1000.
_MAX_DOCUMENT_LENGTH = 65535


class Milvus(VectorStore):
    """Vector store that keeps two Milvus collections per base name.

    ``<collection_name>_qa`` holds formatted question/answer pairs and
    ``<collection_name>_docs`` holds free-form documents. Both collections
    use the same schema: a VARCHAR primary key, a float vector and the
    original text, with the dynamic field enabled for optional metadata.
    """

    # NOTE(review): VectorStore is not visible from here. If it is not a
    # pydantic model, these class attributes are ``Field`` placeholder
    # objects rather than integers, which is why the instance values are
    # assigned from the embedding function before a collection is created.
    qa_dimension: int = Field(
        default=384, description="default embedding model dimension"
    )

    docs_dimension: int = Field(
        default=384, description="default embedding model dimension"
    )

    def __init__(
        self,
        collection_name: Optional[str] = DEFAULT_COLLECTION_NAME,
        uri: Optional[str] = URI,
        similarity_threshold: Optional[float] = None,
        logger: Optional[Logger] = None,
    ):
        """Create the Milvus client and the default embedding function.

        Args:
            collection_name: Base name; ``_docs`` and ``_qa`` are appended.
                ``None`` falls back to ``DEFAULT_COLLECTION_NAME``.
            uri: Location handed to ``MilvusClient``. ``None`` falls back
                to ``URI``.
            similarity_threshold: When set, search hits whose reported
                ``distance`` is below this value are dropped.
            logger: Logger to use; a new ``Logger`` is created if omitted.
        """
        base_name = collection_name or DEFAULT_COLLECTION_NAME
        self.docs_collection_name = f"{base_name}_docs"
        self.qa_collection_name = f"{base_name}_qa"
        self.uri = uri or URI
        self._logger = logger or Logger()
        self.similarity_threshold = similarity_threshold
        self.emb_function = model.DefaultEmbeddingFunction()
        self.client = MilvusClient(uri=self.uri)

    def add_question_answer(
        self,
        queries: Iterable[str],
        codes: Iterable[str],
        ids: Iterable[str] = None,
        metadatas: List[Dict] = None,
    ) -> List[str]:
        """Embed and insert question/answer pairs into the QA collection.

        Args:
            queries: Questions.
            codes: Answers, one per question.
            ids: Optional IDs; random UUIDs are generated when omitted.
            metadatas: Optional metadata dicts, one per pair.

        Returns:
            The IDs under which the pairs were stored.

        Raises:
            ValueError: If queries/codes (or ids/metadatas, when given)
                differ in length.
        """
        # Materialize once so generators can be measured and re-iterated.
        queries = list(queries)
        codes = list(codes)
        if len(queries) != len(codes):
            raise ValueError(
                f"Queries and codes length doesn't match. {len(queries)} != {len(codes)}"
            )
        if not queries:
            return []

        format_qa = [
            self._format_qa(query, code) for query, code in zip(queries, codes)
        ]
        milvus_ids = self._resolve_ids(ids, len(format_qa))
        rows = self._build_rows(milvus_ids, format_qa, metadatas)

        self.qa_dimension = self.emb_function.dim
        if not self.client.has_collection(collection_name=self.qa_collection_name):
            self._initiate_qa_collection()

        self.client.insert(collection_name=self.qa_collection_name, data=rows)
        return milvus_ids

    def add_docs(
        self,
        docs: Iterable[str],
        ids: Iterable[str] = None,
        metadatas: List[Dict] = None,
    ) -> List[str]:
        """Embed and insert documents into the document collection.

        Args:
            docs: Document texts.
            ids: Optional IDs; random UUIDs are generated when omitted.
            metadatas: Optional metadata dicts, one per document.

        Returns:
            The IDs under which the documents were stored.

        Raises:
            ValueError: If ids or metadatas are given with a length that
                differs from ``docs``.
        """
        docs = list(docs)
        if not docs:
            return []

        milvus_ids = self._resolve_ids(ids, len(docs))
        rows = self._build_rows(milvus_ids, docs, metadatas)

        # The QA path already did this; without it the schema would be
        # created from the class-level placeholder instead of a real size.
        self.docs_dimension = self.emb_function.dim
        if not self.client.has_collection(collection_name=self.docs_collection_name):
            self._initiate_docs_collection()

        self.client.insert(collection_name=self.docs_collection_name, data=rows)
        return milvus_ids

    def get_relevant_question_answers(
        self, question: str, k: int = 1
    ) -> Dict[str, List]:
        """Return the top-``k`` QA entries closest to ``question``.

        The result is a dict with ``documents``, ``distances``,
        ``metadatas`` and ``ids`` lists; all are empty when the QA
        collection does not exist.
        """
        return self._search(self.qa_collection_name, question, k)

    def get_relevant_docs(self, question: str, k: int = 1) -> Dict[str, List]:
        """Return the top-``k`` documents closest to ``question``.

        The result is a dict with ``documents``, ``distances``,
        ``metadatas`` and ``ids`` lists; all are empty when the document
        collection does not exist.
        """
        return self._search(self.docs_collection_name, question, k)

    def _search(self, collection_name: str, question: str, k: int) -> Dict[str, List]:
        """Run one similarity search and convert the response."""
        if not self.client.has_collection(collection_name=collection_name):
            return self._convert_search_response([])

        # The embedding function works on a list of texts; a bare string
        # would be iterated character by character.
        vectors = self.emb_function.encode_queries([question])
        response = self.client.search(
            collection_name=collection_name,
            data=vectors,
            limit=k,
            # Metadata lives in the dynamic field and is only returned
            # when requested explicitly.
            output_fields=[DOCUMENT, _METADATA],
        )
        return self._convert_search_response(response)

    def _convert_search_response(self, response) -> Dict[str, List]:
        """Flatten the hits of the first query of a search response.

        Hits below ``similarity_threshold`` (when one is configured) are
        skipped. NOTE(review): this relies on Milvus' COSINE metric
        reporting larger ``distance`` values for closer vectors.
        ``metadatas`` only contains entries for hits that carry metadata,
        so it can be shorter than the other lists.
        """
        documents = []
        ids = []
        metadatas = []
        distances = []

        hits = response[0] if response else []
        threshold = getattr(self, "similarity_threshold", None)
        for hit in hits:
            distance = hit["distance"]
            if threshold is not None and distance < threshold:
                continue
            entity = hit["entity"]
            documents.append(entity[DOCUMENT])
            ids.append(hit[ID])
            if _METADATA in entity:
                metadatas.append(entity[_METADATA])
            distances.append(distance)

        return {
            "documents": documents,
            "distances": distances,
            "metadatas": metadatas,
            "ids": ids,
        }

    def _create_collection(self, collection_name: str, dimension: int) -> None:
        """Create a collection with the schema shared by QA and docs."""
        if not isinstance(dimension, int):
            # Still the class-level placeholder: ask the embedding model.
            dimension = self.emb_function.dim

        schema = MilvusClient.create_schema(
            auto_id=False,
            enable_dynamic_field=True,
        )
        # VARCHAR fields need an explicit max_length, primary key included.
        schema.add_field(
            field_name=ID,
            datatype=DataType.VARCHAR,
            max_length=_MAX_ID_LENGTH,
            is_primary=True,
        )
        schema.add_field(
            field_name=EMBEDDING, datatype=DataType.FLOAT_VECTOR, dim=dimension
        )
        schema.add_field(
            field_name=DOCUMENT,
            datatype=DataType.VARCHAR,
            max_length=_MAX_DOCUMENT_LENGTH,
        )

        index_params = self.client.prepare_index_params()
        index_params.add_index(field_name=ID)
        index_params.add_index(field_name=EMBEDDING, metric_type="COSINE")
        self.client.create_collection(
            collection_name=collection_name,
            schema=schema,
            index_params=index_params,
        )

    def _initiate_qa_collection(self):
        """Create the QA collection sized by ``qa_dimension``."""
        self._create_collection(self.qa_collection_name, self.qa_dimension)

    def _initiate_docs_collection(self):
        """Create the document collection sized by ``docs_dimension``."""
        self._create_collection(self.docs_collection_name, self.docs_dimension)

    def get_relevant_docs_documents(self, question: str, k: int = 1) -> List[str]:
        """Return only the texts of the top-``k`` relevant documents."""
        return self.get_relevant_docs(question, k)["documents"]

    def get_relevant_qa_documents(self, question: str, k: int = 1) -> List[str]:
        """Return only the texts of the top-``k`` relevant QA entries."""
        return self.get_relevant_question_answers(question, k)["documents"]

    def get_relevant_question_answers_by_id(self, ids: Iterable[str]) -> List[str]:
        """Return the stored QA texts for the given IDs.

        IDs are converted the same way as on insertion. An empty list is
        returned when no IDs are given or the QA collection is missing.
        """
        milvus_ids = self._convert_ids(ids or [])
        if not milvus_ids or not self.client.has_collection(
            collection_name=self.qa_collection_name
        ):
            return []

        # query() yields a flat list of row dicts (not the nested
        # hit/entity layout of search()), so rows are read directly.
        rows = self.client.query(
            collection_name=self.qa_collection_name,
            ids=milvus_ids,
            output_fields=[DOCUMENT],
        )
        return [row[DOCUMENT] for row in rows]

    def delete_docs(self, ids: List[str] = None) -> bool:
        """Delete documents by ID.

        Returns ``True`` when a delete was issued and ``False`` when
        there was nothing to delete (no IDs or no collection).
        """
        return self._delete_by_ids(self.docs_collection_name, ids)

    def delete_question_and_answers(self, ids: List[str] = None) -> bool:
        """Delete question/answer pairs by ID.

        Returns ``True`` when a delete was issued and ``False`` when
        there was nothing to delete (no IDs or no collection).
        """
        return self._delete_by_ids(self.qa_collection_name, ids)

    def _delete_by_ids(self, collection_name: str, ids: Optional[List[str]]) -> bool:
        """Issue an ``id in [...]`` delete against one collection."""
        if not ids:
            return False
        if not self.client.has_collection(collection_name=collection_name):
            return False

        # _convert_ids only lets UUID-shaped strings through, so the list
        # can be embedded in the filter expression safely.
        milvus_ids = self._convert_ids(ids)
        self.client.delete(
            collection_name=collection_name,
            filter=f"{ID} in {milvus_ids}",
        )
        return True

    def update_question_answer(
        self,
        ids: Iterable[str],
        queries: Iterable[str],
        codes: Iterable[str],
        metadatas: List[Dict] = None,
    ) -> List[str]:
        """Replace the text, embedding and metadata of existing QA pairs.

        Returns:
            The updated IDs, or ``[]`` when any ID is unknown (the update
            is skipped as a whole and a warning is logged).

        Raises:
            ValueError: If ids, queries and codes (or metadatas, when
                given) differ in length.
        """
        ids = list(ids)
        queries = list(queries)
        codes = list(codes)
        if not (len(ids) == len(queries) == len(codes)):
            raise ValueError(
                f"Queries, codes and ids length doesn't match. {len(queries)} != {len(codes)} != {len(ids)}"
            )

        milvus_ids = self._convert_ids(ids)
        if not self._validate_update_ids(
            collection_name=self.qa_collection_name, ids=milvus_ids
        ):
            return []
        if not milvus_ids:
            return []

        format_qa = [
            self._format_qa(query, code) for query, code in zip(queries, codes)
        ]
        rows = self._build_rows(milvus_ids, format_qa, metadatas)
        # upsert replaces the entity with the same primary key; insert
        # would add a second entity next to the old one.
        self.client.upsert(collection_name=self.qa_collection_name, data=rows)
        return milvus_ids

    def update_docs(
        self, ids: Iterable[str], docs: Iterable[str], metadatas: List[Dict] = None
    ) -> List[str]:
        """Replace the text, embedding and metadata of existing documents.

        Returns:
            The updated IDs, or ``[]`` when any ID is unknown (the update
            is skipped as a whole and a warning is logged).

        Raises:
            ValueError: If ids and docs (or metadatas, when given) differ
                in length.
        """
        ids = list(ids)
        docs = list(docs)
        if len(ids) != len(docs):
            raise ValueError(
                f"Docs and ids length doesn't match. {len(ids)} != {len(docs)}"
            )

        milvus_ids = self._convert_ids(ids)
        if not self._validate_update_ids(
            collection_name=self.docs_collection_name, ids=milvus_ids
        ):
            return []
        if not milvus_ids:
            return []

        rows = self._build_rows(milvus_ids, docs, metadatas)
        self.client.upsert(collection_name=self.docs_collection_name, data=rows)
        return milvus_ids

    def _validate_update_ids(self, collection_name: str, ids: List[str]) -> bool:
        """Check that every ID already exists in ``collection_name``.

        Logs the missing IDs and returns ``False`` if any are absent; a
        missing collection counts as "all IDs absent".
        """
        found = set()
        if ids and self.client.has_collection(collection_name=collection_name):
            rows = self.client.query(
                collection_name=collection_name, ids=ids, output_fields=[ID]
            )
            found = {row[ID] for row in rows}

        diff = set(ids) - found
        if diff:
            self._logger.log(
                f"Missing IDs: {diff}. Skipping update", level=logging.WARN
            )
            return False
        return True

    def delete_collection(self, collection_name: str) -> Optional[bool]:
        """Drop the QA and document collections of ``collection_name``.

        The suffixes match the ``_qa`` / ``_docs`` names built in
        ``__init__``.
        """
        self.client.drop_collection(collection_name=f"{collection_name}_qa")
        self.client.drop_collection(collection_name=f"{collection_name}_docs")
        return True

    def _resolve_ids(self, ids: Optional[Iterable[str]], expected: int) -> List[str]:
        """Return converted caller IDs, or fresh UUIDs when none are given.

        Raises:
            ValueError: If IDs are given but their count is not
                ``expected``.
        """
        given = list(ids) if ids is not None else []
        if not given:
            return self.generate_random_uuids(expected)
        if len(given) != expected:
            raise ValueError(
                f"Ids length doesn't match. {len(given)} != {expected}"
            )
        return self._convert_ids(given)

    def _build_rows(
        self,
        milvus_ids: List[str],
        texts: List[str],
        metadatas: Optional[List[Dict]] = None,
    ) -> List[Dict]:
        """Embed ``texts`` and assemble the entities to write.

        Raises:
            ValueError: If metadatas are given but their count differs
                from ``texts``.
        """
        metadata_list = list(metadatas) if metadatas else []
        if metadata_list and len(metadata_list) != len(texts):
            raise ValueError(
                f"Metadatas length doesn't match. {len(metadata_list)} != {len(texts)}"
            )

        vectors = self.emb_function.encode_documents(texts)
        rows = []
        for position, (entity_id, vector, text) in enumerate(
            zip(milvus_ids, vectors, texts)
        ):
            row = {ID: entity_id, EMBEDDING: vector, DOCUMENT: text}
            if metadata_list:
                row[_METADATA] = metadata_list[position]
            rows.append(row)
        return rows

    def _convert_ids(self, ids: Iterable[str]) -> List[str]:
        """Map IDs to UUID strings.

        An ID that already parses as a UUID is kept unchanged; any other
        ID is hashed into a UUIDv5 under ``UUID_NAMESPACE``, so the same
        input always yields the same stored ID.
        """
        namespace = uuid.UUID(UUID_NAMESPACE)
        return [
            raw_id
            if self._is_valid_uuid(raw_id)
            else str(uuid.uuid5(namespace, raw_id))
            for raw_id in ids
        ]

    def _is_valid_uuid(self, id: str):
        """Return whether ``id`` parses as a UUID."""
        try:
            uuid.UUID(id)
            return True
        except ValueError:
            return False

    def generate_random_uuids(self, n):
        """Return ``n`` random UUID4 strings."""
        return [str(uuid.uuid4()) for _ in range(n)]
= "sha256:656a0dd61e1869b1506b7c6a3b31d62f15984b1a573d6326f6aa2f3e4123284b"}, -] - -[package.dependencies] -requests = ">=2.21.0" -six = ">=1.11.0" -typing-extensions = ">=4.6.0" - -[package.extras] -aio = ["aiohttp (>=3.0)"] - -[[package]] -name = "azure-storage-blob" -version = "12.23.0" -description = "Microsoft Azure Blob Storage Client Library for Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "azure_storage_blob-12.23.0-py3-none-any.whl", hash = "sha256:8ac4b34624ed075eda1e38f0c6dadb601e1b199e27a09aa63edc429bf4a23329"}, - {file = "azure_storage_blob-12.23.0.tar.gz", hash = "sha256:2fadbceda1d99c4a72dfd32e0122d7bca8b5e8d2563f5c624d634aeaff49c9df"}, -] - -[package.dependencies] -azure-core = ">=1.30.0" -cryptography = ">=2.1.4" -isodate = ">=0.6.1" -typing-extensions = ">=4.6.0" - -[package.extras] -aio = ["azure-core[aio] (>=1.30.0)"] - [[package]] name = "backoff" version = "2.2.1" @@ -618,17 +568,6 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] -[[package]] -name = "chardet" -version = "4.0.0" -description = "Universal encoding detector for Python 2 and 3" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, - {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, -] - [[package]] name = "charset-normalizer" version = "3.3.2" @@ -1047,45 +986,6 @@ files = [ [package.extras] toml = ["tomli"] -[[package]] -name = "cryptography" -version = "3.4.8" -description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-optional = true -python-versions = ">=3.6" -files = [ - {file = "cryptography-3.4.8-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14"}, - {file = "cryptography-3.4.8-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"}, - {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e"}, - {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085"}, - {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b"}, - {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb"}, - {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d"}, - {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89"}, - {file = "cryptography-3.4.8-cp36-abi3-win32.whl", hash = "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7"}, - {file = "cryptography-3.4.8-cp36-abi3-win_amd64.whl", hash = "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc"}, - {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5"}, - {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af"}, - {file = 
"cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a"}, - {file = "cryptography-3.4.8-pp37-pypy37_pp73-macosx_10_10_x86_64.whl", hash = "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06"}, - {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498"}, - {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7"}, - {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9"}, - {file = "cryptography-3.4.8-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e"}, - {file = "cryptography-3.4.8.tar.gz", hash = "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c"}, -] - -[package.dependencies] -cffi = ">=1.12" - -[package.extras] -docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] -docstest = ["doc8", "pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] -pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] -sdist = ["setuptools-rust (>=0.11.4)"] -ssh = ["bcrypt (>=3.1.5)"] -test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"] - [[package]] name = "cryptography" version = "43.0.1" @@ -1366,6 +1266,27 @@ files = [ {file = "durationpy-0.7.tar.gz", hash = "sha256:8447c43df4f1a0b434e70c15a38d77f5c9bd17284bfc1ff1d430f233d5083732"}, ] +[[package]] +name = "environs" +version = "9.5.0" +description = "simplified environment variable parsing" +optional = true +python-versions = ">=3.6" +files = [ + {file = 
"environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"}, + {file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"}, +] + +[package.dependencies] +marshmallow = ">=3.0.0" +python-dotenv = "*" + +[package.extras] +dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"] +django = ["dj-database-url", "dj-email-url", "django-cache-url"] +lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"] +tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"] + [[package]] name = "et-xmlfile" version = "1.1.0" @@ -2605,21 +2526,6 @@ ibm-cos-sdk-core = "2.13.5" ibm-cos-sdk-s3transfer = "2.13.5" jmespath = ">=0.10.0,<=1.0.1" -[[package]] -name = "ibm-cos-sdk" -version = "2.13.6" -description = "IBM SDK for Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "ibm-cos-sdk-2.13.6.tar.gz", hash = "sha256:171cf2ae4ab662a4b8ab58dcf4ac994b0577d6c92d78490295fd7704a83978f6"}, -] - -[package.dependencies] -ibm-cos-sdk-core = "2.13.6" -ibm-cos-sdk-s3transfer = "2.13.6" -jmespath = ">=0.10.0,<=1.0.1" - [[package]] name = "ibm-cos-sdk-core" version = "2.13.5" @@ -2634,22 +2540,7 @@ files = [ jmespath = ">=0.10.0,<=1.0.1" python-dateutil = ">=2.9.0,<3.0.0" requests = ">=2.32.3,<3.0" - -[[package]] -name = "ibm-cos-sdk-core" -version = "2.13.6" -description = "Low-level, data-driven core of IBM SDK for Python" -optional = true -python-versions = ">=3.6" -files = [ - {file = "ibm-cos-sdk-core-2.13.6.tar.gz", hash = "sha256:dd41fb789eeb65546501afabcd50e78846ab4513b6ad4042e410b6a14ff88413"}, -] - -[package.dependencies] -jmespath = ">=0.10.0,<=1.0.1" -python-dateutil = ">=2.9.0,<3.0.0" -requests = ">=2.32.0,<2.32.3" -urllib3 = ">=1.26.18,<3" +urllib3 = {version = ">=1.26.18,<2.2", 
markers = "python_version >= \"3.10\""} [[package]] name = "ibm-cos-sdk-s3transfer" @@ -2664,19 +2555,6 @@ files = [ [package.dependencies] ibm-cos-sdk-core = "2.13.5" -[[package]] -name = "ibm-cos-sdk-s3transfer" -version = "2.13.6" -description = "IBM S3 Transfer Manager" -optional = true -python-versions = ">=3.8" -files = [ - {file = "ibm-cos-sdk-s3transfer-2.13.6.tar.gz", hash = "sha256:e0acce6f380c47d11e07c6765b684b4ababbf5c66cc0503bc246469a1e2b9790"}, -] - -[package.dependencies] -ibm-cos-sdk-core = "2.13.6" - [[package]] name = "ibm-watson-machine-learning" version = "1.0.360" @@ -2804,20 +2682,6 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "isodate" -version = "0.6.1" -description = "An ISO 8601 date/time/duration parser and formatter" -optional = true -python-versions = "*" -files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] - -[package.dependencies] -six = "*" - [[package]] name = "itsdangerous" version = "2.2.0" @@ -3913,6 +3777,21 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] +[[package]] +name = "milvus-lite" +version = "2.4.10" +description = "A lightweight version of Milvus wrapped with Python." 
+optional = true +python-versions = ">=3.7" +files = [ + {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, + {file = "milvus_lite-2.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74a8e07c5e3b057df17fbb46913388e84df1dc403a200f4e423799a58184c800"}, + {file = "milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl", hash = "sha256:211d2e334a043f9282bdd9755f76b9b2d93b23bffa7af240919ffce6a8dfe325"}, +] + +[package.dependencies] +tqdm = "*" + [[package]] name = "mkdocs" version = "1.5.3" @@ -4982,20 +4861,6 @@ files = [ {file = "orjson-3.10.7.tar.gz", hash = "sha256:75ef0640403f945f3a1f9f6400686560dbfb0fb5b16589ad62cd477043c4eee3"}, ] -[[package]] -name = "oscrypto" -version = "1.3.0" -description = "TLS (SSL) sockets, key generation, encryption, decryption, signing, verification and KDFs using the OS crypto libraries. Does not require a compiler, and relies on the OS for patching. Works on Windows, OS X and Linux/BSD." 
-optional = true -python-versions = "*" -files = [ - {file = "oscrypto-1.3.0-py2.py3-none-any.whl", hash = "sha256:2b2f1d2d42ec152ca90ccb5682f3e051fb55986e1b170ebde472b133713e7085"}, - {file = "oscrypto-1.3.0.tar.gz", hash = "sha256:6f5fef59cb5b3708321db7cca56aed8ad7e662853351e7991fcf60ec606d47a4"}, -] - -[package.dependencies] -asn1crypto = ">=1.5.1" - [[package]] name = "overrides" version = "7.7.0" @@ -5697,47 +5562,6 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -[[package]] -name = "pycryptodomex" -version = "3.20.0" -description = "Cryptographic library for Python" -optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "pycryptodomex-3.20.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:645bd4ca6f543685d643dadf6a856cc382b654cc923460e3a10a49c1b3832aeb"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ff5c9a67f8a4fba4aed887216e32cbc48f2a6fb2673bb10a99e43be463e15913"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:8ee606964553c1a0bc74057dd8782a37d1c2bc0f01b83193b6f8bb14523b877b"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7805830e0c56d88f4d491fa5ac640dfc894c5ec570d1ece6ed1546e9df2e98d6"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:bc3ee1b4d97081260d92ae813a83de4d2653206967c4a0a017580f8b9548ddbc"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-win32.whl", hash = "sha256:8af1a451ff9e123d0d8bd5d5e60f8e3315c3a64f3cdd6bc853e26090e195cdc8"}, - {file = "pycryptodomex-3.20.0-cp27-cp27m-win_amd64.whl", hash = "sha256:cbe71b6712429650e3883dc81286edb94c328ffcd24849accac0a4dbcc76958a"}, - {file = "pycryptodomex-3.20.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:76bd15bb65c14900d98835fcd10f59e5e0435077431d3a394b60b15864fddd64"}, - {file = 
"pycryptodomex-3.20.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:653b29b0819605fe0898829c8ad6400a6ccde096146730c2da54eede9b7b8baa"}, - {file = "pycryptodomex-3.20.0-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62a5ec91388984909bb5398ea49ee61b68ecb579123694bffa172c3b0a107079"}, - {file = "pycryptodomex-3.20.0-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:108e5f1c1cd70ffce0b68739c75734437c919d2eaec8e85bffc2c8b4d2794305"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:59af01efb011b0e8b686ba7758d59cf4a8263f9ad35911bfe3f416cee4f5c08c"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:82ee7696ed8eb9a82c7037f32ba9b7c59e51dda6f105b39f043b6ef293989cb3"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91852d4480a4537d169c29a9d104dda44094c78f1f5b67bca76c29a91042b623"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca649483d5ed251d06daf25957f802e44e6bb6df2e8f218ae71968ff8f8edc4"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e186342cfcc3aafaad565cbd496060e5a614b441cacc3995ef0091115c1f6c5"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:25cd61e846aaab76d5791d006497134602a9e451e954833018161befc3b5b9ed"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:9c682436c359b5ada67e882fec34689726a09c461efd75b6ea77b2403d5665b7"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a7a8f33a1f1fb762ede6cc9cbab8f2a9ba13b196bfaf7bc6f0b39d2ba315a43"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-win32.whl", hash = "sha256:c39778fd0548d78917b61f03c1fa8bfda6cfcf98c767decf360945fe6f97461e"}, - {file = "pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl", hash = 
"sha256:2a47bcc478741b71273b917232f521fd5704ab4b25d301669879e7273d3586cc"}, - {file = "pycryptodomex-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1be97461c439a6af4fe1cf8bf6ca5936d3db252737d2f379cc6b2e394e12a458"}, - {file = "pycryptodomex-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:19764605feea0df966445d46533729b645033f134baeb3ea26ad518c9fdf212c"}, - {file = "pycryptodomex-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e497413560e03421484189a6b65e33fe800d3bd75590e6d78d4dfdb7accf3b"}, - {file = "pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48217c7901edd95f9f097feaa0388da215ed14ce2ece803d3f300b4e694abea"}, - {file = "pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d00fe8596e1cc46b44bf3907354e9377aa030ec4cd04afbbf6e899fc1e2a7781"}, - {file = "pycryptodomex-3.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:88afd7a3af7ddddd42c2deda43d53d3dfc016c11327d0915f90ca34ebda91499"}, - {file = "pycryptodomex-3.20.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d3584623e68a5064a04748fb6d76117a21a7cb5eaba20608a41c7d0c61721794"}, - {file = "pycryptodomex-3.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0daad007b685db36d977f9de73f61f8da2a7104e20aca3effd30752fd56f73e1"}, - {file = "pycryptodomex-3.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5dcac11031a71348faaed1f403a0debd56bf5404232284cf8c761ff918886ebc"}, - {file = "pycryptodomex-3.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:69138068268127cd605e03438312d8f271135a33140e2742b417d027a0539427"}, - {file = "pycryptodomex-3.20.0.tar.gz", hash = "sha256:7a710b79baddd65b806402e14766c721aee8fb83381769c27920f26476276c1e"}, -] - [[package]] name = "pydantic" version = "2.9.2" @@ -5978,6 +5802,31 @@ pyyaml = "*" 
[package.extras] extra = ["pygments (>=2.12)"] +[[package]] +name = "pymilvus" +version = "2.4.7" +description = "Python Sdk for Milvus" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pymilvus-2.4.7-py3-none-any.whl", hash = "sha256:1e5d377bd40fa7eb459d3958dbd96201758f5cf997d41eb3d2d169d0b7fa462e"}, + {file = "pymilvus-2.4.7.tar.gz", hash = "sha256:9ef460b940782a42e1b7b8ae0da03d8cc02d9d80044d13f4b689a7c935ec7aa7"}, +] + +[package.dependencies] +environs = "<=9.5.0" +grpcio = ">=1.49.1" +milvus-lite = {version = ">=2.4.0,<2.5.0", markers = "sys_platform != \"win32\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +setuptools = ">69" +ujson = ">=2.0.0" + +[package.extras] +bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"] +dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] +model = ["milvus-model (>=0.1.0)"] + [[package]] name = "pymysql" version = "1.1.1" @@ -5993,25 +5842,6 @@ files = [ ed25519 = ["PyNaCl (>=1.4.0)"] rsa = ["cryptography"] -[[package]] -name = "pyopenssl" -version = "20.0.1" -description = "Python wrapper module around the OpenSSL library" -optional = true -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" -files = [ - {file = "pyOpenSSL-20.0.1-py2.py3-none-any.whl", hash = "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b"}, - {file = "pyOpenSSL-20.0.1.tar.gz", hash = "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51"}, -] - -[package.dependencies] -cryptography = ">=3.2" -six = ">=1.5.2" - -[package.extras] -docs = ["sphinx", "sphinx-rtd-theme"] -test = ["flaky", "pretend", "pytest (>=3.0.1)"] - [[package]] name = "pyopenssl" version = "24.2.1" @@ -6514,27 +6344,6 @@ files = [ {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, ] 
-[[package]] -name = "requests" -version = "2.32.2" -description = "Python HTTP for Humans." -optional = false -python-versions = ">=3.8" -files = [ - {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, - {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - [[package]] name = "requests" version = "2.32.3" @@ -6930,6 +6739,11 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = 
"scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -7198,55 +7012,6 @@ files = [ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, ] -[[package]] -name = "snowflake-connector-python" -version = "2.6.1" -description = "Snowflake Connector for Python" -optional = true -python-versions = ">=3.6" -files = [ - {file = "snowflake-connector-python-2.6.1.tar.gz", hash = "sha256:dbe6f7d84debd27b117e17fdb280be27695cf6ae54009c49495584d1b7776d1b"}, - {file = "snowflake_connector_python-2.6.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:c5d0a5661c3e7e17195d4dd1d946136c160b533b9afe6038a4c922e1a564debb"}, - {file = "snowflake_connector_python-2.6.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:428d2cae8f723130d80e31036f3b20d711f8455937b2b3dace8697cced54b83d"}, - {file = "snowflake_connector_python-2.6.1-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:7d1a4d10967803b875533ac4793a7273677db5901d05978eb397af33ff1f5bf9"}, - {file = "snowflake_connector_python-2.6.1-cp36-cp36m-win_amd64.whl", hash = "sha256:cff31e3a24984683bbe1c351e4151a67dfe57e298c36cac1afde632d43e12f65"}, - {file = "snowflake_connector_python-2.6.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:146e52e76ec4de2dc07fa76ceaad51af60592ea5c1c60bd7778b7955502650e4"}, - {file = "snowflake_connector_python-2.6.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:04d623a1c1099ee1b4f4dbae2249d600b7cc3409688b44c157a67e6bdd6bf348"}, - {file = "snowflake_connector_python-2.6.1-cp37-cp37m-manylinux2014_x86_64.whl", hash = 
"sha256:ad1c78acfa496cb567735410f5289b595657991256a6c4ada2af65fcd6676e65"}, - {file = "snowflake_connector_python-2.6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:55fef4b0c25eecb9af315e206e2fd7b9f754ebf9fec99b74d358e60228b508d2"}, - {file = "snowflake_connector_python-2.6.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:76b632f8db5b6f305297b73b5a0c06cba72ddeb140e6d94a4eb94e2d1827c4fb"}, - {file = "snowflake_connector_python-2.6.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f5a31cdb14e2f6bcc659bfebd4160d155b3a6620f2af64fde51b62676721a9e7"}, - {file = "snowflake_connector_python-2.6.1-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:103ca8d2be9a5fcab41dd27827361a6490feaa7984bfcb73d620e16062bab21b"}, - {file = "snowflake_connector_python-2.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:9eeebd9e03abb600dee5d25eeba3e6be07ef60dc2afeffb974c95f4655532dea"}, - {file = "snowflake_connector_python-2.6.1-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:d9f0a6fc896419d9d81d0c4bc2400902d0a69048eb97e5c0a76fe3cb2e2b0f5c"}, - {file = "snowflake_connector_python-2.6.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:6e675b86f563a4ae343fc1eb401fc9158764165a12b14781ed4059a2326fa239"}, - {file = "snowflake_connector_python-2.6.1-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:8bf73ed4fbf164a6ef7e78f4c3460cd30a128499cb1bfa23fa7e215a5fca8dae"}, - {file = "snowflake_connector_python-2.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:e2462afa35fd5df25f68375ff9df607c169f66f89adabaa2ed5500566b287803"}, -] - -[package.dependencies] -asn1crypto = ">0.24.0,<2.0.0" -azure-common = "<2.0.0" -azure-storage-blob = ">=12.0.0,<13.0.0" -boto3 = ">=1.4.4,<2.0.0" -certifi = ">=2017.4.17" -cffi = ">=1.9,<2.0.0" -chardet = ">=3.0.2,<5" -cryptography = ">=2.5.0,<4.0.0" -idna = ">=2.5,<4" -oscrypto = "<2.0.0" -pycryptodomex = ">=3.2,<3.5.0 || >3.5.0,<4.0.0" -pyjwt = "<3.0.0" -pyOpenSSL = ">=16.2.0,<21.0.0" -pytz = "*" -requests = "<3.0.0" -setuptools = ">34.0.0" - -[package.extras] 
-development = ["Cython", "coverage", "mock", "more-itertools", "numpy (<1.21.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<6.3.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytz", "pytzdata"] -pandas = ["pandas (>=1.0.0,<1.4.0)", "pyarrow (>=5.0.0,<5.1.0)"] -secure-local-storage = ["keyring (!=16.1.0,<22.0.0)"] - [[package]] name = "snowflake-connector-python" version = "3.12.2" @@ -8185,6 +7950,93 @@ files = [ [package.extras] test = ["coverage", "pytest", "pytest-cov"] +[[package]] +name = "ujson" +version = "5.10.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, + {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"}, + {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = 
"sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"}, + {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"}, + {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"}, + {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"}, + {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"}, + {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"}, + {file = 
"ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"}, + {file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"}, + {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"}, + 
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"}, + {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"}, + {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"}, + {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"}, + {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"}, + {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"}, +] + [[package]] name = "urllib3" version = "1.26.20" @@ -8201,23 +8053,6 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "urllib3" -version = "2.2.3" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, - {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - [[package]] name = "uvicorn" version = "0.30.6" @@ -8846,6 +8681,7 @@ google-sheets = ["beautifulsoup4"] ibm-watsonx-ai = ["ibm-watsonx-ai"] lancedb = ["lancedb"] langchain = ["langchain"] +milvus = ["pymilvus"] modin = ["modin"] numpy = ["numpy"] pinecone = ["pinecone-client"] @@ -8863,4 +8699,4 @@ yfinance = ["yfinance"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.9.7 || >3.9.7,<4.0" -content-hash = "423d91a022554262b9e537f9ed05006342ca0dfa490fa01bcc54324632089f9b" +content-hash = "d21c51d44da069b6eca3acef9b565b552e37e8a784aad15515787a98b176a190" diff --git a/pyproject.toml b/pyproject.toml index e56e11189..597efd400 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ pinecone-client = { version = "^4.1.0", optional = true, markers = "python_versi lancedb = {version = "^0.11.0", optional = true} sentence-transformers = {version = "^2.3.0", optional = true} sqlglot = {extras = ["rs"], version = "^25.0.3"} +pymilvus = {version = "^2.4.6", optional = true} [tool.poetry.group.dev] optional = true @@ -97,6 +98,7 @@ flask = ["flask"] qdrant = ["qdrant-client"] pinecone = ["pinecone-client"] ibm-watsonx-ai = ["ibm-watsonx-ai"] +milvus = ["pymilvus"] [tool.poetry.group.docs] optional = true @@ -118,4 +120,4 @@ exclude = ["tests_*"] env = [ "HUGGINGFACE_API_KEY=", "OPENAI_API_KEY=" -] +] \ No newline at end of file diff --git a/tests/unit_tests/vectorstores/test_milvus.py b/tests/unit_tests/vectorstores/test_milvus.py new file mode 100644 index 000000000..da0d5e563 --- 
/dev/null +++ b/tests/unit_tests/vectorstores/test_milvus.py @@ -0,0 +1,148 @@ +import unittest +from unittest.mock import ANY, MagicMock, patch + +import numpy as np # Assuming `encode_documents` returns `numpy` arrays + +from pandasai.ee.vectorstores.milvus import Milvus + + +class TestMilvus(unittest.TestCase): + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_add_question_answer(self, mock_client): + milvus = Milvus() + milvus.add_question_answer( + ["What is AGI?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ) + mock_client.return_value.insert.assert_called_once() + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_add_question_answer_with_ids(self, mock_client): + milvus = Milvus() + ids = ["test id 1", "test id 2"] + documents = [ + "Q: What is AGI?\n A: print('Hello')", + "Q: How does it work?\n A: for i in range(10): print(i)", + ] + + # Mock the embedding function and ID conversion + mock_ids = milvus._convert_ids(ids) + + milvus.add_question_answer( + ["What is AGI?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ids=ids, + ) + + # Construct the expected data + expected_data = [ + {"id": mock_ids[i], "vector": ANY, "document": documents[i]} + for i in range(len(documents)) + ] + + # Assert insert was called correctly + mock_client.return_value.insert.assert_called_once_with( + collection_name=milvus.qa_collection_name, + data=expected_data, + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_add_question_answer_different_dimensions(self, mock_client): + milvus = Milvus() + with self.assertRaises(ValueError): + milvus.add_question_answer( + ["What is AGI?", "How does it work?"], + ["print('Hello')"], + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_update_question_answer(self, mock_client): + milvus = Milvus() + ids = ["test id 1", "test id 2"] + 
milvus.update_question_answer( + ["test id", "test id"], + ["What is AGI?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ) + mock_client.return_value.query.assert_called_once() + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_update_question_answer_different_dimensions(self, mock_client): + milvus = Milvus() + with self.assertRaises(ValueError): + milvus.update_question_answer( + ["test id"], + ["What is AGI?", "How does it work?"], + ["print('Hello')"], + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_add_docs(self, mock_client): + milvus = Milvus() + milvus.add_docs(["Document 1", "Document 2"]) + mock_client.return_value.insert.assert_called_once() + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_add_docs_with_ids(self, mock_client): + milvus = Milvus() + ids = ["test id 1", "test id 2"] + documents = ["Document 1", "Document 2"] + + # Mock the embedding function + milvus.add_docs(documents, ids) + + # Assert insert was called correctly + mock_client.return_value.insert.assert_called_once() + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_delete_question_and_answers(self, mock_client): + milvus = Milvus() + ids = ["id1", "id2"] + milvus.delete_question_and_answers(ids) + id_filter = str(milvus._convert_ids(ids)) + mock_client.return_value.delete.assert_called_once_with( + collection_name=milvus.qa_collection_name, + filter=f"id in {id_filter}", + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_delete_docs(self, mock_client): + milvus = Milvus() + ids = ["id1", "id2"] + milvus.delete_docs(ids) + id_filter = str(milvus._convert_ids(ids)) + mock_client.return_value.delete.assert_called_once_with( + collection_name=milvus.docs_collection_name, + filter=f"id in {id_filter}", + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) 
+ def test_get_relevant_question_answers(self, mock_client): + milvus = Milvus() + question = "What is AGI?" + mock_vector = milvus.emb_function.encode_documents(question) + milvus.emb_function.encode_documents = MagicMock(return_value=mock_vector) + + milvus.get_relevant_question_answers(question, k=3) + mock_client.return_value.search.assert_called_once_with( + collection_name=milvus.qa_collection_name, + data=mock_vector, + limit=3, + filter="", + output_fields=["document"], + ) + + @patch("pandasai.ee.vectorstores.milvus.MilvusClient", autospec=True) + def test_get_relevant_docs(self, mock_client): + milvus = Milvus() + question = "What is AGI?" + mock_vector = milvus.emb_function.encode_documents(question) + milvus.emb_function.encode_documents = MagicMock(return_value=mock_vector) + + milvus.get_relevant_docs(question, k=3) + mock_client.return_value.search.assert_called_once_with( + collection_name=milvus.docs_collection_name, + data=mock_vector, + limit=3, + output_fields=["document"], + )