From fc0148e6244192a20f417b0be644f4b876b63831 Mon Sep 17 00:00:00 2001 From: ArslanSaleem Date: Mon, 9 Dec 2024 18:05:53 +0100 Subject: [PATCH 1/2] refactor(BambooVectorStore): remove bamboo vectorstore --- pandasai/agent/base.py | 12 --- pandasai/vectorstores/__init__.py | 3 +- pandasai/vectorstores/bamboo_vectorstore.py | 84 ------------------ .../vectorstores/test_bamboo_vector_store.py | 88 ------------------- 4 files changed, 1 insertion(+), 186 deletions(-) delete mode 100644 pandasai/vectorstores/bamboo_vectorstore.py delete mode 100644 tests/unit_tests/vectorstores/test_bamboo_vector_store.py diff --git a/pandasai/agent/base.py b/pandasai/agent/base.py index d3f86d0f4..76cc53a73 100644 --- a/pandasai/agent/base.py +++ b/pandasai/agent/base.py @@ -1,4 +1,3 @@ -import os import traceback import uuid from typing import Any, List, Optional, Tuple, Union @@ -87,17 +86,6 @@ def __init__( save_logs=self._state.config.save_logs, verbose=self._state.config.verbose ) - # Initiate VectorStore - if vectorstore is None and os.environ.get("PANDASAI_API_KEY"): - try: - from pandasai.vectorstores.bamboo_vectorstore import BambooVectorStore - except ImportError as e: - raise ImportError( - "Could not import BambooVectorStore. Please install the required dependencies." - ) from e - - self._state.vectorstore = BambooVectorStore(logger=self._state.logger) - # Initialize Cache self._state.cache = Cache() if self._state.config.enable_cache else None diff --git a/pandasai/vectorstores/__init__.py b/pandasai/vectorstores/__init__.py index 7b9680faa..9c4490921 100644 --- a/pandasai/vectorstores/__init__.py +++ b/pandasai/vectorstores/__init__.py @@ -2,7 +2,6 @@ Vector stores to store data for training purpose """ -from .bamboo_vectorstore import BambooVectorStore from .vectorstore import VectorStore -__all__ = ["VectorStore", "BambooVectorStore"] +__all__ = ["VectorStore"] diff --git a/pandasai/vectorstores/bamboo_vectorstore.py b/pandasai/vectorstores/bamboo_vectorstore.py deleted file mode 100644 index 6f3cc01a9..000000000 --- a/pandasai/vectorstores/bamboo_vectorstore.py +++ /dev/null @@ -1,84 +0,0 @@ -import logging -from typing import Iterable, List, Optional, Union - -from pandasai.helpers.logger import Logger -from pandasai.helpers.request import Session -from pandasai.vectorstores.vectorstore import VectorStore - - -class BambooVectorStore(VectorStore): - """ - Implementation of ChromeDB vector store - """ - - _logger: Logger - - def __init__( - self, - endpoint_url: Optional[str] = None, - api_key: Optional[str] = None, - logger: Optional[Logger] = None, - max_samples: int = 1, - ) -> None: - self._max_samples = max_samples - self._logger = logger or Logger() - self._session = Session(endpoint_url, api_key, logger) - - def add_question_answer(self, queries: Iterable[str], codes: Iterable[str]) -> bool: - """ - Add question and answer(code) to the training set - Args: - queries: string of question - codes: str - """ - self._session.post("/training-data", json={"query": queries, "code": codes}) - return True - - def add_docs(self, docs: Iterable[str]) -> bool: - """ - Add docs to the training set - Args: - docs: Iterable of strings to add to the vectorstore. - ids: Optional Iterable of ids associated with the texts. - metadatas: Optional list of metadatas associated with the texts. - kwargs: vectorstore specific parameters - - Returns: - List of ids from adding the texts into the vectorstore. - """ - self._session.post("/training-docs", json={"docs": docs}) - return True - - def get_relevant_qa_documents(self, question: str, k: int = None) -> List[dict]: - """ - Returns relevant question answers based on search - """ - k = k or self._max_samples - - try: - docs = self._session.get( - "/training-data/qa/relevant-qa", params={"query": question, "count": k} - )["data"] - return docs["docs"] - except Exception: - self._logger.log("Querying without using training data.", logging.ERROR) - return [] - - def get_relevant_docs_documents( - self, question: str, k: Union[int, None] = 3 - ) -> List[str]: - """ - Returns relevant question answers documents only - Args: - question (_type_): list of documents - """ - k = k or self._max_samples - try: - docs = self._session.get( - "/training-docs/docs/relevant-docs", - params={"query": question, "count": k}, - )["data"] - return docs["docs"] - except Exception: - self._logger.log("Querying without using training docs.", logging.ERROR) - return [] diff --git a/tests/unit_tests/vectorstores/test_bamboo_vector_store.py b/tests/unit_tests/vectorstores/test_bamboo_vector_store.py deleted file mode 100644 index 03bf08dd9..000000000 --- a/tests/unit_tests/vectorstores/test_bamboo_vector_store.py +++ /dev/null @@ -1,88 +0,0 @@ -import unittest -from unittest.mock import patch - -from pandasai.vectorstores.bamboo_vectorstore import BambooVectorStore - - -class TestBambooVector(unittest.TestCase): - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_add_question_answer(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.add_question_answer( - ["What is Chroma?", "How does it work?"], - ["print('Hello')", "for i in range(10): print(i)"], - ) - call_args = mock_request.call_args_list[0][0] - mock_request.assert_called_once() - assert call_args[1] == "POST" - assert call_args[2] == "/training-data" - assert mock_request.call_args_list[0][1] == { - "json": { - "code": ["print('Hello')", "for i in range(10): print(i)"], - "query": ["What is Chroma?", "How does it work?"], - } - } - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_add_docs(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.add_docs(["What is Chroma?"]) - call_args = mock_request.call_args_list[0][0] - mock_request.assert_called_once() - assert call_args[1] == "POST" - assert call_args[2] == "/training-docs" - assert mock_request.call_args_list[0][1] == { - "json": {"docs": ["What is Chroma?"]} - } - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_qa(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.get_relevant_qa_documents("Chroma") - mock_request.assert_called_once() - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_qa_args(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.get_relevant_qa_documents("Chroma") - call_args = mock_request.call_args_list[0][0] - mock_request.assert_called_once() - assert call_args[1] == "GET" - assert call_args[2] == "/training-data/qa/relevant-qa" - assert mock_request.call_args_list[0][1] == { - "params": {"count": 1, "query": "Chroma"} - } - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_docs(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.get_relevant_docs_documents("Chroma") - mock_request.assert_called_once() - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_docs_args(self, mock_request): - bvs = BambooVectorStore(api_key="dummy_key") - bvs.get_relevant_docs_documents("Chroma") - call_args = mock_request.call_args_list[0][0] - mock_request.assert_called_once() - assert call_args[1] == "GET" - assert call_args[2] == "/training-docs/docs/relevant-docs" - assert mock_request.call_args_list[0][1] == { - "params": {"count": 3, "query": "Chroma"} - } - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_qa_raise_exception(self, mock_request): - mock_request.side_effect = Exception("Some error occurred") - bvs = BambooVectorStore(api_key="dummy_key") - docs = bvs.get_relevant_qa_documents("Chroma") - mock_request.assert_called_once() - assert len(docs) == 0 - - @patch("pandasai.helpers.request.Session.make_request", autospec=True) - def test_get_docs_raise_exception(self, mock_request): - mock_request.side_effect = Exception("Some error occurred") - bvs = BambooVectorStore(api_key="dummy_key") - docs = bvs.get_relevant_docs_documents("Chroma") - mock_request.assert_called_once() - assert len(docs) == 0 From ccbf3c04d7c06fc6390cd0e18ad07d1e250663a4 Mon Sep 17 00:00:00 2001 From: ArslanSaleem Date: Mon, 9 Dec 2024 18:15:44 +0100 Subject: [PATCH 2/2] fix: vectorstore from constructor --- pandasai/agent/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandasai/agent/base.py b/pandasai/agent/base.py index 76cc53a73..0d5206df0 100644 --- a/pandasai/agent/base.py +++ b/pandasai/agent/base.py @@ -86,6 +86,9 @@ def __init__( save_logs=self._state.config.save_logs, verbose=self._state.config.verbose ) + # Initiate VectorStore + self._state.vectorstore = vectorstore + # Initialize Cache self._state.cache = Cache() if self._state.config.enable_cache else None