Merge branch 'main' into feature/v1.2

Sinaptik-AI · Sep 7, 2023 · 9955000 · 9955000
2 parents 3a92d33 + 9e6a71a
commit 9955000
Show file tree

Hide file tree

Showing 16 changed files with 453 additions and 70 deletions.
diff --git a/docs/API/llms.md b/docs/API/llms.md
@@ -18,18 +18,22 @@ OpenAI API wrapper extended through BaseOpenAI class.
 options:
 show_root_heading: true
 
-### Starcoder
+### Starcoder (deprecated)
 
 Starcoder wrapper extended through Base HuggingFace Class
 
+- Note: Starcoder is deprecated and will be removed in future versions. Please use another LLM.
+
 ::: pandasai.llm.starcoder
 options:
 show_root_heading: true
 
-### Falcon
+### Falcon (deprecated)
 
 Falcon wrapper extended through Base HuggingFace Class
 
+- Note: Falcon is deprecated and will be removed in future versions. Please use another LLM.
+
 ::: pandasai.llm.falcon
 options:
 show_root_heading: true

diff --git a/docs/LLMs/llms.md b/docs/LLMs/llms.md
@@ -2,6 +2,8 @@
 
 PandasAI supports several large language models (LLMs). LLMs are used to generate code from natural language queries. The generated code is then executed to produce the result.
 
+[![Choose the LLM](https://cdn.loom.com/sessions/thumbnails/5496c9c07ee04f69bfef1bc2359cd591-00001.jpg)](https://www.loom.com/share/5496c9c07ee04f69bfef1bc2359cd591 "Choose the LLM")
+
 You can either choose an LLM by instantiating one and passing it to the `SmartDataframe` or `SmartDatalake` constructor, or you can specify one in the `pandasai.json` file.
 
 If the model expects one or more parameters, you can pass them to the constructor or specify them in the `pandasai.json` file, in the `llm_options` param, as follows:
@@ -15,8 +17,6 @@ If the model expects one or more parameters, you can pass them to the constructo
 }
 ```
 
-## OpenAI models
-
 In order to use OpenAI models, you need to have an OpenAI API key. You can get one [here](https://platform.openai.com/account/api-keys).
 
 Once you have an API key, you can use it to instantiate an OpenAI object:

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -37,7 +37,7 @@ nav:
       - Documents Building: building_docs.md
       - License: license.md
 extra:
-  version: "1.1"
+  version: "1.1.2"
 plugins:
   - search
   - mkdocstrings:

diff --git a/pandasai/__init__.py b/pandasai/__init__.py
@@ -98,22 +98,22 @@ class PandasAI:
     """
 
     _dl: SmartDatalake = None
-    _config: Config
+    _config: Union[Config, dict]
 
     def __init__(
-            self,
-            llm=None,
-            conversational=False,
-            verbose=False,
-            enforce_privacy=False,
-            save_charts=False,
-            save_charts_path="",
-            enable_cache=True,
-            middlewares=None,
-            custom_whitelisted_dependencies=None,
-            enable_logging=True,
-            non_default_prompts: Optional[Dict[str, Type[Prompt]]] = None,
-            callback: Optional[BaseCallback] = None,
+        self,
+        llm=None,
+        conversational=False,
+        verbose=False,
+        enforce_privacy=False,
+        save_charts=False,
+        save_charts_path="",
+        enable_cache=True,
+        middlewares=None,
+        custom_whitelisted_dependencies=None,
+        enable_logging=True,
+        non_default_prompts: Optional[Dict[str, Type[Prompt]]] = None,
+        callback: Optional[BaseCallback] = None,
     ):
         """
         __init__ method of the Class PandasAI
@@ -142,8 +142,10 @@ def __init__(
         # noinspection PyArgumentList
         # https://stackoverflow.com/questions/61226587/pycharm-does-not-recognize-logging-basicconfig-handlers-argument
 
-        warnings.warn("`PandasAI` (class) is deprecated since v1.0 and will be removed "
-                      "in a future release. Please use `SmartDataframe` instead.")
+        warnings.warn(
+            "`PandasAI` (class) is deprecated since v1.0 and will be removed "
+            "in a future release. Please use `SmartDataframe` instead."
+        )
 
         self._config = Config(
             conversational=conversational,
@@ -161,12 +163,12 @@ def __init__(
         )
 
     def run(
-            self,
-            data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
-            prompt: str,
-            show_code: bool = False,
-            anonymize_df: bool = True,
-            use_error_correction_framework: bool = True,
+        self,
+        data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
+        prompt: str,
+        show_code: bool = False,
+        anonymize_df: bool = True,
+        use_error_correction_framework: bool = True,
     ) -> Union[str, pd.DataFrame]:
         """
         Run the PandasAI to make Dataframes Conversational.
@@ -198,12 +200,12 @@ def run(
         return self._dl.chat(prompt)
 
     def __call__(
-            self,
-            data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
-            prompt: str,
-            show_code: bool = False,
-            anonymize_df: bool = True,
-            use_error_correction_framework: bool = True,
+        self,
+        data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
+        prompt: str,
+        show_code: bool = False,
+        anonymize_df: bool = True,
+        use_error_correction_framework: bool = True,
     ) -> Union[str, pd.DataFrame]:
         """
         __call__ method of PandasAI class. It calls the `run` method.

diff --git a/pandasai/config.py b/pandasai/config.py
@@ -1,10 +1,15 @@
 import json
+import logging
+from typing import Optional, Union
+
 from . import llm, middlewares, callbacks
 from .helpers.path import find_closest
 from .schemas.df_config import Config
 
+logger = logging.getLogger(__name__)
+
 
-def load_config(override_config: Config = None):
+def load_config(override_config: Optional[Union[Config, dict]] = None):
     config = {}
 
     if override_config is None:
@@ -27,11 +32,9 @@ def load_config(override_config: Config = None):
             if config.get("callback") and not override_config.get("callback"):
                 config["callback"] = getattr(callbacks, config["callback"])()
     except Exception:
-        pass
+        logger.error("Could not load configuration", exc_info=True)
 
     if override_config:
         config.update(override_config)
 
-    config = Config(**config)
-
     return config
diff --git a/pandasai/helpers/code_manager.py b/pandasai/helpers/code_manager.py
@@ -25,7 +25,7 @@
 class CodeManager:
     _dfs: List
     _middlewares: List[Middleware] = [ChartsMiddleware()]
-    _config: Config
+    _config: Union[Config, dict]
     _logger: Logger = None
     _additional_dependencies: List[dict] = []
     _ast_comparatos_map: dict = {
@@ -46,12 +46,12 @@ class CodeManager:
     def __init__(
         self,
         dfs: List,
-        config: Config,
+        config: Union[Config, dict],
         logger: Logger,
     ):
         """
         Args:
-            config (Config, optional): Config to be used. Defaults to None.
+            config (Union[Config, dict]): Config to be used.
             logger (Logger): Logger to be used.
         """
 

diff --git a/pandasai/helpers/df_info.py b/pandasai/helpers/df_info.py
@@ -21,6 +21,10 @@ def df_type(df: DataFrameType) -> str:
     Returns:
         str: Type of the dataframe
     """
+    print("*" * 100)
+    print(df)
+    print("*" * 100)
+
     if polars_imported and isinstance(df, pl.DataFrame):
         return "polars"
     elif isinstance(df, pd.DataFrame):

diff --git a/pandasai/helpers/df_validator.py b/pandasai/helpers/df_validator.py
@@ -0,0 +1,126 @@
+from typing import List, Dict
+from pydantic import ValidationError
+from pydantic import BaseModel
+from pandasai.helpers.df_info import DataFrameType, df_type
+
+
+class DfValidationResult:
+    """
+    Validation results for a dataframe.
+
+    Attributes:
+        passed: Whether the validation passed or not.
+        errors: List of errors if the validation failed.
+    """
+
+    _passed: bool
+    _errors: List[Dict]
+
+    def __init__(self, passed: bool = True, errors: List[Dict] = None):
+        """
+        Args:
+            passed: Whether the validation passed or not.
+            errors: List of errors if the validation failed.
+        """
+        if errors is None:
+            errors = []
+        self._passed = passed
+        self._errors = errors
+
+    @property
+    def passed(self):
+        return self._passed
+
+    def errors(self) -> List[Dict]:
+        return self._errors
+
+    def add_error(self, error_message: str):
+        """
+        Add an error message to the validation results.
+
+        Args:
+            error_message: Error message to add.
+        """
+        self._passed = False
+        self._errors.append(error_message)
+
+    def __bool__(self) -> bool:
+        """
+        Define the truthiness of DfValidationResult: True when the validation passed.
+        """
+        return self.passed
+
+
+class DfValidator:
+    """
+    Validate a dataframe using a Pydantic schema.
+
+    Attributes:
+        df: dataframe to be validated
+    """
+
+    _df: DataFrameType
+
+    def __init__(self, df: DataFrameType):
+        """
+        Args:
+            df: dataframe to be validated
+        """
+        self._df = df
+
+    def _validate_batch(self, schema, df_json: List[Dict]):
+        """
+        Args:
+            schema: Pydantic schema
+            df_json: dataframe rows as a list of dicts
+
+        Returns:
+            list of errors
+        """
+        try:
+            # Create a Pydantic Validator to validate rows of dataframe
+            class PdVal(BaseModel):
+                df: List[schema]
+
+            PdVal(df=df_json)
+            return []
+
+        except ValidationError as e:
+            return e.errors()
+
+    def _df_to_list_of_dict(self, df: DataFrameType, dataframe_type: str) -> List[Dict]:
+        """
+        Create list of dict of dataframe rows on basis of dataframe type
+        Supports only polars and pandas dataframe
+
+        Args:
+            df: dataframe to be converted
+            dataframe_type: type of dataframe
+
+        Returns:
+            list of dict of dataframe rows
+        """
+        if dataframe_type == "pandas":
+            return df.to_dict(orient="records")
+        elif dataframe_type == "polars":
+            return df.to_dicts()
+        else:
+            return []
+
+    def validate(self, schema: BaseModel) -> DfValidationResult:
+        """
+        Args:
+            schema: Pydantic schema to be validated for the dataframe row
+
+        Returns:
+            Validation results
+        """
+        dataframe_type = df_type(self._df)
+        if dataframe_type is None:
+            raise ValueError("Unsupported DataFrame")
+
+        df_json: List[Dict] = self._df_to_list_of_dict(self._df, dataframe_type)
+
+        errors = self._validate_batch(schema, df_json)
+
+        return DfValidationResult(len(errors) == 0, errors)
diff --git a/pandasai/llm/falcon.py b/pandasai/llm/falcon.py
@@ -8,7 +8,7 @@
 
     >>> from pandasai.llm.falcon import Falcon
 """
-
+import warnings
 
 from ..helpers import load_dotenv
 from .base import HuggingFaceLLM
@@ -17,19 +17,23 @@
 
 
 class Falcon(HuggingFaceLLM):
-
-    """Falcon LLM API
-
-    A base HuggingFaceLLM class is extended to use Falcon model.
-
-    """
+    """Falcon LLM API (Deprecated: Kept for backwards compatibility)"""
 
     api_token: str
     _api_url: str = (
         "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
     )
     _max_retries: int = 30
 
+    def __init__(self, **kwargs):
+        warnings.warn(
+            """Falcon is deprecated and will be removed in a future release.
+            Please use langchain.llms.HuggingFaceHub instead, although please be 
+            aware that it may perform poorly.
+            """
+        )
+        super().__init__(**kwargs)
+
     @property
     def type(self) -> str:
         return "falcon"