Skip to content

Commit

Permalink
Merge branch 'main' into feature/v1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
gventuri committed Sep 7, 2023
2 parents 3a92d33 + 9e6a71a commit 9955000
Show file tree
Hide file tree
Showing 16 changed files with 453 additions and 70 deletions.
8 changes: 6 additions & 2 deletions docs/API/llms.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,22 @@ OpenAI API wrapper extended through BaseOpenAI class.
options:
show_root_heading: true

### Starcoder
### Starcoder (deprecated)

Starcoder wrapper extended through Base HuggingFace Class

- Note: Starcoder is deprecated and will be removed in future versions. Please use another LLM.

::: pandasai.llm.starcoder
options:
show_root_heading: true

### Falcon
### Falcon (deprecated)

Falcon wrapper extended through Base HuggingFace Class

- Note: Falcon is deprecated and will be removed in future versions. Please use another LLM.

::: pandasai.llm.falcon
options:
show_root_heading: true
Expand Down
4 changes: 2 additions & 2 deletions docs/LLMs/llms.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

PandasAI supports several large language models (LLMs). LLMs are used to generate code from natural language queries. The generated code is then executed to produce the result.

[![Choose the LLM](https://cdn.loom.com/sessions/thumbnails/5496c9c07ee04f69bfef1bc2359cd591-00001.jpg)](https://www.loom.com/share/5496c9c07ee04f69bfef1bc2359cd591 "Choose the LLM")

You can either choose an LLM by instantiating one and passing it to the `SmartDataframe` or `SmartDatalake` constructor, or you can specify one in the `pandasai.json` file.

If the model expects one or more parameters, you can pass them to the constructor or specify them in the `pandasai.json` file, in the `llm_options` param, as follows:
Expand All @@ -15,8 +17,6 @@ If the model expects one or more parameters, you can pass them to the constructo
}
```

## OpenAI models

In order to use OpenAI models, you need to have an OpenAI API key. You can get one [here](https://platform.openai.com/account/api-keys).

Once you have an API key, you can use it to instantiate an OpenAI object:
Expand Down
2 changes: 1 addition & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ nav:
- Documents Building: building_docs.md
- License: license.md
extra:
version: "1.1"
version: "1.1.2"
plugins:
- search
- mkdocstrings:
Expand Down
58 changes: 30 additions & 28 deletions pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,22 +98,22 @@ class PandasAI:
"""

_dl: SmartDatalake = None
_config: Config
_config: Union[Config, dict]

def __init__(
self,
llm=None,
conversational=False,
verbose=False,
enforce_privacy=False,
save_charts=False,
save_charts_path="",
enable_cache=True,
middlewares=None,
custom_whitelisted_dependencies=None,
enable_logging=True,
non_default_prompts: Optional[Dict[str, Type[Prompt]]] = None,
callback: Optional[BaseCallback] = None,
self,
llm=None,
conversational=False,
verbose=False,
enforce_privacy=False,
save_charts=False,
save_charts_path="",
enable_cache=True,
middlewares=None,
custom_whitelisted_dependencies=None,
enable_logging=True,
non_default_prompts: Optional[Dict[str, Type[Prompt]]] = None,
callback: Optional[BaseCallback] = None,
):
"""
__init__ method of the Class PandasAI
Expand Down Expand Up @@ -142,8 +142,10 @@ def __init__(
# noinspection PyArgumentList
# https://stackoverflow.com/questions/61226587/pycharm-does-not-recognize-logging-basicconfig-handlers-argument

warnings.warn("`PandasAI` (class) is deprecated since v1.0 and will be removed "
"in a future release. Please use `SmartDataframe` instead.")
warnings.warn(
"`PandasAI` (class) is deprecated since v1.0 and will be removed "
"in a future release. Please use `SmartDataframe` instead."
)

self._config = Config(
conversational=conversational,
Expand All @@ -161,12 +163,12 @@ def __init__(
)

def run(
self,
data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
prompt: str,
show_code: bool = False,
anonymize_df: bool = True,
use_error_correction_framework: bool = True,
self,
data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
prompt: str,
show_code: bool = False,
anonymize_df: bool = True,
use_error_correction_framework: bool = True,
) -> Union[str, pd.DataFrame]:
"""
Run the PandasAI to make Dataframes Conversational.
Expand Down Expand Up @@ -198,12 +200,12 @@ def run(
return self._dl.chat(prompt)

def __call__(
self,
data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
prompt: str,
show_code: bool = False,
anonymize_df: bool = True,
use_error_correction_framework: bool = True,
self,
data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
prompt: str,
show_code: bool = False,
anonymize_df: bool = True,
use_error_correction_framework: bool = True,
) -> Union[str, pd.DataFrame]:
"""
__call__ method of PandasAI class. It calls the `run` method.
Expand Down
11 changes: 7 additions & 4 deletions pandasai/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import json
import logging
from typing import Optional, Union

from . import llm, middlewares, callbacks
from .helpers.path import find_closest
from .schemas.df_config import Config

logger = logging.getLogger(__name__)


def load_config(override_config: Config = None):
def load_config(override_config: Optional[Union[Config, dict]] = None):
config = {}

if override_config is None:
Expand All @@ -27,11 +32,9 @@ def load_config(override_config: Config = None):
if config.get("callback") and not override_config.get("callback"):
config["callback"] = getattr(callbacks, config["callback"])()
except Exception:
pass
logger.error("Could not load configuration", exc_info=True)

if override_config:
config.update(override_config)

config = Config(**config)

return config
6 changes: 3 additions & 3 deletions pandasai/helpers/code_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
class CodeManager:
_dfs: List
_middlewares: List[Middleware] = [ChartsMiddleware()]
_config: Config
_config: Union[Config, dict]
_logger: Logger = None
_additional_dependencies: List[dict] = []
_ast_comparatos_map: dict = {
Expand All @@ -46,12 +46,12 @@ class CodeManager:
def __init__(
self,
dfs: List,
config: Config,
config: Union[Config, dict],
logger: Logger,
):
"""
Args:
config (Config, optional): Config to be used. Defaults to None.
config (Union[Config, dict], optional): Config to be used. Defaults to None.
logger (Logger, optional): Logger to be used. Defaults to None.
"""

Expand Down
4 changes: 4 additions & 0 deletions pandasai/helpers/df_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def df_type(df: DataFrameType) -> str:
Returns:
str: Type of the dataframe
"""
print("*" * 100)
print(df)
print("*" * 100)

if polars_imported and isinstance(df, pl.DataFrame):
return "polars"
elif isinstance(df, pd.DataFrame):
Expand Down
126 changes: 126 additions & 0 deletions pandasai/helpers/df_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from typing import List, Dict
from pydantic import ValidationError
from pydantic import BaseModel
from pandasai.helpers.df_info import DataFrameType, df_type


class DfValidationResult:
    """
    Validation results for a dataframe.

    Instances are truthy when the validation passed and falsy otherwise,
    so a result can be used directly in an `if` statement.

    Attributes:
        passed: Whether the validation passed or not (read-only property).

    The collected errors are exposed through the `errors()` method.
    """

    _passed: bool
    _errors: List[Dict]

    def __init__(self, passed: bool = True, errors: List[Dict] = None):
        """
        Args:
            passed: Whether the validation passed or not.
            errors: List of errors if the validation failed. The list is
                copied, so recording further errors on this result never
                modifies the list owned by the caller.
        """
        self._passed = passed
        # Copy instead of aliasing: add_error() appends in place and must not
        # leak into whatever list the caller handed in.
        self._errors = [] if errors is None else list(errors)

    @property
    def passed(self):
        """Whether the validation passed."""
        return self._passed

    def errors(self) -> List[Dict]:
        """
        Returns:
            The errors recorded so far: the entries given at construction
            followed by any message added through `add_error`.
        """
        return self._errors

    def add_error(self, error_message: str):
        """
        Add an error message to the validation results and mark the
        validation as failed.

        Args:
            error_message: Error message to add.
        """
        self._passed = False
        self._errors.append(error_message)

    def __bool__(self) -> bool:
        """
        Define the truthiness of the result: True only when validation passed.
        """
        return self.passed


class DfValidator:
    """
    Check the rows of a dataframe against a Pydantic schema.

    Attributes:
        df: dataframe to be validated
    """

    _df: DataFrameType

    def __init__(self, df: DataFrameType):
        """
        Args:
            df: dataframe to be validated
        """
        self._df = df

    def _validate_batch(self, schema, df_json: List[Dict]):
        """
        Validate a list of row dictionaries against the schema.

        Args:
            schema: Pydantic schema describing a single row
            df_json: rows of the dataframe, one dict per row
        Returns:
            list of errors (empty when every row is valid)
        """
        try:
            # Wrap the row schema in a throwaway model so that all rows are
            # validated by a single model instantiation.
            class PdVal(BaseModel):
                df: List[schema]

            PdVal(df=df_json)
        except ValidationError as exc:
            return exc.errors()
        else:
            return []

    def _df_to_list_of_dict(self, df: DataFrameType, dataframe_type: str) -> List[Dict]:
        """
        Convert a dataframe into a list with one dict per row.

        Only "pandas" and "polars" are handled; any other type name yields
        an empty list.

        Args:
            df: dataframe to be converted
            dataframe_type: type of dataframe
        Returns:
            list of dict of dataframe rows
        """
        if dataframe_type == "pandas":
            return df.to_dict(orient="records")
        if dataframe_type == "polars":
            return df.to_dicts()
        return []

    def validate(self, schema: BaseModel) -> DfValidationResult:
        """
        Validate every row of the wrapped dataframe.

        Args:
            schema: Pydantic schema to be validated for the dataframe row
        Returns:
            Validation results
        Raises:
            ValueError: if `df_type` returns None for the dataframe
        """
        kind = df_type(self._df)
        if kind is None:
            raise ValueError("Unsupported DataFrame")

        rows: List[Dict] = self._df_to_list_of_dict(self._df, kind)
        failures = self._validate_batch(schema, rows)

        # The validation passes exactly when no errors were reported.
        return DfValidationResult(not failures, failures)
18 changes: 11 additions & 7 deletions pandasai/llm/falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
>>> from pandasai.llm.falcon import Falcon
"""

import warnings

from ..helpers import load_dotenv
from .base import HuggingFaceLLM
Expand All @@ -17,19 +17,23 @@


class Falcon(HuggingFaceLLM):
    """Falcon LLM API (Deprecated: Kept for backwards compatibility)

    A base HuggingFaceLLM class is extended to use Falcon model.
    """

    api_token: str
    _api_url: str = (
        "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
    )
    _max_retries: int = 30

    def __init__(self, **kwargs):
        """
        Emit a deprecation warning, then defer to the HuggingFaceLLM
        constructor.

        Args:
            **kwargs: forwarded unchanged to `HuggingFaceLLM.__init__`.
        """
        # Implicit string concatenation keeps the message on one line; a
        # triple-quoted literal would carry newlines and source indentation
        # into the warning text. stacklevel=2 attributes the warning to the
        # code instantiating Falcon rather than to this module.
        warnings.warn(
            "Falcon is deprecated and will be removed in a future release. "
            "Please use langchain.llms.HuggingFaceHub instead, although please "
            "be aware that it may perform poorly.",
            stacklevel=2,
        )
        super().__init__(**kwargs)

    @property
    def type(self) -> str:
        """Identifier of this LLM wrapper."""
        return "falcon"
Loading

0 comments on commit 9955000

Please sign in to comment.