From ebf999520c40b93d377ffe35ddd10a68d649cc1e Mon Sep 17 00:00:00 2001
From: Tanmaypatil123
Date: Tue, 24 Oct 2023 12:39:18 +0530
Subject: [PATCH] Modification in pipeline

---
Review notes (git am ignores everything between the "---" line above and
the first "diff --git" line):

* PromptRequestLogic.call iterates the dtypes Series with .items().
  The submitted .iteritems() spelling was removed in pandas 2.0, while
  .items() is available on both older and newer pandas.
* PromptRequestLogic.call now forwards its `config` argument to
  SmartDataframe. The submitted code passed the `Config` class object
  itself, so the caller's configuration was never used.
* Pipeline.__init__ stores an empty list when `logics` is None (its
  default). Pipeline.execute() iterates self._logics, so the default
  previously ended in "TypeError: 'NoneType' object is not iterable";
  with no logic units execute() now returns self._df untouched.
* NOTE(review): Pipeline.execute keeps its pre-existing @abstractmethod
  decorator even though it now has a concrete body and Pipeline is
  declared without an ABC base. Consider dropping it in a follow-up.
* NOTE(review): this patch was re-flowed from a copy whose newlines and
  indentation had been collapsed. Hunk line counts and offsets all check
  out, but the wrap point inside the three two-line context docstrings
  (BaseLogic.call, PromptRequestLogic.call, Pipeline.execute) is a best
  guess. Confirm against the base file before applying with strict
  context matching.

 pandasai/pipelines/__init__.py |  3 ++
 pandasai/pipelines/base.py     | 59 ++++++++++++++++++++++++++++------
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/pandasai/pipelines/__init__.py b/pandasai/pipelines/__init__.py
index e69de29bb..43464c0d8 100644
--- a/pandasai/pipelines/__init__.py
+++ b/pandasai/pipelines/__init__.py
@@ -0,0 +1,3 @@
+from .base import Pipeline, BaseLogic, PromptRequestLogic
+
+__all__ = ["Pipeline", "BaseLogic", "PromptRequestLogic"]
diff --git a/pandasai/pipelines/base.py b/pandasai/pipelines/base.py
index d56030b59..b7261d808 100644
--- a/pandasai/pipelines/base.py
+++ b/pandasai/pipelines/base.py
@@ -1,9 +1,12 @@
 from ..schemas.df_config import Config
-from typing import Union, Optional
+from typing import Union, Optional, List
 from pandasai.responses.context import Context
 from ..helpers.logger import Logger
 from pandasai.responses.response_parser import ResponseParser
 from abc import ABC, abstractmethod
+import pandas as pd
+import numpy as np
+from pandasai import SmartDataframe
 
 
 class BaseLogic(ABC):
@@ -11,15 +14,18 @@ class BaseLogic(ABC):
     Logic units for pipeline.
     """
 
+    _config: Config = None
+    _logger: Logger = None
+
     def __init__(self):
         pass
 
     @abstractmethod
-    def call(self, input_):
+    def call(self, config: Config, logger: Logger, input_):
         """
         This method will return output according to
         Implementation."""
-        pass
+        raise NotImplementedError("call method is not implemented.")
 
 
 class PromptRequestLogic(BaseLogic):
@@ -27,15 +33,40 @@ class PromptRequestLogic(BaseLogic):
     Logic units for pipeline.
     """
 
-    def __init__(self):
-        pass
+    def __init__(
+        self,
+    ):
+        super().__init__()
 
-    def call(self,input_):
+    def call(self, config: Config, logger: Logger, input_):
         """
         This method will return output according to
         implementation.
         """
-        pass
+        head = input_
+        new_data = []
+        column_data_types = head.dtypes
+
+        ## generate 100 synthetic examples
+        for _ in range(100):
+            new_row = {}
+            for column_name, data_type in column_data_types.items():
+                if np.issubdtype(data_type, np.number):
+                    # Generate random numbers for numerical columns
+                    mean = head[column_name].mean()
+                    std_dev = head[column_name].std()
+                    new_value = np.random.normal(mean, std_dev)
+                else:
+                    # Generate random values for non-numerical columns
+                    unique_values = head[column_name].unique()
+                    new_value = np.random.choice(unique_values)
+                new_row[column_name] = new_value
+
+            new_data.append(new_row)
+
+        ## convert it into smartdataframe
+        df = pd.DataFrame(new_data)
+        return SmartDataframe(df, config=config)
 
 
 class Pipeline:
@@ -45,13 +76,14 @@ class Pipeline:
 
     _config: Config = None
     _logger: Logger
-    _logics: list(BaseLogic)
+    _logics: List[BaseLogic]
 
     def __init__(
         self,
         config: Union[Config, dict] = None,
         context: Optional[Context] = None,
-        logics: Optional(list(PromptRequestLogic)) = None,
+        logics: Optional[List[BaseLogic]] = None,
+        df: pd.DataFrame = None,
     ):
         """
         Intialize the pipeline with given context and configuration
@@ -78,10 +110,17 @@ def __init__(
         else:
             self._response_parser = ResponseParser(context)
 
+        self._logics = logics if logics is not None else []
+        self._df = df
+
     @abstractmethod
     def execute(self):
         """
         This functions is responsible to loop through logic and
         Implementation.
         """
-        pass
+        result = self._df
+        for logic in self._logics:
+            result = logic.call(self._config, self._logger, result)
+
+        return result