Skip to content

Commit

Permalink
Modification in pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Tanmaypatil123 committed Oct 24, 2023
1 parent 380235f commit ebf9995
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 10 deletions.
3 changes: 3 additions & 0 deletions pandasai/pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export the pipeline building blocks defined in ``base`` so they can be
# imported directly from the ``pipelines`` package.
from .base import Pipeline, BaseLogic, PromptRequestLogic

# Names exposed by ``from pandasai.pipelines import *``.
__all__ = ["Pipeline", "BaseLogic", "PromptRequestLogic"]
59 changes: 49 additions & 10 deletions pandasai/pipelines/base.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,72 @@
from ..schemas.df_config import Config
from typing import Union, Optional
from typing import Union, Optional, List
from pandasai.responses.context import Context
from ..helpers.logger import Logger
from pandasai.responses.response_parser import ResponseParser
from abc import ABC, abstractmethod
import pandas as pd
import numpy as np
from pandasai import SmartDataframe


class BaseLogic(ABC):
    """
    Abstract base class for a single logic unit of a pipeline.

    Concrete units subclass this and implement ``call``; a class that does
    not override ``call`` cannot be instantiated.
    """

    # Class-level placeholders; nothing in this class assigns them.
    _config: Config = None
    _logger: Logger = None

    def __init__(self):
        pass

    @abstractmethod
    def call(self, config: Config, logger: Logger, input_):
        """
        Run this logic unit on ``input_`` and return its output.

        Args:
            config: configuration object supplied by the caller.
            logger: logger supplied by the caller.
            input_: value this unit should process.

        Returns:
            Whatever the concrete implementation produces.

        Raises:
            NotImplementedError: if a subclass delegates to this base
                implementation instead of providing its own logic.
        """
        raise NotImplementedError("call method is not implemented.")


class PromptRequestLogic(BaseLogic):
    """
    Logic unit that builds a synthetic dataframe from a sample dataframe.
    """

    def __init__(self):
        super().__init__()

    def call(self, config: Config, logger: Logger, input_):
        """
        Generate 100 synthetic rows modelled on ``input_``.

        Numeric columns are sampled from a normal distribution using the
        column's mean and standard deviation; every other column is sampled
        uniformly from the column's unique values.

        Args:
            config: configuration forwarded to the returned SmartDataframe.
            logger: not used by this unit; accepted to match
                ``BaseLogic.call``.
            input_: pandas DataFrame used as the sample to imitate.

        Returns:
            A SmartDataframe wrapping the 100 generated rows.
        """
        head = input_

        # Column statistics do not change between rows, so compute them once
        # instead of on every one of the 100 iterations.
        numeric_stats = {}
        categorical_values = {}
        # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is the
        # supported spelling on both old and new versions.
        for column_name, data_type in head.dtypes.items():
            column = head[column_name]
            if np.issubdtype(data_type, np.number):
                std_dev = column.std()
                # A one-row sample has an undefined (NaN) standard deviation,
                # which would turn every generated value into NaN.
                if pd.isna(std_dev):
                    std_dev = 0.0
                numeric_stats[column_name] = (column.mean(), std_dev)
            else:
                categorical_values[column_name] = column.unique()

        # Generate 100 synthetic examples.
        new_data = []
        for _ in range(100):
            new_row = {}
            for column_name in head.columns:
                if column_name in numeric_stats:
                    mean, std_dev = numeric_stats[column_name]
                    new_row[column_name] = np.random.normal(mean, std_dev)
                else:
                    new_row[column_name] = np.random.choice(
                        categorical_values[column_name]
                    )
            new_data.append(new_row)

        # Wrap the result in a SmartDataframe, passing the config that was
        # handed to this call (not the ``Config`` class itself).
        df = pd.DataFrame(new_data)
        return SmartDataframe(df, config=config)


class Pipeline:
Expand All @@ -45,13 +76,14 @@ class Pipeline:

_config: Config = None
_logger: Logger
_logics: list(BaseLogic)
_logics: List[BaseLogic]

def __init__(
self,
config: Union[Config, dict] = None,
context: Optional[Context] = None,
logics: Optional(list(PromptRequestLogic)) = None,
logics: Optional[List[BaseLogic]] = None,
df: pd.DataFrame = None,
):
"""
Intialize the pipeline with given context and configuration
Expand All @@ -78,10 +110,17 @@ def __init__(
else:
self._response_parser = ResponseParser(context)

self._logics = logics
self._df = df

def execute(self):
    """
    Run every configured logic unit in order and return the final output.

    The dataframe given to the constructor is the input of the first unit;
    each unit's output becomes the input of the next one.

    Returns:
        The output of the last logic unit, or the initial dataframe
        unchanged when no logic units were configured.
    """
    result = self._df
    # ``logics`` defaults to None in the constructor, so treat a missing
    # list as "no steps" instead of failing while iterating over None.
    for logic in self._logics or []:
        result = logic.call(self._config, self._logger, result)

    return result

0 comments on commit ebf9995

Please sign in to comment.