feat[Agent]: add agent conversation code #584

Merged: 15 commits, Sep 24, 2023
Changes from 2 commits
28 changes: 28 additions & 0 deletions examples/agent.py
@@ -0,0 +1,28 @@
import pandas as pd
from pandasai.agent import Agent
Collaborator:
We can change it to:
from pandasai import Agent


from pandasai.llm.openai import OpenAI

employees_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Name": ["John", "Emma", "Liam", "Olivia", "William"],
"Department": ["HR", "Sales", "IT", "Marketing", "Finance"],
}

salaries_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Salary": [5000, 6000, 4500, 7000, 5500],
}

employees_df = pd.DataFrame(employees_data)
salaries_df = pd.DataFrame(salaries_data)


llm = OpenAI("OPEN_API")
agent = Agent([employees_df, salaries_df], config={"llm": llm}, memory_size=10)
response = agent.chat("Who gets paid the most?")
print(response)
questions = agent.clarification_questions()
print(questions)
response = agent.chat("Which department he belongs to?")
print(response)
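
As a side note, the example passes the placeholder string "OPEN_API" as the key. A small hedged variant that reads the key from the environment instead (assuming the OpenAI wrapper accepts an api_token argument; the variable name below is just an example):

import os

from pandasai.llm.openai import OpenAI

# Assumption: the wrapper accepts the key as api_token; adjust to the actual signature.
llm = OpenAI(api_token=os.environ["OPENAI_API_KEY"])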
3 changes: 2 additions & 1 deletion pandasai/__init__.py
@@ -44,6 +44,7 @@
from .callbacks.base import BaseCallback
from .schemas.df_config import Config
from .helpers.cache import Cache
from .agent import Agent

__version__ = importlib.metadata.version(__package__ or __name__)

@@ -257,4 +258,4 @@ def clear_cache(filename: str = None):
cache.clear()


__all__ = ["PandasAI", "SmartDataframe", "SmartDatalake", "clear_cache"]
__all__ = ["PandasAI", "SmartDataframe", "SmartDatalake", "Agent", "clear_cache"]
94 changes: 94 additions & 0 deletions pandasai/agent/__init__.py
@@ -0,0 +1,94 @@
import json
from typing import Union, List, Optional
from pandasai.helpers.df_info import DataFrameType
from pandasai.helpers.logger import Logger
from pandasai.helpers.memory import Memory
from pandasai.prompts.clarification_questions_prompt import ClarificationQuestionPrompt
from pandasai.schemas.df_config import Config

from pandasai.smart_datalake import SmartDatalake


class Agent:
"""
Agent class to improve the conversational experience in PandasAI
"""

_memory: Memory
Collaborator:
Here we should rely on the memory from the SmartDatalake instead, since we have access to it.
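
A minimal sketch of that direction, assuming SmartDatalake exposes its Memory instance (the memory property used below is an assumption, not something shown in this diff):

from pandasai.smart_datalake import SmartDatalake


class Agent:
    """Agent that reuses the SmartDatalake's memory instead of keeping its own copy."""

    def __init__(self, dfs, config=None, logger=None):
        if not isinstance(dfs, list):
            dfs = [dfs]
        self._lake = SmartDatalake(dfs, config, logger)
        self.logger = self._lake.logger

    def _get_conversation(self) -> str:
        # Assumption: the lake exposes its Memory instance, e.g. via a memory property,
        # and Memory.all() returns the stored message dicts (as in the diff above).
        return "\n".join(
            f"{'Question' if m['is_user'] else 'Answer'}: {m['message']}"
            for m in self._lake.memory.all()
        )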

_lake: SmartDatalake = None
logger: Logger = None
Contributor:
The logger attribute is initialized as None but it's not checked for None before being used in line 38. This could potentially lead to a NoneType error if the logger is not passed during initialization and later accessed.

-    logger: Logger = None
+    logger: Optional[Logger] = None


Contributor:
The class variables _memory, _lake, and logger are declared but not initialized in the class scope. This could lead to confusion as it might imply that these are static class variables, which they are not. It would be better to declare these variables inside the __init__ method.

class Agent:
    """
    Agent class to improve the conversational experience in PandasAI
    """

- 20:     _memory: Memory
- 21:     _lake: SmartDatalake = None
- 22:     logger: Logger = None
+ 20:     def __init__(
+ 21:         self,
+ 22:         dfs: Union[DataFrameType, List[DataFrameType]],
+ 23:         config: Optional[Union[Config, dict]] = None,
+ 24:         logger: Logger = None,
+ 25:         memory_size: int = 1,
+ 26:     ):
+ 27:         """
+ 28:         Args:
+ 29:             df (Union[SmartDataframe, SmartDatalake]): _description_
+ 30:             memory_size (int, optional): _description_. Defaults to 1.
+ 31:         """
+ 32: 
+ 33:         if not isinstance(dfs, list):
+ 34:             dfs = [dfs]
+ 35: 
+ 36:         self._lake = SmartDatalake(dfs, config, logger)
+ 37:         self.logger = self._lake.logger
+ 38:         # For the conversation multiple the memory size by 2
+ 39:         self._memory = Memory(memory_size * 2)

def __init__(
self,
dfs: Union[DataFrameType, List[DataFrameType]],
config: Optional[Union[Config, dict]] = None,
logger: Logger = None,
memory_size=1,
):
Contributor:
The memory_size parameter in the constructor lacks type hinting, which can lead to confusion about what type of argument should be passed. It's also missing from the docstring. Add an integer type hint and update the docstring accordingly.

-        memory_size=1,
+        memory_size: int = 1,

"""
Args:
df (Union[SmartDataframe, SmartDatalake]): _description_
memory_size (int, optional): _description_. Defaults to 1.
"""

if not isinstance(dfs, list):
dfs = [dfs]

Comment on lines +35 to +37
Contributor:
The code is assuming that if dfs is not a list, it should be converted into a list. This might lead to unexpected behavior if dfs is of an unsupported type. It would be better to explicitly check for supported types and raise an error if an unsupported type is provided.

-        if not isinstance(dfs, list):
-            dfs = [dfs]
+        if isinstance(dfs, DataFrameType):
+            dfs = [dfs]
+        elif not isinstance(dfs, list):
+            raise TypeError("dfs must be a DataFrameType or a list of DataFrameType")
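
One caveat with the suggestion above: if DataFrameType is a typing alias (for example a Union), isinstance() will not accept it directly. A standalone sketch of an explicit check against concrete frame classes (the SUPPORTED_FRAME_TYPES tuple is an assumption for illustration):

import pandas as pd

# Assumption: the concrete classes behind DataFrameType; extend if other frame types are supported.
SUPPORTED_FRAME_TYPES = (pd.DataFrame,)


def normalize_dfs(dfs):
    """Return a list of dataframes, raising a TypeError on unsupported input."""
    if isinstance(dfs, SUPPORTED_FRAME_TYPES):
        return [dfs]
    if isinstance(dfs, list):
        return dfs
    raise TypeError("dfs must be a dataframe or a list of dataframes")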

self._lake = SmartDatalake(dfs, config, logger)
self.logger = self._lake.logger
self._memory = Memory(memory_size * 2)

def _get_conversation(self):
Collaborator:
We already have such a method in the memory.py file. What's the advantage of replicating it instead of using the one from Memory?

Collaborator:
It's important to try to reuse code as much as possible to guarantee maintainability over time!
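
A sketch of that reuse, assuming Memory exposes a get_conversation helper that accepts a limit (a signature implied by the discussion further down, not shown in this diff). It is a drop-in for the _get_conversation method above:

def _get_conversation(self) -> str:
    """Delegate conversation rendering to Memory instead of duplicating it here."""
    # Assumption: Memory.get_conversation(limit=...) formats the stored messages itself.
    return self._memory.get_conversation(limit=self._memory.count())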

"""
Get Conversation from history

"""
return "\n".join(
[
f"{'Question' if message['is_user'] else 'Answer'}: "
f"{message['message']}"
for i, message in enumerate(self._memory.all())
]
)

def chat(self, query: str):
"""
Simulate a chat interaction with the assistant on Dataframe.
"""
self._memory.add(query, True)
conversation = self._get_conversation()
result = self._lake.chat(query, start_conversation=conversation)
self._memory.add(result, False)
return result
Contributor:
The chat method does not handle exceptions. If an exception occurs during the execution of the self._lake.chat() method, it will propagate up the call stack and could potentially crash the application. Consider adding a try/except block to handle exceptions gracefully.
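
A drop-in sketch of that guard for the chat method above; whether to re-raise or return a friendly message is a design choice, and the Logger.log call is an assumption about the project's logger API:

def chat(self, query: str):
    """Simulate a chat interaction, surfacing failures from the underlying lake."""
    try:
        self._memory.add(query, True)
        conversation = self._get_conversation()
        result = self._lake.chat(query, start_conversation=conversation)
        self._memory.add(result, False)
        return result
    except Exception as exception:
        if self.logger is not None:
            self.logger.log(f"Chat failed: {exception}")  # assumed logger API
        raise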


def _get_clarification_prompt(self):
"""
Create a clarification prompt with relevant variables.
"""
prompt = ClarificationQuestionPrompt()
Collaborator:
Let's pass these as arguments instead (both dfs and conversation). Since we'll pass these as arguments, we won't need the _get_clarification_prompt method anymore
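
A sketch of passing both values at construction time, assuming the Prompt base class keeps its set_var mechanism (the constructor signature below is hypothetical):

from pandasai.prompts.base import Prompt


class ClarificationQuestionPrompt(Prompt):
    """Clarification prompt that receives its variables when constructed."""

    def __init__(self, dataframes, conversation):
        super().__init__()
        # Reuse set_var from the base class; no separate _get_clarification_prompt needed.
        self.set_var("dfs", dataframes)
        self.set_var("conversation", conversation)


# In Agent.clarification_questions:
# prompt = ClarificationQuestionPrompt(self._lake.dfs, self._get_conversation())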

prompt.set_var("dfs", self._lake.dfs)
prompt.set_var("conversation", self._get_conversation())
return prompt

def clarification_questions(self):
"""
Generate and return up to three clarification questions based on a given prompt.
"""
try:
prompt = self._get_clarification_prompt()
result = self._lake.llm.generate_code(prompt)
questions = json.loads(result)
except Exception as exception:
return (
"Unfortunately, I was not able to get your clarification questions, "
"because of the following error:\n"
f"\n{exception}\n"
)
Contributor:
While it's good that you're handling exceptions in the clarification_questions method, returning a string with the error message might not be the best approach. It would be more appropriate to log the error and raise the exception again after logging, so that the caller can decide how to handle it.
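
A sketch of the log-and-reraise variant (json is already imported at the top of the module; the Logger.log call is again an assumption about the project's logger API):

def clarification_questions(self):
    """Generate up to three clarification questions, logging failures instead of masking them."""
    try:
        prompt = self._get_clarification_prompt()
        result = self._lake.llm.generate_code(prompt)
        questions = json.loads(result)
    except Exception as exception:
        if self.logger is not None:
            self.logger.log(f"Failed to generate clarification questions: {exception}")
        raise  # let the caller decide how to recover
    return questions[:3]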


return questions[:3]

def start_new_conversation(self):
"""
Clears the previous conversation
"""
self._memory.clear()
Contributor:
The start_new_conversation method clears the memory but does not return any confirmation or status. It would be helpful to return a status message or boolean value indicating whether the operation was successful.
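
A minimal sketch of returning a success flag, if the maintainers decide it adds value:

def start_new_conversation(self) -> bool:
    """Clear the previous conversation and report whether it succeeded."""
    self._memory.clear()
    return True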

7 changes: 6 additions & 1 deletion pandasai/helpers/memory.py
@@ -1,16 +1,21 @@
""" Memory class to store the conversations """
import sys


class Memory:
"""Memory class to store the conversations"""

_messages: list
_max_messages: int
Contributor:
The _max_messages attribute is not defined in the class scope, which might lead to confusion and potential errors. It's better to define it within the __init__ method.


def __init__(self):
def __init__(self, max_messages=sys.maxsize):
self._messages = []
self._max_messages = max_messages
Collaborator:
No need for this (see below)


def add(self, message: str, is_user: bool):
self._messages.append({"message": message, "is_user": is_user})
if len(self._messages) > self._max_messages:
Collaborator:
No need for this, the get_conversation method of memory will automatically limit based on the limit variable that is passed.
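
A standalone sketch of the read-time limiting the reviewer refers to (the exact get_conversation signature is assumed, since it is not part of this diff):

class Memory:
    """Stores conversation messages; trimming happens at read time rather than in add()."""

    def __init__(self):
        self._messages = []

    def add(self, message: str, is_user: bool):
        self._messages.append({"message": message, "is_user": is_user})

    def get_conversation(self, limit: int = 1) -> str:
        # Only the last `limit` messages are rendered, so add() never needs to delete anything.
        return "\n".join(
            f"{'Question' if m['is_user'] else 'Answer'}: {m['message']}"
            for m in self._messages[-limit:]
        )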

del self._messages[:2]

def count(self) -> int:
return len(self._messages)
47 changes: 47 additions & 0 deletions pandasai/prompts/clarification_questions_prompt.py
@@ -0,0 +1,47 @@
""" Prompt to get clarification questions
You are provided with the following pandas DataFrames:

<dataframe>
{dataframe}
</dataframe>
Collaborator:
Let's also change this to

{dataframes}

for consistency!


<conversation>
{conversation}
</conversation>

Based on the conversation, are there any clarification questions that a senior data scientist would ask? These are questions for non technical people, only ask for questions they could ask given low tech expertise and no knowledge about how the dataframes are structured.

Return the JSON array of the clarification questions. If there is no clarification question, return an empty array.

Json:
""" # noqa: E501


from .base import Prompt


class ClarificationQuestionPrompt(Prompt):
"""Prompt to get clarification questions"""

text: str = """
You are provided with the following pandas DataFrames:

<dataframe>
{dataframes}
</dataframe>
Collaborator:
{dataframes} only should be enough, the set_var method in prompts already takes care of wrapping each one individually.
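
An illustrative sketch of the wrapping described; how set_var actually renders each frame is an assumption here:

def _wrap_dataframes(dfs) -> str:
    """Render each dataframe in its own <dataframe> block so the template only needs {dataframes}."""
    return "\n\n".join(f"<dataframe>\n{df}\n</dataframe>" for df in dfs)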


<conversation>
{conversation}
</conversation>

Based on the conversation, are there any clarification questions
that a senior data scientist would ask? These are questions for non technical people,
only ask for questions they could ask given low tech expertise and
no knowledge about how the dataframes are structured.

Return the JSON array of the clarification questions.

If there is no clarification question, return an empty array.

Json:
"""
14 changes: 13 additions & 1 deletion pandasai/smart_datalake/__init__.py
@@ -255,7 +255,12 @@ def _get_cache_key(self) -> str:

return cache_key

def chat(self, query: str, output_type: Optional[str] = None):
def chat(
self,
query: str,
output_type: Optional[str] = None,
start_conversation: Optional[str] = None,
Collaborator:
Let's remove this

Collaborator Author (@ArslanSaleem, Sep 22, 2023):
@gventuri Currently the get_conversation method is called with its default limit=1, which returns only the last message. We would then need to pass the memory size to the SmartDatalake constructor, or rely on the memory for that.
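
A sketch of the first option, with a hypothetical memory_size parameter on SmartDatalake (not part of this diff, and the get_conversation signature is assumed):

from pandasai.helpers.memory import Memory


class SmartDatalake:
    """Hypothetical sketch: let the lake own the conversation length."""

    def __init__(self, dfs, config=None, logger=None, memory_size: int = 1):
        self._memory = Memory()
        self._memory_size = memory_size
        # ... existing initialization would continue here ...

    def _get_conversation(self) -> str:
        # Pass the configured size as the limit instead of the default of 1.
        return self._memory.get_conversation(limit=self._memory_size)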

):
"""
Run a query on the dataframe.

@@ -305,6 +310,9 @@ def chat(self, query: str, output_type: Optional[str] = None):
"save_charts_path": self._config.save_charts_path.rstrip("/"),
"output_type_hint": output_type_helper.template_hint,
}
if start_conversation is not None:
default_values["conversation"] = start_conversation

generate_python_code_instruction = self._get_prompt(
"generate_python_code",
default_prompt=GeneratePythonCodePrompt,
@@ -644,3 +652,7 @@ def last_error(self):
@last_error.setter
def last_error(self, last_error: str):
self._last_error = last_error

@property
def dfs(self):
return self._dfs