
feat[Agent]: add agent conversation code #584

Merged: 15 commits, Sep 24, 2023
53 changes: 53 additions & 0 deletions docs/examples.md
@@ -206,3 +206,56 @@ print(paid_from_males_df)

# [247 rows x 11 columns]
```

## Working with Agent

With the chat agent, you can engage in dynamic conversations where the agent retains context throughout the discussion. This enables you to have more interactive and meaningful exchanges.

**Key Features**

- **Context Retention:** The agent remembers the conversation history, allowing for seamless, context-aware interactions.

- **Clarification Questions:** The `clarification_questions` method returns questions the agent would ask to clarify your query, helping ensure it has fully understood your request.

- **Explanation:** The `explain` method is available to obtain detailed explanations of how the agent arrived at a particular solution or response. It offers transparency and insights into the agent's decision-making process.

Feel free to initiate conversations, seek clarifications, and explore explanations to enhance your interactions with the chat agent!

```python
import pandas as pd
from pandasai import Agent

from pandasai.llm.openai import OpenAI

employees_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Name": ["John", "Emma", "Liam", "Olivia", "William"],
"Department": ["HR", "Sales", "IT", "Marketing", "Finance"],
}

salaries_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Salary": [5000, 6000, 4500, 7000, 5500],
}

employees_df = pd.DataFrame(employees_data)
salaries_df = pd.DataFrame(salaries_data)


llm = OpenAI("OPENAI_API_KEY")
agent = Agent([employees_df, salaries_df], config={"llm": llm}, memory_size=10)

# Chat with the agent
response = agent.chat("Who gets paid the most?")
print(response)

# Get Clarification Questions
questions = agent.clarification_questions()

for question in questions:
print(question)

# Explain how the chat response is generated
response = agent.explain()
print(response)
```
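For reference, the kind of answer the agent produces for "Who gets paid the most?" can be sketched in plain pandas. This is a hypothetical equivalent for illustration only, not the code the agent actually generates:

```python
import pandas as pd

employees_df = pd.DataFrame({
    "EmployeeID": [1, 2, 3, 4, 5],
    "Name": ["John", "Emma", "Liam", "Olivia", "William"],
})
salaries_df = pd.DataFrame({
    "EmployeeID": [1, 2, 3, 4, 5],
    "Salary": [5000, 6000, 4500, 7000, 5500],
})

# Join the two frames on EmployeeID, then pick the row with the highest salary
merged = employees_df.merge(salaries_df, on="EmployeeID")
top_earner = merged.loc[merged["Salary"].idxmax(), "Name"]
print(top_earner)  # Olivia
```

The agent's value over this is that the join and aggregation are generated from the natural-language question, with the conversation history available for follow-ups.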
37 changes: 37 additions & 0 deletions examples/agent.py
@@ -0,0 +1,37 @@
import pandas as pd
from pandasai import Agent

from pandasai.llm.openai import OpenAI

employees_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Name": ["John", "Emma", "Liam", "Olivia", "William"],
"Department": ["HR", "Sales", "IT", "Marketing", "Finance"],
}

salaries_data = {
"EmployeeID": [1, 2, 3, 4, 5],
"Salary": [5000, 6000, 4500, 7000, 5500],
}

employees_df = pd.DataFrame(employees_data)
salaries_df = pd.DataFrame(salaries_data)


llm = OpenAI("OPENAI_API_KEY")
agent = Agent([employees_df, salaries_df], config={"llm": llm}, memory_size=10)

# Chat with the agent
response = agent.chat("Who gets paid the most?")
print(response)


# Get Clarification Questions
questions = agent.clarification_questions()

for question in questions:
print(question)

# Explain how the chat response is generated
response = agent.explain()
print(response)
3 changes: 2 additions & 1 deletion pandasai/__init__.py
@@ -44,6 +44,7 @@
from .callbacks.base import BaseCallback
from .schemas.df_config import Config
from .helpers.cache import Cache
from .agent import Agent

__version__ = importlib.metadata.version(__package__ or __name__)

@@ -257,4 +258,4 @@ def clear_cache(filename: str = None):
cache.clear()


__all__ = ["PandasAI", "SmartDataframe", "SmartDatalake", "clear_cache"]
__all__ = ["PandasAI", "SmartDataframe", "SmartDatalake", "Agent", "clear_cache"]
97 changes: 97 additions & 0 deletions pandasai/agent/__init__.py
@@ -0,0 +1,97 @@
import json
from typing import Union, List, Optional
from pandasai.helpers.df_info import DataFrameType
from pandasai.helpers.logger import Logger
from pandasai.helpers.memory import Memory
from pandasai.prompts.clarification_questions_prompt import ClarificationQuestionPrompt
from pandasai.prompts.explain_prompt import ExplainPrompt
from pandasai.schemas.df_config import Config
from pandasai.smart_datalake import SmartDatalake


class Agent:
"""
Agent class to improve the conversational experience in PandasAI
"""

_lake: SmartDatalake = None
_logger: Optional[Logger] = None

def __init__(
self,
dfs: Union[DataFrameType, List[DataFrameType]],
config: Optional[Union[Config, dict]] = None,
logger: Optional[Logger] = None,
memory_size: int = 1,
):
"""
Args:
dfs (Union[DataFrameType, List[DataFrameType]]): DataFrame can be Pandas,
Polars or Database connectors
memory_size (int, optional): Conversation history to use during chat.
Defaults to 1.
"""

if not isinstance(dfs, list):
dfs = [dfs]

Comment on lines +35 to +37 (Contributor):

The code is assuming that if dfs is not a list, it should be converted into a list. This might lead to unexpected behavior if dfs is of an unsupported type. It would be better to explicitly check for supported types and raise an error if an unsupported type is provided.

-        if not isinstance(dfs, list):
-            dfs = [dfs]
+        if isinstance(dfs, DataFrameType):
+            dfs = [dfs]
+        elif not isinstance(dfs, list):
+            raise TypeError("dfs must be a DataFrameType or a list of DataFrameType")

self._lake = SmartDatalake(dfs, config, logger, memory=Memory(memory_size))
self._logger = self._lake.logger

def chat(self, query: str, output_type: Optional[str] = None):
"""
Simulate a chat interaction with the assistant on the dataframe(s).
"""
try:
result = self._lake.chat(query, output_type=output_type)
return result
except Exception as exception:
return (
"Unfortunately, I was not able to get your answers, "
"because of the following error:\n"
f"\n{exception}\n"
)
Comment on lines +46 to +53 (Contributor):

The exception handling here is too broad, which can make debugging difficult because it hides the details of what went wrong. Consider catching specific exceptions that you expect might occur during the execution of this block. If you want to catch all exceptions, at least log the full traceback to help with debugging.
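One way to keep the friendly return value while preserving debuggability, as this review comment suggests, is to log the full traceback before returning. The sketch below is illustrative: `safe_chat`, `boom`, and the logger name are hypothetical, not part of the PR:

```python
import logging
import traceback

logger = logging.getLogger("agent_example")

def safe_chat(run_query, query: str) -> str:
    """Run a chat callable, logging the full traceback on failure."""
    try:
        return run_query(query)
    except Exception as exception:
        # Log the complete stack trace so debugging information is not lost,
        # while the caller still receives a readable message
        logger.error("chat failed:\n%s", traceback.format_exc())
        return (
            "Unfortunately, I was not able to get your answers, "
            f"because of the following error:\n\n{exception}\n"
        )

# Example: a failing backend still yields a readable message
def boom(_query):
    raise ValueError("LLM unavailable")

print(safe_chat(boom, "Who gets paid the most?"))
```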


def clarification_questions(self) -> List[str]:
"""
Generate clarification questions based on the data
"""
prompt = ClarificationQuestionPrompt(
self._lake.dfs, self._lake._memory.get_conversation()
)

result = self._lake.llm.call(prompt)
self._logger.log(
f"""Clarification Questions: {result}
"""
)
questions: List[str] = json.loads(result)
return questions[:3]
Comment on lines +59 to +69 (Contributor):

The result from self._lake.llm.call(prompt) is directly loaded as JSON and returned. There's no error handling in case the result is not valid JSON, which could cause the function to crash. Consider adding error handling around json.loads(result).
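A defensive variant of the parsing step could look like the sketch below, under the assumption that a malformed LLM reply should yield an empty question list rather than a crash; the function name is illustrative:

```python
import json
from typing import List

def parse_clarification_questions(raw: str, max_questions: int = 3) -> List[str]:
    """Parse an LLM reply expected to contain a JSON array of strings."""
    try:
        questions = json.loads(raw)
    except json.JSONDecodeError:
        # Malformed reply: fall back to no clarification questions
        return []
    if not isinstance(questions, list):
        # The model returned valid JSON that is not an array
        return []
    # Keep only string entries, capped at max_questions
    return [q for q in questions if isinstance(q, str)][:max_questions]

print(parse_clarification_questions('["Which year?", "Which team?"]'))
print(parse_clarification_questions("not json"))
```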


def start_new_conversation(self):
"""
Clears the previous conversation
"""
self._lake._memory.clear()

def explain(self) -> str:
"""
Returns an explanation of how the generated code arrived at the solution
"""
try:
prompt = ExplainPrompt(
self._lake._memory.get_conversation(),
self._lake.last_code_executed,
)
response = self._lake.llm.call(prompt)
self._logger.log(
f"""Explanation: {response}
"""
)
return response
except Exception as exception:
return (
"Unfortunately, I was not able to explain, "
"because of the following error:\n"
f"\n{exception}\n"
)
Comment on lines +92 to +97 (Contributor):

Error handling in these methods only returns a string with the error message. It might be better to log the exception and re-raise it after logging. This way, the caller of the method can decide how to handle the exception. Also, this will help in debugging as the stack trace will be available in the logs.

-            return (
-                "Unfortunately, I was not able to get your answers, "
-                "because of the following error:\n"
-                f"\n{exception}\n"
-            )
+            self._logger.log(
+                "Unfortunately, I was not able to get your answers, "
+                "because of the following error:\n"
+                f"\n{exception}\n"
+            )
+            raise

...

-            return (
-                "Unfortunately, I was not able to explain, "
-                "because of the following error:\n"
-                f"\n{exception}\n"
-            )
+            self._logger.log(
+                "Unfortunately, I was not able to explain, "
+                "because of the following error:\n"
+                f"\n{exception}\n"
+            )
+            raise

Comment on lines +81 to +97 (Contributor):

Similar to the previous comment, the exception handling here is too broad. Consider catching specific exceptions that you expect might occur during the execution of this block. If you want to catch all exceptions, at least log the full traceback to help with debugging.

11 changes: 9 additions & 2 deletions pandasai/helpers/memory.py
@@ -5,9 +5,11 @@ class Memory:
"""Memory class to store the conversations"""

_messages: list
_memory_size: int

def __init__(self):
def __init__(self, memory_size: int = 1):
self._messages = []
self._memory_size = memory_size
Comment on lines +10 to +12 (Contributor):

The __init__ method now accepts an optional parameter memory_size which defaults to 1. This is used to limit the number of stored messages in memory. However, there's no logic implemented yet to enforce this limit when adding new messages. Consider adding a check in the add method to remove the oldest message(s) when the limit is exceeded.

     def add(self, message: str, is_user: bool):
         self._messages.append({"message": message, "is_user": is_user})
+         while len(self._messages) > self._memory_size:
+             self._messages.pop(0)


def add(self, message: str, is_user: bool):
self._messages.append({"message": message, "is_user": is_user})
@@ -21,7 +23,12 @@ def all(self) -> list:
def last(self) -> dict:
return self._messages[-1]

def get_conversation(self, limit: int = 1) -> str:
def get_conversation(self, limit: int = None) -> str:
"""
Returns the conversation messages based on limit parameter
or default memory size
"""
limit = self._memory_size if limit is None else limit
return "\n".join(
[
f"{f'User {i+1}' if message['is_user'] else f'Assistant {i}'}: "
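The trimming behavior the review comment on `Memory` suggests can be exercised in isolation with a minimal, self-contained class. This is a sketch mirroring the described behavior, not the library's actual `Memory` implementation:

```python
class BoundedMemory:
    """Keep at most `memory_size` messages, discarding the oldest first."""

    def __init__(self, memory_size: int = 1):
        self._messages = []
        self._memory_size = memory_size

    def add(self, message: str, is_user: bool):
        self._messages.append({"message": message, "is_user": is_user})
        # Enforce the size limit by dropping the oldest messages
        while len(self._messages) > self._memory_size:
            self._messages.pop(0)

    def all(self):
        return list(self._messages)

memory = BoundedMemory(memory_size=2)
for text in ["first", "second", "third"]:
    memory.add(text, is_user=True)
print([m["message"] for m in memory.all()])  # ['second', 'third']
```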
49 changes: 49 additions & 0 deletions pandasai/prompts/clarification_questions_prompt.py
@@ -0,0 +1,49 @@
""" Prompt to get clarification questions
You are provided with the following pandas DataFrames:

{dataframes}

<conversation>
{conversation}
</conversation>

Based on the conversation, are there any clarification questions that a senior data scientist would ask? These questions are for non-technical people; only ask questions they could pose given low technical expertise and no knowledge of how the dataframes are structured.

Return the JSON array of the clarification questions. If there is no clarification question, return an empty array.

Json:
""" # noqa: E501


from typing import List
import pandas as pd
from .base import Prompt


class ClarificationQuestionPrompt(Prompt):
"""Prompt to get clarification questions"""

text: str = """
You are provided with the following pandas DataFrames:

{dataframes}

<conversation>
{conversation}
</conversation>

Based on the conversation, are there any clarification questions
that a senior data scientist would ask? These questions are for
non-technical people; only ask questions they could pose given low
technical expertise and no knowledge of how the dataframes are structured.

Return the JSON array of the clarification questions.

If there is no clarification question, return an empty array.

Json:
"""

def __init__(self, dataframes: List[pd.DataFrame], conversation: str):
self.set_var("dfs", dataframes)
self.set_var("conversation", conversation)
44 changes: 44 additions & 0 deletions pandasai/prompts/explain_prompt.py
@@ -0,0 +1,44 @@
""" Prompt to explain code generation by the LLM
The previous conversation we had

<Conversation>
{conversation}
</Conversation>

Based on the last conversation you generated the following code:

<Code>
{code}
</Code>

Explain, in terms a non-technical person can understand, how you came up
with this code, without mentioning technical details or the libraries used.

"""
from .base import Prompt


class ExplainPrompt(Prompt):
"""Prompt to explain code generation by the LLM"""

text: str = """
The previous conversation we had

<Conversation>
{conversation}
</Conversation>

Based on the last conversation you generated the following code:

<Code>
{code}
</Code>

Explain, in terms a non-technical person can understand, how you came up
with this code, without mentioning technical details or the libraries used.

"""
Comment on lines +24 to +40 (Contributor):

The text attribute in the ExplainPrompt class is a class variable, which means it's shared across all instances of this class. If you modify it in one instance, it will affect all other instances. This could lead to unexpected behavior if multiple instances of ExplainPrompt are used concurrently. Consider moving this attribute to the instance level (inside __init__) to avoid potential issues.
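The sharing pitfall this comment describes is easiest to see with a mutable class attribute. The sketch below is a generic illustration, independent of the `Prompt` base class; both class names are hypothetical:

```python
class SharedTemplate:
    # Class attribute: one object shared by every instance
    parts = []

    def add_part(self, part: str):
        self.parts.append(part)  # mutates the list shared by all instances

class PerInstanceTemplate:
    def __init__(self):
        # Instance attribute: each instance gets its own list
        self.parts = []

    def add_part(self, part: str):
        self.parts.append(part)

a, b = SharedTemplate(), SharedTemplate()
a.add_part("conversation")
print(b.parts)  # ['conversation'] — the mutation leaked across instances

c, d = PerInstanceTemplate(), PerInstanceTemplate()
c.add_part("conversation")
print(d.parts)  # [] — each instance is isolated
```

Note that an immutable string class attribute like `text` is only at risk if it is mutated via shared state or reassigned on the class itself; moving it into `__init__` removes the ambiguity.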


def __init__(self, conversation: str, code: str):
self.set_var("conversation", conversation)
self.set_var("code", code)
11 changes: 10 additions & 1 deletion pandasai/smart_datalake/__init__.py
@@ -305,6 +305,7 @@ def chat(self, query: str, output_type: Optional[str] = None):
"save_charts_path": self._config.save_charts_path.rstrip("/"),
"output_type_hint": output_type_helper.template_hint,
}

generate_python_code_instruction = self._get_prompt(
"generate_python_code",
default_prompt=GeneratePythonCodePrompt,
@@ -623,7 +624,7 @@ def last_code_generated(self):

@last_code_generated.setter
def last_code_generated(self, last_code_generated: str):
self._code_manager._last_code_generated = last_code_generated
self._last_code_generated = last_code_generated

@property
def last_code_executed(self):
@@ -644,3 +645,11 @@ def last_error(self):
@last_error.setter
def last_error(self, last_error: str):
self._last_error = last_error

@property
def dfs(self):
return self._dfs

@property
def memory(self):
return self._memory