feat: use conversational=False by default

As data analysts need a non-conversational answer most of the time, we make PandasAI return the actual dataframe by default instead.
Charisn · Jun 5, 2023 · c865634 · c865634
1 parent eaa78ea
commit c865634
Show file tree

Hide file tree

Showing 8 changed files with 70 additions and 26 deletions.
diff --git a/README.md b/README.md
@@ -34,7 +34,7 @@ pip install pandasai
 
 > Disclaimer: GDP data was collected from [this source](https://ourworldindata.org/grapher/gross-domestic-product?tab=table), published by World Development Indicators - World Bank (2022.05.26) and collected at National accounts data - World Bank / OECD. It relates to the year of 2020. Happiness indexes were extracted from [the World Happiness Report](https://ftnnews.com/images/stories/documents/2020/WHR20.pdf). Another useful [link](https://data.world/makeovermonday/2020w19-world-happiness-report-2020).
 
-PandasAI is designed to be used in conjunction with [pandas](https://github.com/pandas-dev/pandas). It makes Pandas conversational, allowing you to ask questions about your data and get answers back, in the form of pandas DataFrames. 
+PandasAI is designed to be used in conjunction with [pandas](https://github.com/pandas-dev/pandas). It makes Pandas conversational, allowing you to ask questions about your data and get answers back, in the form of pandas DataFrames.
 
 ### Queries
 
@@ -55,7 +55,7 @@ df = pd.DataFrame({
 from pandasai.llm.openai import OpenAI
 llm = OpenAI(api_token="YOUR_API_TOKEN")
 
-pandas_ai = PandasAI(llm, conversational=False)
+pandas_ai = PandasAI(llm)
 pandas_ai(df, prompt='Which are the 5 happiest countries?')
 ```
 
@@ -136,19 +136,24 @@ You can find more examples in the [examples](examples) directory.
 ## Command-Line Tool
 
 Pai is the command-line tool designed to provide a convenient way to interact with PandasAI through a command-line interface (CLI). To access the CLI tool, create a virtualenv for testing purposes and install the project dependencies in your local virtual environment using `pip` by running the following command:
+
 ```
 pip install -e .
 ```
+
 Alternatively, you can use `poetry` to create and activate the virtual environment by running the following command:
+
 ```
 poetry shell
 ```
+
 Inside the activated virtual environment, install the project dependencies by running the following command:
+
 ```
 poetry install
 ```
 
-By following these steps, you will now have the necessary environment to access the CLI tool. 
+By following these steps, you will now have the necessary environment to access the CLI tool.
 
 ```
 pai [OPTIONS]

diff --git a/docs/getting-started.md b/docs/getting-started.md
@@ -34,7 +34,7 @@ df = pd.DataFrame({
 from pandasai.llm.openai import OpenAI
 llm = OpenAI(api_token="YOUR_API_TOKEN")
 
-pandas_ai = PandasAI(llm, conversational=False)
+pandas_ai = PandasAI(llm)
 pandas_ai.run(df, prompt='Which are the 5 happiest countries?')
 ```
 
@@ -73,7 +73,7 @@ from pandasai.llm.openai import OpenAI
 df = pd.read_csv("data/Loan payments data.csv")
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True)
+pandas_ai = PandasAI(llm, verbose=True, conversational=True)
 response = pandas_ai.run(df, "How many loans are from men and have been paid off?")
 print(response)
 # Output: 247 loans have been paid off by men.
@@ -93,7 +93,7 @@ from pandasai.llm.openai import OpenAI
 df = pd.DataFrame(dataframe)
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True, conversational=False)
+pandas_ai = PandasAI(llm, verbose=True)
 response = pandas_ai.run(df, "Calculate the sum of the gdp of north american countries")
 print(response)
 # Output: 20901884461056
@@ -148,11 +148,49 @@ employees_df = pd.DataFrame(employees_data)
 salaries_df = pd.DataFrame(salaries_data)
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True, conversational=False)
+pandas_ai = PandasAI(llm, verbose=True)
 response = pandas_ai.run(
     [employees_df, salaries_df],
     "Who gets paid the most?",
 )
 print(response)
 # Output: Olivia
 ```
+
+### Chain of commands
+
+You can chain commands by passing the output of one command to the next one. In the example below, we first filter the original
+dataframe by gender and then by loans that have been paid off.
+
+```python
+import pandas as pd
+
+from pandasai import PandasAI
+from pandasai.llm.openai import OpenAI
+
+df = pd.read_csv("examples/data/Loan payments data.csv")
+
+llm = OpenAI()
+pandas_ai = PandasAI(llm, verbose=True)
+
+# We filter by males only
+from_males_df = pandas_ai(df, "Filter the dataframe by males")
+paid_from_males_df = pandas_ai(from_males_df, "Filter the dataframe by loans that have been paid off")
+print(paid_from_males_df)
+# Output:
+# [247 rows x 11 columns]
+#          Loan_ID loan_status  Principal  terms effective_date    due_date     paid_off_time  past_due_days  age             education Gender
+# 0    xqd20166231     PAIDOFF       1000     30       9/8/2016   10/7/2016   9/14/2016 19:31            NaN   45  High School or Below   male
+# 3    xqd20160004     PAIDOFF       1000     15       9/8/2016   9/22/2016   9/22/2016 20:00            NaN   27               college   male
+# 5    xqd20160706     PAIDOFF        300      7       9/9/2016   9/15/2016    9/9/2016 13:45            NaN   35       Master or Above   male
+# 6    xqd20160007     PAIDOFF       1000     30       9/9/2016   10/8/2016   10/7/2016 23:07            NaN   29               college   male
+# 7    xqd20160008     PAIDOFF       1000     30       9/9/2016   10/8/2016   10/5/2016 20:33            NaN   36               college   male
+# ..           ...         ...        ...    ...            ...         ...               ...            ...  ...                   ...    ...
+# 294  xqd20160295     PAIDOFF       1000     30      9/14/2016  10/13/2016  10/13/2016 13:00            NaN   36              Bechalor   male
+# 296  xqd20160297     PAIDOFF        800     15      9/14/2016   9/28/2016    9/21/2016 4:42            NaN   27               college   male
+# 297  xqd20160298     PAIDOFF       1000     30      9/14/2016  10/13/2016   10/13/2016 9:00            NaN   29  High School or Below   male
+# 298  xqd20160299     PAIDOFF       1000     30      9/14/2016  10/13/2016   10/13/2016 9:00            NaN   40  High School or Below   male
+# 299  xqd20160300     PAIDOFF       1000     30      9/14/2016  10/13/2016  10/13/2016 11:00            NaN   28               college   male
+
+# [247 rows x 11 columns]
+```
diff --git a/examples/from_csv.py b/examples/from_csv.py
@@ -8,7 +8,7 @@
 df = pd.read_csv("examples/data/Loan payments data.csv")
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True)
+pandas_ai = PandasAI(llm, verbose=True, conversational=True)
 response = pandas_ai(df, "How many loans are from men and have been paid off?")
 print(response)
 # Output: 247 loans have been paid off by men.
diff --git a/examples/from_dataframe.py b/examples/from_dataframe.py
@@ -10,7 +10,7 @@
 df = pd.DataFrame(dataframe)
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True, conversational=False)
+pandas_ai = PandasAI(llm, verbose=True)
 response = pandas_ai(df, "Calculate the sum of the gdp of north american countries")
 print(response)
 # Output: 20901884461056
diff --git a/examples/with_multiple_dataframes.py b/examples/with_multiple_dataframes.py
@@ -1,26 +1,27 @@
 """Example of using PandasAI on multiple Pandas DataFrame"""
 
 import pandas as pd
+
 from pandasai import PandasAI
 from pandasai.llm.openai import OpenAI
 
 employees_data = {
-    'EmployeeID': [1, 2, 3, 4, 5],
-    'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
-    'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
+    "EmployeeID": [1, 2, 3, 4, 5],
+    "Name": ["John", "Emma", "Liam", "Olivia", "William"],
+    "Department": ["HR", "Sales", "IT", "Marketing", "Finance"],
 }
 
 salaries_data = {
-    'EmployeeID': [1, 2, 3, 4, 5],
-    'Salary': [5000, 6000, 4500, 7000, 5500]
+    "EmployeeID": [1, 2, 3, 4, 5],
+    "Salary": [5000, 6000, 4500, 7000, 5500],
 }
 
 employees_df = pd.DataFrame(employees_data)
 salaries_df = pd.DataFrame(salaries_data)
 
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True)
+pandas_ai = PandasAI(llm, verbose=True, conversational=True)
 response = pandas_ai([employees_df, salaries_df], "Who gets paid the most?")
 print(response)
 # Output: Olivia gets paid the most.
diff --git a/examples/with_privacy_enforced.py b/examples/with_privacy_enforced.py
@@ -10,7 +10,7 @@
 df = pd.DataFrame(dataframe)
 
 llm = OpenAI()
-pandas_ai = PandasAI(llm, verbose=True, conversational=False, enforce_privacy=True)
+pandas_ai = PandasAI(llm, verbose=True, enforce_privacy=True)
 response = pandas_ai(
     df,
     "Calculate the sum of the gdp of north american countries",

diff --git a/pandasai/__init__.py b/pandasai/__init__.py
@@ -27,7 +27,7 @@
     from pandasai.llm.openai import OpenAI
     llm = OpenAI(api_token="YOUR_API_TOKEN")
 
-    pandas_ai = PandasAI(llm, conversational=False)
+    pandas_ai = PandasAI(llm)
     pandas_ai(df, prompt='Which are the 5 happiest countries?')
 
     ```
@@ -37,7 +37,7 @@
 import re
 import sys
 from contextlib import redirect_stdout
-from typing import Optional
+from typing import Optional, Union
 
 import astor
 import matplotlib.pyplot as plt
@@ -81,7 +81,7 @@ class PandasAI:
         _verbose (bool, optional): To show the intermediate outputs e.g. python code
         generated and execution step on the prompt. Default to False
         _is_conversational_answer (bool, optional): Whether to return answer in conversational
-        form. Default to True
+        form. Default to False
         _enforce_privacy (bool, optional): Do not display the data on prompt in case of
         Sensitive data. Default to False
         _max_retries (int, optional): max no. of tries to generate code on failure. Default to 3
@@ -99,7 +99,7 @@ class PandasAI:
 
     _llm: LLM
     _verbose: bool = False
-    _is_conversational_answer: bool = True
+    _is_conversational_answer: bool = False
     _enforce_privacy: bool = False
     _max_retries: int = 3
     _is_notebook: bool = False
@@ -118,7 +118,7 @@ class PandasAI:
     def __init__(
         self,
         llm=None,
-        conversational=True,
+        conversational=False,
         verbose=False,
         enforce_privacy=False,
         save_charts=False,
@@ -129,7 +129,7 @@ def __init__(
 
         Args:
             llm (object): LLMs option to be used for API access. Default is None
-            conversational (bool): Whether to return answer in conversational form. Default to True
+            conversational (bool): Whether to return answer in conversational form. Default to False
             verbose (bool): To show the intermediate outputs e.g. python code generated and
              execution step on the prompt.  Default to False
             enforce_privacy (bool): Execute the codes with Privacy Mode ON.  Default to False
@@ -176,7 +176,7 @@ def run(
         show_code: bool = False,
         anonymize_df: bool = True,
         use_error_correction_framework: bool = True,
-    ) -> str:
+    ) -> Union[str, pd.DataFrame]:
         """
         Run the PandasAI to make Dataframes Conversational.
 
@@ -287,7 +287,7 @@ def __call__(
         show_code: bool = False,
         anonymize_df: bool = True,
         use_error_correction_framework: bool = True,
-    ) -> str:
+    ) -> Union[str, pd.DataFrame]:
         """
         __call__ method of PandasAI class. It calls the `run` method.
 

diff --git a/tests/test_pandasai.py b/tests/test_pandasai.py
@@ -29,7 +29,7 @@ def pandasai(self, llm):
 
     def test_init(self, pandasai):
         assert pandasai._llm is not None
-        assert pandasai._is_conversational_answer is True
+        assert pandasai._is_conversational_answer is False
         assert pandasai._verbose is False
 
     def test_init_without_llm(self):
@@ -44,7 +44,7 @@ def test_conversational_answer(self, pandasai, llm):
     def test_run(self, pandasai, llm):
         df = pd.DataFrame()
         llm._output = "1"
-        assert pandasai.run(df, "What number comes before 2?") == "1"
+        assert pandasai.run(df, "What number comes before 2?") == 1
 
     def test_run_with_conversational_answer(self, pandasai, llm):
         df = pd.DataFrame()