docs: add parquet example to pandas-ai (Sinaptik-AI#855)

* add the data as parquet format
* add parquet example to the examples section
* add an example of parquet file
* update the structure of parquet read
* add parquet
* remove parquet dependency
* remove specific parquet dependency
* update comment line
* lint: fix lint in examples

---------

Co-authored-by: tanersemenn <0418>
Co-authored-by: Gabriele Venturi <[email protected]>
a-urabayashi · Jan 11, 2024 · 36f3f23 · 36f3f23
1 parent 9a06b02
commit 36f3f23
Show file tree

Hide file tree

Showing 7 changed files with 61 additions and 32 deletions.
diff --git a/docs/examples.md b/docs/examples.md
@@ -71,6 +71,24 @@ print(response)
 # Output: 247 loans have been paid off by men.
 ```
 
+## Working with Parquet files
+
+Example of using PandasAI with a Parquet file
+
+```python
+from pandasai import SmartDataframe
+from pandasai.llm import OpenAI
+
+llm = OpenAI(api_token="YOUR_API_TOKEN")
+
+# You can instantiate a SmartDataframe with a path to a Parquet file
+df = SmartDataframe("data/Loan payments data.parquet", config={"llm": llm})
+
+response = df.chat("How many loans are from men and have been paid off?")
+print(response)
+# Output: 247 loans have been paid off by men.
+```
+
 ## Working with Google Sheets
 
 Example of using PandasAI with a Google Sheet. In order to use Google Sheets as a data source, you need to install the `pandasai[google-sheet]` extra dependency.

diff --git a/examples/data/Loan payments data.parquet b/examples/data/Loan payments data.parquet
diff --git a/examples/from_parquet.py b/examples/from_parquet.py
@@ -0,0 +1,13 @@
+"""Example of using PandasAI with a Parquet file."""
+
+from pandasai import SmartDataframe
+from pandasai.llm import OpenAI
+
+llm = OpenAI()
+
+smart_df_read = SmartDataframe(
+    df="examples/data/Loan payments data.parquet", config={"llm": llm}
+)
+response = smart_df_read.chat("How many loans are from men and have been paid off?")
+print(response)
+# Output: 247 loans have been paid off by men.
diff --git a/tests/connectors/test_sqlite.py b/tests/connectors/test_sqlite.py
@@ -1,21 +1,20 @@
 import unittest
 import pandas as pd
-from unittest.mock import Mock,patch
+from unittest.mock import Mock, patch
 from pandasai.connectors.base import SqliteConnectorConfig
 from pandasai.connectors import SqliteConnector
 
+
 class TestSqliteConnector(unittest.TestCase):
-    @patch("pandasai.connectors.sql.create_engine",autospec=True)
-    def setUp(self,mock_create_engine) -> None:
+    @patch("pandasai.connectors.sql.create_engine", autospec=True)
+    def setUp(self, mock_create_engine) -> None:
         self.mock_engine = Mock()
         self.mock_connection = Mock()
         self.mock_engine.connect.return_value = self.mock_connection
         mock_create_engine.return_value = self.mock_engine
 
         self.config = SqliteConnectorConfig(
-            dialect="sqlite",
-            database="path_todb.db",
-            table="yourtable"
+            dialect="sqlite", database="path_todb.db", table="yourtable"
         ).dict()
 
         self.connector = SqliteConnector(self.config)
@@ -37,11 +36,10 @@ def test_constructor_and_properties(
     def test_repr_method(self):
         # Test __repr__ method
         expected_repr = (
-            "<SqliteConnector dialect=sqlite "
-            "database=path_todb.db table=yourtable>"
+            "<SqliteConnector dialect=sqlite " "database=path_todb.db table=yourtable>"
         )
         self.assertEqual(repr(self.connector), expected_repr)
-    
+
     @patch("pandasai.connectors.sql.pd.read_sql", autospec=True)
     def test_head_method(self, mock_read_sql):
         expected_data = pd.DataFrame({"Column1": [1, 2, 3], "Column2": [4, 5, 6]})
@@ -82,4 +80,4 @@ def test_column_hash_property(self):
     def test_fallback_name_property(self):
         # Test fallback_name property
         fallback_name = self.connector.fallback_name
-        self.assertEqual(fallback_name, "yourtable")
+        self.assertEqual(fallback_name, "yourtable")
diff --git a/tests/llms/test_azure_openai.py b/tests/llms/test_azure_openai.py
@@ -32,13 +32,13 @@ def test_type_without_deployment(self):
 
     def test_type_with_token(self):
         assert (
-                AzureOpenAI(
-                    api_token="test",
-                    azure_endpoint="test",
-                    api_version="test",
-                    deployment_name="test",
-                ).type
-                == "azure-openai"
+            AzureOpenAI(
+                api_token="test",
+                azure_endpoint="test",
+                api_version="test",
+                deployment_name="test",
+            ).type
+            == "azure-openai"
         )
 
     def test_proxy(self):

diff --git a/tests/llms/test_openai.py b/tests/llms/test_openai.py
@@ -102,11 +102,11 @@ def test_chat_completion(self, mocker):
 
     def test_call_with_unsupported_model(self, prompt):
         with pytest.raises(
-                UnsupportedModelError,
-                match=(
-                        "Unsupported model: The model 'not a model' doesn't exist "
-                        "or is not supported yet."
-                ),
+            UnsupportedModelError,
+            match=(
+                "Unsupported model: The model 'not a model' doesn't exist "
+                "or is not supported yet."
+            ),
         ):
             llm = OpenAI(api_token="test", model="not a model")
             llm.call(instruction=prompt)
@@ -126,7 +126,9 @@ def test_call_supported_chat_model(self, mocker, prompt):
         assert result == "response"
 
     def test_call_finetuned_model(self, mocker, prompt):
-        openai = OpenAI(api_token="test", model="ft:gpt-3.5-turbo:my-org:custom_suffix:id")
+        openai = OpenAI(
+            api_token="test", model="ft:gpt-3.5-turbo:my-org:custom_suffix:id"
+        )
         mocker.patch.object(openai, "chat_completion", return_value="response")
 
         result = openai.call(instruction=prompt)

diff --git a/tests/skills/test_skills.py b/tests/skills/test_skills.py
@@ -160,8 +160,8 @@ def test_prompt_display(self):
         # Test prompt_display method when skills exist
         prompt = skills_manager.prompt_display()
         assert (
-                "You can call the following functions that have been pre-defined for you:"
-                in prompt
+            "You can call the following functions that have been pre-defined for you:"
+            in prompt
         )
 
         # Test prompt_display method when no skills exist
@@ -196,8 +196,7 @@ def skill_a(*args, **kwargs):
             return "SkillA Result"
 
         skill_b = Skill.from_function(
-            func=lambda _: "SkillB Result",
-            description="Skill B"
+            func=lambda _: "SkillB Result", description="Skill B"
         )
 
         agent.add_skills(skill_a)
@@ -214,8 +213,7 @@ def skill_a(*args, **kwargs):
             return "SkillA Result"
 
         skill_b = Skill.from_function(
-            func=lambda _: "SkillB Result",
-            description="Skill B"
+            func=lambda _: "SkillB Result", description="Skill B"
         )
 
         smart_dataframe.add_skills(skill_a)
@@ -291,12 +289,12 @@ def test_run_prompt_without_skills(self, agent):
         assert "<function>" not in last_prompt
         assert "</function>" not in last_prompt
         assert (
-                "You can call the following functions that have been pre-defined for you:"
-                not in last_prompt
+            "You can call the following functions that have been pre-defined for you:"
+            not in last_prompt
         )
 
     def test_code_exec_with_skills_no_use(
-            self, code_manager: CodeManager, exec_context
+        self, code_manager: CodeManager, exec_context
     ):
         code = """result = {'type': 'number', 'value': 1 + 1}"""
         skill1 = MagicMock()