Sinaptik-AI · gventuri · Nov 7, 2023 · Oct 31, 2023 · Oct 31, 2023 · Nov 1, 2023
diff --git a/examples/sql_direct_config.py b/examples/sql_direct_config.py
@@ -0,0 +1,59 @@
+"""Example of using PandasAI with direct SQL on a PostgreSQL database."""
+
+from pandasai import SmartDatalake
+from pandasai.llm import OpenAI
+from pandasai.connectors import PostgreSQLConnector
+from pandasai.smart_dataframe import SmartDataframe
+
+
+# With a PostgreSQL database
+order = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "orders",
+    }
+)
+
+order_details = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "order_details",
+    }
+)
+
+products = PostgreSQLConnector(
+    config={
+        "host": "localhost",
+        "port": 5432,
+        "database": "testdb",
+        "username": "postgres",
+        "password": "123456",
+        "table": "products",
+    }
+)
+
+
+llm = OpenAI("OPEN_API_KEY")
+
+
+order_details_smart_df = SmartDataframe(
+    order_details,
+    config={"llm": llm, "direct_sql": True},
+    description="Contain user order details",
+)
+
+
+df = SmartDatalake(
+    [order_details_smart_df, order, products],
+    config={"llm": llm, "direct_sql": True},
+)
+response = df.chat("return orders with count of distinct products")
+print(response)
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -42,7 +42,8 @@ nav:
       - Documents Building: building_docs.md
       - License: license.md
 extra:
-  version: "1.4.3"
+  version: "1.4.4"
+
 plugins:
   - search
   - mkdocstrings:

diff --git a/pandasai/assets/prompt_templates/default_instructions.tmpl b/pandasai/assets/prompt_templates/default_instructions.tmpl
@@ -0,0 +1,5 @@
+Analyze the data, using the provided dataframes (`dfs`).
+    1. Prepare: Preprocessing and cleaning data if necessary
+    2. Process: Manipulating data for analysis (grouping, filtering, aggregating, etc.)
+    3. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)
+    {viz_library_type}
diff --git a/pandasai/assets/prompt_templates/direct_sql_connector.tmpl b/pandasai/assets/prompt_templates/direct_sql_connector.tmpl
@@ -0,0 +1,39 @@
+You are provided with the following samples of sql tables data:
+
+<Tables>
+{tables}
+</Tables>
+
+<conversation>
+{conversation}
+</conversation>
+
+You are provided with the following function that executes the sql query:
+<Function>
+def execute_sql_query(sql_query: str) -> pd.DataFrame
+"""This method connects to the database, executes the sql query and returns the dataframe"""
+</Function>
+
+This is the initial python function. Do not change the params.
+
+```python
+# TODO import all the dependencies required
+import pandas as pd
+
+def analyze_data() -> dict:
+    """
+    Analyze the data, using the provided dataframes (`dfs`).
+    1. Prepare: generate sql query to get data for analysis (grouping, filtering, aggregating, etc.)
+    2. Process: execute the query using the execute_sql_query function available to you, which returns a dataframe
+    3. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)
+    {viz_library_type}
+    At the end, return a dictionary of:
+    {output_type_hint}
+    """
+```
+
+Take a deep breath and reason step-by-step. Act as a senior data analyst.
+In the answer, you must never write the "technical" names of the tables.
+Based on the last message in the conversation:
+
+- return the updated analyze_data function wrapped within `python `
diff --git a/pandasai/assets/prompt_templates/generate_python_code.tmpl b/pandasai/assets/prompt_templates/generate_python_code.tmpl
@@ -6,8 +6,6 @@ You are provided with the following pandas DataFrames:
 {conversation}
 </conversation>
 
-{viz_library_type}
-
 This is the initial python function. Do not change the params. Given the context, use the right dataframes.
 ```python
 {current_code}

diff --git a/pandasai/assets/prompt_templates/viz_library.tmpl b/pandasai/assets/prompt_templates/viz_library.tmpl
@@ -0,0 +1 @@
+If the user requests to create a chart, utilize the Python {library} library to generate high-quality graphics that will be saved directly to a file.
diff --git a/pandasai/connectors/databricks.py b/pandasai/connectors/databricks.py
@@ -63,3 +63,20 @@ def __repr__(self):
             f"host={self._config.host} port={self._config.port} "
             f"database={self._config.database} httpPath={str(self._config.httpPath)}"
         )
+
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
-            f"host={self._config.host} port={self._config.port} "
-            f"database={self._config.database} httpPath={str(self._config.httpPath)}"
-        )
-
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.token,
-                self._config.host,
-                self._config.port,
-                self._config.httpPath,
-            ) == (
-                other._config.dialect,
-                other._config.token,
-                other._config.host,
-                other._config.port,
-                other._config.httpPath,
-            )
-        return False
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
-            f"host={self._config.host} port={self._config.port} "
-            f"database={self._config.database} httpPath={str(self._config.httpPath)}"
-        )
-
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.token,
-                self._config.host,
-                self._config.port,
-                self._config.httpPath,
-            ) == (
-                other._config.dialect,
-                other._config.token,
-                other._config.host,
-                other._config.port,
-                other._config.httpPath,
-            )
-        return False
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.token,
+                self._config.host,
+                self._config.port,
+                self._config.httpPath,
+            ) == (
+                other._config.dialect,
+                other._config.token,
+                other._config.host,
+                other._config.port,
+                other._config.httpPath,
+            )
+        return False
diff --git a/pandasai/connectors/snowflake.py b/pandasai/connectors/snowflake.py
@@ -90,3 +90,18 @@ def __repr__(self):
             f"database={self._config.database} schema={str(self._config.dbSchema)}  "
             f"table={self._config.table}>"
         )
+
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                self._config.password,
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                other._config.password,
+            )
+        return False
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.account,
-                self._config.username,
-                self._config.password,
-            ) == (
-                other._config.dialect,
-                other._config.account,
-                other._config.username,
-                other._config.password,
-            )
-        return False
+    def equals(self, other):
+        """
+        Compare the current object with another object for equality.
+
+        Args:
+            other: The object to compare with.
+
+        Returns:
+            True if the objects are equal, False otherwise.
+        """
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                hash(self._config.password),
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                hash(other._config.password),
+            )
+        return False
-    def equals(self, other):
-        if isinstance(other, self.__class__):
-            return (
-                self._config.dialect,
-                self._config.account,
-                self._config.username,
-                self._config.password,
-            ) == (
-                other._config.dialect,
-                other._config.account,
-                other._config.username,
-                other._config.password,
-            )
-        return False
+    def equals(self, other):
+        """
+        Compare the current object with another object for equality.
+
+        Args:
+            other: The object to compare with.
+
+        Returns:
+            True if the objects are equal, False otherwise.
+        """
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.account,
+                self._config.username,
+                hash(self._config.password),
+            ) == (
+                other._config.dialect,
+                other._config.account,
+                other._config.username,
+                hash(other._config.password),
+            )
+        return False
diff --git a/pandasai/connectors/sql.py b/pandasai/connectors/sql.py
@@ -5,6 +5,8 @@
 import re
 import os
 import pandas as pd
+
+from pandasai.exceptions import MaliciousQueryError
 from .base import BaseConnector, SQLConnectorConfig, SqliteConnectorConfig
 from .base import BaseConnectorConfig
 from sqlalchemy import create_engine, text, select, asc
@@ -360,6 +362,46 @@ def column_hash(self):
     def fallback_name(self):
         return self._config.table
 
+    def equals(self, other):
+        if isinstance(other, self.__class__):
+            return (
+                self._config.dialect,
+                self._config.driver,
+                self._config.host,
+                self._config.port,
+                self._config.username,
+                self._config.password,
+            ) == (
+                other._config.dialect,
+                other._config.driver,
+                other._config.host,
+                other._config.port,
+                other._config.username,
+                other._config.password,
+            )
+        return False
+
+    def _is_sql_query_safe(self, query: str):
+        infected_keywords = [
+            r"\bINSERT\b",
+            r"\bUPDATE\b",
+            r"\bDELETE\b",
+            r"\bDROP\b",
+            r"\bEXEC\b",
+            r"\bALTER\b",
+            r"\bCREATE\b",
+        ]
+
+        return not any(
+            re.search(keyword, query, re.IGNORECASE) for keyword in infected_keywords
+        )
+
+    def execute_direct_sql_query(self, sql_query):
+        if not self._is_sql_query_safe(sql_query):
+            raise MaliciousQueryError("Malicious query is generated in code")
+
+        return pd.read_sql(sql_query, self._connection)
+
 
 class SqliteConnector(SQLConnector):
     """

diff --git a/pandasai/exceptions.py b/pandasai/exceptions.py
@@ -155,10 +155,27 @@ class UnSupportedLogicUnit(Exception):
         Exception (Exception): UnSupportedLogicUnit
     """
 
+
 class InvalidWorkspacePathError(Exception):
     """
     Raised when the environment variable of workspace exist but path is invalid
 
     Args:
         Exception (Exception): InvalidWorkspacePathError
     """
+
+
+class InvalidConfigError(Exception):
+    """
+    Raised when config value is not applicable
+    Args:
+        Exception (Exception): InvalidConfigError
+    """
+
+
+class MaliciousQueryError(Exception):
+    """
+    Raise error if malicious query is generated
+    Args:
+        Exception (Exception): MaliciousQueryError
+    """
diff --git a/pandasai/helpers/code_manager.py b/pandasai/helpers/code_manager.py
@@ -28,9 +28,15 @@
 
 class CodeExecutionContext:
     _prompt_id: uuid.UUID = None
+    _can_direct_sql: bool = False
     _skills_manager: SkillsManager = None
 
-    def __init__(self, prompt_id: uuid.UUID, skills_manager: SkillsManager):
+    def __init__(
+        self,
+        prompt_id: uuid.UUID,
+        skills_manager: SkillsManager,
+        _can_direct_sql: bool = False,
+    ):
         """
         Additional Context for code execution
         Args:
@@ -39,6 +45,7 @@ def __init__(self, prompt_id: uuid.UUID, skills_manager: SkillsManager):
         """
         self._skills_manager = skills_manager
         self._prompt_id = prompt_id
+        self._can_direct_sql = _can_direct_sql
 
     @property
     def prompt_id(self):
@@ -48,6 +55,10 @@ def prompt_id(self):
     def skills_manager(self):
         return self._skills_manager
 
+    @property
+    def can_direct_sql(self):
+        return self._can_direct_sql
+
 
 class CodeManager:
     _dfs: List
@@ -283,6 +294,10 @@ def execute_code(self, code: str, context: CodeExecutionContext) -> Any:
 
         analyze_data = environment.get("analyze_data")
 
+        if context.can_direct_sql:
+            environment["execute_sql_query"] = self._dfs[0].get_query_exec_func()
+            return analyze_data()
+
         return analyze_data(self._get_originals(dfs))
 
     def _get_samples(self, dfs):

diff --git a/pandasai/helpers/viz_library_types/_viz_library_types.py b/pandasai/helpers/viz_library_types/_viz_library_types.py
@@ -1,13 +1,12 @@
 from abc import abstractmethod, ABC
 from typing import Any, Iterable
+from pandasai.prompts.generate_python_code import VizLibraryPrompt
 
 
 class BaseVizLibraryType(ABC):
     @property
     def template_hint(self) -> str:
-        return f"""When a user requests to create a chart, utilize the Python
-{self.name} library to generate high-quality graphics that will be saved 
-directly to a file."""
+        return VizLibraryPrompt(library=self.name)
 
     @property
     @abstractmethod

diff --git a/pandasai/prompts/base.py b/pandasai/prompts/base.py
@@ -2,6 +2,7 @@
 In order to better handle the instructions, this prompt module is written.
 """
 from abc import ABC, abstractmethod
+import string
 
 
 class AbstractPrompt(ABC):
@@ -92,12 +93,11 @@ def to_string(self):
         prompt_args = {}
         for key, value in self._args.items():
             if isinstance(value, AbstractPrompt):
+                args = [
+                    arg[1] for arg in string.Formatter().parse(value.template) if arg[1]
+                ]
                 value.set_vars(
-                    {
-                        k: v
-                        for k, v in self._args.items()
-                        if k != key and not isinstance(v, AbstractPrompt)
-                    }
+                    {k: v for k, v in self._args.items() if k != key and k in args}
                 )
                 prompt_args[key] = value.to_string()
             else:

diff --git a/pandasai/prompts/direct_sql_prompt.py b/pandasai/prompts/direct_sql_prompt.py
@@ -0,0 +1,40 @@
+""" Prompt to explain code generation by the LLM
+The previous conversation we had
+
+<Conversation>
+{conversation}
+</Conversation>
+
+Based on the last conversation you generated the following code:
+
+<Code>
+{code}
+</Code>
+
+Explain how you came up with code for non-technical people without 
+mentioning technical details or mentioning the libraries used?
+
+"""
+from .file_based_prompt import FileBasedPrompt
+
+
+class DirectSQLPrompt(FileBasedPrompt):
+    """Prompt asking the LLM to generate code that runs SQL queries directly"""
+
+    _path_to_template = "assets/prompt_templates/direct_sql_connector.tmpl"
+
+    def _prepare_tables_data(self, tables):
+        tables_join = []
+        for table in tables:
+            table_description_tag = (
+                f' description="{table.table_description}"'
+                if table.table_description is not None
+                else ""
+            )
+            table_head_tag = f'<table name="{table.table_name}"{table_description_tag}>'
+            table = f"{table_head_tag}\n{table.head_csv}\n</table>"
+            tables_join.append(table)
+        return "\n\n".join(tables_join)
+
+    def setup(self, tables) -> None:
+        self.set_var("tables", self._prepare_tables_data(tables))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		If the user requests to create a chart, utilize the Python {library} library to generate high-quality graphics that will be saved directly to a file.