feat: add CLI (Sinaptik-AI#107)
* more robust print statement matching

* preliminary cli tool

* no token variation

* takes in token as non required argument

* add default value for token, specify dotenv path

* multiple file format support, better error handling

* fix: using click instead of argparse

* chore: added click to list of dependencies

* feat: add support for starcoder in the cli

* fix: click as a dev dependency

* feat: support all file types for pandas

* fix: pylint issues

* docs: add README instructions for pai

* lint: fix lint

---------

Co-authored-by: Gabriele Venturi <[email protected]>
victor-hugo-dc and gventuri authored May 16, 2023
1 parent 131f592 commit e0bcd23
Showing 8 changed files with 129 additions and 10 deletions.
5 changes: 4 additions & 1 deletion .gitignore
@@ -13,4 +13,7 @@ dist
pandasai.egg-info

#venv
/venv
/venv

# command line
/pandasai_cli.egg-info
23 changes: 23 additions & 0 deletions README.md
@@ -86,6 +86,29 @@ pandas_ai.run(

You can find more examples in the [examples](examples) directory.

## Command-Line Tool
`pai` is a command-line tool that provides a convenient way to interact with PandasAI from the terminal.

```
pai [OPTIONS]
```
Options:
- **-d, --dataset**: The file path to the dataset.
- **-t, --token**: Your HuggingFace or OpenAI API token; if no token is provided, `pai` will read it from the `.env` file.
- **-m, --model**: Choice of LLM, either `openai`, `open-assistant`, or `starcoder`.
- **-p, --prompt**: Prompt that PandasAI will run.

To view a full list of available options and their descriptions, run the following command:
```
pai --help
```
>For example,
>```
>pai -d "~/pandasai/example/data/Loan payments data.csv" -m "openai" -p "How many loans are from men and have been paid off?"
>```
>should produce the same output as the `from_csv.py` example.

## Privacy & Security
To generate the Python code to run, we take the dataframe head, randomize it (using random generation for sensitive data and shuffling for non-sensitive data), and send only the randomized head.
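As a rough sketch of that idea (the helper name, the sensitivity list, and the value-generation rules below are all assumptions for illustration, not PandasAI's actual implementation):

```python
import numpy as np
import pandas as pd

def anonymize_head(df: pd.DataFrame, sensitive_cols: list, n: int = 5) -> pd.DataFrame:
    """Return a randomized copy of the first n rows.

    Sensitive columns get freshly generated values; non-sensitive
    columns are shuffled so no real row survives intact.
    """
    head = df.head(n).copy()
    rng = np.random.default_rng()
    for col in head.columns:
        if col in sensitive_cols:
            if pd.api.types.is_numeric_dtype(head[col]):
                # Replace sensitive numbers with random values in a fixed range
                head[col] = rng.integers(0, 100, size=len(head))
            else:
                # Replace sensitive strings with synthetic placeholders
                head[col] = [f"value_{i}" for i in range(len(head))]
        else:
            # Shuffle non-sensitive values so rows no longer line up
            head[col] = rng.permutation(head[col].to_numpy())
    return head

df = pd.DataFrame({"name": ["Ada", "Bob", "Cyd"], "age": [30, 41, 25]})
print(anonymize_head(df, sensitive_cols=["name"], n=3))
```

Only this randomized head would then be sent to the LLM; the full dataframe never leaves the machine.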
Empty file added pai/__init__.py
Empty file.
75 changes: 75 additions & 0 deletions pai/__main__.py
@@ -0,0 +1,75 @@
""" Driver code for the CLI tool """
import os
import click
import pandas as pd
from pandasai import PandasAI
from pandasai.llm.openai import OpenAI
from pandasai.llm.open_assistant import OpenAssistant
from pandasai.llm.starcoder import Starcoder

@click.command()
@click.option('-d', '--dataset', type=str, required=True, help='The dataset to use.')
@click.option('-t', '--token', type=str, required=False, default=None, help='The API token to use.')
@click.option('-m', '--model', type=click.Choice(['openai', 'open-assistant', 'starcoder']),
required=True, help='The type of model to use.')
@click.option('-p', '--prompt', type=str, required=True, help='The prompt to use.')
def main(dataset: str, token: str, model: str, prompt: str) -> None:
"""Main logic for the command line interface tool."""

ext = os.path.splitext(dataset)[1]

try:
file_format = {
".csv": pd.read_csv,
".xls": pd.read_excel,
".xlsx": pd.read_excel,
".xlsm": pd.read_excel,
".xlsb": pd.read_excel,
".json": pd.read_json,
".html": pd.read_html,
".sql": pd.read_sql,
".feather": pd.read_feather,
".parquet": pd.read_parquet,
".dta": pd.read_stata,
".sas7bdat": pd.read_sas,
".h5": pd.read_hdf,
".hdf5": pd.read_hdf,
".pkl": pd.read_pickle,
".pickle": pd.read_pickle,
".gbq": pd.read_gbq,
".orc": pd.read_orc,
".xpt": pd.read_sas,
".sav": pd.read_spss,
".gz": pd.read_csv,
".zip": pd.read_csv,
".bz2": pd.read_csv,
".xz": pd.read_csv,
".txt": pd.read_csv,
".xml": pd.read_xml,
}
if ext in file_format:
df = file_format[ext](dataset) # pylint: disable=C0103
else:
print("Unsupported file format.")
return

except Exception as e: # pylint: disable=W0718 disable=C0103
print(e)
return

if model == "openai":
llm = OpenAI(api_token = token)

elif model == "open-assistant":
llm = OpenAssistant(api_token = token)

elif model == 'starcoder':
llm = Starcoder(api_token = token)

try:
pandas_ai = PandasAI(llm, verbose=True)
response = pandas_ai.run(df, prompt)
print(response)

except Exception as e: # pylint: disable=W0718 disable=C0103
print(e)
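The extension-to-reader dispatch in `__main__.py` can be exercised on its own. This sketch uses a subset of the table above (the `load_dataset` name is illustrative) and handles a caveat the mapping glosses over: `pd.read_html` returns a *list* of DataFrames rather than a single one.

```python
import os
import tempfile

import pandas as pd

# Subset of the CLI's file_format table; read_html is included because
# its return type (a list of DataFrames) differs from the others.
READERS = {
    ".csv": pd.read_csv,
    ".json": pd.read_json,
    ".parquet": pd.read_parquet,
    ".html": pd.read_html,
}

def load_dataset(path: str) -> pd.DataFrame:
    """Dispatch on file extension, as the CLI does."""
    ext = os.path.splitext(path)[1]
    if ext not in READERS:
        raise ValueError(f"Unsupported file format: {ext}")
    result = READERS[ext](path)
    if isinstance(result, list):  # read_html returns a list of tables
        result = result[0]
    return result

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "example.csv")
    pd.DataFrame({"a": [1, 2]}).to_csv(path, index=False)
    print(load_dataset(path).shape)  # (2, 1)
```

Note that some readers in the full table need more than a path to be useful — `pd.read_sql`, for instance, expects a query plus a live connection, so passing a `.sql` file path to it as the committed code does would fail at runtime.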
9 changes: 7 additions & 2 deletions pandasai/__init__.py
@@ -1,6 +1,7 @@
""" PandasAI is a wrapper around a LLM to make dataframes convesational """
import ast
import io
import re
from contextlib import redirect_stdout
from datetime import date
from typing import Optional
@@ -246,8 +247,12 @@ def run_code(
# Evaluate the last line and return its value or the captured output
lines = code.strip().split("\n")
last_line = lines[-1].strip()
if last_line.startswith("print(") and last_line.endswith(")"):
last_line = last_line[6:-1]

pattern = r"^print\((.*)\)$"
match = re.match(pattern, last_line)
if match:
last_line = match.group(1)

try:
return eval(
last_line,
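The regex-based extraction from the `run_code` diff above can be tried standalone; this sketch isolates just that matching step (the `unwrap_print` name is illustrative, not part of the library):

```python
import re

# Same pattern as the committed diff: capture the argument of a print(...) call
PRINT_PATTERN = r"^print\((.*)\)$"

def unwrap_print(last_line: str) -> str:
    """If the last line is a print(...) call, return its argument expression."""
    match = re.match(PRINT_PATTERN, last_line.strip())
    return match.group(1) if match else last_line

print(unwrap_print("print(df.shape)"))  # df.shape
print(unwrap_print("result + 1"))       # result + 1
```

Note that the greedy `.*` still mis-unwraps a line such as `print(a) + print(b)` (it captures `a) + print(b`); the pattern mirrors the committed diff rather than a full parser.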
14 changes: 7 additions & 7 deletions poetry.lock


1 change: 1 addition & 0 deletions pyproject.toml
@@ -25,6 +25,7 @@ pytest = "^7.3.1"
isort = "^5.12.0"
pytest-mock = "^3.10.0"
pytest-env = "^0.8.1"
click = "^8.1.3"

[build-system]
requires = ["poetry-core"]
12 changes: 12 additions & 0 deletions setup.py
@@ -0,0 +1,12 @@
from setuptools import setup

setup(
name = 'pandasai-cli',
version = '0.1.2',
packages = ['pai'],
entry_points = {
'console_scripts': [
'pai = pai.__main__:main'
]
}
)
