forked from Sinaptik-AI/pandas-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
368 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
# Shortcuts | ||
|
||
Shortcuts are a way to quickly access the most common queries. At the moment, shortcuts are in beta, and only a few are available. More will be added in the future. | ||
|
||
## Available shortcuts | ||
|
||
### clean_data | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.clean_data(df) | ||
``` | ||
|
||
This shortcut will do data cleaning on the data frame. | ||
|
||
### impute_missing_values | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.impute_missing_values(df) | ||
``` | ||
|
||
This shortcut will impute missing values in the data frame. | ||
|
||
### generate_features | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.generate_features(df) | ||
``` | ||
|
||
This shortcut will generate features in the data frame. | ||
|
||
### plot_pie_chart | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_pie_chart(df, labels = ['a', 'b', 'c'], values = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a pie chart of the data frame. | ||
|
||
### plot_bar_chart | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_bar_chart(df, x = ['a', 'b', 'c'], y = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a bar chart of the data frame. | ||
|
||
### plot_bar_chart | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_bar_chart(df, x = ['a', 'b', 'c'], y = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a bar chart of the data frame. | ||
|
||
### plot_histogram | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_histogram(df, column = 'a') | ||
``` | ||
|
||
This shortcut will plot a histogram of the given column of the data frame. | ||
|
||
### plot_line_chart | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_line_chart(df, x = ['a', 'b', 'c'], y = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a line chart of the data frame. | ||
|
||
### plot_scatter_chart | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_scatter_chart(df, x = ['a', 'b', 'c'], y = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a scatter chart of the data frame. | ||
|
||
### plot_correlation_heatmap | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_correlation_heatmap(df) | ||
``` | ||
|
||
This shortcut will plot a correlation heatmap of the data frame. | ||
|
||
### plot_confusion_matrix | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_confusion_matrix(df, y_true = [1, 2, 3], y_pred = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a confusion matrix of the data frame. | ||
|
||
### plot_roc_curve | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.plot_roc_curve(df, y_true = [1, 2, 3], y_pred = [1, 2, 3]) | ||
``` | ||
|
||
This shortcut will plot a ROC curve of the data frame. | ||
|
||
### rolling_mean | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.rolling_mean(df, column = 'a', window = 5) | ||
``` | ||
|
||
This shortcut will calculate the rolling mean of the given column of the data frame, using the given window size. | ||
|
||
### rolling_median | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.rolling_median(df, column = 'a', window = 5) | ||
``` | ||
|
||
This shortcut will calculate the rolling median of the given column of the data frame, using the given window size. | ||
|
||
### rolling_std | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.rolling_std(df, column = 'a', window = 5) | ||
``` | ||
|
||
This shortcut will calculate the rolling standard deviation of the given column of the data frame, using the given window size. | ||
|
||
### segment_customers | ||
|
||
```python | ||
df = pd.read_csv('data.csv') | ||
pandas_ai.segment_customers(df, features = ['a', 'b', 'c'], n_clusters = 5) | ||
``` | ||
|
||
This shortcut will segment customers in the data frame. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
from typing import Union | ||
import pandas as pd | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
class Shortcuts(ABC):
    """Convenience methods that translate common tasks into prompts.

    Every shortcut builds a natural-language prompt and forwards it, together
    with the dataframe, to :meth:`run`. Concrete subclasses must implement
    :meth:`run`; the shortcuts themselves contain no data-processing logic.
    """

    @abstractmethod
    def run(self, df: pd.DataFrame, prompt: str) -> Union[str, pd.DataFrame]:
        """Run method from PandasAI class.

        Args:
            df: Dataframe the prompt refers to.
            prompt: Natural-language instruction built by a shortcut.

        Returns:
            Whatever the implementing class produces for the prompt.
        """

        pass

    def clean_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Do data cleaning and return the dataframe.

        Args:
            df: Dataframe to clean.

        Returns:
            The result of :meth:`run` for the data-cleaning prompt.
        """

        return self.run(
            df,
            """
1. Copy the dataframe to a new variable named df_cleaned.
2. Do data cleaning.
3. Return df_cleaned.
""",
        )

    def impute_missing_values(self, df: pd.DataFrame) -> pd.DataFrame:
        """Do missing value imputation and return the dataframe.

        Args:
            df: Dataframe whose missing values should be imputed.

        Returns:
            The result of :meth:`run` for the imputation prompt.
        """

        return self.run(
            df,
            """
1. Copy the dataframe to a new variable named df_imputed.
2. Do the imputation of missing values.
3. Return df_imputed.
""",
        )

    def generate_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Do feature generation and return the dataframe.

        Args:
            df: Dataframe to generate features for.

        Returns:
            The result of :meth:`run` for the feature-generation prompt.
        """

        return self.run(
            df,
            """
1. Copy the dataframe to a new variable named df_features.
2. Do feature generation.
3. Return df_features.
""",
        )

    def plot_pie_chart(self, df: pd.DataFrame, labels: list, values: list) -> None:
        """Plot a pie chart.

        Args:
            df: Dataframe passed along with the prompt.
            labels: Labels interpolated into the prompt.
            values: Values interpolated into the prompt.
        """

        # The result of run() is intentionally discarded: plotting shortcuts
        # return None.
        self.run(
            df,
            f"""
Plot a pie chart with the following labels and values:
labels = {labels}
values = {values}
""",
        )

    def plot_bar_chart(self, df: pd.DataFrame, x: list, y: list) -> None:
        """Plot a bar chart.

        Args:
            df: Dataframe passed along with the prompt.
            x: x values interpolated into the prompt.
            y: y values interpolated into the prompt.
        """

        self.run(
            df,
            f"""
Plot a bar chart with the following x and y:
x = {x}
y = {y}
""",
        )

    def plot_histogram(self, df: pd.DataFrame, column: str) -> None:
        """Plot a histogram.

        Args:
            df: Dataframe passed along with the prompt.
            column: Column name interpolated into the prompt.
        """

        self.run(df, f"Plot a histogram of the column {column}.")

    def plot_line_chart(self, df: pd.DataFrame, x: list, y: list) -> None:
        """Plot a line chart.

        Args:
            df: Dataframe passed along with the prompt.
            x: x values interpolated into the prompt.
            y: y values interpolated into the prompt.
        """

        self.run(
            df,
            f"""
Plot a line chart with the following x and y:
x = {x}
y = {y}
""",
        )

    def plot_scatter_chart(self, df: pd.DataFrame, x: list, y: list) -> None:
        """Plot a scatter chart.

        Args:
            df: Dataframe passed along with the prompt.
            x: x values interpolated into the prompt.
            y: y values interpolated into the prompt.
        """

        self.run(
            df,
            f"""
Plot a scatter chart with the following x and y:
x = {x}
y = {y}
""",
        )

    def plot_correlation_heatmap(self, df: pd.DataFrame) -> None:
        """Plot a correlation heatmap.

        Args:
            df: Dataframe passed along with the prompt.
        """

        self.run(df, "Plot a correlation heatmap.")

    def plot_confusion_matrix(
        self, df: pd.DataFrame, y_true: list, y_pred: list
    ) -> None:
        """Plot a confusion matrix.

        Args:
            df: Dataframe passed along with the prompt.
            y_true: True labels interpolated into the prompt.
            y_pred: Predicted labels interpolated into the prompt.
        """

        self.run(
            df,
            f"""
Plot a confusion matrix with the following y_true and y_pred:
y_true = {y_true}
y_pred = {y_pred}
""",
        )

    def plot_roc_curve(self, df: pd.DataFrame, y_true: list, y_pred: list) -> None:
        """Plot a ROC curve.

        Args:
            df: Dataframe passed along with the prompt.
            y_true: True labels interpolated into the prompt.
            y_pred: Predicted values interpolated into the prompt.
        """

        self.run(
            df,
            f"""
Plot a ROC curve with the following y_true and y_pred:
y_true = {y_true}
y_pred = {y_pred}
""",
        )

    def rolling_mean(self, df: pd.DataFrame, column: str, window: int) -> pd.DataFrame:
        """Calculate the rolling mean.

        Args:
            df: Dataframe passed along with the prompt.
            column: Column name interpolated into the prompt.
            window: Window size interpolated into the prompt.

        Returns:
            The result of :meth:`run` for the rolling-mean prompt.
        """

        # Both literals need the f prefix: adjacent string literals are
        # concatenated, but only f-prefixed parts have placeholders expanded.
        return self.run(
            df,
            f"Calculate the rolling mean of the column {column} with a window"
            f" of {window}.",
        )

    def rolling_median(
        self, df: pd.DataFrame, column: str, window: int
    ) -> pd.DataFrame:
        """Calculate the rolling median.

        Args:
            df: Dataframe passed along with the prompt.
            column: Column name interpolated into the prompt.
            window: Window size interpolated into the prompt.

        Returns:
            The result of :meth:`run` for the rolling-median prompt.
        """

        return self.run(
            df,
            f"Calculate the rolling median of the column {column} with a window"
            f" of {window}.",
        )

    def rolling_std(self, df: pd.DataFrame, column: str, window: int) -> pd.DataFrame:
        """Calculate the rolling standard deviation.

        Args:
            df: Dataframe passed along with the prompt.
            column: Column name interpolated into the prompt.
            window: Window size interpolated into the prompt.

        Returns:
            The result of :meth:`run` for the rolling-std prompt.
        """

        # The second literal starts with a space so the joined prompt reads
        # "with a window", not "with awindow".
        return self.run(
            df,
            f"Calculate the rolling standard deviation of the column {column} with a"
            f" window of {window}.",
        )

    def segment_customers(
        self, df: pd.DataFrame, features: list, n_clusters: int
    ) -> pd.DataFrame:
        """Segment customers.

        Args:
            df: Dataframe passed along with the prompt.
            features: Feature names interpolated into the prompt.
            n_clusters: Number of clusters interpolated into the prompt.

        Returns:
            The result of :meth:`run` for the segmentation prompt.
        """

        return self.run(
            df,
            f"""
Segment customers with the following features and number of clusters:
features = {features}
n_clusters = {n_clusters}
""",
        )
Oops, something went wrong.