def data_summarization(self) -> DataFrameType:
    """
    Request a grouped summary of the data through the chat interface.

    The prompt asks for a summary in DataFrame format that, for each
    distinct value, reports the subtotal of rows and that group's
    percentage of the total count.

    Returns:
        DataFrameType: The result of `self.chat` for the summary prompt.
    """
    # Prompt lines stay flush-left so the text sent to `chat` carries no
    # leading indentation.
    summary_prompt = """
Provide the summary in df format
1. For each distinct value :
2. Calculate subtotal of rows
3. Calculate the percentage of the total count for each group
"""
    return self.chat(summary_prompt)