-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(RephraseQuery): rephrase user query to get more accurate responses #592
Changes from all commits
70b0da8
1b51727
70244c3
f715035
2da890a
6736c44
cdeec68
9025f4e
d1b8e61
49d8720
b92fb39
7f17af8
2e4c902
7a554a5
f7e4d98
21f5bd8
393f2f2
adfc86a
bf9667b
cccee44
2d87306
995b90d
6fa9c1d
30765ac
e9b2342
0c3c997
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,10 @@ | |
from pandasai.helpers.df_info import DataFrameType | ||
from pandasai.helpers.logger import Logger | ||
from pandasai.helpers.memory import Memory | ||
from pandasai.prompts.base import Prompt | ||
from pandasai.prompts.clarification_questions_prompt import ClarificationQuestionPrompt | ||
from pandasai.prompts.explain_prompt import ExplainPrompt | ||
from pandasai.prompts.rephase_query_prompt import RephraseQueryPrompt | ||
from pandasai.schemas.df_config import Config | ||
from pandasai.smart_datalake import SmartDatalake | ||
|
||
|
@@ -38,6 +40,28 @@ def __init__( | |
self._lake = SmartDatalake(dfs, config, logger, memory=Memory(memory_size)) | ||
self._logger = self._lake.logger | ||
|
||
def _call_llm_with_prompt(self, prompt: Prompt):
    """
    Call the LLM with a prompt, retrying on failure as configured.

    The response is checked with ``prompt.validate``. A failed call or a
    failed validation is retried until ``config.max_retries`` attempts have
    been made, provided the error correction framework is enabled;
    otherwise the error is re-raised immediately.

    Args:
        prompt (Prompt): Prompt to pass to the LLM.

    Returns:
        str: The first LLM response accepted by ``prompt.validate``.
        ``None`` when ``config.max_retries`` is not positive, because no
        attempt is made in that case.

    Raises:
        ValueError: If the last allowed attempt returned a response that
            failed validation.
        Exception: Any error raised by the LLM call on the last allowed
            attempt, or on the first attempt when the error correction
            framework is disabled.
    """
    max_retries = self._lake.config.max_retries
    for attempt in range(max_retries):
        try:
            result: str = self._lake.llm.call(prompt)
            if prompt.validate(result):
                return result
            raise ValueError("Response validation failed!")
        except Exception as exception:
            is_last_attempt = attempt >= max_retries - 1
            if not self._lake.use_error_correction_framework or is_last_attempt:
                raise
            # Record why this attempt failed so that a retry does not hide
            # the underlying cause from whoever debugs it later.
            self._logger.log(
                f"LLM call failed on attempt {attempt + 1} of {max_retries}, "
                f"retrying: {exception}"
            )
    return None
|
||
def chat(self, query: str, output_type: Optional[str] = None): | ||
""" | ||
Simulate a chat interaction with the assistant on Dataframe. | ||
|
@@ -60,7 +84,7 @@ def clarification_questions(self, query: str) -> List[str]: | |
self._lake.dfs, self._lake._memory.get_conversation(), query | ||
) | ||
|
||
result = self._lake.llm.call(prompt) | ||
result = self._call_llm_with_prompt(prompt) | ||
self._logger.log( | ||
f"""Clarification Questions: {result} | ||
""" | ||
|
@@ -83,7 +107,7 @@ def explain(self) -> str: | |
self._lake._memory.get_conversation(), | ||
self._lake.last_code_executed, | ||
) | ||
response = self._lake.llm.call(prompt) | ||
response = self._call_llm_with_prompt(prompt) | ||
self._logger.log( | ||
f"""Explaination: {response} | ||
""" | ||
|
@@ -95,3 +119,21 @@ def explain(self) -> str: | |
"because of the following error:\n" | ||
f"\n{exception}\n" | ||
) | ||
|
||
def rephrase_query(self, query: str) -> str:
    """
    Rephrase a user query in order to get more accurate responses.

    Builds a ``RephraseQueryPrompt`` from the query, the dataframes and the
    conversation held in memory, sends it to the LLM and logs the answer.

    Args:
        query (str): The query to rephrase.

    Returns:
        str: The LLM response, or a message describing the error if the
        query could not be rephrased. Errors are never propagated.
    """
    try:
        prompt = RephraseQueryPrompt(
            query, self._lake.dfs, self._lake._memory.get_conversation()
        )
        response = self._call_llm_with_prompt(prompt)
        self._logger.log(
            f"""Rephrased Response: {response}
            """
        )
        return response
    except Exception as exception:
        # Deliberately broad: this method reports failures as text
        # instead of raising.
        return (
            "Unfortunately, I was not able to rephrase query, "
            "because of the following error:\n"
            f"\n{exception}\n"
        )
Comment on lines
+123
to
+139
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
""" Prompt to rephrase query to get more accurate responses | ||
You are provided with the following pandas DataFrames: | ||
|
||
{dataframes} | ||
{conversation} | ||
Return the rephrased sentence of "{query}” in order to obtain more accurate and | ||
comprehensive responses without any explanations. If something from the original | ||
query is ambiguous, please clarify it in the rephrased query, making assumptions, | ||
if necessary. | ||
|
||
""" | ||
from typing import List | ||
|
||
import pandas as pd | ||
from .base import Prompt | ||
|
||
|
||
class RephraseQueryPrompt(Prompt): | ||
"""Prompt to rephrase query to get more accurate responses""" | ||
|
||
# Prompt template. Placeholders: {dataframes}, {conversation} and {query}.
# The query is wrapped in matching ASCII double quotes.
text: str = """
You are provided with the following pandas DataFrames:

{dataframes}
{conversation}
Return the rephrased sentence of "{query}" in order to obtain more accurate and
comprehensive responses without any explanations. If something from the original
query is ambiguous, please clarify it in the rephrased query, making assumptions,
if necessary.
"""
Comment on lines
+21
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same text is repeated twice in the class - text: str = """
- You are provided with the following pandas DataFrames:
-
- {dataframes}
- {conversation}
- Return the rephrased sentence of "{query}” in order to obtain more accurate and
- comprehensive responses without any explanations. If something from the original
- query is ambiguous, please clarify it in the rephrased query, making assumptions,
- if necessary.
- """
-
- conversation_text: str = """
- And based on our conversation:
-
- <conversation>
- {conversation}
- </conversation>
- """
+ PROMPT_TEXT: str = """
+ You are provided with the following pandas DataFrames:
+
+ {dataframes}
+ {conversation}
+ Return the rephrased sentence of "{query}” in order to obtain more accurate and
+ comprehensive responses without any explanations. If something from the original
+ query is ambiguous, please clarify it in the rephrased query, making assumptions,
+ if necessary.
+ """
+
+ CONVERSATION_TEXT: str = """
+ And based on our conversation:
+
+ <conversation>
+ {conversation}
+ </conversation>
+ """ |
||
|
||
conversation_text: str = """ | ||
And based on our conversation: | ||
|
||
<conversation> | ||
{conversation} | ||
</conversation> | ||
""" | ||
|
||
def __init__(self, query: str, dataframes: List[pd.DataFrame], conversation: str):
    """
    Store the values used to fill in the prompt template.

    Args:
        query (str): The user query to rephrase; stored as ``query``.
        dataframes (List[pd.DataFrame]): Dataframes; stored as ``dfs``.
        conversation (str): Conversation so far. When truthy it is wrapped
            with ``conversation_text``; otherwise an empty string is stored.
    """
    if conversation:
        conversation_section = self.conversation_text.format(
            conversation=conversation
        )
    else:
        conversation_section = ""

    self.set_var("conversation", conversation_section)
    self.set_var("query", query)
    self.set_var("dfs", dataframes)
Comment on lines
+40
to
+48
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's no validation for the input parameters |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
_call_llm_with_prompt
method is a good addition for handling retries and error management. However, it's important to note that the exception handling here is quite broad. It catches all exceptions without distinguishing between different types of errors. This could potentially hide unexpected issues and make debugging more difficult. Consider refining the exception handling to be more specific or at least log the exception details before retrying.