prompts.py
from typing import Any


def _get_openai_content_with_image(
    text: str,
    image: str | None = None,
) -> str | list[dict[str, Any]]:
    """
    Get OpenAI message content with image if present
    :param text: Text message
    :param image: Optional image in base64 or URL format
    :return: OpenAI message content
    """
    if image:
        return [
            {"type": "text", "text": text},
            {"type": "image_url", "image_url": {"url": image}},
        ]
    else:
        return text
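
# Illustrative return shapes for the helper above (example values only, not from
# the original module):
#   _get_openai_content_with_image("What is 2 + 2?")
#       -> "What is 2 + 2?"
#   _get_openai_content_with_image("Describe the figure.", "https://example.com/fig.png")
#       -> [{"type": "text", "text": "Describe the figure."},
#           {"type": "image_url", "image_url": {"url": "https://example.com/fig.png"}}]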


def solve_cot_prompt(
    problem_statement: str,
    image: str | None = None,
) -> list[dict]:
    """
    Produce "Solve using CoT" prompt from
    https://github.com/deepseek-ai/DeepSeek-Math/blob/main/README.md
    :param problem_statement: problem text
    :param image: base64 encoded image if present
    :return: Prompt formatted for the OpenAI API.
    """
    deepseek_prompt = (
        "Please reason step by step, and put your final answer within \\boxed{}"
    )
    return [
        {
            "role": "user",
            "content": _get_openai_content_with_image(
                problem_statement + "\n" + deepseek_prompt, image
            ),
        },
    ]
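
# For multimodal problems the optional image is forwarded to the helper above, e.g.
# (the data URL below is an illustrative placeholder, not from the original module):
#   solve_cot_prompt("What is the area of the shaded region?",
#                    image="data:image/png;base64,<encoded image>")
# which yields a single user message whose content is the text + image_url list.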


def judge_cot_prompt(
    problem_statement: str,
    golden_answer: str,
    generated_answer: str,
) -> list[dict]:
    """
    Produce "Judge with CoT" Prompt
    :param problem_statement: The text of the problem to be graded.
    :param golden_answer: The correct reference answer for the problem.
    :param generated_answer: The student's answer to be evaluated.
    :return: Prompt formatted for the OpenAI API.
    """
    prompt = """You'll be provided with a math problem, a correct answer for it and a solution for evaluation.
You have to answer whether the solution is correct or not.
---
PROBLEM STATEMENT:
{}
CORRECT ANSWER:
{}
SOLUTION TO EVALUATE:
{}
---
Now please compare the answer obtained in the solution with the provided correct answer to evaluate whether the solution is correct or not.
Think step-by-step, following these steps, don't skip any:
1. Extract the answer from the provided solution
2. Make any derivations or transformations that may be necessary to compare the provided correct answer with the extracted answer
3. Perform the comparison
4. Conclude with your final verdict — put either "Yes" or "No" on a separate line"""
    return [
        {
            "role": "user",
            "content": prompt.format(
                problem_statement, golden_answer, generated_answer
            ),
        }
    ]


def judge_extract_prompt(
    generated_judgment: str,
) -> list[dict]:
    """
    Produce "Extract judgment" prompt
    :param generated_judgment: The text of the judgment to be extracted.
    :return: Prompt formatted for the OpenAI API.
    """
    prompt = """You'll be given a result of an evaluation of some mathematical solution by a professional evaluator.
You need to extract the final verdict of this evaluation in simple terms: is the solution graded as correct or not.
Output only a single label — "Yes", "No" or "Inconclusive" — according to the provided evaluation ("Yes" if the solution is graded as correct, "No" if the solution is graded as incorrect, "Inconclusive" if the evaluation is incomplete or the final verdict is not settled upon).
Only output "Inconclusive" for incomplete or unsettled evaluations. If the evaluation does contain a single final verdict like "Yes", "Correct", "True", "No", "Incorrect", "False" and so on, even if it is supplied with some additional disclaimers and remarks, output a "Yes" or "No" label accordingly.
Here goes your input:
```
{}
```
Now please output exactly either "Yes", "No" or "Inconclusive"."""
    return [{"role": "user", "content": prompt.format(generated_judgment)}]
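

if __name__ == "__main__":
    # Minimal end-to-end sketch of how these prompt builders might be chained
    # (solve -> judge -> extract verdict). The client setup, the model name, and
    # the sample problem below are illustrative assumptions, not part of the
    # original module; it requires the openai package and an OPENAI_API_KEY.
    from openai import OpenAI

    client = OpenAI()
    model = "gpt-4o-mini"  # placeholder model name

    problem = "Compute 12 * 13."
    golden = "156"

    def _chat(messages: list[dict]) -> str:
        # Demo-only helper: send the messages and return the reply text.
        response = client.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content

    # 1. Solve the problem with chain-of-thought prompting.
    solution = _chat(solve_cot_prompt(problem))
    # 2. Judge the generated solution against the golden answer.
    judgment = _chat(judge_cot_prompt(problem, golden, solution))
    # 3. Reduce the free-form judgment to a "Yes"/"No"/"Inconclusive" label.
    verdict = _chat(judge_extract_prompt(judgment))
    print(verdict)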