prompts.py
import re

from models import chat_with_model


def gen_answer(question: str, previous_answers: list, model_name: str, cot=False) -> str:
    base_prompt = (
        "Answer the following question:\n"
        "<question>" + question + "</question>\n"
        "Provide your answer in <answer></answer> XML tags.\n"
    )
    if cot:
        base_prompt += (
            "Let's approach this methodically:\n\n"
            "1. First, carefully review any previous answers to avoid repetition:\n"
            "- Analyze the core concepts and themes already covered\n"
            "- Identify unexplored angles and perspectives\n\n"
            "2. Brainstorm fresh approaches:\n"
            "- Generate multiple unique possibilities\n"
            "- Consider unconventional but valid perspectives\n"
            "- Look for interesting connections or insights\n\n"
            "3. Develop your chosen idea:\n"
            "- Reason through the logic step-by-step\n"
            "- Validate your reasoning\n\n"
            "Show your complete thinking process in <thoughts></thoughts> XML tags.\n"
            "When ready, provide your final response in <answer></answer> XML tags.\n"
        )
    base_prompt += (
        "Your response should be one direct answer. Only provide one answer. "
        "DO NOT list multiple answers. Please try to be concise.\n"
    )
    if previous_answers:
        previous_answers_str = "\n\n".join(
            f"<previous_answer id='{i + 1}'>\n{answer}\n</previous_answer>"
            for i, answer in enumerate(previous_answers)
        )
        base_prompt += (
            "IMPORTANT: Provide an answer you *HAVE NOT* given previously.\n"
            "Your previous answers are inside of <previous_answers></previous_answers> XML tags.\n"
            "<previous_answers>\n" + previous_answers_str + "\n</previous_answers>"
        )
    response = chat_with_model(base_prompt, model=model_name, temperature=0.7)
    return _extract_xml_content(response, "answer")

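# Example (illustrative): a call such as
#     gen_answer("Name a use for a brick.", [], "gpt-4o")
# returns just the text inside the model's <answer></answer> tags, e.g.
# "Use it as a doorstop." ("gpt-4o" is a placeholder model name here.)
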
def judge_answer(question: str, answer: str, model_name: str) -> int:
    prompt = (
        "Your task is to evaluate the coherence and plausibility of an answer to a given question.\n\n"
        "Question: <question>" + question + "</question>\n"
        "Answer: <answer>" + answer + "</answer>\n\n"
        "Based on the following criteria, provide a Coherence and Plausibility Score on a scale of 0 - 100:\n\n"
        "0-20: INCOHERENT/NONSENSICAL\n"
        "- Answer is completely unrelated to the question\n"
        "- Contains logical impossibilities or contradictions\n"
        "- Makes claims that defy basic reality\n"
        "- Shows no understanding of the question's context\n\n"
        "21-40: SEVERELY FLAWED\n"
        "- Major logical gaps or inconsistencies\n"
        "- Significant misunderstanding of core concepts\n"
        "- Contains partially relevant information but mostly incorrect\n"
        "- May include some true statements but fails to form a coherent response\n\n"
        "41-60: PARTIALLY COHERENT\n"
        "- Shows basic understanding of the question\n"
        "- Contains some valid points mixed with errors\n"
        "- Logic is followable but may have weak connections\n"
        "- Answer is relevant but may miss key aspects\n\n"
        "61-80: MOSTLY COHERENT\n"
        "- Demonstrates clear understanding of the question\n"
        "- Logic is sound with minor gaps or inconsistencies\n"
        "- Most claims are plausible and well-supported\n"
        "- Forms a generally complete and relevant response\n\n"
        "81-100: HIGHLY COHERENT\n"
        "- Perfectly addresses the question\n"
        "- Demonstrates complete logical consistency\n"
        "- All claims are plausible and well-grounded\n"
        "- Forms a comprehensive and precise response\n\n"
        "IMPORTANT: Provide your final Coherence and Plausibility Score as a single integer between 0 and 100, "
        "enclosed in <coherence_score></coherence_score> XML tags. For example:\n"
        "<coherence_score>75</coherence_score>\n\n"
        "Do not include any additional text in your response."
    )
    # The judge model is hardcoded to "o1-mini"; the model_name parameter
    # is currently unused.
    response = chat_with_model(prompt, model="o1-mini")
    return int(_extract_xml_content(response, "coherence_score"))

def judge_similarity(question: str, answer1: str, answer2: str, model_name: str) -> float:
    prompt = (
        "Your task is to evaluate how semantically similar two answers are to the same question, "
        "focusing on core concepts and meaning rather than exact wording.\n\n"
        "Original Question: <question>" + question + "</question>\n"
        "First Answer: <answer1>" + answer1 + "</answer1>\n"
        "Second Answer: <answer2>" + answer2 + "</answer2>\n\n"
        "Based on the following criteria, provide a Similarity Score from 0 to 100:\n\n"
        "0-20: Completely different answers with no meaningful overlap\n"
        "21-40: Minimal similarity with few shared concepts\n"
        "41-60: Moderate similarity with some shared core ideas\n"
        "61-80: Substantial similarity with minor variations\n"
        "81-100: Nearly identical in meaning (may use different words)\n\n"
        "IMPORTANT: Provide your final Similarity Score as an integer between 0 and 100, "
        "enclosed in <similarity_score></similarity_score> XML tags. For example:\n"
        "<similarity_score>75</similarity_score>\n\n"
        "Do not include any additional text in your response."
    )
    # As in judge_answer, the judge model is hardcoded and model_name is unused.
    response = chat_with_model(prompt, model="o1-mini")
    return int(_extract_xml_content(response, "similarity_score")) / 100

def _extract_xml_content(text: str, tag: str) -> str:
    # Pull the contents of the first <tag>...</tag> pair; fall back to the
    # raw text if the model omitted the tags.
    pattern = f"<{tag}>(.*?)</{tag}>"
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else text
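

# Usage sketch: generate answers until one is judged incoherent or too
# similar to an earlier one. The model name and both thresholds below are
# illustrative choices, and chat_with_model's signature is assumed from the
# calls above.
if __name__ == "__main__":
    QUESTION = "Name a use for a brick."
    MODEL = "gpt-4o"  # placeholder for any model models.py supports
    answers: list = []

    for _ in range(5):
        candidate = gen_answer(QUESTION, answers, MODEL)
        if judge_answer(QUESTION, candidate, MODEL) < 20:
            break  # falls in the rubric's incoherent band
        if any(judge_similarity(QUESTION, candidate, prev, MODEL) > 0.8
               for prev in answers):
            break  # effectively a repeat of an earlier answer
        answers.append(candidate)

    print(f"{len(answers)} novel answers:")
    for ans in answers:
        print("-", ans)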