# mastea_app.py
import torch
import transformers
import streamlit as st
import pandas as pd
from io import StringIO
import anthropic

# Load the MaScQA question bank (reading .xlsx requires openpyxl) and strip
# stray newline characters inside every cell
df_aq = pd.read_excel("all_QA.xlsx")
df_aq = df_aq.replace('\n', '', regex=True)
list_of_topics = sorted(df_aq['TOPIC'].unique())
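
# Added guard (not part of the original app): fail fast with a readable error
# if the spreadsheet is missing a column the app reads below
expected_cols = {'TOPIC', 'QUESTION', 'Question Type', 'Correct Answer'}
missing_cols = expected_cols - set(df_aq.columns)
if missing_cols:
    raise KeyError(f"all_QA.xlsx is missing expected columns: {missing_cols}")
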
# Page configuration
st.set_page_config(page_title="MaSTeA for MaScQA", page_icon=":microscope:", layout="wide")

# Seed for reproducible generation
torch.random.manual_seed(0)

# Set up the title of the application
st.title('MaSTeA for MaScQA :microscope: :female-scientist:')
subtitle = r'''
$\textsf{
\large \textbf{Ma}terials \textbf{S}cience \textbf{Te}aching \textbf{A}ssistant
}$
'''
st.write(f"{subtitle} ")

col1, col2 = st.columns((1, 3))
col1.header("Settings")
question = ''

# Set up the title for the second column
col2.header('Select your topic to study')

# Topic selector
topic = col2.selectbox(
    "Topics",
    options=list_of_topics,
    index=0,  # Default value
    help="Select the materials science topic",
)
question_list_matching = df_aq.loc[df_aq['TOPIC'] == topic]['QUESTION'].to_list()
question_type_matching = df_aq.loc[df_aq['TOPIC'] == topic]['Question Type'].to_list()

# Label each stored question with its type and a 1-based index, e.g. "MCQS-3"
question_type_formatted = [f"{val}-{i + 1}" for i, val in enumerate(question_type_matching)]
question_type_formatted.insert(0, "Enter your own question")

question_selected = col2.selectbox(
    "List of questions",
    options=question_type_formatted,
    index=0,  # Default value
    help="Select the materials science question and get the answer with steps",
)
col2.caption('MCQS - multiple choice questions, MCQS-NUMS - numerical MCQs, MATCH - matching questions, NUM - numerical questions')

if question_selected == "Enter your own question":
    question_num = 0

# Show a stored question and its pre-computed, step-by-step model answer
if question_selected != "Enter your own question":
    # Select which model's stored answer to display
    model_option = col1.selectbox(
        "Select a model",
        ("ChatGPT", "GPT4", "Llama3-8b", "Phi3-mini"),
        index=0,
        placeholder="Select model",
    )
    if model_option == 'ChatGPT':
        model_answers = df_aq.loc[df_aq['TOPIC'] == topic]['detailed_chatgpt'].to_list()
    elif model_option == 'GPT4':
        model_answers = df_aq.loc[df_aq['TOPIC'] == topic]['detailed_gpt4'].to_list()
    elif model_option == 'Llama3-8b':
        model_answers = df_aq.loc[df_aq['TOPIC'] == topic]['detailed_llama3_8b'].to_list()
    elif model_option == 'Phi3-mini':
        model_answers = df_aq.loc[df_aq['TOPIC'] == topic]['detailed_phi3_mini'].to_list()
    answer_type_matching = df_aq.loc[df_aq['TOPIC'] == topic]['Correct Answer'].to_list()
    question_num = int(question_selected.split("-")[-1])

    # Stored cells are stringified lists with literal "\n" separators, so
    # strip the brackets/quotes and stream the pieces one at a time
    # (st.write_stream accepts any iterable)
    col2.write_stream(
        question_list_matching[question_num - 1]
        .removeprefix('[').removesuffix(']')
        .replace("'", " ").replace(",", " ")
        .split("\\n")
    )
    if col2.button('Get answer'):
        col2.write_stream(
            model_answers[question_num - 1]
            .removeprefix('[').removesuffix(']')
            .replace("'", " ").replace(",", " ")
            .split("\\n")
        )
        col2.markdown(f'Correct answer should be {answer_type_matching[question_num - 1]}')

# Read input for a user-supplied question
if question_selected == "Enter your own question":
    system_prompt = (
        "Solve the following question with a highly detailed step-by-step explanation. "
        "Write the correct answer inside a dictionary at the end, in the following format: "
        "the key 'answer' holds a list filled with all correct options, or with a number, "
        "as the question requires. For example, for a question whose correct answer is "
        "option (a), return {'answer':[a]} at the end of the solution. For a question with "
        "multiple correct options 'a,c', return {'answer':[a,c]}. And for a question whose "
        "answer is a numerical value (say 1.33), return {'answer':[1.33]}."
    )
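    # Sketch (hypothetical helper, not wired into the app): the trailing
    # {'answer':[...]} dictionary that this prompt requests could later be
    # recovered from a generated solution with a small regex, e.g.
    #
    #     import re
    #     def extract_answer(solution_text):
    #         hits = re.findall(r"\{'answer':\s*\[(.*?)\]\}", solution_text)
    #         return hits[-1].strip() if hits else None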
    # Select token length
    token_length = col1.select_slider(
        "Token length",
        options=[256, 512, 1024, 2048],
        value=256,  # Default value
    )
    model_option = col1.selectbox(
        "Select a model",
        (
            "Phi-3-mini-4k-instruct",
            "claude-3-haiku-20240307",
            "claude-3-sonnet-20240229",
            "claude-3-opus-20240229",
            "claude-2.1",
            "claude-2.0",
            "claude-instant-1.2",
        ),
        index=0,
        placeholder="Select model",
    )
    option = col2.radio(
        "Input method",
        ("Upload text file", "Enter text"),
        index=1,
        help="Choose whether to upload a text file containing your question or to enter the text manually.",
    )
    # Claude models need an Anthropic API key; clear the field and show a
    # masked placeholder once a key is entered
    if model_option != "Phi-3-mini-4k-instruct":
        text_input_container = col2.empty()
        api = text_input_container.text_input("Enter API key")
        if api != "":
            text_input_container.empty()
            api_hide = "*" * len(api)
            col2.info(api_hide)
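    # Note: st.text_input also accepts type="password", which masks the key
    # natively; the manual emptying/masking above approximates that behavior.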
    if option == "Upload text file":
        uploaded_file = col1.file_uploader("Add text file containing question", type=["txt"])
        if uploaded_file:
            stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
            string_data = stringio.read()
            col1.text_area("Uploaded Question:", value=string_data, height=150)
            question = string_data
        else:
            col1.warning("No file uploaded!")
    elif option == "Enter text":
        question = col2.text_area("Enter your question here:")
    if model_option == "Phi-3-mini-4k-instruct":
        # Load model and tokenizer (CPU only; weights download on first run)
        model = transformers.AutoModelForCausalLM.from_pretrained(
            f"microsoft/{model_option}",
            device_map="cpu",
            torch_dtype="auto",
            trust_remote_code=True,
        )
        tokenizer = transformers.AutoTokenizer.from_pretrained(f"microsoft/{model_option}")
        # Prepare the text-generation pipeline
        pipe = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
        )
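        # Note (sketch, not wired in): Streamlit reruns this script on every
        # widget interaction, so the model reloads each time. Wrapping the
        # load in st.cache_resource would keep one copy across reruns, e.g.
        #
        #     @st.cache_resource
        #     def load_phi3_pipeline():
        #         tok = transformers.AutoTokenizer.from_pretrained(
        #             "microsoft/Phi-3-mini-4k-instruct")
        #         mdl = transformers.AutoModelForCausalLM.from_pretrained(
        #             "microsoft/Phi-3-mini-4k-instruct", device_map="cpu",
        #             torch_dtype="auto", trust_remote_code=True)
        #         return transformers.pipeline(
        #             "text-generation", model=mdl, tokenizer=tok)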
        # Set generation arguments
        generation_args = {
            "max_new_tokens": token_length,
            "return_full_text": True,
            "do_sample": True,
            "top_p": 0.9,
        }
        # The instructions are prepended to the user turn as a single message
        messages = [
            {"role": "user", "content": system_prompt + question},
        ]
        # Generate and display the model's answer when the button is clicked
        if col2.button('Generate output'):
            output = pipe(messages, **generation_args)
            # With chat-style input and return_full_text=True the pipeline
            # returns the whole conversation; the assistant reply is last
            model_output = output[0]['generated_text'][-1]['content']
            col2.write(f'{model_output}')
    else:
        if not api:
            col2.warning("This model requires an API key")
        elif len(question) != 0:
            # Call the Anthropic API only once the button is clicked, so
            # reruns without a click do not trigger a request
            if col2.button('Generate output'):
                client = anthropic.Anthropic(api_key=api)
                message = client.messages.create(
                    model=model_option,
                    max_tokens=token_length,
                    temperature=0,
                    top_p=0.9,
                    system=system_prompt,
                    messages=[{
                        "role": "user",
                        "content": [{"type": "text", "text": question}],
                    }],
                )
                # Display the generated answer below the button
                model_output = message.content
                col2.write(f'{model_output[0].text}')
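            # Note (sketch): a bad key or a network failure will raise from
            # the Anthropic client; a hardened version might wrap the call:
            #
            #     try:
            #         message = client.messages.create(...)
            #     except anthropic.APIError as exc:
            #         col2.error(f"Anthropic request failed: {exc}")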