-
Notifications
You must be signed in to change notification settings - Fork 23
/
Home.py
131 lines (109 loc) · 6.79 KB
/
Home.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
from dotenv import load_dotenv
import streamlit as st
import pandas as pd
# Load environment variables from the .env file so the os.environ lookups
# further down can see them.
load_dotenv()
# IMPORTANT: create a .env file containing OPENAI_API_KEY=sk-xyz (where xyz is
# your key); COHERE_API_KEY and ANTHROPIC_API_KEY are looked up the same way.
import warnings

# Provider keys this script looks up in the process environment.
API_KEY_NAMES = ("OPENAI_API_KEY", "COHERE_API_KEY", "ANTHROPIC_API_KEY")


def missing_api_keys(names=API_KEY_NAMES, environ=None):
    """Return the entries of *names* that are unset or empty in *environ*.

    Args:
        names: Environment variable names to look up.
        environ: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        A list of the missing names, in the order they were given.
    """
    env = os.environ if environ is None else environ
    return [name for name in names if not env.get(name)]


# A key that is set is already present in os.environ, so nothing has to be
# copied. Writing os.environ[name] = None for an unset key raises TypeError,
# so absent keys are reported with a warning instead of being assigned.
absent_api_keys = missing_api_keys()
if absent_api_keys:
    warnings.warn(
        "Missing API key(s): "
        + ", ".join(absent_api_keys)
        + ". Add them to your .env file before using the matching provider."
    )
from st_pages import Page, show_pages, add_page_title
# Sidebar heading for the app.
st.sidebar.header("RAGTune")

# Optional -- adds the title and icon to the current page.
add_page_title()

# Sidebar navigation as (script path, sidebar title) pairs, in display order.
# "pages/5_prompt_optimizer.py" ("Prompt Optimization using DSPy") is left out
# for now -- coming soon.
sidebar_entries = [
    ("Home.py", "Upload Document and Assign Dataset"),
    ("pages/1_LLM.py", "Evaluate LLM Models"),
    ("pages/2_embeddings.py", "Evaluate Embeddings"),
    ("pages/3_query_tranformations.py", "Evaluate Query Transformations"),
    ("pages/4_rerankers.py", "Evaluate Rerankers"),
]
show_pages([Page(script_path, title) for script_path, title in sidebar_entries])
# Default document location, used until a document source is chosen below.
doc_path = "docs/constitution.pdf"

# Give each evaluation list an empty starting value the first time the
# session runs; values that are already stored are left untouched.
for state_key in ('eval_questions', 'eval_answers'):
    if state_key not in st.session_state:
        st.session_state[state_key] = []
# Let the user either upload their own documents or use the default test
# document, and store the matching evaluation Q&As and document path in
# session state.
st.header('Document Selection')
document_option = st.radio("Choose your document source", ('Upload a file', 'Use default test document'))

if document_option == 'Upload a file':
    # This branch starts from a single blank Q&A row on every run, so the
    # editor below always receives the same initial data.
    st.session_state['eval_questions'] = [""]
    st.session_state['eval_answers'] = [""]

    # Allow multiple files to be uploaded (pdf, csv, docx, pptx).
    uploaded_files = st.file_uploader("Choose files", type=['pdf', 'csv', 'docx', 'pptx'], accept_multiple_files=True)
    if uploaded_files:
        # Create the upload directory if needed; exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        upload_dir = "uploaded_docs"
        os.makedirs(upload_dir, exist_ok=True)

        for uploaded_file in uploaded_files:
            # The file name is supplied by the client: keep only its final
            # path component so it cannot escape upload_dir (e.g. "../x").
            safe_name = os.path.basename(uploaded_file.name)
            if not safe_name:
                continue
            with open(os.path.join(upload_dir, safe_name), "wb") as f:
                f.write(uploaded_file.getbuffer())

        # Record the directory holding the uploaded documents.
        st.session_state['doc_path'] = upload_dir

        # User input for eval_questions and eval_answers.
        st.subheader('Provide Evaluation Questions and Answers')
        data = {
            'Questions': st.session_state['eval_questions'],
            'Ground Truth': st.session_state['eval_answers'],
        }
        edited_qa_df = st.data_editor(data, num_rows="dynamic", use_container_width=True, hide_index=True)
        eval_questions_list = edited_qa_df['Questions']
        eval_answers_list = edited_qa_df['Ground Truth']

        if st.button("Save eval Q&As"):
            # Rows added in the editor can have empty cells. Keep only rows
            # where both the question and the answer are non-blank text, so
            # every saved question has a matching ground truth, and store
            # plain lists regardless of the container the editor returned.
            complete_pairs = [
                (question, answer)
                for question, answer in zip(eval_questions_list, eval_answers_list)
                if isinstance(question, str) and question.strip()
                and isinstance(answer, str) and answer.strip()
            ]
            st.session_state['eval_questions'] = [question for question, _ in complete_pairs]
            st.session_state['eval_answers'] = [answer for _, answer in complete_pairs]
            if complete_pairs:
                st.success("Evaluation questions and answers saved successfully!")
            else:
                st.warning("No complete question/answer rows to save. Fill in both columns first.")
else:
    # Use the default document: point doc_path at the "docs" directory.
    doc_path = "docs"
    st.write("Using the default document: Constitution.pdf")

    # Default eval_questions and eval_answers (paired by position).
    eval_questions = [
        "what is article I of the constitution of the US about?",
        "How many sections does ARTICLE. IV have?",
        "Who is elegible to be the President of the US?",
        "What majority is needed to amend the constitution",
        "How many states are sufficient for ratification of the constitution?",
    ]
    eval_answers = [
        "Article I of the United States Constitution establishes the legislative branch of the federal government, known as the United States Congress. It outlines that all legislative powers are vested in Congress, which is divided into two parts: the House of Representatives and the Senate. The bicameral Congress was created as a compromise between large and small states, with representation based on population and equal representation for states. Article I grants Congress enumerated powers and the authority to pass laws necessary for carrying out those powers. It also sets out procedures for passing bills and imposes limits on Congress's authority. Additionally, Article I's Vesting Clause ensures that all federal legislative power belongs to Congress, emphasizing the separation of powers among the three branches of government",
        "4 Sections",
        "No Person except a natural born Citizen, or a Citizen of the United States, at the time of the Adoption of this Constitution, shall be eligible to the Office of President; neither shall any Per- son be eligible to that Office who shall not have attained to the Age of thirty five Years, and been fourteen Years a Resident within the United States.",
        "The Congress, whenever two thirds of both Houses shall deem it necessary, shall propose Amendments to this Constitution, or, on the Ap- plication of the Legislatures of two thirds of the several States, shall call a Convention for pro- posing Amendments",
        "The Ratification of the Conventions of nine States, shall be sufficient for the Establishment of this Constitution between the States so rati- fying the Same.",
    ]

    # Assign the default questions, answers and document path to the state.
    st.session_state['eval_questions'] = eval_questions
    st.session_state['eval_answers'] = eval_answers
    st.session_state['doc_path'] = doc_path
# Show the evaluation set currently held in session state, if there is one.
saved_questions = st.session_state.get('eval_questions')
saved_answers = st.session_state.get('eval_answers')

if not (saved_questions and saved_answers):
    st.header('No evaluation questions and answers provided.')
else:
    st.subheader('Saved Evaluation Questions and Answers')
    # Render the two lists side by side as a table.
    eval_qa_df = pd.DataFrame({
        'Questions': saved_questions,
        'Ground Truth': saved_answers,
    })
    st.dataframe(eval_qa_df, use_container_width=True, hide_index=True)

    # Links to the evaluation pages are only offered once there are 4+ rows.
    if len(eval_qa_df["Questions"]) < 4:
        st.warning('Please add at least 4 rows of data for evaluation')
    else:
        st.subheader('Proceed to one of the tabs on the left to perform Evaluations')
        evaluation_links = (
            ("pages/1_LLM.py", "LLM"),
            ("pages/2_embeddings.py", "Embeddings"),
            ("pages/3_query_tranformations.py", "Query Tranformations"),
            ("pages/4_rerankers.py", "Rerankers"),
        )
        for target_page, link_label in evaluation_links:
            st.page_link(target_page, label=link_label)