-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
129 lines (112 loc) · 6.99 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import json
import random
from pathlib import Path
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, pipeline
from logic import generate_prompt
from resource_handler import get_all_chunks, read_vector_index
# Questions of the rag-mini-wikipedia test split; get_random_question() samples from this frame.
test_questions = pd.read_parquet("hf://datasets/rag-datasets/rag-mini-wikipedia/data/test.parquet/part.0.parquet")

# config.json is looked up next to this file, so the app works regardless of the current working directory.
# The encoding is pinned because the platform default is not guaranteed to be UTF-8.
with Path(__file__).parent.joinpath("config.json").open("r", encoding="utf-8") as json_file:
    configurations = json.load(json_file)

# Retrieval resources: the stored chunks, the FAISS index file and the sentence encoder named in the config.
all_chunks = get_all_chunks(configurations["chunk_file_path"])
vector_index = read_vector_index(configurations["faiss_index_file_path"])
encoder = SentenceTransformer(configurations["sentence_encoder_model"])

# Generation resources: tokenizer and text-generation pipeline for the configured response generator model.
tokenizer = AutoTokenizer.from_pretrained(configurations["response_generator_model"])
# NOTE: the former `stream=True` argument was dropped. It is not a pipeline option; unknown keyword arguments
# are forwarded to `model.generate()`, which rejects keyword arguments it does not use. Real token streaming
# would require passing a `streamer` object instead.
pipe = pipeline("text-generation", model=configurations["response_generator_model"], device_map="auto",
                num_return_sequences=1, do_sample=True, top_k=5,
                max_new_tokens=50, return_full_text=False)
def get_random_question() -> str:
    """
    Pick one question at random from the rag-mini-wikipedia test dataset
    :return: the text of the selected question
    """
    candidate_questions = test_questions['question'].tolist()
    return random.choice(candidate_questions)
def submit_question(user_question: str):
    """
    Build the LLM prompt for a question by delegating to generate_prompt with the
    module-level encoder, vector index, chunks and tokenizer
    :param user_question: The input question
    :return: The prompt returned by generate_prompt
    """
    return generate_prompt(user_question, encoder, vector_index, all_chunks, tokenizer)
def generate_response(prompt: str):
    """
    Generate a response from the LLM and yield it incrementally, one character at a time
    :param prompt: The input prompt
    :yield: The response text accumulated so far (each yielded value is one character longer
            than the previous one); nothing is yielded when the generated text is empty
    """
    response = pipe(prompt)
    # For a single prompt the text-generation pipeline returns a list of result dicts
    # (one per returned sequence), so unwrap the first result before reading its text.
    if isinstance(response, (list, tuple)):
        response = response[0] if response else {}
    generated_text = response.get('generated_text') or ""
    # Yield growing prefixes so the UI shows the answer being "typed" out.
    for end in range(1, len(generated_text) + 1):
        yield generated_text[:end]
# Declarative definition of the Gradio page. Components are rendered in the order they are created,
# so the statement order below is the page layout.
with gr.Blocks(theme="gradio/monochrome", title="RAG Project Application") as rag_app:
    # --- Static explanatory text: title, introduction, data, architecture and credits ---
    gr.Markdown("# 🚀 RAG Project Application")
    gr.Markdown("## 📚 Introduction")
    gr.Markdown("RAG, or **R**etrieval **A**ugmented **G**eneration, is an innovative approach that enhances "
                "the capabilities of large language model (LLM) applications. While LLMs are powerful, they're "
                "expensive to train due to the massive datasets required. RAG addresses a key limitation of LLMs: "
                "the lack of access to specific or up-to-date information not present in their training data. "
                "This makes RAG ideal for applications like smart Q&A chatbots within your corporate wiki "
                "or a Confluence-like platform. 💡")
    gr.Markdown("## 📊 The Project Data")
    gr.Markdown("The `rag-mini-wikipedia` dataset from Hugging Face is a compact question-answering dataset "
                "tailored for testing and developing RAG models. Focused on Wikipedia articles, "
                "it's perfect for tasks involving factual information retrieval within that domain. The dataset "
                "comprises passages from Wikipedia along with corresponding questions and answers, all in English. 🌐")
    gr.Markdown("## 🏗️ The Solution Architecture")
    gr.Markdown("In RAG, documents are split into passages or chunks and encoded into vectors. For this project, "
                "we've chosen the `all-MiniLM-L6-v2` sentence encoder from Hugging Face. This model converts "
                "sentences and short paragraphs into 384-dimensional vectors, capturing their semantic meaning. 🧠<br/>"
                "While our dataset is already structured in passages, we've included the chunking process in the "
                "research phase to demonstrate how it's typically done.")
    gr.Markdown("These vectors are then stored in a vector database. We've opted for Meta's `FAISS` (Facebook AI "
                "Similarity Search) for this project. FAISS is a library that excels at efficient similarity "
                "searches and clustering of dense vectors, making it ideal for our needs. 🔍")
    gr.Markdown("When a user query is received (in this case, a randomly chosen question from the rag-mini-wikipedia "
                "test dataset), a prompt is generated. The prompt consists of two parts:<br/>"
                "* **System**: This sets the initial context and behavior for the AI assistant. It's not visible "
                "to the user but guides the AI's responses.<br/>"
                "* **User**: This represents the person interacting with the AI, asking questions or making"
                " requests.<br/>"
                "The user prompt is enriched with context extracted from the FAISS index, helping the LLM provide "
                "more accurate answers. You can view the generated prompt in the code textbox below the chosen "
                "question. 💬<br/>"
                "Note: In this project, we're dealing with single questions rather than conversations, so the "
                "system prompt isn't strictly necessary.")
    gr.Markdown("The final prompt is then passed to the LLM (Language Model) or Response Generator Model. We've "
                "chosen the `meta-llama/Llama-2-7b-chat-hf` model from Hugging Face for this project. It's part of "
                "Meta's Llama 2 series and is fine-tuned with 7 billion parameters for dialogue applications. "
                "Trained on a vast, publicly sourced dataset, this model excels in various domains. 🦙")
    gr.Markdown("This application was created using Gradio 4.38.1. The code was developed with the assistance "
                "of Claude 3.5 Sonnet. For more details on the chunking process, prompt development, conclusions, "
                "and future steps, check out the research notebook in the 'Research' folder of this project "
                "repository. 📁")

    # --- Interactive area: read-only question box next to the two action buttons ---
    with gr.Row():
        with gr.Column(scale=7):
            # get_random_question() is called here, i.e. once while the UI is being built;
            # the textbox is not editable, so the question only changes via the "New Question" button.
            question = gr.Textbox(label="Random Question", value=get_random_question(), interactive=False)
        with gr.Column(scale=3):
            with gr.Row():
                new_question_btn = gr.Button("New Question")
                submit_btn = gr.Button("Submit")

    # --- Output area: the generated prompt, followed by the LLM answer ---
    gr.Markdown("Given the question, here is the tailored prompt for the LLM:")
    generated_text = gr.Code(label="Generated Text", language="markdown", interactive=False, visible=True,
                             show_label=True)
    gr.Markdown("LLM Response:")
    llm_response = gr.Textbox(label="LLM Response", interactive=False, visible=True, show_label=False)

    # --- Event wiring ---
    # "New Question" replaces the displayed question with another random one.
    new_question_btn.click(fn=get_random_question, outputs=question)
    # "Submit" runs two chained steps: first build the prompt from the displayed question and show it,
    # then feed the displayed prompt to generate_response, whose yielded values update the response box.
    submit_btn.click(
        fn=submit_question,
        inputs=question,
        outputs=generated_text,
        show_progress="minimal"
    ).then(
        fn=generate_response,
        inputs=generated_text,
        outputs=llm_response,
        show_progress="minimal"
    )

# Launch the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    rag_app.launch()