Skip to content

Commit

Permalink
vector DB
Browse files Browse the repository at this point in the history
  • Loading branch information
HemanthSai7 committed Dec 18, 2023
1 parent 03ccb0f commit 5588cb0
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 16 deletions.
Binary file removed StudybotAPI/backend/data/History_1.pdf
Binary file not shown.
15 changes: 5 additions & 10 deletions StudybotAPI/backend/ingestion/embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from langchain.vectorstores import Qdrant
# from langchain.vectorstores import Qdrant
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

Expand Down Expand Up @@ -32,17 +33,11 @@ def store_embeddings(self, docs):

texts = self.split_docs(docs)

vector_store = Qdrant.from_documents(
texts,
embeddings,
# path=self.cfg.VECTOR_DB,
location=":memory:",
# host="localhost",
# prefer_grpc=True,
collection_name=self.cfg.VECTOR_DB,
vector_store = DocArrayInMemorySearch.from_documents(
texts, embeddings
)

print(f"Vector store created at {self.cfg.VECTOR_DB}")
print(f"Vector store created.")

return vector_store

Expand Down
1 change: 1 addition & 0 deletions StudybotAPI/backend/ingestion/streamer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions StudybotAPI/backend/retriever/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ def ops_inference(response_result: FrontendResponseModel, question: str):
except Exception as e:
response_result["status"] = "error"
response_result["message"].append(str(e))
print(response_result)
raise ModelDeploymentException(response_result)
3 changes: 2 additions & 1 deletion StudybotAPI/backend/utils/chain_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ async def llm_chain_loader(DATA_PATH: str):
qa_chain = ConversationalRetrievalChain.from_llm(
llm=llm,
chain_type="stuff",
retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 2}),
retriever=db.as_retriever(
search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4}),
# return_source_documents=True,
# chain_type_kwargs={"prompt": prompt},
condense_question_prompt=prompt,
Expand Down
5 changes: 3 additions & 2 deletions StudybotAPI/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ langchain==0.0.346
pydantic==1.10.2
pypdf
python-box
qdrant-client
# qdrant-client
torch
transformers
sentence_transformers
clarifai
Pillow
tiktoken
python-multipart
urllib3==1.25.11
# urllib3==1.25.11
docarray
Binary file added data/History_1.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions frontend/layouts/mainlayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def mainlayout(func: Callable):
def wrapper():
with open("frontend/layouts/st_page_layouts.json", "r", encoding="utf-8") as f:
with open("layouts/st_page_layouts.json", "r", encoding="utf-8") as f:
st_page_layouts = json.load(f)

st.set_page_config(
Expand All @@ -19,7 +19,7 @@ def wrapper():
else "home"
]
)
add_logo("frontend/images/studybotlogo.svg", svg=True)
add_logo("images/studybotlogo.svg", svg=True)
st.markdown("# Studybot 📚")
user_greetings()
authors()
Expand Down
41 changes: 41 additions & 0 deletions frontend/pages/file_streaming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os

import streamlit as st

from langchain.callbacks.base import BaseCallbackHandler


class StreamHandler(BaseCallbackHandler):
    """Stream LLM tokens into a Streamlit container as they are generated.

    Accumulates tokens into ``self.text`` and re-renders the container on
    every new token, except for tokens belonging to the internal
    question-rephrasing LLM call, which are suppressed.
    """

    def __init__(
        self, container: st.delta_generator.DeltaGenerator, initial_text: str = ""
    ):
        self.container = container
        self.text = initial_text
        # Run id of the LLM call whose tokens must be hidden (the rephrase
        # step), or None while no call is being ignored.
        self.run_id_ignore_token = None

    def on_llm_start(self, serialized: dict, prompts: list, **kwargs):
        # Workaround to prevent showing the rephrased question as output:
        # the condense-question prompt starts with "Human", so remember that
        # run's id and drop its tokens later.
        if not prompts[0].startswith("Human"):
            return
        self.run_id_ignore_token = kwargs.get("run_id")

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Note: default of False (not None) keeps tokens flowing when no
        # run is being ignored and the callback carries no run_id.
        if self.run_id_ignore_token != kwargs.get("run_id", False):
            self.text += token
            self.container.markdown(self.text)


class PrintRetrievalHandler(BaseCallbackHandler):
    """Display retriever activity (query and retrieved documents) inside a
    Streamlit status box."""

    def __init__(self, container):
        # Collapsible status widget that collects all retrieval output.
        self.status = container.status("**Context Retrieval**")

    def on_retriever_start(self, serialized: dict, query: str, **kwargs):
        box = self.status
        box.write(f"**Question:** {query}")
        box.update(label=f"**Context Retrieval:** {query}")

    def on_retriever_end(self, documents, **kwargs):
        box = self.status
        for position, document in enumerate(documents):
            # Show only the file name, not the full source path.
            origin = os.path.basename(document.metadata["source"])
            box.write(f"**Document {position} from {origin}**")
            box.markdown(document.page_content)
        box.update(state="complete")
2 changes: 1 addition & 1 deletion frontend/🏡_Home.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def home():
"<h2 style='text-align: center; color: black;'>Studybot Architecture</h1>",
unsafe_allow_html=True,
)
st.image("frontend/images/architecture.png")
st.image("images/architecture.png")


home()

0 comments on commit 5588cb0

Please sign in to comment.