-
Notifications
You must be signed in to change notification settings - Fork 56
/
app.py
74 lines (59 loc) · 2.15 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import faiss
import pickle
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
from vector_engine.utils import vector_search
@st.cache
def read_data(data="data/misinformation_papers.csv"):
"""Read the data from local."""
return pd.read_csv(data)
@st.cache(allow_output_mutation=True)
def load_bert_model(name="distilbert-base-nli-stsb-mean-tokens"):
"""Instantiate a sentence-level DistilBERT model."""
return SentenceTransformer(name)
@st.cache(allow_output_mutation=True)
def load_faiss_index(path_to_faiss="models/faiss_index.pickle"):
"""Load and deserialize the Faiss index."""
with open(path_to_faiss, "rb") as h:
data = pickle.load(h)
return faiss.deserialize_index(data)
def main():
# Load data and models
data = read_data()
model = load_bert_model()
faiss_index = load_faiss_index()
st.title("Vector-based searches with Sentence Transformers and Faiss")
# User search
user_input = st.text_area("Search box", "covid-19 misinformation and social media")
# Filters
st.sidebar.markdown("**Filters**")
filter_year = st.sidebar.slider("Publication year", 2010, 2021, (2010, 2021), 1)
filter_citations = st.sidebar.slider("Citations", 0, 250, 0)
num_results = st.sidebar.slider("Number of search results", 10, 50, 10)
# Fetch results
if user_input:
# Get paper IDs
D, I = vector_search([user_input], model, faiss_index, num_results)
# Slice data on year
frame = data[
(data.year >= filter_year[0])
& (data.year <= filter_year[1])
& (data.citations >= filter_citations)
]
# Get individual results
for id_ in I.flatten().tolist():
if id_ in set(frame.id):
f = frame[(frame.id == id_)]
else:
continue
st.write(
f"""**{f.iloc[0].original_title}**
**Citations**: {f.iloc[0].citations}
**Publication year**: {f.iloc[0].year}
**Abstract**
{f.iloc[0].abstract}
"""
)
if __name__ == "__main__":
main()