-
Notifications
You must be signed in to change notification settings - Fork 0
/
sims.py
28 lines (20 loc) · 881 Bytes
/
sims.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#step3
import pickle
from gensim import corpora, models, similarities
# Rank the stored questions by similarity to a free-text query and print the
# five best matches. All artifacts (dictionary, corpus, LSI model, similarity
# index, pickled question texts) are read from files in the current working
# directory.
# NOTE(review): the dictionary/corpus files are named 'question.*' while the
# index is 'questions.index' -- confirm these match what the build step wrote.
dictionary = corpora.Dictionary.load('question.dict')
corpus = corpora.MmCorpus('question.mm')

# The TF-IDF model is re-fitted from the stored corpus on every run.
tfidf = models.TfidfModel(corpus)
# NOTE(review): corpus_tfidf is never read below; kept so the module-level
# name still exists for anything that relies on it.
corpus_tfidf = tfidf[corpus]

# Turn the query into a bag-of-words using the same lowercase/whitespace
# tokenisation applied here, then into TF-IDF weights.
query = "what is a linked list"
query_bow = dictionary.doc2bow(query.lower().split())
query_tfidf = tfidf[query_bow]

# Project the TF-IDF query through the saved LSI model.
lsi = models.LsiModel.load('model.lsi')
query_model = lsi[query_tfidf]

# Score the query against the saved index, then order (position, score) pairs
# from highest to lowest score.
index = similarities.MatrixSimilarity.load('questions.index')
sims = index[query_model]
sims = sorted(enumerate(sims), key=lambda item: -item[1])

# Pickle data is binary, so the file must be opened in 'rb' mode; text mode
# fails on Python 3.
# SECURITY: pickle.load can execute arbitrary code -- only load a 'names' file
# that this project produced itself.
with open('names', 'rb') as doc:
    documents = pickle.load(doc)

for doc_id, score in sims[:5]:
    # Format the text directly instead of concatenating with the bytes
    # returned by .encode('utf-8'), which raises TypeError on Python 3;
    # print() handles the output encoding.
    print("\nSimilarity Score: %s, Id: %s\nQuestion: %s"
          % (score, doc_id, documents[doc_id]))
    print("------------------------------------------------------------------------------------")