-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathchatbot.py
137 lines (109 loc) · 4.75 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# _____TF-IDF libraries_____
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
# _____helper Libraries_____
import pickle
import csv
import json
import timeit
import random
#import os
def talk_to_cb_primary(test_set_sentence, minimum_score, json_file_path,
                       tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path):
    """Pick a stored response for a query using TF-IDF cosine similarity.

    The pickled vectorizer and training matrix are loaded from disk; if that
    fails for any reason they are rebuilt (and re-pickled) by ``train_chat``.
    The query is scored against the training sentences and, when the best
    score exceeds ``minimum_score``, one of the entries scoring within 0.01
    of the best is chosen at random.

    Args:
        test_set_sentence: The user's query text.
        minimum_score: Similarity the best match must exceed to be answered.
        json_file_path: JSON file holding a list of objects with "message"
            and "response" keys.
        tfidf_vectorizer_pikle_path: Pickle file of the fitted vectorizer.
        tfidf_matrix_train_pikle_path: Pickle file of the training matrix.

    Returns:
        A ``(response, score)`` tuple. ``("live_chat", 0)`` is returned when
        the best score does not exceed ``minimum_score`` or when no
        near-best match corresponds to a row of the JSON file.
    """
    try:
        # NOTE: pickle.load can execute arbitrary code; these paths must only
        # ever point at files this program wrote itself.
        with open(tfidf_vectorizer_pikle_path, 'rb') as vectorizer_file:
            tfidf_vectorizer = pickle.load(vectorizer_file)
        with open(tfidf_matrix_train_pikle_path, 'rb') as matrix_file:
            tfidf_matrix_train = pickle.load(matrix_file)
    except Exception:
        # Deliberate best effort: a missing or unreadable cache triggers a
        # retrain. Unlike a bare except, Ctrl-C and SystemExit still propagate.
        tfidf_vectorizer, tfidf_matrix_train = train_chat(
            json_file_path, tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path)

    # The query is still paired with an empty string so the vectorizer sees
    # the same input as before; only the query's own row (row 0) is scored.
    tfidf_matrix_test = tfidf_vectorizer.transform((test_set_sentence, ""))
    similarity = np.asarray(cosine_similarity(tfidf_matrix_test, tfidf_matrix_train))

    # Dropping column 0 skips the first placeholder sentence that train_chat
    # prepends, so index k lines up with the k-th JSON row (1-based) and
    # index 0 is the second placeholder, which has no response.
    scores = similarity[0, 1:]

    best_score = scores.max()
    if not (best_score > minimum_score):
        return "live_chat", 0

    with open(json_file_path, "r") as sentences_file:
        rows = json.load(sentences_file)

    near_best = np.where(scores > best_score - 0.01)[0]
    # Keep only indices that map to a real JSON row; previously such a pick
    # fell through the lookup loop and the function returned None.
    answerable = near_best[(near_best >= 1) & (near_best <= len(rows))]
    if answerable.size == 0:
        return "live_chat", 0

    # tolist() hands random.choice a plain list of ints, which it accepts on
    # every Python version (NumPy arrays are not reliably accepted).
    response_index = random.choice(answerable.tolist())
    return rows[response_index - 1]["response"], best_score
#def previous_chats(query):
# minimum_score = 0.7
# file = "data/previous_chats.json"
# tfidf_vectorizer_pikle_path = "data/previous_tfidf_vectorizer.pickle"
# tfidf_matrix_train_path = "data/previous_tfidf_matrix_train.pickle"
# query_response, score = talk_to_cb_primary(query , minimum_score , file , tfidf_vectorizer_pikle_path , tfidf_matrix_train_path)
# return query_response , score
def train_chat(json_file_path, tfidf_vectorizer_pikle_path, tfidf_matrix_train_pikle_path):
    """Fit a TF-IDF model on the stored chat messages and pickle the result.

    Args:
        json_file_path: JSON file holding a list of objects, each with a
            "message" key.
        tfidf_vectorizer_pikle_path: Where the fitted vectorizer is pickled.
        tfidf_matrix_train_pikle_path: Where the TF-IDF matrix is pickled.

    Returns:
        A ``(tfidf_vectorizer, tfidf_matrix_train)`` tuple.
    """
    # Two placeholder sentences come first so that the indexes line up with
    # the offset arithmetic used when a response is looked up.
    sentences = [" No you.", " No you."]

    start = timeit.default_timer()

    with open(json_file_path, "r") as sentences_file:
        sentences.extend(row["message"] for row in json.load(sentences_file))

    tfidf_vectorizer = TfidfVectorizer()
    # fit_transform learns the vocabulary and returns the normalized TF-IDF
    # scores for every sentence.
    tfidf_matrix_train = tfidf_vectorizer.fit_transform(sentences)

    stop = timeit.default_timer()
    print("training time took was : ")
    print(stop - start)

    # Context managers close the cache files even if pickling fails.
    with open(tfidf_vectorizer_pikle_path, 'wb') as vectorizer_file:
        pickle.dump(tfidf_vectorizer, vectorizer_file)
    with open(tfidf_matrix_train_pikle_path, 'wb') as matrix_file:
        pickle.dump(tfidf_matrix_train, matrix_file)

    return tfidf_vectorizer, tfidf_matrix_train
# -----------------------------------------#
def previous_chats(query, minimum_score=0.7, data_dir="D:/Projects/Bot/data"):
    """Answer a query from the previous-chats data set.

    Args:
        query: The user's message.
        minimum_score: Similarity threshold passed to ``talk_to_cb_primary``.
        data_dir: Directory holding ``previous_chats.json`` and the two
            pickle cache files. The default keeps the original hard-coded
            location; pass another directory to run elsewhere.

    Returns:
        The response text produced by ``talk_to_cb_primary`` (the score it
        also returns is discarded).
    """
    json_path = data_dir + "/previous_chats.json"
    tfidf_vectorizer_pikle_path = data_dir + "/previous_tfidf_vectorizer.pickle"
    tfidf_matrix_train_path = data_dir + "/previous_tfidf_matrix_train.pickle"
    query_response, _score = talk_to_cb_primary(
        query, minimum_score, json_path, tfidf_vectorizer_pikle_path, tfidf_matrix_train_path)
    return query_response
if __name__ == "__main__":
    # Interactive console loop: read a message, print the bot's reply.
    # Guarded so that importing this module does not block on input().
    while True:
        try:
            sent = input("ishika : ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C is the only way out of the loop; leave
            # quietly instead of dumping a traceback.
            break
        print(previous_chats(sent))