-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemo_utils.py
85 lines (66 loc) · 2.25 KB
/
emo_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import csv
import numpy as np
import emoji
import pandas as pd
def read_glove_vecs(glove_file, encoding='cp437'):
    """
    Load GloVe-style word vectors from a whitespace-separated text file.

    Every non-blank line is expected to hold a token followed by the
    components of its vector, e.g. ``the 0.418 0.24968 ...``. If a token
    appears more than once, the last occurrence wins.

    Arguments:
    glove_file -- path of the text file to read
    encoding -- text encoding used to decode the file. Defaults to 'cp437',
                which is what this helper has always used; pass 'utf-8' to
                decode non-ASCII tokens of a UTF-8 file faithfully.

    Returns:
    words_to_index -- dict mapping each word to its 1-based position in
                      sorted word order
    index_to_words -- dict mapping each 1-based index back to its word
    word_to_vec_map -- dict mapping each word to a float64 numpy vector
    """
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding=encoding) as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Blank or whitespace-only lines carry no token; skip them
                # rather than failing on fields[0].
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    # The vocabulary is exactly the set of keys collected above.
    # Indices are 1-based.
    words_to_index = {}
    index_to_words = {}
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w
    return words_to_index, index_to_words, word_to_vec_map
def softmax(x):
    """
    Return the softmax of x, normalised over all of its elements.

    The maximum of x is subtracted before exponentiating so that large
    scores do not overflow; this does not change the result.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    total = exps.sum()
    return exps / total
def read_csv(filename = 'data/emojify_data.csv'):
    """
    Read sentences and their integer labels from a CSV file.

    The first column of every row is taken as the sentence and the second
    column as its label; any further columns are ignored. Completely empty
    rows (blank lines) are skipped.

    Arguments:
    filename -- path of the CSV file to read

    Returns:
    X -- numpy array of sentences (strings)
    Y -- numpy array of labels converted to int
    """
    phrases = []
    # Named `labels` rather than `emoji` so the imported `emoji` module is
    # not shadowed inside this function.
    labels = []
    # newline='' is what the csv module asks for, so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(filename, newline='') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to
                # index, so skip it instead of raising IndexError.
                continue
            phrases.append(row[0])
            labels.append(row[1])
    X = np.asarray(phrases)
    Y = np.asarray(labels, dtype=int)
    return X, Y
def convert_to_one_hot(Y, C):
    """
    Turn an array of integer class labels into one-hot rows.

    Arguments:
    Y -- numpy array of integer labels; it is flattened before encoding
    C -- number of classes, i.e. the length of each one-hot row

    Returns:
    Array with one row per label, where row k is row Y[k] of the C x C
    identity matrix.
    """
    flat_labels = Y.reshape(-1)
    identity = np.eye(C)
    return identity[flat_labels]
# Maps a label (as a string key) to the emoji text passed to emoji.emojize().
emoji_dictionary = {
    # :heart: prints a black instead of red heart depending on the font
    "0": "\u2764\uFE0F",
    "1": ":baseball:",
    "2": ":smile:",
    "3": ":disappointed:",
    "4": ":fork_and_knife:",
}
def label_to_emoji(label):
    """
    Converts a label (int or string) into the corresponding emoji code (string) ready to be printed

    Arguments:
    label -- key into emoji_dictionary; it is converted with str() first

    Returns:
    The string produced by emoji.emojize() for the matching dictionary entry.

    Raises:
    KeyError -- if str(label) is not a key of emoji_dictionary
    """
    code = emoji_dictionary[str(label)]
    try:
        # Keep the original call first so behaviour is unchanged on emoji
        # releases that still accept `use_aliases`.
        return emoji.emojize(code, use_aliases=True)
    except TypeError:
        # emoji >= 2.0 removed `use_aliases`; alias names such as
        # ":fork_and_knife:" are resolved with language='alias' there.
        return emoji.emojize(code, language='alias')
def print_predictions(X, pred):
    """
    Print every sentence of X next to the emoji of its predicted label.

    A blank line is printed first, then one line per sentence.

    Arguments:
    X -- array of sentences; X.shape[0] entries are printed
    pred -- predictions indexable by position; each pred[i] must hold exactly
            one numeric label (a scalar or a single-element array, such as a
            row of an (m, 1) array)
    """
    print()
    for i in range(X.shape[0]):
        # Extract the single value explicitly with .item(): calling int()
        # directly on a 1-element array with ndim > 0 is deprecated since
        # NumPy 1.25.
        label = int(np.asarray(pred[i]).item())
        print(X[i], label_to_emoji(label))
def get_emoji(X, W, b, word_to_vec_map):
    """
    Predict an emoji label for every sentence in X and print the results.

    Each sentence is lower-cased and split on whitespace, then represented by
    the average of its word vectors. The predicted label is the argmax of
    softmax(W . avg + b). Predictions are printed with print_predictions();
    nothing is returned.

    Arguments:
    X -- array of sentences (strings); X.shape[0] is the number of examples
    W -- weights applied to the averaged vector; the size of its last
         dimension is used as the word-vector length
    b -- bias added to W . avg
    word_to_vec_map -- dict mapping words to their vectors; it is looked up
                       with the lower-cased words of each sentence

    Raises:
    KeyError -- if a word of a sentence is missing from word_to_vec_map
    """
    m = X.shape[0]
    pred = np.zeros((m, 1))
    # Take the embedding size from W instead of hard-coding 50, so vectors of
    # any dimension work as long as they match W.
    n_features = np.shape(W)[-1]

    for j in range(m):
        words = X[j].lower().split()

        # Average the word vectors of the sentence.
        avg = np.zeros((n_features,))
        for w in words:
            avg += word_to_vec_map[w]
        if words:
            # Guard against an empty sentence: dividing by zero would turn the
            # average (and everything computed from it) into NaN.
            avg = avg / len(words)

        # Forward pass: linear layer followed by softmax.
        Z = np.dot(W, avg) + b
        A = softmax(Z)
        pred[j] = np.argmax(A)

    print_predictions(X, pred)