-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathemotion_predictor.py
115 lines (98 loc) · 4.32 KB
/
emotion_predictor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import html
import pickle
import re
import pandas as pd
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import sequence
class EmotionPredictor:
    """Predict emotions for tweets with a saved character-level Keras model.

    The model file is chosen from ``classification``, ``setting`` and
    ``use_unison_model``; tweets are cleaned, encoded character by character
    and padded before being passed to the model. Results are returned as
    pandas DataFrames whose first column, ``Tweet``, holds the input tweets.
    """

    # Patterns removed from every tweet before encoding. Compiled once at
    # class creation instead of being re-resolved for every tweet.
    _URL_PATTERN = re.compile(r"http\S+")
    _PIC_URL_PATTERN = re.compile(r"pic\.twitter\.com/\S+")

    # Output labels per classification, in the order this class pairs them
    # with the model's output columns. 'unison' has no entry on purpose.
    _CLASS_VALUES = {
        'ekman': ('Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise'),
        'plutchik': ('Anger', 'Disgust', 'Fear', 'Joy', 'Sadness',
                     'Surprise', 'Trust', 'Anticipation'),
        'poms': ('Anger', 'Depression', 'Fatigue', 'Vigour', 'Tension',
                 'Confusion'),
    }

    def __init__(self, classification, setting, use_unison_model=True):
        """
        Args:
            classification (str): Either 'ekman', 'plutchik', 'poms'
                or 'unison'.
            setting (str): Either 'mc' or 'ml'.
            use_unison_model (bool): Whether to use unison model;
                else use single model.

        Raises:
            ValueError: If ``classification`` or ``setting`` is not one of
                the accepted values.
        """
        if classification not in ['ekman', 'plutchik', 'poms', 'unison']:
            raise ValueError('Unknown emotion classification: {}'.format(
                classification))
        if setting not in ['mc', 'ml']:
            raise ValueError('Unknown setting: {}'.format(setting))

        self.classification = classification
        self.setting = setting
        self.use_unison_model = use_unison_model

        # Order matters: the embeddings function is built from self.model.
        self.model = self._get_model()
        self.embeddings_model = self._get_embeddings_model()
        self.char_to_ind = self._get_char_mapping()
        self.class_values = self._get_class_values()
        self.max_len = self._get_max_sequence_length()

    def _get_model(self):
        """Load the Keras model matching this instance's configuration.

        The relative path ``models/[unison-]<classification>-<setting>.h5``
        is remembered in ``self._loaded_model_filename``.
        """
        self._loaded_model_filename = 'models/{}{}-{}.h5'.format(
            'unison-' if self.use_unison_model else '',
            self.classification,
            self.setting,
        )
        return load_model(self._loaded_model_filename)

    def _get_embeddings_model(self):
        """Return a callable mapping encoded tweets to hidden activations.

        The callable evaluates the output of the model's third-from-last
        layer for the given input batch.
        """
        layer_output = K.function(
            [self.model.layers[0].input, K.learning_phase()],
            [self.model.layers[-3].output])

        def embed_batch(x):
            # Learning phase is fixed to 0 -- presumably inference mode per
            # the Keras backend convention; confirm for the Keras version
            # in use.
            return layer_output([x, 0])[0]

        return embed_batch

    @staticmethod
    def _get_char_mapping():
        """Load the character-to-index mapping used to encode tweets."""
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only use a models/allowed-chars.pkl from a trusted source.
        with open('models/allowed-chars.pkl', 'rb') as f:
            return pickle.load(f)

    def _get_class_values(self):
        """Return the list of emotion labels for ``self.classification``.

        Returns ``None`` for 'unison', which has no label set defined here;
        ``predict_classes`` and ``predict_probabilities`` need the labels
        and cannot be used in that case.
        """
        labels = self._CLASS_VALUES.get(self.classification)
        # Hand out a fresh list so callers cannot mutate the shared table.
        return None if labels is None else list(labels)

    def _get_max_sequence_length(self):
        """Return the length every encoded tweet is padded/truncated to.

        Returns ``None`` when no length is defined for the configuration
        (the single, non-unison model with the 'unison' classification).
        """
        if self.use_unison_model or self.classification == 'poms':
            return 143
        if self.classification in ['ekman', 'plutchik']:
            return 141
        return None

    @staticmethod
    def _materialize(tweets):
        """Return ``tweets`` in a form that can be iterated more than once.

        The public methods walk the tweets twice: once to encode them and
        once to build the ``Tweet`` column. A one-shot iterator (such as a
        generator) would be exhausted after the first pass, so it is copied
        into a list. Re-iterable inputs are returned unchanged, which keeps
        e.g. the index of a pandas Series intact.
        """
        # An iterator returns itself from iter(); containers return a new
        # iterator object each time.
        if iter(tweets) is tweets:
            return list(tweets)
        return tweets

    def predict_classes(self, tweets):
        """Predict emotion classes for ``tweets``.

        Args:
            tweets: Iterable of tweet strings.

        Returns:
            pandas.DataFrame: A ``Tweet`` column plus, for setting 'mc', an
            ``Emotion`` column holding the label of the highest-scoring
            class; for setting 'ml', one column per label holding 1 where
            the model output is >= 0.5 and 0 where it is below.
        """
        tweets = self._materialize(tweets)
        indices = self._tweet_to_indices(tweets)
        predictions = self.model.predict(indices, verbose=False)

        df = pd.DataFrame({'Tweet': tweets})
        if self.setting == 'mc':
            df['Emotion'] = [self.class_values[i] for i in
                             predictions.argmax(axis=-1)]
        else:
            # Threshold in place. The second mask cannot touch the 1s just
            # written because 1 is not < 0.5.
            predictions[predictions >= 0.5] = 1
            predictions[predictions < 0.5] = 0
            for emotion, values in zip(self.class_values, predictions.T):
                df[emotion] = values
        return df

    def predict_probabilities(self, tweets):
        """Return the model's raw output value per emotion for ``tweets``.

        Args:
            tweets: Iterable of tweet strings.

        Returns:
            pandas.DataFrame: A ``Tweet`` column plus one column per label
            holding the corresponding model output.
        """
        tweets = self._materialize(tweets)
        indices = self._tweet_to_indices(tweets)
        predictions = self.model.predict(indices, verbose=False)

        df = pd.DataFrame({'Tweet': tweets})
        for emotion, values in zip(self.class_values, predictions.T):
            df[emotion] = values
        return df

    def embed(self, tweets):
        """Return hidden-layer embeddings for ``tweets``.

        Args:
            tweets: Iterable of tweet strings.

        Returns:
            pandas.DataFrame: A ``Tweet`` column plus columns ``Dim1``,
            ``Dim2``, ... holding one embedding dimension each.
        """
        tweets = self._materialize(tweets)
        indices = self._tweet_to_indices(tweets)
        embeddings = self.embeddings_model(indices)

        df = pd.DataFrame({'Tweet': tweets})
        for index, values in enumerate(embeddings.T, start=1):
            df['Dim{}'.format(index)] = values
        return df

    def embedd(self, tweets):
        """ Here only for backwards compatibility. """
        return self.embed(tweets)

    def _tweet_to_indices(self, tweets):
        """Clean and encode ``tweets`` into a padded array of char indices.

        Each tweet is HTML-unescaped, stripped of ``http...`` and
        ``pic.twitter.com/...`` URLs, and mapped character by character
        through ``self.char_to_ind``. The resulting sequences are padded
        with ``sequence.pad_sequences`` using ``self.max_len``.

        Raises:
            KeyError: If a tweet contains a character that is missing from
                ``self.char_to_ind`` (assuming the mapping is a plain dict
                -- confirm against the pickle contents).
        """
        char_to_ind = self.char_to_ind
        indices = []
        for t in tweets:
            t = html.unescape(t)  # unescape HTML
            t = self._URL_PATTERN.sub("", t)  # remove normal URLS
            t = self._PIC_URL_PATTERN.sub("", t)  # remove pic.twitter.com URLS
            indices.append([char_to_ind[char] for char in t])
        return sequence.pad_sequences(indices, maxlen=self.max_len)