utills.py
import json
from collections import Counter

import numpy as np
from keras import backend as K
from keras.preprocessing.sequence import pad_sequences

def pad_all(data, maxlen):
    """
    Pads the three sentence lists in `data` to `maxlen` tokens each.

    :param data: dict with keys 'firsts', 'seconds', 'thirds' (lists of
        token-id sequences, one per example) and 'labels'
    :param maxlen: maximum sequence length; longer sequences are truncated
        and shorter ones zero-padded, both at the end
    :return: object array of [[x_1, x_2, x_3], labels], where x_i is the
        padded array for the i-th sentence of each example
    """
    x_1 = pad_sequences(data['firsts'], maxlen=maxlen, padding='post', truncating='post', value=0)
    x_2 = pad_sequences(data['seconds'], maxlen=maxlen, padding='post', truncating='post', value=0)
    x_3 = pad_sequences(data['thirds'], maxlen=maxlen, padding='post', truncating='post', value=0)
    # dtype=object is required on recent numpy, since the nested structure is ragged.
    return np.array([[x_1, x_2, x_3], data['labels']], dtype=object)
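

# Illustrative sketch (not part of the original module) of the input dict that
# pad_all expects; the toy token ids and label below are made up.
def _pad_all_example():
    sample = {
        'firsts': [[1, 2, 3]],
        'seconds': [[4, 5]],
        'thirds': [[6]],
        'labels': [1],
    }
    padded = pad_all(sample, maxlen=4)
    # padded[0] holds the three padded input arrays, padded[1] the labels;
    # e.g. padded[0][0] is array([[1, 2, 3, 0]]).
    print(padded[0][0])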

def load_cui_dataset(path, maxlen):
    """Loads the train/test/dev JSON splits from `path` and pads each with pad_all."""
    with open(path + "train.json") as f:
        print("Loading train")
        train = json.load(f)
        print("Done loading train.\nPadding train")
        train = pad_all(train, maxlen)
        print("Done padding train")
    with open(path + "test.json") as f:
        print("Loading test")
        test = json.load(f)
        print("Done loading test.\nPadding test")
        test = pad_all(test, maxlen)
        print("Done padding test")
    with open(path + "dev.json") as f:
        print("Loading dev")
        dev = json.load(f)
        print("Done loading dev.\nPadding dev")
        dev = pad_all(dev, maxlen)
        print("Done padding dev")
    return train, dev, test
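

# Hypothetical usage sketch: the directory and maxlen below are placeholders,
# not from the original code. Note that `path` must end with a separator,
# since filenames are appended by plain string concatenation.
def _load_example():
    train, dev, test = load_cui_dataset("data/cui/", maxlen=50)
    train_inputs, train_labels = train  # unpack the [[x_1, x_2, x_3], labels] pair
    return train_inputs, train_labels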

def f1(y_true, y_pred):
    """Batch-wise F1 score: the harmonic mean of precision and recall."""

    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        return true_positives / (possible_positives + K.epsilon())

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        return true_positives / (predicted_positives + K.epsilon())

    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
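

# Sketch of wiring f1 in as a compile-time Keras metric; the optimizer and
# loss here are placeholders, not taken from the original code.
def _compile_with_f1(model):
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy', f1])
    return model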

def get_class_weights(y, smooth_factor=0):
    """
    Returns the weight for each class based on the frequencies of the samples.

    :param y: list of true labels (the labels must be hashable)
    :param smooth_factor: factor that smooths extremely uneven weights
    :return: dictionary with the weight for each class
    """
    counter = Counter(y)

    if smooth_factor > 0:
        p = max(counter.values()) * smooth_factor
        for k in counter.keys():
            counter[k] += p

    majority = max(counter.values())
    return {cls: float(majority / count) for cls, count in counter.items()}
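

# Quick sketch of get_class_weights on a made-up label list; the returned dict
# can be passed straight to Keras via model.fit(..., class_weight=weights).
def _class_weights_example():
    labels = [0, 0, 0, 0, 1, 1, 2]
    print(get_class_weights(labels))                     # {0: 1.0, 1: 2.0, 2: 4.0}
    print(get_class_weights(labels, smooth_factor=0.1))  # weights smoothed toward uniform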