# auxiliary_functions.py
import numpy as np
from skimage.transform import resize
from skimage import io as img_io
from skimage.color import rgb2gray
import cv2

def affine_transformation(img, m=1.0, s=.2, border_value=None):
    # Apply a small random affine warp to a (grayscale) image, used for data
    # augmentation. m controls the mean scaling of the anchor points, s the
    # amount of random jitter around it.
    h, w = img.shape[0], img.shape[1]
    src_point = np.float32([[w / 2.0, h / 3.0],
                            [2 * w / 3.0, 2 * h / 3.0],
                            [w / 3.0, 2 * h / 3.0]])
    random_shift = m + np.random.uniform(-1.0, 1.0, size=(3, 2)) * s
    dst_point = src_point * random_shift.astype(np.float32)
    transform = cv2.getAffineTransform(src_point, dst_point)
    if border_value is None:
        border_value = np.median(img)
    warped_img = cv2.warpAffine(img, transform, dsize=(w, h), borderValue=float(border_value))
    return warped_img
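
# A minimal usage sketch (not part of the original module): augmenting a
# grayscale word image loaded from disk. The file name 'word.png' is a
# placeholder.
#
#   word = img_io.imread('word.png', as_gray=True)
#   warped = affine_transformation(word, m=1.0, s=0.2)
#   assert warped.shape == word.shape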

def image_resize(img, height=None, width=None):
    # Resize img; if only one of height/width is given, the other is inferred
    # so that the aspect ratio is preserved.
    if height is not None and width is None:
        scale = float(height) / float(img.shape[0])
        width = int(scale * img.shape[1])
    if width is not None and height is None:
        scale = float(width) / float(img.shape[1])
        height = int(scale * img.shape[0])
    img = resize(image=img, output_shape=(height, width)).astype(np.float32)
    return img
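
# Usage sketch (illustrative, not from the original file): resize a word image
# to a fixed height while keeping its aspect ratio, or to a fixed canvas size.
#
#   resized = image_resize(word, height=64)             # width inferred
#   fixed   = image_resize(word, height=64, width=256)  # both dimensions forced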

def centered(word_img, tsize, centering=(.5, .5), border_value=None):
    # Pad (or crop) word_img to the target size tsize = (height, width),
    # placing the original content according to the centering fractions and
    # filling the padded area with border_value (the image median by default).
    height = tsize[0]
    width = tsize[1]
    xs, ys, xe, ye = 0, 0, width, height
    diff_h = height - word_img.shape[0]
    if diff_h >= 0:
        pv = int(centering[0] * diff_h)
        padh = (pv, diff_h - pv)
    else:
        diff_h = abs(diff_h)
        ys, ye = diff_h // 2, word_img.shape[0] - (diff_h - diff_h // 2)
        padh = (0, 0)
    diff_w = width - word_img.shape[1]
    if diff_w >= 0:
        pv = int(centering[1] * diff_w)
        padw = (pv, diff_w - pv)
    else:
        diff_w = abs(diff_w)
        xs, xe = diff_w // 2, word_img.shape[1] - (diff_w - diff_w // 2)
        padw = (0, 0)
    if border_value is None:
        border_value = np.median(word_img)
    word_img = np.pad(word_img[ys:ye, xs:xe], (padh, padw), 'constant', constant_values=border_value)
    return word_img
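
# Usage sketch (assumption): pad or crop a word image onto a fixed canvas,
# keeping the content roughly centered and filling the border with the image
# median. The canvas size below is illustrative.
#
#   canvas = centered(word, tsize=(64, 256), centering=(.5, .5))
#   assert canvas.shape == (64, 256)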

def count_parameters(model):
    # Number of trainable parameters of a (PyTorch) model.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def parse_cfg(str_list):
    """
    Parses the backbone structure input argument.
    """
    res = []
    layerlist = str_list.split('x')
    for layer in layerlist:
        if layer.strip() == 'M':
            res.append('M')
        else:
            try:
                str_tuple = tuple(int(elem) for elem in layer.split(','))
            except ValueError:
                print('Error while parsing cfg element {} of argument: {}.'.format(layer, str_list))
                continue
            res.append(str_tuple)
    return res
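
# Usage sketch (assumption): a cfg string where 'M' marks a pooling layer and
# comma-separated integers describe a conv block. The exact string below is
# illustrative, not taken from the original code.
#
#   parse_cfg('64,3xMx128,3')   # -> [(64, 3), 'M', (128, 3)]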

# check this
def average_precision(ret_vec_relevance, gt_relevance_num=None):
    '''
    Computes the average precision from a list of relevance items

    Params:
        ret_vec_relevance: A 1-D numpy array containing ground truth (gt)
            relevance values
        gt_relevance_num: Number of relevant items in the data set
            (with respect to the ground truth)
            If None, the average precision is calculated wrt the number of
            relevant items in the retrieval list (ret_vec_relevance)

    Returns:
        The average precision for the given relevance vector.
    '''
    if ret_vec_relevance.ndim != 1:
        raise ValueError('Invalid ret_vec_relevance shape')

    ret_vec_cumsum = np.cumsum(ret_vec_relevance, dtype=float)
    ret_vec_range = np.arange(1, ret_vec_relevance.size + 1)
    ret_vec_precision = ret_vec_cumsum / ret_vec_range

    if gt_relevance_num is None:
        n_relevance = ret_vec_relevance.sum()
    else:
        n_relevance = gt_relevance_num

    if n_relevance > 0:
        ret_vec_ap = (ret_vec_precision * ret_vec_relevance).sum() / n_relevance
    else:
        ret_vec_ap = 0.0
    return ret_vec_ap
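
# A minimal self-check (a sketch added for illustration, not part of the
# original module): exercises the augmentation and metric helpers on synthetic
# data so the file can be run directly.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    dummy = rng.rand(48, 160).astype(np.float32)

    warped = affine_transformation(dummy, m=1.0, s=0.2)
    resized = image_resize(dummy, height=64)
    canvas = centered(dummy, tsize=(64, 256))

    # A retrieval list where the 1st, 2nd and 4th items are relevant:
    # AP = (1/1 + 2/2 + 3/4) / 3 = 0.9166...
    relevance = np.array([1, 1, 0, 1, 0])

    print('warped:', warped.shape, 'resized:', resized.shape, 'canvas:', canvas.shape)
    print('average precision:', average_precision(relevance))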