from __future__ import division
from __future__ import print_function
import random
import numpy as np
import cv2


def preprocessor(img, imgSize, enhance=False, dataAugmentation=False):
    "put img into target img of size imgSize and transpose for TF"
    # there are damaged files in the IAM dataset - just use a black image instead
    if img is None:
        img = np.zeros([imgSize[1], imgSize[0]])  # shape=(64, 800)
        print("Image None!")
    # increase dataset size by applying random stretches to the images
    if dataAugmentation:
        stretch = (random.random() - 0.5)  # -0.5 .. +0.5
        wStretched = max(int(img.shape[1] * (1 + stretch)), 1)  # random width, but at least 1
        img = cv2.resize(img, (wStretched, img.shape[0]))  # stretch horizontally by factor 0.5 .. 1.5
    if enhance:  # only if the line text has low contrast and the line width is thin
        # increase contrast
        pxmin = np.min(img)
        pxmax = np.max(img)
        imgContrast = (img - pxmin) / max(pxmax - pxmin, 1) * 255  # guard against division by zero on uniform images
        # increase line width (optional)
        kernel = np.ones((3, 3), np.uint8)
        img = cv2.erode(imgContrast, kernel, iterations=1)  # increase line width
    # create target image and copy sample image into it
    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1),
               max(min(ht, int(h / f)), 1))  # scale according to f (result at least 1 and at most wt or ht)
    img = cv2.resize(img, newSize, interpolation=cv2.INTER_CUBIC)  # INTER_CUBIC best approximates the original pixels
    # see https://stackoverflow.com/a/57503843/7338066
    target = np.ones([ht, wt]) * 255  # white background, shape=(64, 800)
    target[0:newSize[1], 0:newSize[0]] = img
    # transpose for TF
    img = cv2.transpose(target)
    return img
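

# Minimal usage sketch for preprocessor(): load one grayscale line image and prepare
# it for the TF model. The file path "data/sample_line.png" is a hypothetical example;
# the target size (800, 64) follows the (64, 800) shape comments above.
def _demo_preprocessor():
    sample = cv2.imread("data/sample_line.png", cv2.IMREAD_GRAYSCALE)  # None if the file is missing or damaged
    out = preprocessor(sample, (800, 64))
    print(out.shape)  # (800, 64): width x height after the transpose for TF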


def wer(r, h):
    """
    Calculation of WER with Levenshtein distance.
    Works only for iterables up to 254 elements (uint8).
    O(nm) time and space complexity.

    Parameters
    ----------
    r : list
    h : list

    Returns
    -------
    int

    Examples
    --------
    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    # initialisation
    d = np.zeros((len(r)+1)*(len(h)+1), dtype=np.uint8)
    d = d.reshape((len(r)+1, len(h)+1))
    for i in range(len(r)+1):
        for j in range(len(h)+1):
            if i == 0:
                d[0][j] = j
            elif j == 0:
                d[i][0] = i

    # computation
    for i in range(1, len(r)+1):
        for j in range(1, len(h)+1):
            if r[i-1] == h[j-1]:
                d[i][j] = d[i-1][j-1]
            else:
                substitution = d[i-1][j-1] + 1
                insertion = d[i][j-1] + 1
                deletion = d[i-1][j] + 1
                d[i][j] = min(substitution, insertion, deletion)
    return d[len(r)][len(h)]
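

# Minimal usage sketch: run the preprocessor demo above and illustrate wer() on
# made-up transcriptions (ground truth vs. recognised text).
if __name__ == "__main__":
    _demo_preprocessor()
    truth = "the quick brown fox".split()
    recognised = "the quick brown box".split()
    print(wer(truth, recognised))  # one substituted word -> edit distance 1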