-
Notifications
You must be signed in to change notification settings - Fork 0
/
sign_extractor.py
74 lines (55 loc) · 3.07 KB
/
sign_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#Extracting signature from input image
import cv2
import matplotlib.pyplot as plt
from skimage import measure, morphology
from skimage.measure import regionprops
import numpy as np
# Tuning constants for the lower size bound on connected components.
# extract_sign computes that bound as
#     (average_area / constant_parameter_1) * constant_parameter_2 + constant_parameter_3
# and removes every component smaller than it.
constant_parameter_1 = 84
constant_parameter_2 = 250
constant_parameter_3 = 100
# Multiplier applied to the lower bound to obtain the upper size bound;
# extract_sign also removes every component larger than that.
constant_parameter_4 = 18
def extract_sign(src_img):
    """Isolate signature-sized connected components in a scanned image.

    The image is binarised, its connected components are labelled, and
    components whose pixel count falls outside a size window derived from the
    average component area are discarded. The surviving label map is saved
    as an image, read back as grayscale and thresholded again.

    Args:
        src_img: Image array accepted by ``cv2.threshold``. Presumably a
            single-channel 8-bit grayscale scan -- TODO confirm with callers.

    Returns:
        The two-level image produced by inverse Otsu thresholding of the
        saved label map (intended result: signature in black on a white
        background).

    Side effects:
        Prints the computed statistics and thresholds to stdout and writes
        ``pre_version.png`` relative to the current working directory.
    """
    # Ensure a two-level image: values above 127 become 255, the rest 0.
    img = cv2.threshold(src_img, 127, 255, cv2.THRESH_BINARY)[1]

    # Connected component analysis: pixels brighter than the mean are treated
    # as background (background=1); the remaining pixels are grouped into
    # labelled regions.
    blobs = img > img.mean()
    blobs_labels = measure.label(blobs, background=1)

    # Gather area statistics. Regions of 10 pixels or fewer are ignored for
    # the average; only regions of at least 250 pixels compete for "biggest".
    the_biggest_component = 0
    total_area = 0
    counter = 0
    for region in regionprops(blobs_labels):
        area = region.area
        if area > 10:
            total_area += area
            counter += 1
        if area >= 250 and area > the_biggest_component:
            the_biggest_component = area

    # If no region exceeds 10 pixels (e.g. a tiny image or only specks),
    # fall back to 0.0 instead of raising ZeroDivisionError.
    average = total_area / counter if counter else 0.0
    print("the_biggest_component: " + str(the_biggest_component))
    print("average: " + str(average))

    # Lower size bound: components smaller than this are treated as outliers
    # (tuned for A4-size scanned documents).
    a4_small_size_outliar_constant = (
        (average / constant_parameter_1) * constant_parameter_2
    ) + constant_parameter_3
    print("a4_small_size_outliar_constant: " + str(a4_small_size_outliar_constant))
    pre_version = morphology.remove_small_objects(
        blobs_labels, a4_small_size_outliar_constant
    )

    # Upper size bound: components bigger than this (table headers and the
    # like) are treated as outliers too.
    a4_big_size_outliar_constant = a4_small_size_outliar_constant * constant_parameter_4
    print("a4_big_size_outliar_constant: " + str(a4_big_size_outliar_constant))

    # component_sizes[label] is the pixel count of that label. Index 0 is the
    # background; zeroing it again when it exceeds the bound is harmless.
    component_sizes = np.bincount(pre_version.ravel())
    too_big = component_sizes > a4_big_size_outliar_constant
    too_big_mask = too_big[pre_version]
    pre_version[too_big_mask] = 0

    # Save the label map as an image (rendered with matplotlib's default
    # colormap) and read it back as grayscale.
    # NOTE(review): the final mask therefore depends on the colormap and on
    # the Otsu split; components with low label numbers may end up close to
    # the background level -- confirm whether a direct `pre_version > 0`
    # mask is what is actually intended.
    plt.imsave('pre_version.png', pre_version)
    img = cv2.imread('pre_version.png', 0)

    # Ensure binary again, inverted: signature black, background white.
    img = cv2.threshold(img, 0, 255,
                        cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    return img