-
Notifications
You must be signed in to change notification settings - Fork 148
/
scan.py
333 lines (269 loc) · 14.1 KB
/
scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# USAGE:
# python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
# For example, to scan a single image with interactive mode:
# python scan.py --image sample_images/desk.JPG -i
# To scan all images in a directory automatically:
# python scan.py --images sample_images
# Scanned images are written to a directory named 'output'.
from pyimagesearch import transform
from pyimagesearch import imutils
from scipy.spatial import distance as dist
from matplotlib.patches import Polygon
import polygon_interacter as poly_i
import numpy as np
import matplotlib.pyplot as plt
import itertools
import math
import cv2
from pylsd.lsd import lsd
import argparse
import os
class DocScanner(object):
    """
    An image scanner.

    Locates the four corners of a document in a photograph, applies a
    perspective transformation to flatten it, and writes a sharpened,
    thresholded (black and white) version of the result to disk.
    """

    def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
        """
        Args:
            interactive (boolean): If True, user can adjust screen contour before
                transformation occurs in interactive pyplot window.
            MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
                do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
                of the original image. Defaults to 0.25.
            MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
                of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
        """
        self.interactive = interactive
        self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
        self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE

    def filter_corners(self, corners, min_dist=20):
        """
        Filters corners that are within min_dist of others.

        The selection is greedy and order dependent: a corner is kept only if
        it is at least min_dist away from every corner kept before it.

        Args:
            corners (iterable): Candidate (x, y) points.
            min_dist (number): Minimum Euclidean distance between kept corners.

        Returns:
            list: The kept corners, in their original relative order.
        """
        filtered_corners = []
        for corner in corners:
            if all(dist.euclidean(kept, corner) >= min_dist for kept in filtered_corners):
                filtered_corners.append(corner)
        return filtered_corners

    def angle_between_vectors_degrees(self, u, v):
        """
        Returns the angle between two vectors in degrees.

        Args:
            u, v (array-like): Vectors of equal length.

        Returns:
            The angle in the range [0, 180] degrees.
        """
        cosine = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
        # Floating point rounding can push the cosine of (anti-)parallel vectors
        # marginally outside [-1, 1], which would make math.acos raise
        # "math domain error". Clamp it back into the valid domain first.
        cosine = float(np.clip(cosine, -1.0, 1.0))
        return np.degrees(math.acos(cosine))

    def get_angle(self, p1, p2, p3):
        """
        Returns the angle between the line segment from p2 to p1
        and the line segment from p2 to p3 in degrees
        """
        # Work in floating point pixel coordinates. (No unit conversion of the
        # coordinates is needed: the angle does not depend on their scale.)
        vertex = np.asarray(p2, dtype=float)
        avec = np.asarray(p1, dtype=float) - vertex
        cvec = np.asarray(p3, dtype=float) - vertex
        return self.angle_between_vectors_degrees(avec, cvec)

    def angle_range(self, quad):
        """
        Returns the range between max and min interior angles of quadrilateral.
        The input quadrilateral must be a numpy array with vertices ordered clockwise
        starting with the top left vertex.

        Each vertex is expected to be wrapped in an extra dimension (the layout
        used for OpenCV contours), i.e. quad[i][0] is the (x, y) point.
        """
        tl, tr, br, bl = quad
        ura = self.get_angle(tl[0], tr[0], br[0])
        ula = self.get_angle(bl[0], tl[0], tr[0])
        lra = self.get_angle(tr[0], br[0], bl[0])
        lla = self.get_angle(br[0], bl[0], tl[0])

        return np.ptp([ura, ula, lra, lla])

    @staticmethod
    def _find_external_contours(binary_image, method):
        """Returns the external contours of binary_image on any OpenCV version."""
        # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
        # return (contours, hierarchy); the contours are always second to last.
        return cv2.findContours(binary_image, cv2.RETR_EXTERNAL, method)[-2]

    @staticmethod
    def _contour_end_points(contour, axis):
        """
        Collapses the contour of a drawn line blob into the line's two end points.

        Args:
            contour: A contour whose points are stored as (N, 1, 2).
            axis (int): 0 for a horizontal line (ends are the x extremes),
                1 for a vertical line (ends are the y extremes).

        Returns:
            ((x, y), (x, y)) with plain int coordinates, or None when the blob
            is too short to provide both end points.
        """
        points = contour.reshape((contour.shape[0], contour.shape[2]))
        along = points[:, axis]
        across = points[:, 1 - axis]

        # Pull both ends in by 2 px (presumably to compensate for the thickness
        # of the stroke the blob was drawn with -- TODO confirm).
        low = int(np.amin(along, axis=0)) + 2
        high = int(np.amax(along, axis=0)) - 2

        across_at_low = across[along == low]
        across_at_high = across[along == high]
        if across_at_low.size == 0 or across_at_high.size == 0:
            # A blob spanning fewer than 3 px along the axis has no contour
            # points at the inset positions; averaging nothing would yield NaN
            # and crash the int() conversion below.
            return None

        low_mid = int(np.average(across_at_low))
        high_mid = int(np.average(across_at_high))

        if axis == 0:
            return (low, low_mid), (high, high_mid)
        return (low_mid, low), (high_mid, high)

    def get_corners(self, img):
        """
        Returns a list of corners ((x, y) tuples) found in the input image.
        Only the two longest horizontal and the two longest vertical lines are
        considered, and corners in close proximity are merged, so the list of
        candidates stays small.
        This is a utility function used by get_contour. The input image is expected
        to be rescaled and Canny filtered prior to being passed in.
        """
        lines = lsd(img)

        # massages the output from LSD
        # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
        # 1. separate out the lines into horizontal and vertical lines.
        # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
        # 3. Run connected-components on the new canvas
        # 4. Get the bounding box for each component, and the bounding box is final line.
        # 5. The ends of each line is a corner
        # 6. Repeat for vertical lines
        # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners

        corners = []
        if lines is not None and np.size(lines) > 0:
            # separate out the horizontal and vertical lines, and draw them back onto separate canvases.
            # atleast_2d keeps a single detected segment as one row; a bare
            # squeeze() would flatten it and break the unpacking below.
            segments = np.atleast_2d(lines.squeeze()).astype(np.int32).tolist()
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for x1, y1, x2, y2, _ in segments:
                if abs(x2 - x1) > abs(y2 - y1):
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
                    cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
                else:
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
                    cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)

            # find the final lines (connected-components -> bounding boxes -> final lines),
            # first for the horizontal canvas (axis 0), then for the vertical one (axis 1)
            final_canvases = []
            for axis, canvas in ((0, horizontal_lines_canvas), (1, vertical_lines_canvas)):
                contours = self._find_external_contours(canvas, cv2.CHAIN_APPROX_NONE)
                contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]

                # each final line is drawn with value 1 on a clean canvas so that
                # horizontal/vertical crossings add up to 2 further down
                final_canvas = np.zeros(img.shape, dtype=np.uint8)
                for contour in contours:
                    end_points = self._contour_end_points(contour, axis)
                    if end_points is None:
                        continue
                    start, end = end_points
                    cv2.line(final_canvas, start, end, 1, 1)
                    corners.append(start)
                    corners.append(end)
                final_canvases.append(final_canvas)

            # find the corners: pixels covered by both a horizontal and a vertical line
            corners_y, corners_x = np.where(final_canvases[0] + final_canvases[1] == 2)
            corners += [(int(x), int(y)) for x, y in zip(corners_x, corners_y)]

        # remove corners in close proximity
        corners = self.filter_corners(corners)
        return corners

    def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
        """
        Returns True if the contour satisfies all requirements set at instantiation:
        it has exactly four vertices, its area exceeds MIN_QUAD_AREA_RATIO of the
        IM_WIDTH x IM_HEIGHT image, and the range of its interior angles is below
        MAX_QUAD_ANGLE_RANGE.
        """
        return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
            and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE)

    def get_contour(self, rescaled_image):
        """
        Returns a numpy array of shape (4, 2) containing the vertices of the four corners
        of the document in the image. It considers the corners returned from get_corners()
        and uses heuristics to choose the four corners that most likely represent
        the corners of the document. If no corners were found, or no candidate
        quadrilateral passes is_valid_contour() (too small, or interior angles too
        uneven), it returns the four corners of the whole image.
        """

        # these constants are carefully chosen
        MORPH = 9
        CANNY = 84

        IM_HEIGHT, IM_WIDTH = rescaled_image.shape[:2]

        # convert the image to grayscale and blur it slightly
        gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7,7), 0)

        # morphological closing helps to remove potential holes between edge segments
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
        dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

        # find edges and mark them in the output map using the Canny algorithm
        edged = cv2.Canny(dilated, 0, CANNY)
        test_corners = self.get_corners(edged)

        approx_contours = []

        if len(test_corners) >= 4:
            quads = []

            for quad in itertools.combinations(test_corners, 4):
                points = np.array(quad)
                # project helper; presumably puts the points in a consistent
                # order as required by angle_range() -- verify in transform.py
                points = transform.order_points(points)
                points = np.array([[p] for p in points], dtype = "int32")
                quads.append(points)

            # get top five quadrilaterals by area
            quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
            # sort candidate quadrilaterals by their angle range, which helps remove outliers
            quads = sorted(quads, key=self.angle_range)

            approx = quads[0]
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)

            # for debugging: uncomment the code below to draw the corners and contour found
            # by get_corners() and overlay it on the image

            # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
            # plt.scatter(*zip(*test_corners))
            # plt.imshow(rescaled_image)
            # plt.show()

        # also attempt to find contours directly from the edged image, which occasionally
        # produces better results
        cnts = self._find_external_contours(edged.copy(), cv2.CHAIN_APPROX_SIMPLE)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        # loop over the contours, largest first, and keep the first valid one
        for c in cnts:
            # approximate the contour
            approx = cv2.approxPolyDP(c, 80, True)
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)
                break

        # If we did not find any valid contours, just use the whole image
        if not approx_contours:
            TOP_RIGHT = (IM_WIDTH, 0)
            BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
            BOTTOM_LEFT = (0, IM_HEIGHT)
            TOP_LEFT = (0, 0)
            screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])

        else:
            screenCnt = max(approx_contours, key=cv2.contourArea)

        return screenCnt.reshape(4, 2)

    def interactive_get_contour(self, screenCnt, rescaled_image):
        """
        Shows the detected contour on top of the image in a pyplot window and
        lets the user adjust it. Blocks until the window is closed.

        Args:
            screenCnt: The initial four corner points, shape (4, 2).
            rescaled_image: The image the corners refer to.

        Returns:
            A numpy int32 array of shape (4, 2) holding the first four points
            reported by the polygon interactor.
        """
        poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5)
        fig, ax = plt.subplots()
        ax.add_patch(poly)
        ax.set_title(('Drag the corners of the box to the corners of the document. \n'
            'Close the window when finished.'))
        interactor = poly_i.PolygonInteractor(ax, poly)
        plt.imshow(rescaled_image)
        plt.show()

        new_points = interactor.get_poly_points()[:4]
        new_points = np.array([[pt] for pt in new_points], dtype = "int32")
        return new_points.reshape(4, 2)

    def scan(self, image_path):
        """
        Scans the image at image_path and saves the result under the same
        file name in the 'output' directory (created if it does not exist).

        Args:
            image_path (str): Path of the image to scan.

        Raises:
            AssertionError: If the image cannot be read.
        """

        RESCALED_HEIGHT = 500.0
        OUTPUT_DIR = 'output'

        # load the image and compute the ratio of the old height
        # to the new height, clone it, and resize it
        image = cv2.imread(image_path)

        # cv2.imread signals failure by returning None. Raise explicitly (a bare
        # assert statement would be stripped when Python runs with -O); the
        # exception type is the one the previous assert produced.
        if image is None:
            raise AssertionError("Could not read image: " + str(image_path))

        ratio = image.shape[0] / RESCALED_HEIGHT
        orig = image.copy()
        rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT))

        # get the contour of the document
        screenCnt = self.get_contour(rescaled_image)

        if self.interactive:
            screenCnt = self.interactive_get_contour(screenCnt, rescaled_image)

        # apply the perspective transformation; the contour was found on the
        # rescaled image, so scale it back up to the original resolution
        warped = transform.four_point_transform(orig, screenCnt * ratio)

        # convert the warped image to grayscale
        gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

        # sharpen image (unsharp mask: boost the image, subtract a blurred copy)
        sharpen = cv2.GaussianBlur(gray, (0,0), 3)
        sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)

        # apply adaptive threshold to get black and white effect
        thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)

        # save the transformed image; cv2.imwrite does not create missing
        # directories, so make sure the output directory exists first
        if not os.path.isdir(OUTPUT_DIR):
            os.makedirs(OUTPUT_DIR)
        basename = os.path.basename(image_path)
        cv2.imwrite(os.path.join(OUTPUT_DIR, basename), thresh)
        print("Proccessed " + basename)
if __name__ == "__main__":
    # Command-line entry point: scan either a single image (--image) or every
    # supported image found directly inside a directory (--images).
    ap = argparse.ArgumentParser()
    group = ap.add_mutually_exclusive_group(required=True)
    group.add_argument("--images", help="Directory of images to be scanned")
    group.add_argument("--image", help="Path to single image to be scanned")
    ap.add_argument("-i", action='store_true',
        help = "Flag for manually verifying and/or setting document corners")

    args = vars(ap.parse_args())
    im_dir = args["images"]
    im_file_path = args["image"]
    interactive_mode = args["i"]

    scanner = DocScanner(interactive_mode)

    valid_formats = (".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif")

    def get_ext(f):
        """Returns the lower-cased file extension of f, including the dot."""
        return os.path.splitext(f)[1].lower()

    # Scan single image specified by command line argument --image <IMAGE_PATH>.
    # Compare against None rather than relying on truthiness so that an empty
    # --image value is not mistaken for "use --images" (im_dir is None then).
    if im_file_path is not None:
        scanner.scan(im_file_path)

    # Scan all valid images in directory specified by command line argument --images <IMAGE_DIR>
    else:
        im_files = [f for f in os.listdir(im_dir) if get_ext(f) in valid_formats]
        for im in im_files:
            scanner.scan(os.path.join(im_dir, im))