forked from rykov8/ssd_keras
-
Notifications
You must be signed in to change notification settings - Fork 86
/
data_icdar2015fst.py
68 lines (58 loc) · 2.55 KB
/
data_icdar2015fst.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import os
from thirdparty.get_image_size import get_image_size
from ssd_data import BaseGTUtility
class GTUtility(BaseGTUtility):
    """Utility for ICDAR2015 (International Conference on Document Analysis
    and Recognition) Focused Scene Text dataset.

    # Arguments
        data_path: Path to ground truth and image data. It must contain the
            'Challenge2_*_Task1_GT' and 'Challenge2_*_Task12_Images'
            directories of the selected subset.
        test: Boolean for using training or test set.
        polygon: Return oriented boxes defined by their four corner points.
            Required by SegLink...

    # Attributes
        gt_path: Directory the ground truth files are read from.
        image_path: Directory the images are read from.
        classes: List of class names, ['Background', 'Text'].
        image_names: List of image file names, sorted by name.
        data: List with one float array per image. Each row holds the box
            coordinates divided by the image width/height, followed by the
            class label 1. Rows are (xmin, ymin, xmax, ymax, 1), or with
            polygon=True the four corners (xmin, ymin), (xmax, ymin),
            (xmax, ymax), (xmin, ymax) followed by 1.
        text: List with one list of transcription strings per image,
            aligned with the rows of the corresponding array in data.

    # Raises
        AssertionError: If a non-empty ground truth line has fewer than
            five whitespace separated fields.
    """

    def __init__(self, data_path, test=False, polygon=False):
        self.data_path = data_path
        if test:
            gt_path = os.path.join(data_path, 'Challenge2_Test_Task1_GT')
            image_path = os.path.join(data_path, 'Challenge2_Test_Task12_Images')
        else:
            gt_path = os.path.join(data_path, 'Challenge2_Training_Task1_GT')
            image_path = os.path.join(data_path, 'Challenge2_Training_Task12_Images')
        self.gt_path = gt_path
        self.image_path = image_path
        self.classes = ['Background', 'Text']

        self.image_names = []
        self.data = []
        self.text = []

        # Number of values per row: coordinates plus the class label.
        row_length = 9 if polygon else 5

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible across runs and machines.
        for image_name in sorted(os.listdir(image_path)):
            img_width, img_height = get_image_size(os.path.join(image_path, image_name))
            boxes = []
            text = []
            # Ground truth for 'xyz.jpg' is stored in 'gt_xyz.txt'.
            gt_file_name = 'gt_' + os.path.splitext(image_name)[0] + '.txt'
            with open(os.path.join(gt_path, gt_file_name), 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank (e.g. trailing) lines.
                        continue
                    # Four coordinates, then the quoted transcription. The
                    # split is limited to four so that a transcription
                    # containing spaces stays in one piece.
                    line_split = line.split(None, 4)
                    if len(line_split) != 5:
                        # Raised explicitly so the check survives 'python -O';
                        # exception type and message are kept unchanged.
                        raise AssertionError("length is %d" % len(line_split))
                    # Coordinates may carry a trailing comma as separator.
                    xmin, ymin, xmax, ymax = [
                        float(v.replace(',', '')) for v in line_split[0:4]]
                    # Normalize to relative image coordinates.
                    xmin /= img_width
                    ymin /= img_height
                    xmax /= img_width
                    ymax /= img_height
                    if polygon:
                        box = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                    else:
                        box = [xmin, ymin, xmax, ymax]
                    # Append the class label, index 1 is 'Text'.
                    boxes.append(box + [1])
                    # Drop the quotes enclosing the transcription.
                    text.append(line_split[4][1:-1])
            # The reshape is a no-op for non-empty data and keeps the array
            # two-dimensional for images without any annotation.
            boxes = np.asarray(boxes).reshape(-1, row_length)
            self.image_names.append(image_name)
            self.data.append(boxes)
            self.text.append(text)

        # init() is not defined in this class; presumably inherited from
        # BaseGTUtility to finalize the loaded data -- confirm in ssd_data.
        self.init()
if __name__ == '__main__':
    # Manual smoke test: load the test subset from the default location
    # and dump the parsed box arrays to stdout.
    gt_util = GTUtility(data_path='data/ICDAR2015_FST', test=True)
    print(gt_util.data)