-
Notifications
You must be signed in to change notification settings - Fork 28
/
digitStruct.py
139 lines (117 loc) · 3.86 KB
/
digitStruct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/python
# Ref:https://confluence.slac.stanford.edu/display/PSDM/How+to+access+HDF5+data+from+Python
import h5py
import numpy as np
#
# Bounding Box
#
class BBox(object):
    """Bounding box of one digit.

    All constructor arguments are optional, so ``BBox()`` still produces
    the same empty box as before; callers may also pass initial values.

    Attributes:
        label: Digit associated with the box (empty string by default).
        left: Value of the ``left`` field (0 by default).
        top: Value of the ``top`` field (0 by default).
        width: Value of the ``width`` field (0 by default).
        height: Value of the ``height`` field (0 by default).
    """

    def __init__(self, label="", left=0, top=0, width=0, height=0):
        self.label = label  # Digit
        self.left = left
        self.top = top
        self.width = width
        self.height = height

    def __repr__(self):
        # Readable form for debugging / printing lists of boxes.
        return "BBox(label={!r}, left={!r}, top={!r}, width={!r}, height={!r})".format(
            self.label, self.left, self.top, self.width, self.height)
class DigitStruct(object):
    """One image record: its file name plus the boxes found in it.

    Both constructor arguments are optional, so ``DigitStruct()`` still
    yields an object whose attributes are ``None``.

    Attributes:
        name: Image file name (``None`` until set).
        bboxList: List of BBox structs (``None`` until set).
    """

    def __init__(self, name=None, bboxList=None):
        self.name = name          # Image file name
        self.bboxList = bboxList  # List of BBox structs

    def __repr__(self):
        # Readable form for debugging.
        return "DigitStruct(name={!r}, bboxList={!r})".format(
            self.name, self.bboxList)
# Function for debugging
def printHDFObj(theObj, theObjName):
    """Print a short debugging summary of ``theObj``.

    Always prints the given name and the Python type. For h5py files,
    groups and datasets the ``id`` is printed; for files and groups the
    keys are printed as well. The length is printed when the object has
    one, and any object that is not an h5py file/group/dataset/reference
    is printed as-is.

    Args:
        theObj: Object to describe (h5py object or any other value).
        theObjName: Label printed on the first line.
    """
    isFile = isinstance(theObj, h5py.File)
    isGroup = isinstance(theObj, h5py.Group)
    isDataSet = isinstance(theObj, h5py.Dataset)
    isReference = isinstance(theObj, h5py.Reference)
    isContainer = isFile or isGroup

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("{}".format(theObjName))
    print(" type(): {}".format(type(theObj)))
    if isContainer or isDataSet:
        print(" id: {}".format(theObj.id))
    if isContainer:
        # list(...) so the keys themselves are shown even when keys()
        # returns a view object rather than a list.
        print(" keys: {}".format(list(theObj.keys())))
    if not isReference:
        # Objects without a length (plain scalars, for instance) used to
        # raise here and never reached the fallback print below.
        try:
            objLen = len(theObj)
        except TypeError:
            objLen = None
        if objLen is not None:
            print(" Len: {}".format(objLen))
    if not (isContainer or isDataSet or isReference):
        print(theObj)
def readDigitStructGroup(dsFile):
    """Return the ``"digitStruct"`` entry of ``dsFile``."""
    return dsFile["digitStruct"]
#
# Reads a string from the file using its reference
#
def readString(strRef, dsFile):
    """Read a string from the file using its reference.

    The object found at ``dsFile[strRef]`` is treated as a collection of
    character codes; each code is converted with ``chr`` and the
    characters are joined in order.

    Args:
        strRef: Key/reference used to index ``dsFile``.
        dsFile: Opened file (or any mapping) holding the character codes.

    Returns:
        The decoded string (empty if there are no codes).
    """
    # Flatten first so that chr() always receives a plain scalar code,
    # never a one-element row array (which relies on an implicit
    # array-to-integer conversion).
    charCodes = np.asarray(dsFile[strRef]).ravel()
    return ''.join(chr(int(code)) for code in charCodes)
#
# Reads an integer value from the file
#
def readInt(intArray, dsFile):
    """Read an integer value from the file.

    The first element of ``intArray`` is either an HDF5 reference, in
    which case it is looked up in ``dsFile`` and the first stored value
    is used, or it is assumed to be the value itself.

    Args:
        intArray: Indexable whose first element is a reference or a value.
        dsFile: Opened file used to resolve references.

    Returns:
        The value converted with ``int``.
    """
    intRef = intArray[0]
    if isinstance(intRef, h5py.Reference):
        # Pick the first scalar explicitly: int() on a one-element 1-D
        # array depends on an implicit conversion that NumPy deprecates.
        return int(np.asarray(dsFile[intRef]).flat[0])
    # Assuming value type
    return int(intRef)
def yieldNextInt(intDataset, dsFile):
    """Lazily yield one integer per entry of ``intDataset``.

    Each entry is handed to ``readInt`` together with ``dsFile``.
    """
    for entry in intDataset:
        yield readInt(entry, dsFile)
def yieldNextBBox(bboxDataset, dsFile):
    """Yield, for every entry of ``bboxDataset``, a list of BBox objects.

    The first element of each entry is used to look up a group in
    ``dsFile``; that group's "label", "left", "top", "width" and "height"
    datasets are read in lockstep, one BBox per position.

    Args:
        bboxDataset: Iterable of entries whose first element indexes
            ``dsFile``.
        dsFile: Opened file used to resolve the groups and integer values.

    Yields:
        A list of BBox objects for each entry.

    Note:
        The five fields are combined with ``zip``, so if they ever differ
        in length the list stops at the shortest one. The previous manual
        ``next()`` calls would instead leak ``StopIteration`` out of this
        generator (a ``RuntimeError`` under PEP 479).
    """
    for bboxArray in bboxDataset:
        bboxGroup = dsFile[bboxArray[0]]
        labels = yieldNextInt(bboxGroup["label"], dsFile)
        lefts = yieldNextInt(bboxGroup["left"], dsFile)
        tops = yieldNextInt(bboxGroup["top"], dsFile)
        widths = yieldNextInt(bboxGroup["width"], dsFile)
        heights = yieldNextInt(bboxGroup["height"], dsFile)

        bboxList = []
        for label, left, top, width, height in zip(
                labels, lefts, tops, widths, heights):
            bbox = BBox()
            bbox.label = label
            bbox.left = left
            bbox.top = top
            bbox.width = width
            bbox.height = height
            bboxList.append(bbox)
        yield bboxList
def yieldNextFileName(nameDataset, dsFile):
    """Lazily yield one file name per entry of ``nameDataset``.

    The first element of each entry is passed to ``readString`` as the
    reference to resolve in ``dsFile``.
    """
    for entry in nameDataset:
        yield readString(entry[0], dsFile)
# dsFile = h5py.File('../data/gsvhn/train/digitStruct.mat', 'r')
def yieldNextDigitStruct(dsFileName):
    """Yield one DigitStruct per image described in the given file.

    Opens ``dsFileName`` read-only, then walks the "name" and "bbox"
    datasets of the digitStruct group in parallel, pairing each file name
    with its list of bounding boxes.

    Args:
        dsFileName: Path of the HDF5 file to read.

    Yields:
        DigitStruct objects with ``name`` and ``bboxList`` filled in.

    Note:
        The file is opened in a ``with`` block so the handle is released
        when the generator is exhausted or closed; previously it was
        never closed.
    """
    with h5py.File(dsFileName, 'r') as dsFile:
        dsGroup = readDigitStructGroup(dsFile)
        nameDataset = dsGroup["name"]
        bboxDataset = dsGroup["bbox"]

        bboxListIter = yieldNextBBox(bboxDataset, dsFile)
        for name in yieldNextFileName(nameDataset, dsFile):
            # Kept as an explicit next() so iteration stays lazy on
            # Python 2 as well (zip() there would read everything first).
            obj = DigitStruct()
            obj.name = name
            obj.bboxList = next(bboxListIter)
            yield obj
def testMain(dsFileName='../data/gsvhn/train/digitStruct.mat', maxCount=None):
    """Print every image name and its bounding boxes from a digitStruct file.

    Args:
        dsFileName: Path of the file to read; defaults to the training
            set path that was previously hard-coded.
        maxCount: Optional limit on the number of images printed. The
            default ``None`` prints everything, which is what the old
            code did because its counter was never incremented.
    """
    for index, dsObj in enumerate(yieldNextDigitStruct(dsFileName)):
        if maxCount is not None and index >= maxCount:
            break
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(dsObj.name)
        for bbox in dsObj.bboxList:
            print(" {}:{},{},{},{}".format(
                bbox.label, bbox.left, bbox.top, bbox.width, bbox.height))
# Script entry point: run the smoke test only when executed directly.
if "__main__" == __name__:
    testMain()