forked from esitarski/CrossMgr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExcel.py
115 lines (101 loc) · 3.45 KB
/
Excel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import xlrd
import xml.etree.ElementTree
import os
import six
import math
import unicodedata
from mmap import mmap, ACCESS_READ
def toAscii( s ):
if not s:
return ''
ret = unicodedata.normalize('NFKD', s).encode('ascii','ignore') if type(s) == six.text_type else str(s)
if ret.endswith( '.0' ):
ret = ret[:-2]
return ret
#----------------------------------------------------------------------------
class ReadExcelXls( object ):
def __init__(self, filename):
if not os.path.isfile(filename):
raise ValueError( "{} is not a valid filename".format(filename) )
with open(filename,'rb') as f:
self.book = xlrd.open_workbook(
filename=filename,
file_contents=mmap(f.fileno(),0,access=ACCESS_READ),
)
# self.book = xlrd.open_workbook(filename)
def is_nonempty_row(self, sheet, i):
values = sheet.row_values(i)
if isinstance(values[0], six.string_types) and values[0].startswith('#'):
return False # ignorable comment row
return any( bool(v) for v in values )
def sheet_names( self ):
return self.book.sheet_names()
def _parse_row(self, sheet, row_index, date_as_tuple):
""" Sanitize incoming excel data """
# Data Type Codes:
# EMPTY 0
# TEXT 1 a Unicode string
# NUMBER 2 float
# DATE 3 float
# BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
# ERROR 5
values = []
for type, value in six.moves.zip(sheet.row_types(row_index), sheet.row_values(row_index)):
if type == 2:
if value == int(value):
value = int(value)
elif type == 3:
if isinstance(value, float) and value < 1.0:
t = value * (24.0*60.0*60.0)
fract, secs = math.modf( t )
if fract >= 0.99999:
secs += 1.0
fract = 0.0
elif fract <= 0.00001:
fract = 0.0
secs = int(secs)
if fract:
value = '{:02d}:{:02d}:{:02d}.{}'.format(
secs // (60*60), (secs // 60) % 60, secs % 60,
'{:.20f}'.format(fract)[2:],
)
else:
value = '{:02d}:{:02d}:{:02d}'.format(secs // (60*60), (secs // 60) % 60, secs % 60)
else:
try:
datetuple = xlrd.xldate_as_tuple(value, self.book.datemode)
validDate = True
except:
value = 'UnreadableDate'
validDate = False
if validDate:
if date_as_tuple:
value = datetuple
else:
# time only - no date component
if datetuple[0] == 0 and datetuple[1] == 0 and datetuple[2] == 0:
value = "%02d:%02d:%02d" % datetuple[3:]
# date only, no time
elif datetuple[3] == 0 and datetuple[4] == 0 and datetuple[5] == 0:
value = "%04d/%02d/%02d" % datetuple[:3]
else: # full date
value = "%04d/%02d/%02d %02d:%02d:%02d" % datetuple
elif type == 5:
value = xlrd.error_text_from_code[value]
values.append(value)
return values
def iter_list(self, sname, date_as_tuple=False):
sheet = self.book.sheet_by_name(sname) # XLRDError
for i in range(sheet.nrows):
yield self._parse_row(sheet, i, date_as_tuple)
#----------------------------------------------------------------------------
ReadExcelXlsx = ReadExcelXls
#----------------------------------------------------------------------------
def GetExcelReader( filename ):
if filename.endswith( '.xls' ):
return ReadExcelXls( filename )
elif filename.endswith( '.xlsx' ) or filename.endswith( '.xlsm' ):
return ReadExcelXlsx( filename )
else:
raise ValueError( '{} is not a recognized Excel format'.format(filename) )
#----------------------------------------------------------------------------