Skip to content

Commit

Permalink
Merge pull request #2232 from jerneju/attribute-file
Browse files Browse the repository at this point in the history
[FIX] File: Raise and handle Exc. when file bad pickle
  • Loading branch information
janezd authored Apr 21, 2017
2 parents 4924983 + 852055a commit c0d2199
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 24 deletions.
62 changes: 38 additions & 24 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def detect_encoding(filename):
if encoding in (b'utf-8', b'us-ascii', b'iso-8859-1',
b'utf-7', b'utf-16le', b'utf-16be', b'ebcdic'):
return encoding.decode('us-ascii')
except OSError: pass # windoze
except OSError:
pass # windoze

# file not available or unable to guess the encoding, have chardet do it
detector = UniversalDetector()
Expand Down Expand Up @@ -163,7 +164,7 @@ def valuemap_index(val):
offset = len(new_order)
column = values if data.ndim > 1 else data
column += offset
for i, val in enumerate(var.values):
for _, val in enumerate(var.values):
try:
oldval = old_order.index(val)
except ValueError:
Expand All @@ -185,11 +186,11 @@ class Flags:
_RE_SPLIT = re.compile(r'(?<!\\)' + DELIMITER).split
_RE_ATTR_UNQUOTED_STR = re.compile(r'^[a-zA-Z_]').match
ALL = OrderedDict((
('class', 'c'),
('ignore', 'i'),
('meta', 'm'),
('weight', 'w'),
('.+?=.*?', ''), # general key=value attributes
('class', 'c'),
('ignore', 'i'),
('meta', 'm'),
('weight', 'w'),
('.+?=.*?', ''), # general key=value attributes
))
_RE_ALL = re.compile(r'^({})$'.format('|'.join(filter(None, flatten(ALL.items())))))

Expand Down Expand Up @@ -228,10 +229,13 @@ def split(s):

# Matches discrete specification where all the values are listed, space-separated
_RE_DISCRETE_LIST = re.compile(r'^\s*[^\s]+(\s[^\s]+)+\s*$')
_RE_TYPES = re.compile(r'^\s*({}|{}|)\s*$'.format(_RE_DISCRETE_LIST.pattern,
'|'.join(flatten(getattr(vartype, 'TYPE_HEADERS')
for vartype in Variable.registry.values()))))
_RE_FLAGS = re.compile(r'^\s*( |{}|)*\s*$'.format('|'.join(flatten(filter(None, i) for i in Flags.ALL.items()))))
_RE_TYPES = re.compile(r'^\s*({}|{}|)\s*$'.format(
_RE_DISCRETE_LIST.pattern,
'|'.join(flatten(getattr(vartype, 'TYPE_HEADERS') for vartype in Variable.registry.values()))
))
_RE_FLAGS = re.compile(r'^\s*( |{}|)*\s*$'.format(
'|'.join(flatten(filter(None, i) for i in Flags.ALL.items()))
))


class FileFormatMeta(Registry):
Expand Down Expand Up @@ -454,8 +458,10 @@ def parse_headers(data):
"""Return (header rows, rest of data) as discerned from `data`"""

def is_number(item):
    """Return True if `item` can be converted with ``float()``.

    Only ``ValueError`` is treated as "not a number"; any other
    exception raised by ``float()`` propagates to the caller.
    """
    try:
        float(item)
    except ValueError:
        return False
    return True
# Second row items are type identifiers
def header_test2(items):
Expand Down Expand Up @@ -485,8 +491,10 @@ def header_test3(items):

# Try to parse a single-line header
if not header_rows:
try: lines.append(list(next(data)))
except StopIteration: pass
try:
lines.append(list(next(data)))
except StopIteration:
pass
if lines:
# Header if none of the values in line 1 parses as a number
if not all(is_number(i) for i in lines[0]):
Expand All @@ -497,7 +505,7 @@ def header_test3(items):
return header_rows, data

@classmethod
def data_table(self, data, headers=None):
def data_table(cls, data, headers=None):
"""
Return Orange.data.Table given rows of `headers` (iterable of iterable)
and rows of `data` (iterable of iterable; if ``numpy.ndarray``, might
Expand All @@ -510,22 +518,24 @@ def data_table(self, data, headers=None):
assuming they precede it.
"""
if not headers:
headers, data = self.parse_headers(data)
headers, data = cls.parse_headers(data)

# Consider various header types (single-row, two-row, three-row, none)
if 3 == len(headers):
if len(headers) == 3:
names, types, flags = map(list, headers)
else:
if 1 == len(headers):
if len(headers) == 1:
HEADER1_FLAG_SEP = '#'
# First row format either:
# 1) delimited column names
# 2) -||- with type and flags prepended, separated by #,
# e.g. d#sex,c#age,cC#IQ
_flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1) if HEADER1_FLAG_SEP in i else ('', i)
for i in headers[0]])
_flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1)
if HEADER1_FLAG_SEP in i else ('', i)
for i in headers[0]]
)
names = list(names)
elif 2 == len(headers):
elif len(headers) == 2:
names, _flags = map(list, headers)
else:
# Use heuristics for everything
Expand Down Expand Up @@ -635,7 +645,7 @@ def _equal_length(lst):
cols, domain_vars = append_to
cols.append(col)

existing_var, new_var_name, column = None, None, None
existing_var, new_var_name = None, None
if domain_vars is not None:
existing_var = names and names[col]
if not existing_var:
Expand Down Expand Up @@ -807,7 +817,11 @@ class PickleReader(FileFormat):

def read(self):
    """Unpickle `self.filename` and return the data table it contains.

    Raises:
        TypeError: if the unpickled object is not a ``Table``.
    """
    # NOTE: unpickling can execute arbitrary code; only files from a
    # trusted source should be opened with this reader.
    with open(self.filename, 'rb') as f:
        table = pickle.load(f)
    # A valid pickle may hold any object; reject anything that is not
    # a data table instead of handing it on to callers.
    if not isinstance(table, Table):
        raise TypeError("file does not contain a data table")
    return table

@staticmethod
def write_file(filename, data):
Expand Down
13 changes: 13 additions & 0 deletions Orange/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,16 @@ def test_locate_wildcard_extension(self):
l = FileFormat.locate("t", search_dirs=[tempdir])
self.assertEqual(l, fn)
shutil.rmtree(tempdir)


class TestReader(unittest.TestCase):

    def test_open_bad_pickle(self):
        """
        Raise TypeError when PickleReader reads a pickle file that does
        not contain a table (although one is supposed to be there).
        GH-2232
        """
        reader = PickleReader("")
        # `open` is patched so that no real file is needed, and
        # `pickle.load` is patched to return a non-table object.
        # `read` is called without arguments: passing an extra argument
        # would raise TypeError from the call itself and make the test
        # pass without exercising the type check.
        with unittest.mock.patch("builtins.open",
                                 unittest.mock.mock_open()), \
                unittest.mock.patch("pickle.load", return_value=None):
            self.assertRaises(TypeError, reader.read)

0 comments on commit c0d2199

Please sign in to comment.