Skip to content

Commit

Permalink
Merge pull request #3237 from robertcv/fixes/file_strip_columns
Browse files Browse the repository at this point in the history
[FIX] io: Handle mismatched number of header/data values
  • Loading branch information
lanzagar authored Sep 12, 2018
2 parents 972ec52 + 6e1be0c commit 1a916d7
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
12 changes: 11 additions & 1 deletion Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,15 +618,25 @@ def data_table(cls, data, headers=None):
# Determine maximum row length
rowlen = max(map(len, (names, types, flags)))

strip = False

def _equal_length(lst):
# Bring one row to exactly `rowlen` cells and return it: a row longer than
# `rowlen` is cut down (and the enclosing `strip` flag is raised so that a
# warning can be issued once all rows are processed), a shorter row is
# padded with empty strings.
#
# NOTE(review): the next line looks like the single line this commit
# deleted (the old unconditional padding), shown here without its diff
# marker; the `elif` branch below is its replacement -- confirm against
# the repository before relying on it.
lst.extend(['']*(rowlen - len(lst)))
# `strip` is defined in the enclosing scope, so rebinding it needs `nonlocal`.
nonlocal strip
# Truncate only when there is a non-zero header width to truncate to.
if len(lst) > rowlen > 0:
# Slicing builds a new list; the caller must use the returned value.
lst = lst[:rowlen]
strip = True
elif len(lst) < rowlen:
# Pad in place with empty strings up to `rowlen` cells.
lst.extend(['']*(rowlen - len(lst)))
return lst

# Ensure all data is of equal width in a column-contiguous array
data = [_equal_length([s.strip() for s in row])
for row in data if any(row)]
data = np.array(data, dtype=object, order='F')

if strip:
warnings.warn("Columns with no headers were removed.")

# Data may actually be longer than headers were
try:
rowlen = data.shape[1]
Expand Down
15 changes: 15 additions & 0 deletions Orange/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import tempfile
import shutil
import io

from Orange.data import ContinuousVariable
from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader, \
Expand Down Expand Up @@ -104,6 +105,20 @@ def test_open_bad_pickle(self):
with unittest.mock.patch("pickle.load", return_value=None):
self.assertRaises(TypeError, reader.read, "foo")

def test_empty_columns(self):
"""Read a file whose rows have more values than headers. GH-1417"""
# Every data row ends with a trailing comma, so it carries one more
# (empty) value than the two-name header row.
samplefile = """\
a, b
1, 0,
1, 2,
"""
c = io.StringIO(samplefile)
# Reading must succeed and emit a UserWarning instead of failing.
with self.assertWarns(UserWarning) as cm:
table = CSVReader(c).read()
# Only the two headed columns remain as attributes.
self.assertEqual(len(table.domain.attributes), 2)
# The warning text is checked verbatim.
self.assertEqual(cm.warning.args[0],
"Columns with no headers were removed.")


class TestIo(unittest.TestCase):
def test_sanitize_variable_deprecated_params(self):
Expand Down

0 comments on commit 1a916d7

Please sign in to comment.