From 93036825bae2e056304e50a452448efe2898bd24 Mon Sep 17 00:00:00 2001
From: Ales Erjavec
Date: Fri, 24 Jul 2020 12:49:35 +0200
Subject: [PATCH 1/2] owcsvimport: Fix a type error in _open for zip archive

---
Reviewer notes (text between the "---" marker and the diff is not part of
the commit message and is discarded by `git am`):

* In the removed lines `filelist[0]` comes from `ZipFile.infolist()`, i.e.
  it is a `ZipInfo` object, yet it was passed to `ZipFile.getinfo()`, which
  looks a member up by name. `ZipFile.open()` accepts a `ZipInfo` directly,
  so the intermediate lookup is dropped.
* The new test writes 'abc' through the module-level `_open_write` helper
  for each of the .txt/.gz/.bz2/.xz/.zip suffixes and reads it back through
  `owcsvimport._open`; `protected-access` is disabled for pylint because
  the test calls that underscore-prefixed function.
* In `_open_write` the zip branch stores a single member named after the
  archive's base name without its extension, and replaces the member
  stream's `close` so that the enclosing `ZipFile` is closed as well.
* NOTE(review): `_open_write` raises `ValueError('r')` for an unsupported
  write mode; the message looks copied from a read-mode check -- confirm
  whether a more descriptive message is wanted.

 Orange/widgets/data/owcsvimport.py            |  4 +-
 Orange/widgets/data/tests/test_owcsvimport.py | 47 ++++++++++++++++++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py
index 078adca1c8a..70689067dbc 100644
--- a/Orange/widgets/data/owcsvimport.py
+++ b/Orange/widgets/data/owcsvimport.py
@@ -1101,9 +1101,7 @@ def _open(path, mode, encoding=None):
         arh = zipfile.ZipFile(path, 'r')
         filelist = arh.infolist()
         if len(filelist) == 1:
-            filename = filelist[0]
-            zinfo = arh.getinfo(filename)
-            f = arh.open(zinfo.filename, 'r')
+            f = arh.open(filelist[0], 'r')
             if 't' in mode:
                 f = io.TextIOWrapper(f, encoding=encoding)
             return f
diff --git a/Orange/widgets/data/tests/test_owcsvimport.py b/Orange/widgets/data/tests/test_owcsvimport.py
index b64bf78350c..6335263b6d3 100644
--- a/Orange/widgets/data/tests/test_owcsvimport.py
+++ b/Orange/widgets/data/tests/test_owcsvimport.py
@@ -1,4 +1,4 @@
-# pylint: disable=no-self-use
+# pylint: disable=no-self-use,protected-access
 import unittest
 from unittest import mock
 from contextlib import ExitStack
@@ -306,6 +306,51 @@ class Dialect(csv.excel):
         df = owcsvimport.load_csv(io.BytesIO(contents), opts)
         assert_array_equal(df.values, np.array([[3.21, 3.37], [4.13, 1000.142]]))
 
+    def test_open_compressed(self):
+        content = 'abc'
+        for ext in ["txt", "gz", "bz2", "xz", "zip"]:
+            with named_file('', suffix=f".{ext}") as fname:
+                with _open_write(fname, "wt", encoding="ascii") as f:
+                    f.write(content)
+                    f.close()
+
+                with owcsvimport._open(fname, "rt", encoding="ascii") as f:
+                    self.assertEqual(content, f.read())
+
+
+def _open_write(path, mode, encoding=None):
+    # pylint: disable=import-outside-toplevel
+    if mode not in {'w', 'wb', 'wt'}:
+        raise ValueError('r')
+    _, ext = os.path.splitext(path)
+    ext = ext.lower()
+    if ext == ".gz":
+        import gzip
+        return gzip.open(path, mode, encoding=encoding)
+    elif ext == ".bz2":
+        import bz2
+        return bz2.open(path, mode, encoding=encoding)
+    elif ext == ".xz":
+        import lzma
+        return lzma.open(path, mode, encoding=encoding)
+    elif ext == ".zip":
+        import zipfile
+        arh = zipfile.ZipFile(path, 'w')
+        filename, _ = os.path.splitext(os.path.basename(path))
+        f = arh.open(filename, mode="w")
+        f_close = f.close
+        # patch the f.close to also close the main archive file
+
+        def close_():
+            f_close()
+            arh.close()
+        f.close = close_
+        if 't' in mode:
+            f = io.TextIOWrapper(f, encoding=encoding)
+        return f
+    else:
+        return open(path, mode, encoding=encoding)
+
 
 if __name__ == "__main__":
     unittest.main()

From 73b9048c99449a185c25c9c69d936910ddf31b31 Mon Sep 17 00:00:00 2001
From: Ales Erjavec
Date: Fri, 24 Jul 2020 16:27:12 +0200
Subject: [PATCH 2/2] owcsvimport: Ensure archive is closed along the ZipFileExt wrapper

---
Reviewer notes (discarded by `git am`):

* "ZipFileExt" in the subject presumably refers to `zipfile.ZipExtFile`,
  the stream type returned by `ZipFile.open()` -- confirm with the author.
* `close` is replaced on the member stream before the optional
  `io.TextIOWrapper` is layered on top, so the replacement is installed for
  both the binary and the text code paths.
* NOTE(review): this hunk only covers the `len(filelist) == 1` branch; what
  happens to `arh` when the archive holds a different number of members is
  not visible here -- verify that it is closed on that path too.

 Orange/widgets/data/owcsvimport.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py
index 70689067dbc..805f1ec321e 100644
--- a/Orange/widgets/data/owcsvimport.py
+++ b/Orange/widgets/data/owcsvimport.py
@@ -1102,6 +1102,13 @@ def _open(path, mode, encoding=None):
         filelist = arh.infolist()
         if len(filelist) == 1:
             f = arh.open(filelist[0], 'r')
+            # patch the f.close to also close the main archive file
+            f_close = f.close
+
+            def close_():
+                f_close()
+                arh.close()
+            f.close = close_
             if 't' in mode:
                 f = io.TextIOWrapper(f, encoding=encoding)
             return f