diff --git a/Orange/data/io.py b/Orange/data/io.py index fc583ad5ca0..f6ff3da8d1b 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -10,6 +10,7 @@ from ast import literal_eval from collections import OrderedDict, Counter from functools import lru_cache +from importlib import import_module from itertools import chain, repeat from math import isnan from numbers import Number @@ -293,11 +294,18 @@ def _ext_to_attr_if_attr2(cls, attr, attr2): """ Return ``{ext: `attr`, ...}`` dict if ``cls`` has `attr2`. If `attr` is '', return ``{ext: cls, ...}`` instead. + + If there are multiple formats for an extension, return a format + with the lowest priority. """ - return OrderedDict((ext, getattr(cls, attr, cls)) - for cls in cls.registry.values() - if hasattr(cls, attr2) - for ext in getattr(cls, 'EXTENSIONS', [])) + formats = OrderedDict() + for format in sorted(cls.registry.values(), key=lambda x: x.PRIORITY): + if not hasattr(format, attr2): + continue + for ext in getattr(format, 'EXTENSIONS', []): + # Only adds if not yet registered + formats.setdefault(ext, getattr(format, attr, format)) + return formats @property def names(cls): @@ -343,7 +351,9 @@ def write_file(cls, filename, data): iterable (list (rows) of lists of values (cols)). """ - PRIORITY = 10000 # Sort order in OWSave widget combo box, lower is better + # Priority when multiple formats support the same extension. Also + # the sort order in file open/save combo boxes. Lower is better. + PRIORITY = 10000 def __init__(self, filename): """ @@ -762,6 +772,17 @@ def write_data(cls, write, data): val for var, val in zip(vars, flatten(row))]) + @classmethod + def qualified_name(cls): + return cls.__module__ + '.' + cls.__name__ + + +def class_from_qualified_name(format_name): + """ File format class from qualified name. """ + elements = format_name.split(".") + m = import_module(".".join(elements[:-1])) + return getattr(m, elements[-1]) + class CSVReader(FileFormat): """Reader for comma separated files""" diff --git a/Orange/tests/test_io.py b/Orange/tests/test_io.py index 7fca704860b..d272fa549d7 100644 --- a/Orange/tests/test_io.py +++ b/Orange/tests/test_io.py @@ -9,6 +9,7 @@ from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader from Orange.data.table import get_sample_datasets_dir + class WildcardReader(FileFormat): EXTENSIONS = ('.wild', '.wild[0-9]') DESCRIPTION = "Dummy reader for testing extensions" @@ -35,6 +36,30 @@ def test_wildcard_extension(self): FileFormat.get_reader("t.wild2a") +class SameExtension(FileFormat): + PRIORITY = 100 + EXTENSIONS = ('.same_extension',) + DESCRIPTION = "Same extension, different priority" + + def read(self): + pass + + +class SameExtensionPreferred(SameExtension): + PRIORITY = 90 + + +class SameExtensionL(SameExtension): + PRIORITY = 110 + + +class TestMultipleSameExtension(unittest.TestCase): + + def test_find_reader(self): + reader = FileFormat.get_reader("some.same_extension") + self.assertIsInstance(reader, SameExtensionPreferred) + + class TestLocate(unittest.TestCase): def test_locate_sample_datasets(self): @@ -49,7 +74,6 @@ def test_locate_sample_datasets(self): search_dirs=[get_sample_datasets_dir()]) self.assertEqual(os.path.basename(iris), "iris.tab") - def test_locate_wildcard_extension(self): tempdir = tempfile.mkdtemp() with self.assertRaises(OSError): diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py index 9553618b0fa..4a33416efe5 100644 --- a/Orange/widgets/data/owfile.py +++ b/Orange/widgets/data/owfile.py @@ -5,20 +5,20 @@ import numpy as np from AnyQt.QtWidgets import \ - QStyle, QComboBox, QMessageBox, QFileDialog, QGridLayout, QLabel, \ + QStyle, QComboBox, QMessageBox, QGridLayout, QLabel, \ QLineEdit, QSizePolicy as Policy from AnyQt.QtCore import Qt, QTimer, QSize from Orange.canvas.gui.utils import OSX_NSURL_toLocalFile -from Orange.data import StringVariable from Orange.data.table import Table, get_sample_datasets_dir -from Orange.data.io import FileFormat, UrlReader +from Orange.data.io import FileFormat, UrlReader, class_from_qualified_name from Orange.widgets import widget, gui from Orange.widgets.settings import Setting, ContextSetting, \ PerfectDomainContextHandler, SettingProvider from Orange.widgets.utils.domaineditor import DomainEditor from Orange.widgets.utils.itemmodels import PyListModel -from Orange.widgets.utils.filedialogs import RecentPathsWComboMixin, dialog_formats +from Orange.widgets.utils.filedialogs import RecentPathsWComboMixin, \ + open_filename_dialog from Orange.widgets.widget import Output # Backward compatibility: class RecentPath used to be defined in this module, @@ -40,7 +40,7 @@ def add_origin(examples, filename): return vars = examples.domain.variables + examples.domain.metas strings = [var for var in vars if var.is_string] - dir_name, basename = os.path.split(filename) + dir_name, _ = os.path.split(filename) for var in strings: if "type" in var.attributes and "origin" not in var.attributes: var.attributes["origin"] = dir_name @@ -112,9 +112,13 @@ class Outputs: class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg("The file is too large to load automatically." " Press Reload to load.") + load_warning = widget.Msg("Read warning:\n{}") class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") + missing_reader = widget.Msg("Missing reader.") + sheet_error = widget.Msg("Error listing available sheets.") + unknown = widget.Msg("Read error:\n{}") def __init__(self): super().__init__() @@ -264,11 +268,15 @@ def browse_file(self, in_demos=False): else: start_file = self.last_path() or os.path.expanduser("~/") - filename, _ = QFileDialog.getOpenFileName( - self, 'Open Orange Data File', start_file, dialog_formats()) + readers = [f for f in FileFormat.formats + if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)] + filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) + if reader is not None: + self.recent_paths[0].file_format = reader.qualified_name() + self.source = self.LOCAL_FILE self.load_data() @@ -276,46 +284,44 @@ def browse_file(self, in_demos=False): def load_data(self): # We need to catch any exception type since anything can happen in # file readers - # pylint: disable=broad-except self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() - if self.last_path() and not os.path.exists(self.last_path()): - self.Error.file_not_found() + + error = self._try_load() + if error: + error() + self.data = None + self.sheet_box.hide() self.Outputs.data.send(None) self.info.setText("No data.") - return - error = None + def _try_load(self): + # pylint: disable=broad-except + if self.last_path() and not os.path.exists(self.last_path()): + return self.Error.file_not_found + try: self.reader = self._get_reader() - if self.reader is None: - self.data = None - self.Outputs.data.send(None) - self.info.setText("No data.") - self.sheet_box.hide() - return - except Exception as ex: - error = ex + assert self.reader is not None + except Exception: + return self.Error.missing_reader - if not error: + try: self._update_sheet_combo() - with catch_warnings(record=True) as warnings: - try: - data = self.reader.read() - except Exception as ex: - log.exception(ex) - error = ex - self.warning(warnings[-1].message.args[0] if warnings else '') + except Exception: + return self.Error.sheet_error - if error: - self.data = None - self.Outputs.data.send(None) - self.info.setText("An error occurred:\n{}".format(error)) - self.sheet_box.hide() - return + with catch_warnings(record=True) as warnings: + try: + data = self.reader.read() + except Exception as ex: + log.exception(ex) + return lambda x=ex: self.Error.unknown(str(x)) + if warnings: + self.Warning.load_warning(warnings[-1].message.args[0]) self.info.setText(self._describe(data)) @@ -333,7 +339,13 @@ def _get_reader(self): FileFormat """ if self.source == self.LOCAL_FILE: - reader = FileFormat.get_reader(self.last_path()) + path = self.last_path() + if self.recent_paths and self.recent_paths[0].file_format: + qname = self.recent_paths[0].file_format + reader_class = class_from_qualified_name(qname) + reader = reader_class(path) + else: + reader = FileFormat.get_reader(path) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) return reader diff --git a/Orange/widgets/data/tests/test_owfile.py b/Orange/widgets/data/tests/test_owfile.py index da7c9193e85..9f0e493fe1b 100644 --- a/Orange/widgets/data/tests/test_owfile.py +++ b/Orange/widgets/data/tests/test_owfile.py @@ -4,7 +4,7 @@ from unittest.mock import Mock, patch import pickle import tempfile - +import warnings import numpy as np import scipy.sparse as sp @@ -16,9 +16,10 @@ import Orange from Orange.data import FileFormat, dataset_dirs, StringVariable, Table, \ Domain, DiscreteVariable +from Orange.data.io import TabReader from Orange.tests import named_file from Orange.widgets.data.owfile import OWFile -from Orange.widgets.utils.filedialogs import dialog_formats +from Orange.widgets.utils.filedialogs import dialog_formats, format_filter, RecentPath from Orange.widgets.tests.base import WidgetTest from Orange.widgets.utils.domaineditor import ComboDelegate, VarTypeDelegate, VarTableModel @@ -33,6 +34,35 @@ def read(self): pass +class FailedSheetsFormat(FileFormat): + EXTENSIONS = ('.failed_sheet',) + DESCRIPTION = "Make a sheet function that fails" + + def read(self): + pass + + def sheets(self): + raise Exception("Not working") + + +class WithWarnings(FileFormat): + EXTENSIONS = ('.with_warning',) + DESCRIPTION = "Warning" + + def read(self): + warnings.warn("Some warning") + return Orange.data.Table("iris") + + +class MyCustomTabReader(FileFormat): + EXTENSIONS = ('.tab',) + DESCRIPTION = "Always return iris" + PRIORITY = 999999 + + def read(self): + return Orange.data.Table("iris") + + class TestOWFile(WidgetTest): # Attribute used to store event data so it does not get garbage # collected before event is processed. @@ -209,6 +239,69 @@ def test_check_datetime_disabled(self): vartype_delegate.setEditorData(combo, idx(i)) self.assertEqual(combo.count(), counts[i]) + def test_reader_custom_tab(self): + with named_file("", suffix=".tab") as fn: + qname = MyCustomTabReader.qualified_name() + reader = RecentPath(fn, None, None, file_format=qname) + self.widget = self.create_widget(OWFile, + stored_settings={"recent_paths": [reader]}) + self.widget.load_data() + self.assertFalse(self.widget.Error.missing_reader.is_shown()) + outdata = self.get_output(self.widget.Outputs.data) + self.assertEqual(len(outdata), 150) # loaded iris + + def test_no_reader_extension(self): + with named_file("", suffix=".xyz_unknown") as fn: + no_reader = RecentPath(fn, None, None) + self.widget = self.create_widget(OWFile, + stored_settings={"recent_paths": [no_reader]}) + self.widget.load_data() + self.assertTrue(self.widget.Error.missing_reader.is_shown()) + + def test_fail_sheets(self): + with named_file("", suffix=".failed_sheet") as fn: + self.open_dataset(fn) + self.assertTrue(self.widget.Error.sheet_error.is_shown()) + + def test_with_warnings(self): + with named_file("", suffix=".with_warning") as fn: + self.open_dataset(fn) + self.assertTrue(self.widget.Warning.load_warning.is_shown()) + + def test_fail(self): + with named_file("name\nc\n\nstring", suffix=".tab") as fn: + self.open_dataset(fn) + self.assertTrue(self.widget.Error.unknown.is_shown()) + + def test_read_format(self): + iris = Table("iris") + + def open_iris_with_no_specific_format(a, b, c, filters, e): + return iris.__file__, filters.split(";;")[0] + + with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName", + open_iris_with_no_specific_format): + self.widget.browse_file() + + self.assertIsNone(self.widget.recent_paths[0].file_format) + + def open_iris_with_tab(a, b, c, filters, e): + return iris.__file__, format_filter(TabReader) + + with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName", + open_iris_with_tab): + self.widget.browse_file() + + self.assertEqual(self.widget.recent_paths[0].file_format, "Orange.data.io.TabReader") + + def test_no_specified_reader(self): + with named_file("", suffix=".tab") as fn: + no_class = RecentPath(fn, None, None, file_format="not.a.file.reader.class") + self.widget = self.create_widget(OWFile, + stored_settings={"recent_paths": [no_class]}) + self.widget.load_data() + self.assertTrue(self.widget.Error.missing_reader.is_shown()) + def test_domain_edit_on_sparse_data(self): iris = Table("iris") iris.X = sp.csr_matrix(iris.X) diff --git a/Orange/widgets/utils/filedialogs.py b/Orange/widgets/utils/filedialogs.py index 5f2eaa0bb04..63279693616 100644 --- a/Orange/widgets/utils/filedialogs.py +++ b/Orange/widgets/utils/filedialogs.py @@ -69,9 +69,9 @@ def get_file_name(start_dir, start_filter, file_formats): Args: start_dir (str): initial directory, optionally including the filename start_filter (str): initial filter - file_formats (list of Orange.data.io.FileFormat): file formats + file_formats (dict {extension: Orange.data.io.FileFormat}): file formats Returns: - (filename, filter, writer), or `(None, None, None)` on cancel + (filename, writer, filter), or `(None, None, None)` on cancel """ writers = sorted(set(file_formats.values()), key=lambda w: w.PRIORITY) filters = [format_filter(w) for w in writers] @@ -104,14 +104,57 @@ def get_file_name(start_dir, start_filter, file_formats): return filename, writer, filter +def open_filename_dialog(start_dir, start_filter, file_formats, title="Open...", + dialog=None): + """ + Open file dialog with file formats. + + Function also returns the format and filter to cover the case where the + same extension appears in multiple filters. + + Args: + start_dir (str): initial directory, optionally including the filename + start_filter (str): initial filter + file_formats (a list of Orange.data.io.FileFormat): file formats + title (str): title of the dialog + dialog: a function that creates a QT dialog + Returns: + (filename, file_format, filter), or `(None, None, None)` on cancel + """ + file_formats = sorted(set(file_formats), key=lambda w: (w.PRIORITY, w.DESCRIPTION)) + filters = [format_filter(f) for f in file_formats] + + # add all readable files option + all_extensions = set() + for f in file_formats: + all_extensions.update(f.EXTENSIONS) + file_formats.insert(0, None) + filters.insert(0, "All readable files (*{})".format( + ' *'.join(sorted(all_extensions)))) + + if start_filter not in filters: + start_filter = filters[0] + + if dialog is None: + dialog = QFileDialog.getOpenFileName + filename, filter = dialog( + None, title, start_dir, ';;'.join(filters), start_filter) + if not filename: + return None, None, None + + file_format = file_formats[filters.index(filter)] + return filename, file_format, filter + + class RecentPath: abspath = '' prefix = None #: Option[str] # BASEDIR | SAMPLE-DATASETS | ... relpath = '' #: Option[str] # path relative to `prefix` title = '' #: Option[str] # title of filename (e.g. from URL) sheet = '' #: Option[str] # sheet + file_format = None #: Option[str] # file format as a string - def __init__(self, abspath, prefix, relpath, title='', sheet=''): + def __init__(self, abspath, prefix, relpath, title='', sheet='', file_format=None): if os.name == "nt": # always use a cross-platform pathname component separator abspath = abspath.replace(os.path.sep, "/") @@ -122,6 +165,7 @@ def __init__(self, abspath, prefix, relpath, title='', sheet=''): self.relpath = relpath self.title = title self.sheet = sheet + self.file_format = file_format def __eq__(self, other): return (self.abspath == other.abspath or @@ -191,8 +235,6 @@ def search(self, searchpaths): path = os.path.join(base, self.relpath) if os.path.exists(path): return os.path.normpath(path) - else: - return None def resolve(self, searchpaths): if self.prefix is None and os.path.exists(self.abspath): @@ -203,7 +245,8 @@ def resolve(self, searchpaths): path = os.path.join(base, self.relpath) if os.path.exists(path): return RecentPath( - os.path.normpath(path), self.prefix, self.relpath) + os.path.normpath(path), self.prefix, self.relpath, + file_format=self.file_format) return None @property @@ -285,7 +328,7 @@ def _relocate_recent_files(self): search_paths = self._search_paths() rec = [] for recent in self.recent_paths: - kwargs = dict(title=recent.title, sheet=recent.sheet) + kwargs = dict(title=recent.title, sheet=recent.sheet, file_format=recent.file_format) resolved = recent.resolve(search_paths) if resolved is not None: rec.append( @@ -326,7 +369,7 @@ def select_file(self, n): def last_path(self): """Return the most recent absolute path or `None` if there is none""" - return self.recent_paths and self.recent_paths[0].abspath or None + return self.recent_paths[0].abspath if self.recent_paths else None class RecentPathsWComboMixin(RecentPathsWidgetMixin):