diff --git a/Orange/widgets/data/owrank.py b/Orange/widgets/data/owrank.py index 45e7b3be945..cbdb6a5ee52 100644 --- a/Orange/widgets/data/owrank.py +++ b/Orange/widgets/data/owrank.py @@ -6,52 +6,146 @@ """ -from collections import namedtuple +from collections import namedtuple, OrderedDict +import logging +from functools import partial +from itertools import chain import numpy as np from scipy.sparse import issparse +from AnyQt.QtGui import QFontMetrics from AnyQt.QtWidgets import ( - QTableView, QRadioButton, QButtonGroup, QGridLayout, QSizePolicy, - QStackedLayout, QStackedWidget, QWidget + QTableView, QRadioButton, QButtonGroup, QGridLayout, + QStackedWidget, QHeaderView, QCheckBox, QItemDelegate, ) -from AnyQt.QtGui import QStandardItemModel, QStandardItem from AnyQt.QtCore import ( Qt, QItemSelection, QItemSelectionRange, QItemSelectionModel, - QSortFilterProxyModel ) -from Orange.base import Learner from Orange.data import (Table, Domain, ContinuousVariable, DiscreteVariable, StringVariable) +from Orange.misc.cache import memoize_method from Orange.preprocess import score from Orange.canvas import report from Orange.widgets import gui from Orange.widgets.settings import (DomainContextHandler, Setting, ContextSetting) +from Orange.widgets.utils.itemmodels import PyTableModel from Orange.widgets.utils.sql import check_sql_input from Orange.widgets.widget import OWWidget, Msg, Input, Output -def table(shape, fill=None): - """ Return a 2D table with shape filed with ``fill`` - """ - return [[fill for j in range(shape[1])] for i in range(shape[0])] +log = logging.getLogger(__name__) -ScoreMeta = namedtuple("score_meta", ["name", "shortname", "score"]) +class ProblemType: + CLASSIFICATION, REGRESSION, UNSUPERVISED = range(3) + + @classmethod + def from_variable(cls, variable): + return (cls.CLASSIFICATION if isinstance(variable, DiscreteVariable) else + cls.REGRESSION if isinstance(variable, ContinuousVariable) else + cls.UNSUPERVISED) + +ScoreMeta = namedtuple("score_meta", ["name", "shortname", "scorer", 'problem_type', 'is_default']) # Default scores. -SCORES = [ScoreMeta("Information Gain", "Inf. gain", score.InfoGain), - ScoreMeta("Gain Ratio", "Gain Ratio", score.GainRatio), - ScoreMeta("Gini Decrease", "Gini", score.Gini), - ScoreMeta("ANOVA", "ANOVA", score.ANOVA), - ScoreMeta("Chi2", "Chi2", score.Chi2), - ScoreMeta("ReliefF", "ReliefF", score.ReliefF), - ScoreMeta("FCBF", "FCBF", score.FCBF), - ScoreMeta("Univariate Linear Regression", "Univar. Lin. Reg.", - score.UnivariateLinearRegression), - ScoreMeta("RReliefF", "RReliefF", score.RReliefF)] +CLS_SCORES = [ + ScoreMeta("Information Gain", "Info. gain", score.InfoGain, ProblemType.CLASSIFICATION, False), + ScoreMeta("Information Gain Ratio", "Gain ratio", score.GainRatio, ProblemType.CLASSIFICATION, True), + ScoreMeta("Gini Decrease", "Gini", score.Gini, ProblemType.CLASSIFICATION, True), + ScoreMeta("ANOVA", "ANOVA", score.ANOVA, ProblemType.CLASSIFICATION, False), + ScoreMeta("χ²", "χ²", score.Chi2, ProblemType.CLASSIFICATION, False), + ScoreMeta("ReliefF", "ReliefF", score.ReliefF, ProblemType.CLASSIFICATION, False), + ScoreMeta("FCBF", "FCBF", score.FCBF, ProblemType.CLASSIFICATION, False) +] +REG_SCORES = [ + ScoreMeta("Univariate Regression", "Univar. reg.", score.UnivariateLinearRegression, ProblemType.REGRESSION, True), + ScoreMeta("RReliefF", "RReliefF", score.RReliefF, ProblemType.REGRESSION, True) +] +SCORES = CLS_SCORES + REG_SCORES + + +class TableView(QTableView): + def __init__(self, parent=None, **kwargs): + super().__init__(parent=parent, + selectionBehavior=QTableView.SelectRows, + selectionMode=QTableView.ExtendedSelection, + sortingEnabled=True, + showGrid=False, + cornerButtonEnabled=False, + alternatingRowColors=True, + **kwargs) + self.setItemDelegate(gui.ColoredBarItemDelegate(self)) + self.setItemDelegateForColumn(0, QItemDelegate()) + + header = self.verticalHeader() + header.setSectionResizeMode(header.Fixed) + header.setFixedWidth(50) + header.setDefaultSectionSize(22) + header.setTextElideMode(Qt.ElideMiddle) # Note: https://bugreports.qt.io/browse/QTBUG-62091 + + header = self.horizontalHeader() + header.setSectionResizeMode(header.Fixed) + header.setFixedHeight(24) + header.setDefaultSectionSize(80) + header.setTextElideMode(Qt.ElideMiddle) + + def setVHeaderFixedWidthFromLabel(self, max_label): + header = self.verticalHeader() + width = QFontMetrics(header.font()).width(max_label) + header.setFixedWidth(min(width + 40, 400)) + + +class TableModel(PyTableModel): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._extremes = {} + + def data(self, index, role=Qt.DisplayRole, _isnan=np.isnan): + if role == gui.BarRatioRole and index.isValid(): + value = super().data(index, Qt.EditRole) + if not isinstance(value, float): + return None + vmin, vmax = self._extremes.get(index.column(), (-np.inf, np.inf)) + value = (value - vmin) / ((vmax - vmin) or 1) + return value + + if role == Qt.DisplayRole: + role = Qt.EditRole + + value = super().data(index, role) + + # Display nothing for non-existent attr value counts in the first column + if role == Qt.EditRole and index.column() == 0 and _isnan(value): + return '' + + return value + + def headerData(self, section, orientation, role=Qt.DisplayRole): + if role == Qt.InitialSortOrderRole: + return Qt.DescendingOrder + return super().headerData(section, orientation, role) + + def setExtremesFrom(self, column, values): + """Set extremes for columnn's ratio bars from values""" + try: + vmin = np.nanmin(values) + if np.isnan(vmin): + raise TypeError + except TypeError: + vmin, vmax = -np.inf, np.inf + else: + vmax = np.nanmax(values) + self._extremes[column] = (vmin, vmax) + + def resetSorting(self, yes_reset=False): + """We don't want to invalidate our sort proxy model everytime we + wrap a new list. Our proxymodel only invalidates explicitly + (i.e. when new data is set)""" + if yes_reset: + super().resetSorting() class OWRank(OWWidget): @@ -72,78 +166,79 @@ class Outputs: SelectNone, SelectAll, SelectManual, SelectNBest = range(4) - cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"}) - reg_default_selected = Setting({"Univariate Linear Regression", "RReliefF"}) - selectMethod = Setting(SelectNBest) nSelected = Setting(5) auto_apply = Setting(True) - # Header state for discrete/continuous/no_class scores - headerState = Setting([None, None, None]) + sorting = Setting((0, Qt.DescendingOrder)) + selected_methods = Setting(set()) - settings_version = 1 + settings_version = 2 settingsHandler = DomainContextHandler() selected_rows = ContextSetting([]) + selectionMethod = ContextSetting(SelectNBest) - gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True - _score_vars = ["gain", "inf_gain", "gini", "anova", "chi2", "relief", - "fcbc", "ulr", "rrelief"] - - class Warning(OWWidget.Warning): - no_target_var = Msg("Data does not have a target variable") + class Information(OWWidget.Information): + no_target_var = Msg("Data does not have a single target variable. " + "You can still connect in unsupervised scorers " + "such as PCA.") + missings_imputed = Msg('Missing values will be imputed as needed.') class Error(OWWidget.Error): invalid_type = Msg("Cannot handle target variable type {}") - inadequate_learner = Msg("{}") + inadequate_learner = Msg("Scorer {} inadequate: {}") def __init__(self): super().__init__() - self.measure_scores = None - self.update_scores = True - self.usefulAttributes = [] - self.learners = {} - self.labels = [] + self.scorers = OrderedDict() self.out_domain_desc = None - - self.all_measures = SCORES - - self.selectedMeasures = dict([(m.name, True) for m - in self.all_measures]) - # Discrete (0) or continuous (1) class mode - self.rankMode = 0 - self.data = None - - self.discMeasures = [m for m in self.all_measures if - issubclass(DiscreteVariable, m.score.class_type)] - self.contMeasures = [m for m in self.all_measures if - issubclass(ContinuousVariable, m.score.class_type)] - - self.score_checks = [] - self.cls_scoring_box = gui.vBox(None, "Scoring for Classification") - self.reg_scoring_box = gui.vBox(None, "Scoring for Regression") - boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2 - for _score, var, box in zip(SCORES, self._score_vars, boxes): - check = gui.checkBox( - box, self, var, label=_score.name, - callback=lambda val=_score: self.measuresSelectionChanged(val)) - self.score_checks.append(check) - - self.score_stack = QStackedWidget(self) - self.score_stack.addWidget(self.cls_scoring_box) - self.score_stack.addWidget(self.reg_scoring_box) - self.score_stack.addWidget(QWidget()) - self.controlArea.layout().addWidget(self.score_stack) + self.problem_type_mode = ProblemType.CLASSIFICATION + + if not self.selected_methods: + self.selected_methods = {method.name for method in SCORES + if method.is_default} + + # GUI + + self.ranksModel = model = TableModel(parent=self) # type: TableModel + self.ranksView = view = TableView(self) # type: TableView + self.mainArea.layout().addWidget(view) + view.setModel(model) + view.setColumnWidth(0, 30) + view.selectionModel().selectionChanged.connect(self.commit) + + def _set_select_manual(): + self.setSelectionMethod(OWRank.SelectManual) + + view.pressed.connect(_set_select_manual) + view.verticalHeader().sectionClicked.connect(_set_select_manual) + view.horizontalHeader().sectionClicked.connect(self.headerClick) + + self.measuresStack = stacked = QStackedWidget(self) + self.controlArea.layout().addWidget(stacked) + + for scoring_methods in (CLS_SCORES, + REG_SCORES, + []): + box = gui.vBox(None, "Scoring Methods" if scoring_methods else None) + stacked.addWidget(box) + for method in scoring_methods: + box.layout().addWidget(QCheckBox( + method.name, self, + objectName=method.shortname, # To be easily found in tests + checked=method.name in self.selected_methods, + stateChanged=partial(self.methodSelectionChanged, method_name=method.name))) + gui.rubber(box) gui.rubber(self.controlArea) + self.switchProblemType(ProblemType.CLASSIFICATION) - selMethBox = gui.vBox( - self.controlArea, "Select Attributes", addSpace=True) + selMethBox = gui.vBox(self.controlArea, "Select Attributes", addSpace=True) grid = QGridLayout() grid.setContentsMargins(6, 0, 6, 0) self.selectButtons = QButtonGroup() - self.selectButtons.buttonClicked[int].connect(self.setSelectMethod) + self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod) def button(text, buttonid, toolTip=None): b = QRadioButton(text) @@ -158,7 +253,7 @@ def button(text, buttonid, toolTip=None): b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest) s = gui.spin(selMethBox, self, "nSelected", 1, 100, - callback=self.nSelectedChanged) + callback=lambda: self.setSelectionMethod(OWRank.SelectNBest)) grid.addWidget(b1, 0, 0) grid.addWidget(b2, 1, 0) @@ -166,405 +261,196 @@ def button(text, buttonid, toolTip=None): grid.addWidget(b4, 3, 0) grid.addWidget(s, 3, 1) - self.selectButtons.button(self.selectMethod).setChecked(True) + self.selectButtons.button(self.selectionMethod).setChecked(True) selMethBox.layout().addLayout(grid) gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False) - # Discrete, continuous and no_class table views are stacked - self.ranksViewStack = QStackedLayout() - self.mainArea.layout().addLayout(self.ranksViewStack) - - self.discRanksView = QTableView() - self.ranksViewStack.addWidget(self.discRanksView) - self.discRanksView.setSelectionBehavior(QTableView.SelectRows) - self.discRanksView.setSelectionMode(QTableView.MultiSelection) - self.discRanksView.setSortingEnabled(True) - - self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures] - self.discRanksModel = QStandardItemModel(self) - self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels) - - self.discRanksProxyModel = MySortProxyModel(self) - self.discRanksProxyModel.setSourceModel(self.discRanksModel) - self.discRanksView.setModel(self.discRanksProxyModel) - - self.discRanksView.setColumnWidth(0, 20) - self.discRanksView.selectionModel().selectionChanged.connect( - self.commit - ) - self.discRanksView.pressed.connect(self.onSelectItem) - self.discRanksView.horizontalHeader().sectionClicked.connect( - self.headerClick - ) - self.discRanksView.verticalHeader().sectionClicked.connect( - self.onSelectItem - ) - - if self.headerState[0] is not None: - self.discRanksView.horizontalHeader().restoreState( - self.headerState[0]) - - self.contRanksView = QTableView() - self.ranksViewStack.addWidget(self.contRanksView) - self.contRanksView.setSelectionBehavior(QTableView.SelectRows) - self.contRanksView.setSelectionMode(QTableView.MultiSelection) - self.contRanksView.setSortingEnabled(True) - - self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures] - self.contRanksModel = QStandardItemModel(self) - self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels) - - self.contRanksProxyModel = MySortProxyModel(self) - self.contRanksProxyModel.setSourceModel(self.contRanksModel) - self.contRanksView.setModel(self.contRanksProxyModel) - - self.contRanksView.setColumnWidth(0, 20) - self.contRanksView.selectionModel().selectionChanged.connect( - self.commit - ) - self.contRanksView.pressed.connect(self.onSelectItem) - self.contRanksView.horizontalHeader().sectionClicked.connect( - self.headerClick - ) - self.contRanksView.verticalHeader().sectionClicked.connect( - self.onSelectItem - ) - - if self.headerState[1] is not None: - self.contRanksView.horizontalHeader().restoreState( - self.headerState[1]) - - self.noClassRanksView = QTableView() - self.ranksViewStack.addWidget(self.noClassRanksView) - self.noClassRanksView.setSelectionBehavior(QTableView.SelectRows) - self.noClassRanksView.setSelectionMode(QTableView.MultiSelection) - self.noClassRanksView.setSortingEnabled(True) - - self.noClassRanksLabels = ["#"] - self.noClassRanksModel = QStandardItemModel(self) - self.noClassRanksModel.setHorizontalHeaderLabels(self.noClassRanksLabels) - - self.noClassRanksProxyModel = MySortProxyModel(self) - self.noClassRanksProxyModel.setSourceModel(self.noClassRanksModel) - self.noClassRanksView.setModel(self.noClassRanksProxyModel) - - self.noClassRanksView.setColumnWidth(0, 20) - self.noClassRanksView.selectionModel().selectionChanged.connect( - self.commit - ) - self.noClassRanksView.pressed.connect(self.onSelectItem) - self.noClassRanksView.horizontalHeader().sectionClicked.connect( - self.headerClick - ) - self.noClassRanksView.verticalHeader().sectionClicked.connect( - self.onSelectItem - ) - - if self.headerState[2] is not None: - self.noClassRanksView.horizontalHeader().restoreState( - self.headerState[2]) - - # Switch the current view to Discrete - self.switchRanksMode(0) - self.resetInternals() - self.updateDelegates() - self.updateVisibleScoreColumns() - self.resize(690, 500) - self.measure_scores = table((len(self.measures), 0), None) - - def switchRanksMode(self, index): + def switchProblemType(self, index): """ Switch between discrete/continuous/no_class mode """ - self.rankMode = index - self.ranksViewStack.setCurrentIndex(index) - - if index == 0: - self.ranksView = self.discRanksView - self.ranksModel = self.discRanksModel - self.ranksProxyModel = self.discRanksProxyModel - self.measures = self.discMeasures - self.selected_checks = self.cls_default_selected - self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored, - QSizePolicy.Ignored) - self.cls_scoring_box.setSizePolicy(QSizePolicy.Expanding, - QSizePolicy.Expanding) - elif index == 1: - self.ranksView = self.contRanksView - self.ranksModel = self.contRanksModel - self.ranksProxyModel = self.contRanksProxyModel - self.measures = self.contMeasures - self.selected_checks = self.reg_default_selected - self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored, - QSizePolicy.Ignored) - self.reg_scoring_box.setSizePolicy(QSizePolicy.Expanding, - QSizePolicy.Expanding) - else: - self.ranksView = self.noClassRanksView - self.ranksModel = self.noClassRanksModel - self.ranksProxyModel = self.noClassRanksProxyModel - self.measures = [] - self.selected_checks = set() - self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored, - QSizePolicy.Ignored) - self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored, - QSizePolicy.Ignored) - - shape = (len(self.measures) + len(self.learners), 0) - self.measure_scores = table(shape, None) - self.update_scores = False - for check, score in zip(self.score_checks, SCORES): - check.setChecked(score.name in self.selected_checks) - self.update_scores = True - self.score_stack.setCurrentIndex(index) - self.updateVisibleScoreColumns() + self.measuresStack.setCurrentIndex(index) + self.problem_type_mode = index @Inputs.data @check_sql_input def setData(self, data): self.closeContext() - self.clear_messages() - self.resetInternals() + self.selected_rows = [] + self.ranksModel.clear() + self.ranksModel.resetSorting(True) + + self.get_method_scores.cache_clear() + self.get_scorer_scores.cache_clear() + + self.Error.clear() + self.Information.clear() + self.Information.missings_imputed( + shown=data is not None and data.has_missing()) self.data = data - self.switchRanksMode(0) + self.switchProblemType(ProblemType.CLASSIFICATION) if self.data is not None: domain = self.data.domain - attrs = domain.attributes - self.usefulAttributes = [attr for attr in attrs - if attr.is_discrete or attr.is_continuous] - if domain.has_continuous_class: - self.switchRanksMode(1) + if domain.has_discrete_class: + problem_type = ProblemType.CLASSIFICATION + elif domain.has_continuous_class: + problem_type = ProblemType.REGRESSION elif not domain.class_var: - self.Warning.no_target_var() - self.switchRanksMode(2) - elif not domain.has_discrete_class: + self.Information.no_target_var() + problem_type = ProblemType.UNSUPERVISED + else: + # This can happen? self.Error.invalid_type(type(domain.class_var).__name__) + problem_type = None - if issparse(self.data.X): # keep only measures supporting sparse data - self.measures = [m for m in self.measures - if m.score.supports_sparse_data] - - self.ranksModel.setRowCount(len(attrs)) - for i, a in enumerate(attrs): - if a.is_discrete: - v = len(a.values) - else: - v = "C" - item = ScoreValueItem() - item.setData(v, Qt.DisplayRole) - self.ranksModel.setItem(i, 0, item) - item = QStandardItem(a.name) - item.setData(gui.attributeIconDict[a], Qt.DecorationRole) - self.ranksModel.setVerticalHeaderItem(i, item) - - shape = (len(self.measures) + len(self.learners), len(attrs)) - self.measure_scores = table(shape, None) - self.updateScores() - else: - self.Outputs.scores.send(None) - - self.selected_rows = [] - self.openContext(data) - self.selectMethodChanged() - self.commit() + if problem_type is not None: + self.switchProblemType(problem_type) - def get_selection(self): - selection = self.ranksView.selectionModel().selection() - return list(set(ind.row() for ind in selection.indexes())) + self.ranksModel.setVerticalHeaderLabels(domain.attributes) + self.ranksView.setVHeaderFixedWidthFromLabel( + max((a.name for a in domain.attributes), key=len)) - @Inputs.scorer - def set_learner(self, learner, lid=None): - if learner is None and lid is not None: - del self.learners[lid] - elif learner is not None: - self.learners[lid] = ScoreMeta( - learner.name, - learner.name, - learner - ) - attrs_len = 0 if not self.data else len(self.data.domain.attributes) - shape = (len(self.learners), attrs_len) - self.measure_scores = self.measure_scores[:len(self.measures)] - self.measure_scores += table(shape, None) - self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels) - self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels) - self.noClassRanksModel.setHorizontalHeaderLabels( - self.noClassRanksLabels) - measures_mask = [False] * len(self.measures) - measures_mask += [True for _ in self.learners] - self.updateScores(measures_mask) - self.commit() + self.selectionMethod = OWRank.SelectNBest - def updateScores(self, measuresMask=None): - """ - Update the current computed scores. + self.openContext(data) - If `measuresMask` is given it must be an list of bool values - indicating what measures should be recomputed. + def handleNewSignals(self): + self.setStatusMessage('Running') + self.updateScores() + self.setStatusMessage('') + self.commit() - """ - if not self.data: + @Inputs.scorer + def set_learner(self, scorer, id): + if scorer is None: + self.scorers.pop(id, None) + else: + # Avoid caching a (possibly stale) previous instance of the same + # Scorer passed via the same signal + if id in self.scorers: + self.get_scorer_scores.cache_clear() + + self.scorers[id] = ScoreMeta(scorer.name, scorer.name, scorer, + ProblemType.from_variable(scorer.class_type), + False) + + @memoize_method() + def get_method_scores(self, method): + estimator = method.scorer() + data = self.data + try: + scores = np.asarray(estimator(data)) + except ValueError: + log.warning("Scorer %s wasn't able to compute all scores at once", + method.name) + try: + scores = np.array([estimator(data, attr) + for attr in data.domain.attributes]) + except ValueError: + log.error( + "Scorer %s wasn't able to compute scores at all", + method.name) + scores = np.full(len(data.domain.attributes), np.nan) + return scores + + @memoize_method() + def get_scorer_scores(self, scorer): + try: + scores = scorer.scorer.score_data(self.data).T + except ValueError: + log.error( + "Scorer %s wasn't able to compute scores at all", + scorer.name) + scores = np.full((len(self.data.domain.attributes), 1), np.nan) + + labels = ((scorer.shortname,) + if scores.shape[1] == 1 else + tuple(scorer.shortname + '_' + str(i) + for i in range(1, 1 + scores.shape[1]))) + return scores, labels + + def updateScores(self): + if self.data is None: + self.ranksModel.clear() + self.Outputs.scores.send(None) return - if self.data.has_missing(): - self.information("Missing values have been imputed.") - measures = self.measures + [v for k, v in self.learners.items()] - if measuresMask is None: - # Update all selected measures - measuresMask = [self.selectedMeasures.get(m.name) - for m in self.measures] - measuresMask = measuresMask + [v.name for k, v in - self.learners.items()] - - data = self.data - learner_col = len(self.measures) - if len(measuresMask) <= len(self.measures) or \ - measuresMask[len(self.measures)]: - self.labels = [] - self.Error.inadequate_learner.clear() - - self.setStatusMessage("Running") - with self.progressBar(): - n_measure_update = len([x for x in measuresMask if x is not False]) - count = 0 - for index, (meas, mask) in enumerate(zip(measures, measuresMask)): - if not mask: - continue - self.progressBarSet(90 * count / n_measure_update) - count += 1 - if index < len(self.measures): - estimator = meas.score() - try: - self.measure_scores[index] = estimator(data) - except ValueError: - self.measure_scores[index] = [] - for attr in data.domain.attributes: - try: - self.measure_scores[index].append( - estimator(data, attr)) - except ValueError: - self.measure_scores[index].append(None) - else: - learner = meas.score - if isinstance(learner, Learner) and \ - not learner.check_learner_adequacy(self.data.domain): - self.Error.inadequate_learner( - learner.learner_adequacy_err_msg) - scores = table((1, len(data.domain.attributes))) - else: - scores = meas.score.score_data(data) - for i, row in enumerate(scores): - self.labels.append(meas.shortname + str(i + 1)) - if len(self.measure_scores) > learner_col: - self.measure_scores[learner_col] = row - else: - self.measure_scores.append(row) - learner_col += 1 - self.progressBarSet(90) - self.contRanksModel.setHorizontalHeaderLabels( - self.contRanksLabels + self.labels - ) - self.discRanksModel.setHorizontalHeaderLabels( - self.discRanksLabels + self.labels + methods = [method + for method in SCORES + if (method.name in self.selected_methods and + method.problem_type == self.problem_type_mode and + (not issparse(self.data.X) or + method.scorer.supports_sparse_data))] + + scorers = [] + self.Error.inadequate_learner.clear() + for scorer in self.scorers.values(): + if scorer.problem_type in (self.problem_type_mode, ProblemType.UNSUPERVISED): + scorers.append(scorer) + else: + self.Error.inadequate_learner(scorer.name, scorer.learner_adequacy_err_msg) + + method_scores = tuple(self.get_method_scores(method) + for method in methods) + + scorer_scores, scorer_labels = (), () + if scorers: + scorer_scores, scorer_labels = zip(*(self.get_scorer_scores(scorer) + for scorer in scorers)) + scorer_labels = tuple(chain.from_iterable(scorer_labels)) + + labels = tuple(method.shortname for method in methods) + scorer_labels + model_array = np.column_stack( + ([len(a.values) if a.is_discrete else np.nan + for a in self.data.domain.attributes],) + + (method_scores if method_scores else ()) + + (scorer_scores if scorer_scores else ()) ) - self.noClassRanksModel.setHorizontalHeaderLabels( - self.noClassRanksLabels + self.labels - ) - self.updateRankModel(measuresMask) - self.ranksProxyModel.invalidate() - self.selectMethodChanged() - self.Outputs.scores.send(self.create_scores_table(self.labels)) - self.setStatusMessage("") - - def updateRankModel(self, measuresMask): - """ - Update the rankModel. - """ - values = [] - diff = len(self.measure_scores) - len(measuresMask) - if len(measuresMask): - measuresMask += [measuresMask[-1]] * diff - for i in range(self.ranksModel.columnCount() - 1, - len(self.measure_scores), -1): - self.ranksModel.removeColumn(i) - - for i, (scores, m) in enumerate(zip(self.measure_scores, measuresMask)): - if not m and self.ranksModel.item(0, i + 1): - values.append([]) - continue - values_one = [] - for j, _score in enumerate(scores): - values_one.append(_score) - item = self.ranksModel.item(j, i + 1) - if not item: - item = ScoreValueItem() - self.ranksModel.setItem(j, i + 1, item) - item.setData(_score, Qt.DisplayRole) - values.append(values_one) - for i, (vals, m) in enumerate(zip(values, measuresMask)): - if not m: - continue - valid_vals = [v for v in vals if v is not None] - if valid_vals: - vmin, vmax = min(valid_vals), max(valid_vals) - for j, v in enumerate(vals): - if v is not None: - # Set the bar ratio role for i-th measure. - ratio = float((v - vmin) / ((vmax - vmin) or 1)) - item = self.ranksModel.item(j, i + 1) - item.setData(ratio, gui.BarRatioRole) - - self.ranksView.setColumnWidth(0, 20) - self.ranksView.resizeRowsToContents() - - def resetInternals(self): - self.data = None - self.usefulAttributes = [] - self.ranksModel.setRowCount(0) + for column, values in enumerate(model_array.T): + self.ranksModel.setExtremesFrom(column, values) - def onSelectItem(self, index): - """ - Called when the user selects/unselects an item in the table view. - """ - self.selectMethod = OWRank.SelectManual # Manual - self.selectButtons.button(self.selectMethod).setChecked(True) - self.commit() + self.ranksModel.wrap(model_array.tolist()) + self.ranksModel.setHorizontalHeaderLabels(('#',) + labels) + self.ranksView.setColumnWidth(0, 30) - def setSelectMethod(self, method): - if self.selectMethod != method: - self.selectMethod = method - self.selectButtons.button(method).setChecked(True) - self.selectMethodChanged() + # Re-apply sort + try: + sort_column, sort_order = self.sorting + if sort_column < len(labels): + self.ranksModel.sort(sort_column + 1, sort_order) # +1 for '#' (discrete count) column + except ValueError: + pass - def selectMethodChanged(self): self.autoSelection() - self.ranksView.setFocus() + self.Outputs.scores.send(self.create_scores_table(labels)) - def nSelectedChanged(self): - self.selectMethod = OWRank.SelectNBest - self.selectButtons.button(self.selectMethod).setChecked(True) - self.selectMethodChanged() + def setSelectionMethod(self, method): + if self.selectionMethod != method: + self.selectionMethod = method + self.selectButtons.button(method).setChecked(True) + self.autoSelection() + self.commit() def autoSelection(self): selModel = self.ranksView.selectionModel() - rowCount = self.ranksModel.rowCount() - columnCount = self.ranksModel.columnCount() - model = self.ranksProxyModel + model = self.ranksModel + rowCount = model.rowCount() + columnCount = model.columnCount() - if self.selectMethod == OWRank.SelectNone: + if self.selectionMethod == OWRank.SelectNone: selection = QItemSelection() - elif self.selectMethod == OWRank.SelectAll: + elif self.selectionMethod == OWRank.SelectAll: selection = QItemSelection( model.index(0, 0), model.index(rowCount - 1, columnCount - 1) ) - elif self.selectMethod == OWRank.SelectNBest: + elif self.selectionMethod == OWRank.SelectNBest: nSelected = min(self.nSelected, rowCount) selection = QItemSelection( model.index(0, 0), @@ -573,71 +459,29 @@ def autoSelection(self): else: selection = QItemSelection() if len(self.selected_rows): - selection = QItemSelection() - for row in self.selected_rows: + for row in model.mapFromSourceRows(self.selected_rows): selection.append(QItemSelectionRange( model.index(row, 0), model.index(row, columnCount - 1))) selModel.select(selection, QItemSelectionModel.ClearAndSelect) def headerClick(self, index): - if index >= 1 and self.selectMethod == OWRank.SelectNBest: + if index >= 1 and self.selectionMethod == OWRank.SelectNBest: # Reselect the top ranked attributes self.autoSelection() # Store the header states - disc = bytes(self.discRanksView.horizontalHeader().saveState()) - cont = bytes(self.contRanksView.horizontalHeader().saveState()) - no_class = bytes(self.noClassRanksView.horizontalHeader().saveState()) - self.headerState = [disc, cont, no_class] + sort_order = self.ranksModel.sortOrder() + sort_column = self.ranksModel.sortColumn() - 1 # -1 for '#' (discrete count) column + self.sorting = (sort_column, sort_order) - def measuresSelectionChanged(self, measure): - """Measure selection has changed. Update column visibility. - """ - checked = self.selectedMeasures[measure.name] - self.selectedMeasures[measure.name] = not checked - if not checked: - self.selected_checks.add(measure.name) - elif measure.name in self.selected_checks: - self.selected_checks.remove(measure.name) - measures_mask = [False] * len(self.measures) - measures_mask += [False for _ in self.learners] - # Update scores for shown column if they are not yet computed. - if measure in self.measures and self.measure_scores: - index = self.measures.index(measure) - if all(s is None for s in self.measure_scores[index]): - measures_mask[index] = True - if self.update_scores: - self.updateScores(measures_mask) - self.updateVisibleScoreColumns() - - def updateVisibleScoreColumns(self): - """ - Update the visible columns of the scores view. - """ - for i, measure in enumerate(self.measures): - shown = self.selectedMeasures.get(measure.name) - self.ranksView.setColumnHidden(i + 1, not shown) - self.ranksView.setColumnWidth(i + 1, 100) - - index = self.ranksView.horizontalHeader().sortIndicatorSection() - if self.ranksView.isColumnHidden(index): - self.headerState[self.rankMode] = None - - if self.headerState[self.rankMode] is None: - def get_sort_by_col(measures, selected_measures): - cols = [i + 1 for i, m in enumerate(measures) if - m.name in selected_measures] - return cols[0] if cols else len(measures) + 1 - - col = get_sort_by_col(self.measures, self.selected_checks) - self.ranksView.sortByColumn(col, Qt.DescendingOrder) - self.autoSelection() + def methodSelectionChanged(self, state, method_name): + if state == Qt.Checked: + self.selected_methods.add(method_name) + elif method_name in self.selected_methods: + self.selected_methods.remove(method_name) - def updateDelegates(self): - self.contRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) - self.discRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) - self.noClassRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) + self.updateScores() def send_report(self): if not self.data: @@ -648,46 +492,41 @@ def send_report(self): self.report_items("Output", self.out_domain_desc) def commit(self): - self.selected_rows = self.get_selection() - if self.data and len(self.data.domain.attributes) == len( - self.selected_rows): - self.selectMethod = OWRank.SelectAll - self.selectButtons.button(self.selectMethod).setChecked(True) - selected = self.selectedAttrs() - if not self.data or not selected: + # Save indices of attributes in the original, unsorted domain + self.selected_rows = self.ranksModel.mapToSourceRows([ + i.row() for i in self.ranksView.selectionModel().selectedRows(0)]) + + selected_attrs = [] + if self.data is not None: + attributes = self.data.domain.attributes + if len(attributes) == len(self.selected_rows): + self.selectionMethod = OWRank.SelectAll + self.selectButtons.button(self.selectionMethod).setChecked(True) + selected_attrs = [attributes[i] + for i in self.selected_rows] + + if self.data is None or not selected_attrs: self.Outputs.reduced_data.send(None) self.out_domain_desc = None else: reduced_domain = Domain( - selected, self.data.domain.class_var, self.data.domain.metas) + selected_attrs, self.data.domain.class_var, self.data.domain.metas) data = self.data.transform(reduced_domain) self.Outputs.reduced_data.send(data) self.out_domain_desc = report.describe_domain(data.domain) - def selectedAttrs(self): - if self.data: - inds = self.ranksView.selectionModel().selectedRows(0) - source = self.ranksProxyModel.mapToSource - inds = map(source, inds) - inds = [ind.row() for ind in inds] - return [self.data.domain.attributes[i] for i in inds] - else: - return [] - def create_scores_table(self, labels): - indices = [i for i, m in enumerate(self.measures) - if self.selectedMeasures.get(m.name, False)] - measures = [s.name for s in self.measures if - self.selectedMeasures.get(s.name, False)] - measures += [label for label in labels] - if not measures: + model_list = self.ranksModel.tolist() + if not model_list or len(model_list[0]) == 1: # Empty or just n_values column return None - features = [ContinuousVariable(s) for s in measures] - metas = [StringVariable("Feature name")] - domain = Domain(features, metas=metas) - scores = np.nan_to_num(np.array([row for i, row in enumerate(self.measure_scores) - if i in indices or i >= len(self.measures)], dtype=np.float64).T) + domain = Domain([ContinuousVariable(label) for label in labels], + metas=[StringVariable("Feature")]) + + # Prevent np.inf scores + finfo = np.finfo(np.float64) + scores = np.clip(np.array(model_list)[:, 1:], finfo.min, finfo.max) + feature_names = np.array([a.name for a in self.data.domain.attributes]) # Reshape to 2d array as Table does not like 1d arrays feature_names = feature_names[:, None] @@ -698,56 +537,29 @@ def create_scores_table(self, labels): @classmethod def migrate_settings(cls, settings, version): - if not version: - # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0 - # headerState had length 2 - headerState = settings.get("headerState", None) - if headerState is not None and \ - isinstance(headerState, tuple) and \ - len(headerState) < 3: - headerState = (list(headerState) + [None] * 3)[:3] - settings["headerState"] = headerState - - -class ScoreValueItem(QStandardItem): - """A StandardItem subclass for python objects. - """ - def __init__(self, *args): - super().__init__(*args) - self.setFlags(Qt.ItemIsSelectable | Qt.ItemIsEnabled) - - def __lt__(self, other): - model = self.model() - if model is not None: - role = model.sortRole() - else: - role = Qt.DisplayRole - my = self.data(role) - other = other.data(role) - if my is None: - return True - return my < other - - -class MySortProxyModel(QSortFilterProxyModel): - - @staticmethod - def comparable(val): - return val is not None and \ - (isinstance(val, str) or float('-inf') < val < float('inf')) - - def lessThan(self, left, right): - role = self.sortRole() - left_data = left.data(role) - if not self.comparable(left_data): - left_data = float('-inf') - right_data = right.data(role) - if not self.comparable(right_data): - right_data = float('-inf') - try: - return left_data < right_data - except TypeError: - return str(left_data) < str(right_data) + # If older settings, restore sort header to default + # Saved selected_rows will likely be incorrect + if version is None or version < 2: + column, order = 0, Qt.DescendingOrder + headerState = settings.pop("headerState") + + # Lacking knowledge of last problemType, use discrete ranks view's ordering + if isinstance(headerState, (tuple, list)): + headerState = headerState[0] + + if isinstance(headerState, bytes): + hview = QHeaderView(Qt.Horizontal) + hview.restoreState(headerState) + column, order = hview.sortIndicatorSection() - 1, hview.sortIndicatorOrder() + settings["sorting"] = (column, order) + + @classmethod + def migrate_context(cls, context, version): + if version is None or version < 2: + # Old selection was saved as sorted indices. New selection is original indices. + # Since we can't devise the latter without first computing the ranks, + # just reset the selection to avoid confusion. + context.values['selected_rows'] = [] if __name__ == "__main__": @@ -755,9 +567,8 @@ def lessThan(self, left, right): from Orange.classification import RandomForestLearner a = QApplication([]) ow = OWRank() - ow.setData(Table("heart_disease.tab")) ow.set_learner(RandomForestLearner(), (3, 'Learner', None)) - ow.commit() + ow.setData(Table("heart_disease.tab")) ow.show() a.exec_() ow.saveSettings() diff --git a/Orange/widgets/data/owtable.py b/Orange/widgets/data/owtable.py index 43c25d52c26..2b4e17c5d0c 100644 --- a/Orange/widgets/data/owtable.py +++ b/Orange/widgets/data/owtable.py @@ -775,9 +775,12 @@ def get_selection(self, view): indexes = selection.indexes() - rows = list(set(ind.row() for ind in indexes)) + rows = numpy.unique([ind.row() for ind in indexes]) # map the rows through the applied sorting (if any) - rows = sorted(model.mapToTableRows(rows)) + rows = model.mapToSourceRows(rows) + rows.sort() + rows = rows.tolist() + cols = sorted(set(ind.column() for ind in indexes)) return rows, cols diff --git a/Orange/widgets/data/tests/test_owrank.py b/Orange/widgets/data/tests/test_owrank.py index 55e2b6233f4..72fcdb1f9d2 100644 --- a/Orange/widgets/data/tests/test_owrank.py +++ b/Orange/widgets/data/tests/test_owrank.py @@ -6,21 +6,25 @@ from Orange.classification import LogisticRegressionLearner from Orange.regression import LinearRegressionLearner from Orange.projection import PCA -from Orange.widgets.data.owrank import OWRank +from Orange.widgets.data.owrank import OWRank, ProblemType, CLS_SCORES, REG_SCORES from Orange.widgets.tests.base import WidgetTest from AnyQt.QtCore import Qt +from AnyQt.QtWidgets import QCheckBox class TestOWRank(WidgetTest): def setUp(self): - self.widget = self.create_widget(OWRank) + self.widget = self.create_widget(OWRank) # type: OWRank self.iris = Table("iris") self.housing = Table("housing") self.log_reg = LogisticRegressionLearner() self.lin_reg = LinearRegressionLearner() self.pca = PCA() + def _get_checkbox(self, method_shortname): + return self.widget.controlArea.findChild(QCheckBox, method_shortname) + def test_input_data(self): """Check widget's data with data on the input""" self.assertEqual(self.widget.data, None) @@ -36,23 +40,25 @@ def test_input_data_disconnect(self): def test_input_scorer(self): """Check widget's scorer with scorer on the input""" - self.assertEqual(self.widget.learners, {}) + self.assertEqual(self.widget.scorers, {}) self.send_signal(self.widget.Inputs.scorer, self.log_reg, 1) - value = self.widget.learners[1] - self.assertEqual(self.log_reg, value.score) - self.assertIsInstance(value.score, Scorer) + value = self.widget.scorers[1] + self.assertEqual(self.log_reg, value.scorer) + self.assertIsInstance(value.scorer, Scorer) def test_input_scorer_fitter(self): heart_disease = Table('heart_disease') - self.assertEqual(self.widget.learners, {}) + self.assertEqual(self.widget.scorers, {}) + + model = self.widget.ranksModel for fitter, name in ((RandomForestLearner(), 'random forest'), (SGDLearner(), 'sgd')): with self.subTest(fitter=fitter): self.send_signal("Scorer", fitter, 1) - for data, model in ((self.housing, self.widget.contRanksModel), - (heart_disease, self.widget.discRanksModel)): + for data in (self.housing, + heart_disease): with self.subTest(data=data.name): self.send_signal('Data', data) scores = [model.data(model.index(row, model.columnCount() - 1)) @@ -65,14 +71,14 @@ def test_input_scorer_fitter(self): self.assertIn(name, last_column) self.send_signal("Scorer", None, 1) - self.assertEqual(self.widget.learners, {}) + self.assertEqual(self.widget.scorers, {}) def test_input_scorer_disconnect(self): """Check widget's scorer after disconnecting scorer on the input""" self.send_signal(self.widget.Inputs.scorer, self.log_reg, 1) - self.assertEqual(len(self.widget.learners), 1) + self.assertEqual(len(self.widget.scorers), 1) self.send_signal(self.widget.Inputs.scorer, None, 1) - self.assertEqual(self.widget.learners, {}) + self.assertEqual(self.widget.scorers, {}) def test_output_data(self): """Check data on the output after apply""" @@ -101,43 +107,37 @@ def test_output_scores_with_scorer(self): self.assertIsInstance(output, Table) self.assertEqual(output.X.shape, (len(self.iris.domain.attributes), 5)) - def test_scoring_method_check_box(self): + def test_scoring_method_problem_type(self): """Check scoring methods check boxes""" - boxes = [self.widget.cls_scoring_box] * 7 + \ - [self.widget.reg_scoring_box] * 2 - for check_box, box in zip(self.widget.score_checks, boxes): - self.assertEqual(check_box.parent(), box) self.send_signal(self.widget.Inputs.data, self.iris) - self.assertEqual(self.widget.score_stack.currentWidget(), boxes[0]) + self.assertEqual(self.widget.problem_type_mode, ProblemType.CLASSIFICATION) + self.assertEqual(self.widget.measuresStack.currentIndex(), ProblemType.CLASSIFICATION) + self.send_signal(self.widget.Inputs.data, self.housing) - self.assertEqual(self.widget.score_stack.currentWidget(), boxes[7]) + self.assertEqual(self.widget.problem_type_mode, ProblemType.REGRESSION) + self.assertEqual(self.widget.measuresStack.currentIndex(), ProblemType.REGRESSION) + data = Table.from_table(Domain(self.iris.domain.variables), self.iris) self.send_signal(self.widget.Inputs.data, data) - self.assertNotIn(self.widget.score_stack.currentWidget(), boxes) + self.assertEqual(self.widget.problem_type_mode, ProblemType.UNSUPERVISED) + self.assertEqual(self.widget.measuresStack.currentIndex(), ProblemType.UNSUPERVISED) - def test_scoring_method_default(self): - """Check selected scoring methods with no data on the input""" + def test_scoring_method_defaults(self): + """Check default scoring methods are selected""" self.send_signal(self.widget.Inputs.data, None) - check_score = (False, True, True, False, False, False, False, False, - False) - for check_box, checked in zip(self.widget.score_checks, check_score): - self.assertEqual(check_box.isChecked(), checked) - - def test_scoring_method_classification(self): - """Check selected scoring methods with classification data on the input""" - self.send_signal(self.widget.Inputs.data, self.iris) - check_score = (False, True, True, False, False, False, False, False, - False) - for check_box, checked in zip(self.widget.score_checks, check_score): - self.assertEqual(check_box.isChecked(), checked) + for method in CLS_SCORES: + checkbox = self._get_checkbox(method.shortname) + self.assertEqual(checkbox.isChecked(), method.is_default) - def test_scoring_method_regression(self): - """Check selected scoring methods with regression data on the input""" self.send_signal(self.widget.Inputs.data, self.housing) - check_score = (False, False, False, False, False, False, False, - True, True) - for check_box, checked in zip(self.widget.score_checks, check_score): - self.assertEqual(check_box.isChecked(), checked) + for method in REG_SCORES: + checkbox = self._get_checkbox(method.shortname) + self.assertEqual(checkbox.isChecked(), method.is_default) + + self.send_signal(self.widget.Inputs.data, self.iris) + for method in CLS_SCORES: + checkbox = self._get_checkbox(method.shortname) + self.assertEqual(checkbox.isChecked(), method.is_default) def test_cls_scorer_reg_data(self): """Check scores on the output with inadequate scorer""" @@ -155,25 +155,16 @@ def test_reg_scorer_cls_data(self): self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.attributes), 7)) - def test_scoring_method_visible(self): - """Check which scoring box is visible according to data""" - self.send_signal(self.widget.Inputs.data, self.iris) - self.assertEqual(self.widget.score_stack.currentIndex(), 0) - self.send_signal(self.widget.Inputs.data, self.housing) - self.assertEqual(self.widget.score_stack.currentIndex(), 1) - self.send_signal(self.widget.Inputs.data, None) - self.assertEqual(self.widget.score_stack.currentIndex(), 0) - def test_scores_updates_cls(self): """Check arbitrary workflow with classification data""" self.send_signal(self.widget.Inputs.data, self.iris) self.send_signal(self.widget.Inputs.scorer, self.log_reg, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.attributes), 5)) - self.widget.score_checks[2].setChecked(False) + self._get_checkbox('Gini').setChecked(False) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.attributes), 4)) - self.widget.score_checks[2].setChecked(True) + self._get_checkbox('Gini').setChecked(True) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.attributes), 5)) self.send_signal(self.widget.Inputs.scorer, self.log_reg, 2) @@ -195,15 +186,19 @@ def test_scores_updates_reg(self): self.send_signal(self.widget.Inputs.scorer, self.lin_reg, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.housing.domain.attributes), 3)) - self.widget.score_checks[-2].setChecked(False) + + self._get_checkbox('Univar. reg.').setChecked(False) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.housing.domain.attributes), 2)) - self.widget.score_checks[-2].setChecked(True) + + self._get_checkbox('Univar. reg.').setChecked(True) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.housing.domain.attributes), 3)) + self.send_signal(self.widget.Inputs.scorer, None, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.housing.domain.attributes), 2)) + self.send_signal(self.widget.Inputs.scorer, self.lin_reg, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.housing.domain.attributes), 3)) @@ -214,16 +209,28 @@ def test_scores_updates_no_class(self): self.assertIsNone(data.domain.class_var) self.send_signal(self.widget.Inputs.data, data) self.assertIsNone(self.get_output(self.widget.Outputs.scores)) + self.send_signal(self.widget.Inputs.scorer, self.lin_reg, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.variables), 1)) + self.send_signal(self.widget.Inputs.scorer, self.pca, 1) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.variables), 7)) + self.send_signal(self.widget.Inputs.scorer, self.lin_reg, 2) self.assertEqual(self.get_output(self.widget.Outputs.scores).X.shape, (len(self.iris.domain.variables), 8)) + def test_scores_sorting(self): + """Check clicking on header column orders scores in a different way""" + self.send_signal(self.widget.Inputs.data, self.iris) + order1 = self.widget.ranksModel.mapToSourceRows(...).tolist() + self._get_checkbox('FCBF').setChecked(True) + self.widget.ranksView.horizontalHeader().setSortIndicator(3, Qt.DescendingOrder) + order2 = self.widget.ranksModel.mapToSourceRows(...).tolist() + self.assertNotEqual(order1, order2) + def test_data_which_make_scorer_nan(self): """ Tests if widget crashes due to too high (Infinite) calculated values. @@ -238,12 +245,13 @@ def test_data_which_make_scorer_nan(self): [-np.power(10, 10), 1, 1], [0, 1, 1] ))) - self.widget.score_checks[3].setChecked(True) #ANOVA + self.widget.selected_methods.add('ANOVA') self.send_signal(self.widget.Inputs.data, table) - def test_setting_migration_extends_header_state(self): + def test_setting_migration_fixes_header_state(self): # Settings as of version 3.3.5 settings = { + '__version__': 1, 'auto_apply': True, 'headerState': ( b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00' @@ -268,4 +276,4 @@ def test_setting_migration_extends_header_state(self): w = self.create_widget(OWRank, stored_settings=settings) - self.assertEqual(len(w.headerState), 3) + self.assertEqual(w.sorting, (0, Qt.AscendingOrder)) diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py index b00b6fa6565..b4f6834fde1 100644 --- a/Orange/widgets/gui.py +++ b/Orange/widgets/gui.py @@ -2882,15 +2882,12 @@ def __init__(self, parent=None, decimals=3, color=Qt.red): self.float_fmt = "%%.%if" % decimals self.color = QtGui.QColor(color) - def displayText(self, value, locale): + def displayText(self, value, locale=QtCore.QLocale()): + if value is None or isinstance(value, float) and math.isnan(value): + return "NA" if isinstance(value, float): return self.float_fmt % value - elif isinstance(value, str): - return value - elif value is None: - return "NA" - else: - return str(value) + return str(value) def sizeHint(self, option, index): font = self.get_font(option, index) @@ -2902,7 +2899,7 @@ def sizeHint(self, option, index): def paint(self, painter, option, index): self.initStyleOption(option, index) - text = self.displayText(index.data(Qt.DisplayRole), QtCore.QLocale()) + text = self.displayText(index.data(Qt.DisplayRole)) ratio, have_ratio = self.get_bar_ratio(option, index) rect = option.rect diff --git a/Orange/widgets/tests/test_itemmodels.py b/Orange/widgets/tests/test_itemmodels.py index 9afe23712d7..9f42a4b4da7 100644 --- a/Orange/widgets/tests/test_itemmodels.py +++ b/Orange/widgets/tests/test_itemmodels.py @@ -56,12 +56,12 @@ def test_editable(self): def test_sort(self): self.model.sort(1) - self.assertEqual(self.model[0][0], 2) + self.assertEqual(self.model.index(0, 0).data(Qt.EditRole), 2) def test_setHeaderLabels(self): self.model.setHorizontalHeaderLabels(['Col 1', 'Col 2']) self.assertEqual(self.model.headerData(1, Qt.Horizontal), 'Col 2') - self.assertEqual(self.model.headerData(1, Qt.Vertical), '1') + self.assertEqual(self.model.headerData(1, Qt.Vertical), 2) def test_removeRows(self): self.model.removeRows(0, 1) diff --git a/Orange/widgets/utils/itemmodels.py b/Orange/widgets/utils/itemmodels.py index e05aff09fb9..45d830319b3 100644 --- a/Orange/widgets/utils/itemmodels.py +++ b/Orange/widgets/utils/itemmodels.py @@ -7,6 +7,7 @@ from contextlib import contextmanager from functools import reduce, partial, lru_cache from itertools import chain +from warnings import warn from xml.sax.saxutils import escape from AnyQt.QtCore import ( @@ -63,7 +64,155 @@ def _as_contiguous_range(the_slice, length): return start, stop, step -class PyTableModel(QAbstractTableModel): +class AbstractSortTableModel(QAbstractTableModel): + """ + A sorting proxy table model that sorts its rows in fast numpy, + avoiding potentially thousands of calls into + ``QSortFilterProxyModel.lessThan()`` or any potentially costly + reordering of original data. + + Override ``sortColumnData()``, adapting it to your underlying model. + + Make sure to use ``mapToSourceRows()``/``mapFromSourceRows()`` + whenever fetching or manipulating table data, such as in ``data()``. + + When updating the model (inserting, removing rows), the sort order + needs to be accounted for (e.g. reset and re-applied). + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.__sortInd = None #: Indices sorting the source table + self.__sortIndInv = None #: The inverse of __sortInd + self.__sortColumn = -1 #: Sort key column, or -1 + self.__sortOrder = Qt.AscendingOrder + + def sortColumnData(self, column): + """Return raw, sortable data for column""" + raise NotImplementedError + + def _sortColumnData(self, column): + try: + # Call the overridden implementation if available + data = numpy.asarray(self.sortColumnData(column)) + except NotImplementedError: + # Fallback to slow implementation + data = numpy.array([self.index(row, column).data() + for row in range(self.rowCount())]) + + data = data[self.mapToSourceRows(Ellipsis)] + return data + + def sortColumn(self): + """The column currently used for sorting (-1 if no sorting is applied)""" + return self.__sortColumn + + def sortOrder(self): + """The current sort order""" + return self.__sortOrder + + def mapToSourceRows(self, rows): + """Return array of row indices in the source table for given model rows + + Parameters + ---------- + rows : int or list of int or numpy.ndarray or Ellipsis + View (sorted) rows. + + Returns + ------- + numpy.ndarray + Source rows matching input rows. If they are the same, + simply input `rows` is returned. + """ + if self.__sortInd is not None: + new_rows = self.__sortInd[rows] + if rows is Ellipsis: + new_rows.setflags(write=False) + rows = new_rows + return rows + + def mapFromSourceRows(self, rows): + """Return array of row indices in the model for given source table rows + + Parameters + ---------- + rows : int or list of int or numpy.ndarray or Ellipsis + Source model rows. + + Returns + ------- + numpy.ndarray + ModelIndex (sorted) rows matching input source rows. + If they are the same, simply input `rows` is returned. + """ + if self.__sortIndInv is not None: + new_rows = self.__sortIndInv[rows] + if rows is Ellipsis: + new_rows.setflags(write=False) + rows = new_rows + return rows + + def resetSorting(self): + """Invalidates the current sorting""" + return self.sort(-1) + + def sort(self, column: int, order: Qt.SortOrder=Qt.AscendingOrder): + """ + Sort the data by `column` into `order`. + + To reset the order, pass column=-1. + + Reimplemented from QAbstractItemModel.sort(). + + Notes + ----- + This only affects the model's data presentation. The underlying + data table is left unmodified. Use mapToSourceRows()/mapFromSourceRows() + when accessing data by row indexes. + """ + self.layoutAboutToBeChanged.emit() + + # Store persistent indices as well as their (actual) rows in the + # source data table. + persistent = self.persistentIndexList() + persistent_rows = self.mapToSourceRows([i.row() for i in persistent]) + + self.__sortColumn = -1 if column < 0 else column + self.__sortOrder = order + + indices = None + if column >= 0: + data = numpy.asarray(self._sortColumnData(column)) + if data is not None: + if data.dtype == object: + data = data.astype(str) + indices = numpy.argsort(data, kind="mergesort") + else: + indices = numpy.arange(self.rowCount()) + + if order == Qt.DescendingOrder: + indices = indices[::-1] + + indices = self.mapToSourceRows(indices) + + if indices is not None: + self.__sortInd = indices + self.__sortIndInv = numpy.argsort(indices) + else: + self.__sortInd = None + self.__sortIndInv = None + + persistent_rows = self.mapFromSourceRows(persistent_rows) + + self.changePersistentIndexList( + persistent, + [self.index(row, pind.column()) + for row, pind in zip(persistent_rows, persistent)]) + self.layoutChanged.emit() + + +class PyTableModel(AbstractSortTableModel): """ A model for displaying python tables (sequences of sequences) in QTableView objects. @@ -115,26 +264,26 @@ def flags(self, index): return flags | Qt.ItemIsEditable def setData(self, index, value, role): + row = self.mapFromSourceRows(index.row()) if role == Qt.EditRole: - self[index.row()][index.column()] = value + self[row][index.column()] = value self.dataChanged.emit(index, index) else: - self._roleData[index.row()][index.column()][role] = value + self._roleData[row][index.column()][role] = value return True def data(self, index, role=Qt.DisplayRole): if not index.isValid(): return - role_value = (self._roleData - .get(index.row(), {}) - .get(index.column(), {}) - .get(role)) + row, column = self.mapToSourceRows(index.row()), index.column() + + role_value = self._roleData.get(row, {}).get(column, {}).get(role) if role_value is not None: return role_value try: - value = self[index.row()][index.column()] + value = self[row][column] except IndexError: return if role == Qt.EditRole: @@ -162,36 +311,44 @@ def data(self, index, role=Qt.DisplayRole): if role == Qt.ToolTipRole: return str(value) - def sort(self, column, order=Qt.AscendingOrder): - self.beginResetModel() - indices = sorted(range(len(self._table)), - key=lambda i: self._table[i][column], - reverse=order != Qt.AscendingOrder) - self._table[:] = [self._table[i] for i in indices] - - rd = self._roleData - self._roleData = self._RoleData() - self._roleData.update((i, rd.get(row)) - for i, row in enumerate(indices) - if rd.get(row)) - - vheaders = self._headers.get(Qt.Vertical, ()) - if vheaders: - vheaders = tuple(vheaders) + ('',) * max(0, (len(self._table) - len(vheaders))) - vheaders = [vheaders[i] for i in indices] - self._headers[Qt.Vertical] = vheaders - self.endResetModel() + def sortColumnData(self, column): + return [row[column] for row in self._table] def setHorizontalHeaderLabels(self, labels): - self._headers[Qt.Horizontal] = labels + """ + Parameters + ---------- + labels : list of str or list of Variable + """ + self._headers[Qt.Horizontal] = tuple(labels) def setVerticalHeaderLabels(self, labels): - self._headers[Qt.Vertical] = labels + """ + Parameters + ---------- + labels : list of str or list of Variable + """ + self._headers[Qt.Vertical] = tuple(labels) def headerData(self, section, orientation, role=Qt.DisplayRole): - if role == Qt.DisplayRole: - headers = self._headers.get(orientation) - return headers[section] if headers and section < len(headers) else str(section) + headers = self._headers.get(orientation) + + if headers and section < len(headers): + section = self.mapToSourceRows(section) if orientation == Qt.Vertical else section + value = headers[section] + + if role == Qt.ToolTipRole: + role = Qt.DisplayRole + + if role == Qt.DisplayRole: + return value.name if isinstance(value, Variable) else value + + if role == Qt.DecorationRole: + if isinstance(value, Variable): + return gui.attributeIconDict[value] + + # Use QAbstractItemModel default for non-existent header/sections + return super().headerData(section, orientation, role) def removeRows(self, row, count, parent=QModelIndex()): if not parent.isValid(): @@ -243,6 +400,7 @@ def __delitem__(self, i): stop -= 1 else: start = stop = i = i if i >= 0 else len(self) + i + self._check_sort_order() self.beginRemoveRows(QModelIndex(), start, stop) del self._table[i] self.endRemoveRows() @@ -253,19 +411,31 @@ def __setitem__(self, i, value): stop -= 1 else: start = stop = i = i if i >= 0 else len(self) + i + self._check_sort_order() self._table[i] = value self.dataChanged.emit(self.index(start, 0), self.index(stop, self.columnCount() - 1)) + def _check_sort_order(self): + if self.mapToSourceRows(Ellipsis) is not Ellipsis: + warn("Can't modify PyTableModel when it's sorted", + RuntimeWarning, stacklevel=3) + raise RuntimeError("Can't modify PyTableModel when it's sorted") + def wrap(self, table): self.beginResetModel() self._table = table self._roleData = self._RoleData() + self.resetSorting() self.endResetModel() + def tolist(self): + return self._table + def clear(self): self.beginResetModel() self._table.clear() + self.resetSorting() self._roleData.clear() self.endResetModel() @@ -826,7 +996,7 @@ def addAction(self, action, *args): return self.insertAction(-1, action, *args) -class TableModel(QAbstractTableModel): +class TableModel(AbstractSortTableModel): """ An adapter for using Orange.data.Table within Qt's Item View Framework. @@ -957,78 +1127,13 @@ def row_instance(index): if self.__rowCount > (2 ** 31 - 1): raise ValueError("len(sourcedata) > 2 ** 31 - 1") - self.__sortColumn = -1 - self.__sortOrder = Qt.AscendingOrder - # Indices sorting the source table - self.__sortInd = None - # The inverse of __sortInd - self.__sortIndInv = None + def sortColumnData(self, column): + return self._columnSortKeyData(column, TableModel.ValueRole) - def sort(self, column, order): + def _columnSortKeyData(self, column, role): """ - Sort the data by `column` index into `order` - - To reset the sort order pass -1 as the column. - - :type column: int - :type order: Qt.SortOrder - - Reimplemented from QAbstractItemModel.sort - - .. note:: - This only affects the model's data presentation, the - underlying data table is left unmodified. - - """ - self.layoutAboutToBeChanged.emit() - - # Store persistent indices as well as their (actual) rows in the - # source data table. - persistent = self.persistentIndexList() - persistent_rows = numpy.array([ind.row() for ind in persistent], int) - if self.__sortInd is not None: - persistent_rows = self.__sortInd[persistent_rows] - - self.__sortColumn = column - self.__sortOrder = order - - if column < 0: - indices = None - else: - keydata = self.columnSortKeyData(column, TableModel.ValueRole) - if keydata is not None: - if keydata.dtype == object: - indices = sorted(range(self.__rowCount), - key=lambda i: str(keydata[i])) - indices = numpy.array(indices) - else: - indices = numpy.argsort(keydata, kind="mergesort") - else: - indices = numpy.arange(0, self.__rowCount) - - if order == Qt.DescendingOrder: - indices = indices[::-1] - - if self.__sortInd is not None: - indices = self.__sortInd[indices] - - if indices is not None: - self.__sortInd = indices - self.__sortIndInv = numpy.argsort(indices) - else: - self.__sortInd = None - self.__sortIndInv = None - - if self.__sortInd is not None: - persistent_rows = self.__sortIndInv[persistent_rows] - - for pind, row in zip(persistent, persistent_rows): - self.changePersistentIndex(pind, self.index(row, pind.column())) - self.layoutChanged.emit() - - def columnSortKeyData(self, column, role): - """ - Return a sequence of objects which can be used as `keys` for sorting. + Return a sequence of source table objects which can be used as + `keys` for sorting. :param int column: Sort column. :param Qt.ItemRole role: Sort item role. @@ -1037,51 +1142,15 @@ def columnSortKeyData(self, column, role): coldesc = self.columns[column] if isinstance(coldesc, TableModel.Column) \ and role == TableModel.ValueRole: - col_view, _ = self.source.get_column_view(coldesc.var) - col_data = numpy.asarray(col_view) + col_data = numpy.asarray(self.source.get_column_view(coldesc.var)[0]) + if coldesc.var.is_continuous: # continuous from metas have dtype object; cast it to float col_data = col_data.astype(float) - if self.__sortInd is not None: - col_data = col_data[self.__sortInd] return col_data else: - if self.__sortInd is not None: - indices = self.__sortInd - else: - indices = range(self.rowCount()) return numpy.asarray([self.index(i, column).data(role) - for i in indices]) - - def sortColumn(self): - """ - The column currently used for sorting (-1 if no sorting is applied). - """ - return self.__sortColumn - - def sortOrder(self): - """ - The current sort order. - """ - return self.__sortOrder - - def mapToTableRows(self, modelrows): - """ - Return the row indices in the source table for the given model rows. - """ - if self.__sortColumn < 0: - return modelrows - else: - return self.__sortInd[modelrows].tolist() - - def mapFromTableRows(self, tablerows): - """ - Return the row indices in the model for the given source table rows. - """ - if self.__sortColumn < 0: - return tablerows - else: - return self.__sortIndInv[tablerows].tolist() + for i in range(self.rowCount())]) def data(self, index, role, # For optimizing out LOAD_GLOBAL byte code instructions in @@ -1116,8 +1185,7 @@ def data(self, index, role, if not 0 <= row <= self.__rowCount: return None - if self.__sortInd is not None: - row = self.__sortInd[row] + row = self.mapToSourceRows(row) try: instance = self._row_instance(row) @@ -1154,10 +1222,10 @@ def data(self, index, role, return None def setData(self, index, value, role): - row, col = self.__sortIndInv[index.row()], index.column() + row = self.mapFromSourceRows(index.row()) if role == Qt.EditRole: try: - self.source[row, col] = value + self.source[row, index.column()] = value except (TypeError, IndexError): return False else: @@ -1182,12 +1250,8 @@ def headerData(self, section, orientation, role): """Reimplemented from `QAbstractTableModel.headerData`.""" if orientation == Qt.Vertical: if role == Qt.DisplayRole: - if self.__sortInd is not None: - return int(self.__sortInd[section] + 1) - else: - return int(section + 1) - else: - return None + return int(self.mapToSourceRows(section) + 1) + return None coldesc = self.columns[section] if role == Qt.DisplayRole: diff --git a/doc/development/source/tutorial-settings.rst b/doc/development/source/tutorial-settings.rst index fc17f0b3809..2c7909c5abd 100644 --- a/doc/development/source/tutorial-settings.rst +++ b/doc/development/source/tutorial-settings.rst @@ -296,6 +296,13 @@ Imagine opening a complex workflow you have designed a year ago with the new version of Orange and finding out that all the settings are back to default. Not fun! +.. warning:: + + If you change the format of an existing setting in a backwards-incompatible + way, you will also want to *change the name* of that setting. Otherwise, + older versions of Orange won't be able to load workflows with the new + setting format. + There are two helper functions you can use. :obj:`Orange.widget.settings.rename_settings(settings, old_name, new_name)` does the obvious operation on `settings`, which can be either a dictionary @@ -304,7 +311,7 @@ or a context, thus it can be called from `migrate_settings` or Another common operation may be upgrading your widget from storing variable names (as `str`) to storing variables (instances of classes derived from -`Variable`). In a typical scenario, this happenswhen combo boxes are upgraded to +`Variable`). In a typical scenario, this happens when combo boxes are upgraded to using models. Function :obj:`Orange.widget.settings.migrate_str_to_variable(settings, names=None)` makes the necessary changes to the settings listed in `names`. `names` can be