Skip to content

Commit

Permalink
Test and Score: Add comparison of models
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Dec 23, 2019
1 parent 9eba9f7 commit 9d8b70f
Show file tree
Hide file tree
Showing 3 changed files with 311 additions and 3 deletions.
139 changes: 137 additions & 2 deletions Orange/widgets/evaluate/owtestlearners.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# pylint doesn't understand the Settings magic
# pylint: disable=invalid-sequence-index
# pylint: disable=too-many-lines,too-many-instance-attributes
import abc
import enum
import logging
Expand All @@ -9,14 +10,17 @@

from concurrent.futures import Future
from collections import OrderedDict, namedtuple
from itertools import count
from typing import Any, Optional, List, Dict, Callable

import numpy as np
import baycomp

from AnyQt import QtGui
from AnyQt.QtGui import QStandardItem
from AnyQt.QtCore import Qt, QSize, QThread
from AnyQt.QtCore import pyqtSlot as Slot
from AnyQt.QtGui import QStandardItem, QDoubleValidator
from AnyQt.QtWidgets import QHeaderView, QTableWidget, QLabel

from Orange.base import Learner
import Orange.classification
Expand All @@ -35,7 +39,7 @@
from Orange.widgets.utils.widgetpreview import WidgetPreview
from Orange.widgets.utils.concurrent import ThreadExecutor, TaskState
from Orange.widgets.widget import OWWidget, Msg, Input, Output

from orangewidget.utils.itemmodels import PyListModel

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -175,6 +179,10 @@ class Outputs:
fold_feature = settings.ContextSetting(None)
fold_feature_selected = settings.ContextSetting(False)

use_rope = settings.Setting(False)
rope = settings.Setting(0.1)
comparison_criterion = settings.Setting(0)

TARGET_AVERAGE = "(Average over classes)"
class_selection = settings.ContextSetting(TARGET_AVERAGE)

Expand Down Expand Up @@ -275,13 +283,46 @@ def __init__(self):
callback=self._on_target_class_changed,
contentsLength=8)

self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison")
gui.comboBox(
box, self, "comparison_criterion", model=PyListModel(),
callback=self.update_comparison_table)

hbox = gui.hBox(box)
gui.checkBox(hbox, self, "use_rope",
"Negligible difference: ",
callback=self.update_comparison_table)
gui.lineEdit(hbox, self, "rope", validator=QDoubleValidator(),
controlWidth=70, callback=self.update_comparison_table,
alignment=Qt.AlignRight)

gui.rubber(self.controlArea)
self.score_table = ScoreTable(self)
self.score_table.shownScoresChanged.connect(self.update_stats_model)
view = self.score_table.view
view.setSizeAdjustPolicy(view.AdjustToContents)

box = gui.vBox(self.mainArea, "Evaluation Results")
box.layout().addWidget(self.score_table.view)

self.compbox = box = gui.vBox(self.mainArea, box="Model comparison")
table = self.comparison_table = QTableWidget(
wordWrap=False, editTriggers=QTableWidget.NoEditTriggers,
selectionMode=QTableWidget.NoSelection)
table.setSizeAdjustPolicy(table.AdjustToContents)
table.verticalHeader().setSectionResizeMode(QHeaderView.Fixed)

header = table.horizontalHeader()
header.setSectionResizeMode(QHeaderView.ResizeToContents)
header.setDefaultAlignment(Qt.AlignLeft)
header.setStretchLastSection(False)
box.layout().addWidget(table)
box.layout().addWidget(QLabel(
"<small>Table shows probabilities that the score for the model in "
"the row is higher than that of the model in the column. "
"Small numbers show the probability that the difference is "
"negligible.</small>", wordWrap=True))

@staticmethod
def sizeHint():
    # Default widget size: wide, with a token height of 1 —
    # NOTE(review): presumably this defers vertical sizing to the
    # layout; confirm against OWWidget sizing behavior.
    return QSize(780, 1)
Expand Down Expand Up @@ -440,6 +481,8 @@ def _update_scorers(self):
self.scorers = []
return
self.scorers = usable_scorers(self.data.domain.class_var)
self.controls.comparison_criterion.model()[:] = \
[scorer.long_name or scorer.name for scorer in self.scorers]

@Inputs.preprocessor
def set_preprocessor(self, preproc):
Expand Down Expand Up @@ -470,6 +513,9 @@ def shuffle_split_changed(self):
self._param_changed()

def _param_changed(self):
    """React to a change of sampling parameters.

    Model comparison is only available for k-fold cross validation, so
    its controls are toggled accordingly; results are then recomputed.
    """
    kfold_active = self.resampling == OWTestLearners.KFold
    for widget in (self.modcompbox, self.comparison_table):
        widget.setEnabled(kfold_active)
    self._invalidate()
    self.__update()

Expand Down Expand Up @@ -562,6 +608,91 @@ def update_stats_model(self):
self.error("\n".join(errors), shown=bool(errors))
self.Warning.scores_not_computed(shown=has_missing_scores)

def update_comparison_table(self):
    """Refresh the pairwise model-comparison table.

    The comparison is only meaningful for k-fold cross validation; for
    any other sampling scheme the table is cleared and left empty.
    """
    self.comparison_table.clearContents()
    if self.resampling == OWTestLearners.KFold:
        successful = self._successful_slots()
        self._fill_table(successful, self._scores_by_folds(successful))

def _successful_slots(self):
    """Return learner slots whose results were computed successfully,
    in the order in which they currently appear in the score table."""
    source = self.score_table.model
    view_order = self.score_table.sorted_model

    result = []
    for row in range(view_order.rowCount()):
        index = view_order.mapToSource(view_order.index(row, 0))
        key = source.data(index, Qt.UserRole)
        slot = self.learners[key]
        if slot.results is not None and slot.results.success:
            result.append(slot)
    return result

def _scores_by_folds(self, slots):
    """Compute per-fold scores for each slot with the scorer selected
    as the comparison criterion.

    Returns a list parallel to `slots`: each entry is a flat array of
    fold scores, or None if scoring failed (a warning is shown then).
    """
    scorer = self.scorers[self.comparison_criterion]()
    self.compbox.setTitle(f"Model comparison by {scorer.name}")
    if not scorer.is_binary:
        kw = {}
    elif self.class_selection == self.TARGET_AVERAGE:
        kw = dict(average='weighted')
    else:
        class_var = self.data.domain.class_var
        kw = dict(target=class_var.values.index(self.class_selection))

    def fold_scores(results):
        # Deferred into a thunk so Try can catch scorer exceptions.
        def thunked():
            return scorer.scores_by_folds(results.value, **kw).flatten()

        return thunked

    attempts = [Try(fold_scores(slot.results)) for slot in slots]
    scores = [att.value if att.success else None for att in attempts]
    # Can't test with `None in scores`: entries are np.arrays, whose
    # `==` is elementwise.
    if any(score is None for score in scores):
        self.Warning.scores_not_computed()
    return scores

def _fill_table(self, slots, scores):
    """Populate the comparison table with pairwise Bayesian comparisons.

    Cell (row, col) shows the probability that the model in the row
    scores higher than the model in the column; when a ROPE is set, a
    second, smaller number gives the probability that the difference is
    negligible. Pairs for which either model's scores are missing are
    left blank; the diagonal is never filled.
    """
    table = self.comparison_table
    table.setRowCount(len(slots))
    table.setColumnCount(len(slots))

    names = [learner_name(slot.learner) for slot in slots]
    table.setVerticalHeaderLabels(names)
    table.setHorizontalHeaderLabels(names)

    # Only pairs below the diagonal are computed; each comparison fills
    # both the (row, col) and the mirrored (col, row) cell.
    for row, row_name, row_scores in zip(count(), names, scores):
        for col, col_name, col_scores in zip(range(row), names, scores):
            if row_scores is None or col_scores is None:
                continue
            if self.use_rope and self.rope:
                p0, rope, p1 = baycomp.two_on_single(
                    row_scores, col_scores, self.rope)
                # Fixed: removed a stray ')' that followed the rope
                # probability in this cell's label, which made it
                # inconsistent with the mirrored cell below.
                self._set_cell(table, row, col,
                               f"{p0:.3f}<br/><small>{rope:.3f}</small>",
                               f"p({row_name} > {col_name}) = {p0:.3f}\n"
                               f"p({row_name} = {col_name}) = {rope:.3f}")
                self._set_cell(table, col, row,
                               f"{p1:.3f}<br/><small>{rope:.3f}</small>",
                               f"p({col_name} > {row_name}) = {p1:.3f}\n"
                               f"p({col_name} = {row_name}) = {rope:.3f}")
            else:
                p0, p1 = baycomp.two_on_single(row_scores, col_scores)
                self._set_cell(table, row, col,
                               f"{p0:.3f}",
                               f"p({row_name} > {col_name}) = {p0:.3f}")
                self._set_cell(table, col, row,
                               f"{p1:.3f}",
                               f"p({col_name} > {row_name}) = {p1:.3f}")

@staticmethod
def _set_cell(table, row, col, label, tooltip):
    """Place a centered rich-text label with a tooltip into a table cell."""
    cell = QLabel(label)
    cell.setAlignment(Qt.AlignCenter)
    cell.setToolTip(tooltip)
    table.setCellWidget(row, col, cell)

def _update_class_selection(self):
self.class_selection_combo.setCurrentIndex(-1)
self.class_selection_combo.clear()
Expand All @@ -585,6 +716,7 @@ def _update_class_selection(self):

def _on_target_class_changed(self):
    # Both the score table and the pairwise comparison depend on the
    # selected target class, so refresh both views.
    self.update_stats_model()
    self.update_comparison_table()

def _invalidate(self, which=None):
self.cancel()
Expand All @@ -611,6 +743,8 @@ def _invalidate(self, which=None):
item.setData(None, Qt.DisplayRole)
item.setData(None, Qt.ToolTipRole)

self.comparison_table.clearContents()

self.__needupdate = True

def commit(self):
Expand Down Expand Up @@ -866,6 +1000,7 @@ def __task_complete(self, f: 'Future[Results]'):

self.score_table.update_header(self.scorers)
self.update_stats_model()
self.update_comparison_table()

self.commit()

Expand Down
Loading

0 comments on commit 9d8b70f

Please sign in to comment.