Skip to content

Commit

Permalink
TestAndScore: Fix data errors
Browse files: browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed May 5, 2020
1 parent d019c6c commit c1a4679
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 27 deletions.
38 changes: 20 additions & 18 deletions Orange/widgets/evaluate/owtestandscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from Orange.evaluation import Results
from Orange.preprocess.preprocess import Preprocess
import Orange.regression
from Orange.statistics.util import unique
from Orange.widgets import gui, settings, widget
from Orange.widgets.evaluate.utils import \
usable_scorers, ScoreTable, learner_name, scorer_caller
Expand Down Expand Up @@ -189,17 +190,14 @@ class Outputs:
class Error(OWWidget.Error):
train_data_empty = Msg("Train dataset is empty.")
test_data_empty = Msg("Test dataset is empty.")
class_required = Msg("Train data input requires a target variable.")
too_many_classes = Msg("Too many target variables.")
class_required_test = Msg("Test data input requires a target variable.")
too_many_folds = Msg("Number of folds exceeds the data size")
class_inconsistent = Msg("Test and train datasets "
"have different target variables.")
memory_error = Msg("Not enough memory.")
no_class_values = Msg("Target variable has no values.")
only_one_class_var_value = Msg("Target variable has only one value.")
test_data_incompatible = Msg(
"Test data may be incompatible with train data.")
data_error = Msg("{}")

class Warning(OWWidget.Warning):
missing_data = \
Expand Down Expand Up @@ -383,25 +381,29 @@ def set_train_data(self, data):
self.cancel()
self.Information.data_sampled.clear()
self.Error.train_data_empty.clear()
self.Error.class_required.clear()
self.Error.too_many_classes.clear()
self.Error.no_class_values.clear()
self.Error.only_one_class_var_value.clear()
self.Error.data_error.clear()

if data is not None and not data:
self.Error.train_data_empty()
data = None
if data:
conds = [not data.domain.class_vars,
len(data.domain.class_vars) > 1,
np.isnan(data.Y).all(),
data.domain.has_discrete_class and len(data.domain.class_var.values) == 1]
errors = [self.Error.class_required,
self.Error.too_many_classes,
self.Error.no_class_values,
self.Error.only_one_class_var_value]
for cond, error in zip(conds, errors):
data_errors = [
(
"Train data input requires a target variable.",
not data.domain.class_vars
),
("Too many target variables.", len(data.domain.class_vars) > 1),
("Target variable has no values.", np.isnan(data.Y).all()),
(
"Target variable has only one value.",
data.domain.has_discrete_class and len(unique(data.Y)) < 2
),
("Data has no features to learn from.", data.X.shape[1] == 0),
]

for error_msg, cond in data_errors:
if cond:
error()
self.Error.data_error(error_msg)
data = None
break

Expand Down
27 changes: 18 additions & 9 deletions Orange/widgets/evaluate/tests/test_owtestandscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,10 @@ def test_one_class_value(self):
"yyyy"))
)
self.widget.n_folds = 0
self.assertFalse(self.widget.Error.only_one_class_var_value.is_shown())
self.assertFalse(self.widget.Error.data_error.is_shown())
self.send_signal("Data", table)
self.send_signal("Learner", MajorityLearner(), 0, wait=1000)
self.assertTrue(self.widget.Error.only_one_class_var_value.is_shown())
self.assertTrue(self.widget.Error.data_error.is_shown())

def test_nan_class(self):
"""
Expand All @@ -178,12 +178,17 @@ def test_nan_class(self):
"""
def assertErrorShown(data, is_shown):
self.send_signal("Data", data)
self.assertEqual(is_shown, self.widget.Error.no_class_values.is_shown())
self.assertEqual(is_shown, self.widget.Error.data_error.is_shown())

data = Table("iris")[::30]
data.Y[:] = np.nan

for data, is_shown in zip([None, data, Table("iris")[:30]], [False, True, False]):
iris_empty_x = Table.from_table(Domain([], data.Y), data)

for data, is_shown in zip(
[None, data, Table("iris")[:30], iris_empty_x],
[False, True, True, True]
):
assertErrorShown(data, is_shown)

def test_addon_scorers(self):
Expand Down Expand Up @@ -319,15 +324,19 @@ def _test_scores(self, train_data, test_data, learner, sampling, n_folds):
self.send_signal(self.widget.Inputs.learner, learner, 0, wait=5000)
return self._retrieve_scores()

def test_scores_constant_all_same(self):
def test_scores_constant(self):
table = Table.from_list(
self.scores_domain,
list(zip(*self.scores_table_values + [list("yyyy")]))
list(zip(*self.scores_table_values + [list("yyyn")]))
)

self.assertTupleEqual(self._test_scores(
table, table, ConstantLearner(), OWTestAndScore.TestOnTest, None),
(None, 1, 1, 1, 1))
self.assertTupleEqual(
self._test_scores(
table, table[:3], ConstantLearner(),
OWTestAndScore.TestOnTest, None
),
(None, 1, 1, 1, 1)
)

def test_scores_log_reg_overfitted(self):
table = Table.from_list(
Expand Down

0 comments on commit c1a4679

Please sign in to comment.