Skip to content

Commit

Permalink
Rank: Fix crash on dataset with missing values
Browse files: browse the repository at this point in the history
  • Loading branch information
VesnaT committed Dec 10, 2018
1 parent acd5dc9 commit 7d4cf49
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 3 deletions.
5 changes: 4 additions & 1 deletion Orange/preprocess/fss.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import Orange
from Orange.util import Reprable
from Orange.preprocess.preprocess import Preprocess
from Orange.preprocess.score import ANOVA, GainRatio, UnivariateLinearRegression

__all__ = ["SelectBestFeatures", "RemoveNaNColumns", "SelectRandomFeatures"]

Expand Down Expand Up @@ -57,6 +56,10 @@ def __call__(self, data):
discr_ratio = (sum(a.is_discrete
for a in data.domain.attributes)
/ len(data.domain.attributes))

from Orange.preprocess.score import ANOVA, GainRatio, \
UnivariateLinearRegression

if data.domain.has_discrete_class:
if discr_ratio >= 0.5:
method = GainRatio()
Expand Down
10 changes: 9 additions & 1 deletion Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable
from Orange.data.filter import HasClass
from Orange.misc.wrapper_meta import WrapperMeta
from Orange.preprocess.fss import RemoveNaNColumns
from Orange.preprocess.preprocess import Discretize, SklImpute
from Orange.preprocess.util import _RefuseDataInConstructor
from Orange.statistics import contingency, distribution
Expand Down Expand Up @@ -66,6 +67,7 @@ def __call__(self, data, feature=None):
f = data.domain[feature]
data = data.transform(Domain([f], data.domain.class_vars))

orig_domain = data.domain
for pp in self.preprocessors:
data = pp(data)

Expand All @@ -76,7 +78,11 @@ def __call__(self, data, feature=None):
.format(self.friendly_name,
self._friendly_vartype_name(type(var))))

return self.score_data(data, feature)
scores = np.full(len(orig_domain.attributes), np.nan)
names = [a.name for a in data.domain.attributes]
mask = np.array([a.name in names for a in orig_domain.attributes])
scores[mask] = self.score_data(data, feature)
return scores

# Scoring hook: this implementation only raises NotImplementedError.
# The __call__ hunk shown above invokes it as self.score_data(data, feature)
# after running self.preprocessors, and writes the result into the masked
# positions of a NaN-filled array sized to the original domain's attributes.
# NOTE(review): presumably concrete scorers override this method -- confirm
# against the full Orange/preprocess/score.py, which is not visible here.
def score_data(self, data, feature):
raise NotImplementedError
Expand Down Expand Up @@ -340,6 +346,7 @@ class ReliefF(Scorer):
class_type = DiscreteVariable
supports_sparse_data = False
friendly_name = "ReliefF"
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]

def __init__(self, n_iterations=50, k_nearest=10, random_state=None):
self.n_iterations = n_iterations
Expand Down Expand Up @@ -374,6 +381,7 @@ class RReliefF(Scorer):
class_type = ContinuousVariable
supports_sparse_data = False
friendly_name = "RReliefF"
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]

def __init__(self, n_iterations=50, k_nearest=50, random_state=None):
self.n_iterations = n_iterations
Expand Down
8 changes: 7 additions & 1 deletion Orange/widgets/data/tests/test_owrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from Orange.regression import LinearRegressionLearner
from Orange.projection import PCA
from Orange.widgets.data.owrank import OWRank, ProblemType, CLS_SCORES, REG_SCORES
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.base import WidgetTest, datasets
from Orange.widgets.widget import AttributeList


Expand Down Expand Up @@ -347,3 +347,9 @@ def test_no_attributes(self):
self.assertTrue(self.widget.Error.no_attributes.is_shown())
self.send_signal(self.widget.Inputs.data, data)
self.assertFalse(self.widget.Error.no_attributes.is_shown())

# Smoke test added by this commit: it contains no assertions, so it passes
# as long as nothing raises.
# It checks the checkbox of every method in CLS_SCORES + REG_SCORES and sends
# each dataset yielded by datasets.datasets() to the widget's data input.
# NOTE(review): indentation was lost in this view; presumably the dataset
# loop runs after the checkbox loop rather than inside it -- confirm against
# Orange/widgets/data/tests/test_owrank.py.
def test_dataset(self):
for method in CLS_SCORES + REG_SCORES:
self._get_checkbox(method.shortname).setChecked(True)
for ds in datasets.datasets():
self.send_signal(self.widget.Inputs.data, ds)

0 comments on commit 7d4cf49

Please sign in to comment.