Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Rank adapted for sparse data #1399

Merged
merged 2 commits into from
Jul 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,7 +982,8 @@ def total_weight(self):

def has_missing(self):
    """Return `True` if there are any missing attribute or class values.

    When `self.X` is a SciPy sparse matrix the attribute check is skipped,
    so in that case only `self._Y` is inspected.
    """
    # do not check for sparse X
    missing_x = not sp.issparse(self.X) and bn.anynan(self.X)
    return missing_x or bn.anynan(self._Y)

def has_missing_class(self):
"""Return `True` if there are any missing class values."""
Expand Down
7 changes: 7 additions & 0 deletions Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
class Scorer:
feature_type = None
class_type = None
supports_sparse_data = None
preprocessors = [
RemoveNaNClasses()
]
Expand Down Expand Up @@ -62,6 +63,8 @@ def score_data(self, data, feature):


class SklScorer(Scorer, metaclass=WrapperMeta):
supports_sparse_data = True

preprocessors = Scorer.preprocessors + [
Impute()
]
Expand Down Expand Up @@ -172,6 +175,7 @@ class ClassificationScorer(Scorer):
"""
feature_type = DiscreteVariable
class_type = DiscreteVariable
supports_sparse_data = True
preprocessors = Scorer.preprocessors + [
Discretize(remove_const=False)
]
Expand Down Expand Up @@ -302,6 +306,7 @@ class ReliefF(Scorer):
"""
feature_type = Variable
class_type = DiscreteVariable
supports_sparse_data = False

def __init__(self, n_iterations=50, k_nearest=10):
self.n_iterations = n_iterations
Expand All @@ -324,9 +329,11 @@ def score_data(self, data, feature):
return weights[0]
return weights


class RReliefF(Scorer):
feature_type = Variable
class_type = ContinuousVariable
supports_sparse_data = False

def __init__(self, n_iterations=50, k_nearest=50):
self.n_iterations = n_iterations
Expand Down
5 changes: 5 additions & 0 deletions Orange/widgets/data/owrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections import namedtuple

import numpy as np
from scipy.sparse import issparse

from PyQt4 import QtGui
from PyQt4.QtCore import Qt
Expand Down Expand Up @@ -255,6 +256,10 @@ def setData(self, data):
self.error(0, "Cannot handle class variable type %r" %
type(self.data.domain.class_var).__name__)

if issparse(self.data.X): # keep only measures supporting sparse data
self.measures = [m for m in self.measures
if m.score.supports_sparse_data]

self.ranksModel.setRowCount(len(attrs))
for i, a in enumerate(attrs):
if a.is_discrete:
Expand Down