From 7b358306cfa3d1f2d23bbbaa9e18d5501c95acf0 Mon Sep 17 00:00:00 2001 From: nikicc Date: Thu, 13 Oct 2016 11:27:01 +0200 Subject: [PATCH] Statistics.util.stats: Fix negative #nans in sparse Computing the number of nans for sparse matrices was broken and sometimes returned negative numbers. Variable `non_zero` contains the number of defined values in each column. So to calculate the number of undefined (nans) for each column we have to subtract `non_zero` from the number of values in the column i.e. `X.shape[0]` and not `X.shape[1]`. The bug was noticed when the number of defined values in some column was larger than the number of features and consequently the number of nans was negative. This caused density in OWTable to exceed 100%. --- Orange/statistics/util.py | 2 +- Orange/tests/test_statistics.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index f4cd6341cd6..55377211ec6 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -192,7 +192,7 @@ def stats(X, weights=None, compute_variance=False): X.max(axis=0).toarray().ravel(), np.asarray(X.mean(axis=0)).ravel() if not weighted else weighted_mean, np.zeros(X.shape[1]), # variance not supported - X.shape[1] - non_zero, + X.shape[0] - non_zero, non_zero)) else: nans = (~X.astype(bool)).sum(axis=0) if X.size else np.zeros(X.shape[1]) diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index d2f84414b4d..80b08d0ede2 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -48,11 +48,11 @@ def test_stats_sparse(self): # assure last two columns have just zero elements X = X[:3] - np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 4, 1], - [0, 1, 1/3, 0, 4, 1], - [0, 1, 1/3, 0, 4, 1], - [0, 0, 0, 0, 5, 0], - [0, 0, 0, 0, 5, 0]]) + np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 2, 1], + [0, 1, 1/3, 0, 2, 1], + [0, 1, 1/3, 0, 2, 1], + [0, 0, 0, 0, 3, 0], + 
[0, 0, 0, 0, 3, 0]]) def test_stats_weights(self): X = np.arange(4).reshape(2, 2).astype(float)