Skip to content

Commit

Permalink
Clustering: Fixed tests
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jun 6, 2019
1 parent 178a862 commit 7016fbc
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Orange/tests/test_clustering_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ def test_predict_numpy(self):
dbscan = DBSCAN()
c = dbscan(self.iris)
X = self.iris.X[::20]
- p = c(X)
+ p = c.predict(X)
25 changes: 5 additions & 20 deletions Orange/tests/test_clustering_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def setUpClass(cls):
def test_kmeans(self):
kmeans = KMeans(n_clusters=2)
c = kmeans(self.iris)
- X = self.iris.X[:20]
+ X = self.iris[:20]
p = c(X)
# First 20 iris belong to one cluster
assert len(set(p.ravel())) == 1
Expand All @@ -28,16 +28,9 @@ def test_kmeans_parameters(self):
max_iter=10,
random_state=42,
tol=0.001,
- init='random',
- compute_silhouette_score=True)
+ init='random')
c = kmeans(self.iris)

- def test_predict_single_instance(self):
- kmeans = KMeans()
- c = kmeans(self.iris)
- inst = self.iris[0]
- p = c(inst)

def test_predict_table(self):
kmeans = KMeans()
c = kmeans(self.iris)
Expand All @@ -48,18 +41,10 @@ def test_predict_numpy(self):
kmeans = KMeans()
c = kmeans(self.iris)
X = self.iris.X[::20]
- p = c(X)
+ p = c.predict(X)

def test_predict_sparse(self):
kmeans = KMeans()
c = kmeans(self.iris)
- X = csc_matrix(self.iris.X[::20])
- p = c(X)
-
- def test_silhouette_sparse(self):
- """Test if silhouette gets calculated for sparse data"""
- kmeans = KMeans(compute_silhouette_score=True)
- sparse_iris = self.iris.copy()
- sparse_iris.X = csc_matrix(sparse_iris.X)
- c = kmeans(sparse_iris)
- self.assertFalse(np.isnan(c.silhouette))
+ self.iris.X = csc_matrix(self.iris.X[::20])
+ p = c(self.iris)
15 changes: 5 additions & 10 deletions Orange/tests/test_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,17 @@
import numpy as np

from Orange.data import Table
- from Orange.clustering.louvain import Louvain
+ from Orange.clustering.louvain import Louvain, LouvainModel


- class TestSVMLearner(unittest.TestCase):
+ class TestLouvain(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.data = Table('iris')
cls.louvain = Louvain()

def test_orange_table(self):
- self.assertIsNone(self.louvain.fit(self.data))
- clusters = self.louvain.fit_predict(self.data)
- self.assertIn(type(clusters), [list, np.ndarray])
-
- def test_np_array(self):
- data_np = self.data.X
- self.assertIsNone(self.louvain.fit(data_np))
- clusters = self.louvain.fit_predict(data_np)
+ louvain_model = self.louvain(self.data)
+ self.assertEqual(type(louvain_model), LouvainModel)
+ clusters = louvain_model(self.data)
self.assertIn(type(clusters), [list, np.ndarray])
36 changes: 19 additions & 17 deletions Orange/widgets/unsupervised/tests/test_owkmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from AnyQt.QtCore import Qt
from AnyQt.QtWidgets import QRadioButton
+ from sklearn.metrics import silhouette_score

import Orange.clustering
from Orange.data import Table, Domain
Expand Down Expand Up @@ -199,24 +200,22 @@ def test_data_on_output(self):

@patch("Orange.clustering.kmeans.KMeansModel.__call__")
def test_centroids_on_output(self, km_call):
- ret = km_call.return_value = Mock()
- ret.X = np.array([0] * 50 + [1] * 100)
- ret.silhouette_samples = np.arange(150) / 150
+ km_call.return_value = np.array([0] * 50 + [1] * 100).flatten()

widget = self.widget
widget.optimize_k = False
widget.k = 4
self.send_signal(widget.Inputs.data, self.iris)
self.commit_and_wait()

- widget.clusterings[4].silhouette_samples = np.arange(150) / 150
+ widget.samples_scores = lambda x: np.arctan(
+ np.arange(150) / 150) / np.pi + 0.5
widget.send_data()
out = self.get_output(widget.Outputs.centroids)
- np.testing.assert_almost_equal(
- out.metas,
- [[0, np.mean(np.arctan(np.arange(50) / 150)) / np.pi + 0.5],
+ np.testing.assert_array_almost_equal(
+ np.array([[0, np.mean(np.arctan(np.arange(50) / 150)) / np.pi + 0.5],
[1, np.mean(np.arctan(np.arange(50, 150) / 150)) / np.pi + 0.5],
- [2, 0], [3, 0]])
+ [2, 0], [3, 0]]), out.metas.astype(float))
self.assertEqual(out.name, "iris centroids")

def test_centroids_domain_on_output(self):
Expand Down Expand Up @@ -262,12 +261,14 @@ def test_optimization_fails(self):
self.KMeansFail.fail_on = {3, 5, 7}
model = widget.table_view.model()

- with patch.object(model, "set_scores", wraps=model.set_scores) as set_scores:
+ with patch.object(
+ model, "set_scores", wraps=model.set_scores) as set_scores:
self.send_signal(self.widget.Inputs.data, self.iris, wait=5000)
scores, start_k = set_scores.call_args[0]
self.assertEqual(
scores,
- [km if isinstance(km, str) else km.silhouette
+ [km if isinstance(km, str) else silhouette_score(
+ self.iris.X, km(self.iris))
for km in (widget.clusterings[k] for k in range(3, 9))]
)
self.assertEqual(start_k, 3)
Expand Down Expand Up @@ -312,15 +313,14 @@ def test_run_fails(self):
self.assertIsNotNone(self.get_output(self.widget.Outputs.annotated_data))

def test_select_best_row(self):
- class Cluster:
- def __init__(self, n):
- self.silhouette = n

widget = self.widget
widget.k_from, widget.k_to = 2, 6
- widget.clusterings = {k: Cluster(5 - (k - 4) ** 2) for k in range(2, 7)}
+ widget.optimize_k = True
+ self.send_signal(self.widget.Inputs.data, Table("housing"), wait=5000)
+ self.commit_and_wait()
widget.update_results()
- self.assertEqual(widget.selected_row(), 2)
+ # for housing dataset best selection is 3 clusters, so row no. 1
+ self.assertEqual(widget.selected_row(), 1)

widget.clusterings = {k: "error" for k in range(2, 7)}
widget.update_results()
Expand Down Expand Up @@ -394,7 +394,9 @@ def test_silhouette_column(self):
# Avoid randomness in the test
random = np.random.RandomState(0) # pylint: disable=no-member
table = Table(random.rand(110, 2))
- with patch("Orange.clustering.kmeans.SILHOUETTE_MAX_SAMPLES", 100):
+ with patch(
+ "Orange.widgets.unsupervised.owkmeans.SILHOUETTE_MAX_SAMPLES",
+ 100):
self.send_signal(self.widget.Inputs.data, table)
outtable = self.get_output(widget.Outputs.annotated_data)
outtable = outtable.get_column_view("Silhouette")[0]
Expand Down

0 comments on commit 7016fbc

Please sign in to comment.