From e73e590a05a3f455bcffddd29fe79317a5cba571 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Mon, 17 Jun 2019 13:46:00 +0200
Subject: [PATCH] Clustering: Additional tests for clustering methods
---
Orange/tests/test_clustering_dbscan.py | 64 +++++++++++++-
Orange/tests/test_clustering_kmeans.py | 111 +++++++++++++++++++++--
Orange/tests/test_clustering_louvain.py | 113 ++++++++++++++++++++++++
Orange/tests/test_louvain.py | 18 ----
4 files changed, 278 insertions(+), 28 deletions(-)
create mode 100644 Orange/tests/test_clustering_louvain.py
delete mode 100644 Orange/tests/test_louvain.py
diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index 14e107d4ba5..b6bf065abac 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -4,7 +4,9 @@
import unittest
import numpy as np
+from scipy.sparse import csc_matrix, csr_matrix
+from Orange.clustering import ClusteringModel
from Orange.data import Table
from Orange.clustering.dbscan import DBSCAN
@@ -14,15 +16,75 @@ def setUp(self):
self.iris = Table('iris')
self.dbscan = DBSCAN()
+    def test_dbscan(self):
+        """Cluster iris with the default DBSCAN and inspect the result."""
+        labels = self.dbscan(self.iris)
+        self.assertIs(type(labels), np.ndarray)
+        self.assertEqual(len(labels), len(self.iris))
+        # The first 20 iris rows are expected to share a single cluster.
+        head_clusters = set(labels[:20].ravel())
+        self.assertEqual(len(head_clusters), 1)
+
def test_dbscan_parameters(self):
dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
algorithm='auto', leaf_size=12, p=None)
- dbscan(self.iris)
+ c = dbscan(self.iris)  # result is kept so it can be checked below
+ self.assertEqual(np.ndarray, type(c))  # exact type match, not isinstance
+ self.assertEqual(len(self.iris), len(c))  # length matches the table
def test_predict_table(self):
pred = self.dbscan(self.iris)
self.assertEqual(np.ndarray, type(pred))
+ self.assertEqual(len(self.iris), len(pred))  # length matches the table
def test_predict_numpy(self):
model = self.dbscan.fit(self.iris.X)
+ self.assertEqual(ClusteringModel, type(model))  # exact type match
self.assertEqual(np.ndarray, type(model.labels))
+ self.assertEqual(len(self.iris), len(model.labels))  # labels length matches the data
+
+    def test_predict_sparse(self):
+        """Cluster a table whose X was replaced by a sparse CSC matrix."""
+        # Only every 20th row is kept before converting to CSC.
+        subsample = self.iris.X[::20]
+        self.iris.X = csc_matrix(subsample)
+        labels = self.dbscan(self.iris)
+        self.assertIs(type(labels), np.ndarray)
+        self.assertEqual(len(labels), len(self.iris))
+
+    def test_model(self):
+        """``get_model`` gives a ClusteringModel; calling it must raise."""
+        model = self.dbscan.get_model(self.iris)
+        self.assertIs(type(model), ClusteringModel)
+        self.assertEqual(len(model.labels), len(self.iris))
+
+        # Applying the fitted model to a table is expected to be unsupported.
+        with self.assertRaises(NotImplementedError):
+            model(self.iris)
+
+    def test_model_np(self):
+        """Calling the model on a numpy array must raise NotImplementedError."""
+        model = self.dbscan.get_model(self.iris)
+        array = self.iris.X
+        with self.assertRaises(NotImplementedError):
+            model(array)
+
+    def test_model_sparse(self):
+        """Calling the model on a sparse matrix must raise NotImplementedError."""
+        model = self.dbscan.get_model(self.iris)
+        # Build the CSR input up front so only the model call is guarded.
+        sparse_x = csr_matrix(self.iris.X)
+        with self.assertRaises(NotImplementedError):
+            model(sparse_x)
+
+    def test_model_instance(self):
+        """Calling the model on a single row must raise NotImplementedError."""
+        model = self.dbscan.get_model(self.iris)
+        first_row = self.iris[0]
+        with self.assertRaises(NotImplementedError):
+            model(first_row)
+
+    def test_model_list(self):
+        """Calling the model on a nested list must raise NotImplementedError."""
+        model = self.dbscan.get_model(self.iris)
+        # Convert before entering the context so only the call is guarded.
+        rows = self.iris.X.tolist()
+        with self.assertRaises(NotImplementedError):
+            model(rows)
+
+    def test_model_bad_datatype(self):
+        """Calling the model on an unsupported type (an int) must raise TypeError."""
+        model = self.dbscan.get_model(self.iris)
+        with self.assertRaises(TypeError):
+            model(10)
diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py
index e406b8df204..8bd7ca48f64 100644
--- a/Orange/tests/test_clustering_kmeans.py
+++ b/Orange/tests/test_clustering_kmeans.py
@@ -2,12 +2,15 @@
# pylint: disable=missing-docstring
import unittest
+import warnings
import numpy as np
-from scipy.sparse import csc_matrix
+from scipy.sparse import csc_matrix, csr_matrix
import Orange
-from Orange.clustering.kmeans import KMeans
+from Orange.clustering.kmeans import KMeans, KMeansModel
+from Orange.data import Table, Domain, ContinuousVariable
+from Orange.data.table import DomainTransformationError
class TestKMeans(unittest.TestCase):
@@ -18,25 +21,115 @@ def setUp(self):
def test_kmeans(self):
c = self.kmeans(self.iris)
# First 20 iris belong to one cluster
+ self.assertEqual(np.ndarray, type(c))  # exact type match, not isinstance
+ self.assertEqual(len(self.iris), len(c))  # length matches the table
self.assertEqual(1, len(set(c[:20].ravel())))
def test_kmeans_parameters(self):
kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
init='random')
- kmeans(self.iris)
+ c = kmeans(self.iris)  # result is kept so it can be checked below
+ self.assertEqual(np.ndarray, type(c))  # exact type match, not isinstance
+ self.assertEqual(len(self.iris), len(c))  # length matches the table
def test_predict_table(self):
- kmeans = KMeans()
- c = kmeans(self.iris)
+ c = self.kmeans(self.iris)  # reuses self.kmeans rather than a local KMeans()
self.assertEqual(np.ndarray, type(c))
+ self.assertEqual(len(self.iris), len(c))  # length matches the table
def test_predict_numpy(self):
- kmeans = KMeans()
- c = kmeans.fit(self.iris.X)
+ c = self.kmeans.fit(self.iris.X)  # fit() is called on iris.X directly, not on the table
+ self.assertEqual(KMeansModel, type(c))  # exact type match, not isinstance
self.assertEqual(np.ndarray, type(c.labels))
+ self.assertEqual(len(self.iris), len(c.labels))  # labels length matches the data
def test_predict_sparse(self):
- kmeans = KMeans()
self.iris.X = csc_matrix(self.iris.X[::20])
- c = kmeans(self.iris)
+ c = self.kmeans(self.iris)  # reuses self.kmeans rather than a local KMeans()
self.assertEqual(np.ndarray, type(c))
+ self.assertEqual(len(self.iris), len(c))  # length matches the table after X was replaced
+
+    def test_model(self):
+        """The fitted model must reproduce its own labels on the training table."""
+        model = self.kmeans.get_model(self.iris)
+        self.assertIs(type(model), KMeansModel)
+        self.assertEqual(len(model.labels), len(self.iris))
+
+        # Same data in, so the same cluster assignment is expected out.
+        predicted = model(self.iris)
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_np(self):
+        """Predicting on the numpy array of the training data gives the same labels."""
+        model = self.kmeans.get_model(self.iris)
+        predicted = model(self.iris.X)
+        # The input equals the training data, so predictions must match.
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_sparse(self):
+        """Predicting on a CSR copy of the training data gives the same labels."""
+        model = self.kmeans.get_model(self.iris)
+        sparse_x = csr_matrix(self.iris.X)
+        predicted = model(sparse_x)
+        # The input equals the training data, so predictions must match.
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_instance(self):
+        """Predicting on the first row gives the first training label."""
+        model = self.kmeans.get_model(self.iris)
+        first_row = self.iris[0]
+        predicted = model(first_row)
+        # The row comes from the training data, so its label is already known.
+        self.assertEqual(predicted, model.labels[0])
+
+    def test_model_list(self):
+        """Predict from plain Python lists: all rows first, then a single row."""
+        model = self.kmeans.get_model(self.iris)
+
+        # Nested list holding every row of the training data.
+        all_rows = self.iris.X.tolist()
+        predicted_all = model(all_rows)
+        np.testing.assert_array_almost_equal(model.labels, predicted_all)
+
+        # Flat list describing only the first data item.
+        single_row = self.iris.X.tolist()[0]
+        predicted_one = model(single_row)
+        np.testing.assert_array_almost_equal(model.labels[0], predicted_one)
+
+    def test_model_bad_datatype(self):
+        """Calling the model on an unsupported type (an int) must raise TypeError."""
+        model = self.kmeans.get_model(self.iris)
+        with self.assertRaises(TypeError):
+            model(10)
+
+    def test_model_data_table_domain(self):
+        """Predict on tables whose domain differs from the training domain."""
+        # Training attributes plus one extra continuous column of ones; the
+        # test expects the same labels as on the training data.
+        extra_column = np.ones((len(self.iris), 1))
+        wider_domain = Domain(
+            list(self.iris.domain.attributes) + [ContinuousVariable("a")])
+        wider_table = Table(
+            wider_domain, np.concatenate((self.iris.X, extra_column), axis=1))
+        model = self.kmeans.get_model(self.iris)
+        predicted = model(wider_table)
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+        # A table from a different data set is expected to be rejected.
+        housing = Table("housing")
+        with self.assertRaises(DomainTransformationError):
+            model(housing)
+
+    def test_deprecated_silhouette(self):
+        """Passing ``compute_silhouette_score`` must emit one DeprecationWarning.
+
+        The warning is expected for both ``True`` and ``False``.
+        """
+        for value in (True, False):
+            with self.subTest(compute_silhouette_score=value):
+                with warnings.catch_warnings(record=True) as caught:
+                    # Force every warning to be recorded; otherwise the
+                    # ambient warning filters may hide DeprecationWarning and
+                    # leave the list empty.
+                    warnings.simplefilter("always")
+                    KMeans(compute_silhouette_score=value)
+
+                # unittest assertions are used because bare ``assert``
+                # statements are stripped when Python runs with -O.
+                self.assertEqual(1, len(caught))
+                self.assertTrue(
+                    issubclass(caught[-1].category, DeprecationWarning))
diff --git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py
new file mode 100644
index 00000000000..19165b96cd4
--- /dev/null
+++ b/Orange/tests/test_clustering_louvain.py
@@ -0,0 +1,113 @@
+# Test methods with long descriptive names can omit docstrings
+# pylint: disable=missing-docstring
+
+import unittest
+
+import numpy as np
+import networkx
+from scipy.sparse import csc_matrix, csr_matrix
+
+from Orange.clustering import ClusteringModel, table_to_knn_graph
+from Orange.data import Table
+from Orange.clustering.louvain import Louvain
+
+
+class TestLouvain(unittest.TestCase):
+    """Tests for Louvain clustering, run against the iris data set."""
+
+    def setUp(self):
+        self.iris = Table('iris')
+        self.louvain = Louvain()
+
+    def test_louvain(self):
+        """Default clustering of the iris table returns an ndarray of labels."""
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        # First 20 iris belong to one cluster
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+    def test_louvain_parameters(self):
+        """Clustering also works with explicitly set constructor parameters."""
+        louvain = Louvain(
+            k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
+        c = louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_table(self):
+        """Calling the learner on a table returns labels of matching length."""
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_numpy(self):
+        """``fit`` on a numpy array returns a ClusteringModel with labels."""
+        c = self.louvain.fit(self.iris.X)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(np.ndarray, type(c.labels))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_predict_sparse(self):
+        """Clustering works when the table's X is a sparse CSC matrix."""
+        # Only every 5th row is kept before converting to CSC.
+        self.iris.X = csc_matrix(self.iris.X[::5])
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        """``get_model`` gives a ClusteringModel; calling it must raise."""
+        c = self.louvain.get_model(self.iris)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+        self.assertRaises(NotImplementedError, c, self.iris)
+
+    def test_model_np(self):
+        """
+        Test with numpy array as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X)
+
+    def test_model_sparse(self):
+        """
+        Test with sparse array as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
+
+    def test_model_instance(self):
+        """
+        Test with instance as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris[0])
+
+    def test_model_list(self):
+        """
+        Test with list as an input in model.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
+
+    def test_graph(self):
+        """
+        Louvain accepts graphs too.
+        """
+        graph = table_to_knn_graph(self.iris.X, 30, "l2")
+        self.assertIsNotNone(graph)
+        self.assertEqual(networkx.Graph, type(graph))
+
+        # basic clustering - get clusters
+        c = self.louvain(graph)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        # First 20 iris belong to one cluster
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+        # clustering - get model
+        c = self.louvain.get_model(graph)
+        # the model's labels must have the same length as the iris table
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_model_bad_datatype(self):
+        """
+        Check model with data-type that is not supported.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(TypeError, c, 10)
diff --git a/Orange/tests/test_louvain.py b/Orange/tests/test_louvain.py
deleted file mode 100644
index e1c192f1eac..00000000000
--- a/Orange/tests/test_louvain.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Test methods with long descriptive names can omit docstrings
-# pylint: disable=missing-docstring
-
-import unittest
-import numpy as np
-
-from Orange.data import Table
-from Orange.clustering.louvain import Louvain
-
-
-class TestLouvain(unittest.TestCase):
- def setUp(self):
- self.data = Table('iris')
- self.louvain = Louvain()
-
- def test_orange_table(self):
- labels = self.louvain(self.data)
- self.assertEqual(np.ndarray, type(labels))