From e73e590a05a3f455bcffddd29fe79317a5cba571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Primo=C5=BE=20Godec?= Date: Mon, 17 Jun 2019 13:46:00 +0200 Subject: [PATCH] Clustering: Additional tests for clustering methods --- Orange/tests/test_clustering_dbscan.py | 64 +++++++++++++- Orange/tests/test_clustering_kmeans.py | 111 +++++++++++++++++++++-- Orange/tests/test_clustering_louvain.py | 113 ++++++++++++++++++++++++ Orange/tests/test_louvain.py | 18 ---- 4 files changed, 278 insertions(+), 28 deletions(-) create mode 100644 Orange/tests/test_clustering_louvain.py delete mode 100644 Orange/tests/test_louvain.py diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py index 14e107d4ba5..b6bf065abac 100644 --- a/Orange/tests/test_clustering_dbscan.py +++ b/Orange/tests/test_clustering_dbscan.py @@ -4,7 +4,9 @@ import unittest import numpy as np +from scipy.sparse import csc_matrix, csr_matrix +from Orange.clustering import ClusteringModel from Orange.data import Table from Orange.clustering.dbscan import DBSCAN @@ -14,15 +16,75 @@ def setUp(self): self.iris = Table('iris') self.dbscan = DBSCAN() + def test_dbscan(self): + c = self.dbscan(self.iris) + # First 20 iris belong to one cluster + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + self.assertEqual(1, len(set(c[:20].ravel()))) + def test_dbscan_parameters(self): dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean', algorithm='auto', leaf_size=12, p=None) - dbscan(self.iris) + c = dbscan(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) def test_predict_table(self): pred = self.dbscan(self.iris) self.assertEqual(np.ndarray, type(pred)) + self.assertEqual(len(self.iris), len(pred)) def test_predict_numpy(self): model = self.dbscan.fit(self.iris.X) + self.assertEqual(ClusteringModel, type(model)) self.assertEqual(np.ndarray, type(model.labels)) + self.assertEqual(len(self.iris), len(model.labels)) + + def test_predict_sparse(self): + self.iris.X = csc_matrix(self.iris.X[::20]) + c = self.dbscan(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + + def test_model(self): + c = self.dbscan.get_model(self.iris) + self.assertEqual(ClusteringModel, type(c)) + self.assertEqual(len(self.iris), len(c.labels)) + + self.assertRaises(NotImplementedError, c, self.iris) + + def test_model_np(self): + """ + Test with numpy array as an input in model. + """ + c = self.dbscan.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris.X) + + def test_model_sparse(self): + """ + Test with sparse array as an input in model. + """ + c = self.dbscan.get_model(self.iris) + self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X)) + + def test_model_instance(self): + """ + Test with instance as an input in model. + """ + c = self.dbscan.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris[0]) + + def test_model_list(self): + """ + Test with list as an input in model. + """ + c = self.dbscan.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris.X.tolist()) + + def test_model_bad_datatype(self): + """ + Check model with data-type that is not supported. + """ + c = self.dbscan.get_model(self.iris) + self.assertRaises(TypeError, c, 10) diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py index e406b8df204..8bd7ca48f64 100644 --- a/Orange/tests/test_clustering_kmeans.py +++ b/Orange/tests/test_clustering_kmeans.py @@ -2,12 +2,15 @@ # pylint: disable=missing-docstring import unittest +import warnings import numpy as np -from scipy.sparse import csc_matrix +from scipy.sparse import csc_matrix, csr_matrix import Orange -from Orange.clustering.kmeans import KMeans +from Orange.clustering.kmeans import KMeans, KMeansModel +from Orange.data import Table, Domain, ContinuousVariable +from Orange.data.table import DomainTransformationError class TestKMeans(unittest.TestCase): @@ -18,25 +21,115 @@ def setUp(self): def test_kmeans(self): c = self.kmeans(self.iris) # First 20 iris belong to one cluster + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) self.assertEqual(1, len(set(c[:20].ravel()))) def test_kmeans_parameters(self): kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001, init='random') - kmeans(self.iris) + c = kmeans(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) def test_predict_table(self): - kmeans = KMeans() - c = kmeans(self.iris) + c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) def test_predict_numpy(self): - kmeans = KMeans() - c = kmeans.fit(self.iris.X) + c = self.kmeans.fit(self.iris.X) + self.assertEqual(KMeansModel, type(c)) self.assertEqual(np.ndarray, type(c.labels)) + self.assertEqual(len(self.iris), len(c.labels)) def test_predict_sparse(self): - kmeans = KMeans() self.iris.X = csc_matrix(self.iris.X[::20]) - c = kmeans(self.iris) + c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + + def test_model(self): + c = self.kmeans.get_model(self.iris) + self.assertEqual(KMeansModel, type(c)) + self.assertEqual(len(self.iris), len(c.labels)) + + c1 = c(self.iris) + # prediction of the model must be same since data are same + np.testing.assert_array_almost_equal(c.labels, c1) + + def test_model_np(self): + """ + Test with numpy array as an input in model. + """ + c = self.kmeans.get_model(self.iris) + c1 = c(self.iris.X) + # prediction of the model must be same since data are same + np.testing.assert_array_almost_equal(c.labels, c1) + + def test_model_sparse(self): + """ + Test with sparse array as an input in model. + """ + c = self.kmeans.get_model(self.iris) + c1 = c(csr_matrix(self.iris.X)) + # prediction of the model must be same since data are same + np.testing.assert_array_almost_equal(c.labels, c1) + + def test_model_instance(self): + """ + Test with instance as an input in model. + """ + c = self.kmeans.get_model(self.iris) + c1 = c(self.iris[0]) + # prediction of the model must be same since data are same + self.assertEqual(c1, c.labels[0]) + + def test_model_list(self): + """ + Test with list as an input in model. + """ + c = self.kmeans.get_model(self.iris) + c1 = c(self.iris.X.tolist()) + # prediction of the model must be same since data are same + np.testing.assert_array_almost_equal(c.labels, c1) + + # example with a list of only one data item + c1 = c(self.iris.X.tolist()[0]) + # prediction of the model must be same since data are same + np.testing.assert_array_almost_equal(c.labels[0], c1) + + def test_model_bad_datatype(self): + """ + Check model with data-type that is not supported. + """ + c = self.kmeans.get_model(self.iris) + self.assertRaises(TypeError, c, 10) + + def test_model_data_table_domain(self): + """ + Check model with data-type that is not supported. + """ + # ok domain + data = Table(Domain( + list(self.iris.domain.attributes) + [ContinuousVariable("a")]), + np.concatenate((self.iris.X, np.ones((len(self.iris), 1))), axis=1)) + c = self.kmeans.get_model(self.iris) + res = c(data) + np.testing.assert_array_almost_equal(c.labels, res) + + # totally different domain - should fail + self.assertRaises(DomainTransformationError, c, Table("housing")) + + def test_deprecated_silhouette(self): + with warnings.catch_warnings(record=True) as w: + KMeans(compute_silhouette_score=True) + + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) + + with warnings.catch_warnings(record=True) as w: + KMeans(compute_silhouette_score=False) + + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) diff --git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py new file mode 100644 index 00000000000..19165b96cd4 --- /dev/null +++ b/Orange/tests/test_clustering_louvain.py @@ -0,0 +1,113 @@ +# Test methods with long descriptive names can omit docstrings +# pylint: disable=missing-docstring + +import unittest + +import numpy as np +import networkx +from scipy.sparse import csc_matrix, csr_matrix + +from Orange.clustering import ClusteringModel, table_to_knn_graph +from Orange.data import Table +from Orange.clustering.louvain import Louvain + + +class TestLouvain(unittest.TestCase): + def setUp(self): + self.iris = Table('iris') + self.louvain = Louvain() + + def test_louvain(self): + c = self.louvain(self.iris) + # First 20 iris belong to one cluster + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + self.assertEqual(1, len(set(c[:20].ravel()))) + + def test_louvain_parameters(self): + louvain = Louvain( + k_neighbors=3, resolution=1.2, random_state=42, metric="l2") + c = louvain(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + + def test_predict_table(self): + c = self.louvain(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + + def test_predict_numpy(self): + c = self.louvain.fit(self.iris.X) + self.assertEqual(ClusteringModel, type(c)) + self.assertEqual(np.ndarray, type(c.labels)) + self.assertEqual(len(self.iris), len(c.labels)) + + def test_predict_sparse(self): + self.iris.X = csc_matrix(self.iris.X[::5]) + c = self.louvain(self.iris) + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + + def test_model(self): + c = self.louvain.get_model(self.iris) + self.assertEqual(ClusteringModel, type(c)) + self.assertEqual(len(self.iris), len(c.labels)) + + self.assertRaises(NotImplementedError, c, self.iris) + + def test_model_np(self): + """ + Test with numpy array as an input in model. + """ + c = self.louvain.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris.X) + + def test_model_sparse(self): + """ + Test with sparse array as an input in model. + """ + c = self.louvain.get_model(self.iris) + self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X)) + + def test_model_instance(self): + """ + Test with instance as an input in model. + """ + c = self.louvain.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris[0]) + + def test_model_list(self): + """ + Test with list as an input in model. + """ + c = self.louvain.get_model(self.iris) + self.assertRaises(NotImplementedError, c, self.iris.X.tolist()) + + def test_graph(self): + """ + Louvain accepts graphs too. + :return: + """ + graph = table_to_knn_graph(self.iris.X, 30, "l2") + self.assertIsNotNone(graph) + self.assertEqual(networkx.Graph, type(graph), 1) + + # basic clustering - get clusters + c = self.louvain(graph) + # First 20 iris belong to one cluster + self.assertEqual(np.ndarray, type(c)) + self.assertEqual(len(self.iris), len(c)) + self.assertEqual(1, len(set(c[:20].ravel()))) + + # clustering - get model + c = self.louvain.get_model(graph) + # First 20 iris belong to one cluster + self.assertEqual(ClusteringModel, type(c)) + self.assertEqual(len(self.iris), len(c.labels)) + + def test_model_bad_datatype(self): + """ + Check model with data-type that is not supported. + """ + c = self.louvain.get_model(self.iris) + self.assertRaises(TypeError, c, 10) diff --git a/Orange/tests/test_louvain.py b/Orange/tests/test_louvain.py deleted file mode 100644 index e1c192f1eac..00000000000 --- a/Orange/tests/test_louvain.py +++ /dev/null @@ -1,18 +0,0 @@ -# Test methods with long descriptive names can omit docstrings -# pylint: disable=missing-docstring - -import unittest -import numpy as np - -from Orange.data import Table -from Orange.clustering.louvain import Louvain - - -class TestLouvain(unittest.TestCase): - def setUp(self): - self.data = Table('iris') - self.louvain = Louvain() - - def test_orange_table(self): - labels = self.louvain(self.data) - self.assertEqual(np.ndarray, type(labels))