From e73e590a05a3f455bcffddd29fe79317a5cba571 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?= <p.godec9@gmail.com>
Date: Mon, 17 Jun 2019 13:46:00 +0200
Subject: [PATCH] Clustering: Additional tests for clustering methods

---
 Orange/tests/test_clustering_dbscan.py  |  64 +++++++++++++-
 Orange/tests/test_clustering_kmeans.py  | 111 +++++++++++++++++++++--
 Orange/tests/test_clustering_louvain.py | 113 ++++++++++++++++++++++++
 Orange/tests/test_louvain.py            |  18 ----
 4 files changed, 278 insertions(+), 28 deletions(-)
 create mode 100644 Orange/tests/test_clustering_louvain.py
 delete mode 100644 Orange/tests/test_louvain.py

diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py
index 14e107d4ba5..b6bf065abac 100644
--- a/Orange/tests/test_clustering_dbscan.py
+++ b/Orange/tests/test_clustering_dbscan.py
@@ -4,7 +4,9 @@
 import unittest
 
 import numpy as np
+from scipy.sparse import csc_matrix, csr_matrix
 
+from Orange.clustering import ClusteringModel
 from Orange.data import Table
 from Orange.clustering.dbscan import DBSCAN
 
@@ -14,15 +16,75 @@ def setUp(self):
         self.iris = Table('iris')
         self.dbscan = DBSCAN()
 
+    def test_dbscan(self):
+        labels = self.dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(labels))
+        self.assertEqual(len(self.iris), len(labels))
+        # First 20 iris belong to one cluster
+        self.assertEqual(1, len(set(labels[:20].ravel())))
+
     def test_dbscan_parameters(self):
         dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
                         algorithm='auto', leaf_size=12, p=None)
-        dbscan(self.iris)
+        c = dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_table(self):
         pred = self.dbscan(self.iris)
         self.assertEqual(np.ndarray, type(pred))
+        self.assertEqual(len(self.iris), len(pred))
 
     def test_predict_numpy(self):
         model = self.dbscan.fit(self.iris.X)
+        self.assertEqual(ClusteringModel, type(model))
         self.assertEqual(np.ndarray, type(model.labels))
+        self.assertEqual(len(self.iris), len(model.labels))
+
+    def test_predict_sparse(self):
+        self.iris.X = csc_matrix(self.iris.X[::20])  # every 20th row, sparse
+        labels = self.dbscan(self.iris)
+        self.assertEqual(np.ndarray, type(labels))
+        self.assertEqual(len(self.iris), len(labels))
+
+    def test_model(self):
+        model = self.dbscan.get_model(self.iris)
+        self.assertEqual(ClusteringModel, type(model))
+        self.assertEqual(len(self.iris), len(model.labels))
+        # calling the model on data is expected to be unsupported
+        self.assertRaises(NotImplementedError, model, self.iris)
+
+    def test_model_np(self):
+        """
+        Calling the model on a numpy array must raise NotImplementedError.
+        """
+        model = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, model, self.iris.X)
+
+    def test_model_sparse(self):
+        """
+        Calling the model on a sparse matrix must raise NotImplementedError.
+        """
+        model = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, model, csr_matrix(self.iris.X))
+
+    def test_model_instance(self):
+        """
+        Calling the model on a single instance must raise NotImplementedError.
+        """
+        model = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, model, self.iris[0])
+
+    def test_model_list(self):
+        """
+        Calling the model on a list of rows must raise NotImplementedError.
+        """
+        model = self.dbscan.get_model(self.iris)
+        self.assertRaises(NotImplementedError, model, self.iris.X.tolist())
+
+    def test_model_bad_datatype(self):
+        """
+        Calling the model with an unsupported input type raises TypeError.
+        """
+        model = self.dbscan.get_model(self.iris)
+        self.assertRaises(TypeError, model, 10)
diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py
index e406b8df204..8bd7ca48f64 100644
--- a/Orange/tests/test_clustering_kmeans.py
+++ b/Orange/tests/test_clustering_kmeans.py
@@ -2,12 +2,15 @@
 # pylint: disable=missing-docstring
 
 import unittest
+import warnings
 
 import numpy as np
-from scipy.sparse import csc_matrix
+from scipy.sparse import csc_matrix, csr_matrix
 
 import Orange
-from Orange.clustering.kmeans import KMeans
+from Orange.clustering.kmeans import KMeans, KMeansModel
+from Orange.data import Table, Domain, ContinuousVariable
+from Orange.data.table import DomainTransformationError
 
 
 class TestKMeans(unittest.TestCase):
@@ -18,25 +21,115 @@ def setUp(self):
     def test_kmeans(self):
         c = self.kmeans(self.iris)
         # First 20 iris belong to one cluster
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
         self.assertEqual(1, len(set(c[:20].ravel())))
 
     def test_kmeans_parameters(self):
         kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
                         init='random')
-        kmeans(self.iris)
+        c = kmeans(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_table(self):
-        kmeans = KMeans()
-        c = kmeans(self.iris)
+        c = self.kmeans(self.iris)
         self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
 
     def test_predict_numpy(self):
-        kmeans = KMeans()
-        c = kmeans.fit(self.iris.X)
+        c = self.kmeans.fit(self.iris.X)
+        self.assertEqual(KMeansModel, type(c))
         self.assertEqual(np.ndarray, type(c.labels))
+        self.assertEqual(len(self.iris), len(c.labels))
 
     def test_predict_sparse(self):
-        kmeans = KMeans()
         self.iris.X = csc_matrix(self.iris.X[::20])
-        c = kmeans(self.iris)
+        c = self.kmeans(self.iris)
         self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        model = self.kmeans.get_model(self.iris)
+        self.assertEqual(KMeansModel, type(model))
+        self.assertEqual(len(self.iris), len(model.labels))
+
+        # predicting on the training data must reproduce the fitted labels
+        predicted = model(self.iris)
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_np(self):
+        """
+        The model accepts a numpy array and predicts its cluster labels.
+        """
+        model = self.kmeans.get_model(self.iris)
+        # same data as in training, so the fitted labels must be reproduced
+        predicted = model(self.iris.X)
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_sparse(self):
+        """
+        The model accepts a sparse matrix and predicts its cluster labels.
+        """
+        model = self.kmeans.get_model(self.iris)
+        # same data as in training, so the fitted labels must be reproduced
+        predicted = model(csr_matrix(self.iris.X))
+        np.testing.assert_array_almost_equal(model.labels, predicted)
+
+    def test_model_instance(self):
+        """
+        The model accepts a single instance and predicts its cluster label.
+        """
+        model = self.kmeans.get_model(self.iris)
+        # the first training row must get the label it was given in fitting
+        predicted = model(self.iris[0])
+        self.assertEqual(predicted, model.labels[0])
+
+    def test_model_list(self):
+        """
+        The model accepts plain Python lists: a list of rows or a single row.
+        """
+        model = self.kmeans.get_model(self.iris)
+        rows = self.iris.X.tolist()
+        # same data as in training, so the fitted labels must be reproduced
+        np.testing.assert_array_almost_equal(model.labels, model(rows))
+
+        # a flat list holding a single data item
+        first = model(rows[0])
+        # the first training row must get the label it was given in fitting
+        np.testing.assert_array_almost_equal(model.labels[0], first)
+
+    def test_model_bad_datatype(self):
+        """
+        Calling the model with an unsupported input type raises TypeError.
+        """
+        model = self.kmeans.get_model(self.iris)
+        self.assertRaises(TypeError, model, 10)
+
+    def test_model_data_table_domain(self):
+        """
+        Check model on tables whose domain differs from the training domain.
+        """
+        # ok domain: all training attributes are present, plus an extra one
+        domain = Domain(
+            list(self.iris.domain.attributes) + [ContinuousVariable("a")])
+        x = np.concatenate((self.iris.X, np.ones((len(self.iris), 1))), axis=1)
+        c = self.kmeans.get_model(self.iris)
+        res = c(Table(domain, x))
+        np.testing.assert_array_almost_equal(c.labels, res)
+
+        # totally different domain - should fail
+        self.assertRaises(DomainTransformationError, c, Table("housing"))
+
+    def test_deprecated_silhouette(self):
+        """
+        Passing compute_silhouette_score, whatever its value, must emit
+        exactly one DeprecationWarning.
+        """
+        for value in (True, False):
+            with warnings.catch_warnings(record=True) as w:
+                # record every warning regardless of the active filters
+                warnings.simplefilter("always")
+                KMeans(compute_silhouette_score=value)
+            self.assertEqual(1, len(w))
+            self.assertTrue(issubclass(w[-1].category, DeprecationWarning))
diff --git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py
new file mode 100644
index 00000000000..19165b96cd4
--- /dev/null
+++ b/Orange/tests/test_clustering_louvain.py
@@ -0,0 +1,113 @@
+# Test methods with long descriptive names can omit docstrings
+# pylint: disable=missing-docstring
+
+import unittest
+
+import numpy as np
+import networkx
+from scipy.sparse import csc_matrix, csr_matrix
+
+from Orange.clustering import ClusteringModel, table_to_knn_graph
+from Orange.data import Table
+from Orange.clustering.louvain import Louvain
+
+
+class TestLouvain(unittest.TestCase):
+    def setUp(self):
+        self.iris = Table('iris')
+        self.louvain = Louvain()
+
+    def test_louvain(self):
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        # First 20 iris belong to one cluster
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+    def test_louvain_parameters(self):
+        louvain = Louvain(
+            k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
+        c = louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_table(self):
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_predict_numpy(self):
+        c = self.louvain.fit(self.iris.X)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(np.ndarray, type(c.labels))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_predict_sparse(self):
+        self.iris.X = csc_matrix(self.iris.X[::5])  # every 5th row, sparse
+        c = self.louvain(self.iris)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+
+    def test_model(self):
+        c = self.louvain.get_model(self.iris)
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+        # calling the model on data is expected to be unsupported
+        self.assertRaises(NotImplementedError, c, self.iris)
+
+    def test_model_np(self):
+        """
+        Calling the model on a numpy array must raise NotImplementedError.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X)
+
+    def test_model_sparse(self):
+        """
+        Calling the model on a sparse matrix must raise NotImplementedError.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))
+
+    def test_model_instance(self):
+        """
+        Calling the model on a single instance must raise NotImplementedError.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris[0])
+
+    def test_model_list(self):
+        """
+        Calling the model on a list of rows must raise NotImplementedError.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())
+
+    def test_graph(self):
+        """
+        Louvain accepts a graph built by `table_to_knn_graph` as input,
+        both for direct clustering and for building a model.
+        """
+        graph = table_to_knn_graph(self.iris.X, 30, "l2")
+        self.assertIsNotNone(graph)
+        self.assertEqual(networkx.Graph, type(graph))
+
+        # basic clustering - get clusters
+        c = self.louvain(graph)
+        self.assertEqual(np.ndarray, type(c))
+        self.assertEqual(len(self.iris), len(c))
+        # First 20 iris belong to one cluster
+        self.assertEqual(1, len(set(c[:20].ravel())))
+
+        # clustering - get model
+        c = self.louvain.get_model(graph)
+        # a ClusteringModel holding one label per data row
+        self.assertEqual(ClusteringModel, type(c))
+        self.assertEqual(len(self.iris), len(c.labels))
+
+    def test_model_bad_datatype(self):
+        """
+        Calling the model with an unsupported input type raises TypeError.
+        """
+        c = self.louvain.get_model(self.iris)
+        self.assertRaises(TypeError, c, 10)
diff --git a/Orange/tests/test_louvain.py b/Orange/tests/test_louvain.py
deleted file mode 100644
index e1c192f1eac..00000000000
--- a/Orange/tests/test_louvain.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Test methods with long descriptive names can omit docstrings
-# pylint: disable=missing-docstring
-
-import unittest
-import numpy as np
-
-from Orange.data import Table
-from Orange.clustering.louvain import Louvain
-
-
-class TestLouvain(unittest.TestCase):
-    def setUp(self):
-        self.data = Table('iris')
-        self.louvain = Louvain()
-
-    def test_orange_table(self):
-        labels = self.louvain(self.data)
-        self.assertEqual(np.ndarray, type(labels))