Skip to content

Commit

Permalink
Clustering: Additional tests for clustering methods
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jun 21, 2019
1 parent 0fc3674 commit e5f03f4
Show file tree
Hide file tree
Showing 4 changed files with 307 additions and 29 deletions.
70 changes: 69 additions & 1 deletion Orange/tests/test_clustering_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import unittest

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix

from Orange.clustering.clustering import ClusteringModel
from Orange.data import Table
from Orange.clustering.dbscan import DBSCAN

Expand All @@ -14,15 +16,81 @@ def setUp(self):
self.iris = Table('iris')
self.dbscan = DBSCAN()

def test_dbscan(self):
    """Cluster iris with the default DBSCAN and check the returned labels."""
    labels = self.dbscan(self.iris)
    # The result is a plain ndarray holding one entry per data row.
    self.assertIs(type(labels), np.ndarray)
    self.assertEqual(len(self.iris), len(labels))
    # First 20 iris belong to one cluster
    leading_clusters = set(labels[:20].ravel())
    self.assertEqual(1, len(leading_clusters))

def test_dbscan_parameters(self):
    """Run DBSCAN with explicitly set parameters and check the labels.

    The learner is built with non-default ``eps``, ``min_samples``,
    ``metric``, ``algorithm``, ``leaf_size`` and ``p`` values; the test
    only verifies the type and length of the returned label array.
    """
    dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
                    algorithm='auto', leaf_size=12, p=None)
    # One run is enough: the returned labels are what is being checked.
    c = dbscan(self.iris)
    self.assertEqual(np.ndarray, type(c))
    self.assertEqual(len(self.iris), len(c))

def test_predict_table(self):
    """Calling the learner on a Table yields one ndarray entry per row."""
    labels = self.dbscan(self.iris)
    self.assertIs(type(labels), np.ndarray)
    expected_rows = len(self.iris)
    self.assertEqual(expected_rows, len(labels))

def test_predict_numpy(self):
    """Fit DBSCAN on the raw X array and inspect the returned model."""
    fitted = self.dbscan.fit(self.iris.X)
    # ``fit`` is expected to return a ClusteringModel, not bare labels.
    self.assertIs(type(fitted), ClusteringModel)
    model_labels = fitted.labels
    self.assertIs(type(model_labels), np.ndarray)
    self.assertEqual(len(self.iris), len(model_labels))

def test_predict_sparse_csc(self):
    """Cluster a table whose X was replaced by a CSC sparse matrix."""
    # Keep every 20th row only and store it in CSC format.
    every_twentieth = self.iris.X[::20]
    self.iris.X = csc_matrix(every_twentieth)
    labels = self.dbscan(self.iris)
    self.assertIs(type(labels), np.ndarray)
    self.assertEqual(len(self.iris), len(labels))

def test_predict_spares_csr(self):
    """Cluster a table whose X was replaced by a CSR sparse matrix."""
    # NOTE(review): "spares" in the method name looks like a typo for
    # "sparse"; the name is kept so test selection by name still works.
    every_twentieth = self.iris.X[::20]
    self.iris.X = csr_matrix(every_twentieth)
    labels = self.dbscan(self.iris)
    self.assertIs(type(labels), np.ndarray)
    self.assertEqual(len(self.iris), len(labels))

def test_model(self):
    """Check the model built by ``get_model`` and that calling it fails."""
    model = self.dbscan.get_model(self.iris)
    self.assertIs(type(model), ClusteringModel)
    self.assertEqual(len(self.iris), len(model.labels))

    # Applying the model to a table must raise NotImplementedError.
    with self.assertRaises(NotImplementedError):
        model(self.iris)

def test_model_np(self):
    """
    Calling the model with a numpy array must raise NotImplementedError.
    """
    model = self.dbscan.get_model(self.iris)
    dense_x = self.iris.X
    with self.assertRaises(NotImplementedError):
        model(dense_x)

def test_model_sparse(self):
    """
    Calling the model with a CSR sparse matrix must raise
    NotImplementedError.
    """
    model = self.dbscan.get_model(self.iris)
    # Build the argument first so only the model call is under test.
    sparse_x = csr_matrix(self.iris.X)
    with self.assertRaises(NotImplementedError):
        model(sparse_x)

def test_model_instance(self):
    """
    Calling the model with a single table row must raise
    NotImplementedError.
    """
    model = self.dbscan.get_model(self.iris)
    first_row = self.iris[0]
    with self.assertRaises(NotImplementedError):
        model(first_row)

def test_model_list(self):
    """
    Calling the model with a nested list must raise NotImplementedError.
    """
    model = self.dbscan.get_model(self.iris)
    rows = self.iris.X.tolist()
    with self.assertRaises(NotImplementedError):
        model(rows)

def test_model_bad_datatype(self):
    """
    Calling the model with an unsupported type (an int) must raise
    TypeError.
    """
    model = self.dbscan.get_model(self.iris)
    with self.assertRaises(TypeError):
        model(10)
128 changes: 118 additions & 10 deletions Orange/tests/test_clustering_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
# pylint: disable=missing-docstring

import unittest
import warnings

import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse import csc_matrix, csr_matrix

import Orange
from Orange.clustering.kmeans import KMeans
from Orange.clustering.kmeans import KMeans, KMeansModel
from Orange.data import Table, Domain, ContinuousVariable
from Orange.data.table import DomainTransformationError


class TestKMeans(unittest.TestCase):
Expand All @@ -18,25 +21,130 @@ def setUp(self):
def test_kmeans(self):
    """Cluster iris with the fixture KMeans and check the returned labels."""
    labels = self.kmeans(self.iris)
    # The result is a plain ndarray holding one entry per data row.
    self.assertIs(type(labels), np.ndarray)
    self.assertEqual(len(self.iris), len(labels))
    # First 20 iris belong to one cluster
    leading_clusters = set(labels[:20].ravel())
    self.assertEqual(1, len(leading_clusters))

def test_kmeans_parameters(self):
    """Run KMeans with explicitly set parameters and check the labels.

    The learner is built with non-default ``n_clusters``, ``max_iter``,
    ``random_state``, ``tol`` and ``init`` values; the test only verifies
    the type and length of the returned label array.
    """
    kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
                    init='random')
    # One run is enough: the returned labels are what is being checked.
    c = kmeans(self.iris)
    self.assertEqual(np.ndarray, type(c))
    self.assertEqual(len(self.iris), len(c))

def test_predict_table(self):
    """Calling the fixture learner on a Table yields one label per row."""
    # Use the learner prepared in setUp; no extra instance or second
    # clustering run is needed for the checks below.
    c = self.kmeans(self.iris)
    self.assertEqual(np.ndarray, type(c))
    self.assertEqual(len(self.iris), len(c))

def test_predict_numpy(self):
    """Fit the fixture KMeans on the raw X array and inspect the model."""
    # A single fit on the setUp learner provides the model under test.
    c = self.kmeans.fit(self.iris.X)
    self.assertEqual(KMeansModel, type(c))
    self.assertEqual(np.ndarray, type(c.labels))
    self.assertEqual(len(self.iris), len(c.labels))

def test_predict_sparse_csc(self):
    """Cluster a table whose X was replaced by a CSC sparse matrix."""
    # Keep every 20th row only and store it in CSC format.
    self.iris.X = csc_matrix(self.iris.X[::20])
    # Use the learner from setUp; no local ``kmeans`` exists in this test.
    c = self.kmeans(self.iris)
    self.assertEqual(np.ndarray, type(c))
    self.assertEqual(len(self.iris), len(c))

def test_predict_spares_csr(self):
    """Cluster a table whose X was replaced by a CSR sparse matrix."""
    # NOTE(review): "spares" in the method name looks like a typo for
    # "sparse"; the name is kept so test selection by name still works.
    every_twentieth = self.iris.X[::20]
    self.iris.X = csr_matrix(every_twentieth)
    labels = self.kmeans(self.iris)
    self.assertIs(type(labels), np.ndarray)
    self.assertEqual(len(self.iris), len(labels))

def test_model(self):
    """Check the KMeans model and its predictions on the training table."""
    model = self.kmeans.get_model(self.iris)
    self.assertIs(type(model), KMeansModel)
    self.assertEqual(len(self.iris), len(model.labels))

    # prediction of the model must be same since data are same
    predicted = model(self.iris)
    np.testing.assert_array_almost_equal(model.labels, predicted)

def test_model_np(self):
    """
    Predict with the model on the numpy X array used for training.
    """
    model = self.kmeans.get_model(self.iris)
    predicted = model(self.iris.X)
    # Same data in, so the predictions must match the stored labels.
    np.testing.assert_array_almost_equal(model.labels, predicted)

def test_model_sparse_csc(self):
    """
    Predict with the model on a CSC sparse copy of the training X.
    """
    model = self.kmeans.get_model(self.iris)
    sparse_x = csc_matrix(self.iris.X)
    predicted = model(sparse_x)
    # Same data in, so the predictions must match the stored labels.
    np.testing.assert_array_almost_equal(model.labels, predicted)

def test_model_sparse_csr(self):
    """
    Predict with the model on a CSR sparse copy of the training X.
    """
    model = self.kmeans.get_model(self.iris)
    sparse_x = csr_matrix(self.iris.X)
    predicted = model(sparse_x)
    # Same data in, so the predictions must match the stored labels.
    np.testing.assert_array_almost_equal(model.labels, predicted)

def test_model_instance(self):
    """
    Predict with the model on a single row of the training table.
    """
    model = self.kmeans.get_model(self.iris)
    first_row = self.iris[0]
    predicted = model(first_row)
    # The row was part of the training data, so its label is known.
    self.assertEqual(predicted, model.labels[0])

def test_model_list(self):
    """
    Predict with the model on list input: all rows, then a single row.
    """
    model = self.kmeans.get_model(self.iris)

    all_rows = self.iris.X.tolist()
    predicted = model(all_rows)
    # Same data in, so the predictions must match the stored labels.
    np.testing.assert_array_almost_equal(model.labels, predicted)

    # example with a list of only one data item
    single_row = self.iris.X.tolist()[0]
    predicted = model(single_row)
    np.testing.assert_array_almost_equal(model.labels[0], predicted)

def test_model_bad_datatype(self):
    """
    Calling the model with an unsupported type (an int) must raise
    TypeError.
    """
    model = self.kmeans.get_model(self.iris)
    with self.assertRaises(TypeError):
        model(10)

def test_model_data_table_domain(self):
    """
    Check how the model handles tables whose domain differs from the
    training domain: an extra column is accepted, an unrelated domain
    raises DomainTransformationError.
    """
    # ok domain: the iris attributes plus one additional column of ones
    extended_attrs = list(self.iris.domain.attributes)
    extended_attrs = extended_attrs + [ContinuousVariable("a")]
    ones_column = np.ones((len(self.iris), 1))
    extended_x = np.concatenate((self.iris.X, ones_column), axis=1)
    data = Table(Domain(extended_attrs), extended_x)

    model = self.kmeans.get_model(self.iris)
    predicted = model(data)
    np.testing.assert_array_almost_equal(model.labels, predicted)

    # totally different domain - should fail
    unrelated = Table("housing")
    with self.assertRaises(DomainTransformationError):
        model(unrelated)

def test_deprecated_silhouette(self):
    """Passing ``compute_silhouette_score`` must emit a DeprecationWarning.

    Both ``True`` and ``False`` are checked: each construction is expected
    to produce exactly one warning, and that warning must be a
    DeprecationWarning (or a subclass).
    """
    for flag in (True, False):
        with self.subTest(compute_silhouette_score=flag):
            with warnings.catch_warnings(record=True) as caught:
                # Record every warning regardless of the filters that
                # happen to be active in the surrounding test run.
                warnings.simplefilter("always")
                KMeans(compute_silhouette_score=flag)

            # unittest assertions are used instead of bare ``assert`` so
            # the checks still run when Python is started with -O.
            self.assertEqual(1, len(caught))
            self.assertTrue(
                issubclass(caught[-1].category, DeprecationWarning))
120 changes: 120 additions & 0 deletions Orange/tests/test_clustering_louvain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring

import unittest

import numpy as np
import networkx
from scipy.sparse import csc_matrix, csr_matrix

from Orange.clustering.clustering import ClusteringModel
from Orange.clustering.louvain import table_to_knn_graph
from Orange.data import Table
from Orange.clustering.louvain import Louvain


class TestLouvain(unittest.TestCase):
    """Tests for the Louvain clustering learner and the model it returns."""

    def setUp(self):
        """Load the iris table and create a default Louvain learner."""
        self.iris = Table('iris')
        self.louvain = Louvain()

    def test_louvain(self):
        """Cluster iris with the default learner and check the labels."""
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

    def test_louvain_parameters(self):
        """Run Louvain with explicitly set parameters and check the labels."""
        louvain = Louvain(
            k_neighbors=3, resolution=1.2, random_state=42, metric="l2")
        c = louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_table(self):
        """Calling the learner on a Table yields one ndarray entry per row."""
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_numpy(self):
        """Fit Louvain on the raw X array and inspect the returned model."""
        c = self.louvain.fit(self.iris.X)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(np.ndarray, type(c.labels))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_predict_sparse_csc(self):
        """Cluster a table whose X was replaced by a CSC sparse matrix."""
        # Keep every 5th row only and store it in CSC format.
        self.iris.X = csc_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_spares_csr(self):
        """Cluster a table whose X was replaced by a CSR sparse matrix."""
        # NOTE(review): "spares" in the method name looks like a typo for
        # "sparse"; the name is kept so test selection by name still works.
        self.iris.X = csr_matrix(self.iris.X[::5])
        c = self.louvain(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_model(self):
        """Check the model built by ``get_model`` and that calling it fails."""
        c = self.louvain.get_model(self.iris)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

        # Applying the model to a table must raise NotImplementedError.
        self.assertRaises(NotImplementedError, c, self.iris)

    def test_model_np(self):
        """
        Test with numpy array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X)

    def test_model_sparse(self):
        """
        Test with sparse array as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, csr_matrix(self.iris.X))

    def test_model_instance(self):
        """
        Test with instance as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris[0])

    def test_model_list(self):
        """
        Test with list as an input in model.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(NotImplementedError, c, self.iris.X.tolist())

    def test_graph(self):
        """
        Louvain accepts graphs too.
        """
        graph = table_to_knn_graph(self.iris.X, 30, "l2")
        self.assertIsNotNone(graph)
        # The third positional argument of assertEqual is the failure
        # message, so only the two compared values are passed here.
        self.assertEqual(networkx.Graph, type(graph))

        # basic clustering - get clusters
        c = self.louvain(graph)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

        # clustering - get model
        c = self.louvain.get_model(graph)
        self.assertEqual(ClusteringModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_model_bad_datatype(self):
        """
        Check model with data-type that is not supported.
        """
        c = self.louvain.get_model(self.iris)
        self.assertRaises(TypeError, c, 10)
18 changes: 0 additions & 18 deletions Orange/tests/test_louvain.py

This file was deleted.

0 comments on commit e5f03f4

Please sign in to comment.