Skip to content

Commit

Permalink
#548: Verify input arguments for algorithm in cluster module.
Browse files Browse the repository at this point in the history
  • Loading branch information
annoviko committed Oct 9, 2019
1 parent b123a4f commit be809d7
Show file tree
Hide file tree
Showing 14 changed files with 206 additions and 62 deletions.
10 changes: 5 additions & 5 deletions pyclustering/cluster/center_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,16 @@ def __check_parameters(self):
"""
if (self.__amount <= 0) or (self.__amount > len(self.__data)):
raise AttributeError("Amount of cluster centers '" + str(self.__amount) + "' should be at least 1 and "
"should be less or equal to amount of points in data.")
raise ValueError("Amount of cluster centers '" + str(self.__amount) + "' should be at least 1 and "
"should be less or equal to amount of points in data.")

if self.__candidates != kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE:
if (self.__candidates <= 0) or (self.__candidates > len(self.__data)):
raise AttributeError("Amount of center candidates '" + str(self.__candidates) + "' should be at least 1 "
"and should be less or equal to amount of points in data.")
raise ValueError("Amount of center candidates '" + str(self.__candidates) + "' should be at least 1 "
"and should be less or equal to amount of points in data.")

if len(self.__data) == 0:
raise AttributeError("Data is empty.")
raise ValueError("Data is empty.")


def __calculate_shortest_distances(self, data, centers):
Expand Down
26 changes: 24 additions & 2 deletions pyclustering/cluster/optics.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ class optics:
"""

def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **kwargs):
def __init__(self, sample, eps, minpts, amount_clusters=None, ccore=True, **kwargs):
"""!
@brief Constructor of clustering algorithm OPTICS.
Expand Down Expand Up @@ -430,6 +430,8 @@ def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **
if self.__ccore:
self.__ccore = ccore_library.workable()

self.__verify_arguments()


def process(self):
"""!
Expand Down Expand Up @@ -772,4 +774,24 @@ def __neighbor_indexes_distance_matrix(self, optic_object):
"""
distances = self.__sample_pointer[optic_object.index_object]
return [[index_neighbor, distances[index_neighbor]] for index_neighbor in range(len(distances))
if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]
if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != optic_object.index_object))]


def __verify_arguments(self):
    """!
    @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
    @details Checks are performed in the following order: input sample is not empty, connectivity radius
              is not negative, minimum number of neighbors is not negative and - only when it is specified -
              requested amount of clusters is positive.

    @exception ValueError if one of the input parameters is incorrect.

    """
    if len(self.__sample_pointer) == 0:
        raise ValueError("Input data is empty (size: '%d')." % len(self.__sample_pointer))

    if self.__eps < 0:
        # '%s' is used instead of '%d' because the radius may be a float and '%d' truncates it
        # (for example, -0.5 would be reported as '0').
        raise ValueError("Connectivity radius (current value: '%s') should be greater or equal to 0." % self.__eps)

    if self.__minpts < 0:
        # Zero passes this check, therefore the message names the bound that is really enforced.
        raise ValueError("Minimum number of neighbors (current value: '%s') should be greater or equal to 0." %
                         self.__minpts)

    if (self.__amount_clusters is not None) and (self.__amount_clusters <= 0):
        raise ValueError("Amount of clusters (current value: '%s') should be greater than 0." %
                         self.__amount_clusters)
29 changes: 25 additions & 4 deletions pyclustering/cluster/rock.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,15 @@ def __init__(self, data, eps, number_clusters, threshold=0.5, ccore=True):
self.__ccore = ccore
if self.__ccore:
self.__ccore = ccore_library.workable()


self.__verify_arguments()

self.__degree_normalization = 1.0 + 2.0 * ((1.0 - threshold) / (1.0 + threshold))

self.__adjacency_matrix = None
self.__create_adjacency_matrix()


def process(self):
"""!
@brief Performs cluster analysis in line with rules of ROCK algorithm.
Expand Down Expand Up @@ -229,3 +231,22 @@ def __calculate_goodness(self, cluster1, cluster2):
devider = (len(cluster1) + len(cluster2)) ** self.__degree_normalization - len(cluster1) ** self.__degree_normalization - len(cluster2) ** self.__degree_normalization

return number_links / devider


def __verify_arguments(self):
    """!
    @brief Verify input parameters for the algorithm and throw exception in case of incorrectness.
    @details Checks are performed in the following order: input data is not empty, connectivity radius
              is not negative, threshold belongs to the closed range [0, 1] and - only when it is specified -
              requested amount of clusters is positive.

    @exception ValueError if one of the input parameters is incorrect.

    """
    if len(self.__pointer_data) == 0:
        raise ValueError("Input data is empty (size: '%d')." % len(self.__pointer_data))

    if self.__eps < 0:
        # '%s' keeps the fractional part of the value; '%d' would report -0.5 as '0'.
        raise ValueError("Connectivity radius (current value: '%s') should be greater or equal to 0." % self.__eps)

    if self.__threshold < 0 or self.__threshold > 1:
        # Threshold is a fraction, so '%d' would hide the incorrect value (1.1 -> '1', -0.1 -> '0').
        # Both bounds are accepted by the condition, therefore the range is written as closed.
        raise ValueError("Threshold (current value: '%s') should be in range [0, 1]." % self.__threshold)

    if (self.__number_clusters is not None) and (self.__number_clusters <= 0):
        raise ValueError("Amount of clusters (current value: '%s') should be greater than 0." %
                         self.__number_clusters)
14 changes: 14 additions & 0 deletions pyclustering/cluster/silhouette.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def __init__(self, data, clusters, **kwargs):
if self.__ccore is False:
self.__data = numpy.array(data)

self.__verify_arguments()


def process(self):
"""!
Expand Down Expand Up @@ -273,6 +275,18 @@ def __calculate_dataset_difference(self, index_point):
return dataset_differences


def __verify_arguments(self):
    """!
    @brief Validate constructor input and raise an exception when it cannot be processed.
    @details Input data is checked first and only then the clusters; each of them must contain
              at least one element.

    @exception ValueError if input data or input clusters are empty.

    """
    points_amount = len(self.__data)
    if not points_amount:
        raise ValueError("Input data is empty (size: '%d')." % points_amount)

    clusters_amount = len(self.__clusters)
    if not clusters_amount:
        raise ValueError("Input clusters are empty (size: '%d')." % clusters_amount)



class silhouette_ksearch_type(IntEnum):
"""!
Expand Down
19 changes: 19 additions & 0 deletions pyclustering/cluster/somsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ def __init__(self, data, amount_clusters, epouch=100, ccore=True):

self.__network = None

self.__verify_arguments()


def process(self):
"""!
Expand Down Expand Up @@ -121,3 +123,20 @@ def get_cluster_encoding(self):
"""

return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


def __verify_arguments(self):
    """!
    @brief Validate constructor input and raise an exception when it cannot be processed.
    @details Checks are performed one after another: input data must not be empty, amount of clusters
              must be positive and amount of epochs must not be negative.

    @exception ValueError if one of the input parameters is incorrect.

    """
    points_amount = len(self.__data_pointer)
    if not points_amount:
        raise ValueError("Input data is empty (size: '%d')." % points_amount)

    clusters_amount = self.__amount_clusters
    if clusters_amount <= 0:
        raise ValueError("Amount of clusters (current value: '%d') should be greater than 0." %
                         clusters_amount)

    epochs_amount = self.__epouch
    if epochs_amount < 0:
        raise ValueError("Amount of epouch (current value: '%d') should be greater or equal to 0." %
                         epochs_amount)
16 changes: 14 additions & 2 deletions pyclustering/cluster/syncnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ class syncnet(sync_network):
"""

def __init__(self, sample, radius, conn_repr = conn_represent.MATRIX, initial_phases = initial_type.RANDOM_GAUSSIAN, enable_conn_weight = False, ccore = True):
def __init__(self, sample, radius, conn_repr=conn_represent.MATRIX, initial_phases=initial_type.RANDOM_GAUSSIAN,
enable_conn_weight=False, ccore=True):
"""!
@brief Constructor of the oscillatory network SYNC for cluster analysis.
Expand All @@ -215,7 +216,9 @@ def __init__(self, sample, radius, conn_repr = conn_represent.MATRIX, initial_ph
self._ccore_network_pointer = None
self._osc_loc = sample
self._num_osc = len(sample)


self._verify_arguments()

if (ccore is True) and ccore_library.workable():
self._ccore_network_pointer = syncnet_create_network(sample, radius, initial_phases, enable_conn_weight)

Expand Down Expand Up @@ -244,6 +247,15 @@ def __del__(self):
self._ccore_network_pointer = None


def _verify_arguments(self):
    """!
    @brief Validate constructor input and raise an exception when it cannot be processed.
    @details The only check performed here is that the amount of oscillators is positive.

    @exception ValueError if amount of oscillators is not positive.

    """
    oscillators_amount = self._num_osc
    if oscillators_amount > 0:
        return

    raise ValueError("Input data is empty (size: '%d')." % oscillators_amount)


def _create_connections(self, radius):
"""!
@brief Create connections between oscillators in line with input radius of connectivity.
Expand Down
4 changes: 0 additions & 4 deletions pyclustering/cluster/tests/unit/ut_center_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,3 @@ def templateKmeansPlusPlusVariousCentersSimple02(self):
def templateKmeansPlusPlusVariousCentersSimple03(self):
self.templateKmeansPlusPlusSeveralRuns(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 4, 1)
self.templateKmeansPlusPlusSeveralRuns(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 8, 1)


if __name__ == "__main__":
unittest.main()
13 changes: 11 additions & 2 deletions pyclustering/cluster/tests/unit/ut_optics.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,14 @@ def testImpossibleClusterOrderingAllocationGeterogeneous(self):
assert 0 == len(borders)


if __name__ == "__main__":
unittest.main()
def test_incorrect_data(self):
    # Empty sample: the constructor itself is expected to reject it.
    with self.assertRaises(ValueError):
        optics([], 0.1, 1)

def test_incorrect_eps(self):
    # Negative connectivity radius (second positional argument).
    with self.assertRaises(ValueError):
        optics([[0], [1], [2]], -1.0, 1)

def test_incorrect_minpts(self):
    # Negative minimum number of neighbors (third positional argument).
    with self.assertRaises(ValueError):
        optics([[0], [1], [2]], 0.5, -1)

def test_incorrect_amount_clusters(self):
    # Explicitly requested amount of clusters must be positive.
    with self.assertRaises(ValueError):
        optics([[0], [1], [2]], 0.5, 1, amount_clusters=-1)
49 changes: 30 additions & 19 deletions pyclustering/cluster/tests/unit/ut_rock.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,51 +24,62 @@
"""


import unittest;
import unittest

# Generate images without having a window appear.
import matplotlib;
matplotlib.use('Agg');
import matplotlib
matplotlib.use('Agg')

from pyclustering.cluster.tests.rock_templates import RockTestTemplates;
from pyclustering.cluster.rock import rock
from pyclustering.cluster.tests.rock_templates import RockTestTemplates

from pyclustering.samples.definitions import SIMPLE_SAMPLES;
from pyclustering.samples.definitions import SIMPLE_SAMPLES


class RockUnitTest(unittest.TestCase):
def testClusterAllocationSampleSimple1(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 2, 0.5, [5, 5], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 5, 1, 0.5, [10], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 1, 2, 0.5, [5, 5], False)
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 5, 1, 0.5, [10], False)

def testClusterAllocationSampleSimple2(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 1, 3, 0.5, [10, 5, 8], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 5, 1, 0.5, [23], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 1, 3, 0.5, [10, 5, 8], False)
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 5, 1, 0.5, [23], False)

def testClusterAllocationSampleSimple3(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1, 4, 0.5, [10, 10, 10, 30], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1, 4, 0.5, [10, 10, 10, 30], False)

def testClusterAllocationSampleSimple3WrongRadius(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1.7, 4, 0.5, [10, 10, 10, 30], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE3, 1.7, 4, 0.5, [10, 10, 10, 30], False)

def testClusterAllocationSampleSimple4(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 5, 0.5, [15, 15, 15, 15, 15], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 5, 0.5, [15, 15, 15, 15, 15], False)

def testClusterAllocationSampleSimple4WrongRadius(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1.5, 5, 0.5, [15, 15, 15, 15, 15], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1.5, 5, 0.5, [15, 15, 15, 15, 15], False)

def testClusterAllocationSampleSimple5(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 1, 4, 0.5, [15, 15, 15, 15], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE5, 1, 4, 0.5, [15, 15, 15, 15], False)

def testClusterTheSameData1(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 1, 2, 0.5, [10, 20], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE9, 1, 2, 0.5, [10, 20], False)

def testClusterTheSameData2(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 1, 2, 0.5, [5, 5, 5], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE12, 1, 2, 0.5, [5, 5, 5], False)


def testClusterAllocationIncorrectNumberClusters(self):
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 4, 0.5, [15, 15, 15, 15, 15], False);
RockTestTemplates.templateLengthProcessData(SIMPLE_SAMPLES.SAMPLE_SIMPLE4, 1, 4, 0.5, [15, 15, 15, 15, 15], False)


if __name__ == "__main__":
unittest.main();
def test_incorrect_data(self):
    # Empty data: the constructor itself is expected to reject it.
    with self.assertRaises(ValueError):
        rock([], 0.1, 2)

def test_incorrect_eps(self):
    # Negative connectivity radius (second positional argument).
    with self.assertRaises(ValueError):
        rock([[0], [1], [2]], -1.0, 2)

def test_incorrect_minpts(self):
    # NOTE(review): despite the name, the third positional argument of rock is 'number_clusters',
    # so this test covers a non-positive amount of clusters (0).
    with self.assertRaises(ValueError):
        rock([[0], [1], [2]], 0.5, 0)

def test_incorrect_amount_clusters(self):
    # NOTE(review): despite the name, the amount of clusters is valid (1) in both calls; the value
    # that varies is the fourth positional argument 'threshold', checked below and above its bounds.
    with self.assertRaises(ValueError):
        rock([[0], [1], [2]], 0.5, 1, -0.1)

    with self.assertRaises(ValueError):
        rock([[0], [1], [2]], 0.5, 1, 1.1)
9 changes: 6 additions & 3 deletions pyclustering/cluster/tests/unit/ut_silhouette.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import matplotlib
matplotlib.use('Agg')

from pyclustering.cluster.silhouette import silhouette_ksearch_type
from pyclustering.cluster.silhouette import silhouette, silhouette_ksearch_type
from pyclustering.cluster.tests.silhouette_templates import silhouette_test_template

from pyclustering.samples.definitions import SIMPLE_SAMPLES, SIMPLE_ANSWERS
Expand Down Expand Up @@ -163,5 +163,8 @@ def test_distance_matrix_sample07(self):
SIMPLE_ANSWERS.ANSWER_SIMPLE7, False)


if __name__ == "__main__":
unittest.main()
def test_incorrect_data(self):
    # Empty data with non-empty clusters.
    with self.assertRaises(ValueError):
        silhouette([], [[1, 2], [3, 4]])

def test_incorrect_clusters(self):
    # Non-empty data with an empty list of clusters.
    with self.assertRaises(ValueError):
        silhouette([[1], [2], [3], [4]], [])
11 changes: 9 additions & 2 deletions pyclustering/cluster/tests/unit/ut_somsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import matplotlib
matplotlib.use('Agg')

from pyclustering.cluster.somsc import somsc
from pyclustering.cluster.tests.somsc_templates import SyncnetTestTemplates

from pyclustering.samples.definitions import SIMPLE_SAMPLES
Expand Down Expand Up @@ -88,5 +89,11 @@ def testClusterAllocationOneDimensionData(self):
SyncnetTestTemplates.templateClusterAllocationOneDimensionData(False)


if __name__ == "__main__":
unittest.main()
def test_incorrect_data(self):
    # Empty data: the constructor itself is expected to reject it.
    with self.assertRaises(ValueError):
        somsc([], 1, 1)

def test_incorrect_epouch(self):
    # Negative amount of epochs (third positional argument 'epouch').
    with self.assertRaises(ValueError):
        somsc([[0], [1], [2]], 1, -1)

def test_incorrect_amount_clusters(self):
    # Zero clusters requested (second positional argument 'amount_clusters').
    with self.assertRaises(ValueError):
        somsc([[0], [1], [2]], 0, 1)
Loading

0 comments on commit be809d7

Please sign in to comment.