Skip to content

Commit

Permalink
[FIX] kmeans, OWKMeans: don't compute silhouette score if not requested
Browse files Browse the repository at this point in the history
  • Loading branch information
kernc committed Jul 1, 2016
1 parent ad6d0fc commit 074ceca
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 9 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ exclude_lines =
pragma: no cover
raise NotImplementedError
if __name__ == .__main__.:
except MemoryError
14 changes: 9 additions & 5 deletions Orange/clustering/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@ class KMeans(SklProjector):
__wraps__ = skl_cluster.KMeans

def __init__(self, n_clusters=8, init='k-means++', n_init=10, max_iter=300,
tol=0.0001, random_state=None, preprocessors=None):
tol=0.0001, random_state=None, preprocessors=None,
compute_silhouette_score=False):
super().__init__(preprocessors=preprocessors)
self.params = vars()
self._compute_silhouette = compute_silhouette_score

def fit(self, X, Y=None):
proj = skl_cluster.KMeans(**self.params)
proj = proj.fit(X, Y)
if 2 <= proj.n_clusters < X.shape[0]:
proj.silhouette = silhouette_score(X, proj.labels_)
else:
proj.silhouette = 0
proj.silhouette = np.nan
try:
if self._compute_silhouette and 2 <= proj.n_clusters < X.shape[0]:
proj.silhouette = silhouette_score(X, proj.labels_)
except MemoryError: # Pairwise dist in silhouette fails for large data
pass
proj.inertia = proj.inertia_ / X.shape[0]
cluster_dist = Euclidean(proj.cluster_centers_)
proj.inter_cluster = np.mean(cluster_dist[np.triu_indices_from(cluster_dist, 1)])
Expand Down
3 changes: 2 additions & 1 deletion Orange/tests/test_clustering_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def test_kmeans_parameters(self):
max_iter=10,
random_state=42,
tol=0.001,
init='random')
init='random',
compute_silhouette_score=True)
c = kmeans(self.iris)

def test_predict_single_instance(self):
Expand Down
8 changes: 5 additions & 3 deletions Orange/widgets/unsupervised/owkmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ def run_optimization(self):
k_to = min(self.k_to, len(self.data))
kmeans = KMeans(
init=['random', 'k-means++'][self.smart_init],
n_init=self.n_init, max_iter=self.max_iterations)
n_init=self.n_init, max_iter=self.max_iterations,
compute_silhouette_score=self.scoring == self.SILHOUETTE)
with self.progressBar(k_to - self.k_from + 1) as progress:
for k in range(self.k_from, k_to + 1):
progress.advance()
Expand Down Expand Up @@ -264,7 +265,7 @@ def show_results(self):
best_run = scores.index(best_score)
score_span = (best_score - worst_score) or 1
max_score = max(scores)
nplaces = min(5, int(abs(math.log(max(max_score, 1e-10)))) + 2)
nplaces = min(5, np.floor(abs(math.log(max(max_score, 1e-10)))) + 2)
fmt = "{{:.{}f}}".format(nplaces)
model = self.table_model
model.setRowCount(len(k_scores))
Expand All @@ -278,7 +279,8 @@ def show_results(self):
item = model.item(i, 1)
if item is None:
item = QStandardItem()
item.setData(fmt.format(score), Qt.DisplayRole)
item.setData(fmt.format(score) if not np.isnan(score) else 'out-of-memory error',
Qt.DisplayRole)
bar_ratio = 0.95 * (score - worst_score) / score_span
item.setData(bar_ratio, gui.TableBarItem.BarRole)
model.setItem(i, 1, item)
Expand Down

0 comments on commit 074ceca

Please sign in to comment.