Skip to content

Commit

Permalink
Fixed k-means widget to new clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jun 6, 2019
1 parent e4636ae commit 178a862
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Orange/evaluation/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __init__(self, data, learners, k=1,
fold_models.append(model)

labels = model(data)
self.predicted[i, k, :] = labels.X.flatten()
self.predicted[i, k, :] = labels.flatten()



Expand Down
33 changes: 24 additions & 9 deletions Orange/widgets/unsupervised/owkmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
pyqtSlot as Slot
from AnyQt.QtGui import QIntValidator
from AnyQt.QtWidgets import QGridLayout, QTableView
from sklearn.metrics import silhouette_samples, silhouette_score

import Orange.preprocess
from Orange.clustering import KMeans
from Orange.clustering.kmeans import KMeansModel, SILHOUETTE_MAX_SAMPLES
from Orange.clustering.kmeans import KMeansModel
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange.data.util import get_unique_names, array_equal
from Orange.preprocess.impute import ReplaceUnknowns
Expand All @@ -23,6 +25,10 @@


# Fixed seed value; presumably passed as `random_state` to the clustering
# calls so results are reproducible -- confirm against the callers.
RANDOM_STATE = 0
# Largest data set (in rows) for which send_data still computes per-sample
# silhouette scores.
SILHOUETTE_MAX_SAMPLES = 5000
# Both preprocessors are required for clustering and silhouette; the widget's
# preproces() helper applies them to the data in this order.
preprocessors = [Orange.preprocess.Continuize(),
Orange.preprocess.SklImpute()]


class ClusterTableModel(QAbstractTableModel):
Expand Down Expand Up @@ -275,7 +281,7 @@ def _compute_clustering(data, k, init, n_init, max_iter, silhouette, random_stat

return KMeans(
n_clusters=k, init=init, n_init=n_init, max_iter=max_iter,
compute_silhouette_score=silhouette, random_state=random_state,
random_state=random_state
)(data)

@Slot(int, int)
Expand Down Expand Up @@ -432,10 +438,9 @@ def invalidate(self):
self.commit()

def update_results(self):
scores = [
mk if isinstance(mk, str) else mk.silhouette for mk in (
self.clusterings[k] for k in range(self.k_from, self.k_to + 1))
]
scores = [mk if isinstance(mk, str) else silhouette_score(
self.data.X, mk(self.data)) for mk in (
self.clusterings[k] for k in range(self.k_from, self.k_to + 1))]
best_row = max(
range(len(scores)), default=0,
key=lambda x: 0 if isinstance(scores[x], str) else scores[x]
Expand All @@ -454,6 +459,16 @@ def selected_row(self):
# Re-sends the output by delegating to send_data(); presumably triggered when
# the selected row of the results table changes -- confirm against the signal
# connection.
def select_row(self):
self.send_data()

def preproces(self, data):
    """Return `data` after passing it through every module-level preprocessor.

    The entries of `preprocessors` are applied one after another, each one
    receiving the output of the previous one.
    """
    processed = data
    for step in preprocessors:
        processed = step(processed)
    return processed

def samples_scores(self, clust_ids):
    """Return rescaled per-sample silhouette scores for `clust_ids`.

    The widget's data is preprocessed first, then silhouette_samples is
    evaluated on its X matrix with the given cluster labels.  The raw
    values are squashed with arctan, divided by pi and shifted by 0.5,
    which maps them monotonically into the open interval (0, 1).
    """
    prepared = self.preproces(self.data)
    raw_scores = silhouette_samples(prepared.X, clust_ids)
    return np.arctan(raw_scores) / np.pi + 0.5

def send_data(self):
if self.optimize_k:
row = self.selected_row()
Expand All @@ -473,12 +488,12 @@ def send_data(self):
values=["C%d" % (x + 1) for x in range(km.k)]
)
clust_ids = km(self.data)
clust_col = clust_ids.X.ravel()
clust_col = clust_ids
silhouette_var = ContinuousVariable(
get_unique_names(domain, "Silhouette"))
if km.silhouette_samples is not None:
if len(self.data) <= SILHOUETTE_MAX_SAMPLES :
self.Warning.no_silhouettes.clear()
scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
scores = self.samples_scores(clust_ids)
clust_scores = []
for i in range(km.k):
in_clust = clust_col == i
Expand Down

0 comments on commit 178a862

Please sign in to comment.