-
Notifications
You must be signed in to change notification settings - Fork 0
/
Kmeans.cpp
97 lines (76 loc) · 2.02 KB
/
Kmeans.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <math.h>
#include <vector>
#include "KMeans.h"
// Builds a model holding `number_clusters` centroids with `dimension_data`
// coordinates each.
//
// dimension_data  - number of coordinates stored per centroid.
// number_clusters - number of centroid rows to allocate.
//
// The centroid table is heap-allocated here and released by the destructor.
// Every coordinate starts at 0.0 so that reading a centroid before
// Initialize() has run never touches indeterminate memory.
KMeans::KMeans(int dimension_data, int number_clusters){
    this->dimension_data = dimension_data;
    this->number_clusters = number_clusters;

    centroid = new double*[number_clusters];
    for (int i = 0; i < number_clusters; i++){
        // The trailing () value-initializes the whole row to 0.0.
        centroid[i] = new double[dimension_data]();
    }
}
// Releases the centroid table: each of the number_clusters coordinate rows
// first, then the array of row pointers itself.
KMeans::~KMeans(){
    int row = 0;
    while (row < number_clusters){
        delete[] centroid[row];
        ++row;
    }
    delete[] centroid;
}
// Seeds the centroids from the data set.
//
// number_data - number of rows available in `data`.
// data        - rows of coordinates; data[k][j] is read for k < number_data
//               and j < dimension_data.
//
// The first number_clusters * (number_data / number_clusters) rows are cut
// into number_clusters equal contiguous runs, and centroid i becomes the
// per-coordinate mean of run i. Any trailing rows left over by the integer
// division are not used for seeding.
//
// When there are fewer rows than clusters the run length would be zero (and
// the mean 0/0), so centroid i is copied from row i % number_data instead.
// With no rows at all the centroids are left untouched.
void KMeans::Initialize(int number_data, double **data){
    if (number_data <= 0 || number_clusters <= 0){
        return;
    }

    // Run length is the same for every cluster, so compute it once.
    const int number_sample = number_data / number_clusters;

    for (int i = 0; i < number_clusters; i++){
        if (number_sample == 0){
            // Not enough rows for one per cluster: reuse rows cyclically.
            const double *seed = data[i % number_data];
            for (int j = 0; j < dimension_data; j++){
                centroid[i][j] = seed[j];
            }
            continue;
        }

        const int first = i * number_sample;
        const int last = first + number_sample;
        for (int j = 0; j < dimension_data; j++){
            double sum = 0;
            for (int k = first; k < last; k++){
                sum += data[k][j];
            }
            centroid[i][j] = sum / number_sample;
        }
    }
}
// Returns the index of the centroid closest to `data` in Euclidean distance.
//
// data - one point; data[0] .. data[dimension_data - 1] are read.
//
// Ties keep the lowest cluster index, because a later centroid only replaces
// the current best when it is strictly closer. Returns -1 when the model has
// no clusters, instead of an indeterminate value.
int KMeans::Classify(double data[]){
    int argmin = -1;
    double min = 0;  // only meaningful once argmin has been set

    for (int j = 0; j < number_clusters; j++){
        // Squared distance is enough: the square root is monotonic, so it
        // does not change which centroid is nearest.
        double distance = 0;
        for (int k = 0; k < dimension_data; k++){
            const double diff = data[k] - centroid[j][k];
            distance += diff * diff;
        }

        if (argmin == -1 || distance < min){
            argmin = j;
            min = distance;
        }
    }
    return argmin;
}
// Performs one k-means update step and reports how far the centroids moved.
//
// number_data - number of rows in `data`.
// data        - rows of coordinates; data[i][k] is read for i < number_data
//               and k < dimension_data.
//
// Every row is first labelled with Classify() using the centroids as they
// stand on entry. Each centroid is then replaced by the per-coordinate mean of
// the rows labelled with it.
//
// Returns the sum, over all clusters, of the Euclidean distance between the
// old and the new centroid.
//
// A cluster that received no rows keeps its current centroid and contributes
// no movement. Overwriting it with the all-zero mean would teleport it to the
// origin and inflate the returned value.
double KMeans::Cluster(int number_data, double **data){
    double movements_centroids = 0;

    // Scratch buffers are vectors so they are released on every exit path.
    // Negative sizes are clamped to zero rather than wrapping when converted.
    std::vector<int> label(
        static_cast<std::vector<int>::size_type>(number_data > 0 ? number_data : 0));
    std::vector<double> mean(
        static_cast<std::vector<double>::size_type>(dimension_data > 0 ? dimension_data : 0));

    // Label all rows before touching any centroid, so every assignment is
    // made against the same set of centroids.
    for (int i = 0; i < number_data; i++){
        label[i] = Classify(data[i]);
    }

    for (int j = 0; j < number_clusters; j++){
        int number_sample = 0;
        mean.assign(mean.size(), 0.0);

        for (int i = 0; i < number_data; i++){
            if (label[i] != j){
                continue;
            }
            for (int k = 0; k < dimension_data; k++){
                mean[k] += data[i][k];
            }
            number_sample++;
        }

        if (number_sample == 0){
            // Empty cluster: leave the centroid where it is.
            continue;
        }

        double movements = 0;
        for (int k = 0; k < dimension_data; k++){
            mean[k] /= number_sample;
            const double diff = centroid[j][k] - mean[k];
            movements += diff * diff;
            centroid[j][k] = mean[k];
        }
        movements_centroids += sqrt(movements);
    }
    return movements_centroids;
}