# kmeans.py
import os
from zipfile import ZipFile
from glob import glob
import numpy as np
import cv2
from PIL import Image
import pyAesCrypt
from skvideo.io import FFmpegWriter
import tqdm
import io
import math
import warnings
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing import cpu_count
#import imageio.v3 as iio
from imageio.v3 import imread
class K_means:
    """Cluster images with a k-means style loop over histogram features.

    Each image is reduced to a short feature vector (see ``read_image``);
    ``generate_k_clusters`` loads the vectors and seeds the cluster labels,
    and ``rearrange_clusters`` iterates until no label changes.

    Attributes:
        k: Number of clusters.
        cluster: Cluster index of each loaded sample (parallel to ``data``).
        data: Feature vector of each loaded sample.
        end: The image reference passed in for each loaded sample
            (parallel to ``data``).
        i: Round-robin counter used by ``read_image`` to propose a label.
        size: When truthy, the original width and height are appended to
            every feature vector.
        resample: Maximum side length of the thumbnail that is analysed.
    """

    def __init__(self, k=3, size=False, resample=32):
        """Store the configuration and start with no loaded samples."""
        self.k = k
        self.cluster = []
        self.data = []
        self.end = []
        self.i = 0
        self.size = size
        self.resample = resample

    def manhattan_distance(self, x1, x2):
        """Return the L1 distance: the sum of absolute coordinate differences."""
        return sum((abs(float(a) - float(b)) for a, b in zip(x1, x2)), 0.0)

    def euclidian_distance(self, x1, x2):
        """Return the L2 distance: the square root of the summed squares.

        The root is taken once over the whole sum; taking it per coordinate
        would collapse the result into the Manhattan distance.
        """
        squared = sum(((float(a) - float(b)) ** 2 for a, b in zip(x1, x2)), 0.0)
        return math.sqrt(squared)

    def read_image(self, im):
        """Load one image and turn it into a feature vector.

        The image is shrunk in place to fit ``resample`` x ``resample``, a
        histogram of its array values is taken with NumPy's default binning,
        and every bin count is expressed as a percentage of the thumbnail's
        width * height. When ``self.size`` is truthy the original width and
        height are appended.

        Args:
            im: Anything ``PIL.Image.open`` accepts (typically a file path).

        Returns:
            ``[label, vector, im]`` on success, where ``label`` is the next
            round-robin index in ``range(k)``; ``[None, None, None]`` if the
            image could not be processed (the error is printed, not raised).
        """
        if self.i >= self.k:
            self.i = 0
        try:
            # The context manager releases the file handle even on failure.
            with Image.open(im) as img:
                osize = img.size
                img.thumbnail((self.resample, self.resample))
                pixels = img.size[0] * img.size[1]
                counts = np.histogram(np.asarray(img))[0]
            v = [float(c) / float(pixels) * 100 for c in counts]
            if self.size:
                v += [osize[0], osize[1]]
            i = self.i
            self.i += 1
            return [i, v, im]
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not abort the run.
            print("Error reading ", im, e)
            return [None, None, None]

    def generate_k_means(self):
        """Return the mean feature vector of every cluster.

        Returns:
            A list with ``k`` entries. Entry ``c`` is the per-dimension mean
            of the samples labelled ``c``; a cluster with no samples gets
            ``inf`` in every dimension so that nothing is ever closest to it.
            With no data loaded, every entry is an empty list.
        """
        if not self.data:
            return [[] for _ in range(self.k)]

        dims = len(self.data[0])
        final_mean = []
        for c in range(self.k):
            # Collect the members once instead of rescanning per dimension.
            members = [
                vec for label, vec in zip(self.cluster, self.data) if label == c
            ]
            if members:
                total = float(len(members))
                final_mean.append(
                    [
                        float(sum((vec[d] for vec in members), 0.0)) / total
                        for d in range(dims)
                    ]
                )
            else:
                final_mean.append([float('inf')] * dims)
        return final_mean

    def generate_k_clusters(self, folder):
        """Read every image in ``folder`` and seed the cluster labels.

        Images are read on a thread pool. Unreadable images are dropped.
        The surviving samples are labelled round-robin (0, 1, ..., k-1, 0,
        ...) in input order.

        Args:
            folder: Iterable of image references handed to ``read_image``.
        """
        pool = ThreadPool(cpu_count())
        try:
            result = pool.map(self.read_image, folder)
        finally:
            pool.close()
            pool.join()

        loaded = [r for r in result if r[1] is not None]
        self.data = [r[1] for r in loaded]
        self.end = [r[2] for r in loaded]
        # The label proposed by read_image comes from a counter that the
        # worker threads update without synchronisation, so it can repeat or
        # fall outside range(k). Re-deriving it here keeps seeding
        # deterministic and equal to what a sequential read would produce.
        self.cluster = [n % self.k for n in range(len(loaded))]

    def rearrange_clusters(self):
        """Reassign samples to their nearest cluster mean until stable.

        Each pass recomputes the cluster means, then moves every sample to
        the cluster whose mean has the smallest Manhattan distance (the
        lowest index wins ties). The loop ends after the first pass in which
        no label changes. ``self.cluster`` is updated in place.
        """
        changed = True
        while changed:
            changed = False
            means = self.generate_k_means()
            for idx, sample in zip(range(len(self.cluster)), self.data):
                distances = [
                    self.manhattan_distance(sample, mean) for mean in means
                ]
                nearest = distances.index(min(distances))
                if self.cluster[idx] != nearest:
                    self.cluster[idx] = nearest
                    changed = True