Skip to content

Commit

Permalink
Merge pull request #58 from BipanjitGill/main
Browse files Browse the repository at this point in the history
Create kMeans.py
  • Loading branch information
CodingWarrior33 authored Oct 29, 2023
2 parents 6f75f4e + 59c6ed1 commit 1301de4
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions ML_Algorithms/K-Means/kMeans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# K-Means clustering: an unsupervised learning algorithm that groups an unlabeled dataset into k clusters.

import numpy as np

class K_Means:
    """Minimal K-Means clustering using Euclidean distance.

    After ``fit`` the instance exposes:

    * ``centroids`` -- dict mapping cluster index (``0 .. k-1``) to the
      centroid of that cluster.
    * ``classifications`` -- dict mapping cluster index to the list of
      samples assigned to it during the last iteration.
    """

    def __init__(self, k=2, max_iter=100, tol=0.001):
        """Store the hyper-parameters.

        Args:
            k: Number of clusters.
            max_iter: Maximum number of assignment/update passes over the
                dataset.
            tol: Convergence threshold. Fitting stops once, for every
                centroid, the summed absolute per-coordinate change (in
                percent of the previous value) is at most ``tol``.
        """
        self.k = k
        self.max_iter = max_iter
        self.tol = tol

    def _nearest_centroid(self, featureset):
        """Return the index of the centroid closest to *featureset*."""
        distances = [
            np.linalg.norm(featureset - self.centroids[centroid])
            for centroid in self.centroids
        ]
        # Ties resolve to the lowest cluster index.
        return distances.index(min(distances))

    def _has_moved(self, previous, current):
        """Return True if a centroid moved by more than ``self.tol``.

        The movement is the sum over coordinates of the absolute change
        expressed in percent of the previous value. Absolute values are
        required: with signed values a centroid drifting towards smaller
        coordinates would yield a negative sum and look converged.
        """
        previous = np.asarray(previous, dtype=float)
        current = np.asarray(current, dtype=float)
        scale = np.abs(previous)
        # A previous coordinate of exactly 0 has no meaningful relative
        # change; divide by 1 there instead of producing inf/nan.
        denom = np.where(scale > 0, scale, 1.0)
        change = np.abs(current - previous) / denom * 100.0
        return bool(np.sum(change) > self.tol)

    def fit(self, data):
        """Compute the centroids for *data*.

        Args:
            data: Sequence of samples (rows); anything ``np.asarray``
                accepts. Must contain at least ``k`` samples.

        Raises:
            IndexError: If *data* holds fewer than ``k`` samples.
        """
        data = np.asarray(data)

        # The first k samples seed the centroids (deterministic, not random).
        self.centroids = {index: data[index] for index in range(self.k)}

        for _ in range(self.max_iter):
            # Assignment step: attach every sample to its nearest centroid.
            self.classifications = {index: [] for index in range(self.k)}
            for featureset in data:
                label = self._nearest_centroid(featureset)
                self.classifications[label].append(featureset)

            prev_centroids = dict(self.centroids)

            # Update step: move each centroid to the mean of its members.
            for label, members in self.classifications.items():
                # An empty cluster keeps its previous centroid; averaging
                # an empty list would turn the centroid into NaN for good.
                if members:
                    self.centroids[label] = np.average(members, axis=0)

            # Stop as soon as no centroid moved by more than the tolerance.
            moved = any(
                self._has_moved(prev_centroids[label], self.centroids[label])
                for label in self.centroids
            )
            if not moved:
                break

    def predict(self, data):
        """Return the index of the cluster whose centroid is nearest to *data*.

        Args:
            data: A single sample with the same dimensionality as the
                samples passed to ``fit``.
        """
        return self._nearest_centroid(np.asarray(data))

0 comments on commit 1301de4

Please sign in to comment.