-
Notifications
You must be signed in to change notification settings - Fork 1
/
knnClassifier.py
120 lines (97 loc) · 5.05 KB
/
knnClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Importing MNIST data set using sklearn
# pyplot and patches are used for plotting graphs
from sklearn.datasets import fetch_mldata
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Location handed to fetch_mldata as `data_home` (the current directory).
custom_data_home = "./"
# NOTE(review): fetch_mldata is no longer shipped by recent scikit-learn
# releases; running this line needs an older scikit-learn or a port to
# fetch_openml -- confirm against the installed version.
mnist = fetch_mldata('MNIST original', data_home=custom_data_home)
# Sizes used by knn_classifier(): how many training and testing examples
# are drawn, how many leading samples of the data set they are drawn from,
# and how many distinct class labels exist.
traning_examples = 6000
testing_examples = 1000
total_examples = 60000
class_labels = 10
# Independent 32-bit signed integer copy of the sample matrix, made in a
# single step.  Presumably the raw data is unsigned, so a signed copy is
# needed for pixel differences to go negative -- TODO confirm.
mnist_data = np.array(mnist.data, dtype=np.int32)
def _nearest_labels(reference_data, reference_labels, query_data, max_k):
    """Return the labels of the ``max_k`` nearest reference rows per query.

    Args:
        reference_data: 2-D array, one labelled sample per row.
        reference_labels: 1-D array with the label of each reference row.
        query_data: 2-D array, one sample to classify per row.
        max_k: number of nearest neighbours to keep for every query.

    Returns:
        Array of shape ``(len(query_data), max_k)``; row ``i`` holds the
        labels of the reference rows closest to query ``i``, nearest first.
    """
    # float64 keeps the squared distances of integer-valued pixels exact and
    # rules out wrap-around when the inputs are small (un)signed integers.
    reference = np.asarray(reference_data, dtype=np.float64)
    queries = np.asarray(query_data, dtype=np.float64)
    nearest = np.empty((queries.shape[0], max_k), dtype=reference_labels.dtype)
    for row, query in enumerate(queries):
        diff = reference - query
        # The square root is monotonic, so ordering by the squared Euclidean
        # distance yields the same neighbours without any rounding.
        squared = np.sum(diff * diff, axis=1)
        order = np.argsort(squared, kind="stable")[:max_k]
        nearest[row] = reference_labels[order]
    return nearest


def _error_rate(neighbour_labels, true_labels, k, n_classes):
    """Return the fraction of rows misclassified by a k-neighbour vote.

    Args:
        neighbour_labels: array from ``_nearest_labels`` (nearest first).
        true_labels: 1-D array with the correct label of every row.
        k: number of leading neighbours that take part in the vote.
        n_classes: labels are expected to be ``0 .. n_classes - 1``.

    Returns:
        Misclassification rate as a float in ``[0, 1]``.
    """
    voters = neighbour_labels[:, :k]
    # Votes are recounted from scratch for every k so that one k never
    # inherits the counts of another.
    votes = np.stack(
        [np.sum(voters == label, axis=1) for label in range(n_classes)],
        axis=1,
    )
    # Last column of a stable argsort = class with the most votes; ties
    # resolve to the largest class label.
    predicted = np.argsort(votes, axis=1, kind="stable")[:, -1]
    return float(np.mean(predicted != true_labels))


def knn_classifier():
    """Plot k-NN training and test error on MNIST for several values of k.

    Reads the module-level ``mnist``, ``mnist_data``, ``traning_examples``,
    ``testing_examples``, ``total_examples`` and ``class_labels``.  A random
    permutation of the first ``total_examples`` sample indices is drawn; its
    first ``traning_examples`` entries form the training set and its last
    ``testing_examples`` entries the test set.  For every k the training
    samples and the test samples are classified by majority vote among their
    k nearest training samples (Euclidean distance), and both error rates are
    plotted against k.

    Returns:
        None.  The result is shown with ``plt.show()``.
    """
    k = [1, 9, 19, 29, 39, 49, 59, 69, 79, 89, 99]

    # The permutation mixes the samples before the subsets are taken.
    shuffle = np.arange(total_examples)
    np.random.shuffle(shuffle)
    train_idx = shuffle[:traning_examples]
    test_idx = shuffle[(total_examples - testing_examples):total_examples]

    # Fancy indexing already returns fresh arrays, no pre-allocation needed.
    traning_data = mnist_data[train_idx]
    testing_data = mnist_data[test_idx]
    # Labels are used as class indices, so keep them as integers.
    traning_target = mnist.target[train_idx].astype(np.int64)
    testing_target = mnist.target[test_idx].astype(np.int64)

    # Only the max(k) nearest labels per sample are ever needed, which avoids
    # holding full sample-by-sample distance matrices in memory.
    max_k = max(k)
    train_neighbours = _nearest_labels(
        traning_data, traning_target, traning_data, max_k)
    test_neighbours = _nearest_labels(
        traning_data, traning_target, testing_data, max_k)

    traning_error = [
        _error_rate(train_neighbours, traning_target, n, class_labels)
        for n in k
    ]
    testing_error = [
        _error_rate(test_neighbours, testing_target, n, class_labels)
        for n in k
    ]

    # Plot both error curves on the same axes against the value of k:
    # red is the test error, green the training error.
    plt.xlabel('Value of K')
    plt.ylabel('Error')
    red_box = mpatches.Patch(color='red', label='Test Error')
    green_box = mpatches.Patch(color='green', label='Traning Error')
    plt.legend(handles=[red_box, green_box])
    plt.plot(k, testing_error, color='red', marker='*')
    plt.plot(k, traning_error, color='g', marker='*')
    plt.title('knn Classifier')
    plt.show()
# Script entry point: run knn_classifier(), which finishes by displaying the
# error-versus-k plot.
knn_classifier()