-
Notifications
You must be signed in to change notification settings - Fork 0
/
DigitRecognizer.py
117 lines (91 loc) · 3.38 KB
/
DigitRecognizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
# Network architecture of the trained model (the training code is in model.py).
class Net(nn.Module):
    """Fully connected classifier mapping a 28x28 image to 10 class scores.

    The layer attribute names (``fc1``, ``fc2``, ``fc4``) are kept exactly as
    they are because this script loads a pickled model instance whose
    sub-modules are stored under those names.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)  # first fully connected layer
        self.fc2 = nn.Linear(512, 512)
        self.fc4 = nn.Linear(512, 10)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for the input image(s).

        ``x`` may be any tensor whose element count is a multiple of 784;
        it is flattened to (N, 784) before the first layer.
        """
        # reshape (unlike view) also accepts non-contiguous tensors such as
        # crops taken out of a larger image tensor.
        x = x.reshape(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.log_softmax(self.fc4(x), dim=1)
# Location of the pickled, trained model and of the photo to analyse.
PATH = "TrainedModel.pt"
IMAGE_PATH = "Images/IMG_3739-1.jpg"


def dot_product(a, b):
    """Return the dot product of two equal-length numeric sequences."""
    return sum(x * y for x, y in zip(a, b))


def cross_product(a, b):
    """Return the cross product of two 3-component vectors as a list."""
    return [
        a[1] * b[2] - a[2] * b[1],
        a[2] * b[0] - a[0] * b[2],
        a[0] * b[1] - a[1] * b[0],
    ]


def extract_square_roi(binary, rect, scale=1.6):
    """Cut a square window centred on ``rect`` out of a 2-D binary image.

    Args:
        binary: 2-D numpy array (the thresholded image).
        rect: ``(x, y, w, h)`` bounding box as returned by cv2.boundingRect.
        scale: side length of the window as a multiple of the box height.

    Returns:
        A ``side x side`` array with ``side = int(h * scale)``. Parts of the
        window that fall outside the image are filled with zeros, which is
        the value THRESH_BINARY_INV assigns to pixels above the threshold.
        Plain slicing would wrap around on negative indices or return a
        truncated/empty array for digits close to the image border.
    """
    x, y, w, h = rect
    side = int(h * scale)
    top = y + h // 2 - side // 2
    left = x + w // 2 - side // 2

    roi = np.zeros((side, side), dtype=binary.dtype)
    rows, cols = binary.shape[:2]
    src_top, src_left = max(top, 0), max(left, 0)
    src_bottom, src_right = min(top + side, rows), min(left + side, cols)
    if src_bottom > src_top and src_right > src_left:
        roi[src_top - top:src_bottom - top, src_left - left:src_right - left] = (
            binary[src_top:src_bottom, src_left:src_right]
        )
    return roi


def predict_digit(model, roi):
    """Return the index of the highest score ``model`` gives for ``roi``.

    ``roi`` is a numpy array; it is converted to a float tensor and passed to
    ``model`` without gradient tracking, since this is inference only.
    """
    with torch.no_grad():
        scores = model(torch.from_numpy(roi).float())
    return torch.argmax(scores).item()


def split_into_vectors(rects, digits, size=3):
    """Group recognised digits into two column vectors.

    Boxes are ordered left to right; the ``size`` leftmost boxes form the
    first vector and the next ``size`` boxes the second. Within each vector
    the digits are ordered top to bottom. Any further boxes are ignored.

    Args:
        rects: ``(x, y, w, h)`` bounding boxes.
        digits: predicted digit for each box, in the same order as ``rects``.
        size: number of components per vector.

    Returns:
        ``(vec1, vec2)`` as two lists of digits.

    Raises:
        ValueError: if fewer than ``2 * size`` boxes were supplied.
    """
    labelled = sorted(
        ((*rect, digit) for rect, digit in zip(rects, digits)),
        key=lambda item: item[0],
    )
    if len(labelled) < 2 * size:
        raise ValueError(
            f"expected at least {2 * size} digits, found {len(labelled)}"
        )
    first = sorted(labelled[:size], key=lambda item: item[1])
    second = sorted(labelled[size:2 * size], key=lambda item: item[1])
    return [item[4] for item in first], [item[4] for item in second]


def main():
    """Recognise two 3-digit column vectors in a photo and print their products."""
    # Load the trained model.
    # NOTE: torch.load unpickles arbitrary objects; only load trusted files.
    # map_location keeps the model on the CPU, where the input tensors live,
    # even if it was saved from a GPU. weights_only=False is required on
    # recent PyTorch releases to load a whole pickled module.
    try:
        model = torch.load(PATH, map_location="cpu", weights_only=False)
    except TypeError:
        # Older PyTorch releases do not know the weights_only argument.
        model = torch.load(PATH, map_location="cpu")
    model.eval()

    # Read the input image (cv2.imread returns None instead of raising).
    im = cv2.imread(IMAGE_PATH)
    if im is None:
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
    cv2.imshow("img", im)

    # Convert to grayscale and apply Gaussian blur
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)

    # Threshold the image: dark ink becomes 255, bright background becomes 0
    _, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)

    # Find contours in the image. OpenCV 3.x returns (image, contours,
    # hierarchy) and 4.x returns (contours, hierarchy); [-2] works for both.
    contours = cv2.findContours(
        im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Bounding rectangle of each contour
    rects = [cv2.boundingRect(contour) for contour in contours]

    # For each rectangular region, predict the digit with the neural network.
    nums = []
    for rect in rects:
        x, y, w, h = rect
        # Draw the bounding rectangle
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)

        # Square region around the digit, resized to the network input size
        roi = extract_square_roi(im_th, rect)
        roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
        # NOTE(review): (3, 3) is passed as the kernel itself, not as a
        # kernel size; np.ones((3, 3), np.uint8) was probably intended.
        # Left unchanged so the model keeps seeing the same input.
        roi = cv2.dilate(roi, (3, 3))

        prediction = predict_digit(model, roi)
        cv2.putText(
            im, str(prediction), (x, y),
            cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3,
        )
        nums.append(prediction)

    # Use the box positions to work out which digit belongs to which vector
    vec1, vec2 = split_into_vectors(rects, nums)
    print("The first vector is: ", vec1)
    print("The second vector is: ", vec2)

    dot = dot_product(vec1, vec2)
    cross = cross_product(vec1, vec2)

    # Display image with output text
    print("The cross product of the vectors is: ", cross)
    print("The dot product is: ", dot)
    cv2.imshow("Resulting Image with Rectangular ROIs", im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()