-
Notifications
You must be signed in to change notification settings - Fork 0
/
AI_Challenge.py
99 lines (73 loc) · 2.74 KB
/
AI_Challenge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-
"""KaggleCompetition.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1SK0N8Mt6222Ic5L9vqQbAR7v6NKR8yOJ
"""
#import libraries
import tensorflow as tf
import numpy as np
import matplotlib as plt
import pandas as pd
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
#optimizer and learning rate lib
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import TensorBoard
# Give every run its own TensorBoard log folder by suffixing the run name
# with the current Unix time (whole seconds).
NAME = "Number-model- " + str(int(time.time()))
tensorboard = TensorBoard(log_dir="logs/" + NAME)
# Colab-only step: prompt for file uploads through the browser
# (presumably train.csv and test.csv, which are read right after this).
from google.colab import files

# Two separate prompts are issued back to back; `uploaded` ends up holding
# only the result of the second one.
uploaded = files.upload()
uploaded = files.upload()
# Load both CSV files from the current working directory.
df_Train = pd.read_csv("train.csv")
df_Test = pd.read_csv("test.csv")

# Split the training frame into targets (the "label" column) and features
# (every remaining column).
Y_Train = df_Train["label"]
X_Train = df_Train.drop(columns=["label"])

# NOTE: despite the "Y_" prefix, this is the whole test.csv frame and is used
# as model *input* further down; the name is kept because later statements
# refer to it.
Y_Test = df_Test
# Training inputs: divide by 255.0 (intended to bring the values into
# [0, 1]), then turn each row into a 28x28 single-channel image.
X_Train = X_Train.div(255.0)
X_Train.head()  # notebook preview call; its result is discarded here
X_Train = X_Train.to_numpy().reshape((-1, 28, 28, 1))

# Test inputs receive exactly the same scaling and reshaping.
Y_Test = Y_Test.div(255.0)
Y_Test = Y_Test.to_numpy().reshape((-1, 28, 28, 1))
# One-hot encode the integer class labels into rows of width 10.
# The helper is imported from tensorflow.keras, like every other Keras symbol
# in this file, instead of the standalone `keras.utils.np_utils` module,
# which is not available in recent Keras releases.
from tensorflow.keras.utils import to_categorical

Y_Train = to_categorical(Y_Train, num_classes=10)
# Hold out 10% of the training samples as a validation set.
# No random_state is passed, so the split is not pinned to a fixed seed.
from sklearn.model_selection import train_test_split

X_Train, X_val, Y_Train, Y_val = train_test_split(
    X_Train,
    Y_Train,
    test_size=0.1,
)
# Convolutional classifier: two Conv2D -> ReLU -> MaxPooling2D stages, then a
# 64-unit dense hidden layer and a 10-way softmax output.
model = Sequential([
    # Input shape is taken from the training array (everything but the
    # leading sample axis).
    Conv2D(64, (3, 3), input_shape=X_Train.shape[1:]),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64),
    # Dense layers are linear unless an activation is given; without this
    # ReLU, Dense(64) followed by Dense(10) would collapse into a single
    # linear map and the hidden layer would add no modelling capacity.
    Activation("relu"),
    Dense(10),
    Activation("softmax"),
])
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=["accuracy"],
)

# Learning-rate scheduler keyed on validation accuracy: multiply the rate by
# 0.5 after 2 epochs without improvement, with 0.0001 as the lower bound.
learning_rate = ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,
    patience=2,
    min_lr=0.0001,
    verbose=0,
)
# Training hyper-parameters.
batch_size = 120
epochs = 3

# Fit on the training split and evaluate on the held-out validation split,
# with the learning-rate scheduler attached.
# NOTE(review): the `tensorboard` callback created near the top of the file
# is not registered here - confirm whether that is intentional.
model.fit(
    X_Train,
    Y_Train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, Y_val),
    callbacks=[learning_rate],
)
# Run the trained model over the prepared test inputs (held in `Y_Test`,
# despite its name) to obtain one row of class scores per sample.
predictions = model.predict(Y_Test)

# Write one "<image_id>,<predicted_class>" line per test sample.
# NOTE(review): no header row is written; if the submission target expects
# one (e.g. "ImageId,Label"), add it - confirm against the required format.
with open('prediction.csv', 'w') as f:
    # Image ids are 1-based and follow the row order of the test set.
    for image_id, prediction in enumerate(predictions, start=1):
        # The predicted class is the index of the highest score in the row.
        best_choice = np.argmax(prediction)
        f.write("{},{}\n".format(image_id, best_choice))