-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStep1_Ship exist detection.py
175 lines (146 loc) · 5.99 KB
/
Step1_Ship exist detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import os
import gc
print(os.listdir("../input"))
import numpy as np
import pandas as pd
import time
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import OneHotEncoder
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.models import Sequential, Model
# Read the segmentation CSV and derive a binary "ship present" label per image.
train = pd.read_csv('../input/train_ship_segmentations_v2.csv')

# Convert EncodedPixels into the target:
#   mask present  ==> 1 (ship)
#   missing value ==> 0 (no ship)
# Deriving the flag from notna() keeps the column numeric instead of an object
# column that mixes mask strings with ints.
train['exist_ship'] = train['EncodedPixels'].notna().astype(int)
del train['EncodedPixels']

# An ImageId can appear on several rows; compare the row count with the number
# of distinct ids, then collapse to one row per image (1 if any row had a mask).
print(len(train['ImageId']))
print(train['ImageId'].value_counts().shape[0])
train_gp = train.groupby('ImageId')['exist_ship'].max().reset_index()

# Balance ship / no-ship images by discarding up to 100000 no-ship rows.
print(train_gp['exist_ship'].value_counts())
train_gp = train_gp.sort_values(by='exist_ship')
no_ship_count = int((train_gp['exist_ship'] == 0).sum())
# After sorting, the no-ship rows come first. Capping the slice at their count
# guarantees that ship rows are never dropped when fewer than 100000 exist.
train_gp = train_gp.drop(train_gp.index[:min(100000, no_ship_count)])
print(train_gp['exist_ship'].value_counts())

# Draw the random subset used for training (at most 5000 images).
train_sample = train_gp.sample(min(5000, len(train_gp)))
print(train_sample['exist_ship'].value_counts())
print(train_sample.shape)
# Load the sampled training images (resized to 256x256 RGB) and their labels.
Train_path = '../input/train_v2/'
Test_path = '../input/test_v2/'
# Not referenced anywhere in the visible script; kept so the module namespace
# is unchanged.
training_img_data = []
target_data = []
from PIL import Image

# ImageId -> label lookup built once. This replaces rebuilding a list for every
# directory entry and a regex `str.contains` mask taken from a different frame.
label_by_image = dict(zip(train_sample['ImageId'], train_sample['exist_ship']))

data = np.empty((len(train_sample['ImageId']), 256, 256, 3), dtype=np.uint8)
data_target = np.empty((len(train_sample['ImageId'])), dtype=np.uint8)
image_name_list = os.listdir(Train_path)
index = 0
for image_name in image_name_list:
    if image_name not in label_by_image:
        continue
    # The context manager closes the source file once the pixels are copied.
    with Image.open(Train_path + image_name) as source_image:
        imageA = source_image.resize((256, 256)).convert('RGB')
    data[index] = np.asarray(imageA)
    data_target[index] = label_by_image[image_name]
    index += 1

# np.empty leaves unfilled rows uninitialised; drop any rows whose sampled
# image was not found on disk so garbage never reaches training.
data = data[:index]
data_target = data_target[:index]
print(data.shape)
print(data_target.shape)
# One-hot encode the 0/1 labels. Indexing a 2x2 identity matrix always yields
# two columns (column 0 = no ship, column 1 = ship), so the target width matches
# the 2-unit softmax output even if the sample happens to contain one class.
targets = np.eye(2)[data_target]
print(targets.shape)

# Hold out 20% of the loaded images as validation data to detect overfitting.
x_train, x_val, y_train, y_val = train_test_split(data, targets, test_size=0.2)
# Print the shapes: a bare tuple expression is discarded when run as a script.
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)
# Data augmentation: settings for the generator that feeds training batches.
# Pixel values are scaled by 1/255 and random geometric / brightness
# transformations are applied.
augmentation_settings = dict(
    rescale=1. / 255,
    zca_whitening=False,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.5, 1.5],
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
img_gen = ImageDataGenerator(**augmentation_settings)
# ========= Load the ResNet50 backbone with Keras (ImageNet weights, no top).
from keras.applications.resnet50 import ResNet50 as ResModel
img_width, img_height = 256, 256
model = ResModel(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

# Freeze every backbone layer so only the new classification head is trained.
for layer in model.layers:
    layer.trainable = False

# Fully connected head: two 1024-unit ReLU layers with dropout in between,
# followed by a 2-way softmax.
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(2, activation="softmax")(x)

# Create the final model. Keras 2 names these keyword arguments `inputs` and
# `outputs`; the singular `input` / `output` spellings are the legacy form.
model_final = Model(inputs=model.input, outputs=predictions)
# Set hyperparameters and compile the model.
epochs = 10
lrate = 0.001
decay = lrate / epochs
batch_size = 16
sgd = optimizers.SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
model_final.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model_final.summary()

# Training batches are rescaled to [0, 1] by img_gen, so the validation images
# must get the same scaling (without augmentation). Passing the raw uint8
# arrays would validate on inputs the network never sees during training.
val_gen = ImageDataGenerator(rescale=1. / 255)

# Start training. Step counts are whole numbers of batches, rounded up so the
# final partial batch is included.
model_final.fit_generator(
    img_gen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=int(np.ceil(len(x_train) / float(batch_size))),
    validation_data=val_gen.flow(x_val, y_val, batch_size=batch_size, shuffle=False),
    validation_steps=int(np.ceil(len(x_val) / float(batch_size))),
    epochs=epochs,
)
model_final.save('ResNet_transfer_ship.h5')
# Estimate accuracy on a random draw of 2000 images from the balanced table.
train_predict_sample = train_gp.sample(2000)
print(train_predict_sample['exist_ship'].value_counts())

# ImageId -> label lookup built once. This replaces rebuilding a list for every
# directory entry and a regex `str.contains` mask taken from a different frame.
predict_label_by_image = dict(
    zip(train_predict_sample['ImageId'], train_predict_sample['exist_ship'])
)

data_predict = np.empty((len(train_predict_sample['ImageId']), 256, 256, 3), dtype=np.uint8)
data_target_predict = np.empty((len(train_predict_sample['ImageId'])), dtype=np.uint8)
image_name_list = os.listdir(Train_path)
index = 0
for image_name in image_name_list:
    if image_name not in predict_label_by_image:
        continue
    # The context manager closes the source file once the pixels are copied.
    with Image.open(Train_path + image_name) as source_image:
        imageA = source_image.resize((256, 256)).convert('RGB')
    data_predict[index] = np.asarray(imageA)
    data_target_predict[index] = predict_label_by_image[image_name]
    index += 1

# np.empty leaves unfilled rows uninitialised; keep only the rows actually loaded.
data_predict = data_predict[:index]
data_target_predict = data_target_predict[:index]
print(data_predict.shape)
print(data_target_predict.shape)

# Fixed-width one-hot targets (column 0 = no ship, column 1 = ship), so the
# shape always matches the 2-unit softmax regardless of the classes drawn.
targets_predict = np.eye(2)[data_target_predict]
print(targets_predict.shape)

# The network was trained on pixels scaled by 1/255, so evaluation applies the
# same rescale, batch by batch and in a fixed order.
eval_batch_size = 32
eval_gen = ImageDataGenerator(rescale=1. / 255)
predict_ship = model_final.evaluate_generator(
    eval_gen.flow(data_predict, targets_predict, batch_size=eval_batch_size, shuffle=False),
    steps=int(np.ceil(len(data_predict) / float(eval_batch_size))),
)
print ('Accuracy of random data = '+ str(round(predict_ship[1]*100)) + "%")
# Generate predictions for every test image and write them to a CSV file.
image_test_name_list = os.listdir(Test_path)
data_test = np.empty((len(image_test_name_list), 256, 256, 3), dtype=np.uint8)
test_name = []
for index, image_name in enumerate(image_test_name_list):
    # The context manager closes the source file once the pixels are copied.
    with Image.open(Test_path + image_name) as source_image:
        imageA = source_image.resize((256, 256)).convert('RGB')
    test_name.append(image_name)
    data_test[index] = np.asarray(imageA)
print (data_test.shape)

# The network was trained on pixels scaled by 1/255, so prediction applies the
# same rescale. Streaming batches through a generator avoids a float copy of
# the whole uint8 array; shuffle=False keeps rows aligned with test_name.
test_batch_size = 32
test_gen = ImageDataGenerator(rescale=1. / 255)
result = model_final.predict_generator(
    test_gen.flow(data_test, batch_size=test_batch_size, shuffle=False),
    steps=int(np.ceil(len(data_test) / float(test_batch_size))),
)

# argmax over the two softmax outputs: 1 = ship, 0 = no ship.
result_list={
    "ImageId": test_name,
    "Have_ship":np.argmax(result,axis=1)
}
result_pd = pd.DataFrame(result_list)
result_pd.to_csv('Have_ship_or_not.csv',index = False)