-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdlnd_image_classification.py
755 lines (595 loc) · 27.5 KB
/
dlnd_image_classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
# coding: utf-8
# # 图像分类
#
# 在此项目中,你将对 [CIFAR-10 数据集](https://www.cs.toronto.edu/~kriz/cifar.html) 中的图片进行分类。该数据集包含飞机、猫狗和其他物体。你需要预处理这些图片,然后用所有样本训练一个卷积神经网络。图片需要**标准化(normalized)**,标签需要采用 **one-hot 编码**。你需要应用所学的知识构建**卷积的、最大池化(max pooling)**、**丢弃(dropout)**和**完全连接(fully connected)**的层。最后,你需要在样本图片上看到神经网络的预测结果。
#
#
# ## 获取数据
#
# 请运行以下单元,以下载 [CIFAR-10 数据集(Python版)](https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz)。
#
# In[1]:
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
import problem_unittests as tests
import tarfile
cifar10_dataset_folder_path = 'cifar-10-batches-py'
# Use Floyd's cifar-10 dataset if present
floyd_cifar10_location = '/input/cifar-10/python.tar.gz'
if isfile(floyd_cifar10_location):
tar_gz_path = floyd_cifar10_location
else:
tar_gz_path = 'cifar-10-python.tar.gz'
class DLProgress(tqdm):
last_block = 0
def hook(self, block_num=1, block_size=1, total_size=None):
self.total = total_size
self.update((block_num - self.last_block) * block_size)
self.last_block = block_num
if not isfile(tar_gz_path):
with DLProgress(unit='B', unit_scale=True, miniters=1, desc='CIFAR-10 Dataset') as pbar:
urlretrieve(
'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
tar_gz_path,
pbar.hook)
if not isdir(cifar10_dataset_folder_path):
with tarfile.open(tar_gz_path) as tar:
tar.extractall()
tar.close()
tests.test_folder_path(cifar10_dataset_folder_path)
# ## 探索数据
#
# 该数据集分成了几部分/批次(batches),以免你的机器在计算时内存不足。CIFAR-10 数据集包含 5 个部分,名称分别为 `data_batch_1`、`data_batch_2`,以此类推。每个部分都包含以下某个类别的标签和图片:
#
# * 飞机
# * 汽车
# * 鸟类
# * 猫
# * 鹿
# * 狗
# * 青蛙
# * 马
# * 船只
# * 卡车
#
# 了解数据集也是对数据进行预测的必经步骤。你可以通过更改 `batch_id` 和 `sample_id` 探索下面的代码单元。`batch_id` 是数据集一个部分的 ID(1 到 5)。`sample_id` 是该部分中图片和标签对(label pair)的 ID。
#
# 问问你自己:“可能的标签有哪些?”、“图片数据的值范围是多少?”、“标签是按顺序排列,还是随机排列的?”。思考类似的问题,有助于你预处理数据,并使预测结果更准确。
#
# In[2]:
get_ipython().magic('matplotlib inline')
get_ipython().magic("config InlineBackend.figure_format = 'retina'")
import helper
import numpy as np
# Explore the dataset
batch_id = 1
sample_id = 6
helper.display_stats(cifar10_dataset_folder_path, batch_id, sample_id)
# ## 实现预处理函数
#
# ### 标准化
#
# 在下面的单元中,实现 `normalize` 函数,传入图片数据 `x`,并返回标准化 Numpy 数组。值应该在 0 到 1 的范围内(含 0 和 1)。返回对象应该和 `x` 的形状一样。
#
# In[3]:
def normalize(x):
"""
Normalize a list of sample image data in the range of 0 to 1
: x: List of image data. The image shape is (32, 32, 3)
: return: Numpy array of normalize data
"""
# TODO: Implement Function
out = (x-np.min(x))/(np.max(x)-np.min(x))
return out
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_normalize(normalize)
# ### One-hot 编码
#
# 和之前的代码单元一样,你将为预处理实现一个函数。这次,你将实现 `one_hot_encode` 函数。输入,也就是 `x`,是一个标签列表。实现该函数,以返回为 one_hot 编码的 Numpy 数组的标签列表。标签的可能值为 0 到 9。每次调用 `one_hot_encode` 时,对于每个值,one_hot 编码函数应该返回相同的编码。确保将编码映射保存到该函数外面。
#
# 提示:不要重复发明轮子。
#
# In[4]:
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
lb.fit(range(10))
def one_hot_encode(x):
"""
One hot encode a list of sample labels. Return a one-hot encoded vector for each label.
: x: List of sample Labels
: return: Numpy array of one-hot encoded labels
"""
# TODO: Implement Function\
out = lb.transform(x)
return out
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_one_hot_encode(one_hot_encode)
# ### 随机化数据
#
# 之前探索数据时,你已经了解到,样本的顺序是随机的。再随机化一次也不会有什么关系,但是对于这个数据集没有必要。
#
# ## 预处理所有数据并保存
#
# 运行下方的代码单元,将预处理所有 CIFAR-10 数据,并保存到文件中。下面的代码还使用了 10% 的训练数据,用来验证。
#
# In[5]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
# Preprocess Training, Validation, and Testing Data
helper.preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode)
# # 检查点
#
# 这是你的第一个检查点。如果你什么时候决定再回到该记事本,或需要重新启动该记事本,你可以从这里开始。预处理的数据已保存到本地。
#
# In[6]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import pickle
import problem_unittests as tests
import helper
# Load the Preprocessed Validation data
valid_features, valid_labels = pickle.load(open('preprocess_validation.p', mode='rb'))
# ## 构建网络
#
# 对于该神经网络,你需要将每层都构建为一个函数。你看到的大部分代码都位于函数外面。要更全面地测试你的代码,我们需要你将每层放入一个函数中。这样使我们能够提供更好的反馈,并使用我们的统一测试检测简单的错误,然后再提交项目。
#
# >**注意**:如果你觉得每周很难抽出足够的时间学习这门课程,我们为此项目提供了一个小捷径。对于接下来的几个问题,你可以使用 [TensorFlow Layers](https://www.tensorflow.org/api_docs/python/tf/layers) 或 [TensorFlow Layers (contrib)](https://www.tensorflow.org/api_guides/python/contrib.layers) 程序包中的类来构建每个层级,但是“卷积和最大池化层级”部分的层级除外。TF Layers 和 Keras 及 TFLearn 层级类似,因此很容易学会。
#
# >但是,如果你想充分利用这门课程,请尝试自己解决所有问题,不使用 TF Layers 程序包中的任何类。你依然可以使用其他程序包中的类,这些类和你在 TF Layers 中的类名称是一样的!例如,你可以使用 TF Neural Network 版本的 `conv2d` 类 [tf.nn.conv2d](https://www.tensorflow.org/api_docs/python/tf/nn/conv2d),而不是 TF Layers 版本的 `conv2d` 类 [tf.layers.conv2d](https://www.tensorflow.org/api_docs/python/tf/layers/conv2d)。
#
# 我们开始吧!
#
#
# ### 输入
#
# 神经网络需要读取图片数据、one-hot 编码标签和丢弃保留概率(dropout keep probability)。请实现以下函数:
#
# * 实现 `neural_net_image_input`
# * 返回 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder)
# * 使用 `image_shape` 设置形状,部分大小设为 `None`
# * 使用 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) 中的 TensorFlow `name` 参数对 TensorFlow 占位符 "x" 命名
# * 实现 `neural_net_label_input`
# * 返回 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder)
# * 使用 `n_classes` 设置形状,部分大小设为 `None`
# * 使用 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) 中的 TensorFlow `name` 参数对 TensorFlow 占位符 "y" 命名
# * 实现 `neural_net_keep_prob_input`
# * 返回 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder),用于丢弃保留概率
# * 使用 [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) 中的 TensorFlow `name` 参数对 TensorFlow 占位符 "keep_prob" 命名
#
# 这些名称将在项目结束时,用于加载保存的模型。
#
# 注意:TensorFlow 中的 `None` 表示形状可以是动态大小。
# In[7]:
import tensorflow as tf
def neural_net_image_input(image_shape):
"""
Return a Tensor for a batch of image input
: image_shape: Shape of the images
: return: Tensor for image input.
"""
# TODO: Implement Function
out = tf.placeholder(tf.float32,[None,image_shape[0],image_shape[1],image_shape[2]],name='x')
return out
def neural_net_label_input(n_classes):
"""
Return a Tensor for a batch of label input
: n_classes: Number of classes
: return: Tensor for label input.
"""
# TODO: Implement Function
out = tf.placeholder(tf.float32,[None,n_classes],name='y')
return out
def neural_net_keep_prob_input():
"""
Return a Tensor for keep probability
: return: Tensor for keep probability.
"""
# TODO: Implement Function
out = tf.placeholder(tf.float32,name='keep_prob')
return out
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tf.reset_default_graph()
tests.test_nn_image_inputs(neural_net_image_input)
tests.test_nn_label_inputs(neural_net_label_input)
tests.test_nn_keep_prob_inputs(neural_net_keep_prob_input)
# ### 卷积和最大池化层
#
# 卷积层级适合处理图片。对于此代码单元,你应该实现函数 `conv2d_maxpool` 以便应用卷积然后进行最大池化:
#
# * 使用 `conv_ksize`、`conv_num_outputs` 和 `x_tensor` 的形状创建权重(weight)和偏置(bias)。
# * 使用权重和 `conv_strides` 对 `x_tensor` 应用卷积。
# * 建议使用我们建议的间距(padding),当然也可以使用任何其他间距。
# * 添加偏置
# * 向卷积中添加非线性激活(nonlinear activation)
# * 使用 `pool_ksize` 和 `pool_strides` 应用最大池化
# * 建议使用我们建议的间距(padding),当然也可以使用任何其他间距。
#
# **注意**:对于**此层**,**请勿使用** [TensorFlow Layers](https://www.tensorflow.org/api_docs/python/tf/layers) 或 [TensorFlow Layers (contrib)](https://www.tensorflow.org/api_guides/python/contrib.layers),但是仍然可以使用 TensorFlow 的 [Neural Network](https://www.tensorflow.org/api_docs/python/tf/nn) 包。对于所有**其他层**,你依然可以使用快捷方法。
#
# In[8]:
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
"""
Apply convolution then max pooling to x_tensor
:param x_tensor: TensorFlow Tensor
:param conv_num_outputs: Number of outputs for the convolutional layer
:param conv_ksize: kernal size 2-D Tuple for the convolutional layer
:param conv_strides: Stride 2-D Tuple for convolution
:param pool_ksize: kernal size 2-D Tuple for pool
:param pool_strides: Stride 2-D Tuple for pool
: return: A tensor that represents convolution and max pooling of x_tensor
"""
# TODO: Implement Function
_,_,_,color_channels = x_tensor.get_shape()
weight = tf.Variable(tf.truncated_normal([conv_ksize[0],
conv_ksize[1],
color_channels.value,
conv_num_outputs],
stddev=0.1))
bias = tf.Variable(tf.zeros(conv_num_outputs))
# Convolution
conv_layer = tf.nn.conv2d(x_tensor,
weight,
strides=[1,conv_strides[0],conv_strides[1],1],
padding='SAME')
conv_layer = tf.nn.bias_add(conv_layer,
bias)
conv_layer = tf.nn.relu(conv_layer)
# Max pooling
output = tf.nn.max_pool(conv_layer,
ksize=[1,pool_ksize[0],pool_ksize[1],1],
strides=[1,pool_strides[0],pool_strides[1],1],
padding='SAME')
return output
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_con_pool(conv2d_maxpool)
# ### 扁平化层
#
# 实现 `flatten` 函数,将 `x_tensor` 的维度从四维张量(4-D tensor)变成二维张量。输出应该是形状(*部分大小(Batch Size)*,*扁平化图片大小(Flattened Image Size)*)。快捷方法:对于此层,你可以使用 [TensorFlow Layers](https://www.tensorflow.org/api_docs/python/tf/layers) 或 [TensorFlow Layers (contrib)](https://www.tensorflow.org/api_guides/python/contrib.layers) 包中的类。如果你想要更大挑战,可以仅使用其他 TensorFlow 程序包。
#
# In[9]:
def flatten(x_tensor):
"""
Flatten x_tensor to (Batch Size, Flattened Image Size)
: x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
: return: A tensor of size (Batch Size, Flattened Image Size).
"""
# TODO: Implement Function
batch,height,width,channels = x_tensor.get_shape()
flat_size = height.value * width.value * channels.value
output = tf.reshape(x_tensor,
[-1,flat_size])
return output
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_flatten(flatten)
# ### 全连接层
#
# 实现 `fully_conn` 函数,以向 `x_tensor` 应用完全连接的层级,形状为(*部分大小(Batch Size)*,*num_outputs*)。快捷方法:对于此层,你可以使用 [TensorFlow Layers](https://www.tensorflow.org/api_docs/python/tf/layers) 或 [TensorFlow Layers (contrib)](https://www.tensorflow.org/api_guides/python/contrib.layers) 包中的类。如果你想要更大挑战,可以仅使用其他 TensorFlow 程序包。
# In[10]:
def fully_conn(x_tensor, num_outputs):
"""
Apply a fully connected layer to x_tensor using weight and bias
: x_tensor: A 2-D tensor where the first dimension is batch size.
: num_outputs: The number of output that the new tensor should be.
: return: A 2-D tensor where the second dimension is num_outputs.
"""
# TODO: Implement Function
_,hidden_input = x_tensor.get_shape()
hidden_weight = tf.Variable(tf.random_normal([hidden_input.value,num_outputs],
stddev=0.1))
hidden_bias = tf.Variable(tf.zeros(num_outputs))
output = tf.add(tf.matmul(x_tensor,
hidden_weight),
hidden_bias)
output = tf.nn.relu(output)
return output
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_fully_conn(fully_conn)
# ### 输出层
#
# 实现 `output` 函数,向 x_tensor 应用完全连接的层级,形状为(*部分大小(Batch Size)*,*num_outputs*)。快捷方法:对于此层,你可以使用 [TensorFlow Layers](https://www.tensorflow.org/api_docs/python/tf/layers) 或 [TensorFlow Layers (contrib)](https://www.tensorflow.org/api_guides/python/contrib.layers) 包中的类。如果你想要更大挑战,可以仅使用其他 TensorFlow 程序包。
#
# **注意**:该层级不应应用 Activation、softmax 或交叉熵(cross entropy)。
# In[11]:
def output(x_tensor, num_outputs):
"""
Apply a output layer to x_tensor using weight and bias
: x_tensor: A 2-D tensor where the first dimension is batch size.
: num_outputs: The number of output that the new tensor should be.
: return: A 2-D tensor where the second dimension is num_outputs.
"""
# TODO: Implement Function
_,hidden_input = x_tensor.get_shape()
output_weight = tf.Variable(tf.random_normal([hidden_input.value,num_outputs],
stddev=0.1))
output_bias = tf.Variable(tf.zeros(num_outputs))
output = tf.add(tf.matmul(x_tensor,
output_weight),
output_bias)
return output
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_output(output)
# ### 创建卷积模型
#
# 实现函数 `conv_net`, 创建卷积神经网络模型。该函数传入一批图片 `x`,并输出对数(logits)。使用你在上方创建的层创建此模型:
#
# * 应用 1、2 或 3 个卷积和最大池化层(Convolution and Max Pool layers)
# * 应用一个扁平层(Flatten Layer)
# * 应用 1、2 或 3 个完全连接层(Fully Connected Layers)
# * 应用一个输出层(Output Layer)
# * 返回输出
# * 使用 `keep_prob` 向模型中的一个或多个层应用 [TensorFlow 的 Dropout](https://www.tensorflow.org/api_docs/python/tf/nn/dropout)
# In[28]:
def conv_net(x, keep_prob):
"""
Create a convolutional neural network model
: x: Placeholder tensor that holds image data.
: keep_prob: Placeholder tensor that hold dropout keep probability.
: return: Tensor that represents logits
"""
# TODO: Apply 1, 2, or 3 Convolution and Max Pool layers
# Play around with different number of outputs, kernel size and stride
# Function Definition from Above:
# conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
conv_num_outputs = 32
conv_ksize = (5,5)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
conv_layer = conv2d_maxpool(x, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
conv_num_outputs = 64
conv_ksize = (3,3)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
conv_layer = conv2d_maxpool(x, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
conv_num_outputs = 128
conv_ksize = (2,2)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
conv_layer = conv2d_maxpool(x, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
# TODO: Apply a Flatten Layer
# Function Definition from Above:
# flatten(x_tensor)
flat_layer = flatten(conv_layer)
# TODO: Apply 1, 2, or 3 Fully Connected Layers
# Play around with different number of outputs
# Function Definition from Above:
# fully_conn(x_tensor, num_outputs)
num_outputs = 512
fc_layer = fully_conn(flat_layer, num_outputs)
fc_layer = tf.nn.dropout(fc_layer,keep_prob)
# TODO: Apply an Output Layer
# Set this to the number of classes
# Function Definition from Above:
# output(x_tensor, num_outputs)
num_outputs = 10
output_layer = output(fc_layer, num_outputs)
# TODO: return output
return output_layer
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
##############################
## Build the Neural Network ##
##############################
# Remove previous weights, bias, inputs, etc..
tf.reset_default_graph()
# Inputs
x = neural_net_image_input((32, 32, 3))
y = neural_net_label_input(10)
keep_prob = neural_net_keep_prob_input()
# Model
logits = conv_net(x, keep_prob)
# Name logits Tensor, so that is can be loaded from disk after training
logits = tf.identity(logits, name='logits')
# Loss and Optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
tests.test_conv_net(conv_net)
# ## 训练神经网络
#
# ### 单次优化
#
# 实现函数 `train_neural_network` 以进行单次优化(single optimization)。该优化应该使用 `optimizer` 优化 `session`,其中 `feed_dict` 具有以下参数:
#
# * `x` 表示图片输入
# * `y` 表示标签
# * `keep_prob` 表示丢弃的保留率
#
# 每个部分都会调用该函数,所以 `tf.global_variables_initializer()` 已经被调用。
#
# 注意:不需要返回任何内容。该函数只是用来优化神经网络。
#
# In[29]:
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
"""
Optimize the session on a batch of images and labels
: session: Current TensorFlow session
: optimizer: TensorFlow optimizer function
: keep_probability: keep probability
: feature_batch: Batch of Numpy image data
: label_batch: Batch of Numpy label data
"""
# TODO: Implement Function
session.run(optimizer,
feed_dict={x:feature_batch,
y:label_batch,
keep_prob:keep_probability})
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_train_nn(train_neural_network)
# ### 显示数据
#
# 实现函数 `print_stats` 以输出损失和验证准确率。使用全局变量 `valid_features` 和 `valid_labels` 计算验证准确率。使用保留率 `1.0` 计算损失和验证准确率(loss and validation accuracy)。
#
# In[30]:
def print_stats(session, feature_batch, label_batch, cost, accuracy):
"""
Print information about loss and validation accuracy
: session: Current TensorFlow session
: feature_batch: Batch of Numpy image data
: label_batch: Batch of Numpy label data
: cost: TensorFlow cost function
: accuracy: TensorFlow accuracy function
"""
# TODO: Implement Function
keep_probability = 1.0
loss = session.run(cost,
feed_dict={x:feature_batch,
y:label_batch,
keep_prob:keep_probability})
valid_accuracy = session.run(accuracy,
feed_dict={x:valid_features,
y:valid_labels,
keep_prob:keep_probability})
print('loss:{} validation accuracy:{}'.format(loss,valid_accuracy))
# ### 超参数
#
# 调试以下超参数:
# * 设置 `epochs` 表示神经网络停止学习或开始过拟合的迭代次数
# * 设置 `batch_size`,表示机器内存允许的部分最大体积。大部分人设为以下常见内存大小:
#
# * 64
# * 128
# * 256
# * ...
# * 设置 `keep_probability` 表示使用丢弃时保留节点的概率
# In[35]:
# TODO: Tune Parameters
epochs = 20
batch_size = 512
keep_probability = 0.8
# ### 在单个 CIFAR-10 部分上训练
#
# 我们先用单个部分,而不是用所有的 CIFAR-10 批次训练神经网络。这样可以节省时间,并对模型进行迭代,以提高准确率。最终验证准确率达到 50% 或以上之后,在下一部分对所有数据运行模型。
#
# In[36]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
print('Checking the Training on a Single Batch...')
with tf.Session() as sess:
# Initializing the variables
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(epochs):
batch_i = 1
for batch_features, batch_labels in helper.load_preprocess_training_batch(batch_i, batch_size):
train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
print('Epoch {:>2}, CIFAR-10 Batch {}: '.format(epoch + 1, batch_i), end='')
print_stats(sess, batch_features, batch_labels, cost, accuracy)
# ### 完全训练模型
#
# 现在,单个 CIFAR-10 部分的准确率已经不错了,试试所有五个部分吧。
# In[37]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
save_model_path = './image_classification'
print('Training...')
with tf.Session() as sess:
# Initializing the variables
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(epochs):
# Loop over all batches
n_batches = 5
for batch_i in range(1, n_batches + 1):
for batch_features, batch_labels in helper.load_preprocess_training_batch(batch_i, batch_size):
train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
print('Epoch {:>2}, CIFAR-10 Batch {}: '.format(epoch + 1, batch_i), end='')
print_stats(sess, batch_features, batch_labels, cost, accuracy)
# Save Model
saver = tf.train.Saver()
save_path = saver.save(sess, save_model_path)
# # 检查点
#
# 模型已保存到本地。
#
# ## 测试模型
#
# 利用测试数据集测试你的模型。这将是最终的准确率。你的准确率应该高于 50%。如果没达到,请继续调整模型结构和参数。
# In[38]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
get_ipython().magic('matplotlib inline')
get_ipython().magic("config InlineBackend.figure_format = 'retina'")
import tensorflow as tf
import pickle
import helper
import random
# Set batch size if not already set
try:
if batch_size:
pass
except NameError:
batch_size = 64
save_model_path = './image_classification'
n_samples = 4
top_n_predictions = 3
def test_model():
"""
Test the saved model against the test dataset
"""
test_features, test_labels = pickle.load(open('preprocess_test.p', mode='rb'))
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
# Load model
loader = tf.train.import_meta_graph(save_model_path + '.meta')
loader.restore(sess, save_model_path)
# Get Tensors from loaded model
loaded_x = loaded_graph.get_tensor_by_name('x:0')
loaded_y = loaded_graph.get_tensor_by_name('y:0')
loaded_keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
loaded_logits = loaded_graph.get_tensor_by_name('logits:0')
loaded_acc = loaded_graph.get_tensor_by_name('accuracy:0')
# Get accuracy in batches for memory limitations
test_batch_acc_total = 0
test_batch_count = 0
for test_feature_batch, test_label_batch in helper.batch_features_labels(test_features, test_labels, batch_size):
test_batch_acc_total += sess.run(
loaded_acc,
feed_dict={loaded_x: test_feature_batch, loaded_y: test_label_batch, loaded_keep_prob: 1.0})
test_batch_count += 1
print('Testing Accuracy: {}\n'.format(test_batch_acc_total/test_batch_count))
# Print Random Samples
random_test_features, random_test_labels = tuple(zip(*random.sample(list(zip(test_features, test_labels)), n_samples)))
random_test_predictions = sess.run(
tf.nn.top_k(tf.nn.softmax(loaded_logits), top_n_predictions),
feed_dict={loaded_x: random_test_features, loaded_y: random_test_labels, loaded_keep_prob: 1.0})
helper.display_image_predictions(random_test_features, random_test_labels, random_test_predictions)
test_model()
# ## 为何准确率只有50-80%?
#
# 你可能想问,为何准确率不能更高了?首先,对于简单的 CNN 网络来说,50% 已经不低了。纯粹猜测的准确率为10%。但是,你可能注意到有人的准确率[远远超过 80%](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d3130)。这是因为我们还没有介绍所有的神经网络知识。我们还需要掌握一些其他技巧。
#
# ## 提交项目
#
# 提交项目时,确保先运行所有单元,然后再保存记事本。将 notebook 文件另存为“dlnd_image_classification.ipynb”,再在目录 "File" -> "Download as" 另存为 HTML 格式。请在提交的项目中包含 “helper.py” 和 “problem_unittests.py” 文件。
#