DrSleep · salehjg · Nov 15, 2017 · Nov 15, 2017 · Nov 15, 2017
diff --git a/evaluate.py b/evaluate.py
@@ -28,7 +28,7 @@
 
 def get_arguments():
     """Parse all the arguments provided from the CLI.
-    
+
     Returns:
       A list of parsed arguments.
     """
@@ -49,22 +49,22 @@ def get_arguments():
 
 def load(saver, sess, ckpt_path):
     '''Load trained weights.
-    
+
     Args:
       saver: TensorFlow saver object.
       sess: TensorFlow session.
       ckpt_path: path to checkpoint file with parameters.
-    ''' 
+    '''
     saver.restore(sess, ckpt_path)
     print("Restored model parameters from {}".format(ckpt_path))
 
 def main():
     """Create the model and start the evaluation process."""
     args = get_arguments()
-    
+
     # Create queue coordinator.
     coord = tf.train.Coordinator()
-    
+
     # Load reader.
     with tf.name_scope("create_inputs"):
         reader = ImageReader(
@@ -84,36 +84,38 @@ def main():
 
     # Which variables to load.
     restore_var = tf.global_variables()
-    
+
     # Predictions.
     raw_output = net.layers['fc1_voc12']
     raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
     raw_output = tf.argmax(raw_output, dimension=3)
     pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor.
-    
+
     # mIoU
     pred = tf.reshape(pred, [-1,])
     gt = tf.reshape(label_batch, [-1,])
-    weights = tf.cast(tf.less_equal(gt, args.num_classes - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
-    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes, weights=weights)
-
-    # Set up tf session and initialize variables. 
+    indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1)  # ignore all labels >= num_classes
+    gt = tf.cast(tf.gather(gt, indices), tf.int32)
+    pred = tf.gather(pred, indices)
+    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=args.num_classes)
+
+    # Set up tf session and initialize variables.
     config = tf.ConfigProto()
     config.gpu_options.allow_growth = True
     sess = tf.Session(config=config)
     init = tf.global_variables_initializer()
-    
+
     sess.run(init)
     sess.run(tf.local_variables_initializer())
-    
+
     # Load weights.
     loader = tf.train.Saver(var_list=restore_var)
     if args.restore_from is not None:
         load(loader, sess, args.restore_from)
-    
+
     # Start queue threads.
     threads = tf.train.start_queue_runners(coord=coord, sess=sess)
-    
+
     # Iterate over training steps.
     for step in range(args.num_steps):
         preds, _ = sess.run([pred, update_op])
@@ -122,6 +124,6 @@ def main():
     print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
     coord.request_stop()
     coord.join(threads)
-    
+
 if __name__ == '__main__':
     main()
diff --git a/inference.py b/inference.py
@@ -30,12 +30,16 @@ def get_arguments():
       A list of parsed arguments.
     """
     parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
+
     parser.add_argument("img_path", type=str,
-                        help="Path to the RGB image file.")
+                        help="Path to the RGB image file.", default='./images/test_indoor2.jpg')
+
     parser.add_argument("model_weights", type=str,
-                        help="Path to the file with model weights.")
+                        help="Path to the file with model weights.", default='./deeplab_resnet.ckpt')
+
     parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
                         help="Number of classes to predict (including background).")
+
     parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
                         help="Where to save predicted mask.")
     return parser.parse_args()

diff --git a/inference_webcam.py b/inference_webcam.py
@@ -0,0 +1,127 @@
+"""Run DeepLab-ResNet on a given image.
+
+This script computes a segmentation mask for a given image.
+"""
+
+from __future__ import print_function
+
+import argparse
+from datetime import datetime
+import os
+import sys
+import time
+
+from PIL import Image
+
+import tensorflow as tf
+import numpy as np
+
+from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label
+import cv2
+
+
+IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
+
+NUM_CLASSES = 21
+SAVE_DIR = './output/'
+
+def get_arguments():
+    """Parse all the arguments provided from the CLI.
+
+    Returns:
+      A list of parsed arguments.
+    """
+    parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference(webcam).")
+
+    parser.add_argument("--model_weights", type=str,
+                        help="Path to the file with model weights.", default='./deeplab_resnet.ckpt')
+
+    parser.add_argument("--num-classes", type=int, default=NUM_CLASSES,
+                        help="Number of classes to predict (including background).")
+
+    return parser.parse_args()
+
+def load(saver, sess, ckpt_path):
+    '''Load trained weights.
+
+    Args:
+      saver: TensorFlow saver object.
+      sess: TensorFlow session.
+      ckpt_path: path to checkpoint file with parameters.
+    ''' 
+    saver.restore(sess, ckpt_path)
+    print("Restored model parameters from {}".format(ckpt_path))
+
+def main():
+    """Create the model and start the evaluation process."""
+    args = get_arguments()
+
+    cv2.namedWindow("preview")
+    cv2.namedWindow("legend")
+    cv2.namedWindow("SemanticSegmentation")
+    leg = cv2.imread('./images/colour_scheme.png')
+    cv2.imshow('legend', leg)
+
+    vc = cv2.VideoCapture(0)
+
+    if vc.isOpened():  # try to get the first frame
+        rval, frame = vc.read()
+    else:
+        rval = False
+
+    # TODO: query the webcam resolution via OpenCV instead of hard-coding 480x640.
+    img_input = tf.placeholder(dtype=tf.uint8, shape=[480,640,3])
+    # Reverse channel order. NOTE(review): cv2 frames are presumably already BGR, so this may feed RGB - confirm.
+    img_r, img_g, img_b = tf.split(axis=2, num_or_size_splits=3, value=img_input)
+    img = tf.cast(tf.concat(axis=2, values=[img_b, img_g, img_r]), dtype=tf.float32)
+    # Subtract mean.
+    img -= IMG_MEAN 
+
+    # Create network.
+    net = DeepLabResNetModel({'data': tf.expand_dims(img, dim=0)}, is_training=False, num_classes=args.num_classes)
+
+    # Which variables to load.
+    restore_var = tf.global_variables()
+
+    # Predictions.
+    raw_output = net.layers['fc1_voc12']
+    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2,])
+    raw_output_up = tf.argmax(raw_output_up, dimension=3)
+    pred = tf.expand_dims(raw_output_up, dim=3)
+
+
+    # Set up TF session and initialize variables. 
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+    sess = tf.Session(config=config)
+    init = tf.global_variables_initializer()
+
+    sess.run(init)
+
+    # Load weights.
+    loader = tf.train.Saver(var_list=restore_var)
+    load(loader, sess, args.model_weights)
+
+    # Perform inference.
+
+
+    while True:
+        preds = sess.run(pred,feed_dict={img_input:frame})
+
+        msk = decode_labels(preds, num_classes=args.num_classes)
+        im = Image.fromarray(msk[0])
+
+        open_cv_image = np.array(im)
+        # Convert RGB to BGR
+        open_cv_image = open_cv_image[:, :, ::-1].copy()
+
+        cv2.imshow("SemanticSegmentation", open_cv_image)
+        cv2.imshow("preview", frame)
+        rval, frame = vc.read()
+
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+
+if __name__ == '__main__':
+    main()
diff --git a/kaffe/tensorflow/network.py b/kaffe/tensorflow/network.py
@@ -3,7 +3,10 @@
 slim = tf.contrib.slim
 
 DEFAULT_PADDING = 'SAME'
-
+try:
+    basestring
+except NameError:
+    basestring = str
 
 def layer(op):
     '''Decorator for composable network layers.'''
@@ -125,7 +128,7 @@ def conv(self,
         # Convolution for a given input and kernel
         convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
         with tf.variable_scope(name) as scope:
-            kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
+            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
             if group == 1:
                 # This is the common-case. Convolve the input without any further complications.
                 output = convolve(input, kernel)
@@ -167,7 +170,7 @@ def atrous_conv(self,
         # Convolution for a given input and kernel
         convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding)
         with tf.variable_scope(name) as scope:
-            kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
+            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
             if group == 1:
                 # This is the common-case. Convolve the input without any further complications.
                 output = convolve(input, kernel)

diff --git a/train.py b/train.py
@@ -250,4 +250,4 @@ def main():
     coord.join(threads)
 
 if __name__ == '__main__':
-    main()
+    main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -250,4 +250,4 @@ def main(): @@
         coord.join(threads)
     if __name__ == '__main__':
-        main()
+        main()