Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Does your code contain the mask mAP computation? #139

Open
wants to merge 35 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8b59e12
added mask visualization
souryuu Jul 4, 2017
b0f41c8
fixed multiple mask related issue
souryuu Jul 17, 2017
9b94db9
remove some comments
souryuu Jul 17, 2017
ab1195b
Changes in a network
souryuu Jul 19, 2017
702fae8
Changed training mask sampling method and IoU threshold
souryuu Jul 24, 2017
f5658d6
fixed some indentation
souryuu Jul 28, 2017
93523f4
clean up variable names and comments
souryuu Jul 31, 2017
66898af
fixed test.py
souryuu Aug 1, 2017
9c27716
merged inst.py to sample.py
souryuu Aug 1, 2017
0b30a16
merge to master
souryuu Aug 1, 2017
0195519
clean up comments
souryuu Aug 1, 2017
9adc8e4
comments
souryuu Aug 1, 2017
b79d5c1
fixed only_positive in sample_rpn_outputs
souryuu Aug 1, 2017
2d0514c
fixed only_positive in sample_rpn_outputs_wrt_gt
souryuu Aug 1, 2017
c2027db
changed some hyper params
souryuu Aug 1, 2017
8b96804
remove gt during testing
souryuu Aug 1, 2017
ae98c85
fixed nms in sampling during test
souryuu Aug 3, 2017
4adbc54
fixed conflict from variable names
souryuu Aug 3, 2017
9b351ae
last check before change anchor
souryuu Aug 7, 2017
10ffbd2
changed anchor from 3x3 to 5x3
souryuu Aug 7, 2017
2d1622d
changed anchor to match with MaskRCNN original paper
souryuu Aug 9, 2017
43d3992
speed up sorting in sample_rpn_outputs
souryuu Aug 10, 2017
f69a778
changed detail in config_v1.py
souryuu Aug 10, 2017
e197a0e
fixed number of test data
souryuu Aug 10, 2017
6b41fee
fix wrong sorting in sample_rpn_outputs
souryuu Aug 10, 2017
44f3c67
simplified config_v1 (no need to set _is_training)
souryuu Aug 10, 2017
038973e
Change only_positive to False for training from scratch
souryuu Aug 11, 2017
0a166dc
excluded crowd instances from dataset
souryuu Aug 31, 2017
4b31df9
failed version, don't use this one
souryuu Sep 8, 2017
ec156d6
commit before rollback
souryuu Sep 12, 2017
dca602c
roll back some part to v1
souryuu Sep 12, 2017
52115a5
test.py should now be able to test bounding box AP (mask is not included…
souryuu Sep 12, 2017
2f636d8
added segmentation evaluation
souryuu Sep 13, 2017
569a0aa
fixed shuffle and queue during training
souryuu Sep 25, 2017
eec8946
fixed some memory issues (use script/train.sh for training)
souryuu Sep 29, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion libs/boxes/anchor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,25 @@ def anchors_plane(height, width, stride = 1.0,
# ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0])
# base = kwargs.setdefault('base', 16)
anc = anchors(scales, ratios, base)
all_anchors = cython_anchor.anchors_plane(height, width, stride, anc)
all_anchors = cython_anchor.anchors_plane(height, width, stride, anc).astype(np.float32)
return all_anchors

def jitter_gt_boxes(gt_boxes, jitter=0.1):
    """Randomly translate ground-truth boxes before adding them as RoIs.

    Each box receives an independent random shift of up to +/- jitter/2
    of its own width (in x) and height (in y); both opposite edges move
    together, so every box keeps its original size.  This makes the
    classification/regression heads more robust to imperfect proposals.

    gt_boxes: (G, 5) array of [x1, y1, x2, y2, class]
    jitter:   fraction of the box size used as the jitter range
    Returns a new array; the input is left untouched.
    """
    out = gt_boxes.copy()
    num_boxes = out.shape[0]
    box_ws = out[:, 2] - out[:, 0] + 1.0
    box_hs = out[:, 3] - out[:, 1] + 1.0
    # Uniform in [-0.5, 0.5), scaled by jitter * box size.
    dx = (np.random.rand(num_boxes) - 0.5) * jitter * box_ws
    dy = (np.random.rand(num_boxes) - 0.5) * jitter * box_hs
    out[:, 0] += dx
    out[:, 2] += dx
    out[:, 1] += dy
    out[:, 3] += dy
    return out

# Written by Ross Girshick and Sean Bell
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
scales=2 ** np.arange(3, 6)):
Expand Down
27 changes: 19 additions & 8 deletions libs/boxes/bbox_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@ def bbox_transform(ex_rois, gt_rois):

# warnings.catch_warnings()
# warnings.filterwarnings('error')
targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = 5.0 * np.log(gt_widths / ex_widths)
targets_dh = 5.0 * np.log(gt_heights / ex_heights)

# targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths
# targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights
# targets_dw = 5.0 * np.log(gt_widths / ex_widths)
# targets_dh = 5.0 * np.log(gt_heights / ex_heights)

targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = np.log(gt_widths / ex_widths)
targets_dh = np.log(gt_heights / ex_heights)

targets = np.vstack(
(targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
Expand All @@ -51,10 +57,15 @@ def bbox_transform_inv(boxes, deltas):
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights

dx = deltas[:, 0::4] * 0.1
dy = deltas[:, 1::4] * 0.1
dw = deltas[:, 2::4] * 0.2
dh = deltas[:, 3::4] * 0.2
# dx = deltas[:, 0::4] * 0.1
# dy = deltas[:, 1::4] * 0.1
# dw = deltas[:, 2::4] * 0.2
# dh = deltas[:, 3::4] * 0.2

dx = deltas[:, 0::4]
dy = deltas[:, 1::4]
dw = deltas[:, 2::4]
dh = deltas[:, 3::4]

pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
Expand Down
45 changes: 30 additions & 15 deletions libs/configs/config_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# dataset
##########################
tf.app.flags.DEFINE_bool(
'update_bn', False,
'update_bn', True,
'Whether or not to update bacth normalization layer')

tf.app.flags.DEFINE_integer(
Expand All @@ -41,6 +41,10 @@
'dataset_split_name', 'train2014',
'The name of the train/test/val split.')

tf.app.flags.DEFINE_string(
'dataset_split_name_test', 'train2014',#val2014
'The name of the test/val split.')

tf.app.flags.DEFINE_string(
'dataset_dir', 'data/coco/',
'The directory where the dataset files are stored.')
Expand Down Expand Up @@ -75,7 +79,7 @@
######################

tf.app.flags.DEFINE_float(
'weight_decay', 0.00005, 'The weight decay on the model weights.')
'weight_decay', 0.00001, 'The weight decay on the model weights.')

tf.app.flags.DEFINE_string(
'optimizer', 'momentum',
Expand Down Expand Up @@ -114,23 +118,25 @@
'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')

tf.app.flags.DEFINE_float(
'momentum', 0.99,
'momentum', 0.9,
'The momentum for the MomentumOptimizer and RMSPropOptimizer.')

tf.app.flags.DEFINE_float('rmsprop_momentum', 0.99, 'Momentum.')

tf.app.flags.DEFINE_float('rmsprop_decay', 0.99, 'Decay term for RMSProp.')

tf.app.flags.DEFINE_float('batch_norm_decay', 0.9, 'Decay term for batch normalization.')

#######################
# Learning Rate Flags #
#######################

tf.app.flags.DEFINE_string(
'learning_rate_decay_type', 'exponential',
'learning_rate_decay_type', 'fixed',
'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
' or "polynomial"')

tf.app.flags.DEFINE_float('learning_rate', 0.002,
tf.app.flags.DEFINE_float('learning_rate', 0.0001,#0.0002
'Initial learning rate.')

tf.app.flags.DEFINE_float(
Expand Down Expand Up @@ -226,20 +232,21 @@
#######################
# BOX Flags #
#######################
tf.app.flags.DEFINE_float(
'rpn_bg_threshold', 0.3,
'Only regions which intersection is larger than fg_threshold are considered to be fg')

tf.app.flags.DEFINE_float(
'rpn_fg_threshold', 0.7,
'Only regions which intersection is larger than fg_threshold are considered to be fg')

tf.app.flags.DEFINE_float(
'fg_threshold', 0.7,
'rpn_bg_threshold', 0.3,
'Only regions which intersection is less than bg_threshold are considered to be fg')

tf.app.flags.DEFINE_float(
'fg_threshold', 0.5,
'Only regions which intersection is larger than fg_threshold are considered to be fg')

tf.app.flags.DEFINE_float(
'bg_threshold', 0.3,
'bg_threshold', 0.5,
'Only regions which intersection is less than bg_threshold are considered to be bg')

tf.app.flags.DEFINE_integer(
Expand All @@ -255,12 +262,12 @@
'Number of rois that should be sampled to train this network')

tf.app.flags.DEFINE_integer(
'rpn_batch_size', 500,
'rpn_batch_size', 256,
'Number of rpn anchors that should be sampled to train this network')

tf.app.flags.DEFINE_integer(
'allow_border', 10,
'How many pixels out of an image')
'allow_border', 0.0,
'Percentage of bounding box height and length that are allowed to be out of an image boundary')

##################################
# NMS #
Expand All @@ -274,9 +281,17 @@
'post_nms_top_n', 2000,
'Number of rpn anchors that should be sampled after nms')

tf.app.flags.DEFINE_integer(
'post_nms_inst_n', 300,
"Number of inst after NMS")

tf.app.flags.DEFINE_float(
'rpn_nms_threshold', 0.7,
'NMS threshold')
'NMS threshold in RPN')

tf.app.flags.DEFINE_float(
'mask_nms_threshold', 0.3,
'NMS threshold in mask network during testing')

##################################
# Mask #
Expand All @@ -290,7 +305,7 @@
'mask_threshold', 0.50,
'Least intersection of a positive mask')
tf.app.flags.DEFINE_integer(
'masks_per_image', 64,
'masks_per_image', 256,
'Number of rois that should be sampled to train this network')

tf.app.flags.DEFINE_float(
Expand Down
4 changes: 2 additions & 2 deletions libs/datasets/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ def _height_decoder(keys_to_tensors):
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES)

def read(tfrecords_filename):
def read(tfrecords_filename, is_training=False):

if not isinstance(tfrecords_filename, list):
tfrecords_filename = [tfrecords_filename]
filename_queue = tf.train.string_input_producer(
tfrecords_filename, num_epochs=100)
tfrecords_filename, shuffle=is_training)#, num_epochs=100

options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
reader = tf.TFRecordReader(options=options)
Expand Down
6 changes: 3 additions & 3 deletions libs/datasets/dataset_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def get_dataset(dataset_name, split_name, dataset_dir,
file_pattern = dataset_name + '_' + split_name + '*.tfrecord'

tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern)
image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords)
image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords, is_training=is_training)

image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training)
image, new_ih, new_iw, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training)
#visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3))

return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id
return image, ih, iw, new_ih, new_iw, gt_boxes, gt_masks, num_instances, img_id

31 changes: 26 additions & 5 deletions libs/datasets/download_and_convert_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,11 @@ def _get_coco_masks(coco, img_id, height, width, img_name):
if bboxes.shape[0] <= 0:
bboxes = np.zeros([0, 4], dtype=np.float32)
classes = np.zeros([0], dtype=np.float32)
print ('None Annotations %s' % img_name)
LOG('None Annotations %s' % img_name)
#print ('None Annotations %s' % img_name)
#LOG('None Annotations %s' % img_name)
no_annotation_flag = True
else:
no_annotation_flag = False
bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
Expand All @@ -228,7 +231,7 @@ def _get_coco_masks(coco, img_id, height, width, img_name):
mask = mask.astype(np.uint8)
assert masks.shape[0] == gt_boxes.shape[0], 'Shape Error'

return gt_boxes, masks, mask
return gt_boxes, masks, mask, no_annotation_flag



Expand Down Expand Up @@ -286,11 +289,24 @@ def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name):

# jump over the damaged images
if str(img_id) == '320612':
sys.stdout.write('\r>> skipping image %d/%d shard %d\n' % (
i + 1, len(imgs), shard_id))
sys.stdout.flush()
continue

# process anns
height, width = imgs[i][1]['height'], imgs[i][1]['width']
gt_boxes, masks, mask = _get_coco_masks(coco, img_id, height, width, img_name)
if float(height)/float(width) > 3.02 or float(width)/float(height) > 3.02:
sys.stdout.write('\r>> skipping image %d/%d shard %d height:%d width:%d\n' % (
i + 1, len(imgs), shard_id, height, width))
sys.stdout.flush()
continue
gt_boxes, masks, mask, no_annotation_flag = _get_coco_masks(coco, img_id, height, width, img_name)
if no_annotation_flag is True:
sys.stdout.write('\r>> skipping image %d/%d shard %d no annotation \n' % (
i + 1, len(imgs), shard_id))
sys.stdout.flush()
continue

# read image as RGB numpy
img = np.array(Image.open(img_name))
Expand Down Expand Up @@ -402,7 +418,12 @@ def is_in_minival(img_id, minival):
height, width = imgs[i][1]['height'], imgs[i][1]['width']
coco = coco_train if i < num_of_train else coco_val

gt_boxes, masks, mask = _get_coco_masks(coco, img_id, height, width, img_name)
gt_boxes, masks, mask, no_annotation_flag = _get_coco_masks(coco, img_id, height, width, img_name)
if no_annotation_flag is True:
sys.stdout.write('\r>> skipping image %d/%d shard %d no annotation \n' % (
i + 1, len(imgs), shard_id))
sys.stdout.flush()
continue

# read image as RGB numpy
img = np.array(Image.open(img_name))
Expand Down
Loading