# Load the ground truth for one image with augmentation switched on and
# use_mini_mask=True, then log the resulting mask array.
image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
    dataset, config, image_id, augment=True, use_mini_mask=True)
log("mask", mask)

# Show the image followed by at most seven of its instance masks
# (one mask per slice along the last axis).
num_shown = min(mask.shape[-1], 7)
instance_masks = [mask[:, :, idx] for idx in range(num_shown)]
display_images([image] + instance_masks)
上面调用函数modellib.load_image_gt,参数use_mini_mask设置为True。效果如下图所示。首先做了镜像对称变化,另外我们可以看到mask的shape从(128, 128, 2)变成了(56, 56, 2),而且mask都是Bounding Box里的mask。

图:mini mask和增强
6、Anchor
anchor的顺序非常重要,训练和预测要使用相同的anchor序列。另外也要匹配卷积的运算顺序。对于一个FPN,anchor的顺序要便于卷积层的输出预测anchor的得分和位移(shift)。因此通常使用如下顺序:
首先按照金字塔的层级排序,首先是第一层,然后是第二层
对于同一层,按照卷积的顺序从左上到右下逐行排序
对于同一个点,按照宽高比(aspect ratio)排序
Anchor Stride:在FPN网络结构下,前几层的feature map是高分辨率的。比如输入图片是1024x1024,则第一层的feature map是256x256,这将产生大约200k个anchor(256×256×3),这些anchor是32x32的,而它们的stride是4个像素,因此会有大量重叠的anchor。如果我们每隔一个cell(而不是每个cell)生成一次anchor,这将极大降低计算量。这里使用的stride是2,这和论文使用的1不同。生成anchor的代码如下:
# Generate Anchors
backbone_shapes = modellib.compute_backbone_shapes(config, config.IMAGE_SHAPE)
anchors = utils.generate_pyramid_anchors(
    config.RPN_ANCHOR_SCALES,
    config.RPN_ANCHOR_RATIOS,
    backbone_shapes,
    config.BACKBONE_STRIDES,
    config.RPN_ANCHOR_STRIDE,
)

# Print a summary of the generated anchors.
num_levels = len(backbone_shapes)
anchors_per_cell = len(config.RPN_ANCHOR_RATIOS)
print("Count: ", anchors.shape[0])
print("Scales: ", config.RPN_ANCHOR_SCALES)
print("ratios: ", config.RPN_ANCHOR_RATIOS)
print("Anchors per Cell: ", anchors_per_cell)
print("Levels: ", num_levels)

# Anchor count per pyramid level: cells in the level's feature map times
# anchors per cell, divided by the squared anchor stride.
anchors_per_level = []
for level, shape in enumerate(backbone_shapes):
    num_cells = shape[0] * shape[1]
    level_count = anchors_per_cell * num_cells // config.RPN_ANCHOR_STRIDE**2
    anchors_per_level.append(level_count)
    print("Anchors in Level {}: {}".format(level, level_count))
输出的统计信息是:
Count: 4092 Scales: (8, 16, 32, 64, 128) ratios: [0.5, 1, 2] Anchors per Cell: 3 Levels: 5 Anchors in Level 0: 3072 Anchors in Level 1: 768 Anchors in Level 2: 192 Anchors in Level 3: 48 Anchors in Level 4: 12
我们来分析一下,总共有5种scales。对于第0层,Feature map是32x32,每个cell有3种宽高比,因此总共有3072个anchor;而第一层的Feature map是16x16,所以有768个anchor。我们来看每一层的feature map中心cell的anchor。
## Visualize anchors of one cell at the center of the feature map of a specific level
# Load and draw random image
image_id = np.random.choice(dataset.image_ids, 1)[0]
image, image_meta, _, _, _ = modellib.load_image_gt(dataset, config, image_id)
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(image)
levels = len(backbone_shapes)

# Build the palette once, outside the loop, so every level indexes into the
# same list of colors instead of a list regenerated on each iteration.
colors = visualize.random_colors(levels)

for level in range(levels):
    # Slice this level's anchors out of the flat anchor array; the offset is
    # the number of anchors belonging to all previous levels.
    level_start = sum(anchors_per_level[:level])
    level_anchors = anchors[level_start:level_start + anchors_per_level[level]]
    print("Level {}. Anchors: {:6} Feature map Shape: {}".format(
        level, level_anchors.shape[0], backbone_shapes[level]))

    # Offset (within this level) of the first anchor of the center cell.
    # The row and column terms are scaled by RPN_ANCHOR_STRIDE, matching the
    # division by RPN_ANCHOR_STRIDE**2 used for the per-level anchor counts.
    center_cell = backbone_shapes[level] // 2
    center_anchor = anchors_per_cell * (
        (center_cell[0] * backbone_shapes[level][1] / config.RPN_ANCHOR_STRIDE**2)
        + center_cell[1] / config.RPN_ANCHOR_STRIDE)
    level_center = int(center_anchor)

    # Draw anchors. Brightness shows the order in the array, dark to bright.
    center_anchors = level_anchors[level_center:level_center + anchors_per_cell]
    for i, rect in enumerate(center_anchors):
        y1, x1, y2, x2 = rect
        p = patches.Rectangle(
            (x1, y1), x2 - x1, y2 - y1, linewidth=2, facecolor='none',
            edgecolor=(i + 1) * np.array(colors[level]) / anchors_per_cell)
        ax.add_patch(p)
结果如下图所示。

图:Anchor
7、训练数据生成器
我们在训练Mask R-CNN的时候,会计算候选的区域和真实的目标区域的IoU,从而选择正例和负例。
# Create the training data generator. random_rois is the number of random
# ROIs requested from the generator; a falsy value takes the else branch.
random_rois = 2000
g = modellib.data_generator(
    dataset, config, shuffle=True, random_rois=random_rois,
    batch_size=4, detection_targets=True)

# Get Next Image
batch_inputs, batch_targets = next(g)
if random_rois:
    (normalized_images, image_meta, rpn_match, rpn_bbox, gt_class_ids,
     gt_boxes, gt_masks, rpn_rois, rois) = batch_inputs
    mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = batch_targets

    log("rois", rois)
    log("mrcnn_class_ids", mrcnn_class_ids)
    log("mrcnn_bbox", mrcnn_bbox)
    log("mrcnn_mask", mrcnn_mask)
else:
    # gt_class_ids is unpacked here too (same position as in the branch
    # above) because it is logged below regardless of which branch ran.
    (normalized_images, image_meta, rpn_match, rpn_bbox, gt_class_ids,
     gt_boxes, gt_masks) = batch_inputs

log("gt_class_ids", gt_class_ids)
log("gt_boxes", gt_boxes)
log("gt_masks", gt_masks)
log("rpn_match", rpn_match)
log("rpn_bbox", rpn_bbox)

# Report which image is the first one in the batch.
image_id = modellib.parse_image_meta(image_meta)["image_id"][0]
print("image_id: ", image_id, dataset.image_reference(image_id))
# Remove the last dim in mrcnn_class_ids. It's only added
# to satisfy Keras restriction on target shape.
mrcnn_class_ids = mrcnn_class_ids[:, :, 0]

# Index of the batch sample inspected below.
b = 0

# Restore original image (reverse normalization)
sample_image = modellib.unmold_image(normalized_images[b], config)

# Compute anchor shifts: take the anchors whose RPN match is 1 and apply the
# first len(indices) rows of rpn_bbox, scaled by RPN_BBOX_STD_DEV, to them.
indices = np.where(rpn_match[b] == 1)[0]
positive_deltas = rpn_bbox[b, :len(indices)] * config.RPN_BBOX_STD_DEV
refined_anchors = utils.apply_box_deltas(anchors[indices], positive_deltas)
log("anchors", anchors)
log("refined_anchors", refined_anchors)
# Split the anchors of sample b by their RPN match value:
# 1 is reported as positive, -1 as negative and 0 as neutral.
match_labels = rpn_match[b]
positive_anchor_ids = np.where(match_labels == 1)[0]
print("Positive anchors: {}".format(len(positive_anchor_ids)))
negative_anchor_ids = np.where(match_labels == -1)[0]
print("Negative anchors: {}".format(len(negative_anchor_ids)))
neutral_anchor_ids = np.where(match_labels == 0)[0]
print("Neutral anchors: {}".format(len(neutral_anchor_ids)))

# ROI breakdown by class; classes with a zero count are not printed.
roi_counts = np.bincount(mrcnn_class_ids[b].flatten())
for class_name, count in zip(dataset.class_names, roi_counts):
    if not count:
        continue
    print("{:23}: {}".format(class_name[:20], count))

# Show positive anchors together with their refined boxes.
visualize.draw_boxes(
    sample_image,
    boxes=anchors[positive_anchor_ids],
    refined_boxes=refined_anchors,
)
输出为:
(编辑:温州站长网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!
|