ddd

xiaoxiao2021-02-28 35

1、detectron整个的模型建立过程，以关键点为例说明

2、整体的流程如上，但是从 FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in) 开始，我们要详细介绍一下。 1）在这个函数里面，除了加上rpn的输出之外，还会顺带加上proposal的生成过程：

# The proposals generated for this level are written to the blob
# 'rpn_rois_fpn<lvl>'; 'rpn_roi_probs_fpn<lvl>' holds the score of each
# proposal.
model.GenerateProposals(
    [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
    ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
    anchors=lvl_anchors,
    spatial_scale=sc
)

2）接下来是model.CollectAndDistributeFpnRpnProposals()这个函数，这个函数将上面产生的proposal分配给各个level的FPN网络，并且生成一系列的标签

class CollectAndDistributeFpnRpnProposalsOp(object):
    """Merge the per-level RPN proposals and emit the downstream RoI blobs.

    In training mode the merged proposals are added to the roidb and the
    Fast R-CNN training blobs are built from it; otherwise the proposals are
    handed directly to ``distribute``.
    """

    def __init__(self, train):
        # Truthy value selects the training path in forward().
        self._train = train

    def forward(self, inputs, outputs):
        """See modeling.detector.CollectAndDistributeFpnRpnProposals for
        inputs/outputs documentation.
        """
        # inputs is
        # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
        #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
        # If training with Faster R-CNN, then inputs will additionally include
        #  + [roidb, im_info]
        # collect() concatenates the rois of all levels and keeps only the
        # top-scoring ones (at most RPN_POST_NMS_TOP_N of them).
        rois = collect(inputs, self._train)
        if self._train:
            # During training we reuse the data loader code. We populate roidb
            # entries on the fly using the rois generated by RPN.
            # im_info: [[im_height, im_width, im_scale], ...]
            im_info = inputs[-1].data
            # Column 2 of im_info is the per-image scale factor.
            im_scales = im_info[:, 2]
            # The roidb arrives serialized in the second-to-last input.
            roidb = blob_utils.deserialize(inputs[-2].data)
            # For historical consistency with the original Faster R-CNN
            # implementation we are *not* filtering crowd proposals.
            # This choice should be investigated in the future (it likely does
            # not matter).
            # Adds the proposals to the roidb entries in place (see
            # add_proposals below).
            json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
            # Compute training labels for the RPN proposals; also handles
            # distributing the proposals over FPN levels
            # Names of the blobs Fast R-CNN training needs.
            output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
            blobs = {k: [] for k in output_blob_names}
            # Fill the Fast R-CNN blobs from the updated roidb.
            roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
            # Copy every blob into the output at the same position.
            for i, k in enumerate(output_blob_names):
                blob_utils.py_op_copy_blob(blobs[k], outputs[i])
        else:
            # For inference we have a special code path that avoids some data
            # loader overhead
            distribute(rois, None, outputs, self._train)


def collect(inputs, is_training):
    """Concatenate the RPN proposals of all levels and keep the best ones.

    ``inputs`` holds one roi blob per RPN level followed by one score blob per
    level; when ``is_training`` is true the last two entries are not scores
    and are dropped. Returns the rois ordered by descending score, truncated
    to ``cfg[TRAIN|TEST].RPN_POST_NMS_TOP_N`` rows.
    """
    cfg_key = 'TRAIN' if is_training else 'TEST'
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    k_max = cfg.FPN.RPN_MAX_LEVEL
    k_min = cfg.FPN.RPN_MIN_LEVEL
    num_lvls = k_max - k_min + 1
    roi_inputs = inputs[:num_lvls]
    score_inputs = inputs[num_lvls:]
    if is_training:
        score_inputs = score_inputs[:-2]

    # rois are in [[batch_idx, x0, y0, x1, y1], ...] format
    # Combine predictions across all levels and retain the top scoring
    rois = np.concatenate([blob.data for blob in roi_inputs])
    scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()
    inds = np.argsort(-scores)[:post_nms_topN]
    rois = rois[inds, :]
    return rois


# ---------------------------------------------------------------------------
# The functions below are the dataset-side helpers; forward() above reaches
# add_proposals as json_dataset.add_proposals.
# ---------------------------------------------------------------------------


def add_proposals(roidb, rois, scales, crowd_thresh):
    """Add proposal boxes (rois) to an roidb that has ground-truth annotations
    but no proposals. If the proposals are not at the original image scale,
    specify the scale factor that separate them in scales.
    """
    box_list = []
    for i in range(len(roidb)):
        # Factor that maps boxes back to the original image scale.
        inv_im_scale = 1. / scales[i]
        # Column 0 of each roi is the image index; pick this image's rois.
        idx = np.where(rois[:, 0] == i)[0]
        # Undo the image scaling so the proposals live in original image
        # coordinates. This is the first place where the scale is reverted.
        box_list.append(rois[idx, 1:] * inv_im_scale)
    # Append the proposals to every roidb entry.
    _merge_proposal_boxes_into_roidb(roidb, box_list)
    if crowd_thresh > 0:
        # Mark proposals that overlap crowd regions.
        _filter_crowd_proposals(roidb, crowd_thresh)
    # Record, for every box, its best-overlapping class and that overlap,
    # and run the sanity checks.
    _add_class_assignments(roidb)


def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros(
            (num_boxes, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            # Ground-truth boxes and their classes.
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            # Overlap between every proposal and every gt box.
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
            )
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box:
            # the overlap is stored in the column of the matched gt's class.
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            # Index (within the entry) of the gt matched to each proposal;
            # proposals without any overlap keep -1.
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        # Append the proposal data to every field of the entry; the existing
        # ground-truth rows are kept in front.
        entry['boxes'] = np.append(
            entry['boxes'],
            boxes.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype)
        )
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype)
        )
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype)
        )
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(
                entry['box_to_gt_ind_map'].dtype, copy=False
            )
        )


def _filter_crowd_proposals(roidb, crowd_thresh):
    """Finds proposals that are inside crowd regions and marks them as
    overlap = -1 with each ground-truth rois, which means they will be excluded
    from training.
    """
    for entry in roidb:
        gt_overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(entry['is_crowd'] == 1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        crowd_boxes = box_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = box_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        iscrowd_flags = [int(True)] * len(crowd_inds)
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd_flags)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        gt_overlaps[non_gt_inds[bad_inds], :] = -1
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps)


def _add_class_assignments(roidb):
    """Compute object category assignment for each box associated with each
    roidb entry.
    """
    for entry in roidb:
        gt_overlaps = entry['gt_overlaps'].toarray()
        # max overlap with gt over classes (columns)
        max_overlaps = gt_overlaps.max(axis=1)
        # gt class that had the max overlap
        max_classes = gt_overlaps.argmax(axis=1)
        entry['max_classes'] = max_classes
        entry['max_overlaps'] = max_overlaps
        # sanity checks
        # if max overlap is 0, the class must be background (class 0)
        zero_inds = np.where(max_overlaps == 0)[0]
        assert all(max_classes[zero_inds] == 0)
        # if max overlap > 0, the class must be a fg class (not class 0)
        nonzero_inds = np.where(max_overlaps > 0)[0]
        assert all(max_classes[nonzero_inds] != 0)

再来看add_fast_rcnn_blobs

def add_fast_rcnn_blobs(blobs, im_scales, roidb):
    """Fill ``blobs`` with the inputs needed to train Fast R-CNN style models.

    ``blobs`` maps blob names to lists and is updated in place. ``im_scales``
    is indexed by image position and ``roidb`` is the sequence of per-image
    entries. Returns True, unless keypoints are enabled, in which case the
    result of ``finalize_keypoint_minibatch`` is returned.
    """
    # Sample training RoIs image by image and queue each sampled array under
    # its blob name.
    for batch_idx, entry in enumerate(roidb):
        sampled = _sample_rois(entry, im_scales[batch_idx], batch_idx)
        for name in sampled:
            blobs[name].append(sampled[name])

    # Turn every non-empty list of per-image arrays into a single tensor.
    for name in list(blobs):
        parts = blobs[name]
        if isinstance(parts, list) and parts:
            blobs[name] = np.concatenate(parts)

    # Split the RoIs over the FPN levels when multilevel RoIs are configured.
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(blobs)

    # Final work and validity checks once the whole minibatch is collated.
    if not cfg.MODEL.KEYPOINTS_ON:
        return True
    return roi_data.keypoint_rcnn.finalize_keypoint_minibatch(blobs, True)

_sample_rois这个函数最终返回的是从proposal里面按照一个batch的大小采样出来的proposal，正负样本比例参考配置文件，默认256。最终返回的blob是： blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights ) 如果有关键点加入训练的话，还要加入关键点的一些信息，关键点有哪些信息呢？ blobs['keypoint_rois'] = sampled_fg_rois blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False) blobs['keypoint_weights'] = weights 经过了rois之后的blob的输出是

对于关键点部分的blob加入，是在前景对象上面设置关键点的信息

def add_keypoint_rcnn_blobs(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.

    Writes 'keypoint_rois', 'keypoint_locations_int32' and 'keypoint_weights'
    into ``blobs``. Note that ``roidb`` here is a single roidb entry (it is
    indexed by field name), and ``fg_inds`` is not used in this function.
    """
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    # Indices of the ground-truth boxes.
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    # Max overlap recorded for every box of the entry.
    max_overlaps = roidb['max_overlaps']
    # Ground-truth keypoint annotations.
    gt_keypoints = roidb['gt_keypoints']

    # For every box, the index of the gt it is mapped to (one per box).
    # NOTE(review): a map value of -1 indexes the last gt here; such boxes
    # presumably fail the FG_THRESH test below - confirm.
    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    # Per box and per keypoint: does the matched gt keypoint fall inside the
    # box? (presumably shape (num_boxes, num_keypoints) - see _within_box)
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    # Row 2 of the keypoint array is treated as the visibility flag.
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    # A box qualifies only if at least one visible keypoint lies inside it.
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    # Keypoint foreground boxes: overlap at or above the threshold AND at
    # least one visible keypoint inside the box.
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    # NOTE(review): the flattened source shows a stray '#' right before this
    # `if`; it is read here as an empty trailing comment on the previous
    # line, so the `if` is live code - confirm against the original file.
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    # Boxes selected for keypoint training ...
    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    # ... and the gt each of them is mapped to.
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    # The last axis of gt_keypoints enumerates the keypoints.
    num_keypoints = gt_keypoints.shape[2]
    # Start with every sampled keypoint set to -1; presumably -1 entries are
    # ignored during training - confirm in keypoints_to_heatmap_labels.
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype
    )
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            # Copy the keypoints of the gt matched to this sampled box.
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            assert np.sum(sampled_keypoints[ii, 2, :]) > 0

    # Build the heatmap labels and weights from the sampled rois and their
    # keypoints; this is the key step of the label generation.
    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois
    )

    # Flatten labels and weights to a column of length
    # num_rois * cfg.KRCNN.NUM_KEYPOINTS.
    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    # Bring the rois to the scale of the resized input image.
    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    # Prepend the image index within the batch as the first column.
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    # Publish the keypoint blobs.
    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights

接下来看一下keypoint是怎么转换为最终的label的。最终的map是56x56的，所以需要计算把原来的roi变成56x56时，x和y方向各需要缩放多少倍（注意计算的是roi缩放到56x56的倍数）： 1）首先计算缩放倍数，把关键点映射到最终的56x56的图上； 2）检查关键点是不是合理的，也即是否映射到了56x56的map范围内； 3）要看关键点合不合理，除了上述判断之外，还要看关键点是否可见； 4）然后将关键点在56x56 map上的二维位置转换成一维索引； 5）最后返回生成的label和weight，都是Nx17的大小； 6）返回后将label和weight拉成一维向量

看一下关键点roi的选择原则： 1）首先roi内必须有可见关键点； 2）然后roi的max_overlap必须达到一定的阈值。只有这些roi才有资格入选keypoint的roi。在关键点标签制作的时候，还有一个问题：那些在roi之外的关键点怎么办？在刚刚的keypoint转换为label的时候，会将roi之外的可见关键点设置为invalid，也即valid是False： valid_loc = np.logical_and( np.logical_and(x >= 0, y >= 0), np.logical_and( x < cfg.KRCNN.HEATMAP_SIZE, y < cfg.KRCNN.HEATMAP_SIZE))

就是这段代码，会判断一个位置是不是一个合理的位置，即看gt_keypoints经过映射之后是否还在界内。通过这些也可以发现，关键点部分是单独训练的，就是用RPN提出来的proposal来训练关键点。详细内容参考keypoint.py文件和keypoint_rcnn.py

上面流程图中把distribute再分解，就变成了如下形式。最后的add_multilevel_roi_blobs这个函数将roi分配给对应level的blob。因为roi按level分配之后，各个roi的索引顺序和原来不同了，为了方便之后恢复原来的顺序，blob里面有一个key就是做这个工作的

def add_multilevel_roi_blobs(
    blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max
):
    """Add RoI blobs for multiple FPN levels to the blobs dict.

    blobs: a dict mapping from blob name to numpy ndarray
    blob_prefix: name prefix to use for the FPN blobs
    rois: the source rois as a 2D numpy array of shape (N, 5) where each row is
      an roi and the columns encode (batch_idx, x1, y1, x2, y2)
    target_lvls: numpy array of shape (N, ) indicating which FPN level each roi
      in rois should be assigned to
    lvl_min: the finest (highest resolution) FPN level (e.g., 2)
    lvl_max: the coarsest (lowest resolution) FPN level (e.g., 6)

    For every level in [lvl_min, lvl_max] the blob '<prefix>_fpn<lvl>' receives
    the rois assigned to that level. '<prefix>_idx_restore_int32' receives the
    permutation that maps the level-by-level concatenation back to the
    original roi order.
    """
    rois_idx_order = np.empty((0, ))
    rois_stacked = np.zeros((0, 5), dtype=np.float32)  # for assert
    for lvl in range(lvl_min, lvl_max + 1):
        lvl_blob_name = blob_prefix + '_fpn' + str(lvl)
        idx_lvl = np.where(target_lvls == lvl)[0]
        blobs[lvl_blob_name] = rois[idx_lvl, :]
        # Remember where each original roi ends up in the stacked order.
        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
        rois_stacked = np.vstack([rois_stacked, blobs[lvl_blob_name]])
    # No debugger breakpoint here: this runs for every minibatch, and a
    # pdb.set_trace() would stop execution on each call.
    rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)
    blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore
    # Sanity check that restore order is correct
    assert (rois_stacked[rois_idx_restore] == rois).all()

_idx_restore_int32这个参数就是存储对应顺序的一个参数，当然这只是个后缀。在分配roi的时候一共有4个level，有别于rpn的5个level，其中边长224对应的是第四个level，112对应的是第三个level，56是第二个，448是第五个

做完这些，有一步是

if cfg.MODEL.KEYPOINTS_ON: valid = roi_data.keypoint_rcnn.finalize_keypoint_minibatch(blobs, valid) return valid 1234

来判断这一个batch是不是合格，判断标准是valid的关键点的数量是否大于20个：如果这一个batch中参与训练的关键点数量没有达到阈值，那么就是不合格的，valid返回False，否则返回True。除此之外，还会加入一个关键点的norm参数作为一个blob：blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32)

3、加入fast-rcnn的head

来看一下RoIFeatureTransform函数。这个函数实现的是对各个level的roi进行roi pooling（detectron采用的是RoIAlign）。由于roi之前按照不同的level存在了blob里面，不同尺寸的roi对应的level自然也是不一样的：rois_fpn2、rois_fpn3、rois_fpn4、rois_fpn5。对于上述的4种roi，分别用fpn_res2_2_sum、fpn_res3_3_sum、fpn_res4_5_sum、fpn_res5_2_sum这几层的特征进行roi pooling。对每一个level的roi做完pooling之后，将所有的pooling特征concat在一起，形成所有roi的特征，也即

xform_shuffled, _ = self.net.Concat( bl_out_list, [blob_out + '_shuffled', '_concat_' + blob_out], axis=0 ) 1234

之前存储的时候保存了一个blobs['rois_idx_restore_int32']这样的blob，目的是为了恢复到按level分配roi之前的顺序。因为之前制作标签的时候用的都是分配之前的顺序，为了让之后的损失和对应的标签一一对应，最后要将顺序还原，用这个restore_int32就可以实现

xform_out = self.net.BatchPermutation( [xform_shuffled, restore_bl], blob_out ) 123

总体代码如下

def RoIFeatureTransform(
    self,
    blobs_in,
    blob_out,
    blob_rois='rois',
    method='RoIPoolF',
    resolution=7,
    spatial_scale=1. / 16.,
    sampling_ratio=0
):
    """Add the requested RoI pooling operator(s) to the net.

    Hides two details from the caller: whether FPN is in use (``blobs_in`` is
    a list, one feature blob per level) and which transform ``method`` is
    applied. ``sampling_ratio`` is forwarded to the operator in all cases but
    is only meaningful for some methods. Returns what the final operator
    returns: the pooling op itself in the single-level case, or the
    BatchPermutation op in the FPN case.
    """
    assert method in {'RoIPoolF', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)
    wants_argmax = (method == 'RoIPoolF')

    def _add_pooling_op(features, rois_name, out_name, scale):
        # RoIPoolF additionally emits an argmax blob; RoIAlign does not.
        out_names = [out_name]
        if wants_argmax:
            out_names.append('_argmax_' + out_name)
        return self.net.__getattr__(method)(
            [features, rois_name],
            out_names,
            pooled_w=resolution,
            pooled_h=resolution,
            spatial_scale=scale,
            sampling_ratio=sampling_ratio
        )

    # Single feature level: one pooling op and we are done.
    if not isinstance(blobs_in, list):
        return _add_pooling_op(blobs_in, blob_rois, blob_out, spatial_scale)

    # FPN case: one pooling op per pyramid level.
    coarsest_lvl = cfg.FPN.ROI_MAX_LEVEL
    finest_lvl = cfg.FPN.ROI_MIN_LEVEL
    assert len(blobs_in) == coarsest_lvl - finest_lvl + 1
    pooled_per_level = []
    for level in range(finest_lvl, coarsest_lvl + 1):
        # blobs_in and spatial_scale are both listed coarsest level first.
        pos = coarsest_lvl - level
        level_features = blobs_in[pos]
        level_scale = spatial_scale[pos]
        level_out = blob_out + '_fpn' + str(level)
        pooled_per_level.append(level_out)
        _add_pooling_op(
            level_features,
            blob_rois + '_fpn' + str(level),
            level_out,
            level_scale
        )

    # Concatenate the pooled features of all levels along the batch axis
    # into a single tensor.
    shuffled, _ = self.net.Concat(
        pooled_per_level,
        [blob_out + '_shuffled', '_concat_' + blob_out],
        axis=0
    )
    # Undo the per-level grouping so rows line up with the dataloader rois.
    return self.net.BatchPermutation(
        [shuffled, blob_rois + '_idx_restore_int32'], blob_out
    )

4、加入keypoint rcnn（krcnn）的head 顺序类似上面，先搭网络，再加输出和损失

转载请注明原文地址: https://www.6miu.com/read-2623214.html

技术

最新回复(0)