新增cornernet注释

pi-pi · pi-pi · commit b4ac5b093b8d · 2020-11-10T14:19:53.000+08:00
diff --git a/README.md b/README.md
@@ -57,7 +57,7 @@ mmdetection无疑是非常优异的目标检测框架，但是其整个框架代
 - [x] sabl  
 - [x] reppoints    
 - [x] reppointsv2    
-- [ ] cornernet  
+- [x] cornernet    
 
 
 ## 4 模型仓库
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,6 +1,11 @@
 # ChangeLog
 
 ## V0.0.4
+### 2020.11.10
+**(1) 新特性**
+- 新增cornernet代码和注释
+- 新增cornernet文档
+
 ### 2020.11.9
 **(1) 新特性**
 - 新增reppointsv2代码和注释
diff --git a/mmdet/models/dense_heads/corner_head.py b/mmdet/models/dense_heads/corner_head.py
@@ -125,6 +125,7 @@ def __init__(self,
         super(CornerHead, self).__init__()
         self.num_classes = num_classes
         self.in_channels = in_channels
+        # A 1-channel embedding (one scalar per corner) is sufficient
         self.corner_emb_channels = corner_emb_channels
         self.with_corner_emb = self.corner_emb_channels > 0
         self.corner_offset_channels = 2
@@ -405,6 +406,7 @@ def get_targets(self,
                 label = gt_labels[batch_id][box_id]
 
                 # Use coords in the feature level to generate ground truth
+                # Floating-point box coordinates at feature-map scale
                 scale_left = left * width_ratio
                 scale_right = right * width_ratio
                 scale_top = top * height_ratio
@@ -413,6 +415,7 @@ def get_targets(self,
                 scale_center_y = center_y * height_ratio
 
                 # Int coords on feature map/ground truth tensor
+                # Truncate to int indices, clamped to the last row/column
                 left_idx = int(min(scale_left, width - 1))
                 right_idx = int(min(scale_right, width - 1))
                 top_idx = int(min(scale_top, height - 1))
@@ -432,6 +435,7 @@ def get_targets(self,
                     radius)
 
                 # Generate corner offset
+                # Offset = float coord - int index, at feature-map scale
                 left_offset = scale_left - left_idx
                 top_offset = scale_top - top_idx
                 right_offset = scale_right - right_idx
@@ -444,6 +448,7 @@ def get_targets(self,
 
                 # Generate corner embedding
                 if with_corner_emb:
+                    # Feature-map (y, x) indices of this gt box's two corners
                     corner_match.append([[top_idx, left_idx],
                                          [bottom_idx, right_idx]])
                 # Generate guiding shift
@@ -615,6 +620,7 @@ def loss_single(self, tl_hmp, br_hmp, tl_emb, br_emb, tl_off, br_off,
         # The value of real corner would be 1 in heatmap ground truth.
         # The mask is computed in class agnostic mode and its shape is
         # batch * 1 * width * height.
+        # Weight mask: 1 only at positive locations, 0 everywhere else
         tl_off_mask = gt_tl_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as(
             gt_tl_hmp)
         br_off_mask = gt_br_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as(
diff --git a/mmdet/models/losses/ae_loss.py b/mmdet/models/losses/ae_loss.py
@@ -34,11 +34,14 @@ def ae_loss_per_image(tl_preds, br_preds, match):
         push_loss = tl_preds.sum() * 0.
     else:
         for m in match:
+            # (y, x) indices of one matched top-left/bottom-right pair
             [tl_y, tl_x], [br_y, br_x] = m
+            # Predicted embeddings at the two corners of this pair
             tl_e = tl_preds[:, tl_y, tl_x].view(-1, 1)
             br_e = br_preds[:, br_y, br_x].view(-1, 1)
             tl_list.append(tl_e)
             br_list.append(br_e)
+            # Mean embedding of this pair
             me_list.append((tl_e + br_e) / 2.0)
 
         tl_list = torch.cat(tl_list)
@@ -49,7 +52,7 @@ def ae_loss_per_image(tl_preds, br_preds, match):
 
         # N is object number in image, M is dimension of embedding vector
         N, M = tl_list.size()
-
+        # Pull loss: draw both corner embeddings toward their pair mean
         pull_loss = (tl_list - me_list).pow(2) + (br_list - me_list).pow(2)
         pull_loss = pull_loss.sum() / N
 
@@ -58,9 +61,11 @@ def ae_loss_per_image(tl_preds, br_preds, match):
         # confusion matrix of push loss
         conf_mat = me_list.expand((N, N, M)).permute(1, 0, 2) - me_list
         conf_weight = 1 - torch.eye(N).type_as(me_list)
+        # margin - |mean difference| for every two groups, diagonal zeroed
         conf_mat = conf_weight * (margin - conf_mat.sum(-1).abs())
 
         if N > 1:  # more than one object in current image
+            # Push loss: penalize groups whose means are closer than margin
             push_loss = F.relu(conf_mat).sum() / (N * (N - 1))
         else:
             push_loss = tl_preds.sum() * 0.
@@ -91,7 +96,9 @@ def forward(self, pred, target, match):
         """Forward function."""
         batch = pred.size(0)
         pull_all, push_all = 0.0, 0.0
+        # Process one image at a time
         for i in range(batch):
+            # match[i]: corner index pairs grouped per object from the labels
             pull, push = ae_loss_per_image(pred[i], target[i], match[i])
 
             pull_all += self.pull_weight * pull