FateScript · Wang-zipeng · Feb 8, 2021 · Feb 8, 2021 · Feb 9, 2021 · Mar 1, 2021
diff --git a/dl_lib/data/datasets/builtin.py b/dl_lib/data/datasets/builtin.py
@@ -53,6 +53,13 @@
                           "coco/annotations/instances_val2017_100.json"),
 }
 
+_PREDEFINED_SPLITS_COCO["multi_metal"] = {  # split name -> (image dir, annotation json); presumably resolved against register_all_coco's root - confirm
+    "multi_metal_coco_2014_train":
+    ("train2014", "annotations/instances_train2014.json"),
+    "multi_metal_coco_2014_val":
+    ("val2014", "annotations/instances_val2014.json"),
+}
+
 
 def register_all_coco(root=osp.join(
         osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")):
@@ -87,5 +94,5 @@ def register_all_pascal_voc(root=osp.join(
 
 
 # Register them all under "./datasets"
-register_all_coco()
+register_all_coco(root=r'E:\dataset\uncompressed')  # FIXME(review): machine-specific absolute path replaces the default "./datasets" root
 register_all_pascal_voc()
diff --git a/dl_lib/data/datasets/builtin_meta.py b/dl_lib/data/datasets/builtin_meta.py
@@ -140,6 +140,18 @@
     {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
 ]
 
+MULTI_METAL_COCO_CATEGORIES = [  # ten "thing" categories (ids 1-10), each with a color triple
+    {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "flat"},
+    {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "flat_back"},
+    {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "four_flat"},
+    {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "four_hole"},
+    {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "metal_three"},
+    {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "metal_three_back"},
+    {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "one_hole_back"},
+    {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "one_hole_front"},
+    {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "two_back"},
+    {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "two_front"},
+]
 
 def _get_coco_instances_meta():
     thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
@@ -155,6 +167,20 @@ def _get_coco_instances_meta():
     }
     return ret
 
+def _get_multi_metal_coco_instances_meta():
+    """Build the instance metadata (id mapping, class names, colors) from MULTI_METAL_COCO_CATEGORIES."""
+    things = [k for k in MULTI_METAL_COCO_CATEGORIES if k["isthing"] == 1]
+    thing_ids = [k["id"] for k in things]
+    assert len(thing_ids) == 10, len(thing_ids)
+    # Map each dataset category id to a contiguous id in [0, 9]
+    thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
+    ret = {
+        "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id,
+        "thing_classes": [k["name"] for k in things],
+        "thing_colors": [k["color"] for k in things],
+    }
+    return ret
+
 
 def _get_builtin_metadata(dataset_name):
     if dataset_name == "coco":
@@ -175,4 +201,6 @@ def _get_builtin_metadata(dataset_name):
             "thing_classes": CITYSCAPES_THING_CLASSES,
             "stuff_classes": CITYSCAPES_STUFF_CLASSES,
         }
+    elif dataset_name == "multi_metal":
+        return _get_multi_metal_coco_instances_meta()
     raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
diff --git a/dl_lib/engine/defaults.py b/dl_lib/engine/defaults.py
@@ -12,6 +12,7 @@
 import argparse
 import logging
 import os
+import sys
 from collections import OrderedDict
 
 import torch
@@ -66,7 +67,7 @@ def default_argument_parser():
     # PyTorch still may leave orphan processes in multi-gpu training.
     # Therefore we use a deterministic way to obtain port,
     # so that users are aware of orphan processes by seeing the port occupied.
-    port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14
+    port = 2 ** 15 + 2 ** 14 + hash(1 if sys.platform == "win32" else os.getuid()) % 2 ** 14
     parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
     parser.add_argument(
         "opts",

diff --git a/dl_lib/evaluation/coco_evaluation.py b/dl_lib/evaluation/coco_evaluation.py
@@ -88,7 +88,8 @@ def _tasks_from_config(self, cfg):
 
         tasks = ("bbox",)
         if cfg.MODEL.MASK_ON:
-            tasks = tasks + ("segm",)
+            #tasks = tasks + ("segm",)
+            pass
         if cfg.MODEL.KEYPOINT_ON:
             tasks = tasks + ("keypoints",)
         return tasks

diff --git a/dl_lib/evaluation/evaluator.py b/dl_lib/evaluation/evaluator.py
@@ -101,7 +101,8 @@ def inference_on_dataset(model, data_loader, evaluator):
     Returns:
         The return value of `evaluator.evaluate()`
     """
-    num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
+    #num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
+    num_devices = 1
     logger = logging.getLogger(__name__)
     logger.info("Start inference on {} images".format(len(data_loader)))
 
@@ -120,6 +121,7 @@ def inference_on_dataset(model, data_loader, evaluator):
 
             start_compute_time = time.time()
             outputs = model(inputs)
+            draw_result(inputs, outputs)
             if torch.cuda.is_available():
                 torch.cuda.synchronize()
             total_compute_time += time.time() - start_compute_time
@@ -160,6 +162,19 @@ def inference_on_dataset(model, data_loader, evaluator):
         results = {}
     return results
 
+def draw_result(inputs, outputs, save_path=r'D:\project\COCO_MetalMulti\result\result.jpg'):
+    """Debug aid: draw predicted polygon points and boxes on each input image and save it to ``save_path``."""
+    import cv2
+    for sample, output in zip(inputs, outputs):  # `sample` avoids shadowing the builtin `input`
+        image = cv2.imread(sample['file_name'])
+        pred_segmentation = output['instances'].get('pred_segmentation')
+        pred_bbox = output['instances'].get('pred_boxes').tensor
+        for segmentation, bbox in zip(pred_segmentation, pred_bbox):
+            for idx in range(0, segmentation.shape[0], 2):  # points are stored as x0, y0, x1, y1, ...
+                cv2.circle(image, (int(segmentation[idx]), int(segmentation[idx+1])), 2, (0, 255, 0), 0)
+            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
+        cv2.imwrite(save_path, image)  # one write per image; every image reuses the same path
+
 
 @contextmanager
 def inference_context(model):

diff --git a/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu b/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
@@ -307,6 +307,10 @@ __global__ void RoIAlignBackwardFeature(
 
 namespace dl_lib {
 
+int ceil_div(int a, int b){  // integer ceiling of a / b, valid for a >= 0 and b > 0
+	return  (a + b - 1) / b;
+}
+
 at::Tensor ROIAlign_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -334,7 +338,9 @@ at::Tensor ROIAlign_forward_cuda(
   auto output_size = num_rois * pooled_height * pooled_width * channels;
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(output_size, 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(output_size),
+                                           static_cast<int64_t>(512)),
+                     static_cast<int64_t>(4096)));
   dim3 block(512);
 
   if (output.numel() == 0) {
@@ -390,7 +396,9 @@ at::Tensor ROIAlign_backward_cuda(
 
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(grad.numel(), 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(grad.numel()),
+                                           static_cast<int64_t>(512)),
+                     static_cast<int64_t>(4096)));
   dim3 block(512);
 
   // handle possibly empty gradients

diff --git a/dl_lib/network/centernet.py b/dl_lib/network/centernet.py
@@ -10,6 +10,8 @@
 from .generator import CenterNetDecoder, CenterNetGT
 from .loss import modified_focal_loss, reg_l1_loss
 
+import matplotlib.pyplot as plt
+
 
 class CenterNet(nn.Module):
     """
@@ -73,9 +75,9 @@ def forward(self, batched_inputs):
 
         gt_dict = self.get_ground_truth(batched_inputs)
 
-        return self.losses(pred_dict, gt_dict)
+        return self.losses(pred_dict, gt_dict, images)
 
-    def losses(self, pred_dict, gt_dict):
+    def losses(self, pred_dict, gt_dict, images):
         r"""
         calculate losses of pred and gt
 
@@ -107,19 +109,37 @@ def losses(self, pred_dict, gt_dict):
         index = gt_dict['index']
         index = index.to(torch.long)
         # width and height loss, better version
-        loss_wh = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])
+        loss_wh, _, _ = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])
 
         # regression loss
-        loss_reg = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
+        loss_reg, _, _ = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
+        loss_segmentation_x, pred_x_s, gt_x_s = reg_l1_loss(pred_dict['segmentation_x'], mask, index, gt_dict['segmentation_x'])
+        loss_segmentation_y, pred_y_s, gt_y_s = reg_l1_loss(pred_dict['segmentation_y'], mask, index, gt_dict['segmentation_y'])
+
+
+        for pred_x, gt_x, pred_y, gt_y in zip(pred_x_s[0], gt_x_s[0], pred_y_s[0], gt_y_s[0]):
+            pred_x = pred_x.cpu().data.numpy() * 512
+            gt_x = gt_x.cpu().data.numpy() * 512
+            pred_y = pred_y.cpu().data.numpy() * 512
+            gt_y = gt_y.cpu().data.numpy() * 512
+            # plt.scatter(i[:, 1], i[:, 0], color='b')
+            plt.imshow(np.transpose(images[0].cpu().data.numpy(), (1, 2, 0)))
+            plt.scatter(gt_x, gt_y, color='g')
+            plt.scatter(pred_x, pred_y, color='r')
+            plt.show()
 
         loss_cls *= self.cfg.MODEL.LOSS.CLS_WEIGHT
         loss_wh *= self.cfg.MODEL.LOSS.WH_WEIGHT
         loss_reg *= self.cfg.MODEL.LOSS.REG_WEIGHT
+        loss_segmentation_x *= self.cfg.MODEL.LOSS.SEG_WEIGHT
+        loss_segmentation_y *= self.cfg.MODEL.LOSS.SEG_WEIGHT
 
         loss = {
             "loss_cls": loss_cls,
             "loss_box_wh": loss_wh,
             "loss_center_reg": loss_reg,
+            "loss_segmentation_x": loss_segmentation_x,
+            "loss_segmentation_y": loss_segmentation_y,
         }
         # print(loss)
         return loss
@@ -168,16 +188,21 @@ def decode_prediction(self, pred_dict, img_info):
         fmap = pred_dict["cls"]
         reg = pred_dict["reg"]
         wh = pred_dict["wh"]
+        segmentation_x = pred_dict["segmentation_x"] if 'segmentation_x' in pred_dict else None
+        segmentation_y = pred_dict["segmentation_y"] if 'segmentation_y' in pred_dict else None
+        segmentation = (segmentation_x, segmentation_y) if segmentation_x is not None and segmentation_y is not None \
+            else None
 
-        boxes, scores, classes = CenterNetDecoder.decode(fmap, wh, reg)
+        boxes, scores, classes, segmentation = CenterNetDecoder.decode(fmap, wh, reg, segmentation=segmentation)
         # boxes = Boxes(boxes.reshape(boxes.shape[-2:]))
         scores = scores.reshape(-1)
         classes = classes.reshape(-1).to(torch.int64)
 
         # dets = CenterNetDecoder.decode(fmap, wh, reg)
         boxes = CenterNetDecoder.transform_boxes(boxes, img_info)
         boxes = Boxes(boxes)
-        return dict(pred_boxes=boxes, scores=scores, pred_classes=classes)
+        segmentation = CenterNetDecoder.transform_segmentation(segmentation, img_info)
+        return dict(pred_boxes=boxes, scores=scores, pred_classes=classes, pred_segmentation=segmentation)
 
     def preprocess_image(self, batched_inputs):
         """

diff --git a/dl_lib/network/generator/centernet_decode.py b/dl_lib/network/generator/centernet_decode.py
@@ -14,7 +14,7 @@
 class CenterNetDecoder(object):
 
     @staticmethod
-    def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
+    def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100, segmentation=None):
         r"""
         decode output feature map to detection results
 
@@ -47,6 +47,16 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
         else:
             wh = wh.reshape(batch, K, 2)
 
+        if segmentation is not None:
+            segmentation_x = gather_feature(segmentation[0], index, use_transform=True)
+            segmentation_y = gather_feature(segmentation[1], index, use_transform=True)
+            batch_size = segmentation_x.shape[0]
+            objects_num = segmentation_x.shape[1]
+            points_num = segmentation_x.shape[2]
+            segmentation = torch.zeros((batch_size, objects_num, points_num*2))
+            segmentation[:, :, 0::2] = segmentation_x
+            segmentation[:, :, 1::2] = segmentation_y
+
         clses  = clses.reshape(batch, K, 1).float()
         scores = scores.reshape(batch, K, 1)
 
@@ -55,7 +65,7 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
                             xs + half_w, ys + half_h],
                            dim=2)
 
-        detections = (bboxes, scores, clses)
+        detections = (bboxes, scores, clses, segmentation)
 
         return detections
 
@@ -82,6 +92,29 @@ def transform_boxes(boxes, img_info, scale=1):
         target_boxes = np.dot(aug_coords, trans.T).reshape(-1, 4)
         return target_boxes
 
+    @staticmethod
+    def transform_segmentation(boxes, img_info, scale=1):
+        r"""
+        apply the inverse CenterAffine transform (dst -> src) to predicted polygon points
+
+        Args:
+            boxes(Tensor): interleaved (x, y) polygon points, last dim = 2 * num_points;
+                ``None`` (no segmentation output) is returned unchanged
+            img_info(dict): must provide 'center', 'size', 'width' and 'height'
+            scale(float): currently unused, kept for parity with transform_boxes
+        """
+        if boxes is None:  # decode() hands back None when no segmentation maps were supplied
+            return None
+        boxes = boxes.cpu().numpy()
+        flat_dim = boxes.shape[-1]  # was hard-coded to 8, i.e. exactly 4 polygon points
+        center, size = img_info['center'], img_info['size']
+        output_size = (img_info['width'], img_info['height'])
+        src, dst = CenterAffine.generate_src_and_dst(center, size, output_size)
+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+        coords = boxes.reshape(-1, 2)
+        aug_coords = np.column_stack((coords, np.ones(coords.shape[0])))
+        return np.dot(aug_coords, trans.T).reshape(-1, flat_dim)
+
     @staticmethod
     def pseudo_nms(fmap, pool_size=3):
         r"""
@@ -107,13 +140,13 @@ def topk_score(scores, K=40):
         topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K)
 
         topk_inds = topk_inds % (height * width)
-        topk_ys = (topk_inds / width).int().float()
+        topk_ys = (topk_inds.true_divide(width)).int().float()
         topk_xs = (topk_inds % width).int().float()
 
         # get all topk in in a batch
         topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K)
         # div by K because index is grouped by K(C x K shape)
-        topk_clses = (index / K).int()
+        topk_clses = (index.true_divide(K)).int()
         topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K)
         topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K)
         topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K)

diff --git a/dl_lib/network/generator/centernet_gt.py b/dl_lib/network/generator/centernet_gt.py
@@ -15,8 +15,11 @@ def generate(config, batched_input):
         output_size = config.INPUT.OUTPUT_SIZE
         min_overlap = config.MODEL.CENTERNET.MIN_OVERLAP
         tensor_dim = config.MODEL.CENTERNET.TENSOR_DIM
+        num_polygons_points = config.MODEL.CENTERNET.NUM_POLYGON_POINTS
 
         scoremap_list, wh_list, reg_list, reg_mask_list, index_list = [[] for i in range(5)]
+        segmentation_list_x = []
+        segmentation_list_y = []
         for data in batched_input:
             # img_size = (data['height'], data['width'])
 
@@ -28,6 +31,8 @@ def generate(config, batched_input):
             gt_reg = torch.zeros_like(gt_wh)
             reg_mask = torch.zeros(tensor_dim)
             gt_index = torch.zeros(tensor_dim)
+            gt_segmentation_x = torch.ones(tensor_dim, num_polygons_points) * -128
+            gt_segmentation_y = torch.ones(tensor_dim, num_polygons_points) * -128
             # pass
 
             boxes, classes = bbox_dict['gt_boxes'], bbox_dict['gt_classes']
@@ -50,18 +55,37 @@ def generate(config, batched_input):
             )
             gt_wh[:num_boxes] = wh
 
+            masks = bbox_dict['gt_masks']
+            gt_segmentation_x[:num_boxes], gt_segmentation_y[:num_boxes] = \
+                masks.normalized_by_length(box_tensor.numpy(),
+                                           num_polygons_points,
+                                           box_scale)
+            gt_segmentation = torch.zeros(num_boxes, num_polygons_points * 2)
+            gt_segmentation[:, 0::2] = gt_segmentation_x[:num_boxes]
+            gt_segmentation[:, 1::2] = gt_segmentation_y[:num_boxes]
+            import cv2
+            image = data['image'].numpy().transpose((1, 2, 0))
+            image = cv2.resize(image, (128, 128))
+            for bbox in box_tensor:
+                cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
+                cv2.imwrite(r'D:\project\COCO_MetalMulti\result\result.jpg', image)
+
             scoremap_list.append(gt_scoremap)
             wh_list.append(gt_wh)
             reg_list.append(gt_reg)
             reg_mask_list.append(reg_mask)
             index_list.append(gt_index)
+            segmentation_list_x.append(gt_segmentation_x)
+            segmentation_list_y.append(gt_segmentation_y)
 
         gt_dict = {
             "score_map": torch.stack(scoremap_list, dim=0),
             "wh": torch.stack(wh_list, dim=0),
             "reg": torch.stack(reg_list, dim=0),
             "reg_mask": torch.stack(reg_mask_list, dim=0),
             "index": torch.stack(index_list, dim=0),
+            "segmentation_x": torch.stack(segmentation_list_x, dim=0),
+            "segmentation_y": torch.stack(segmentation_list_y, dim=0),
         }
         return gt_dict