增加目标检测

yizt · yizt · commit 68bb0767f226 · 2020-03-03T17:52:38.000+08:00
diff --git a/detection/Base-RCNN-C4.yaml b/detection/Base-RCNN-C4.yaml
@@ -0,0 +1,18 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "Res5ROIHeads"
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/detection/__init__.py b/detection/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+"""
+ @File    : __init__.py
+ @Time    : 2020/2/24 10:08 PM
+ @Author  : yizuotian
+ @Description    :
+""" 
diff --git a/detection/demo.py b/detection/demo.py
@@ -0,0 +1,300 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import argparse
+import multiprocessing as mp
+import os
+
+import cv2
+import detectron2.data.transforms as T
+import numpy as np
+import torch
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import MetadataCatalog
+from detectron2.data.detection_utils import read_image
+from detectron2.modeling import build_model
+from detectron2.utils.logger import setup_logger
+from skimage import io
+from torch import nn
+
+# constants
+WINDOW_NAME = "COCO detections"
+
+
+def setup_cfg(args):
+    """Build a frozen config from ``args.config_file`` plus the ``args.opts`` overrides."""
+    config = get_cfg()
+    config.merge_from_file(args.config_file)
+    config.merge_from_list(args.opts)
+    threshold = args.confidence_threshold  # one score threshold for every model family
+    config.MODEL.RETINANET.SCORE_THRESH_TEST = threshold
+    config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
+    config.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = threshold
+    config.freeze()
+    return config
+
+
+def get_last_conv_name(net):
+    """
+    Return the name of the last ``nn.Conv2d`` yielded by ``net.named_modules()``.
+    :param net: module whose sub-modules are scanned
+    :return: the sub-module name, or None when there is no Conv2d
+    """
+    # "Last" means last in named_modules() iteration order.
+    conv_names = [
+        name for name, module in net.named_modules() if isinstance(module, nn.Conv2d)
+    ]
+    return conv_names[-1] if conv_names else None
+
+
+class GradCAM(object):
+    """
+    1: the network weights are not updated; the input needs to require gradients
+    2: the score of the selected detection is back-propagated
+    """
+
+    def __init__(self, net, layer_name):
+        self.net = net
+        self.layer_name = layer_name
+        self.feature = None  # output of the hooked layer, set by the forward hook
+        self.gradient = None  # gradient w.r.t. that output, set by the backward hook
+        self.net.eval()
+        self.handlers = []
+        self._register_hook()
+
+    def _get_features_hook(self, module, input, output):
+        self.feature = output
+        print("feature shape:{}".format(output.size()))
+
+    def _get_grads_hook(self, module, input_grad, output_grad):
+        """
+        Backward hook: keep the gradient w.r.t. the hooked layer's output.
+        :param input_grad: tuple, input_grad[0]: None
+                                   input_grad[1]: weight
+                                   input_grad[2]: bias
+        :param output_grad: tuple of length 1
+        :return:
+        """
+        self.gradient = output_grad[0]
+
+    def _register_hook(self):
+        for (name, module) in self.net.named_modules():
+            if name == self.layer_name:
+                self.handlers.append(module.register_forward_hook(self._get_features_hook))
+                self.handlers.append(module.register_backward_hook(self._get_grads_hook))
+
+    def remove_handlers(self):
+        for handle in self.handlers:
+            handle.remove()
+
+    def __call__(self, inputs, index=0):
+        """
+        Compute the Grad-CAM mask of one detected box.
+        :param inputs: {"image": [C,H,W], "height": height, "width": width}
+        :param index: index of the detected box to explain
+        :return: tuple(cam resized to the box, int32 box [x1,y1,x2,y2], class id)
+        """
+        self.net.zero_grad()
+        output = self.net.inference([inputs])
+        print(output)
+        instances = output[0]['instances']
+        score = instances.scores[index]
+        proposal_idx = instances.indices[index]  # which proposal the box came from
+        score.backward()
+
+        gradient = self.gradient[proposal_idx].cpu().data.numpy()  # [C,H,W]
+        weight = np.mean(gradient, axis=(1, 2))  # [C]
+        # Features must come from the same proposal as the gradient, not proposal 0.
+        feature = self.feature[proposal_idx].cpu().data.numpy()  # [C,H,W]
+        cam = feature * weight[:, np.newaxis, np.newaxis]  # [C,H,W]
+        cam = np.sum(cam, axis=0)  # [H,W]
+        cam = np.maximum(cam, 0)  # ReLU
+
+        # Normalise to 0~1; an all-zero map stays zero instead of becoming NaN
+        cam -= np.min(cam)
+        cam /= np.max(cam) or 1.0
+        # Resize the map to the size of the predicted box
+        box = instances.pred_boxes.tensor[index].detach().cpu().numpy().astype(np.int32)
+        x1, y1, x2, y2 = box
+        cam = cv2.resize(cam, (int(x2 - x1), int(y2 - y1)))
+
+        class_id = instances.pred_classes[index].detach().cpu().numpy()
+        return cam, box, class_id
+
+
+class GuidedBackPropagation(object):
+    """Guided back-propagation: a backward hook on every ``nn.ReLU`` clamps gradients at 0."""
+
+    def __init__(self, net):
+        self.net = net
+        for (name, module) in self.net.named_modules():
+            if isinstance(module, nn.ReLU):
+                module.register_backward_hook(self.backward_hook)
+        self.net.eval()
+
+    @classmethod
+    def backward_hook(cls, module, grad_in, grad_out):
+        """
+        Let only non-negative gradients flow back through a ReLU.
+        :param module: the hooked ReLU module
+        :param grad_in: tuple of length 1
+        :param grad_out: tuple of length 1
+        :return: tuple(new_grad_in,)
+        """
+        return torch.clamp(grad_in[0], min=0.0),
+
+    def __call__(self, inputs, index=0):
+        """
+        Back-propagate one detection score to the input image.
+        :param inputs: {"image": [C,H,W], "height": height, "width": width}
+        :param index: index of the detected box
+        :return: gradient of the score w.r.t. inputs["image"], [3,H,W]
+        """
+        self.net.zero_grad()
+        output = self.net.inference([inputs])
+        score = output[0]['instances'].scores[index]
+        score.backward()
+        return inputs['image'].grad  # [3,H,W]; the image has no batch axis to index away
+
+
+def norm_image(image):
+    """
+    Rescale an image to the 0~255 range and convert it to uint8.
+    :param image: [H,W,C] float array; it is copied, not modified
+    :return: uint8 array of the same shape
+    """
+    image = image.copy()
+    image -= np.min(image)  # same value as the old np.max(np.min(image), 0): 0 was an axis
+    image /= np.max(image) or 1.0  # constant image: avoid 0/0 -> NaN
+    image *= 255.
+    return np.uint8(image)
+
+
+def gen_cam(image, mask):
+    """
+    Overlay a class-activation mask on an image.
+    :param image: [H,W,C], original image
+    :param mask: [H,W], values in 0~1
+    :return: tuple(cam, heatmap)
+    """
+    # Colour the mask with the JET colormap, then scale the colours to 0~1.
+    jet = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
+    jet = np.float32(jet) / 255
+    heatmap = jet[..., ::-1]  # reverse the channel axis (BGR -> RGB)
+
+    # Add the heatmap onto the image and rescale the sum to uint8.
+    overlay = heatmap + np.float32(image)
+    return norm_image(overlay), heatmap
+
+
+def gen_gb(grad):
+    """
+    Convert the guided back-propagation input gradient to an image-layout array.
+    :param grad: tensor, [3,H,W]
+    :return: numpy array, [H,W,3]
+    """
+    # No value normalisation happens here; only the layout changes to [H,W,C].
+    grad = grad.detach().cpu().numpy()  # .cpu() so tensors on other devices also work
+    gb = np.transpose(grad, (1, 2, 0))
+    return gb
+
+
+def save_image(image_dicts, input_image_name, network='frcnn', output_dir='./results'):
+    stem, _ext = os.path.splitext(input_image_name)  # '.jpg' is appended below instead
+    for tag, picture in image_dicts.items():
+        io.imsave(os.path.join(output_dir, '{}-{}-{}.jpg'.format(stem, network, tag)), picture)
+
+
+def get_parser():
+    """Build the CLI parser; ``--input`` is required since the script reads exactly one image."""
+    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models")
+    parser.add_argument(
+        "--config-file",
+        default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
+        metavar="FILE",
+        help="path to config file",
+    )
+    parser.add_argument("--input", required=True, help="Path to the input image")
+    parser.add_argument(
+        "--output",
+        help="A file or directory to save output visualizations. "
+             "If not given, will show output in an OpenCV window.",
+    )
+    parser.add_argument(
+        "--confidence-threshold",
+        type=float,
+        default=0.5,
+        help="Minimum score for instance predictions to be shown",
+    )
+    parser.add_argument(
+        "--opts",
+        help="Modify config options using the command-line 'KEY VALUE' pairs",
+        default=[],
+        nargs=argparse.REMAINDER,
+    )
+    return parser
+
+
+if __name__ == "__main__":
+    """
+    Usage:export KMP_DUPLICATE_LIB_OK=TRUE
+    python detection/demo.py --config-file detection/faster_rcnn_R_50_C4.yaml \
+      --input ./examples/pic1.jpg \
+      --opts MODEL.WEIGHTS /Users/yizuotian/pretrained_model/model_final_b1acc2.pkl MODEL.DEVICE cpu
+    """
+    mp.set_start_method("spawn", force=True)
+    args = get_parser().parse_args()
+    setup_logger(name="fvcore")
+    logger = setup_logger()
+    logger.info("Arguments: " + str(args))
+
+    cfg = setup_cfg(args)
+    print(cfg)
+    # Build the model
+    model = build_model(cfg)
+    # Load the weights
+    checkpointer = DetectionCheckpointer(model)
+    checkpointer.load(cfg.MODEL.WEIGHTS)
+
+    # Load the image and resize it with the test-time size settings of the config
+    path = os.path.expanduser(args.input)
+    original_image = read_image(path, format="BGR")
+    height, width = original_image.shape[:2]
+    transform_gen = T.ResizeShortestEdge(
+        [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
+    )
+    image = transform_gen.get_transform(original_image).apply_image(original_image)
+    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).requires_grad_(True)
+    inputs = {"image": image, "height": height, "width": width}
+
+    # Grad-CAM
+    layer_name = get_last_conv_name(model)
+    grad_cam = GradCAM(model, layer_name)
+    mask, box, class_id = grad_cam(inputs)  # cam mask
+    grad_cam.remove_handlers()
+    # Crop the detected box from the channel-reversed image and colour the mask for it
+    image_dict = {}
+    img = original_image[..., ::-1]
+    x1, y1, x2, y2 = box
+    image_dict['predict_box'] = img[y1:y2, x1:x2]
+    image_cam, image_dict['heatmap'] = gen_cam(img[y1:y2, x1:x2], mask)
+
+    # Look up the class name
+    meta = MetadataCatalog.get(
+        cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
+    )
+    label = meta.thing_classes[class_id]
+    print("label:{}".format(label))
+    # GuidedBackPropagation
+    # gbp = GuidedBackPropagation(model)
+    # inputs['image'].grad.zero_()  # zero the gradient
+    # grad = gbp(inputs)
+    # print("grad.shape:{}".format(grad.shape))
+    # gb = gen_gb(grad)
+    # image_dict['gb'] = gb
+    # Generate Guided Grad-CAM
+    # cam_gb = gb * mask[..., np.newaxis]
+    # image_dict['cam_gb'] = norm_image(cam_gb)
+
+    output_dir = args.output or './results'  # honour --output; it used to be ignored
+    os.makedirs(output_dir, exist_ok=True)  # the directory must exist before images are written
+    save_image(image_dict, os.path.basename(path), output_dir=output_dir)
diff --git a/detection/faster_rcnn_R_50_C4.yaml b/detection/faster_rcnn_R_50_C4.yaml
@@ -0,0 +1,18 @@
+_BASE_: "./Base-RCNN-C4.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 20
+INPUT:
+  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+  MIN_SIZE_TEST: 800
+DATASETS:
+  TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
+  TEST: ('voc_2007_test',)
+SOLVER:
+  STEPS: (12000, 16000)
+  MAX_ITER: 18000  # 17.4 epochs
+  WARMUP_ITERS: 100