"""
Mask R-CNN
Base Configurations class.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import numpy as np


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # Number of GPUs to use. When using only a CPU, this needs to be set to 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2
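    # Note: the effective batch size is IMAGES_PER_GPU * GPU_COUNT;
    # it's computed as BATCH_SIZE in __init__ below.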

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. TensorBoard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means more frequent TensorBoard updates.
    # Validation stats are also calculated at the end of each epoch and
    # might take a while, so don't set this too small or a lot of time
    # will be spent on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well.
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a tall, narrow
    # anchor (width is half the height).
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
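    # For reference, anchors are built from scale and ratio roughly as
    # width = scale * sqrt(ratio) and height = scale / sqrt(ratio)
    # (see utils.generate_anchors). E.g., scale 64 with ratio 0.5 gives
    # an anchor of about 45x91 pixels (w x h).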

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting;
    # it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch. (A worked example follows the settings below.)
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it
    #         scales up before padding. IMAGE_MAX_DIM is ignored in this
    #         mode. The multiple of 64 is needed to ensure smooth scaling
    #         of feature maps up and down the 6 levels of the FPN pyramid
    #         (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image
    #         based on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a
    #         random crop of size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be
    #         used in training only. IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
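    # Worked example for "square" mode (an illustrative sketch; see
    # utils.resize_image for the exact logic): a 600x800 (h x w) image is
    # first scaled by 800/600 ≈ 1.33, which would give 800x1067 and exceed
    # IMAGE_MAX_DIM, so the scale is capped at 1024/800 = 1.28, yielding
    # 768x1024, which is then zero-padded to 1024x1024.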
    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force
    # further up scaling. For example, if set to 2 then images are scaled
    # up to double the width and height, or more, even if IMAGE_MIN_DIM
    # doesn't require it. However, in 'square' mode, it can be overruled
    # by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask R-CNN paper uses 512, but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33
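    # E.g., with TRAIN_ROIS_PER_IMAGE = 200 and ROI_POSITIVE_RATIO = 0.33,
    # each image contributes roughly 66 positive and 134 negative ROIs to
    # the head training batch.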

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detections
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask R-CNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode, likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used to tune the R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }
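    # For example, a sub-class that cares more about mask quality could
    # override LOSS_WEIGHTS and raise "mrcnn_mask_loss" to 2.0 while
    # keeping the other weights at 1. (Illustrative only; useful weights
    # are experiment-specific.)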

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROIs generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None:  Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True:  (don't use). Sets layers in training mode even when predicting
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
                                         self.IMAGE_CHANNEL_COUNT])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
                                         self.IMAGE_CHANNEL_COUNT])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
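        # The terms above correspond to the layout of compose_image_meta()
        # in model.py: image_id (1) + original_image_shape (3) +
        # image_shape (3) + window in image coordinates (4) + scale (1) +
        # active_class_ids (NUM_CLASSES).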

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")
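

# Minimal usage sketch (hypothetical sub-class; not part of the library).
# Override only the values that differ from the defaults, then instantiate:
if __name__ == "__main__":
    class DemoConfig(Config):
        NAME = "demo"
        GPU_COUNT = 1
        IMAGES_PER_GPU = 2
        NUM_CLASSES = 1 + 3  # background + 3 object classes

    config = DemoConfig()
    config.display()  # prints all settings, including the computed BATCH_SIZE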