
Commit b504ddd: "Add files via upload" (0 parents)


100 files changed: +7673 −0 lines

mrcnn/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
(the new file contains a single empty line)
5 binary files added (140 Bytes, 2.79 KB, 76 KB, 26.3 KB, 14.4 KB); contents not shown.

mrcnn/config.py

Lines changed: 236 additions & 0 deletions
@@ -0,0 +1,236 @@
"""
Mask R-CNN
Base Configurations class.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import numpy as np


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # Number of GPUs to use. When using only a CPU, this needs to be set to 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch.
    # This doesn't need to match the size of the training set. TensorBoard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture.
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well.
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height).
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor.
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride.
    # If 1, anchors are created for each cell in the backbone feature map.
    # If 2, anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing.
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4.
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to the classifier/mask heads.
    # The Mask R-CNN paper uses 512, but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Fraction of positive ROIs used to train the classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of the output mask.
    # To change this you also need to change the neural network mask branch.
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance.
    # ROIs below this threshold are skipped.
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum.
    # The Mask R-CNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode, likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used for R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }

    # Use RPN ROIs or externally generated ROIs for training.
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROIs generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers.
    #   None:  Train BN layers. This is the normal mode.
    #   False: Freeze BN layers. Good when using a small batch size.
    #   True:  (don't use). Sets the layer in training mode even when predicting.
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
                                         self.IMAGE_CHANNEL_COUNT])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
                                         self.IMAGE_CHANNEL_COUNT])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")

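A note on the resizing modes documented above: in "square" mode the scale is chosen so that the short side reaches IMAGE_MIN_DIM while the long side is not allowed to exceed IMAGE_MAX_DIM. The sketch below only illustrates that rule as stated in the comments; the actual resizing code lives elsewhere in the repository rather than in config.py, so treat the function and its behavior as an assumption, not the shipped implementation.

# Illustrative only: the scale rule implied by the "square" mode comments.
def square_mode_scale(h, w, min_dim=800, max_dim=1024, min_scale=0):
    """Pick a scale so min(h, w) reaches min_dim, but cap it so that
    max(h, w) never exceeds max_dim. Hypothetical helper, not from the repo."""
    scale = max(1, min_dim / min(h, w))
    if min_scale:
        scale = max(scale, min_scale)
    # Cap the scale if the long side would overshoot max_dim
    if round(max(h, w) * scale) > max_dim:
        scale = max_dim / max(h, w)
    return scale

# Example: a 600x900 image. 800/600 is about 1.33, which would push the long
# side to 1200 > 1024, so the cap applies and the scale becomes 1024/900,
# about 1.14. The resized image is then zero-padded to 1024x1024.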