Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion dl_lib/data/datasets/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@
"coco/annotations/instances_val2017_100.json"),
}

# Splits of the custom "multi_metal" dataset, which reuses the COCO 2014
# directory/annotation naming. Each key is the dataset name to register and
# each value is an (image directory, annotation json) pair.
# NOTE(review): the paths are presumably resolved relative to the `root`
# passed to register_all_coco -- confirm against that function.
_PREDEFINED_SPLITS_COCO["multi_metal"] = {
"multi_metal_coco_2014_train":
("train2014", "annotations/instances_train2014.json"),
"multi_metal_coco_2014_val":
("val2014", "annotations/instances_val2014.json"),
}


def register_all_coco(root=osp.join(
osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")):
Expand Down Expand Up @@ -87,5 +94,5 @@ def register_all_pascal_voc(root=osp.join(


# Register them all under "./datasets"
register_all_coco()
register_all_coco(root=r'E:\dataset\uncompressed')
register_all_pascal_voc()
28 changes: 28 additions & 0 deletions dl_lib/data/datasets/builtin_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@
{"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"},
]

# Category table of the "multi_metal" dataset, in the same record layout as
# COCO_CATEGORIES: one dict per class holding a display colour triple, the
# "isthing" flag, the dataset category id and the class name.
# All ten entries are "thing" classes and their ids run contiguously from 1
# to 10.
# NOTE(review): colour channel order (RGB vs BGR) is not visible here --
# presumably the same convention as COCO_CATEGORIES; confirm before reuse.
MULTI_METAL_COCO_CATEGORIES = [
{"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "flat"},
{"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "flat_back"},
{"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "four_flat"},
{"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "four_hole"},
{"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "metal_three"},
{"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "metal_three_back"},
{"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "one_hole_back"},
{"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "one_hole_front"},
{"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "two_back"},
{"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "two_front"},
]

def _get_coco_instances_meta():
thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1]
Expand All @@ -155,6 +167,20 @@ def _get_coco_instances_meta():
}
return ret

def _get_multi_metal_coco_instances_meta():
    """Build the instance metadata of the multi_metal dataset.

    Only categories flagged ``isthing == 1`` in
    ``MULTI_METAL_COCO_CATEGORIES`` are used.

    Returns:
        dict: with the keys
            "thing_dataset_id_to_contiguous_id": dataset category id -> its
                position in the category list (0-based),
            "thing_classes": class names in list order,
            "thing_colors": display colours in list order.
    """
    things = [cat for cat in MULTI_METAL_COCO_CATEGORIES if cat["isthing"] == 1]
    thing_ids = [cat["id"] for cat in things]
    thing_colors = [cat["color"] for cat in things]
    # Sanity check: the dataset defines exactly ten thing classes.
    assert len(thing_ids) == 10, len(thing_ids)
    # Map every dataset category id to a contiguous training id in [0, 9].
    contiguous_id_of = dict(zip(thing_ids, range(len(thing_ids))))
    thing_classes = [cat["name"] for cat in things]
    return {
        "thing_dataset_id_to_contiguous_id": contiguous_id_of,
        "thing_classes": thing_classes,
        "thing_colors": thing_colors,
    }


def _get_builtin_metadata(dataset_name):
if dataset_name == "coco":
Expand All @@ -175,4 +201,6 @@ def _get_builtin_metadata(dataset_name):
"thing_classes": CITYSCAPES_THING_CLASSES,
"stuff_classes": CITYSCAPES_STUFF_CLASSES,
}
elif dataset_name == "multi_metal":
return _get_multi_metal_coco_instances_meta()
raise KeyError("No built-in metadata for dataset {}".format(dataset_name))
3 changes: 2 additions & 1 deletion dl_lib/engine/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import argparse
import logging
import os
import sys
from collections import OrderedDict

import torch
Expand Down Expand Up @@ -66,7 +67,7 @@ def default_argument_parser():
# PyTorch still may leave orphan processes in multi-gpu training.
# Therefore we use a deterministic way to obtain port,
# so that users are aware of orphan processes by seeing the port occupied.
port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14
port = 2 ** 15 + 2 ** 14 + hash(1 if sys.platform == "win32" else os.getuid()) % 2 ** 14
parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
parser.add_argument(
"opts",
Expand Down
3 changes: 2 additions & 1 deletion dl_lib/evaluation/coco_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def _tasks_from_config(self, cfg):

tasks = ("bbox",)
if cfg.MODEL.MASK_ON:
tasks = tasks + ("segm",)
#tasks = tasks + ("segm",)
pass
if cfg.MODEL.KEYPOINT_ON:
tasks = tasks + ("keypoints",)
return tasks
Expand Down
17 changes: 16 additions & 1 deletion dl_lib/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def inference_on_dataset(model, data_loader, evaluator):
Returns:
The return value of `evaluator.evaluate()`
"""
num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
#num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
num_devices = 1
logger = logging.getLogger(__name__)
logger.info("Start inference on {} images".format(len(data_loader)))

Expand All @@ -120,6 +121,7 @@ def inference_on_dataset(model, data_loader, evaluator):

start_compute_time = time.time()
outputs = model(inputs)
draw_result(inputs, outputs)
if torch.cuda.is_available():
torch.cuda.synchronize()
total_compute_time += time.time() - start_compute_time
Expand Down Expand Up @@ -160,6 +162,19 @@ def inference_on_dataset(model, data_loader, evaluator):
results = {}
return results

def draw_result(inputs, outputs,
                output_path=r'D:\project\COCO_MetalMulti\result\result.jpg'):
    """Debug helper: draw predicted polygon points and boxes onto the images.

    For every (input, output) pair the source image is re-read from disk,
    each predicted polygon vertex is drawn as a small green circle, each
    predicted box as a green rectangle, and the result is written to
    ``output_path``. The same path is used for every image, so only the last
    drawn image of a call survives on disk.

    Args:
        inputs (list[dict]): one dict per image; ``'file_name'`` is the path
            the image is loaded from.
        outputs (list[dict]): one dict per image; ``'instances'`` must offer
            ``get('pred_segmentation')`` (rows of interleaved
            x0, y0, x1, y1, ... coordinates) and ``get('pred_boxes').tensor``
            (rows of x0, y0, x1, y1).
        output_path (str): file the visualisation is written to. Defaults to
            the previously hard-coded location.
    """
    import cv2

    logger = logging.getLogger(__name__)
    green = (0, 255, 0)
    for sample, prediction in zip(inputs, outputs):
        file_name = sample['file_name']
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            logger.warning("draw_result: cannot read image %s, skipping", file_name)
            continue
        instances = prediction['instances']
        pred_segmentation = instances.get('pred_segmentation')
        pred_bbox = instances.get('pred_boxes').tensor
        for segmentation, bbox in zip(pred_segmentation, pred_bbox):
            for idx in range(0, segmentation.shape[0], 2):
                point = (int(segmentation[idx]), int(segmentation[idx + 1]))
                cv2.circle(image, point, 2, green, 0)
            # OpenCV drawing functions need plain Python ints, not tensor
            # elements.
            x0, y0, x1, y1 = (int(v) for v in bbox[:4])
            cv2.rectangle(image, (x0, y0), (x1, y1), green, 2)
        if not cv2.imwrite(output_path, image):
            # imwrite reports failure (e.g. missing directory) via its
            # return value instead of raising.
            logger.warning("draw_result: failed to write %s", output_path)


@contextmanager
def inference_context(model):
Expand Down
12 changes: 10 additions & 2 deletions dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@ __global__ void RoIAlignBackwardFeature(

namespace dl_lib {

// Integer division of a by b rounded up, for non-negative a and positive b.
// NOTE: the intermediate sum a + b - 1 is computed in int and is not guarded
// against overflow.
int ceil_div(int a, int b){
  const int biased_numerator = a + b - 1;
  return biased_numerator / b;
}

at::Tensor ROIAlign_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
Expand Down Expand Up @@ -334,7 +338,9 @@ at::Tensor ROIAlign_forward_cuda(
auto output_size = num_rois * pooled_height * pooled_width * channels;
cudaStream_t stream = at::cuda::getCurrentCUDAStream();

dim3 grid(std::min(at::cuda::ATenCeilDiv(output_size, 512L), 4096L));
dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(output_size),
static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);

if (output.numel() == 0) {
Expand Down Expand Up @@ -390,7 +396,9 @@ at::Tensor ROIAlign_backward_cuda(

cudaStream_t stream = at::cuda::getCurrentCUDAStream();

dim3 grid(std::min(at::cuda::ATenCeilDiv(grad.numel(), 512L), 4096L));
dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(grad.numel()),
static_cast<int64_t>(512)),
static_cast<int64_t>(4096)));
dim3 block(512);

// handle possibly empty gradients
Expand Down
37 changes: 31 additions & 6 deletions dl_lib/network/centernet.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from .generator import CenterNetDecoder, CenterNetGT
from .loss import modified_focal_loss, reg_l1_loss

import matplotlib.pyplot as plt


class CenterNet(nn.Module):
"""
Expand Down Expand Up @@ -73,9 +75,9 @@ def forward(self, batched_inputs):

gt_dict = self.get_ground_truth(batched_inputs)

return self.losses(pred_dict, gt_dict)
return self.losses(pred_dict, gt_dict, images)

def losses(self, pred_dict, gt_dict):
def losses(self, pred_dict, gt_dict, images):
r"""
calculate losses of pred and gt

Expand Down Expand Up @@ -107,19 +109,37 @@ def losses(self, pred_dict, gt_dict):
index = gt_dict['index']
index = index.to(torch.long)
# width and height loss, better version
loss_wh = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])
loss_wh, _, _ = reg_l1_loss(pred_dict['wh'], mask, index, gt_dict['wh'])

# regression loss
loss_reg = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
loss_reg, _, _ = reg_l1_loss(pred_dict['reg'], mask, index, gt_dict['reg'])
loss_segmentation_x, pred_x_s, gt_x_s = reg_l1_loss(pred_dict['segmentation_x'], mask, index, gt_dict['segmentation_x'])
loss_segmentation_y, pred_y_s, gt_y_s = reg_l1_loss(pred_dict['segmentation_y'], mask, index, gt_dict['segmentation_y'])


for pred_x, gt_x, pred_y, gt_y in zip(pred_x_s[0], gt_x_s[0], pred_y_s[0], gt_y_s[0]):
pred_x = pred_x.cpu().data.numpy() * 512
gt_x = gt_x.cpu().data.numpy() * 512
pred_y = pred_y.cpu().data.numpy() * 512
gt_y = gt_y.cpu().data.numpy() * 512
# plt.scatter(i[:, 1], i[:, 0], color='b')
plt.imshow(np.transpose(images[0].cpu().data.numpy(), (1, 2, 0)))
plt.scatter(gt_x, gt_y, color='g')
plt.scatter(pred_x, pred_y, color='r')
plt.show()

loss_cls *= self.cfg.MODEL.LOSS.CLS_WEIGHT
loss_wh *= self.cfg.MODEL.LOSS.WH_WEIGHT
loss_reg *= self.cfg.MODEL.LOSS.REG_WEIGHT
loss_segmentation_x *= self.cfg.MODEL.LOSS.SEG_WEIGHT
loss_segmentation_y *= self.cfg.MODEL.LOSS.SEG_WEIGHT

loss = {
"loss_cls": loss_cls,
"loss_box_wh": loss_wh,
"loss_center_reg": loss_reg,
"loss_segmentation_x": loss_segmentation_x,
"loss_segmentation_y": loss_segmentation_y,
}
# print(loss)
return loss
Expand Down Expand Up @@ -168,16 +188,21 @@ def decode_prediction(self, pred_dict, img_info):
fmap = pred_dict["cls"]
reg = pred_dict["reg"]
wh = pred_dict["wh"]
segmentation_x = pred_dict["segmentation_x"] if 'segmentation_x' in pred_dict else None
segmentation_y = pred_dict["segmentation_y"] if 'segmentation_y' in pred_dict else None
segmentation = (segmentation_x, segmentation_y) if segmentation_x is not None and segmentation_y is not None \
else None

boxes, scores, classes = CenterNetDecoder.decode(fmap, wh, reg)
boxes, scores, classes, segmentation = CenterNetDecoder.decode(fmap, wh, reg, segmentation=segmentation)
# boxes = Boxes(boxes.reshape(boxes.shape[-2:]))
scores = scores.reshape(-1)
classes = classes.reshape(-1).to(torch.int64)

# dets = CenterNetDecoder.decode(fmap, wh, reg)
boxes = CenterNetDecoder.transform_boxes(boxes, img_info)
boxes = Boxes(boxes)
return dict(pred_boxes=boxes, scores=scores, pred_classes=classes)
segmentation = CenterNetDecoder.transform_segmentation(segmentation, img_info)
return dict(pred_boxes=boxes, scores=scores, pred_classes=classes, pred_segmentation=segmentation)

def preprocess_image(self, batched_inputs):
"""
Expand Down
41 changes: 37 additions & 4 deletions dl_lib/network/generator/centernet_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
class CenterNetDecoder(object):

@staticmethod
def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100, segmentation=None):
r"""
decode output feature map to detection results

Expand Down Expand Up @@ -47,6 +47,16 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
else:
wh = wh.reshape(batch, K, 2)

if segmentation is not None:
segmentation_x = gather_feature(segmentation[0], index, use_transform=True)
segmentation_y = gather_feature(segmentation[1], index, use_transform=True)
batch_size = segmentation_x.shape[0]
objects_num = segmentation_x.shape[1]
points_num = segmentation_x.shape[2]
segmentation = torch.zeros((batch_size, objects_num, points_num*2))
segmentation[:, :, 0::2] = segmentation_x
segmentation[:, :, 1::2] = segmentation_y

clses = clses.reshape(batch, K, 1).float()
scores = scores.reshape(batch, K, 1)

Expand All @@ -55,7 +65,7 @@ def decode(fmap, wh, reg=None, cat_spec_wh=False, K=100):
xs + half_w, ys + half_h],
dim=2)

detections = (bboxes, scores, clses)
detections = (bboxes, scores, clses, segmentation)

return detections

Expand All @@ -82,6 +92,29 @@ def transform_boxes(boxes, img_info, scale=1):
target_boxes = np.dot(aug_coords, trans.T).reshape(-1, 4)
return target_boxes

@staticmethod
def transform_segmentation(boxes, img_info, scale=1):
r"""
transform predicted boxes to target boxes

Args:
boxes(Tensor): torch Tensor with (Batch, N, 4) shape
img_info(dict): dict contains all information of original image
scale(float): used for multiscale testing
"""
boxes = boxes.cpu().numpy().reshape(-1, 8)

center = img_info['center']
size = img_info['size']
output_size = (img_info['width'], img_info['height'])
src, dst = CenterAffine.generate_src_and_dst(center, size, output_size)
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))

coords = boxes.reshape(-1, 2)
aug_coords = np.column_stack((coords, np.ones(coords.shape[0])))
target_segmentation = np.dot(aug_coords, trans.T).reshape(-1, 8)
return target_segmentation

@staticmethod
def pseudo_nms(fmap, pool_size=3):
r"""
Expand All @@ -107,13 +140,13 @@ def topk_score(scores, K=40):
topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K)

topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_ys = (topk_inds.true_divide(width)).int().float()
topk_xs = (topk_inds % width).int().float()

# get all topk in in a batch
topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K)
# div by K because index is grouped by K(C x K shape)
topk_clses = (index / K).int()
topk_clses = (index.true_divide(K)).int()
topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K)
topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K)
topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K)
Expand Down
24 changes: 24 additions & 0 deletions dl_lib/network/generator/centernet_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ def generate(config, batched_input):
output_size = config.INPUT.OUTPUT_SIZE
min_overlap = config.MODEL.CENTERNET.MIN_OVERLAP
tensor_dim = config.MODEL.CENTERNET.TENSOR_DIM
num_polygons_points = config.MODEL.CENTERNET.NUM_POLYGON_POINTS

scoremap_list, wh_list, reg_list, reg_mask_list, index_list = [[] for i in range(5)]
segmentation_list_x = []
segmentation_list_y = []
for data in batched_input:
# img_size = (data['height'], data['width'])

Expand All @@ -28,6 +31,8 @@ def generate(config, batched_input):
gt_reg = torch.zeros_like(gt_wh)
reg_mask = torch.zeros(tensor_dim)
gt_index = torch.zeros(tensor_dim)
gt_segmentation_x = torch.ones(tensor_dim, num_polygons_points) * -128
gt_segmentation_y = torch.ones(tensor_dim, num_polygons_points) * -128
# pass

boxes, classes = bbox_dict['gt_boxes'], bbox_dict['gt_classes']
Expand All @@ -50,18 +55,37 @@ def generate(config, batched_input):
)
gt_wh[:num_boxes] = wh

masks = bbox_dict['gt_masks']
gt_segmentation_x[:num_boxes], gt_segmentation_y[:num_boxes] = \
masks.normalized_by_length(box_tensor.numpy(),
num_polygons_points,
box_scale)
gt_segmentation = torch.zeros(num_boxes, num_polygons_points * 2)
gt_segmentation[:, 0::2] = gt_segmentation_x[:num_boxes]
gt_segmentation[:, 1::2] = gt_segmentation_y[:num_boxes]
import cv2
image = data['image'].numpy().transpose((1, 2, 0))
image = cv2.resize(image, (128, 128))
for bbox in box_tensor:
cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
cv2.imwrite(r'D:\project\COCO_MetalMulti\result\result.jpg', image)

scoremap_list.append(gt_scoremap)
wh_list.append(gt_wh)
reg_list.append(gt_reg)
reg_mask_list.append(reg_mask)
index_list.append(gt_index)
segmentation_list_x.append(gt_segmentation_x)
segmentation_list_y.append(gt_segmentation_y)

gt_dict = {
"score_map": torch.stack(scoremap_list, dim=0),
"wh": torch.stack(wh_list, dim=0),
"reg": torch.stack(reg_list, dim=0),
"reg_mask": torch.stack(reg_mask_list, dim=0),
"index": torch.stack(index_list, dim=0),
"segmentation_x": torch.stack(segmentation_list_x, dim=0),
"segmentation_y": torch.stack(segmentation_list_y, dim=0),
}
return gt_dict

Expand Down
Loading