-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
482 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# coding=utf-8 | ||
import cv2 | ||
import random | ||
import numpy as np | ||
|
||
class HSV(object):
    """Randomly scale the hue, saturation and value channels of an image.

    Each channel gets its own multiplicative gain drawn uniformly from
    ``[1 - gain, 1 + gain)``. The image is converted with
    ``cv2.COLOR_BGR2HSV``, so it is treated as BGR.

    Args:
        hgain: half-width of the hue gain range.
        sgain: half-width of the saturation gain range.
        vgain: half-width of the value gain range.
        p: probability of applying the transform.
    """

    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5, p=0.5):
        self.p = p
        self.hgain = hgain
        self.sgain = sgain
        self.vgain = vgain

    def __call__(self, img, bboxes):
        """Return ``(img, bboxes)``; the boxes are passed through untouched."""
        apply = random.random() < self.p
        if not apply:
            return img, bboxes

        # One random gain per channel, centred on 1.
        gains = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1
        scaled = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * gains
        hsv = scaled.clip(None, 255).astype(np.uint8)
        # Hue is additionally capped at 179 (in place), the top of the
        # 0-179 hue range used for 8-bit HSV images.
        hue = hsv[:, :, 0]
        np.clip(hue, None, 179, out=hue)
        # dst=img asks OpenCV to write the result into the caller's buffer.
        img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img)
        return img, bboxes
|
||
class RandomVerticalFilp(object):
    """Flip an image and its boxes upside down with probability ``p``.

    Box rows are laid out as ``xmin, ymin, xmax, ymax, class``.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        """Return ``(img, bboxes)``; ``bboxes`` is updated in place when flipped."""
        apply = random.random() < self.p
        if not apply:
            return img, bboxes

        height, _, _ = img.shape
        # Reverse the row order (a reversed view, not a copy).
        img = img[::-1, :, :]
        # The old ymax becomes the new ymin and vice versa, mirrored
        # about the image height.
        mirrored_y = height - bboxes[:, [3, 1]]
        bboxes[:, [1, 3]] = mirrored_y
        return img, bboxes
|
||
class RandomHorizontalFilp(object):
    """Mirror an image and its boxes left-to-right with probability ``p``.

    Box columns 0 and 2 are treated as the x extents (xmin, xmax).
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        """Return ``(img, bboxes)``; ``bboxes`` is updated in place when flipped."""
        apply = random.random() < self.p
        if not apply:
            return img, bboxes

        _, width, _ = img.shape
        # Reverse the column order (a reversed view, not a copy).
        img = img[:, ::-1, :]
        # The old xmax becomes the new xmin and vice versa, mirrored
        # about the image width.
        mirrored_x = width - bboxes[:, [2, 0]]
        bboxes[:, [0, 2]] = mirrored_x
        return img, bboxes
|
||
class RandomCrop(object):
    """Randomly crop an image without cutting into any bounding box.

    The crop window always contains the union of all boxes; each side is
    pushed outwards from that union by a random amount, limited by the
    image border. Box rows are laid out as ``xmin, ymin, xmax, ymax, ...``.

    Args:
        p: probability of applying the crop.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        """Return ``(img, bboxes)``.

        When the crop is applied ``bboxes`` is shifted in place into the
        coordinate frame of the cropped image. An image with no boxes is
        returned unchanged, because the crop window is defined by the boxes
        (the min/max reductions below would otherwise raise on empty input).
        """
        # The random draw comes first so the RNG stream matches the
        # historical behaviour for non-empty inputs.
        if not (random.random() < self.p) or len(bboxes) == 0:
            return img, bboxes

        h_img, w_img, _ = img.shape

        # Smallest rectangle that encloses every box.
        union_xmin, union_ymin = np.min(bboxes[:, 0:2], axis=0)
        union_xmax, union_ymax = np.max(bboxes[:, 2:4], axis=0)

        # Free margin between the union and each image border.
        max_l_trans = union_xmin
        max_u_trans = union_ymin
        max_r_trans = w_img - union_xmax
        max_d_trans = h_img - union_ymax

        crop_xmin = max(0, int(union_xmin - random.uniform(0, max_l_trans)))
        crop_ymin = max(0, int(union_ymin - random.uniform(0, max_u_trans)))
        crop_xmax = min(w_img, int(union_xmax + random.uniform(0, max_r_trans)))
        crop_ymax = min(h_img, int(union_ymax + random.uniform(0, max_d_trans)))

        img = img[crop_ymin:crop_ymax, crop_xmin:crop_xmax]

        # Re-express the boxes relative to the crop origin.
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
        return img, bboxes
|
||
|
||
class RandomAffine(object):
    """Randomly translate an image while keeping every box inside the frame.

    Only a pure translation is applied (the affine matrix is
    ``[[1, 0, tx], [0, 1, ty]]``). The shift range is derived from the free
    margin between the union of all boxes and the image borders. Box rows
    are laid out as ``xmin, ymin, xmax, ymax, ...``.

    Args:
        p: probability of applying the translation.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, bboxes):
        """Return ``(img, bboxes)``.

        When the shift is applied ``bboxes`` is moved in place by the same
        offset as the image. An image with no boxes is returned unchanged,
        because the shift range is defined by the boxes (the min/max
        reductions below would otherwise raise on empty input).
        """
        # The random draw comes first so the RNG stream matches the
        # historical behaviour for non-empty inputs.
        if not (random.random() < self.p) or len(bboxes) == 0:
            return img, bboxes

        h_img, w_img, _ = img.shape

        # Smallest rectangle that encloses every box.
        union_xmin, union_ymin = np.min(bboxes[:, 0:2], axis=0)
        union_xmax, union_ymax = np.max(bboxes[:, 2:4], axis=0)

        # Free margin between the union and each image border.
        max_l_trans = union_xmin
        max_u_trans = union_ymin
        max_r_trans = w_img - union_xmax
        max_d_trans = h_img - union_ymax

        # One pixel of slack is kept on every side.
        tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
        ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))

        M = np.array([[1, 0, tx], [0, 1, ty]])
        img = cv2.warpAffine(img, M, (w_img, h_img))

        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
        return img, bboxes
|
||
|
||
class Resize(object):
    """Letterbox-resize an image to a fixed shape and rescale its boxes.

    The image is converted BGR -> RGB, resized with its aspect ratio
    preserved so that it fits inside the target shape, centred on a canvas
    filled with 128, and finally divided by 255.

    Args:
        target_shape: ``(height, width)`` of the output image.
        correct_box: when true, the boxes are mapped into the output frame
            and returned together with the image; otherwise only the image
            is returned.
    """

    def __init__(self, target_shape, correct_box=True):
        self.h_target, self.w_target = target_shape
        self.correct_box = correct_box

    def __call__(self, img, bboxes=None):
        """Resize ``img`` and, if ``correct_box`` is set, its boxes.

        Args:
            img: H x W x 3 image (converted with ``cv2.COLOR_BGR2RGB``, so
                it is treated as BGR).
            bboxes: array whose columns 0-3 are ``xmin, ymin, xmax, ymax``.
                May be omitted when ``correct_box`` is false.

        Returns:
            ``(image, bboxes)`` when ``correct_box`` is true (``bboxes`` is
            updated in place), otherwise just ``image``.

        Raises:
            TypeError: if ``correct_box`` is true and ``bboxes`` is None.
        """
        if self.correct_box and bboxes is None:
            raise TypeError("bboxes is required when correct_box is True")

        h_org, w_org, _ = img.shape
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

        # Uniform scale that makes the image fit inside the target on both axes.
        resize_ratio = min(1.0 * self.w_target / w_org, 1.0 * self.h_target / h_org)
        resize_w = int(resize_ratio * w_org)
        resize_h = int(resize_ratio * h_org)
        image_resized = cv2.resize(img, (resize_w, resize_h))

        # Centre the resized image on a mid-grey canvas.
        image_paded = np.full((self.h_target, self.w_target, 3), 128.0)
        dw = int((self.w_target - resize_w) / 2)
        dh = int((self.h_target - resize_h) / 2)
        image_paded[dh:resize_h + dh, dw:resize_w + dw, :] = image_resized
        image = image_paded / 255.0

        if self.correct_box:
            # Apply the same scale and padding offset to the box corners.
            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * resize_ratio + dw
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * resize_ratio + dh
            return image, bboxes
        return image
|
||
|
||
class Mixup(object):
    """Blend two images and merge their boxes with probability ``p``.

    Every returned box row gets one extra trailing column holding the blend
    weight of the image it came from.

    Args:
        p: probability of applying mixup. The original condition was
            ``random.random() > p``, which applied mixup with probability
            ``1 - p`` -- the opposite of every other transform in this
            module. The default ``p=0.5`` behaves the same either way.
    """

    def __init__(self, p=0.5):
        self.p = p

    @staticmethod
    def _with_weight(bboxes, weight):
        """Return ``bboxes`` with a constant ``weight`` column appended."""
        return np.concatenate([bboxes, np.full((len(bboxes), 1), weight)], axis=1)

    def __call__(self, img_org, bboxes_org, img_mix, bboxes_mix):
        """Return ``(img, bboxes)``.

        With mixup: ``img = lam * img_org + (1 - lam) * img_mix`` with
        ``lam ~ Beta(1.5, 1.5)``, and the boxes of both images concatenated
        with weights ``lam`` and ``1 - lam`` respectively.
        Without mixup: ``img_org`` itself and its boxes with weight ``1.0``.
        """
        if random.random() < self.p:
            lam = np.random.beta(1.5, 1.5)
            img = lam * img_org + (1 - lam) * img_mix
            bboxes = np.concatenate([
                self._with_weight(bboxes_org, lam),
                self._with_weight(bboxes_mix, 1 - lam),
            ])
        else:
            img = img_org
            bboxes = self._with_weight(bboxes_org, 1.0)

        return img, bboxes
|
||
|
||
class LabelSmooth(object):
    """Soften one-hot targets by blending them with a uniform distribution.

    Args:
        delta: fraction of the probability mass spread evenly over all classes.
    """

    def __init__(self, delta=0.01):
        self.delta = delta

    def __call__(self, onehot, num_classes):
        """Return ``onehot * (1 - delta) + delta / num_classes``."""
        keep = 1 - self.delta
        uniform_share = self.delta * 1.0 / num_classes
        return onehot * keep + uniform_share
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import os | ||
from torch.utils.data import Dataset | ||
from pycocotools.coco import COCO | ||
|
||
import config.cfg_npmmrdet_dior as cfg | ||
from utils.utils_coco import * | ||
|
||
class COCODataset(Dataset):
    """
    COCO-format detection dataset.

    Annotations are loaded through the COCO API from
    ``<data_dir>/json_gt/<json_file>``; images are read from
    ``<data_dir>/JPEGImages/<5-digit id>.jpg``.
    """

    def __init__(self, data_dir='COCO', json_file='instances_train2017.json',
                 name='train2017', img_size=416,
                 augmentation=None, min_size=1, debug=False):
        """
        COCO dataset initialization. Annotation data are read into memory by COCO API.
        Args:
            data_dir (str): dataset root directory
            json_file (str): COCO json file name
            name (str): COCO data name (e.g. 'train2017' or 'val2017');
                stored on the instance but not used by this class itself
            img_size (int): target image size after pre-processing
            augmentation (dict or None): augmentation settings with the keys
                'LRFLIP', 'JITTER', 'RANDOM_PLACING', 'HUE', 'SATURATION',
                'EXPOSURE' and 'RANDOM_DISTORT'. None disables augmentation.
            min_size (int): bounding boxes whose width or height is not
                larger than this are ignored
            debug (bool): if True, only one data id is selected from the dataset
        """
        if augmentation is None:
            # The None default used to crash on the first key lookup.
            # Fall back to "everything off".
            # NOTE(review): 0 is assumed to mean "disabled" for JITTER inside
            # preprocess(); HUE/SATURATION/EXPOSURE are unused while
            # RANDOM_DISTORT is False -- confirm against utils_coco.
            augmentation = {
                'LRFLIP': False,
                'JITTER': 0,
                'RANDOM_PLACING': False,
                'HUE': 0,
                'SATURATION': 0,
                'EXPOSURE': 0,
                'RANDOM_DISTORT': False,
            }

        self.data_dir = data_dir
        self.json_file = json_file
        self.coco = COCO(self.data_dir+'/json_gt/'+self.json_file)
        self.ids = self.coco.getImgIds()
        if debug:
            self.ids = self.ids[1:2]
            print("debug mode...", self.ids)
        # Sorted so that a category's position in this list is its class index.
        self.class_ids = sorted(self.coco.getCatIds())
        self.name = name
        self.max_labels = cfg.MAX_LABEL
        self.img_size = img_size
        self.min_size = min_size
        self.lrflip = augmentation['LRFLIP']
        self.jitter = augmentation['JITTER']
        self.random_placing = augmentation['RANDOM_PLACING']
        self.hue = augmentation['HUE']
        self.saturation = augmentation['SATURATION']
        self.exposure = augmentation['EXPOSURE']
        self.random_distort = augmentation['RANDOM_DISTORT']

    def __len__(self):
        """Return the number of image ids in the dataset."""
        return len(self.ids)

    def __getitem__(self, index):
        """
        One image / label pair for the given index is picked up \
        and pre-processed.
        Args:
            index (int): data index
        Returns:
            img (numpy.ndarray): pre-processed image, scaled by 1/255 and
                transposed with axes (2, 0, 1)
            padded_labels (torch.Tensor): pre-processed label data. \
                The shape is :math:`[self.max_labels, 5]`; unused rows are zero. \
                Rows are produced by ``label2box`` (defined elsewhere); the
                original documentation describes them as [class, xc, yc, w, h]:
                    class (float): class index.
                    xc, yc (float) : center of bbox whose values range from 0 to 1.
                    w, h (float) : size of bbox whose values range from 0 to 1.
            info_img : image info returned by ``preprocess``; documented
                originally as a tuple of h, w, nh, nw, dx, dy.
                h, w (int): original shape of the image
                nh, nw (int): shape of the resized image without padding
                dx, dy (int): pad size
            id_ (int): image id taken from ``self.ids[index]``. Used for evaluation.
            img_file (str): path the image was read from.
        """
        id_ = self.ids[index]
        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None)
        annotations = self.coco.loadAnns(anno_ids)

        # The random number is always drawn, even when flipping is disabled,
        # so the RNG stream does not depend on the configuration.
        lrflip = False
        if np.random.rand() > 0.5 and self.lrflip == True:
            lrflip = True

        # load image and preprocess
        img_file = os.path.join(self.data_dir, 'JPEGImages',
                                '{:0>5d}'.format(id_) + '.jpg')
        img = cv2.imread(img_file)
        if self.json_file == 'instances_val5k.json' and img is None:
            # Fallback location with 12-digit file names for the val5k split.
            img_file = os.path.join(self.data_dir, 'train2017',
                                    '{:012}'.format(id_) + '.jpg')
            img = cv2.imread(img_file)
        assert img is not None, 'failed to read image: {}'.format(img_file)

        img, info_img = preprocess(img, self.img_size, jitter=self.jitter,
                                   random_placing=self.random_placing)

        if self.random_distort:
            img = random_distort(img, self.hue, self.saturation, self.exposure)

        img = np.transpose(img / 255., (2, 0, 1))

        if lrflip:
            # Axis 2 is the last axis after the transpose above.
            img = np.flip(img, axis=2).copy()

        # load labels: [class index, *bbox] for every sufficiently large box
        labels = []
        for anno in annotations:
            bbox = anno['bbox']
            if bbox[2] > self.min_size and bbox[3] > self.min_size:
                labels.append(
                    [self.class_ids.index(anno['category_id']), *bbox])

        padded_labels = np.zeros((self.max_labels, 5))
        if len(labels) > 0:
            labels = np.stack(labels)
            labels = label2box(labels, info_img, self.img_size, lrflip)
            # Keep at most max_labels rows; the remaining rows stay zero.
            n_kept = min(len(labels), self.max_labels)
            padded_labels[:n_kept] = labels[:n_kept]
        padded_labels = torch.from_numpy(padded_labels)

        return img, padded_labels, info_img, id_, img_file
Oops, something went wrong.