update3

Shank2358 · Jul 22, 2021 · a260711 · a260711
1 parent 52d2742
commit a260711
Show file tree

Hide file tree

Showing 4 changed files with 482 additions and 0 deletions.
diff --git a/dataload/augmentations.py b/dataload/augmentations.py
@@ -0,0 +1,146 @@
+# coding=utf-8
+import cv2
+import random
+import numpy as np
+
+class HSV(object):
+    """Randomly scale the hue, saturation and value channels of a BGR image.
+
+    Args:
+        hgain: maximum relative change applied to the hue channel.
+        sgain: maximum relative change applied to the saturation channel.
+        vgain: maximum relative change applied to the value channel.
+        p: probability of applying the jitter on a given call.
+    """
+
+    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5, p=0.5):
+        self.p = p
+        self.hgain, self.sgain, self.vgain = hgain, sgain, vgain
+
+    def __call__(self, img, bboxes):
+        """Return ``(img, bboxes)``; the boxes are passed through untouched.
+
+        When the jitter fires, the BGR conversion is asked to write its result
+        into the input ``img`` buffer (``dst=img``).
+        """
+        if not random.random() < self.p:
+            return img, bboxes
+
+        # One multiplicative gain per channel, each in [1 - gain, 1 + gain).
+        channel_gains = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1
+        scaled = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * channel_gains
+        img_hsv = scaled.clip(None, 255).astype(np.uint8)
+        # The hue channel is capped at 179 (the 0 - 179 degree range), in place.
+        hue = img_hsv[:, :, 0]
+        np.clip(hue, None, 179, out=hue)
+        img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
+        return img, bboxes
+
+class RandomVerticalFilp(object):
+    """Flip the image top-to-bottom with probability ``p`` and mirror the boxes."""
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img, bboxes):
+        """Return the (possibly flipped) image together with its boxes.
+
+        ``bboxes`` rows are laid out as xmin, ymin, xmax, ymax, class. When the
+        flip is applied the y columns are rewritten in place and the returned
+        image is a row-reversed view of the input.
+        """
+        if not random.random() < self.p:
+            return img, bboxes
+
+        height, _width, _channels = img.shape
+        img = np.flipud(img)  # same as reversing the first axis with ::-1
+        # The old ymax becomes the new ymin and vice versa, so keep a copy of
+        # one column before overwriting it.
+        previous_ymin = bboxes[:, 1].copy()
+        bboxes[:, 1] = height - bboxes[:, 3]
+        bboxes[:, 3] = height - previous_ymin
+        return img, bboxes
+
+class RandomHorizontalFilp(object):
+    """Flip the image left-to-right with probability ``p`` and mirror the boxes."""
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img, bboxes):
+        """Return the (possibly flipped) image together with its boxes.
+
+        When the flip is applied, columns 0 and 2 of ``bboxes`` (the x
+        coordinates) are rewritten in place and the returned image is a
+        column-reversed view of the input.
+        """
+        if not random.random() < self.p:
+            return img, bboxes
+
+        _height, width, _channels = img.shape
+        img = np.fliplr(img)
+        # The old xmax becomes the new xmin and vice versa, so keep a copy of
+        # one column before overwriting it.
+        previous_xmin = bboxes[:, 0].copy()
+        bboxes[:, 0] = width - bboxes[:, 2]
+        bboxes[:, 2] = width - previous_xmin
+        return img, bboxes
+
+class RandomCrop(object):
+    """Randomly crop the image while keeping every bounding box inside the crop.
+
+    Args:
+        p: probability of cropping on a given call.
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img, bboxes):
+        """Return the (possibly cropped) image and the shifted boxes.
+
+        The crop window is sampled between the rectangle enclosing all boxes
+        and the image border, so no box is cut. Box coordinates (columns 0-3)
+        are shifted in place into the cropped frame.
+
+        The inputs are returned unchanged when ``bboxes`` has no rows (there
+        is no enclosing rectangle to protect) or when the sampled window has
+        zero area.
+        """
+        # The random draw comes first so the random stream is consumed the
+        # same way whether or not boxes are present.
+        if random.random() < self.p and len(bboxes) > 0:
+            h_img, w_img, _ = img.shape
+
+            # Smallest rectangle enclosing all boxes: [xmin, ymin, xmax, ymax].
+            max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
+            # Free margin between that rectangle and each image border.
+            max_l_trans = max_bbox[0]
+            max_u_trans = max_bbox[1]
+            max_r_trans = w_img - max_bbox[2]
+            max_d_trans = h_img - max_bbox[3]
+
+            crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
+            crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
+            crop_xmax = min(w_img, int(max_bbox[2] + random.uniform(0, max_r_trans)))
+            crop_ymax = min(h_img, int(max_bbox[3] + random.uniform(0, max_d_trans)))
+
+            # Integer truncation can collapse the window for degenerate boxes;
+            # an empty image is useless downstream, so skip the crop then.
+            if crop_xmax > crop_xmin and crop_ymax > crop_ymin:
+                img = img[crop_ymin : crop_ymax, crop_xmin : crop_xmax]
+
+                bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
+                bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
+        return img, bboxes
+
+
+class RandomAffine(object):
+    """Randomly translate the image while keeping every bounding box in frame.
+
+    The affine matrix built here is a pure translation ``[[1, 0, tx], [0, 1, ty]]``.
+
+    Args:
+        p: probability of translating on a given call.
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img, bboxes):
+        """Return the (possibly shifted) image and the shifted boxes.
+
+        The shift is sampled from the free margin between the rectangle
+        enclosing all boxes and the image border. Box coordinates (columns
+        0-3) are shifted in place by the same amount.
+
+        The inputs are returned unchanged when ``bboxes`` has no rows, because
+        there is no enclosing rectangle to derive the margins from.
+        """
+        # The random draw comes first so the random stream is consumed the
+        # same way whether or not boxes are present.
+        if random.random() < self.p and len(bboxes) > 0:
+            h_img, w_img, _ = img.shape
+            # Smallest rectangle enclosing all boxes: [xmin, ymin, xmax, ymax].
+            max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
+            # Free margin between that rectangle and each image border.
+            max_l_trans = max_bbox[0]
+            max_u_trans = max_bbox[1]
+            max_r_trans = w_img - max_bbox[2]
+            max_d_trans = h_img - max_bbox[3]
+
+            # Stay one pixel short of each margin.
+            tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
+            ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))
+
+            M = np.array([[1, 0, tx], [0, 1, ty]])
+            img = cv2.warpAffine(img, M, (w_img, h_img))
+
+            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
+            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
+        return img, bboxes
+
+
+class Resize(object):
+    """Resize a BGR image to a fixed size, preserving aspect ratio with padding.
+
+    Args:
+        target_shape: ``(height, width)`` of the output image.
+        correct_box: when true, ``__call__`` also maps the boxes into the
+            output frame and returns them alongside the image.
+    """
+
+    def __init__(self, target_shape, correct_box=True):
+        self.h_target, self.w_target = target_shape
+        self.correct_box = correct_box
+
+    def __call__(self, img, bboxes):
+        """Return the padded RGB image divided by 255.
+
+        Returns ``(image, bboxes)`` when ``correct_box`` is true, otherwise
+        just ``image``. Box coordinates (columns 0-3) are rescaled and offset
+        in place.
+        """
+        src_h, src_w, _ = img.shape
+        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
+
+        # A single scale for both axes: the tighter of the two fits.
+        resize_ratio = min(1.0 * self.w_target / src_w, 1.0 * self.h_target / src_h)
+        new_w = int(resize_ratio * src_w)
+        new_h = int(resize_ratio * src_h)
+        resized = cv2.resize(rgb, (new_w, new_h))
+
+        # Centre the resized image on a canvas filled with the value 128.
+        pad_x = int((self.w_target - new_w) / 2)
+        pad_y = int((self.h_target - new_h) / 2)
+        canvas = np.full((self.h_target, self.w_target, 3), 128.0)
+        canvas[pad_y:pad_y + new_h, pad_x:pad_x + new_w, :] = resized
+        image = canvas / 255.0
+
+        if not self.correct_box:
+            return image
+
+        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * resize_ratio + pad_x
+        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * resize_ratio + pad_y
+        return image, bboxes
+
+
+class Mixup(object):
+    """Blend two images and merge their boxes, tagging each box with a weight.
+
+    Args:
+        p: probability of mixing on a given call, matching the meaning of
+            ``p`` in the other transforms of this module.
+    """
+
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, img_org, bboxes_org, img_mix, bboxes_mix):
+        """Return ``(img, bboxes)`` where ``bboxes`` gains one trailing column.
+
+        When mixing is applied, ``img`` is ``lam * img_org + (1 - lam) * img_mix``
+        with ``lam`` drawn from Beta(1.5, 1.5); boxes from ``bboxes_org`` get
+        weight ``lam`` and boxes from ``bboxes_mix`` get ``1 - lam`` in the new
+        column. Otherwise ``img_org`` is returned as is and its boxes get
+        weight 1.0. The input box arrays are not modified.
+        """
+        # Mix when the draw falls below p, so p is the probability of mixing.
+        if random.random() < self.p:
+            lam = np.random.beta(1.5, 1.5)
+            img = lam * img_org + (1 - lam) * img_mix
+            bboxes_org = np.concatenate(
+                [bboxes_org, np.full((len(bboxes_org), 1), lam)], axis=1)
+            bboxes_mix = np.concatenate(
+                [bboxes_mix, np.full((len(bboxes_mix), 1), 1 - lam)], axis=1)
+            bboxes = np.concatenate([bboxes_org, bboxes_mix])
+
+        else:
+            img = img_org
+            bboxes = np.concatenate([bboxes_org, np.full((len(bboxes_org), 1), 1.0)], axis=1)
+
+        return img, bboxes
+
+
+class LabelSmooth(object):
+    """Soften one-hot targets by moving ``delta`` of the mass to a uniform prior.
+
+    Args:
+        delta: fraction of the target mass spread evenly over all classes.
+    """
+
+    def __init__(self, delta=0.01):
+        self.delta = delta
+
+    def __call__(self, onehot, num_classes):
+        """Return ``onehot * (1 - delta) + delta / num_classes``."""
+        kept_fraction = 1 - self.delta
+        uniform_share = self.delta * 1.0 / num_classes
+        return onehot * kept_fraction + uniform_share
diff --git a/dataload/cocodataset.py b/dataload/cocodataset.py
@@ -0,0 +1,116 @@
+import os
+from torch.utils.data import Dataset
+from pycocotools.coco import COCO
+
+import config.cfg_npmmrdet_dior as cfg
+from utils.utils_coco import *
+
+class COCODataset(Dataset):
+    """
+    COCO-format detection dataset.
+
+    NOTE(review): ``cv2``, ``np``, ``torch``, ``preprocess``, ``random_distort``
+    and ``label2box`` are not imported explicitly in this module; they are
+    presumably provided by ``from utils.utils_coco import *`` -- confirm.
+    """
+
+    # Settings used when no augmentation dict is supplied: everything is off.
+    # HUE / SATURATION / EXPOSURE are only read when RANDOM_DISTORT is true.
+    # NOTE(review): assumes ``preprocess`` treats ``jitter=0`` as "no jitter" --
+    # confirm against utils.utils_coco.
+    _NO_AUGMENTATION = {
+        'LRFLIP': False,
+        'JITTER': 0,
+        'RANDOM_PLACING': False,
+        'HUE': 0,
+        'SATURATION': 0,
+        'EXPOSURE': 0,
+        'RANDOM_DISTORT': False,
+    }
+
+    def __init__(self, data_dir='COCO', json_file='instances_train2017.json',
+                 name='train2017', img_size=416,
+                 augmentation=None, min_size=1, debug=False):
+        """
+        COCO dataset initialization. Annotation data are read into memory by COCO API.
+        Args:
+            data_dir (str): dataset root directory; annotations are read from
+                ``<data_dir>/json_gt/<json_file>``
+            json_file (str): COCO json file name
+            name (str): COCO data name (e.g. 'train2017' or 'val2017')
+            img_size (int): target image size after pre-processing
+            augmentation (dict or None): augmentation settings with the keys
+                'LRFLIP', 'JITTER', 'RANDOM_PLACING', 'HUE', 'SATURATION',
+                'EXPOSURE' and 'RANDOM_DISTORT'. ``None`` disables all of them.
+            min_size (int): bounding boxes whose width or height is not larger
+                than this are ignored
+            debug (bool): if True, only one data id is selected from the dataset
+        """
+        if augmentation is None:
+            augmentation = self._NO_AUGMENTATION
+        self.data_dir = data_dir
+        self.json_file = json_file
+        self.coco = COCO(self.data_dir+'/json_gt/'+self.json_file)
+        self.ids = self.coco.getImgIds()
+        if debug:
+            # Keep only the second image id.
+            self.ids = self.ids[1:2]
+            print("debug mode...", self.ids)
+        self.class_ids = sorted(self.coco.getCatIds())
+        self.name = name
+        self.max_labels = cfg.MAX_LABEL
+        self.img_size = img_size
+        self.min_size = min_size
+        self.lrflip = augmentation['LRFLIP']
+        self.jitter = augmentation['JITTER']
+        self.random_placing = augmentation['RANDOM_PLACING']
+        self.hue = augmentation['HUE']
+        self.saturation = augmentation['SATURATION']
+        self.exposure = augmentation['EXPOSURE']
+        self.random_distort = augmentation['RANDOM_DISTORT']
+
+    def __len__(self):
+        """Return the number of image ids in the dataset."""
+        return len(self.ids)
+
+    def __getitem__(self, index):
+        """
+        One image / label pair for the given index is picked up
+        and pre-processed.
+        Args:
+            index (int): data index
+        Returns:
+            img (numpy.ndarray): pre-processed image, divided by 255 and
+                transposed to channel-first order.
+            padded_labels (torch.Tensor): pre-processed label data.
+                The shape is :math:`[self.max_labels, 5]`; unused rows are zero.
+                Each label is produced by ``label2box`` -- presumably
+                [class, xc, yc, w, h] with coordinates in the 0-1 range
+                (TODO confirm against utils.utils_coco).
+            info_img : value returned by ``preprocess`` -- presumably a tuple of
+                h, w, nh, nw, dx, dy (original shape, resized shape without
+                padding, pad size); TODO confirm.
+            id_ (int): COCO image id for ``index``. Used for evaluation.
+            img_file (str): path of the image file that was read.
+        """
+        id_ = self.ids[index]
+        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None)
+        annotations = self.coco.loadAnns(anno_ids)
+        # The random draw is made on every call, even when flipping is disabled.
+        lrflip = False
+        if np.random.rand() > 0.5 and self.lrflip == True:
+            lrflip = True
+
+        # load image and preprocess
+        img_file = os.path.join(self.data_dir, 'JPEGImages',
+                                '{:0>5d}'.format(id_) + '.jpg')
+        img = cv2.imread(img_file)
+        if self.json_file == 'instances_val5k.json' and img is None:
+            # Fall back to the train2017 folder with 12-digit file names.
+            img_file = os.path.join(self.data_dir, 'train2017',
+                                    '{:012}'.format(id_) + '.jpg')
+            img = cv2.imread(img_file)
+        assert img is not None, 'failed to read image: {}'.format(img_file)
+
+        img, info_img = preprocess(img, self.img_size, jitter=self.jitter,
+                                   random_placing=self.random_placing)
+
+        if self.random_distort:
+            img = random_distort(img, self.hue, self.saturation, self.exposure)
+
+        img = np.transpose(img / 255., (2, 0, 1))
+
+        if lrflip:
+            # Axis 2 is the width axis after the channel-first transpose.
+            img = np.flip(img, axis=2).copy()
+
+        # load labels
+        labels = []
+        for anno in annotations:
+            # bbox[2] and bbox[3] are the box width and height.
+            if anno['bbox'][2] > self.min_size and anno['bbox'][3] > self.min_size:
+                labels.append([])
+                labels[-1].append(self.class_ids.index(anno['category_id']))
+                labels[-1].extend(anno['bbox'])
+
+        padded_labels = np.zeros((self.max_labels, 5))
+        if len(labels) > 0:
+            labels = np.stack(labels)
+            labels = label2box(labels, info_img, self.img_size, lrflip)
+            # Labels beyond max_labels are dropped.
+            kept = min(len(labels), self.max_labels)
+            padded_labels[:kept] = labels[:kept]
+        padded_labels = torch.from_numpy(padded_labels)
+
+        return img, padded_labels, info_img, id_, img_file