diff --git a/config/cfg_lodet.py b/config/cfg_lodet.py new file mode 100644 index 0000000..9eed3c1 --- /dev/null +++ b/config/cfg_lodet.py @@ -0,0 +1,95 @@ +# coding=utf-8 + +PROJECT_PATH = "./" +DATA_PATH = "/mnt/Datasets/DIOR/" + + +DATA = {"CLASSES":['airplane','airport','baseballfield','basketballcourt','bridge','chimney', + 'dam','Expressway-Service-area','Expressway-toll-station','golffield','groundtrackfield','harbor', + 'overpass','ship','stadium','storagetank','tenniscourt','trainstation','vehicle','windmill'], + "NUM":20} +''' +MODEL = { + "ANCHORS":[[(1.494992296, 1.772419808), (2.550184278, 5.105188103), (4.511253175, 2.041398611)], # Anchors for small obj + [(3.852394468, 3.413543783), (3.827394513, 9.012606993), (7.569651633, 7.192874667)], # Anchors for medium obj + [(5.269568089, 8.068825014), (10.13079538, 3.44005408), (10.41848982, 10.60006263)]], # Anchors for big obj + "STRIDES":[8, 16, 32], + "ANCHORS_PER_SCLAE":3 + }#544 +''' +MODEL = { + "ANCHORS":[[(3.18524223, 1.57625129), (1.95394566,4.29178376), (6.65929852, 2.8841753)], # Anchors for small obj + [(1.9038, 4.42035), (6.712, 3.29255), (6.645, 12.7675)], # Anchors for medium obj + [(5.513875, 14.38123), (11.66746, 4.2333), (15.70345, 11.94367)]], # Anchors for big obj + "STRIDES":[8, 16, 32], + "ANCHORS_PER_SCLAE":3 + }#800 + +MAX_LABEL = 500 +SHOW_HEATMAP = False +SCALE_FACTOR=2.0 + +TRAIN = { + "EVAL_TYPE":'VOC', #['VOC', 'COCO'] + "TRAIN_IMG_SIZE":800, + "TRAIN_IMG_NUM":11759,#11759, + "AUGMENT":True, + "MULTI_SCALE_TRAIN":True, + "MULTI_TRAIN_RANGE":[12,25,1], + "BATCH_SIZE":10, + "IOU_THRESHOLD_LOSS":0.5, + "EPOCHS":121, + "NUMBER_WORKERS":16, + "MOMENTUM":0.9, + "WEIGHT_DECAY":0.0005, + "LR_INIT":1.5e-4, + "LR_END":1e-6, + "WARMUP_EPOCHS":5, + "IOU_TYPE":'CIOU' #['GIOU','CIOU'] + } + +TEST = { + "EVAL_TYPE":'VOC', #['VOC', 'COCO', 'BOTH'] + "EVAL_JSON":'test.json', + "EVAL_NAME":'test', + "NUM_VIS_IMG":0, + "TEST_IMG_SIZE":800, + "BATCH_SIZE":1, + "NUMBER_WORKERS":4, + "CONF_THRESH":0.05, + "NMS_THRESH":0.45, + "NMS_METHODS":'NMS', #['NMS', 'SOFT_NMS', 'NMS_DIOU', #'NMS_DIOU_SCALE'] + "MULTI_SCALE_TEST":False, + "MULTI_TEST_RANGE":[320,640,96], + "FLIP_TEST":False + } + + +''' +DOTA_cfg +DATA = {"CLASSES": ['plane', + 'baseball-diamond', + 'bridge', + 'ground-track-field', + 'small-vehicle', + 'large-vehicle', + 'ship', + 'tennis-court', + 'basketball-court', + 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'], + "NUM": 15} + +MODEL = {"ANCHORS":[[(1.625, 2.656), ( 3.652, 3.981), (4.493, 1.797)], + [(4.358,3.123), (2.000, 4.558), (6.077, 6.688)], + [(2.443, 7.848), (6.237, 4.750), (9.784, 10.291)]] ,# Anchors for big obj 608 +"STRIDES":[8, 16, 32], +"ANCHORS_PER_SCLAE":3 +}#544 + +MODEL = {"ANCHORS":[[(2.80340246, 2.87380792), (4.23121697, 6.44043634), (7.38428433, 3.82613533)], + [(4.2460819, 4.349495965), (4.42917327, 10.59395029), (8.24772929, 6.224761455)], + [(6.02687863, 5.92446062), (7.178407523, 10.86361071), (15.30253702, 12.62863728)]] ,# Anchors for big obj 608 +"STRIDES":[8, 16, 32], +"ANCHORS_PER_SCLAE":3 +}#800 +''' diff --git a/dataR/evalR/coco_eval.py b/dataR/evalR/coco_eval.py new file mode 100644 index 0000000..0f8b540 --- /dev/null +++ b/dataR/evalR/coco_eval.py @@ -0,0 +1,167 @@ +import json +import tempfile +import matplotlib.pyplot as plt +from torch.autograd import Variable +from torch.utils.data import DataLoader +from pycocotools.cocoeval import COCOeval +import time +from tqdm import tqdm +from dataload.cocodataset import * +from 
eval.evaluator import Evaluator +from utils.utils_coco import * +from utils.visualize import * + +current_milli_time = lambda: int(round(time.time() * 1000)) + +class COCOEvaluator(): + """ + COCO AP Evaluation class. + All the data in the val2017 dataset are processed \ + and evaluated by COCO API. + """ + def __init__(self, data_dir, img_size, confthre, nmsthre): + """ + Args: + model_type (str): model name specified in config file + data_dir (str): dataset root directory + img_size (int): image size after preprocess. images are resized \ + to squares whose shape is (img_size, img_size). + confthre (float): + confidence threshold ranging from 0 to 1, \ + which is defined in the config file. + nmsthre (float): + IoU threshold of non-max supression ranging from 0 to 1. + """ + self.classes = cfg.DATA["CLASSES"] + self.val_data_path = cfg.DATA_PATH + self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'prediction') + self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"] + + augmentation = {'LRFLIP': False, 'JITTER': 0, 'RANDOM_PLACING': False, + 'HUE': 0, 'SATURATION': 0, 'EXPOSURE': 0, 'RANDOM_DISTORT': False} + + self.dataset = COCODataset(data_dir=data_dir, + img_size=img_size, + augmentation=augmentation, + json_file=cfg.TEST["EVAL_JSON"], + name=cfg.TEST["EVAL_NAME"]) + self.dataloader = DataLoader(self.dataset, batch_size=cfg.TEST["BATCH_SIZE"], shuffle=False, + pin_memory=True, num_workers=cfg.TEST["NUMBER_WORKERS"]) + self.img_size = img_size + self.confthre = confthre + self.nmsthre = nmsthre + self.inference_time = 0. + def evaluate(self, model): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + Args: + model : model object + Returns: + ap50_95 (float) : calculated COCO AP for IoU=50:95 + ap50 (float) : calculated COCO AP for IoU=50 + """ + model.eval() + cuda = torch.cuda.is_available() + Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + ids = [] + data_dict = [] + dataiterator = iter(self.dataloader) + #print(" Val datasets number is : {}".format(len(self.dataloader))) + for i in tqdm(range(len(self.dataloader))): + #while True: + #try: + img, _, info_img, id_, img_path = next(dataiterator) # load a batch + #except StopIteration: + #break + info_img = [float(info.numpy()) for info in info_img] + id_ = int(id_) + ids.append(id_) + with torch.no_grad(): + img = Variable(img.type(Tensor)) + start_time = current_milli_time() + _,outputs = model(img) + self.inference_time += (current_milli_time() - start_time) + outputs=outputs.unsqueeze(0) + outputs = postprocess( + outputs, cfg.DATA["NUM"], self.confthre, self.nmsthre) + if outputs[0] is None: + continue + outputs = outputs[0].cpu().data + + for output in outputs: + x1 = float(output[0]) + y1 = float(output[1]) + x2 = float(output[2]) + y2 = float(output[3]) + label = self.dataset.class_ids[int(output[6])] + box = box2label((y1, x1, y2, x2), info_img) + bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]] + score = float(output[4].data.item() * output[5].data.item()) # object score * class score + A = {"image_id": id_, "category_id": label, "bbox": bbox, + "score": score, "segmentation": []} # COCO json format + data_dict.append(A) + + if self.__visual_imgs and i <= self.__visual_imgs: + imgshow = cv2.imread(img_path[0]) + bboxes_prd = Evaluator(model).get_bbox(imgshow, cfg.TEST["MULTI_SCALE_TEST"], cfg.TEST["FLIP_TEST"]) + if bboxes_prd.shape[0] != 0: + boxes = bboxes_prd[..., :4] + class_inds = bboxes_prd[..., 5].astype(np.int32) + 
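# layout of bboxes_prd as indexed in this visualization block (inferred from the indexing here, not stated elsewhere): cols 0-3 = x1,y1,x2,y2, col 4 = detection score, col 5 = class index +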
scores = bboxes_prd[..., 4] + visualize_boxes(image=imgshow, boxes=boxes, labels=class_inds, probs=scores, class_labels=self.classes) + path = os.path.join(self.pred_result_path, "imgs/{}.jpg".format(i)) + cv2.imwrite(path, imgshow) + + + annType = ['segm', 'bbox', 'keypoints'] + self.inference_time = 1.0 * self.inference_time / len(self.dataloader) + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + cocoGt = self.dataset.coco + _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, 'w')) + cocoDt = cocoGt.loadRes(tmp) + cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) + cocoEval.params.imgIds = ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + ''' + # ----------pltshow------------- # + # precision[t,:,k,a,m] PR curves recall-precision value + # T:IoU thresh.5-.95, gap=0.05, t[0]=0.5,t[1]=0.55,t[2]=0.6,t[3]=0.65,t[4]=0.7,t[5]=0.75 ……,t[9]=0.95 + # R:101 recall thresh,0-101 + # K:class k[0] = person,k[1] = bycicle,.....COCO + # A:area, a[0]=all,a[1]=small,a[2]=medium,a[3]=large + # M:Maxdet m[0]=1,m[1]=10,m[2]=100 + + #C75: PR at IoU=.75 (AP at strict IoU), area under curve corresponds to APIoU=.75 metric. + #C50: PR at IoU=.50 (AP at PASCAL IoU), area under curve corresponds to APIoU=.50 metric. + #Loc: PR at IoU=.10 (localization errors ignored, but not duplicate detections). All remaining settings use IoU=.1. + #Sim: PR after supercategory false positives (fps) are removed. Specifically, any matches to objects with a different class label but that belong to the same supercategory don't count as either a fp (or tp). Sim is computed by setting all objects in the same supercategory to have the same class label as the class in question and setting their ignore flag to 1. Note that person is a singleton supercategory so its Sim result is identical to Loc. + #Oth: PR after all class confusions are removed. Similar to Sim, except now if a detection matches any other object it is no longer a fp (or tp). Oth is computed by setting all other objects to have the same class label as the class in question and setting their ignore flag to 1. + #BG: PR after all background (and class confusion) fps are removed. For a single category, BG is a step function that is 1 until max recall is reached then drops to 0 (the curve is smoother after averaging across categories). + #FN: PR after all remaining errors are removed (trivially AP=1). 
+ + pr_array1 = cocoEval.eval['precision'][0, :, 0, 0, 2] + pr_array2 = cocoEval.eval['precision'][5, :, 0, 0, 2] + #pr_array3 = cocoEval.eval['precision'][6, :, 0, 0, 2] + #pr_array4 = cocoEval.eval['precision'][9, :, 0, 0, 2] + x = np.arange(0.0, 1.01, 0.01) + # x_1 = np.arange(0, 1.01, 0.111) + plt.xlabel('IoU') + plt.ylabel('precision') + plt.xlim(0, 1.0) + plt.ylim(0, 1.01) + plt.grid(True) + plt.plot(x, pr_array1, color='blue', linewidth = '3', label='IoU=0.5') + plt.plot(x, pr_array2, color='green', linewidth = '3', label='IoU=0.75') + plt.title("P-R curves catid=person maxDet=100") + plt.legend(loc="lower left") + plt.savefig("../prediction/APs.png", dpi=600) + # plt.show()''' + return cocoEval.stats[0], cocoEval.stats[1], self.inference_time + else: + return 0, 0, 0 diff --git a/dataR/evalR/evaluator.py b/dataR/evalR/evaluator.py new file mode 100644 index 0000000..811e74b --- /dev/null +++ b/dataR/evalR/evaluator.py @@ -0,0 +1,310 @@ +import shutil +import time +from tqdm import tqdm + +from dataloadR.augmentations import * +from evalR import voc_eval +from utils.utils_basic import * +from utils.visualize import * +from utils.heatmap import Show_Heatmap + +current_milli_time = lambda: int(round(time.time() * 1000)) + +class Evaluator(object): + def __init__(self, model, visiual=True): + self.classes = cfg.DATA["CLASSES"] + self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'predictionR')# save path for the prediction results + self.val_data_path = cfg.DATA_PATH + self.conf_thresh = cfg.TEST["CONF_THRESH"] + self.nms_thresh = cfg.TEST["NMS_THRESH"] + self.val_shape = cfg.TEST["TEST_IMG_SIZE"] + self.__visiual = visiual + self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"] + self.model = model + self.device = next(model.parameters()).device + self.inference_time = 0. + self.showheatmap = cfg.SHOW_HEATMAP + self.iouthresh_test = cfg.TEST["IOU_THRESHOLD"] + + def APs_voc(self, multi_test=False, flip_test=False): + filename = cfg.TEST["EVAL_NAME"]+'.txt' + img_inds_file = os.path.join(self.val_data_path, 'ImageSets', filename) + with open(img_inds_file, 'r') as f: + lines = f.readlines() + img_inds = [line.strip() for line in lines] # read the image file names + + #if os.path.exists(self.pred_result_path): + #shutil.rmtree(self.pred_result_path) + #os.mkdir(self.pred_result_path) + # for classname in self.classes: + # with open(os.path.join(self.pred_result_path, 'comp4_det_test_' + classname + '.txt'), 'a') as f: + # f.close() + #E:\NPMMR1\data\results + #E:\NPMMR1\data\results + #rewritepath_pic = os.path.join(self.pred_result_path, 'results') + #if os.path.exists(rewritepath_pic): + #shutil.rmtree(rewritepath_pic) + #os.mkdir(rewritepath_pic) + + rewritepath = os.path.join(self.pred_result_path, 'voc') + if os.path.exists(rewritepath): + shutil.rmtree(rewritepath) + os.mkdir(rewritepath) + for img_ind in tqdm(img_inds): + img_path = os.path.join(self.val_data_path, 'JPEGImages', img_ind + '.tif') # path + JPEGImages + file name############png + # goal: read the file names directly from the txt list, one line at a time + img = cv2.imread(img_path) + bboxes_prd = self.get_bbox(img, multi_test, flip_test) + + ''' + if bboxes_prd.shape[0] != 0 and self.__visiual and self.__visual_imgs < 100: + boxes = bboxes_prd[..., :4] + boxes_rota = bboxes_prd[..., 4:8] + + scores = bboxes_prd[..., 8] + + class_inds = bboxes_prd[..., 9].astype(np.int32) + + visualize_boxes(image=img, boxes=boxes, labels=class_inds, probs=scores, class_labels=self.classes) + path = os.path.join(cfg.PROJECT_PATH, "data/results/{}.jpg".format(self.__visual_imgs)) + cv2.imwrite(path, img) + + self.__visual_imgs += 1 + ''' + + for bbox 
in bboxes_prd: + coor = np.array(bbox[:4], dtype=np.int32) + a_rota = np.array(bbox[4:8], dtype=np.float64) + + x1 = a_rota[0] * (coor[2]-coor[0]) + coor[0] + y1 = coor[1] + + x2 = coor[2] + y2 = a_rota[1] * (coor[3]-coor[1]) + coor[1] + + x3 = coor[2] - a_rota[2] * (coor[2]-coor[0]) + y3 = coor[3] + + x4 = coor[0] + y4 = coor[3] - a_rota[3] * (coor[3]-coor[1]) + + #coor_rota = np.array(bbox[4:8], dtype=np.float64) + score = bbox[8] + class_ind = int(bbox[9]) + class_name = self.classes[class_ind] + score = '%.4f' % score + xmin, ymin, xmax, ymax = map(str, coor) + #x1, y1, x2, y2, x3, y3, x4, y4 = map(str, coor_rota) + #a1, a2, a3, a4 = map(str, a_rota) + #print(a_rota) + img_ind_out = img_ind + ".tif" + s = ' '.join([img_ind, score, str(int(x1)), str(int(y1)), str(int(x2)), str(int(y2)), + str(int(x3)), str(int(y3)), str(int(x4)), str(int(y4))]) + '\n' + s1 = ' '.join([img_ind_out, class_name, score, str(int(x1)), str(int(y1)), str(int(x2)), str(int(y2)), + str(int(x3)), str(int(y3)), str(int(x4)), str(int(y4))]) + '\n' + + with open(os.path.join(self.pred_result_path, 'voc', 'comp4_det_test_' + class_name + '.txt'), 'a') as f: + f.write(s) # write the per-class file used for VOC evaluation (pred path) + with open(os.path.join(self.pred_result_path, 'voc', '科目四_九霄瞰.txt'), 'a') as f1: + f1.write(s1) # write the combined result file used for VOC evaluation (pred path) + color = np.zeros(3) + points = np.array( + [[int(x1), int(y1)], [int(x2), int(y2)], [int(x3), int(y3)], [int(x4), int(y4)]]) + if int(class_name) == 1: + # 25 black + color = (0, 0, 0) + elif int(class_name) == 2: + # 1359 blue + color = (255, 0, 0) + elif int(class_name) == 3: + # 639 Yellow + color = (0, 255, 255) + elif int(class_name) == 4: + # 4371 red + color = (0, 0, 255) + elif int(class_name) == 5: + # 3025 green + color = (0, 255, 0) + cv2.polylines(img, [points], 1, color, 2) + #print(points) + #cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 2) + # c1 top-left corner, c2 bottom-right corner + + store_path = os.path.join(cfg.PROJECT_PATH, 'dataR/results/', img_ind + '.tif') + #print(store_path) + cv2.imwrite(store_path, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) + + + + self.inference_time = 1.0 * self.inference_time / len(img_inds) + return self.__calc_APs(iou_thresh=self.iouthresh_test), self.inference_time + + def get_bbox(self, img, multi_test=False, flip_test=False): + if multi_test: + test_input_sizes = range(cfg.TEST["MULTI_TEST_RANGE"][0], cfg.TEST["MULTI_TEST_RANGE"][1], cfg.TEST["MULTI_TEST_RANGE"][2]) + bboxes_list = [] + for test_input_size in test_input_sizes: + valid_scale =(0, np.inf) + bboxes_list.append(self.__predict(img, test_input_size, valid_scale)) + if flip_test: + bboxes_flip = self.__predict(img[:, ::-1], test_input_size, valid_scale) + bboxes_flip[:, [0, 2]] = img.shape[1] - bboxes_flip[:, [2, 0]] + bboxes_list.append(bboxes_flip) + bboxes = np.row_stack(bboxes_list) + else: + bboxes = self.__predict(img, self.val_shape, (0, np.inf)) + + ############glid_ordinary iou + #print(bboxes.shape) + #bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh) + #print(bboxes.shape) + #bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh) + bboxes = nms_glid(bboxes, self.conf_thresh, self.nms_thresh)################################# + + return bboxes + + def __predict(self, img, test_shape, valid_scale): + org_img = np.copy(img) + org_h, org_w, _ = org_img.shape + + img = self.__get_img_tensor(img, test_shape).to(self.device) + self.model.eval() + with torch.no_grad(): + start_time = current_milli_time() + if self.showheatmap: _, p_d, beta = self.model(img) + else: _, p_d = self.model(img) + 
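# p_d holds the decoded predictions for this image; per row (see the label comment in __convert_pred below): xywh in cols 0-3, corner-offset ratios a1-a4 in cols 4-7, r in col 8 (boxes with r > 0.85 are treated as axis-aligned), objectness conf in col 9, class probabilities from col 10 on +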
self.inference_time += (current_milli_time() - start_time) + pred_bbox = p_d.squeeze().cpu().numpy() + + bboxes = self.__convert_pred(pred_bbox, test_shape, (org_h, org_w), valid_scale) + + if self.showheatmap and len(img): + self.__show_heatmap(beta[2], org_img) + return bboxes + + def __show_heatmap(self, beta, img): + Show_Heatmap(beta, img) + + def __get_img_tensor(self, img, test_shape): + img = Resize((test_shape, test_shape), correct_box=False)(img, None).transpose(2, 0, 1) + return torch.from_numpy(img[np.newaxis, ...]).float() + + + def __convert_pred(self, pred_bbox, test_input_size, org_img_shape, valid_scale): + """ + Filter the predicted boxes and drop those whose scale is unreasonable + """ + #label: xywh (no scale, 0 - 4), a1 - a4 (4 - 8), r(8), conf(9), one_hot_smooth(10...) + pred_coor = xywh2xyxy(pred_bbox[:, :4]) #xywh2xyxy + + pred_conf = pred_bbox[:, 9] + #print(pred_bbox.shape,np.max(pred_conf)) + #print(pred_bbox[:, 10:].shape) + pred_prob = pred_bbox[:, 10:] + # (1) + # (xmin_org, xmax_org) = ((xmin, xmax) - dw) / resize_ratio + # (ymin_org, ymax_org) = ((ymin, ymax) - dh) / resize_ratio + # Note: whatever data augmentation is used during training, it does not affect the conversion here + # if transform A was applied to the input test image, the bbox conversion here is simply the inverse of A + org_h, org_w = org_img_shape + resize_ratio = min(1.0 * test_input_size / org_w, 1.0 * test_input_size / org_h) + dw = (test_input_size - resize_ratio * org_w) / 2 + dh = (test_input_size - resize_ratio * org_h) / 2 + pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio + pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio +########################### + #pred_x1 = 1.0*(pred_bbox[:, 4:5] * pred_bbox[:, 2:3] + pred_coor[:, 0:1]- dw) / resize_ratio + #pred_y1 = 1.0*(pred_coor[:, 1:2]-dh) / resize_ratio + + #pred_x2 = 1.0*(pred_coor[:, 2:3]-dw) / resize_ratio + #pred_y2 = 1.0*(pred_bbox[:, 5:6] * pred_bbox[:, 3:4] + pred_coor[:, 1:2] -dh) / resize_ratio + + #pred_x3 = 1.0*(pred_coor[:, 2:3] - pred_bbox[:, 6:7] * pred_bbox[:, 2:3] - dw) / resize_ratio + #pred_y3 = 1.0*(pred_coor[:, 3:4]-dh) / resize_ratio + + #pred_x4 = 1.0*(pred_coor[:, 0:1]-dw) / resize_ratio + #pred_y4 = 1.0*(pred_coor[:, 3:4] - pred_bbox[:, 7:8] * pred_bbox[:, 3:4]- dh) / resize_ratio +################################### + #pred_rotaxy = np.concatenate([pred_x1, pred_y1, pred_x2, pred_y2, pred_x3, pred_y3, pred_x4, pred_y4], axis=-1)########### + pred_rotaxy = pred_bbox[:, 4:8] + pred_r = pred_bbox[:,8:9] + zero = np.zeros_like(pred_rotaxy) + pred_rotaxy = np.where(pred_r > 0.85, zero, pred_rotaxy) + # (2) clip the parts of the predicted bboxes that fall outside the original image + pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), + np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) + ################################### + + #point1 = np.concatenate([np.minimum(np.maximum(pred_rotaxy[:, :2], [0, 0]), [org_w - 1, org_h - 1])], axis=-1) + #point2 = np.concatenate([np.minimum(np.maximum(pred_rotaxy[:, 2:4], [0, 0]), [org_w - 1, org_h - 1])], axis=-1) + #point3 = np.concatenate([np.minimum(np.maximum(pred_rotaxy[:, 4:6], [0, 0]), [org_w - 1, org_h - 1])], axis=-1) + #point4 = np.concatenate([np.minimum(np.maximum(pred_rotaxy[:, 6:8], [0, 0]), [org_w - 1, org_h - 1])], axis=-1) + #pred_coor_rota = np.concatenate([point1, point2, point3, point4], axis=-1) + + # (3) set the coords of invalid bboxes to 0 + #flag0 = pred_bbox[:, 4]*0 + #flag1 = flag0 + 1 + + invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) + #invalid_mask_rota1 = np.logical_or((pred_bbox[:, 4] > flag1), (pred_bbox[:, 4] < flag0)) + #invalid_mask_rota2 = np.logical_or((pred_bbox[:, 5] > 
flag1), (pred_bbox[:, 5] < flag0)) + #invalid_mask_rota3 = np.logical_or((pred_bbox[:, 6] > flag1), (pred_bbox[:, 6] < flag0)) + #invalid_mask_rota4 = np.logical_or((pred_bbox[:, 7] > flag1), (pred_bbox[:, 7] < flag0)) + + #invalid_mask = np.logical_or(invalid_mask, invalid_mask_rota1) + #invalid_mask = np.logical_or(invalid_mask, invalid_mask_rota2) + #invalid_mask = np.logical_or(invalid_mask, invalid_mask_rota3) + #invalid_mask = np.logical_or(invalid_mask, invalid_mask_rota4) + + pred_coor[invalid_mask] = 0 + pred_rotaxy[invalid_mask] = 0############################ + + # (4) discard bboxes that are outside the valid scale range + bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) + scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) + + # (5) discard bboxes whose score is below score_threshold + classes = np.argmax(pred_prob, axis=-1) + scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] + #print(pred_conf) +########################################################## + score_mask = scores > self.conf_thresh + + mask = np.logical_and(scale_mask, score_mask) + + + coors = pred_coor[mask] + coors_rota = pred_rotaxy[mask] + #coors_rota = pred_coor_rota[mask]####################### + + scores = scores[mask] + + classes = classes[mask] + + bboxes = np.concatenate([coors, coors_rota, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)####################### + return bboxes + + + def __calc_APs(self, iou_thresh=0.5, use_07_metric=False): + """ + Compute the AP of each class + :param iou_thresh: + :param use_07_metric: + :return:dict{cls:ap} + """ + filename = os.path.join(self.pred_result_path, 'voc', 'comp4_det_test_{:s}.txt') + cachedir = os.path.join(self.pred_result_path, 'voc', 'cache') + annopath = os.path.join(self.val_data_path, 'Annotations/{:s}.txt') + imagesetfile = os.path.join(self.val_data_path, 'ImageSets', cfg.TEST["EVAL_NAME"]+'.txt') + APs = {} + Recalls = {} + Precisions = {} + for i, cls in enumerate(self.classes): + R, P, AP = voc_eval.voc_eval(filename, annopath, imagesetfile, cls, cachedir, iou_thresh, use_07_metric)# call voc_eval.voc_eval to compute recall, precision and AP for this class + APs[cls] = AP + Recalls[cls] = R + Precisions[cls] = P + if os.path.exists(cachedir): + shutil.rmtree(cachedir) + + return APs diff --git a/dataR/evalR/voc_eval.py b/dataR/evalR/voc_eval.py new file mode 100644 index 0000000..8ec8cc6 --- /dev/null +++ b/dataR/evalR/voc_eval.py @@ -0,0 +1,236 @@ +import xml.etree.ElementTree as ET +import os +import pickle +import numpy as np +from utils.utils_basic import * + +def parse_rec(filename): + """ Parse a PASCAL VOC xml file """ + tree = ET.parse(filename) + objects = [] + for obj in tree.findall('object'): + obj_struct = {} + obj_struct['name'] = obj.find('name').text + obj_struct['pose'] = obj.find('pose').text + obj_struct['truncated'] = int(obj.find('truncated').text) + obj_struct['difficult'] = int(obj.find('difficult').text) + bbox = obj.find('bndbox') + obj_struct['bbox'] = [int(bbox.find('xmin').text), + int(bbox.find('ymin').text), + int(bbox.find('xmax').text), + int(bbox.find('ymax').text)] + objects.append(obj_struct) + + return objects + +def parse_poly(filename): + """ + :param filename: ground truth file to parse + :return: all instances in a picture + """ + objects = [] + with open(filename, 'r') as f: + while True: + line = f.readline() + if line: + splitlines = line.strip().split(' ') + object_struct = {} + if (len(splitlines) < 9): + continue + classes = cfg.DATA["CLASSES"] + object_struct['name'] = classes[int(splitlines[0])-1] + if (len(splitlines) 
== 9): + object_struct['difficult'] = 0 + elif (len(splitlines) == 10): + object_struct['difficult'] = int(splitlines[9]) + object_struct['bbox'] = [float(splitlines[1]), + float(splitlines[2]), + float(splitlines[3]), + float(splitlines[4]), + float(splitlines[5]), + float(splitlines[6]), + float(splitlines[7]), + float(splitlines[8])] + objects.append(object_struct) + else: + break + return objects + +def voc_ap(rec, prec, use_07_metric=False): + """ ap = voc_ap(rec, prec, [use_07_metric]) + Compute VOC AP given precision and recall. + If use_07_metric is true, uses the + VOC 07 11 point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11. + else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + +'''''' +def voc_eval(detpath, + annopath, + imagesetfile, + classname, + cachedir, + ovthresh=0.5, + use_07_metric=False): + + # first load gt + if not os.path.isdir(cachedir): + os.mkdir(cachedir) + cachefile = os.path.join(cachedir, 'annots.pkl') + # read list of images + with open(imagesetfile, 'r') as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + if not os.path.isfile(cachefile): + # load annots + recs = {} + for i, imagename in enumerate(imagenames): + ####################parse_poly + recs[imagename] = parse_poly(annopath.format(imagename)) + if i % 100 == 0: + print ('Reading annotation for {:d}/{:d}'.format( + i + 1, len(imagenames))) + # save + print ('Saving cached annotations to {:s}'.format(cachefile)) + with open(cachefile, 'wb') as f: + pickle.dump(recs, f) + else: + # load + with open(cachefile, 'rb') as f: + recs = pickle.load(f) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj['name'] == classname] + bbox = np.array([x['bbox'] for x in R]) + difficult = np.array([x['difficult'] for x in R]).astype(np.bool) + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {'bbox': bbox, + 'difficult': difficult, + 'det': det} + + # read dets####################### + detfile = detpath.format(classname) + with open(detfile, 'r') as f: + lines = f.readlines() + + splitlines = [x.strip().split(' ') for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R['bbox'].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + BBGT_xmin = np.min(BBGT[:, 0::2], axis=1) + BBGT_ymin = np.min(BBGT[:, 1::2], axis=1) + 
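# the min/max over the polygon corner coords build axis-aligned envelopes of the GT polygons and of the detection; their rectangle overlap below is only a cheap pre-filter, the exact polygon IoU is computed afterwards in calcoverlaps +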
BBGT_xmax = np.max(BBGT[:, 0::2], axis=1) + BBGT_ymax = np.max(BBGT[:, 1::2], axis=1) + bb_xmin = np.min(bb[0::2]) + bb_ymin = np.min(bb[1::2]) + bb_xmax = np.max(bb[0::2]) + bb_ymax = np.max(bb[1::2]) + + ixmin = np.maximum(BBGT_xmin, bb_xmin) + iymin = np.maximum(BBGT_ymin, bb_ymin) + ixmax = np.minimum(BBGT_xmax, bb_xmax) + iymax = np.minimum(BBGT_ymax, bb_ymax) + iw = np.maximum(ixmax - ixmin + 1., 0.) + ih = np.maximum(iymax - iymin + 1., 0.) + inters = iw * ih + + # union + uni = ((bb_xmax - bb_xmin + 1.) * (bb_ymax - bb_ymin + 1.) + + (BBGT_xmax - BBGT_xmin + 1.) * + (BBGT_ymax - BBGT_ymin + 1.) - inters) + + overlaps = inters / uni + + ############################### + + BBGT_keep_mask = overlaps > 0 + BBGT_keep = BBGT[BBGT_keep_mask, :] + BBGT_keep_index = np.where(overlaps > 0)[0] + + # pdb.set_trace() + def calcoverlaps(BBGT_keep, bb): + overlaps = [] + for index, GT in enumerate(BBGT_keep): + overlap = polygen_iou_xy4_numpy_eval(BBGT_keep[index],bb) + #overlap = polyiou.iou_poly(polyiou.VectorDouble(BBGT_keep[index]), polyiou.VectorDouble(bb)) + overlaps.append(overlap) + return overlaps + + if len(BBGT_keep) > 0: + overlaps = calcoverlaps(BBGT_keep, bb) + + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + # pdb.set_trace() + jmax = BBGT_keep_index[jmax] + + if ovmax > ovthresh: + if not R['difficult'][jmax]: + if not R['det'][jmax]: + tp[d] = 1. + R['det'][jmax] = 1 + else: + fp[d] = 1. + else: + fp[d] = 1. + + # compute precision recall + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/dataR/example.txt b/dataR/example.txt new file mode 100644 index 0000000..c5c906e --- /dev/null +++ b/dataR/example.txt @@ -0,0 +1,136 @@ +/mnt/Ship2020/JPEGImages/1.tif 577,410,654,459,4,646,410,654,428,585,459,577,441,0.39491121 558,394,635,441,4,627,394,635,414,566,441,558,421,0.4410058 271,221,302,249,5,279,221,302,240,295,249,271,229,0.39861751 304,354,329,393,5,316,354,329,388,317,393,304,359,0.48512821 373,392,395,428,5,381,392,395,396,387,428,373,424,0.60606061 785,47,811,87,5,797,47,811,83,800,87,785,52,0.45528846 697,19,724,51,5,706,19,724,44,714,51,697,26,0.41666667 740,73,769,114,5,748,73,769,109,761,114,740,77,0.32506308 513,448,538,468,4,533,448,538,457,518,468,513,459,0.47 535,437,567,457,4,564,437,567,445,539,457,535,450,0.40234375 256,203,276,216,4,258,203,276,205,274,216,256,214,0.77692308 121,47,146,64,4,123,47,146,56,143,64,121,56,0.5 +/mnt/Ship2020/JPEGImages/10.tif 831,449,879,538,5,854,449,879,531,857,538,831,455,0.47331461 853,435,906,519,5,884,435,906,510,874,519,853,445,0.5730009 826,513,858,553,4,842,513,858,521,842,553,826,545,0.5 889,466,921,505,4,909,466,921,498,901,505,889,473,0.58012821 +/mnt/Ship2020/JPEGImages/100.tif 295,315,424,452,4,402,315,424,334,318,452,295,432,0.26710236 536,729,610,808,4,593,729,610,742,553,808,536,795,0.31867944 669,582,745,660,4,727,582,745,598,686,660,669,645,0.33746626 43,699,90,788,4,70,699,90,706,64,788,43,782,0.44549366 +/mnt/Ship2020/JPEGImages/1000.tif 975,816,1015,928,4,1001,816,1015,924,989,928,975,820,0.63928571 880,862,905,927,4,880,862,905,862,905,927,880,927,1.0 888,970,916,1023,4,915,970,916,1023,889,1023,888,970,0.96428571 +/mnt/Ship2020/JPEGImages/1001.tif 976,304,1015,416,4,1001,304,1015,412,990,416,976,308,0.63095238 880,350,905,415,4,880,350,905,350,905,415,880,415,1.0 
887,458,918,522,4,916,458,918,522,890,522,887,458,0.91935484 898,545,928,614,4,923,545,928,612,903,614,898,547,0.81400966 875,543,912,616,5,886,543,912,546,902,616,875,612,0.69548315 922,751,953,820,4,949,751,953,818,927,820,922,752,0.83941094 147,942,185,1026,4,174,942,185,1022,158,1026,147,945,0.69298246 +/mnt/Ship2020/JPEGImages/1002.tif 922,0,953,53,4,949,0,953,50,926,53,922,2,0.83597079 147,175,188,272,4,175,175,188,268,160,272,147,178,0.66972592 +/mnt/Ship2020/JPEGImages/1003.tif 463,816,503,927,4,489,816,503,924,477,927,463,819,0.64189189 368,862,393,927,4,368,862,393,862,393,927,368,927,1.0 376,969,404,1023,4,403,969,404,1022,377,1023,376,970,0.94708995 +/mnt/Ship2020/JPEGImages/1004.tif 463,304,503,416,4,489,304,503,412,477,416,463,308,0.63928571 368,350,393,415,4,368,350,393,350,393,415,368,415,1.0 375,458,406,522,4,404,458,406,522,378,522,375,458,0.91935484 386,545,417,614,4,411,545,417,612,392,614,386,547,0.7886863 363,543,400,616,5,374,543,400,546,390,616,363,612,0.69548315 410,751,441,820,4,437,751,441,818,415,820,410,752,0.83941094 +/mnt/Ship2020/JPEGImages/1005.tif 386,33,417,102,4,411,33,417,100,392,102,386,35,0.7886863 363,31,400,104,5,374,31,400,34,390,104,363,100,0.69548315 410,239,441,308,4,437,239,441,306,415,308,410,240,0.83941094 +/mnt/Ship2020/JPEGImages/1006.tif 410,0,441,53,4,437,0,441,50,414,53,410,2,0.83597079 +/mnt/Ship2020/JPEGImages/1007.tif 886,692,951,931,4,920,692,951,926,916,931,886,696,0.52961056 +/mnt/Ship2020/JPEGImages/1008.tif 939,226,1007,352,5,991,226,1007,345,956,352,939,232,0.73080065 +/mnt/Ship2020/JPEGImages/1009.tif 374,692,439,931,4,408,692,439,927,404,931,374,697,0.52961056 +/mnt/Ship2020/JPEGImages/101.tif 537,217,610,296,4,594,217,610,230,553,296,537,283,0.31160049 669,71,745,149,4,728,71,745,86,687,149,669,133,0.33746626 44,187,91,276,4,70,187,91,194,64,276,44,270,0.44549366 +/mnt/Ship2020/JPEGImages/1010.tif 408,337,502,459,5,445,337,502,438,465,459,408,358,0.43024067 74,964,239,1017,4,236,964,239,1005,76,1017,74,977,0.75614637 +/mnt/Ship2020/JPEGImages/1011.tif 782,671,803,710,4,794,671,803,707,791,710,782,674,0.56043956 74,452,239,505,4,236,452,239,493,76,505,74,465,0.75614637 +/mnt/Ship2020/JPEGImages/1012.tif 67,645,127,869,4,91,645,127,648,103,869,67,865,0.596875 141,712,172,839,4,144,712,172,713,168,839,141,839,0.88404877 +/mnt/Ship2020/JPEGImages/1013.tif 67,133,127,357,4,91,133,127,136,104,357,67,353,0.60494792 141,200,172,327,4,144,200,172,201,168,327,141,327,0.88404877 +/mnt/Ship2020/JPEGImages/1014.tif 86,335,217,814,3,177,335,217,806,127,814,86,342,0.6848635 +/mnt/Ship2020/JPEGImages/1015.tif 101,0,217,308,3,191,0,217,300,127,308,101,7,0.76242723 539,545,663,1027,3,632,545,663,1021,570,1027,539,551,0.74377593 +/mnt/Ship2020/JPEGImages/1016.tif 621,93,859,357,2,656,93,859,326,823,357,621,124,0.23155399 302,0,391,198,2,343,0,391,187,350,198,302,11,0.4650437 193,0,287,206,2,235,0,287,194,244,206,193,11,0.45747263 876,391,1052,587,2,906,391,1052,563,1023,587,876,416,0.25071023 354,212,412,302,5,388,212,412,292,379,302,354,221,0.5612069 821,342,902,426,5,849,342,902,400,874,426,821,368,0.44121105 +/mnt/Ship2020/JPEGImages/1017.tif 68,420,238,612,2,99,420,238,586,208,612,68,446,0.26623775 255,753,319,812,5,302,753,319,771,271,812,255,793,0.4096928 205,796,267,857,5,250,796,267,815,222,857,205,838,0.41485986 94,506,185,608,5,114,506,185,593,165,608,94,522,0.30494505 79,530,156,613,5,97,530,156,596,138,613,79,547,0.34282585 +/mnt/Ship2020/JPEGImages/1018.tif 185,1,297,129,2,196,1,297,119,286,129,185,10,0.15785435 
511,307,585,392,5,534,307,585,373,562,392,511,326,0.39538951 549,313,591,356,5,567,313,591,340,572,356,549,330,0.48615725 490,325,565,410,5,508,325,565,395,546,410,490,341,0.33905882 471,342,565,448,5,492,342,565,431,543,448,471,360,0.31829587 642,293,714,364,5,696,293,714,312,659,364,642,346,0.37695618 +/mnt/Ship2020/JPEGImages/1019.tif 970,69,1029,141,4,987,69,1029,131,1013,141,970,79,0.34086629 +/mnt/Ship2020/JPEGImages/102.tif 494,914,575,1036,4,519,914,575,1023,549,1036,494,928,0.35579842 +/mnt/Ship2020/JPEGImages/1020.tif 543,385,638,514,4,618,385,638,397,564,514,543,501,0.27086903 457,528,541,634,4,514,528,541,544,485,634,457,617,0.38112084 297,797,370,891,4,352,797,370,809,315,891,297,879,0.31127951 761,588,915,794,4,883,588,915,610,793,794,761,772,0.27020552 843,658,895,724,5,881,658,895,667,856,724,843,716,0.32153263 849,668,912,747,5,895,668,912,679,867,747,849,735,0.34247539 893,677,920,714,5,912,677,920,682,901,714,893,709,0.35135135 27,500,99,562,5,41,500,99,544,85,562,27,518,0.3718638 459,89,543,148,4,470,89,543,116,531,148,459,122,0.53692494 52,493,117,538,4,62,493,117,515,108,538,52,517,0.5157265 +/mnt/Ship2020/JPEGImages/1021.tif 437,633,523,793,4,494,633,523,645,465,793,437,782,0.35563227 +/mnt/Ship2020/JPEGImages/1022.tif 88,552,741,1150,1,201,552,741,1022,628,1150,88,680,0.31301377 500,417,537,497,5,517,417,537,422,520,497,500,492,0.53547297 507,156,577,302,5,530,156,577,163,553,302,507,294,0.64740705 +/mnt/Ship2020/JPEGImages/1023.tif 0,882,66,960,5,17,882,66,946,48,960,0,896,0.3494561 0,844,66,921,5,17,844,66,906,48,921,0,859,0.35665092 13,823,88,904,5,35,823,88,886,67,904,13,840,0.37884774 150,808,216,874,5,201,808,216,823,166,874,150,858,0.35938935 49,822,96,870,5,68,822,96,852,76,870,49,841,0.48049645 252,800,279,825,4,271,800,279,809,260,825,252,816,0.44296296 +/mnt/Ship2020/JPEGImages/1024.tif 549,19,612,95,4,562,19,612,85,598,95,549,29,0.28947368 530,22,595,100,4,547,22,595,89,579,100,530,34,0.32642998 490,50,550,131,4,507,50,550,121,534,131,490,59,0.32777778 458,69,517,142,4,475,69,517,132,500,142,458,79,0.34618064 +/mnt/Ship2020/JPEGImages/1025.tif 249,76,403,282,4,371,76,403,98,281,282,249,260,0.27020552 331,146,383,212,5,370,146,383,155,345,212,331,204,0.32153263 337,156,400,235,5,383,156,400,167,355,235,337,223,0.34247539 381,165,408,202,5,400,165,408,170,389,202,381,197,0.35135135 +/mnt/Ship2020/JPEGImages/1026.tif 540,914,680,1055,4,647,914,680,947,572,1055,540,1023,0.3556231 +/mnt/Ship2020/JPEGImages/1027.tif 478,402,680,607,4,647,402,680,435,511,607,478,575,0.27010384 +/mnt/Ship2020/JPEGImages/1028.tif 116,520,254,664,4,225,520,254,547,145,664,116,637,0.31884058 209,571,304,702,2,276,571,304,587,237,702,209,686,0.34487746 197,934,216,966,5,204,934,216,936,210,966,197,963,0.63322368 185,932,204,963,5,190,932,204,933,200,963,185,961,0.737691 193,967,207,989,5,198,967,207,969,202,989,193,987,0.61688312 182,964,198,987,5,187,964,198,967,193,987,182,984,0.63858696 168,960,189,986,5,178,960,189,965,180,986,168,981,0.52930403 152,732,185,815,5,160,732,185,734,178,815,152,812,0.75629792 185,918,223,935,4,186,918,223,924,221,935,185,930,0.6625387 163,989,180,1012,5,169,989,180,993,174,1012,163,1009,0.60230179 174,987,190,1013,5,179,987,190,990,185,1013,174,1010,0.64423077 186,993,200,1020,5,190,993,200,995,196,1020,186,1018,0.68253968 224,944,234,961,5,226,944,234,945,231,961,224,961,0.73529412 +/mnt/Ship2020/JPEGImages/1029.tif 52,19,115,95,4,65,19,115,85,102,95,52,29,0.28362573 33,23,98,100,4,50,23,98,89,82,100,33,34,0.32417582 
0,58,53,131,4,16,58,53,121,37,131,0,67,0.3534505 558,603,635,718,4,585,603,635,704,608,718,558,617,0.38701299 +/mnt/Ship2020/JPEGImages/103.tif 25,217,98,296,4,82,217,98,230,41,296,25,283,0.31160049 157,70,233,149,4,216,70,233,86,175,149,157,133,0.33952365 +/mnt/Ship2020/JPEGImages/1030.tif 816,670,855,695,5,851,670,855,679,819,695,816,687,0.36871795 915,620,960,653,5,953,620,960,631,922,653,915,642,0.38518519 974,597,1011,621,5,1008,597,1011,604,977,621,974,614,0.32545045 +/mnt/Ship2020/JPEGImages/1031.tif 716,13,771,98,5,734,13,771,89,752,98,716,23,0.37294118 732,3,791,89,5,752,3,791,78,771,89,732,14,0.38017343 803,125,859,209,5,821,125,859,201,841,209,803,133,0.35544218 818,114,879,200,5,839,114,879,190,859,200,818,125,0.37609607 901,289,1027,388,5,921,289,1027,358,1007,388,901,319,0.36556037 +/mnt/Ship2020/JPEGImages/1032.tif 722,606,971,1008,3,910,606,971,637,783,1008,722,977,0.28431137 +/mnt/Ship2020/JPEGImages/1033.tif 27,914,168,1055,4,135,914,168,947,60,1055,27,1023,0.35664705 293,734,364,806,5,352,734,364,746,305,806,293,794,0.27934272 +/mnt/Ship2020/JPEGImages/1034.tif 0,402,199,606,4,166,402,199,434,32,606,0,574,0.26894275 +/mnt/Ship2020/JPEGImages/1035.tif 629,465,701,514,4,692,465,701,483,638,514,629,496,0.4005102 304,670,343,695,5,339,670,343,679,307,695,304,687,0.36871795 403,620,448,653,5,441,620,448,631,410,653,403,642,0.38518519 462,598,499,621,5,496,598,499,604,465,621,462,615,0.29964747 524,564,565,591,5,560,564,565,575,529,591,524,580,0.42999097 603,582,643,602,5,641,582,643,594,605,602,603,590,0.59 605,593,649,613,5,647,593,649,603,608,613,605,602,0.52215909 612,614,647,632,5,644,614,647,625,615,632,612,621,0.59206349 625,647,667,669,5,664,647,667,657,629,669,625,658,0.48106061 635,668,670,689,5,667,668,670,677,638,689,635,680,0.44081633 640,674,678,696,5,674,674,678,682,643,696,640,687,0.40729665 664,719,692,735,4,691,719,692,731,666,735,664,724,0.6953125 +/mnt/Ship2020/JPEGImages/1036.tif 204,525,259,610,5,222,525,259,601,240,610,204,535,0.37294118 220,516,279,601,5,240,516,279,591,259,601,220,526,0.37686939 291,637,347,721,5,309,637,347,713,329,721,291,645,0.35544218 306,627,367,713,5,327,627,367,702,347,713,306,637,0.37609607 710,891,769,944,5,721,891,769,932,758,944,710,903,0.3284298 834,630,882,710,5,857,630,882,703,859,710,834,637,0.4828125 817,639,858,712,5,834,639,858,705,842,712,817,645,0.4198129 304,158,343,183,5,339,158,343,167,307,183,304,175,0.36871795 403,108,448,140,5,441,108,448,118,410,140,403,130,0.37083333 462,86,499,109,5,496,86,499,92,465,109,462,103,0.29964747 524,52,565,79,5,560,52,565,63,529,79,524,68,0.42999097 603,70,643,90,5,641,70,643,82,605,90,603,78,0.59 605,80,649,100,5,646,80,649,91,607,100,605,90,0.52215909 612,102,647,119,5,644,102,647,112,615,119,612,109,0.57310924 625,135,667,157,5,664,135,667,145,629,157,625,146,0.48106061 635,156,670,177,5,667,156,670,165,638,177,635,168,0.44081633 640,162,678,184,5,674,162,678,170,643,184,640,175,0.40729665 808,798,844,864,5,829,798,844,803,823,864,808,859,0.42929293 799,813,825,850,5,813,813,825,819,810,850,799,845,0.45945946 782,559,837,646,5,809,559,837,635,810,646,782,570,0.49320794 813,568,852,629,5,832,568,852,623,833,629,813,574,0.48970156 799,647,842,714,5,822,647,842,706,820,714,799,654,0.51804929 +/mnt/Ship2020/JPEGImages/1037.tif 875,693,1083,1049,3,1022,693,1083,719,936,1049,875,1023,0.32346586 +/mnt/Ship2020/JPEGImages/1038.tif 849,225,1060,605,3,999,225,1060,251,910,605,849,579,0.31795959 911,563,987,634,4,926,563,987,615,971,634,911,581,0.35822832 
+/mnt/Ship2020/JPEGImages/1039.tif 110,858,200,917,4,191,858,200,877,119,917,110,898,0.35762712 +/mnt/Ship2020/JPEGImages/104.tif 0,403,95,553,4,24,403,95,540,70,553,0,416,0.29985965 +/mnt/Ship2020/JPEGImages/1040.tif 110,346,200,405,4,191,346,200,365,119,405,110,386,0.35762712 +/mnt/Ship2020/JPEGImages/1041.tif 118,465,189,514,4,181,465,189,483,126,514,118,496,0.39724059 12,564,52,591,5,48,564,52,575,16,591,12,580,0.42592593 91,582,131,602,5,129,582,131,594,93,602,91,590,0.59 93,593,137,613,5,135,593,137,603,96,613,93,602,0.52215909 100,614,135,632,5,132,614,135,625,103,632,100,621,0.59206349 113,647,155,669,5,152,647,155,657,117,669,113,658,0.48106061 123,668,158,689,5,155,668,158,677,126,689,123,680,0.44081633 128,674,166,696,5,163,674,166,682,132,696,128,687,0.40729665 156,716,186,735,4,184,716,186,730,159,735,156,722,0.6754386 +/mnt/Ship2020/JPEGImages/1042.tif 198,891,256,944,5,208,891,256,932,246,944,198,903,0.32075472 322,630,370,710,5,345,630,370,703,347,710,322,637,0.4828125 306,639,347,712,5,322,639,347,706,330,712,306,645,0.41847645 12,52,53,79,5,48,52,53,63,17,79,12,68,0.42999097 91,70,131,90,5,129,70,131,82,93,90,91,78,0.59 93,81,137,101,5,135,81,137,91,96,101,93,90,0.52215909 100,102,135,120,5,132,102,135,113,103,120,100,109,0.59206349 113,135,155,157,5,152,135,155,145,117,157,113,146,0.48106061 123,156,158,177,5,155,156,158,165,126,177,123,168,0.44081633 128,161,166,183,5,162,161,166,170,132,183,128,175,0.41028708 272,559,328,646,5,300,559,328,636,300,646,272,569,0.5 301,567,344,633,5,322,567,344,625,324,633,301,574,0.4820296 289,792,339,864,5,318,792,339,801,311,864,289,854,0.44847222 +/mnt/Ship2020/JPEGImages/1043.tif 315,147,457,262,4,330,147,457,241,441,262,315,169,0.255297 169,959,253,1024,5,185,959,253,989,238,1024,169,995,0.52912088 +/mnt/Ship2020/JPEGImages/1044.tif 502,334,553,378,4,513,334,553,363,542,378,502,349,0.40953654 364,737,545,1048,3,484,737,545,762,425,1048,364,1023,0.3632197 523,833,599,873,4,530,833,599,854,592,873,523,852,0.47960526 +/mnt/Ship2020/JPEGImages/1045.tif 336,225,545,605,3,484,225,545,250,397,605,336,579,0.3197998 399,563,474,634,4,414,563,474,616,459,634,399,582,0.35633803 523,321,599,361,4,530,321,599,342,592,361,523,340,0.47960526 +/mnt/Ship2020/JPEGImages/1046.tif 399,51,475,122,4,414,51,475,104,459,122,399,70,0.35822832 480,414,607,617,4,570,414,607,432,517,617,480,600,0.32731469 +/mnt/Ship2020/JPEGImages/1047.tif 370,830,562,1013,4,535,830,562,858,398,1013,370,984,0.25435451 +/mnt/Ship2020/JPEGImages/1048.tif 712,309,1050,501,3,1023,309,1050,379,740,501,712,431,0.38661859 +/mnt/Ship2020/JPEGImages/1049.tif 499,243,616,374,4,520,243,616,356,594,374,499,262,0.27308019 862,132,938,207,4,878,132,938,190,922,207,862,149,0.34175439 831,285,901,353,4,844,285,901,338,887,353,831,301,0.33287815 904,936,1004,1051,5,957,936,1004,965,952,1051,904,1023,0.4873913 456,752,726,883,2,715,752,726,779,467,883,456,856,0.23005372 461,931,686,1052,4,673,931,686,961,474,1052,461,1022,0.27706152 +/mnt/Ship2020/JPEGImages/105.tif 658,632,1029,740,3,665,632,1029,660,1023,740,658,712,0.73230508 741,823,783,873,4,757,823,783,862,766,873,741,835,0.44214286 +/mnt/Ship2020/JPEGImages/1050.tif 844,473,1060,686,3,1022,473,1060,510,882,686,844,649,0.28851504 563,732,802,970,3,766,732,802,769,599,970,563,933,0.259256 +/mnt/Ship2020/JPEGImages/1051.tif 845,0,1029,187,3,991,0,1029,36,883,187,845,150,0.32108812 563,233,802,471,3,766,233,802,270,599,471,563,434,0.259256 +/mnt/Ship2020/JPEGImages/1052.tif 436,201,570,305,4,555,201,570,224,450,305,436,283,0.27773393 
204,388,290,457,4,277,388,290,408,216,457,204,438,0.34580384 128,444,204,507,4,192,444,204,462,139,507,128,490,0.34502924 190,371,275,445,4,261,371,275,388,204,445,190,428,0.31875994 +/mnt/Ship2020/JPEGImages/1053.tif 567,468,664,639,4,617,468,664,482,614,639,567,625,0.48706819 526,897,583,948,4,537,897,583,935,571,948,526,911,0.35964912 544,734,596,765,5,549,734,596,752,591,765,544,747,0.43486352 541,768,585,791,5,545,768,585,779,581,791,541,780,0.51778656 529,795,574,822,5,534,795,574,807,570,822,529,809,0.52962963 +/mnt/Ship2020/JPEGImages/1054.tif 706,13,789,85,4,722,13,789,64,772,85,706,35,0.37868139 904,35,991,136,2,974,35,991,50,921,136,904,121,0.28587686 +/mnt/Ship2020/JPEGImages/1055.tif 116,799,194,945,4,153,799,194,934,158,945,116,809,0.47255883 106,851,152,946,5,129,851,152,940,130,946,106,856,0.49038902 +/mnt/Ship2020/JPEGImages/1056.tif 116,287,194,433,4,152,287,194,422,158,433,116,297,0.4670706 108,334,151,435,5,131,334,151,430,128,435,108,339,0.53142989 +/mnt/Ship2020/JPEGImages/1057.tif 551,171,688,269,5,570,171,688,226,669,269,551,214,0.45575749 870,754,906,798,4,895,754,906,762,881,798,870,790,0.37626263 +/mnt/Ship2020/JPEGImages/1058.tif 735,394,1110,1055,3,1022,394,1110,434,823,1055,735,1014,0.26718104 869,242,906,286,4,894,242,906,250,881,286,869,278,0.38820639 +/mnt/Ship2020/JPEGImages/1059.tif 0,812,265,1068,3,43,812,265,1022,221,1068,0,857,0.28353479 318,82,356,122,4,345,82,356,93,328,122,318,112,0.39375 337,104,363,134,4,356,104,363,110,343,134,337,129,0.34166667 344,113,371,140,4,365,113,371,119,350,140,344,134,0.34567901 368,130,401,164,4,393,130,401,136,375,164,368,157,0.3315508 385,149,413,179,4,405,149,413,156,392,179,385,171,0.38392857 375,145,403,173,4,397,145,403,151,382,173,375,166,0.3565051 422,203,453,235,4,430,203,453,225,445,235,422,213,0.40927419 511,3,535,41,4,524,3,535,7,523,41,511,38,0.48300439 523,9,547,45,4,539,9,547,13,532,45,523,40,0.390625 532,12,554,48,4,547,12,554,15,539,48,532,45,0.34848485 544,16,569,52,4,561,16,569,19,552,52,544,49,0.35 551,19,576,57,4,567,19,576,24,560,57,551,53,0.39315789 565,29,587,63,4,578,29,587,33,574,63,565,59,0.43048128 575,26,597,64,4,588,26,597,30,584,64,575,60,0.42822967 635,73,665,85,4,664,73,665,84,636,85,635,74,0.88888889 627,96,668,107,4,667,96,668,105,627,107,627,97,0.85476718 631,87,668,99,4,667,87,668,96,631,99,631,91,0.7027027 674,153,711,168,4,711,153,711,165,675,168,674,155,0.82432432 670,172,713,194,4,711,172,713,190,671,194,670,176,0.79598309 686,200,713,210,4,686,200,713,200,713,210,686,210,1.0 699,223,723,232,4,699,223,723,223,723,232,699,232,1.0 +/mnt/Ship2020/JPEGImages/106.tif 658,120,1029,228,3,665,120,1029,148,1023,228,658,200,0.73230508 738,309,786,362,4,755,309,786,349,768,362,738,323,0.43356918 +/mnt/Ship2020/JPEGImages/1060.tif 0,247,119,375,4,19,247,119,356,99,375,0,266,0.26365546 366,132,442,207,4,382,132,442,190,426,207,366,149,0.34175439 335,285,405,353,4,348,285,405,338,391,353,335,301,0.33287815 733,890,799,976,4,775,890,799,904,757,976,733,962,0.40803383 409,936,509,1051,5,461,936,509,965,456,1051,409,1023,0.4873913 5,752,230,865,2,219,752,230,779,17,865,5,837,0.26959685 10,930,188,1023,4,177,930,188,962,21,1023,10,991,0.36335629 731,159,767,203,4,753,159,767,168,745,203,731,194,0.43434343 +/mnt/Ship2020/JPEGImages/1061.tif 332,0,548,210,3,510,0,548,37,370,210,332,172,0.29166667 51,256,290,494,3,254,256,290,293,87,494,51,457,0.259256 +/mnt/Ship2020/JPEGImages/1062.tif 55,0,142,140,4,94,0,142,14,102,140,55,126,0.53678161 14,398,71,449,4,26,398,71,436,60,449,14,412,0.35964912 
194,538,277,610,4,210,538,277,589,260,610,194,560,0.37868139 392,560,480,661,2,462,560,480,575,410,661,392,646,0.29230423 32,235,84,266,5,37,235,84,253,79,266,32,248,0.43486352 29,269,73,292,5,33,269,73,280,69,292,29,281,0.51778656 17,296,62,323,5,22,296,62,308,58,323,17,310,0.52962963 +/mnt/Ship2020/JPEGImages/1063.tif 567,676,628,757,5,593,676,628,689,602,757,567,744,0.55009107 605,702,642,758,5,622,702,642,709,625,758,605,751,0.53040541 621,725,646,761,5,637,725,646,730,630,761,621,756,0.39888889 571,667,597,706,5,587,667,597,672,582,706,571,701,0.42850099 557,710,580,746,5,572,710,580,714,564,746,557,743,0.35990338 835,759,857,793,5,843,759,857,788,848,793,835,763,0.41644385 505,837,539,857,5,509,837,539,847,535,857,505,847,0.5 +/mnt/Ship2020/JPEGImages/1064.tif 319,563,702,768,3,678,563,702,623,342,768,319,709,0.31598421 +/mnt/Ship2020/JPEGImages/1065.tif 889,880,993,956,4,979,880,993,903,903,956,889,933,0.35576923 +/mnt/Ship2020/JPEGImages/1066.tif 889,368,993,443,4,979,368,993,390,903,443,889,421,0.34897436 +/mnt/Ship2020/JPEGImages/1067.tif 728,760,932,1057,5,860,760,932,795,800,1057,728,1022,0.38760151 39,684,177,781,5,59,684,177,739,157,781,39,726,0.45241297 +/mnt/Ship2020/JPEGImages/1068.tif 727,247,932,545,5,861,247,932,284,797,545,727,509,0.3821411 39,172,177,269,5,59,172,177,227,157,269,39,214,0.45241297 358,754,394,798,4,383,754,394,762,369,798,358,790,0.37626263 +/mnt/Ship2020/JPEGImages/1069.tif 185,380,618,1074,3,518,380,618,432,285,1074,185,1023,0.2708784 358,243,394,286,4,383,243,394,250,369,286,358,279,0.36886305 +/mnt/Ship2020/JPEGImages/107.tif 146,120,642,237,3,153,120,642,159,636,237,146,199,0.66645988 223,310,270,361,4,244,310,270,347,249,361,223,324,0.47601168 +/mnt/Ship2020/JPEGImages/1070.tif 2,332,121,494,5,81,332,121,356,43,494,2,470,0.38764395 271,0,424,228,4,386,0,424,20,308,228,271,208,0.28981768 251,543,401,737,4,349,543,401,574,303,737,251,706,0.3956701 81,553,165,646,4,122,553,165,579,124,646,81,620,0.50524834 19,509,131,652,4,85,509,131,534,64,652,19,626,0.43968531 580,433,614,479,4,604,433,614,440,590,479,580,472,0.35677749 138,285,164,320,5,154,285,164,290,148,320,138,315,0.41758242 +/mnt/Ship2020/JPEGImages/1071.tif 96,683,347,925,3,312,683,347,722,132,925,96,887,0.25552336 +/mnt/Ship2020/JPEGImages/1072.tif 578,257,828,507,3,793,257,828,292,613,507,578,472,0.2408 +/mnt/Ship2020/JPEGImages/1073.tif 842,616,955,678,4,850,616,955,656,946,678,842,639,0.3835284 +/mnt/Ship2020/JPEGImages/1074.tif 844,104,957,166,4,852,104,957,144,948,166,844,127,0.3835284 57,676,118,757,5,83,676,118,689,92,757,57,744,0.55009107 95,702,132,758,5,112,702,132,709,115,758,95,751,0.53040541 111,725,136,761,5,127,725,136,730,120,761,111,756,0.39888889 61,667,87,706,5,77,667,87,672,72,706,61,701,0.42850099 47,710,70,746,5,62,710,70,714,54,746,47,743,0.35990338 325,759,347,793,5,333,759,347,789,338,793,325,764,0.41644385 0,837,29,857,5,3,837,29,846,25,857,0,848,0.53793103 +/mnt/Ship2020/JPEGImages/1075.tif 57,164,118,245,5,83,164,118,177,92,245,57,232,0.55009107 96,191,132,246,5,113,191,132,197,115,246,96,240,0.52171717 111,213,136,249,5,127,213,136,218,120,249,111,244,0.39888889 61,156,87,194,5,76,156,87,160,72,194,61,189,0.44129555 47,198,70,234,5,62,198,70,202,54,234,47,231,0.35990338 325,247,347,281,5,333,247,347,277,338,281,325,252,0.41644385 0,326,29,345,5,3,326,29,335,25,345,0,336,0.5199637 +/mnt/Ship2020/JPEGImages/1076.tif 377,368,480,444,4,467,368,480,391,390,444,377,421,0.35245273 804,256,878,351,5,833,256,878,333,849,351,804,274,0.43285917 
837,377,907,471,5,863,377,907,455,881,471,837,393,0.41519757 +/mnt/Ship2020/JPEGImages/1077.tif 591,531,701,629,4,608,531,701,607,683,629,591,554,0.3156308 338,860,420,924,4,350,860,420,905,407,924,338,878,0.35337271 +/mnt/Ship2020/JPEGImages/1078.tif 591,19,701,117,4,608,19,701,95,683,117,591,42,0.3156308 338,348,420,412,4,350,348,420,393,407,412,338,366,0.35337271 371,522,456,594,4,385,522,456,575,443,594,371,542,0.34362745 701,617,806,699,4,714,617,806,677,793,699,701,639,0.32566783 476,944,504,984,4,492,944,504,950,488,984,476,978,0.45 +/mnt/Ship2020/JPEGImages/1079.tif 372,10,457,82,4,385,10,457,63,443,82,372,30,0.34362745 701,105,806,187,4,714,105,806,165,793,187,701,127,0.32566783 472,432,504,470,4,492,432,504,440,484,470,472,462,0.42763158 +/mnt/Ship2020/JPEGImages/108.tif 476,387,688,592,4,657,387,688,420,506,592,476,560,0.25678785 +/mnt/Ship2020/JPEGImages/1080.tif 704,603,802,741,5,754,603,802,624,751,741,704,721,0.48924135 217,759,420,1057,5,349,759,420,795,288,1057,217,1021,0.38605482 656,590,753,756,5,721,590,753,604,688,756,656,742,0.358589 +/mnt/Ship2020/JPEGImages/1081.tif 792,678,908,1019,3,843,678,908,687,858,1019,792,1009,0.56105268 +/mnt/Ship2020/JPEGImages/1082.tif 66,769,316,1019,3,281,769,316,804,101,1019,66,984,0.2408 +/mnt/Ship2020/JPEGImages/1083.tif 65,257,315,507,3,280,257,315,292,100,507,65,472,0.2408 +/mnt/Ship2020/JPEGImages/1084.tif 463,605,497,730,4,466,605,497,606,494,730,463,729,0.90517647 578,441,623,601,2,606,441,623,598,596,601,578,443,0.60763889 597,628,642,771,2,623,628,642,767,616,771,597,632,0.57342657 873,762,921,968,2,903,762,921,965,891,968,873,765,0.62135922 526,120,543,159,4,538,120,543,157,532,159,526,121,0.66289593 483,223,493,241,4,483,223,493,223,493,241,483,241,1.0 +/mnt/Ship2020/JPEGImages/1085.tif 329,616,443,678,4,338,616,443,656,434,678,329,639,0.38455008 +/mnt/Ship2020/JPEGImages/1086.tif 422,389,830,670,3,790,389,830,458,463,670,422,600,0.2974932 322,889,395,985,5,354,889,395,968,363,985,322,906,0.46018836 297,776,360,857,5,323,776,360,843,334,857,297,790,0.44287674 336,857,432,977,5,369,857,432,955,399,977,336,879,0.40104167 +/mnt/Ship2020/JPEGImages/1087.tif 423,0,737,226,3,697,0,737,69,463,226,423,156,0.35656107 294,331,362,412,5,321,331,362,395,336,412,294,347,0.43464052 320,445,401,544,5,358,445,401,524,362,544,320,466,0.48553436 336,417,434,533,5,373,417,434,508,397,533,336,442,0.43033075 +/mnt/Ship2020/JPEGImages/1088.tif 486,761,862,1010,3,518,761,862,949,829,1010,486,822,0.28906584 +/mnt/Ship2020/JPEGImages/1089.tif 486,249,862,498,3,518,249,862,438,829,498,486,310,0.28740494 +/mnt/Ship2020/JPEGImages/109.tif 614,657,643,743,4,634,657,643,740,623,743,614,660,0.67642342 634,864,662,953,4,655,864,662,952,641,953,634,865,0.74438202 638,966,669,1025,5,664,966,669,1023,643,1025,638,969,0.81000547 664,965,690,1025,5,684,965,690,1022,670,1025,664,967,0.74679487 +/mnt/Ship2020/JPEGImages/1090.tif 79,19,189,117,4,96,19,189,95,171,117,79,42,0.3156308 189,617,294,699,4,202,617,294,677,280,699,189,639,0.32787456 +/mnt/Ship2020/JPEGImages/1091.tif 189,105,294,187,4,202,105,294,165,280,187,189,127,0.32787456 774,975,815,1024,4,800,975,815,1014,789,1024,774,985,0.57939273 +/mnt/Ship2020/JPEGImages/1092.tif 280,678,396,1019,3,330,678,396,687,346,1019,280,1009,0.56512286 +/mnt/Ship2020/JPEGImages/1093.tif 683,658,736,786,2,704,658,736,780,715,786,683,664,0.40595519 70,948,101,1024,5,95,948,101,1023,76,1024,70,949,0.7983871 15,633,29,665,4,26,633,29,663,19,665,15,635,0.71875 +/mnt/Ship2020/JPEGImages/1094.tif 
66,441,111,601,2,94,441,111,598,84,601,66,443,0.60763889 85,628,130,771,2,111,628,130,767,104,771,85,632,0.57342657 361,762,409,968,2,391,762,409,965,379,968,361,765,0.62135922 683,146,736,273,2,704,146,736,268,715,273,683,151,0.40439756 18,123,28,158,4,18,123,28,123,28,158,18,158,1.0 +/mnt/Ship2020/JPEGImages/1095.tif 423,111,539,398,5,494,111,539,387,467,398,423,123,0.60705275 +/mnt/Ship2020/JPEGImages/1096.tif 868,0,1059,134,4,1037,0,1059,35,889,134,868,98,0.31784793 0,422,357,675,3,316,422,357,492,39,675,0,605,0.32672357 +/mnt/Ship2020/JPEGImages/1097.tif 0,768,377,1010,3,31,768,377,950,346,1010,0,828,0.28938773 +/mnt/Ship2020/JPEGImages/1098.tif 171,146,224,274,2,192,146,224,268,203,274,171,152,0.40595519 +/mnt/Ship2020/JPEGImages/1099.tif 697,441,728,525,4,701,441,728,443,724,525,697,523,0.85330261 75,459,98,506,5,85,459,98,462,88,506,75,503,0.55689177 936,802,973,841,4,952,802,973,814,957,841,936,829,0.52598753 +/mnt/Ship2020/JPEGImages/11.tif 319,449,367,538,5,342,449,367,531,345,538,319,455,0.47331461 341,435,394,519,5,372,435,394,510,362,519,341,445,0.5730009 312,507,346,551,4,328,507,346,515,330,551,312,543,0.51871658 378,462,409,502,4,394,462,409,495,392,502,378,470,0.52016129 +/mnt/Ship2020/JPEGImages/110.tif 614,145,643,231,4,634,145,643,228,623,231,614,148,0.67642342 634,351,662,441,4,655,351,662,439,641,441,634,353,0.73888889 638,455,674,547,5,665,455,674,545,647,547,638,457,0.73913043 664,452,693,540,5,684,452,693,538,673,540,664,454,0.68103448 720,917,746,989,4,722,917,746,918,743,989,720,989,0.89823718 666,743,684,786,4,681,743,684,786,668,786,666,744,0.85271318 676,873,695,916,4,676,873,695,873,695,916,676,916,1.0 +/mnt/Ship2020/JPEGImages/1100.tif 421,837,704,1030,3,678,837,704,879,447,1030,421,987,0.27161794 +/mnt/Ship2020/JPEGImages/1101.tif 421,324,704,519,3,679,324,704,367,447,519,421,476,0.27087977 +/mnt/Ship2020/JPEGImages/1102.tif 835,607,888,686,4,853,607,888,677,871,686,835,615,0.36673036 +/mnt/Ship2020/JPEGImages/1103.tif 592,427,738,623,4,704,427,738,448,627,623,592,601,0.29415362 +/mnt/Ship2020/JPEGImages/1104.tif 323,95,376,174,4,341,95,376,165,359,174,323,103,0.36673036 +/mnt/Ship2020/JPEGImages/1105.tif 483,174,553,280,4,530,174,553,186,507,280,483,268,0.37291105 +/mnt/Ship2020/JPEGImages/1106.tif 844,265,923,334,4,907,265,923,285,860,334,844,314,0.37497707 +/mnt/Ship2020/JPEGImages/1107.tif 100,545,161,684,4,130,545,161,552,130,684,100,678,0.5 +/mnt/Ship2020/JPEGImages/1108.tif 655,755,921,1069,3,860,755,921,802,715,1069,655,1023,0.30816891 +/mnt/Ship2020/JPEGImages/1109.tif 503,563,600,717,4,529,563,600,704,575,717,503,575,0.30137903 659,850,716,930,5,678,850,716,921,697,930,659,859,0.37083333 +/mnt/Ship2020/JPEGImages/111.tif 720,405,746,477,4,722,405,746,406,743,477,720,477,0.89823718 729,485,753,552,4,750,485,753,551,733,552,729,485,0.8488806 665,232,682,267,4,681,232,682,267,666,267,665,232,0.94117647 674,359,696,401,4,692,359,696,400,679,401,674,361,0.77435065 +/mnt/Ship2020/JPEGImages/1110.tif 566,383,642,508,4,619,383,642,393,590,508,566,497,0.34126316 +/mnt/Ship2020/JPEGImages/1111.tif 910,803,1005,959,4,972,803,1005,818,942,959,910,945,0.37145749 +/mnt/Ship2020/JPEGImages/1112.tif 954,870,1039,936,5,1023,870,1039,902,970,936,954,904,0.49055258 942,839,1029,905,5,1014,839,1029,867,958,905,942,876,0.45611285 844,914,904,963,5,892,914,904,933,857,963,844,943,0.44047619 790,940,828,970,5,823,940,828,951,795,970,790,960,0.38947368 687,995,738,1040,5,725,995,738,1015,700,1040,687,1020,0.47276688 
894,887,964,947,5,946,887,964,914,911,947,894,919,0.47916667 +/mnt/Ship2020/JPEGImages/1113.tif 954,358,1039,424,5,1023,358,1039,390,970,424,954,392,0.49055258 942,327,1029,393,5,1014,327,1029,355,958,393,942,364,0.45611285 844,402,904,451,5,892,402,904,421,857,451,844,431,0.44047619 790,428,829,458,5,823,428,829,438,795,458,790,448,0.38034188 684,482,737,526,5,725,482,737,505,696,526,684,503,0.51243568 640,533,675,557,5,671,533,675,542,644,557,640,548,0.40357143 891,378,962,436,5,947,378,962,405,905,436,891,410,0.47450219 891,369,947,412,5,938,369,947,389,900,412,891,392,0.4763289 645,541,681,565,4,676,541,681,552,650,565,645,554,0.46990741 +/mnt/Ship2020/JPEGImages/1114.tif 640,20,675,45,5,671,20,675,30,644,45,640,35,0.42285714 115,864,149,908,5,128,864,149,901,137,908,115,870,0.40675134 642,21,686,55,5,678,21,686,32,650,55,642,44,0.38770053 +/mnt/Ship2020/JPEGImages/1115.tif 101,132,279,254,4,262,132,279,163,117,254,101,224,0.29634831 +/mnt/Ship2020/JPEGImages/1116.tif 54,383,130,508,4,107,383,130,393,78,508,54,497,0.34126316 +/mnt/Ship2020/JPEGImages/1117.tif 361,729,450,887,4,419,729,450,742,391,887,361,875,0.3675864 +/mnt/Ship2020/JPEGImages/1118.tif 167,42,271,163,4,251,42,271,58,187,163,167,147,0.27368086 +/mnt/Ship2020/JPEGImages/1119.tif 736,750,862,864,5,834,750,862,783,765,864,736,830,0.38711362 +/mnt/Ship2020/JPEGImages/112.tif 731,0,753,40,4,750,0,753,39,733,40,731,1,0.86704545 745,629,784,760,4,769,629,784,758,760,760,745,631,0.61186142 \ No newline at end of file diff --git a/dataloadR/augmentations.py b/dataloadR/augmentations.py new file mode 100644 index 0000000..a8c0555 --- /dev/null +++ b/dataloadR/augmentations.py @@ -0,0 +1,260 @@ +# coding=utf-8 +import cv2 +import random +import numpy as np +import imgaug.augmenters as iaa + +def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=np.int16) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + +class HSV(object): + def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5, p=0.5): + self.hgain = hgain + self.sgain = sgain + self.vgain = vgain + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + x = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains + img_hsv = (cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * x).clip(None, 255).astype(np.uint8) + np.clip(img_hsv[:, :, 0], None, 179, out=img_hsv[:, :, 0]) # inplace hue clip (0 - 179 deg) + img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + return img, bboxes + +class HSV_new(object): + def __init__(self, saturation=0.5, brightness=0.3, p=0.5): + self.saturation = saturation + self.brightness = brightness + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val + S = img_hsv[:, :, 1].astype(np.float32) # saturation + V = img_hsv[:, :, 2].astype(np.float32) # value + a = random.uniform(-1, 1) * self.saturation + 1 + b = random.uniform(-1, 1) * self.brightness + 1 + S *= a + V *= b + img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) + 
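+ # the value channel is written back the same way on the next line: clipping to 255 is only
+ # needed when the random gain b > 1, since a gain below 1 can only darken the image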
img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) + return img, bboxes + + +class Blur(object): + def __init__(self, sigma=1.3, p=0.5): + self.sigma = sigma + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + blur_aug = iaa.GaussianBlur(sigma=(0, self.sigma)) + img = blur_aug.augment_image(img) + return img, bboxes + + +class Grayscale(object): + def __init__(self, grayscale=0.3, p=0.5): + self.alpha = random.uniform(grayscale, 1.0) + self.p = p + + def __call__(self, img, bboxes): + if random.random() < self.p: + gray_aug = iaa.Grayscale(alpha=(self.alpha, 1.0)) + img = gray_aug.augment_image(img) + return img, bboxes + + +class Gamma(object): + def __init__(self, intensity=0.2, p=0.4): + self.intensity = intensity + self.p = p + + def __call__(self, img, bboxes): + if random.random() < self.p: + gm = random.uniform(1 - self.intensity, 1 + self.intensity) + img = np.uint8(np.power(img / float(np.max(img)), gm) * np.max(img)) + return img, bboxes + +class Noise(object): + def __init__(self, intensity=0.01, p=0.2): + self.intensity = intensity + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + noise_aug = iaa.AdditiveGaussianNoise(scale=(0, self.intensity * 255)) + img = noise_aug.augment_image(img) + return img, bboxes + +class Sharpen(object): + def __init__(self, intensity=0.15, p=0.2): + self.intensity = intensity + self.p = p + + def __call__(self, img, bboxes): + if random.random() < self.p: + sharpen_aug = iaa.Sharpen(alpha=(0.0, 1.0), lightness=(1 - self.intensity, 1 + self.intensity)) + img = sharpen_aug.augment_image(img) + return img, bboxes + +class Contrast(object): + def __init__(self, intensity=0.15, p=0.3): + self.intensity = intensity + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + contrast_aug = iaa.contrast.LinearContrast((1 - self.intensity, 1 + self.intensity)) + img = contrast_aug.augment_image(img) + return img, bboxes + +class RandomVerticalFilp(object):#################### + def __init__(self, p=0.5): + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + h_img, _, _ = img.shape + img = img[::-1, :, :] #倒序::-1 + bboxes[:, [1, 3]] = h_img - bboxes[:, [3, 1]] # min,ymin,xmax,ymax + #xmin,ymin,xmax,ymax,c,x1,y1,x2,y2,x3,y3,x4,y4,r + bboxes[:, [6, 8, 10, 12]] = h_img - bboxes[:, [6, 8, 10, 12]] + bboxes[:, [5,6,9,10]] = bboxes[:, [9,10,5,6]] + ''' + ######### + img1 = img.astype(np.uint8)#np.int8(img)# + points2 = np.array([[int(bboxes[0][5]),int(bboxes[0][6])], [int(bboxes[0][7]),int(bboxes[0][8])], [int(bboxes[0][9]),int(bboxes[0][10])], [int(bboxes[0][11]),int(bboxes[0][12])]]) + print(points2) + cv2.polylines(img1, [points2], 1, (0,255,0), 2) + plt.figure("Image") # 图像窗口名称 + plt.imshow(img1) + plt.show() + ''' + return img, bboxes + +class RandomHorizontalFilp(object):############################# + def __init__(self, p=0.5): + self.p = p + def __call__(self, img, bboxes): + if random.random() < self.p: + _, w_img, _ = img.shape + # img = np.fliplr(img) + img = img[:, ::-1, :] + bboxes[:, [0, 2]] = w_img - bboxes[:, [2, 0]]# min,ymin,xmax,ymax,class + ####xmin,ymin,xmax,ymax,c,x1,y1,x2,y2,x3,y3,x4,y4,r + bboxes[:, [5, 7 , 9, 11]] = w_img - bboxes[:, [5, 7 , 9, 11]] + bboxes[:, [7,8,11,12]] = bboxes[:, [11,12,7,8]] + return img, bboxes + +class RandomCrop(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img, bboxes): + if random.random() < self.p: + h_img, w_img, _ = 
img.shape + + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w_img - max_bbox[2] + max_d_trans = h_img - max_bbox[3] + + crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans))) + crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans))) + crop_xmax = min(w_img, int(max_bbox[2] + random.uniform(0, max_r_trans)))# + crop_ymax = min(h_img, int(max_bbox[3] + random.uniform(0, max_d_trans)))# + + img = img[crop_ymin : crop_ymax, crop_xmin : crop_xmax] + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin + return img, bboxes + +class RandomAffine(object):############################## + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img, bboxes): + if random.random() < self.p: + h_img, w_img, _ = img.shape + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w_img - max_bbox[2] + max_d_trans = h_img - max_bbox[3] + + tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1)) + ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1)) + + M = np.array([[1, 0, tx], [0, 1, ty]]) + img = cv2.warpAffine(img, M, (w_img, h_img)) + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty + return img, bboxes + + +class Resize(object): + + def __init__(self, target_shape, correct_box=True): + self.h_target, self.w_target = target_shape + self.correct_box = correct_box + + def __call__(self, img, bboxes): + h_org , w_org , _= img.shape + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) + + resize_ratio = min(1.0 * self.w_target / w_org, 1.0 * self.h_target / h_org) + resize_w = int(resize_ratio * w_org) + resize_h = int(resize_ratio * h_org) + image_resized = cv2.resize(img, (resize_w, resize_h)) + + image_paded = np.full((self.h_target, self.w_target, 3), 128.0) + dw = int((self.w_target - resize_w) / 2) + dh = int((self.h_target - resize_h) / 2) + image_paded[dh:resize_h + dh, dw:resize_w + dw, :] = image_resized + image = image_paded / 255.0 + + if self.correct_box: + ################################x1-y4 trans + bboxes[:, [0, 2, 5, 7, 9, 11]] = bboxes[:, [0, 2, 5, 7, 9, 11]] * resize_ratio + dw + bboxes[:, [1, 3, 6, 8, 10, 12]] = bboxes[:, [1, 3, 6, 8, 10, 12]] * resize_ratio + dh + return image, bboxes + return image + + +class Mixup(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img_org, bboxes_org, img_mix, bboxes_mix): + if random.random() > self.p: + lam = np.random.beta(1.5, 1.5) + img = lam * img_org + (1 - lam) * img_mix + bboxes_org = np.concatenate( + [bboxes_org, np.full((len(bboxes_org), 1), lam)], axis=1) + bboxes_mix = np.concatenate( + [bboxes_mix, np.full((len(bboxes_mix), 1), 1 - lam)], axis=1) + bboxes = np.concatenate([bboxes_org, bboxes_mix]) + + else: + img = img_org + bboxes = np.concatenate([bboxes_org, np.full((len(bboxes_org), 1), 1.0)], axis=1) + + return img, bboxes + + +class LabelSmooth(object): + def __init__(self, delta=0.01): + self.delta = delta + + def __call__(self, onehot, num_classes): + return onehot * (1 - self.delta) + self.delta * 1.0 / num_classes \ No newline at end of file diff --git a/dataloadR/cocodataset.py b/dataloadR/cocodataset.py new file mode 100644 index 0000000..6b1e07d --- /dev/null +++ b/dataloadR/cocodataset.py @@ -0,0 +1,116 @@ +import 
os +from torch.utils.data import Dataset +from pycocotools.coco import COCO + +import config.cfg_npmmr as cfg +from utils.utils_coco import * + +class COCODataset(Dataset): + """ + COCO dataset class. + """ + def __init__(self, data_dir='COCO', json_file='instances_train2017.json', + name='train2017', img_size=416, + augmentation=None, min_size=1, debug=False): + """ + COCO dataset initialization. Annotation data are read into memory by COCO API. + Args: + model_type (str): model name specified in config file + data_dir (str): dataset root directory + json_file (str): COCO json file name ################## + name (str): COCO data name (e.g. 'train2017' or 'val2017') ########### + img_size (int): target image size after pre-processing + min_size (int): bounding boxes smaller than this are ignored + debug (bool): if True, only one data id is selected from the dataset + """ + self.data_dir = data_dir + self.json_file = json_file + self.coco = COCO(self.data_dir+'/json_gt/'+self.json_file) + self.ids = self.coco.getImgIds() + if debug: + self.ids = self.ids[1:2] + print("debug mode...", self.ids) + self.class_ids = sorted(self.coco.getCatIds()) + self.name = name + self.max_labels = cfg.MAX_LABEL######################### + self.img_size = img_size + self.min_size = min_size + self.lrflip = augmentation['LRFLIP'] + self.jitter = augmentation['JITTER'] + self.random_placing = augmentation['RANDOM_PLACING'] + self.hue = augmentation['HUE'] + self.saturation = augmentation['SATURATION'] + self.exposure = augmentation['EXPOSURE'] + self.random_distort = augmentation['RANDOM_DISTORT'] + + + + def __len__(self): + return len(self.ids) + + def __getitem__(self, index): + """ + One image / label pair for the given index is picked up \ + and pre-processed. + Args: + index (int): data index + Returns: + img (numpy.ndarray): pre-processed image + padded_labels (torch.Tensor): pre-processed label data. \ + The shape is :math:`[self.max_labels, 5]`. \ + each label consists of [class, xc, yc, w, h]: + class (float): class index. + xc, yc (float) : center of bbox whose values range from 0 to 1. + w, h (float) : size of bbox whose values range from 0 to 1. + info_img : tuple of h, w, nh, nw, dx, dy. + h, w (int): original shape of the image + nh, nw (int): shape of the resized image without padding + dx, dy (int): pad size + id_ (int): same as the input index. Used for evaluation. 
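+ img_file (str): path of the loaded image file, returned so callers can re-read the
+ original image for visualisation.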
+ """ + id_ = self.ids[index] + anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None) + annotations = self.coco.loadAnns(anno_ids) + lrflip = False + if np.random.rand() > 0.5 and self.lrflip == True: + lrflip = True + + # load image and preprocess + img_file = os.path.join(self.data_dir, 'JPEGImages', + '{:0>5d}'.format(id_) + '.jpg') + img = cv2.imread(img_file) + imgshow = img + if self.json_file == 'instances_val5k.json' and img is None: + img_file = os.path.join(self.data_dir, 'train2017', + '{:012}'.format(id_) + '.jpg') + img = cv2.imread(img_file) + assert img is not None + + img, info_img = preprocess(img, self.img_size, jitter=self.jitter, + random_placing=self.random_placing) + + if self.random_distort: + img = random_distort(img, self.hue, self.saturation, self.exposure) + + img = np.transpose(img / 255., (2, 0, 1)) + + if lrflip: + img = np.flip(img, axis=2).copy() + + # load labels + labels = [] + for anno in annotations: + if anno['bbox'][2] > self.min_size and anno['bbox'][3] > self.min_size: + labels.append([]) + labels[-1].append(self.class_ids.index(anno['category_id'])) + labels[-1].extend(anno['bbox']) + + padded_labels = np.zeros((self.max_labels, 5)) + if len(labels) > 0: + labels = np.stack(labels) + labels = label2box(labels, info_img, self.img_size, lrflip) + padded_labels[range(len(labels))[:self.max_labels] + ] = labels[:self.max_labels] + padded_labels = torch.from_numpy(padded_labels) + + return img, padded_labels, info_img, id_, img_file diff --git a/dataloadR/datasets.py b/dataloadR/datasets.py new file mode 100644 index 0000000..d3de49d --- /dev/null +++ b/dataloadR/datasets.py @@ -0,0 +1,184 @@ +# coding=utf-8 +import os +import sys +sys.path.append("..") +sys.path.append("../utils") +import numpy as np +import cv2 +import random +import torch +from torch.utils.data import Dataset + +import config.cfg_npmmr as cfg +import dataloadR.augmentations as DataAug +import utils.utils_basic as tools +from PIL import Image +import matplotlib.pyplot as plt +class Construct_Dataset(Dataset): + def __init__(self, anno_file_type, img_size=448): + self.img_size = img_size # For Multi-training + self.classes = cfg.DATA["CLASSES"] + self.num_classes = len(self.classes) + self.class_to_id = dict(zip(self.classes, range(self.num_classes))) + self.__annotations = self.__load_annotations(anno_file_type) + + def __len__(self): + return len(self.__annotations) + + def __getitem__(self, item): + + img_org, bboxes_org = self.__parse_annotation(self.__annotations[item]) + img_org = img_org.transpose(2, 0, 1) # HWC->CHW + + item_mix = random.randint(0, len(self.__annotations) - 1) + img_mix, bboxes_mix = self.__parse_annotation(self.__annotations[item_mix]) + img_mix = img_mix.transpose(2, 0, 1) + + img, bboxes = DataAug.Mixup()(img_org, bboxes_org, img_mix, bboxes_mix) + #####bboxes xyxy + del img_org, bboxes_org, img_mix, bboxes_mix + #print(self.__annotations[item]) + label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.__creat_label(bboxes) + + img = torch.from_numpy(img).float() + label_sbbox = torch.from_numpy(label_sbbox).float() + label_mbbox = torch.from_numpy(label_mbbox).float() + label_lbbox = torch.from_numpy(label_lbbox).float() + sbboxes = torch.from_numpy(sbboxes).float() + mbboxes = torch.from_numpy(mbboxes).float() + lbboxes = torch.from_numpy(lbboxes).float() + + return img, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes + + def __load_annotations(self, anno_type): + assert anno_type in ['train', 'val', 'test'] + 
anno_path = os.path.join(cfg.PROJECT_PATH, 'dataR', anno_type + ".txt") + with open(anno_path, 'r') as f: + annotations = list(filter(lambda x: len(x) > 0, f.readlines())) + + assert len(annotations) > 0, "No images found in {}".format(anno_path) + return annotations + + def __parse_annotation(self, annotation): + anno = annotation.strip().split(' ') + + img_path = anno[0] + img = cv2.imread(img_path) # H*W*C and C=BGR + assert img is not None, 'File Not Found ' + img_path + bboxes = np.array([list(map(float, box.split(','))) for box in anno[1:]]) ####xmin,ymin,xmax,ymax,c,x1,y1,x2,y2,x3,y3,x4,y4,r + ############# + img, bboxes = DataAug.RandomVerticalFilp()(np.copy(img), np.copy(bboxes)) + img, bboxes = DataAug.RandomHorizontalFilp()(np.copy(img), np.copy(bboxes)) + img, bboxes = DataAug.HSV()(np.copy(img), np.copy(bboxes)) + + #img, bboxes = DataAug.Blur()(np.copy(img), np.copy(bboxes)) + #img, bboxes = DataAug.Grayscale()(np.copy(img), np.copy(bboxes)) + #img, bboxes = DataAug.Gamma()(np.copy(img), np.copy(bboxes)) + #img, bboxes = DataAug.Noise()(np.copy(img), np.copy(bboxes)) + # img, bboxes = DataAug.Sharpen()(np.copy(img), np.copy(bboxes)) + # img, bboxes = DataAug.Contrast()(np.copy(img), np.copy(bboxes)) + #img, bboxes = DataAug.RandomCrop()(np.copy(img), np.copy(bboxes)) + #img, bboxes = DataAug.RandomAffine()(np.copy(img), np.copy(bboxes)) + img, bboxes = DataAug.Resize((self.img_size, self.img_size), True)(np.copy(img), np.copy(bboxes)) + + return img, bboxes + + def __creat_label(self, bboxes): + anchors = np.array(cfg.MODEL["ANCHORS"]) + strides = np.array(cfg.MODEL["STRIDES"]) + train_output_size = self.img_size / strides + anchors_per_scale = cfg.MODEL["ANCHORS_PER_SCLAE"] + + label = [np.zeros((int(train_output_size[i]), int(train_output_size[i]), anchors_per_scale, 6 + 5 + 2+ self.num_classes)) for i in range(3)]####a r + for i in range(3): + label[i][..., 5+5+2] = 1.0 + bboxes_xywh = [np.zeros((150, 4)) for _ in range(3)] # Darknet the max_num is 30 + bbox_count = np.zeros((3,)) + for bbox in bboxes: + bbox_coor = bbox[:4] + bbox_class_ind = int(bbox[4])#######################从1开始的话要-1 + bbox_coor_in = bbox[5:13] + bbox_r = bbox[13] + bbox_mix = bbox[14] + # onehot + one_hot = np.zeros(self.num_classes, dtype=np.float32) + one_hot[bbox_class_ind] = 1.0 + one_hot_smooth = DataAug.LabelSmooth()(one_hot, self.num_classes) + + # convert "xyxy" to "xywh" + bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]],axis=-1) + + bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / strides[:, np.newaxis] + + # convert x1-y4 to a1-a4 + a1 = (bbox_coor_in[0]-bbox_coor[0])/(bbox_coor[2]-bbox_coor[0]) + a2 = (bbox_coor_in[3]-bbox_coor[1])/(bbox_coor[3]-bbox_coor[1]) + a3 = (bbox_coor[2]-bbox_coor_in[4])/(bbox_coor[2]-bbox_coor[0]) + a4 = (bbox_coor[3]-bbox_coor_in[7])/(bbox_coor[3]-bbox_coor[1]) + bbox_a = np.concatenate([[a1],[a2],[a3],[a4]],axis=-1) + s13 = np.array(bbox_xywh[3]/np.sqrt((bbox_coor_in[0] - bbox_coor_in[4])**2 + (bbox_coor_in[1] - bbox_coor_in[5])**2)) + s24 = np.array(bbox_xywh[2]/np.sqrt((bbox_coor_in[2] - bbox_coor_in[6])**2 + (bbox_coor_in[3] - bbox_coor_in[7])**2)) + + iou = [] + exist_positive = False + for i in range(3): + anchors_xywh = np.zeros((anchors_per_scale, 4)) + anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 + anchors_xywh[:, 2:4] = anchors[i] + + iou_scale = tools.iou_xywh_numpy(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) + iou.append(iou_scale) + iou_mask = iou_scale > 
0.3 + + if np.any(iou_mask): + xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + label[i][yind, xind, iou_mask, 0:4] = bbox_xywh + label[i][yind, xind, iou_mask, 4:8] = bbox_a + label[i][yind, xind, iou_mask, 8:9] = bbox_r + + label[i][yind, xind, iou_mask, 9:10] = s13 + label[i][yind, xind, iou_mask, 10:11] = s24 + + + label[i][yind, xind, iou_mask, 11:12] = 1.0 + label[i][yind, xind, iou_mask, 12:13] = bbox_mix + label[i][yind, xind, iou_mask, 13:] = one_hot_smooth + bbox_ind = int(bbox_count[i] % 150) + bboxes_xywh[i][bbox_ind, :4] = bbox_xywh + bbox_count[i] += 1 + + exist_positive = True + + if not exist_positive: + best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) + best_detect = int(best_anchor_ind / anchors_per_scale) + best_anchor = int(best_anchor_ind % anchors_per_scale) + + xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32) + label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh + label[best_detect][yind, xind, best_anchor, 4:8] = bbox_a + label[best_detect][yind, xind, best_anchor, 8:9] = bbox_r + + + label[best_detect][yind, xind, best_anchor, 9:10] = s13 + label[best_detect][yind, xind, best_anchor, 10:11] = s24 + + label[best_detect][yind, xind, best_anchor, 11:12] = 1.0 + label[best_detect][yind, xind, best_anchor, 12:13] = bbox_mix + label[best_detect][yind, xind, best_anchor, 13:] = one_hot_smooth + + bbox_ind = int(bbox_count[best_detect] % 150) + bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh + bbox_count[best_detect] += 1 + + label_sbbox, label_mbbox, label_lbbox = label + sbboxes, mbboxes, lbboxes = bboxes_xywh + + return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes + +if __name__ == '__main__': + from torch.utils.data import DataLoader + train_dataset=Construct_Dataset(anno_file_type="train", img_size=cfg.TRAIN["TRAIN_IMG_SIZE"]) + train_dataloader = DataLoader(train_dataset,batch_size=1, num_workers=cfg.TRAIN["NUMBER_WORKERS"],shuffle=False) + for i, (imgs, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes) in enumerate(train_dataloader): + continue \ No newline at end of file diff --git a/evalR/coco_eval.py b/evalR/coco_eval.py new file mode 100644 index 0000000..0f8b540 --- /dev/null +++ b/evalR/coco_eval.py @@ -0,0 +1,167 @@ +import json +import tempfile +import matplotlib.pyplot as plt +from torch.autograd import Variable +from torch.utils.data import DataLoader +from pycocotools.cocoeval import COCOeval +import time +from tqdm import tqdm +from dataload.cocodataset import * +from eval.evaluator import Evaluator +from utils.utils_coco import * +from utils.visualize import * + +current_milli_time = lambda: int(round(time.time() * 1000)) + +class COCOEvaluator(): + """ + COCO AP Evaluation class. + All the data in the val2017 dataset are processed \ + and evaluated by COCO API. + """ + def __init__(self, data_dir, img_size, confthre, nmsthre): + """ + Args: + model_type (str): model name specified in config file + data_dir (str): dataset root directory + img_size (int): image size after preprocess. images are resized \ + to squares whose shape is (img_size, img_size). + confthre (float): + confidence threshold ranging from 0 to 1, \ + which is defined in the config file. + nmsthre (float): + IoU threshold of non-max supression ranging from 0 to 1. 
+ """ + self.classes = cfg.DATA["CLASSES"] + self.val_data_path = cfg.DATA_PATH + self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'prediction') + self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"] + + augmentation = {'LRFLIP': False, 'JITTER': 0, 'RANDOM_PLACING': False, + 'HUE': 0, 'SATURATION': 0, 'EXPOSURE': 0, 'RANDOM_DISTORT': False} + + self.dataset = COCODataset(data_dir=data_dir, + img_size=img_size, + augmentation=augmentation, + json_file=cfg.TEST["EVAL_JSON"], + name=cfg.TEST["EVAL_NAME"]) + self.dataloader = DataLoader(self.dataset, batch_size=cfg.TEST["BATCH_SIZE"], shuffle=False, + pin_memory=True, num_workers=cfg.TEST["NUMBER_WORKERS"]) + self.img_size = img_size + self.confthre = confthre + self.nmsthre = nmsthre + self.inference_time = 0. + def evaluate(self, model): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + Args: + model : model object + Returns: + ap50_95 (float) : calculated COCO AP for IoU=50:95 + ap50 (float) : calculated COCO AP for IoU=50 + """ + model.eval() + cuda = torch.cuda.is_available() + Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + ids = [] + data_dict = [] + dataiterator = iter(self.dataloader) + #print(" Val datasets number is : {}".format(len(self.dataloader))) + for i in tqdm(range(len(self.dataloader))): + #while True: + #try: + img, _, info_img, id_, img_path = next(dataiterator) # load a batch + #except StopIteration: + #break + info_img = [float(info.numpy()) for info in info_img] + id_ = int(id_) + ids.append(id_) + with torch.no_grad(): + img = Variable(img.type(Tensor)) + start_time = current_milli_time() + _,outputs = model(img) + self.inference_time += (current_milli_time() - start_time) + outputs=outputs.unsqueeze(0) + outputs = postprocess( + outputs, cfg.DATA["NUM"], self.confthre, self.nmsthre) + if outputs[0] is None: + continue + outputs = outputs[0].cpu().data + + for output in outputs: + x1 = float(output[0]) + y1 = float(output[1]) + x2 = float(output[2]) + y2 = float(output[3]) + label = self.dataset.class_ids[int(output[6])] + box = box2label((y1, x1, y2, x2), info_img) + bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]] + score = float(output[4].data.item() * output[5].data.item()) # object score * class score + A = {"image_id": id_, "category_id": label, "bbox": bbox, + "score": score, "segmentation": []} # COCO json format + data_dict.append(A) + + if self.__visual_imgs and i <= self.__visual_imgs: + imgshow = cv2.imread(img_path[0]) + bboxes_prd = Evaluator(model).get_bbox(imgshow, cfg.TEST["MULTI_SCALE_TEST"], cfg.TEST["FLIP_TEST"]) + if bboxes_prd.shape[0] != 0: + boxes = bboxes_prd[..., :4] + class_inds = bboxes_prd[..., 5].astype(np.int32) + scores = bboxes_prd[..., 4] + visualize_boxes(image=imgshow, boxes=boxes, labels=class_inds, probs=scores, class_labels=self.classes) + path = os.path.join(self.pred_result_path, "imgs/{}.jpg".format(i)) + cv2.imwrite(path, imgshow) + + + annType = ['segm', 'bbox', 'keypoints'] + self.inference_time = 1.0 * self.inference_time / len(self.dataloader) + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + cocoGt = self.dataset.coco + _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, 'w')) + cocoDt = cocoGt.loadRes(tmp) + cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) + cocoEval.params.imgIds = ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + ''' + # ----------pltshow------------- # + 
# precision[t,:,k,a,m] PR curves recall-precision value + # T:IoU thresh.5-.95, gap=0.05, t[0]=0.5,t[1]=0.55,t[2]=0.6,t[3]=0.65,t[4]=0.7,t[5]=0.75 ……,t[9]=0.95 + # R:101 recall thresh,0-101 + # K:class k[0] = person,k[1] = bycicle,.....COCO + # A:area, a[0]=all,a[1]=small,a[2]=medium,a[3]=large + # M:Maxdet m[0]=1,m[1]=10,m[2]=100 + + #C75: PR at IoU=.75 (AP at strict IoU), area under curve corresponds to APIoU=.75 metric. + #C50: PR at IoU=.50 (AP at PASCAL IoU), area under curve corresponds to APIoU=.50 metric. + #Loc: PR at IoU=.10 (localization errors ignored, but not duplicate detections). All remaining settings use IoU=.1. + #Sim: PR after supercategory false positives (fps) are removed. Specifically, any matches to objects with a different class label but that belong to the same supercategory don't count as either a fp (or tp). Sim is computed by setting all objects in the same supercategory to have the same class label as the class in question and setting their ignore flag to 1. Note that person is a singleton supercategory so its Sim result is identical to Loc. + #Oth: PR after all class confusions are removed. Similar to Sim, except now if a detection matches any other object it is no longer a fp (or tp). Oth is computed by setting all other objects to have the same class label as the class in question and setting their ignore flag to 1. + #BG: PR after all background (and class confusion) fps are removed. For a single category, BG is a step function that is 1 until max recall is reached then drops to 0 (the curve is smoother after averaging across categories). + #FN: PR after all remaining errors are removed (trivially AP=1). + + pr_array1 = cocoEval.eval['precision'][0, :, 0, 0, 2] + pr_array2 = cocoEval.eval['precision'][5, :, 0, 0, 2] + #pr_array3 = cocoEval.eval['precision'][6, :, 0, 0, 2] + #pr_array4 = cocoEval.eval['precision'][9, :, 0, 0, 2] + x = np.arange(0.0, 1.01, 0.01) + # x_1 = np.arange(0, 1.01, 0.111) + plt.xlabel('IoU') + plt.ylabel('precision') + plt.xlim(0, 1.0) + plt.ylim(0, 1.01) + plt.grid(True) + plt.plot(x, pr_array1, color='blue', linewidth = '3', label='IoU=0.5') + plt.plot(x, pr_array2, color='green', linewidth = '3', label='IoU=0.75') + plt.title("P-R curves catid=person maxDet=100") + plt.legend(loc="lower left") + plt.savefig("../prediction/APs.png", dpi=600) + # plt.show()''' + return cocoEval.stats[0], cocoEval.stats[1], self.inference_time + else: + return 0, 0, 0 diff --git a/evalR/evaluator.py b/evalR/evaluator.py new file mode 100644 index 0000000..fa312b1 --- /dev/null +++ b/evalR/evaluator.py @@ -0,0 +1,239 @@ +import shutil +import time +from tqdm import tqdm + +from dataloadR.augmentations import * +from evalR import voc_eval +from utils.utils_basic import * +from utils.visualize import * +from utils.heatmap import Show_Heatmap + +current_milli_time = lambda: int(round(time.time() * 1000)) + +class Evaluator(object): + def __init__(self, model, visiual=True): + self.classes = cfg.DATA["CLASSES"] + self.pred_result_path = os.path.join(cfg.PROJECT_PATH, 'predictionR') + self.val_data_path = cfg.DATA_PATH + self.conf_thresh = cfg.TEST["CONF_THRESH"] + self.nms_thresh = cfg.TEST["NMS_THRESH"] + self.val_shape = cfg.TEST["TEST_IMG_SIZE"] + self.__visiual = visiual + self.__visual_imgs = cfg.TEST["NUM_VIS_IMG"] + self.model = model + self.device = next(model.parameters()).device + self.inference_time = 0. 
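+ # heat-map visualisation switch and the IoU threshold used by __calc_APs when matching
+ # predicted polygons against the ground truth (both read from the config)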
+ self.showheatmap = cfg.SHOW_HEATMAP + self.iouthresh_test = cfg.TEST["IOU_THRESHOLD"] + + def APs_voc(self, multi_test=False, flip_test=False): + filename = cfg.TEST["EVAL_NAME"]+'.txt' + img_inds_file = os.path.join(self.val_data_path, 'ImageSets', filename) + with open(img_inds_file, 'r') as f: + lines = f.readlines() + img_inds = [line.strip() for line in lines] + + rewritepath = os.path.join(self.pred_result_path, 'voc') + if os.path.exists(rewritepath): + shutil.rmtree(rewritepath) + os.mkdir(rewritepath) + for img_ind in tqdm(img_inds): + img_path = os.path.join(self.val_data_path, 'JPEGImages', img_ind + '.png') # 路径+JPEG+文件名############png + img = cv2.imread(img_path) + bboxes_prd = self.get_bbox(img, multi_test, flip_test) + + for bbox in bboxes_prd: + coor = np.array(bbox[:4], dtype=np.int32) + a_rota = np.array(bbox[4:8], dtype=np.float64) + + x1 = a_rota[0] * (coor[2]-coor[0]) + coor[0] + y1 = coor[1] + + x2 = coor[2] + y2 = a_rota[1] * (coor[3]-coor[1]) + coor[1] + + x3 = coor[2] - a_rota[2] * (coor[2]-coor[0]) + y3 = coor[3] + + x4 = coor[0] + y4 = coor[3] - a_rota[3] * (coor[3]-coor[1]) + + #coor_rota = np.array(bbox[4:8], dtype=np.float64) + score = bbox[8] + class_ind = int(bbox[9]) + #print(class_ind) + class_name = self.classes[class_ind] + #print(class_name) + score = '%.4f' % score + xmin, ymin, xmax, ymax = map(str, coor) + #x1, y1, x2, y2, x3, y3, x4, y4 = map(str, coor_rota) + #a1, a2, a3, a4 = map(str, a_rota) + #print(a_rota) + #img_ind_out = img_ind + ".tif" + s = ' '.join([img_ind, score, str(int(x1)), str(int(y1)), str(int(x2)), str(int(y2)), + str(int(x3)), str(int(y3)), str(int(x4)), str(int(y4))]) + '\n' + #s1 = ' '.join([img_ind_out, class_name, score, str(int(x1)), str(int(y1)), str(int(x2)), str(int(y2)), str(int(x3)), str(int(y3)), str(int(x4)), str(int(y4))]) + '\n' + + with open(os.path.join(self.pred_result_path, 'voc', 'comp4_det_test_' + class_name + '.txt'), 'a') as f: + f.write(s) + #with open(os.path.join(self.pred_result_path, 'voc', 'results.txt'), 'a') as f1: + #f1.write(s1) + color = np.zeros(3) + points = np.array( + [[int(x1), int(y1)], [int(x2), int(y2)], [int(x3), int(y3)], [int(x4), int(y4)]]) + ''' + if int(class_name) == 1: + # 25 black + color = (0, 0, 0) + elif int(class_name) == 2: + # 1359 blue + color = (255, 0, 0) + elif int(class_name) == 3: + # 639 Yellow + color = (0, 255, 255) + elif int(class_name) == 4: + # 4371 red + color = (0, 0, 255) + elif int(class_name) == 5: + # 3025 green + color = (0, 255, 0) + ''' + color = (0, 255, 0) + cv2.polylines(img, [points], 1, color, 2) + #print(points) + #cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color, 2) + # c1 左上角 c2 右下角 + + store_path = os.path.join(cfg.PROJECT_PATH, 'dataR/results/', img_ind + '.png')######## + #print(store_path) + cv2.imwrite(store_path, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])################# + + + + self.inference_time = 1.0 * self.inference_time / len(img_inds) + return self.__calc_APs(iou_thresh=self.iouthresh_test), self.inference_time + + def get_bbox(self, img, multi_test=False, flip_test=False): + if multi_test: + test_input_sizes = range(cfg.TEST["MULTI_TEST_RANGE"][0], cfg.TEST["MULTI_TEST_RANGE"][1], cfg.TEST["MULTI_TEST_RANGE"][2]) + bboxes_list = [] + for test_input_size in test_input_sizes: + valid_scale =(0, np.inf) + bboxes_list.append(self.__predict(img, test_input_size, valid_scale)) + if flip_test: + bboxes_flip = self.__predict(img[:, ::-1], test_input_size, valid_scale) + bboxes_flip[:, [0, 2]] = img.shape[1] - 
bboxes_flip[:, [2, 0]] + bboxes_list.append(bboxes_flip) + bboxes = np.row_stack(bboxes_list) + else: + bboxes = self.__predict(img, self.val_shape, (0, np.inf)) + + ########### + #print(bboxes.shape) + #bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh) + #print(bboxes.shape) + #bboxes = nms(bboxes, self.conf_thresh, self.nms_thresh) + bboxes = nms_glid(bboxes, self.conf_thresh, self.nms_thresh)# + + return bboxes + + def __predict(self, img, test_shape, valid_scale): + org_img = np.copy(img) + org_h, org_w, _ = org_img.shape + + img = self.__get_img_tensor(img, test_shape).to(self.device) + self.model.eval() + with torch.no_grad(): + start_time = current_milli_time() + if self.showheatmap: _, p_d, beta = self.model(img) + else: _, p_d = self.model(img) + self.inference_time += (current_milli_time() - start_time) + pred_bbox = p_d.squeeze().cpu().numpy() + + bboxes = self.__convert_pred(pred_bbox, test_shape, (org_h, org_w), valid_scale) + + if self.showheatmap and len(img): + self.__show_heatmap(beta[2], org_img) + return bboxes + + def __show_heatmap(self, beta, img): + Show_Heatmap(beta, img) + + def __get_img_tensor(self, img, test_shape): + img = Resize((test_shape, test_shape), correct_box=False)(img, None).transpose(2, 0, 1) + return torch.from_numpy(img[np.newaxis, ...]).float() + + + def __convert_pred(self, pred_bbox, test_input_size, org_img_shape, valid_scale): + pred_coor = xywh2xyxy(pred_bbox[:, :4]) #xywh2xyxy + + pred_conf = pred_bbox[:, 9] + pred_prob = pred_bbox[:, 10:] + org_h, org_w = org_img_shape + resize_ratio = min(1.0 * test_input_size / org_w, 1.0 * test_input_size / org_h) + dw = (test_input_size - resize_ratio * org_w) / 2 + dh = (test_input_size - resize_ratio * org_h) / 2 + pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio + pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio + + #pred_rotaxy = np.concatenate([pred_x1, pred_y1, pred_x2, pred_y2, pred_x3, pred_y3, pred_x4, pred_y4], axis=-1)########### + pred_rotaxy = pred_bbox[:, 4:8] + pred_r = pred_bbox[:,8:9] + zero = np.zeros_like(pred_rotaxy) + pred_rotaxy = np.where(pred_r > 0.8, zero, pred_rotaxy) + + pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), + np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) + + + invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) + + + pred_coor[invalid_mask] = 0 + pred_rotaxy[invalid_mask] = 0 + + bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) + scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) + + classes = np.argmax(pred_prob, axis=-1) + scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] + score_mask = scores > self.conf_thresh + + mask = np.logical_and(scale_mask, score_mask) + + + coors = pred_coor[mask] + coors_rota = pred_rotaxy[mask] + #coors_rota = pred_coor_rota[mask]####################### + + scores = scores[mask] + + classes = classes[mask] + + bboxes = np.concatenate([coors, coors_rota, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)####################### + return bboxes + + + def __calc_APs(self, iou_thresh=0.5, use_07_metric=False): + """ + :param iou_thresh: + :param use_07_metric: + :return:dict{cls:ap} + """ + filename = os.path.join(self.pred_result_path, 'voc', 'comp4_det_test_{:s}.txt') + cachedir = os.path.join(self.pred_result_path, 'voc', 'cache') + annopath = os.path.join(self.val_data_path, 'Annotations/{:s}.txt') + 
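+ # 'filename' and 'annopath' are format templates: voc_eval fills in the class name
+ # (one comp4_det_test_<class>.txt per class, written by APs_voc above) and the image name
+ # of each per-image ground-truth file respectively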
imagesetfile = os.path.join(self.val_data_path, 'ImageSets', cfg.TEST["EVAL_NAME"]+'.txt') + #print(annopath) + APs = {} + Recalls = {} + Precisions = {} + for i, cls in enumerate(self.classes): + R, P, AP = voc_eval.voc_eval(filename, annopath, imagesetfile, cls, cachedir, iou_thresh, use_07_metric) + APs[cls] = AP + Recalls[cls] = R + Precisions[cls] = P + if os.path.exists(cachedir): + shutil.rmtree(cachedir) + + return APs diff --git a/evalR/voc_eval.py b/evalR/voc_eval.py new file mode 100644 index 0000000..1884be3 --- /dev/null +++ b/evalR/voc_eval.py @@ -0,0 +1,245 @@ +import xml.etree.ElementTree as ET +import os +import pickle +import numpy as np +from utils.utils_basic import * + + +def parse_rec(filename): + """ Parse a PASCAL VOC xml file """ + tree = ET.parse(filename) + objects = [] + for obj in tree.findall('object'): + obj_struct = {} + obj_struct['name'] = obj.find('name').text + obj_struct['pose'] = obj.find('pose').text + obj_struct['truncated'] = int(obj.find('truncated').text) + obj_struct['difficult'] = int(obj.find('difficult').text) + bbox = obj.find('bndbox') + obj_struct['bbox'] = [int(bbox.find('xmin').text), + int(bbox.find('ymin').text), + int(bbox.find('xmax').text), + int(bbox.find('ymax').text)] + objects.append(obj_struct) + + return objects + + +def parse_poly(filename): + """ + :param filename: ground truth file to parse + :return: all instances in a picture + """ + objects = [] + with open(filename, 'r') as f: + while True: + line = f.readline() + if line: + splitlines = line.strip().split(' ') + object_struct = {} + if (len(splitlines) < 9): + continue + classes = cfg.DATA["CLASSES"] + object_struct['name'] = classes[int(splitlines[0])] + # object_struct['name'] = splitlines[0] + if (len(splitlines) == 9): + object_struct['difficult'] = 0 + elif (len(splitlines) == 10): + object_struct['difficult'] = int(splitlines[9]) + object_struct['bbox'] = [float(splitlines[1]), + float(splitlines[2]), + float(splitlines[3]), + float(splitlines[4]), + float(splitlines[5]), + float(splitlines[6]), + float(splitlines[7]), + float(splitlines[8])] + objects.append(object_struct) + else: + break + return objects + + +def voc_ap(rec, prec, use_07_metric=False): + """ ap = voc_ap(rec, prec, [use_07_metric]) + Compute VOC AP given precision and recall. + If use_07_metric is true, uses the + VOC 07 11 point method (default:False). + """ + if use_07_metric: + # 11 point metric + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(rec >= t) == 0: + p = 0 + else: + p = np.max(prec[rec >= t]) + ap = ap + p / 11. 
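+ # the branch below (the default) computes the exact area under the precision-recall curve:
+ # the precision envelope is made monotonically non-increasing, then (delta recall) * precision
+ # is summed over the points where recall changes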
+ else: + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.], rec, [1.])) + mpre = np.concatenate(([0.], prec, [0.])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +'''''' + + +def voc_eval(detpath, + annopath, + imagesetfile, + classname, + cachedir, + ovthresh=0.5, + use_07_metric=False): + # first load gt + if not os.path.isdir(cachedir): + os.mkdir(cachedir) + cachefile = os.path.join(cachedir, 'annots.pkl') + # read list of images + with open(imagesetfile, 'r') as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + if not os.path.isfile(cachefile): + # load annots + recs = {} + for i, imagename in enumerate(imagenames): + ####################parse_poly + recs[imagename] = parse_poly(annopath.format(imagename)) + if i % 100 == 0: + print('Reading annotation for {:d}/{:d}'.format( + i + 1, len(imagenames))) + # save + print('Saving cached annotations to {:s}'.format(cachefile)) + with open(cachefile, 'wb') as f: + pickle.dump(recs, f) + else: + # load + with open(cachefile, 'rb') as f: + recs = pickle.load(f) + + # extract gt objects for this class + class_recs = {} + npos = 0 + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj['name'] == classname] + bbox = np.array([x['bbox'] for x in R]) + difficult = np.array([x['difficult'] for x in R]).astype(np.bool) + det = [False] * len(R) + npos = npos + sum(~difficult) + class_recs[imagename] = {'bbox': bbox, + 'difficult': difficult, + 'det': det} + + # read dets####################### + detfile = detpath.format(classname) + with open(detfile, 'r') as f: + lines = f.readlines() + + splitlines = [x.strip().split(' ') for x in lines] + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) + + # sort by confidence + sorted_ind = np.argsort(-confidence) + sorted_scores = np.sort(-confidence) + BB = BB[sorted_ind, :] + image_ids = [image_ids[x] for x in sorted_ind] + + # go down dets and mark TPs and FPs + nd = len(image_ids) + tp = np.zeros(nd) + fp = np.zeros(nd) + for d in range(nd): + R = class_recs[image_ids[d]] + bb = BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R['bbox'].astype(float) + + if BBGT.size > 0: + # compute overlaps + # intersection + BBGT_xmin = np.min(BBGT[:, 0::2], axis=1) + BBGT_ymin = np.min(BBGT[:, 1::2], axis=1) + BBGT_xmax = np.max(BBGT[:, 0::2], axis=1) + BBGT_ymax = np.max(BBGT[:, 1::2], axis=1) + bb_xmin = np.min(bb[0::2]) + bb_ymin = np.min(bb[1::2]) + bb_xmax = np.max(bb[0::2]) + bb_ymax = np.max(bb[1::2]) + + ixmin = np.maximum(BBGT_xmin, bb_xmin) + iymin = np.maximum(BBGT_ymin, bb_ymin) + ixmax = np.minimum(BBGT_xmax, bb_xmax) + iymax = np.minimum(BBGT_ymax, bb_ymax) + iw = np.maximum(ixmax - ixmin + 1., 0.) + ih = np.maximum(iymax - iymin + 1., 0.) + inters = iw * ih + + # union + uni = ((bb_xmax - bb_xmin + 1.) * (bb_ymax - bb_ymin + 1.) + + (BBGT_xmax - BBGT_xmin + 1.) * + (BBGT_ymax - BBGT_ymin + 1.) 
- inters) + + overlaps = inters / uni + + ############################### + + BBGT_keep_mask = overlaps > 0 + BBGT_keep = BBGT[BBGT_keep_mask, :] + BBGT_keep_index = np.where(overlaps > 0)[0] + + # pdb.set_trace() + def calcoverlaps(BBGT_keep, bb): + overlaps = [] + for index, GT in enumerate(BBGT_keep): + overlap = polygen_iou_xy4_numpy_eval(BBGT_keep[index], bb) + # overlap = polyiou.iou_poly(polyiou.VectorDouble(BBGT_keep[index]), polyiou.VectorDouble(bb)) + overlaps.append(overlap) + return overlaps + ############################# + + if len(BBGT_keep) > 0: + overlaps = calcoverlaps(BBGT_keep, bb) + + ovmax = np.max(overlaps) + jmax = np.argmax(overlaps) + # pdb.set_trace() + jmax = BBGT_keep_index[jmax] + + if ovmax > ovthresh: + # print(ovmax) + if not R['difficult'][jmax]: + if not R['det'][jmax]: + tp[d] = 1. + R['det'][jmax] = 1 + else: + fp[d] = 1. + else: + fp[d] = 1. + + # compute precision recall + # print(tp) + fp = np.cumsum(fp) + tp = np.cumsum(tp) + rec = tp / float(npos) + # avoid divide by zero in case the first detection matches a difficult + # ground truth + prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) + ap = voc_ap(rec, prec, use_07_metric) + + return rec, prec, ap diff --git a/modelR/backbones/cspdarknet53__npattention.py b/modelR/backbones/cspdarknet53__npattention.py new file mode 100644 index 0000000..ce6d6f0 --- /dev/null +++ b/modelR/backbones/cspdarknet53__npattention.py @@ -0,0 +1,269 @@ +import torch +import numpy as np +import torch.nn as nn +from ..layers.convolutions import Convolutional +from ..layers.conv_blocks import Residual_block_CSP +from ..layers.np_attention_blocks import Sobel_Edge_Block, NPAttention +import torch.nn.functional as F +class Route(nn.Module): + def __init__(self): + super(Route, self).__init__() + + def forward(self, x1, x2): + """ + x1 means previous output; x2 means current output + """ + out = torch.cat((x2, x1), dim=1) + return out + +class CSPDarknet53_NPAttention(nn.Module): + + def __init__(self, pre_weight=None): + super(CSPDarknet53_NPAttention, self).__init__() + self.__conv = Convolutional(filters_in=3, filters_out=32, kernel_size=3, stride=1, pad=1, norm='bn', + activate='Mish') + + self.__conv_5_0 = Convolutional(filters_in=32, filters_out=64, kernel_size=3, stride=2, pad=1, norm='bn', + activate='MEMish')# + self.__conv_5_0_0 = Convolutional(filters_in=64, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky')# + self.__route_5_0_0 = Route() ## self.__conv_5_0 + self.__conv_5_0_1 = Convolutional(filters_in=128, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky')# + self.__rb_5_0 = Residual_block_CSP(filters_in=64) + self.__conv_5_0_2 = Convolutional(filters_in=64, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky')# + self.__route_5_0_1 = Route() ### self.__conv_5_0 + self.__conv_5_0_3 = Convolutional(filters_in=128, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + + + self.__conv_5_1 = Convolutional(filters_in=64, filters_out=128, kernel_size=3, stride=2, pad=1, norm='bn', + activate='MEMish') + self.__conv_5_1_0 = Convolutional(filters_in=128, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_1_0 = Route() ## self.__conv_5_1 128+64 + self.__conv_5_1_1 = Convolutional(filters_in=128+64, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__rb_5_1_0 = Residual_block_CSP(filters_in=64) + self.__rb_5_1_1 = 
Residual_block_CSP(filters_in=64) + self.__conv_5_1_2 = Convolutional(filters_in=64, filters_out=64, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_1_1 = Route() ## self.__conv_5_1_0 64+64 + self.__conv_5_1_3 = Convolutional(filters_in=64+64, filters_out=128, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + + + self.__conv_5_2 = Convolutional(filters_in=128, filters_out=256, kernel_size=3, stride=2, pad=1, norm='bn', + activate='MEMish') + self.__conv_5_2_0 = Convolutional(filters_in=256, filters_out=128, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_2_0 = Route() ## self.__conv_5_2 128+256 + self.__conv_5_2_1 = Convolutional(filters_in=128 + 256, filters_out=128, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__rb_5_2_0 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_1 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_2 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_3 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_4 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_5 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_6 = Residual_block_CSP(filters_in=128) + self.__rb_5_2_7 = Residual_block_CSP(filters_in=128) + self.__conv_5_2_2 = Convolutional(filters_in=128, filters_out=128, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_2_1 = Route() ## self.__conv_5_2_0 128+128 + self.__conv_5_2_3 = Convolutional(filters_in=128+128, filters_out=256, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + + + self.__conv_5_3 = Convolutional(filters_in=256, filters_out=512, kernel_size=3, stride=2, pad=1, norm='bn', + activate='MEMish') + self.__conv_5_3_0 = Convolutional(filters_in=512, filters_out=256, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_3_0 = Route() ## self.__conv_5_3 256+512 + self.__conv_5_3_1 = Convolutional(filters_in=256 + 512, filters_out=256, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__rb_5_3_0 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_1 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_2 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_3 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_4 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_5 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_6 = Residual_block_CSP(filters_in=256) + self.__rb_5_3_7 = Residual_block_CSP(filters_in=256) + self.__conv_5_3_2 = Convolutional(filters_in=256, filters_out=256, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_3_1 = Route() ## self.__conv_5_3_0 256+256 + self.__conv_5_3_3 = Convolutional(filters_in=256+256, filters_out=512, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + + + self.__conv_5_4 = Convolutional(filters_in=512, filters_out=1024, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__conv_5_4_0 = Convolutional(filters_in=1024, filters_out=512, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_4_0 = Route() ## self.__conv_5_4 512+1024 + self.__conv_5_4_1 = Convolutional(filters_in=512 + 1024, filters_out=512, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__rb_5_4_0 = Residual_block_CSP(filters_in=512) + self.__rb_5_4_1 = Residual_block_CSP(filters_in=512) + self.__rb_5_4_2 = Residual_block_CSP(filters_in=512) + self.__rb_5_4_3 = Residual_block_CSP(filters_in=512) + self.__conv_5_4_2 = 
Convolutional(filters_in=512, filters_out=512, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + self.__route_5_4_1 = Route() ## self.__conv_5_4_0 512+512 + self.__conv_5_4_3 = Convolutional(filters_in=512+512, filters_out=1024, kernel_size=1, stride=1, pad=0, norm='bn', + activate='leaky') + + self.__initialize_weights() + + if pre_weight: + self.load_darknet_weights(pre_weight) + + self.__edge = Sobel_Edge_Block(channel_in=3) + #self.__edge_s = EdgeNLPyramid(256,128,use_scale=False,groups=8) + self.__edge_m = NPAttention(256,256,use_scale=False,groups=8) + + def __initialize_weights(self): + print("**" * 10, "Initing darknet weights", "**" * 10) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + print("initing {}".format(m)) + + + def load_darknet_weights(self, weight_file, cutoff=136): + "https://github.com/ultralytics/yolov3/blob/master/models.py" + print("**"*25 + "\nload darknet weights : ", weight_file) + + with open(weight_file, 'rb') as f: + _ = np.fromfile(f, dtype=np.int32, count=5) + weights = np.fromfile(f, dtype=np.float32) + count = 0 + ptr = 0 + for m in self.modules(): + if isinstance(m, Convolutional): + # only initing backbone conv's weights + if count == cutoff: + break + count += 1 + + conv_layer = m._Convolutional__conv + if m.norm == "bn": + # Load BN bias, weights, running mean and running variance + bn_layer = m._Convolutional__norm + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + + print("loading weight {}".format(bn_layer)) + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. 
weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + print("loading weight {}".format(conv_layer)) + + + def forward(self, x): + edge = self.__edge(x) + conv = self.__conv(x) + + conv_5_0 = self.__conv_5_0(conv) + conv_5_0_0 = self.__conv_5_0_0(conv_5_0) + route_5_0_0 = self.__route_5_0_0(conv_5_0_0, conv_5_0) + conv_5_0_1 = self.__conv_5_0_1(route_5_0_0) + rb_5_0 = self.__rb_5_0(conv_5_0_1) + conv_5_0_2 = self.__conv_5_0_2(rb_5_0) + route_5_0_1 = self.__route_5_0_1(conv_5_0_2, conv_5_0_0) + conv_5_0_3 = self.__conv_5_0_3(route_5_0_1) + + conv_5_1 = self.__conv_5_1(conv_5_0_3) + conv_5_1_0 = self.__conv_5_1_0(conv_5_1) + route_5_1_0 = self.__route_5_1_0(conv_5_1_0, conv_5_1) + conv_5_1_1 = self.__conv_5_1_1(route_5_1_0) + rb_5_1_0 = self.__rb_5_1_0(conv_5_1_1) + rb_5_1_1 = self.__rb_5_1_1(rb_5_1_0) + conv_5_1_2 = self.__conv_5_1_2(rb_5_1_1) + route_5_1_1 = self.__route_5_1_1(conv_5_1_2, conv_5_1_0) + conv_5_1_3 = self.__conv_5_1_3(route_5_1_1) + + conv_5_2 = self.__conv_5_2(conv_5_1_3) + conv_5_2_0 = self.__conv_5_2_0(conv_5_2) + route_5_2_0 = self.__route_5_2_0(conv_5_2_0, conv_5_2) + conv_5_2_1 = self.__conv_5_2_1(route_5_2_0) + rb_5_2_0 = self.__rb_5_2_0(conv_5_2_1) + rb_5_2_1 = self.__rb_5_2_1(rb_5_2_0) + rb_5_2_2 = self.__rb_5_2_2(rb_5_2_1) + rb_5_2_3 = self.__rb_5_2_3(rb_5_2_2) + rb_5_2_4 = self.__rb_5_2_4(rb_5_2_3) + rb_5_2_5 = self.__rb_5_2_5(rb_5_2_4) + rb_5_2_6 = self.__rb_5_2_6(rb_5_2_5) + rb_5_2_7 = self.__rb_5_2_7(rb_5_2_6) + conv_5_2_2 = self.__conv_5_2_2(rb_5_2_7) + route_5_2_1 = self.__route_5_2_1(conv_5_2_2, conv_5_2_0) + conv_5_2_3 = self.__conv_5_2_3(route_5_2_1) + + conv_5_3 = self.__conv_5_3(conv_5_2_3) + conv_5_3_0 = self.__conv_5_3_0(conv_5_3) + route_5_3_0 = self.__route_5_3_0(conv_5_3_0, conv_5_3) + conv_5_3_1 = self.__conv_5_3_1(route_5_3_0) + conv_5_3_1 = self.__edge_m(conv_5_3_1, F.interpolate(edge, scale_factor=0.5)) + rb_5_3_0 = self.__rb_5_3_0(conv_5_3_1) + rb_5_3_1 = self.__rb_5_3_1(rb_5_3_0) + rb_5_3_2 = self.__rb_5_3_2(rb_5_3_1) + rb_5_3_3 = self.__rb_5_3_3(rb_5_3_2) + rb_5_3_4 = self.__rb_5_3_4(rb_5_3_3) + rb_5_3_5 = self.__rb_5_3_5(rb_5_3_4) + rb_5_3_6 = self.__rb_5_3_6(rb_5_3_5) + rb_5_3_7 = self.__rb_5_3_7(rb_5_3_6) + conv_5_3_2 = self.__conv_5_3_2(rb_5_3_7) + route_5_3_1 = self.__route_5_3_1(conv_5_3_2, conv_5_3_0) + conv_5_3_3 = self.__conv_5_3_3(route_5_3_1) + + conv_5_4 = self.__conv_5_4(conv_5_3_3) + conv_5_4_0 = self.__conv_5_4_0(conv_5_4) + route_5_4_0 = self.__route_5_4_0(conv_5_4_0, conv_5_4) + conv_5_4_1 = self.__conv_5_4_1(route_5_4_0) + rb_5_4_0 = self.__rb_5_4_0(conv_5_4_1) + rb_5_4_1 = self.__rb_5_4_1(rb_5_4_0) + rb_5_4_2 = self.__rb_5_4_2(rb_5_4_1) + rb_5_4_3 = self.__rb_5_4_3(rb_5_4_2) + conv_5_4_2 = self.__conv_5_4_2(rb_5_4_3) + route_5_4_1 = self.__route_5_4_1(conv_5_4_2, conv_5_4_0) + conv_5_4_3 = self.__conv_5_4_3(route_5_4_1) + + return conv_5_2_3, conv_5_3_3, conv_5_4_3 \ No newline at end of file diff --git a/modelR/backbones/darknet53.py b/modelR/backbones/darknet53.py new file mode 100644 index 0000000..b018d61 --- /dev/null +++ b/modelR/backbones/darknet53.py @@ -0,0 +1,165 @@ +import numpy as np +import torch +import torch.nn as nn +from ..layers.convolutions import Convolutional +from ..layers.conv_blocks import Residual_block + +class Darknet53(nn.Module): + + def __init__(self, pre_weight=None): + super(Darknet53, self).__init__() + self.__conv = Convolutional(filters_in=3, filters_out=32, 
kernel_size=3, stride=1, pad=1, norm='bn', + activate='leaky') + + self.__conv_5_0 = Convolutional(filters_in=32, filters_out=64, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__rb_5_0 = Residual_block(filters_in=64, filters_out=64, filters_medium=32) + + self.__conv_5_1 = Convolutional(filters_in=64, filters_out=128, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__rb_5_1_0 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) + self.__rb_5_1_1 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) + + self.__conv_5_2 = Convolutional(filters_in=128, filters_out=256, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__rb_5_2_0 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_1 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_2 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_3 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_4 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_5 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_6 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_7 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + + self.__conv_5_3 = Convolutional(filters_in=256, filters_out=512, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__rb_5_3_0 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_1 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_2 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_3 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_4 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_5 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_6 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_7 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + + + self.__conv_5_4 = Convolutional(filters_in=512, filters_out=1024, kernel_size=3, stride=2, pad=1, norm='bn', + activate='leaky') + self.__rb_5_4_0 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_1 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_2 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_3 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + + self.__initialize_weights() + + if pre_weight: + self.load_darknet_weights(pre_weight) + + def __initialize_weights(self): + print("**" * 10, "Initing darknet weights", "**" * 10) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + print("initing {}".format(m)) + + + def load_darknet_weights(self, weight_file, cutoff=52): + "https://github.com/ultralytics/yolov3/blob/master/models.py" + print("**"*25 + "\nload darknet weights : ", weight_file) + with open(weight_file, 
'rb') as f: + _ = np.fromfile(f, dtype=np.int32, count=5) + weights = np.fromfile(f, dtype=np.float32) + count = 0 + ptr = 0 + for m in self.modules(): + if isinstance(m, Convolutional): + # only initing backbone conv's weights + if count == cutoff: + break + count += 1 + conv_layer = m._Convolutional__conv + if m.norm == "bn": + # Load BN bias, weights, running mean and running variance + bn_layer = m._Convolutional__norm + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + print("loading weight {}".format(bn_layer)) + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + print("loading weight {}".format(conv_layer)) + + + def forward(self, x): + x = self.__conv(x) + + x0_0 = self.__conv_5_0(x) + x0_1 = self.__rb_5_0(x0_0) + + x1_0 = self.__conv_5_1(x0_1) + x1_1 = self.__rb_5_1_0(x1_0) + x1_2 = self.__rb_5_1_1(x1_1) + + x2_0 = self.__conv_5_2(x1_2) + x2_1 = self.__rb_5_2_0(x2_0) + x2_2 = self.__rb_5_2_1(x2_1) + x2_3 = self.__rb_5_2_2(x2_2) + x2_4 = self.__rb_5_2_3(x2_3) + x2_5 = self.__rb_5_2_4(x2_4) + x2_6 = self.__rb_5_2_5(x2_5) + x2_7 = self.__rb_5_2_6(x2_6) + x2_8 = self.__rb_5_2_7(x2_7) # small + + x3_0 = self.__conv_5_3(x2_8) + x3_1 = self.__rb_5_3_0(x3_0) + x3_2 = self.__rb_5_3_1(x3_1) + x3_3 = self.__rb_5_3_2(x3_2) + x3_4 = self.__rb_5_3_3(x3_3) + x3_5 = self.__rb_5_3_4(x3_4) + x3_6 = self.__rb_5_3_5(x3_5) + x3_7 = self.__rb_5_3_6(x3_6) + x3_8 = self.__rb_5_3_7(x3_7) # medium + + x4_0 = self.__conv_5_4(x3_8) + x4_1 = self.__rb_5_4_0(x4_0) + x4_2 = self.__rb_5_4_1(x4_1) + x4_3 = self.__rb_5_4_2(x4_2) + x4_4 = self.__rb_5_4_3(x4_3) # large + + return x2_8, x3_8, x4_4 diff --git a/modelR/backbones/darknet53_npattention.py b/modelR/backbones/darknet53_npattention.py new file mode 100644 index 0000000..aa17596 --- /dev/null +++ b/modelR/backbones/darknet53_npattention.py @@ -0,0 +1,176 @@ +import numpy as np +import torch +import torch.nn as nn +from ..layers.convolutions import Convolutional +from ..layers.conv_blocks import Residual_block +from ..layers.np_attention_blocks import Sobel_Edge_Block, NPAttention +import torch.nn.functional as F + +class Darknet53_NPAttention(nn.Module): + + def __init__(self, pre_weight=None): + super(Darknet53_NPAttention, self).__init__() + self.__conv = Convolutional(filters_in=3, filters_out=32, kernel_size=3, stride=1, + pad=1, norm='bn', activate='leaky') + + self.__conv_5_0 = Convolutional(filters_in=32, filters_out=64, kernel_size=3, stride=2, + pad=1, norm='bn', activate='leaky') + self.__rb_5_0 = Residual_block(filters_in=64, filters_out=64, filters_medium=32) + + self.__conv_5_1 = Convolutional(filters_in=64, 
filters_out=128, kernel_size=3, stride=2, + pad=1, norm='bn', activate='leaky') + self.__rb_5_1_0 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) + self.__rb_5_1_1 = Residual_block(filters_in=128, filters_out=128, filters_medium=64) + + self.__conv_5_2 = Convolutional(filters_in=128, filters_out=256, kernel_size=3, stride=2, + pad=1, norm='bn', activate='leaky') + self.__rb_5_2_0 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_1 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_2 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_3 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_4 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_5 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_6 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + self.__rb_5_2_7 = Residual_block(filters_in=256, filters_out=256, filters_medium=128) + + self.__conv_5_3 = Convolutional(filters_in=256, filters_out=512, kernel_size=3, stride=2, + pad=1, norm='bn', activate='leaky') + self.__rb_5_3_0 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_1 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_2 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_3 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_4 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_5 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_6 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + self.__rb_5_3_7 = Residual_block(filters_in=512, filters_out=512, filters_medium=256) + + + self.__conv_5_4 = Convolutional(filters_in=512, filters_out=1024, kernel_size=3, stride=2, + pad=1, norm='bn', activate='leaky') + self.__rb_5_4_0 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_1 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_2 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + self.__rb_5_4_3 = Residual_block(filters_in=1024, filters_out=1024, filters_medium=512) + + self.__initialize_weights() + + if pre_weight: + self.load_darknet_weights(pre_weight) + + self.__edge = Sobel_Edge_Block(channel_in=3) + #self.__edge_s = EdgeNLPyramid(256,128,use_scale=False,groups=8) + self.__edge_m = NPAttention(512,256,use_scale=False,groups=8) + + + def __initialize_weights(self): + print("**" * 10, "Initing darknet weights", "**" * 10) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + print("initing {}".format(m)) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + print("initing {}".format(m)) + + + def load_darknet_weights(self, weight_file, cutoff=52): + "https://github.com/ultralytics/yolov3/blob/master/models.py" + print("**"*25 + "\nload darknet weights : ", weight_file) + with open(weight_file, 'rb') as f: + _ = np.fromfile(f, dtype=np.int32, count=5) + weights = np.fromfile(f, dtype=np.float32) + count = 0 + ptr = 0 + for m in 
self.modules(): + if isinstance(m, Convolutional): + # only initing backbone conv's weights + if count == cutoff: + break + count += 1 + conv_layer = m._Convolutional__conv + if m.norm == "bn": + # Load BN bias, weights, running mean and running variance + bn_layer = m._Convolutional__norm + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + print("loading weight {}".format(bn_layer)) + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + print("loading weight {}".format(conv_layer)) + + + def forward(self, x): + edge = self.__edge(x) + x = self.__conv(x) + + x0_0 = self.__conv_5_0(x) + x0_1 = self.__rb_5_0(x0_0) + + x1_0 = self.__conv_5_1(x0_1) + x1_1 = self.__rb_5_1_0(x1_0) + x1_2 = self.__rb_5_1_1(x1_1) + + x2_0 = self.__conv_5_2(x1_2) + #x2_0 = self.__edge_s(x2_0,edge) + x2_1 = self.__rb_5_2_0(x2_0) + x2_2 = self.__rb_5_2_1(x2_1) + x2_3 = self.__rb_5_2_2(x2_2) + x2_4 = self.__rb_5_2_3(x2_3) + x2_5 = self.__rb_5_2_4(x2_4) + x2_6 = self.__rb_5_2_5(x2_5) + x2_7 = self.__rb_5_2_6(x2_6) + x2_8 = self.__rb_5_2_7(x2_7) # small + + x3_0 = self.__conv_5_3(x2_8) + + x3_1 = self.__rb_5_3_0(x3_0) + x3_2 = self.__rb_5_3_1(x3_1) + x3_3 = self.__rb_5_3_2(x3_2) + x3_4 = self.__rb_5_3_3(x3_3) + x3_5 = self.__rb_5_3_4(x3_4) + x3_6 = self.__rb_5_3_5(x3_5) + x3_7 = self.__rb_5_3_6(x3_6) + x3_7 = self.__edge_m(x3_7, F.interpolate(edge, size=[edge.shape[1] // 2, edge.shape[1] // 2])) # , scale_factor=0.5 + x3_8 = self.__rb_5_3_7(x3_7) # medium + + x4_0 = self.__conv_5_4(x3_8) + x4_1 = self.__rb_5_4_0(x4_0) + x4_2 = self.__rb_5_4_1(x4_1) + x4_3 = self.__rb_5_4_2(x4_2) + x4_4 = self.__rb_5_4_3(x4_3) # large + + return x2_8, x3_8, x4_4 diff --git a/modelR/backbones/ghostnet.py b/modelR/backbones/ghostnet.py new file mode 100644 index 0000000..1ea826f --- /dev/null +++ b/modelR/backbones/ghostnet.py @@ -0,0 +1,270 @@ +""" +Creates a GhostNet Model as defined in: +GhostNet: More Features from Cheap Operations By Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu. +https://arxiv.org/abs/1911.11907 +Modified from https://github.com/d-li14/mobilenetv3.pytorch +""" +import torch +import torch.nn as nn +import math + + +__all__ = ['ghost_net'] + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. 
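+ It rounds `v` to the nearest multiple of `divisor`, never returning less than `min_value` (which defaults to `divisor`) or less than 90% of `v`.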
+ It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=4): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + y = torch.clamp(y, 0, 1) + return x * y + + +def depthwise_conv(inp, oup, kernel_size=3, stride=1, relu=False): + return nn.Sequential( + nn.Conv2d(inp, oup, kernel_size, stride, kernel_size//2, groups=inp, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + +class GhostModule(nn.Module): + def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): + super(GhostModule, self).__init__() + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels*(ratio-1) + + self.primary_conv = nn.Sequential( + nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size//2, bias=False), + nn.BatchNorm2d(init_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + self.cheap_operation = nn.Sequential( + nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size//2, groups=init_channels, bias=False), + nn.BatchNorm2d(new_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + def forward(self, x): + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1,x2], dim=1) + return out[:,:self.oup,:,:] + + +class GhostBottleneck(nn.Module): + def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se): + super(GhostBottleneck, self).__init__() + assert stride in [1, 2] + + self.conv = nn.Sequential( + # pw + GhostModule(inp, hidden_dim, kernel_size=1, relu=True), + # dw + depthwise_conv(hidden_dim, hidden_dim, kernel_size, stride, relu=False) if stride==2 else nn.Sequential(), + # Squeeze-and-Excite + SELayer(hidden_dim) if use_se else nn.Sequential(), + # pw-linear + GhostModule(hidden_dim, oup, kernel_size=1, relu=False), + ) + + if stride == 1 and inp == oup: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + depthwise_conv(inp, inp, kernel_size, stride, relu=False), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + return self.conv(x) + self.shortcut(x) + + +class GhostNet(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.): + super(GhostNet, self).__init__() + # setting of inverted residual blocks + self.cfgs = [ + # k_kernel_size, t_expansion_size, c_out, SE, stride + [3, 16, 16, 0, 1], + [3, 48, 24, 0, 2], + [3, 72, 24, 0, 1], + [5, 72, 40, 1, 2], + [5, 120, 40, 1, 1], + [3, 240, 80, 0, 2], + [3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 1, 1], + [3, 672, 112, 1, 1], + [5, 672, 160, 1, 2], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1] + ] + + # building first layer + output_channel = _make_divisible(16 * width_mult, 4) + layers = [nn.Sequential( + 
nn.Conv2d(3, output_channel, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channel), + nn.ReLU(inplace=True) + )] + input_channel = output_channel + + # building inverted residual blocks + block = GhostBottleneck + for k, exp_size, c, use_se, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, 4) + hidden_channel = _make_divisible(exp_size * width_mult, 4) + layers.append(block(input_channel, hidden_channel, output_channel, k, s, use_se)) + input_channel = output_channel + self.features = nn.Sequential(*layers) + + # building last several layers + output_channel = _make_divisible(exp_size * width_mult, 4) + self.squeeze = nn.Sequential( + nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=False), + nn.BatchNorm2d(output_channel), + nn.ReLU(inplace=True), + #nn.AdaptiveAvgPool2d((1, 1)), + ) + input_channel = output_channel + + ''' + output_channel = 1280 + self.classifier = nn.Sequential( + nn.Linear(input_channel, output_channel, bias=False), + nn.BatchNorm1d(output_channel), + nn.ReLU(inplace=True), + nn.Dropout(0.2), + nn.Linear(output_channel, num_classes), + ) + ''' + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.squeeze(x) + x = x.view(x.size(0), -1) + #x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + +def ghost_net(**kwargs): + """ + Constructs a GhostNet model + """ + cfgs = [ + # k, t, c, SE, s + [3, 16, 16, 0, 1], + [3, 48, 24, 0, 2], + [3, 72, 24, 0, 1], + [5, 72, 40, 1, 2], + [5, 120, 40, 1, 1], + [3, 240, 80, 0, 2], + [3, 200, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 184, 80, 0, 1], + [3, 480, 112, 1, 1], + [3, 672, 112, 1, 1], + [5, 672, 160, 1, 2], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1], + [5, 960, 160, 0, 1], + [5, 960, 160, 1, 1] + ] + return GhostNet(cfgs, **kwargs) + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + if name is "squeeze": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + return outputs + +class GhostNet_Det(nn.Module): + def __init__(self, extract_list, weight_path=None, width_mult=1.): + super(GhostNet_Det, self).__init__() + + self.__submodule = GhostNet(width_mult=width_mult) + if weight_path: + print("*"*40, "\nLoading weight of F : {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of GhostNet : {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + +if __name__=='__main__': + #model = ghost_net() + #model.eval() + #print(model) + #input = torch.randn(32,3,224,224) + #y = model(input) + #print(y) + + from model.get_model_complexity import get_model_complexity_info + from 
torchstat import stat + + net = GhostNet_Det(extract_list=["5", "11", "squeeze"],width_mult=1.0) + stat(net, (3, 544, 544)) + #print(net) + #flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + #print('GFlops: %.3fG' % (flops / 1e9)) + #print('Params: %.2fM' % (params / 1e6)) \ No newline at end of file diff --git a/modelR/backbones/mobilenetv2.py b/modelR/backbones/mobilenetv2.py new file mode 100644 index 0000000..4af821a --- /dev/null +++ b/modelR/backbones/mobilenetv2.py @@ -0,0 +1,194 @@ +""" +Reference : https://github.com/d-li14/mobilenetv2.pytorch/blob/master/models/imagenet/mobilenetv2.py +""" +import torch +import torch.nn as nn +import math + +__all__ = ['mobilenetv2'] + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + +def conv_3x3_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True), + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + hidden_dim = round(inp * expand_ratio) + self.identity = stride == 1 and inp == oup + + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + conv = self.conv(x) + if self.identity: + return x + conv + else: + return conv + +class _MobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.): + super(_MobileNetV2, self).__init__() + # setting of inverted residual blocks + self.cfgs = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + # building first layer + input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) + layers = [conv_3x3_bn(3, input_channel, 2)] + # building inverted residual blocks + block = InvertedResidual + for t, c, n, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) + for i in range(n): + layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) + input_channel = output_channel + self.features = nn.Sequential(*layers) + # building last several 
layers + output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 + self.conv = conv_1x1_bn(input_channel, output_channel) + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.conv(x) + return x + + def _initialize_weights(self): + print("**" * 10, "Initing MobilenetV2 weights", "**" * 10) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + if name is "conv": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + return outputs + +class MobilenetV2(nn.Module): + def __init__(self, extract_list, weight_path=None, width_mult=1.): + super(MobilenetV2, self).__init__() + + self.__submodule = _MobileNetV2(width_mult=width_mult) + if weight_path: + print("*"*40, "\nLoading weight of MobilenetV2 : {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of MobilenetV2 : {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + +if __name__=='__main__': + #model = MobilenetV2(extract_list=["6", "13", "conv"]) + #model.eval() + #print(model) + #input = torch.randn(32,3,224,224) + #y = model(input) + #print(y) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = MobilenetV2(extract_list=["6", "13", "conv"],width_mult=1.0) + stat(net, (3, 544, 544)) + flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + print('GFlops: %.3fG' % (flops / 1e9)) + print('Params: %.2fM' % (params / 1e6)) \ No newline at end of file diff --git a/modelR/backbones/mobilenetv2_320.py b/modelR/backbones/mobilenetv2_320.py new file mode 100644 index 0000000..3e1287c --- /dev/null +++ b/modelR/backbones/mobilenetv2_320.py @@ -0,0 +1,194 @@ +""" +Reference : https://github.com/d-li14/mobilenetv2.pytorch/blob/master/models/imagenet/mobilenetv2.py +""" +import torch +import torch.nn as nn +import math + +__all__ = ['mobilenetv2'] + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. 
+ It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + +def conv_3x3_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True), + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + hidden_dim = round(inp * expand_ratio) + self.identity = stride == 1 and inp == oup + + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + conv = self.conv(x) + if self.identity: + return x + conv + else: + return conv + +class _MobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1.): + super(_MobileNetV2, self).__init__() + # setting of inverted residual blocks + self.cfgs = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + # building first layer + input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) + layers = [conv_3x3_bn(3, input_channel, 2)] + # building inverted residual blocks + block = InvertedResidual + for t, c, n, s in self.cfgs: + output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) + for i in range(n): + layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) + input_channel = output_channel + self.features = nn.Sequential(*layers) + # building last several layers + #output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 + #self.conv = conv_1x1_bn(input_channel, output_channel) + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + #x = self.conv(x) + return x + + def _initialize_weights(self): + print("**" * 10, "Initing MobilenetV2 weights", "**" * 10) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + #if name is "conv": + #x = module(x) + #if name in self.extracted_layers: + #outputs.append(x) + return outputs + +class MobilenetV2(nn.Module): + def __init__(self, extract_list, weight_path=None, width_mult=1.): + super(MobilenetV2, self).__init__() + + self.__submodule = _MobileNetV2(width_mult=width_mult) + if weight_path: + print("*"*40, "\nLoading weight of MobilenetV2 : {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of MobilenetV2 : {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + +if __name__=='__main__': + #model = MobilenetV2(extract_list=["6", "13", "conv"]) + #model.eval() + #print(model) + #input = torch.randn(32,3,224,224) + #y = model(input) + #print(y) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = MobilenetV2(extract_list=["6", "13", "conv"],width_mult=1.0) + stat(net, (3, 544, 544)) + flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + print('GFlops: %.3fG' % (flops / 1e9)) + print('Params: %.2fM' % (params / 1e6)) \ No newline at end of file diff --git a/modelR/backbones/mobilenetv2_cond.py b/modelR/backbones/mobilenetv2_cond.py new file mode 100644 index 0000000..64a2500 --- /dev/null +++ b/modelR/backbones/mobilenetv2_cond.py @@ -0,0 +1,253 @@ + +import functools +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +__all__ = ['cond_mobilenetv2'] + +class route_func(nn.Module): + r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference + https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf + Args: + c_in (int): Number of channels in the input image + num_experts (int): Number of experts for mixture. 
Default: 1 + """ + + def __init__(self, c_in, num_experts): + super(route_func, self).__init__() + self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) + self.fc = nn.Linear(c_in, num_experts) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + x = self.sigmoid(x) + return x + + +class CondConv2d(nn.Module): + r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference + https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` + num_experts (int): Number of experts for mixture. Default: 1 + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, bias=True, + num_experts=1): + super(CondConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.num_experts = num_experts + + self.weight = nn.Parameter(torch.Tensor(num_experts, out_channels, in_channels // groups, kernel_size, kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(num_experts, out_channels)) + else: + self.register_parameter('bias', None) + + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, x, routing_weight): + b, c_in, h, w = x.size() + k, c_out, c_in, kh, kw = self.weight.size() + x = x.view(1, -1, h, w) + weight = self.weight.view(k, -1) + combined_weight = torch.mm(routing_weight, weight).view(-1, c_in, kh, kw) + if self.bias is not None: + combined_bias = torch.mm(routing_weight, self.bias).view(-1) + output = F.conv2d( + x, weight=combined_weight, bias=combined_bias, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * b) + else: + output = F.conv2d( + x, weight=combined_weight, bias=None, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * b) + + output = output.view(b, c_out, output.size(-2), output.size(-1)) + return output + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
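+ # For example: _make_divisible(20, 8) returns 24, while _make_divisible(20, 16)
+ # returns 32 rather than 16, because rounding down to 16 would drop more than
+ # 10% of the requested width.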
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def conv_3x3_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio, num_experts=None): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + + hidden_dim = round(inp * expand_ratio) + self.identity = stride == 1 and inp == oup + self.expand_ratio = expand_ratio + self.cond = num_experts is not None + Conv2d = functools.partial(CondConv2d, num_experts=num_experts) if num_experts else nn.Conv2d + + if expand_ratio != 1: + self.pw = Conv2d(inp, hidden_dim, 1, 1, 0, bias=False) + self.bn_pw = nn.BatchNorm2d(hidden_dim) + self.dw = Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False) + self.bn_dw = nn.BatchNorm2d(hidden_dim) + self.pw_linear = Conv2d(hidden_dim, oup, 1, 1, 0, bias=False) + self.bn_pw_linear = nn.BatchNorm2d(oup) + self.relu = nn.ReLU6(inplace=True) + + if num_experts: + self.route = route_func(inp, num_experts) + + def forward(self, x): + identity = x + if self.cond: + routing_weight = self.route(x) + if self.expand_ratio != 1: + x = self.relu(self.bn_pw(self.pw(x, routing_weight))) + x = self.relu(self.bn_dw(self.dw(x, routing_weight))) + x = self.bn_pw_linear(self.pw_linear(x, routing_weight)) + else: + if self.expand_ratio != 1: + x = self.relu(self.bn_pw(self.pw(x))) + x = self.relu(self.bn_dw(self.dw(x))) + x = self.bn_pw_linear(self.pw_linear(x)) + + if self.identity: + return x + identity + else: + return x + + +class CondMobileNetV2(nn.Module): + def __init__(self, num_classes=1000, width_mult=1., num_experts=8): + super(CondMobileNetV2, self).__init__() + # setting of inverted residual blocks + self.cfgs = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = _make_divisible(32 * width_mult, 8) + layers = [conv_3x3_bn(3, input_channel, 2)] + # building inverted residual blocks + block = InvertedResidual + self.num_experts = None + for j, (t, c, n, s) in enumerate(self.cfgs): + output_channel = _make_divisible(c * width_mult, 8) + for i in range(n): + layers.append(block(input_channel, output_channel, s if i == 0 else 1, t, self.num_experts)) + input_channel = output_channel + if j == 4 and i == 0: # CondConv layers in the final 6 inverted residual blocks + self.num_experts = num_experts + self.features = nn.Sequential(*layers) + # building last several layers + output_channel = _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280 + self.conv = conv_1x1_bn(input_channel, output_channel) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.classifier_route = route_func(output_channel, num_experts) + self.classifier = CondConv2d(output_channel, num_classes, kernel_size=1, bias=False, num_experts=num_experts) + + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.conv(x) + x = self.avgpool(x) + routing_weight = self.classifier_route(x) + x = self.classifier(x, routing_weight) + x = x.squeeze_() + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + 
m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() + +def cond_mobilenetv2(**kwargs): + """ + Constructs a CondConv-based MobileNet V2 model + """ + return CondMobileNetV2(**kwargs) + +if __name__=='__main__': + #model = MobilenetV2(extract_list=["6", "13", "conv"]) + #model.eval() + #print(model) + #input = torch.randn(32,3,224,224) + #y = model(input) + #print(y) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = CondMobileNetV2() + stat(net, (3, 544, 544)) + flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + print('GFlops: %.3fG' % (flops / 1e9)) + print('Params: %.2fM' % (params / 1e6)) \ No newline at end of file diff --git a/modelR/backbones/mobilenetv2_dwt.py b/modelR/backbones/mobilenetv2_dwt.py new file mode 100644 index 0000000..b4629d1 --- /dev/null +++ b/modelR/backbones/mobilenetv2_dwt.py @@ -0,0 +1,234 @@ +# This code is built from the PyTorch examples repository: https://github.com/pytorch/vision/tree/master/torchvision/models. +# Copyright (c) 2017 Torch Contributors. +# The Pytorch examples are available under the BSD 3-Clause License. + +# ========================================================================================== + +# Adobe’s modifications are Copyright 2019 Adobe. All rights reserved. +# Adobe’s modifications are licensed under the Creative Commons Attribution-NonCommercial-ShareAlike +# 4.0 International Public License (CC-NC-SA-4.0). To view a copy of the license, visit +# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode. + +# ========================================================================================== + +# BSD-3 License + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: + +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. + +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. + +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +import os +import sys +sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) +import torch +import torch.nn as nn +from model.layers.conv_blocks import Downsample_DWT_tiny + +__all__ = ['mobilenetv2'] + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups = groups, bias = False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace = True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio, wavename = 'haar'): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size = 1)) + if (stride == 1): + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride = stride, groups = hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias = False), + nn.BatchNorm2d(oup), + ]) + else: + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride = 1, groups = hidden_dim), + #Downsample(filt_size = filter_size, stride = stride, channels = hidden_dim), + ######################## + Downsample_DWT_tiny(wavename=wavename), + ######################## + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias = False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class _MobileNetV2(nn.Module): + def __init__(self, width_mult=1.0, wavename = 'haar'): + super(_MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + #last_channel = 1280 + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, 4 if width_mult == 0.1 else 8) + #self.last_channel = _make_divisible(last_channel * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else last_channel + features = [ConvBNReLU(3, input_channel, stride = 2)] + # building inverted residual blocks + for t, c, n, s in 
inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) + for i in range(n): + stride = s if i == 0 else 1 + features.append( + ############################### + block(input_channel, output_channel, stride, expand_ratio = t, wavename = wavename)) + ############################### + input_channel = output_channel + # building last several layers + #features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size = 1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + #self.classifier = nn.Sequential( + # nn.Dropout(0.2), + #nn.Linear(self.last_channel, num_classes), + #) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode = 'fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = x.mean([2, 3]) + x = self.classifier(x) + return x + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + #if name is "conv": + #x = module(x) + #if name in self.extracted_layers: + #outputs.append(x) + return outputs + + +class MobilenetV2(nn.Module): + def __init__(self, extract_list, weight_path=None, wavename = 'haar', width_mult=1.): + super(MobilenetV2, self).__init__() + + self.__submodule = _MobileNetV2(width_mult=width_mult, wavename=wavename) + if weight_path: + print("*"*40, "\nLoading weight of MobilenetV2 : {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of MobilenetV2 : {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + +if __name__=='__main__': + #model = MobilenetV2(extract_list=["6", "13", "conv"]) + #model.eval() + #print(model) + #input = torch.randn(32,3,224,224) + #y = model(input) + #print(y) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = MobilenetV2(extract_list=["6", "13", "conv"],width_mult=1.0).cuda() + #stat(net, (3, 544, 544)) + flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + print('GFlops: %.3fG' % (flops / 1e9)) + print('Params: %.2fM' % (params / 1e6)) \ No newline at end of file diff --git a/modelR/backbones/shufflenetv2.py b/modelR/backbones/shufflenetv2.py new file mode 100644 index 0000000..c1eee42 --- /dev/null +++ b/modelR/backbones/shufflenetv2.py @@ -0,0 +1,225 @@ +import torch +import torch.nn as nn + +class ShuffleV2Block(nn.Module): + def __init__(self, inp, oup, mid_channels, *, ksize, stride): + super(ShuffleV2Block, self).__init__() + self.stride = stride + assert stride in 
[1, 2] + + self.mid_channels = mid_channels + self.ksize = ksize + pad = ksize // 2 + self.pad = pad + self.inp = inp + + outputs = oup - inp + + branch_main = [ + # pw + nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(mid_channels), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False), + nn.BatchNorm2d(mid_channels), + # pw-linear + nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + nn.ReLU(inplace=True), + ] + self.branch_main = nn.Sequential(*branch_main) + + if stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + ] + self.branch_proj = nn.Sequential(*branch_proj) + else: + self.branch_proj = None + + def forward(self, old_x): + if self.stride==1: + x_proj, x = self.channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + + def channel_shuffle(self, x): + batchsize, num_channels, height, width = x.data.size() + assert (num_channels % 4 == 0) + x = x.reshape(batchsize * num_channels // 2, 2, height * width) + x = x.permute(1, 0, 2) + x = x.reshape(2, -1, num_channels // 2, height, width) + return x[0], x[1] + + +class ShuffleNetV2(nn.Module): + def __init__(self, model_size='1.5x'): + super(ShuffleNetV2, self).__init__() + print('model size is ', model_size) + + self.stage_repeats = [4, 8, 4] + self.model_size = model_size + if model_size == '0.5x': + self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif model_size == '1.0x': + self.stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif model_size == '1.5x': + self.stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif model_size == '2.0x': + self.stage_out_channels = [-1, 24, 244, 488, 976, 2048] + else: + raise NotImplementedError + + # building first layer + input_channel = self.stage_out_channels[1] + self.first_conv = nn.Sequential( + nn.Conv2d(3, input_channel, 3, 2, 1, bias=False), + nn.BatchNorm2d(input_channel), + nn.ReLU(inplace=True), + ) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.features = [] + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage + 2] + + for i in range(numrepeat): + if i == 0: + self.features.append(ShuffleV2Block(input_channel, output_channel, + mid_channels=output_channel // 2, ksize=3, stride=2)) + else: + self.features.append(ShuffleV2Block(input_channel // 2, output_channel, + mid_channels=output_channel // 2, ksize=3, stride=1)) + + input_channel = output_channel + + self.features = nn.Sequential(*self.features) + + self.conv_last = nn.Sequential( + nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False), + nn.BatchNorm2d(self.stage_out_channels[-1]), + nn.ReLU(inplace=True) + ) + + ''' + self.globalpool = nn.AvgPool2d(7) + if self.model_size == '2.0x': + self.dropout = nn.Dropout(0.2) + self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False)) + ''' + self._initialize_weights() + + def forward(self, x): + x = self.first_conv(x) + x = self.maxpool(x) + x = self.features(x) + x = self.conv_last(x) + + ''' + x = self.globalpool(x) + if self.model_size == '2.0x': + x = 
self.dropout(x) + x = x.contiguous().view(-1, self.stage_out_channels[-1]) + x = self.classifier(x) + ''' + return x + + def _initialize_weights(self): + for name, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + if 'first' in name: + nn.init.normal_(m.weight, 0, 0.01) + else: + nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "first_conv": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + if name is "maxpool": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + if name is "conv_last": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + return outputs + +class ShuffleNet2_Det(nn.Module): + def __init__(self, extract_list, weight_path=None, model_size='1.0x'): + super(ShuffleNet2_Det, self).__init__() + self.__submodule = ShuffleNetV2(model_size=model_size) + if weight_path: + print("*"*40, "\nLoading weight of ShuffleNetv2: {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of ShuffleNetv2: {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + + + +if __name__ == "__main__": + #model = ShuffleNetV2() + #print(model) + + #test_data = torch.rand(5, 3, 224, 224) + #test_outputs = model(test_data) + #print(test_outputs.size()) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = ShuffleNet2_Det(extract_list=["3", "11", "conv_last"], model_size='1.0x') + stat(net, (3, 544, 544)) + #flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + #print('GFlops: %.3fG' % (flops / 1e9)) + #print('Params: %.2fM' % (params / 1e6)) diff --git a/modelR/backbones/shufflenetv2_npattention.py b/modelR/backbones/shufflenetv2_npattention.py new file mode 100644 index 0000000..e0fe419 --- /dev/null +++ b/modelR/backbones/shufflenetv2_npattention.py @@ -0,0 +1,238 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from ..layers.np_attention_blocks import NPAttention,Sobel_Edge_Block + + +class ShuffleV2Block(nn.Module): + def __init__(self, inp, oup, mid_channels, *, ksize, stride): + super(ShuffleV2Block, self).__init__() + self.stride = stride + assert stride in [1, 2] + 
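+ # Channel bookkeeping: the main branch below emits `oup - inp` channels, so concatenating it
+ # with the `inp`-channel passthrough half (stride 1) or projection branch (stride 2)
+ # yields exactly `oup` output channels.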
+ self.mid_channels = mid_channels + self.ksize = ksize + pad = ksize // 2 + self.pad = pad + self.inp = inp + + outputs = oup - inp + + branch_main = [ + # pw + nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(mid_channels), + nn.ReLU(inplace=True), + # dw + nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False), + nn.BatchNorm2d(mid_channels), + # pw-linear + nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + nn.ReLU(inplace=True), + ] + self.branch_main = nn.Sequential(*branch_main) + + if stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + ] + self.branch_proj = nn.Sequential(*branch_proj) + else: + self.branch_proj = None + + def forward(self, old_x): + if self.stride==1: + x_proj, x = self.channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + + def channel_shuffle(self, x): + batchsize, num_channels, height, width = x.data.size() + assert (num_channels % 4 == 0) + x = x.reshape(batchsize * num_channels // 2, 2, height * width) + x = x.permute(1, 0, 2) + x = x.reshape(2, -1, num_channels // 2, height, width) + return x[0], x[1] + + +class ShuffleNetV2(nn.Module): + def __init__(self, model_size='1.5x'): + super(ShuffleNetV2, self).__init__() + print('model size is ', model_size) + + self.stage_repeats = [4, 8, 4] + self.model_size = model_size + if model_size == '0.5x': + self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] + elif model_size == '1.0x': + self.stage_out_channels = [-1, 24, 116, 232, 464, 1024] + elif model_size == '1.5x': + self.stage_out_channels = [-1, 24, 176, 352, 704, 1024] + elif model_size == '2.0x': + self.stage_out_channels = [-1, 24, 244, 488, 976, 2048] + else: + raise NotImplementedError + + # building first layer + input_channel = self.stage_out_channels[1] + self.first_conv = nn.Sequential( + nn.Conv2d(3, input_channel, 3, 2, 1, bias=False), + nn.BatchNorm2d(input_channel), + nn.ReLU(inplace=True), + ) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.features = [] + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage + 2] + + for i in range(numrepeat): + if i == 0: + self.features.append(ShuffleV2Block(input_channel, output_channel, + mid_channels=output_channel // 2, ksize=3, stride=2)) + else: + self.features.append(ShuffleV2Block(input_channel // 2, output_channel, + mid_channels=output_channel // 2, ksize=3, stride=1)) + + input_channel = output_channel + + self.features = nn.Sequential(*self.features) + + + + self.conv_last = nn.Sequential( + nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False), + nn.BatchNorm2d(self.stage_out_channels[-1]), + nn.ReLU(inplace=True) + ) + + ''' + self.globalpool = nn.AvgPool2d(7) + if self.model_size == '2.0x': + self.dropout = nn.Dropout(0.2) + self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False)) + ''' + self._initialize_weights() + + self.__edge = Sobel_Edge_Block(channel_in=3) + self.__npa = NPAttention(input_channel,input_channel,use_scale=False,groups=8) + + def forward(self, x): + edge= self.__edge(x) + x = 
self.first_conv(x) + x = self.maxpool(x) + + x = self.__npa(x, edge) + + x = self.features(x) + x = self.conv_last(x) + + + ''' + x = self.globalpool(x) + if self.model_size == '2.0x': + x = self.dropout(x) + x = x.contiguous().view(-1, self.stage_out_channels[-1]) + x = self.classifier(x) + ''' + return x + + def _initialize_weights(self): + for name, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + if 'first' in name: + nn.init.normal_(m.weight, 0, 0.01) + else: + nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "first_conv": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + if name is "maxpool": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + if name is "features": + for f_name, f_module in module._modules.items(): + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + if name is "conv_last": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + return outputs + +class ShuffleNet2_Det(nn.Module): + def __init__(self, extract_list, weight_path=None, model_size='1.5x'): + super(ShuffleNet2_Det, self).__init__() + self.__submodule = ShuffleNetV2(model_size=model_size) + if weight_path: + print("*"*40, "\nLoading weight of ShuffleNetv2: {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of ShuffleNetv2: {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + + + +if __name__ == "__main__": + #model = ShuffleNetV2() + #print(model) + + #test_data = torch.rand(5, 3, 224, 224) + #test_outputs = model(test_data) + #print(test_outputs.size()) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = ShuffleNet2_Det(extract_list=["3", "11", "conv_last"], model_size='0.5x') + stat(net, (3, 544, 544)) + #flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + #print('GFlops: %.3fG' % (flops / 1e9)) + #print('Params: %.2fM' % (params / 1e6)) diff --git a/modelR/backbones/shufflenetv2plus.py b/modelR/backbones/shufflenetv2plus.py new file mode 100644 index 0000000..156c853 --- /dev/null +++ b/modelR/backbones/shufflenetv2plus.py @@ -0,0 +1,384 @@ +import torch +import torch.nn as nn + +class SELayer(nn.Module): + + def __init__(self, inplanes, isTensor=True): + super(SELayer, self).__init__() + if isTensor: + # 
if the input is (N, C, H, W) + self.SE_opr = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(inplanes, inplanes // 4, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(inplanes // 4), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes // 4, inplanes, kernel_size=1, stride=1, bias=False), + ) + else: + # if the input is (N, C) + self.SE_opr = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Linear(inplanes, inplanes // 4, bias=False), + nn.BatchNorm1d(inplanes // 4), + nn.ReLU(inplace=True), + nn.Linear(inplanes // 4, inplanes, bias=False), + ) + + def forward(self, x): + atten = self.SE_opr(x) + atten = torch.clamp(atten + 3, 0, 6) / 6 + return x * atten + +class HS(nn.Module): + + def __init__(self): + super(HS, self).__init__() + + def forward(self, inputs): + clip = torch.clamp(inputs + 3, 0, 6) / 6 + return inputs * clip + +class Shufflenet(nn.Module): + def __init__(self, inp, oup, base_mid_channels, *, ksize, stride, activation, useSE): + super(Shufflenet, self).__init__() + self.stride = stride + assert stride in [1, 2] + assert ksize in [3, 5, 7] + assert base_mid_channels == oup//2 + + self.base_mid_channel = base_mid_channels + self.ksize = ksize + pad = ksize // 2 + self.pad = pad + self.inp = inp + + outputs = oup - inp + + branch_main = [ + # pw + nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + nn.Conv2d(base_mid_channels, base_mid_channels, ksize, stride, pad, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw-linear + nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + None, + ] + if activation == 'ReLU': + assert useSE == False + '''This model should not have SE with ReLU''' + branch_main[2] = nn.ReLU(inplace=True) + branch_main[-1] = nn.ReLU(inplace=True) + else: + branch_main[2] = HS() + branch_main[-1] = HS() + if useSE: + branch_main.append(SELayer(outputs)) + self.branch_main = nn.Sequential(*branch_main) + + if stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + None, + ] + if activation == 'ReLU': + branch_proj[-1] = nn.ReLU(inplace=True) + else: + branch_proj[-1] = HS() + self.branch_proj = nn.Sequential(*branch_proj) + else: + self.branch_proj = None + + def forward(self, old_x): + if self.stride==1: + x_proj, x = channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + +class Shuffle_Xception(nn.Module): + + def __init__(self, inp, oup, base_mid_channels, *, stride, activation, useSE): + super(Shuffle_Xception, self).__init__() + + assert stride in [1, 2] + assert base_mid_channels == oup//2 + + self.base_mid_channel = base_mid_channels + self.stride = stride + self.ksize = 3 + self.pad = 1 + self.inp = inp + outputs = oup - inp + + branch_main = [ + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw + nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw + nn.Conv2d(base_mid_channels, base_mid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(base_mid_channels), + None, + # dw + 
nn.Conv2d(base_mid_channels, base_mid_channels, 3, stride, 1, groups=base_mid_channels, bias=False), + nn.BatchNorm2d(base_mid_channels), + # pw + nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False), + nn.BatchNorm2d(outputs), + None, + ] + + if activation == 'ReLU': + branch_main[4] = nn.ReLU(inplace=True) + branch_main[9] = nn.ReLU(inplace=True) + branch_main[14] = nn.ReLU(inplace=True) + else: + branch_main[4] = HS() + branch_main[9] = HS() + branch_main[14] = HS() + assert None not in branch_main + + if useSE: + assert activation != 'ReLU' + branch_main.append(SELayer(outputs)) + + self.branch_main = nn.Sequential(*branch_main) + + if self.stride == 2: + branch_proj = [ + # dw + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, inp, 1, 1, 0, bias=False), + nn.BatchNorm2d(inp), + None, + ] + if activation == 'ReLU': + branch_proj[-1] = nn.ReLU(inplace=True) + else: + branch_proj[-1] = HS() + self.branch_proj = nn.Sequential(*branch_proj) + + def forward(self, old_x): + if self.stride==1: + x_proj, x = channel_shuffle(old_x) + return torch.cat((x_proj, self.branch_main(x)), 1) + elif self.stride==2: + x_proj = old_x + x = old_x + return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) + +def channel_shuffle(x): + batchsize, num_channels, height, width = x.data.size() + assert (num_channels % 4 == 0) + x = x.reshape(batchsize * num_channels // 2, 2, height * width) + x = x.permute(1, 0, 2) + x = x.reshape(2, -1, num_channels // 2, height, width) + return x[0], x[1] + +class ShuffleNetV2_Plus(nn.Module): + def __init__(self, architecture=None, model_size='Medium'): + super(ShuffleNetV2_Plus, self).__init__() + + print('model size is ', model_size) + + #assert input_size % 32 == 0 + assert architecture is not None + + self.stage_repeats = [4, 4, 8, 4] + if model_size == 'Large': + self.stage_out_channels = [-1, 16, 68, 168, 336, 672, 1280] + elif model_size == 'Medium': + self.stage_out_channels = [-1, 16, 48, 128, 256, 512, 1280] + elif model_size == 'Small': + self.stage_out_channels = [-1, 16, 36, 104, 208, 416, 1280] + else: + raise NotImplementedError + + + # building first layer + input_channel = self.stage_out_channels[1] + self.first_conv = nn.Sequential( + nn.Conv2d(3, input_channel, 3, 2, 1, bias=False), + nn.BatchNorm2d(input_channel), + HS(), + ) + + self.features = [] + archIndex = 0 + for idxstage in range(len(self.stage_repeats)): + numrepeat = self.stage_repeats[idxstage] + output_channel = self.stage_out_channels[idxstage+2] + + activation = 'HS' if idxstage >= 1 else 'ReLU' + useSE = 'True' if idxstage >= 2 else False + + for i in range(numrepeat): + if i == 0: + inp, outp, stride = input_channel, output_channel, 2 + else: + inp, outp, stride = input_channel // 2, output_channel, 1 + + blockIndex = architecture[archIndex] + archIndex += 1 + if blockIndex == 0: + print('Shuffle3x3') + self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=3, stride=stride, + activation=activation, useSE=useSE)) + elif blockIndex == 1: + print('Shuffle5x5') + self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=5, stride=stride, + activation=activation, useSE=useSE)) + elif blockIndex == 2: + print('Shuffle7x7') + self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=7, stride=stride, + activation=activation, useSE=useSE)) + elif blockIndex == 3: + print('Xception') + self.features.append(Shuffle_Xception(inp, outp, base_mid_channels=outp 
// 2, stride=stride, + activation=activation, useSE=useSE)) + else: + raise NotImplementedError + input_channel = output_channel + assert archIndex == len(architecture) + self.features = nn.Sequential(*self.features) + + + self.conv_last = nn.Sequential( + nn.Conv2d(input_channel, 1280, 1, 1, 0, bias=False), + nn.BatchNorm2d(1280), + HS() + ) + ''' + self.globalpool = nn.AvgPool2d(7) + self.LastSE = SELayer(1280) + self.fc = nn.Sequential( + nn.Linear(1280, 1280, bias=False), + HS(), + ) + self.dropout = nn.Dropout(0.2) + self.classifier = nn.Sequential(nn.Linear(1280, n_class, bias=False)) + ''' + self._initialize_weights() + + def forward(self, x): + x = self.first_conv(x) + #print("aaaaaaaaaa",x.shape) + x = self.features(x) + x = self.conv_last(x) + + #x = self.globalpool(x) + #x = self.LastSE(x) + + #x = x.contiguous().view(-1, 1280) + + #x = self.fc(x) + #x = self.dropout(x) + #x = self.classifier(x) + return x + + def _initialize_weights(self): + for name, m in self.named_modules(): + if isinstance(m, nn.Conv2d): + if 'first' in name or 'SE' in name: + nn.init.normal_(m.weight, 0, 0.01) + else: + nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + if m.bias is not None: + nn.init.constant_(m.bias, 0.0001) + nn.init.constant_(m.running_mean, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + +class FeatureExtractor(nn.Module): + def __init__(self, submodule, extracted_layers): + super(FeatureExtractor, self).__init__() + self.submodule = submodule + self.extracted_layers = extracted_layers + + def forward(self, x): + outputs = [] + for name, module in self.submodule._modules.items(): + if name is "first_conv": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + if name is "features": + for f_name, f_module in module._modules.items(): + #print(module._modules.items()) + #print("aaaa",f_name,x.shape) + x = f_module(x) + if f_name in self.extracted_layers: + outputs.append(x) + if name is "conv_last": + x = module(x) + if name in self.extracted_layers: + outputs.append(x) + return outputs + +class ShuffleNet2Plus_Det(nn.Module): + def __init__(self, extract_list, weight_path=None, model_size='Medium'): + super(ShuffleNet2Plus_Det, self).__init__() + architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] + self.__submodule = ShuffleNetV2_Plus(architecture=architecture, model_size=model_size) + if weight_path: + print("*"*40, "\nLoading weight of ShuffleNetv2Plus: {}".format(weight_path)) + pretrained_dict = torch.load(weight_path) + model_dict = self.__submodule.state_dict() + pretrained_dict = {k:v for k, v in pretrained_dict.items() if k in model_dict} + model_dict.update(pretrained_dict) + self.__submodule.load_state_dict(model_dict) + del pretrained_dict + print("Loaded weight of ShuffleNetv2Plus: {}".format(weight_path)) + self.__extractor = FeatureExtractor(self.__submodule, extract_list) + + def forward(self, x): + return self.__extractor(x) + +if __name__ == "__main__": + #architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] + #model = ShuffleNetV2_Plus(architecture=architecture) + #print(model) + + #test_data = torch.rand(5, 3, 544, 544) 
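+    # Feature-extraction sketch (hypothetical layer names, reusing the commented
+    # test_data above): ShuffleNet2Plus_Det returns the outputs of the layers
+    # listed in extract_list, e.g.
+    #   det = ShuffleNet2Plus_Det(extract_list=["7", "15", "conv_last"])
+    #   feats = det(test_data)   # three maps at strides 8, 16 and 32 of the input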
+ #test_outputs = model(test_data) + #print(test_outputs.size()) + + from model.get_model_complexity import get_model_complexity_info + from torchstat import stat + net = ShuffleNet2Plus_Det(extract_list=[1280, 256, 128], model_size='Large') + stat(net, (3, 544, 544)) + #flops, params = get_model_complexity_info(net, (3, 544, 544), as_strings=False, print_per_layer_stat=True) + #print('GFlops: %.3fG' % (flops / 1e9)) + #print('Params: %.2fM' % (params / 1e6)) + diff --git a/modelR/head/dsc_head.py b/modelR/head/dsc_head.py new file mode 100644 index 0000000..8a738bf --- /dev/null +++ b/modelR/head/dsc_head.py @@ -0,0 +1,64 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F +import config.cfg_lodet as cfg + +class MSigmoid_approx(nn.Module): + def __init__(self): + super(MSigmoid_approx, self).__init__() + def forward(self, input): + return (torch.clamp(torch.sigmoid(input),0.011,1)-0.01)/(1-0.01) + +class DSC_Head(nn.Module): + def __init__(self, nC, anchors, stride, fact): + super(DSC_Head, self).__init__() + self.fact = fact + self.__anchors = anchors + self.__nA = len(anchors) + self.__nC = nC + self.__stride = stride + self.__MSigmoid_approx = MSigmoid_approx() + + def forward(self, p): + bs, nG = p.shape[0], p.shape[-1] + p = p.view(bs, self.__nA, 5 + 5 + self.__nC, nG, nG).permute(0, 3, 4, 1, 2) + p_de = self.__decode(p.clone()) + return (p, p_de) + + def __decode(self, p): + batch_size, output_size = p.shape[:2] + device = p.device + stride = self.__stride + anchors = (1.0 * self.__anchors).to(device) + conv_raw_dxdy = p[:, :, :, :, 0:2] + conv_raw_dwdh = p[:, :, :, :, 2:4] + conv_raw_a = p[:, :, :, :, 4:8] + conv_raw_r = p[:, :, :, :, 8:9] + conv_raw_conf = p[:, :, :, :, 9:10] + conv_raw_prob = p[:, :, :, :, 10:] + y = torch.arange(0, output_size).unsqueeze(1).repeat(1, output_size) + x = torch.arange(0, output_size).unsqueeze(0).repeat(output_size, 1) + grid_xy = torch.stack([x, y], dim=-1) + grid_xy = grid_xy.unsqueeze(0).unsqueeze(3).repeat(batch_size, 1, 1, cfg.MODEL["ANCHORS_PER_SCLAE"], 1).float().to(device) + # pred_xy = (torch.sigmoid(conv_raw_dxdy) + grid_xy) * stride + pred_xy = (torch.sigmoid(conv_raw_dxdy) * 1.05 - ((1.05 - 1) / 2) + grid_xy) * stride + pred_wh = (torch.exp(conv_raw_dwdh) * anchors ) * stride #* self.fact + pred_xywh = torch.cat([pred_xy, pred_wh], dim=-1) + # pred_a = torch.sigmoid(conv_raw_a) + # pred_r = torch.sigmoid(conv_raw_r) + #pred_a = F.relu6(conv_raw_a + 3, inplace=True) / 6 + pred_a = (torch.clamp(torch.sigmoid(conv_raw_a),0.011,1)-0.01)/(1-0.01) + pred_r = F.relu6(conv_raw_r + 3, inplace=True) / 6 + maskr = pred_r + zero = torch.zeros_like(maskr) + one = torch.ones_like(maskr) + maskr = torch.where(maskr > 0.8, zero, one) + pred_a[:, :, :, :, 0:1] = pred_a[:, :, :, :, 0:1] * maskr + pred_a[:, :, :, :, 1:2] = pred_a[:, :, :, :, 1:2] * maskr + pred_a[:, :, :, :, 2:3] = pred_a[:, :, :, :, 2:3] * maskr + pred_a[:, :, :, :, 3:4] = pred_a[:, :, :, :, 3:4] * maskr + + pred_conf = torch.sigmoid(conv_raw_conf) + pred_prob = torch.sigmoid(conv_raw_prob) + pred_bbox = torch.cat([pred_xywh, pred_a, pred_r, pred_conf, pred_prob], dim=-1) + return pred_bbox.view(-1, 5 + 5 + self.__nC) if not self.training else pred_bbox \ No newline at end of file diff --git a/modelR/head/mtr_head.py b/modelR/head/mtr_head.py new file mode 100644 index 0000000..3050631 --- /dev/null +++ b/modelR/head/mtr_head.py @@ -0,0 +1,119 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F +#from dcn_v2 import DCNv2 +from 
modelR.layers.deform_conv_v2 import DeformConv2d, DeformConv2d_offset + + +class hsigmoid(nn.Module): + def forward(self, x): + out = F.relu6(x + 3, inplace=True) / 6 + return out + +class MTR_Head1(nn.Module): + def __init__(self, filters_in, anchor_num, fo_class, temp=False): + super(MTR_Head1, self).__init__() + self.fo_class = fo_class + self.anchor_num = anchor_num + self.temp = temp + + self.__conv_conf = nn.Conv2d(in_channels=filters_in, out_channels=self.anchor_num * 2, kernel_size=1, stride=1, + padding=0)###############conf 和 r + + # self.__conv_offset_mask1 = Convolutional(filters_in, self.anchor_num*4, kernel_size=1, stride=1, pad=0) + self.__conv_offset_mask = nn.Conv2d(in_channels=filters_in, out_channels=3 * 9, kernel_size=1, stride=1, + padding=0, bias=True) + + self.__dconv_loc = DeformConv2d_offset(inc=filters_in, outc=filters_in, kernel_size=3, padding=1, stride=1, bias=None) + #DCNv2(filters_in, filters_in, kernel_size=3, stride=1, padding=1) + + self.__bnloc = nn.BatchNorm2d(filters_in) + self.__reluloc = nn.LeakyReLU(inplace=True) + self.__dconv_locx = nn.Conv2d(filters_in, self.anchor_num * 8, kernel_size=1, stride=1, padding=0) + + self.__dconv_cla = DeformConv2d_offset(inc=filters_in, outc=filters_in, kernel_size=3, padding=1, stride=1, bias=None) + #DCNv2(filters_in, filters_in, kernel_size=3, stride=1, padding=1) + self.__bncla = nn.BatchNorm2d(filters_in) + self.__relucla = nn.LeakyReLU(inplace=True) + self.__dconv_clax = nn.Conv2d(filters_in, self.anchor_num * self.fo_class, kernel_size=1, stride=1, padding=0) + + self.init_offset() + + def init_offset(self): + self.__conv_offset_mask.weight.data.zero_() + self.__conv_offset_mask.bias.data.zero_() + + def forward(self, x): + out_conf = self.__conv_conf(x) + + out_offset_mask = self.__conv_offset_mask(x) + o1, o2, mask = torch.chunk(out_offset_mask, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + + # print(offset.shape) + # if self.temp == True: + # mask = torch.sigmoid(mask*edge) + # else: + #print(offset.shape, mask.shape) + out_loc = self.__dconv_locx(self.__reluloc(self.__bnloc(self.__dconv_loc(x, offset, mask)))) + out_cla = self.__dconv_clax(self.__relucla(self.__bncla(self.__dconv_cla(x, offset, mask)))) + + out_loc1 = out_loc.view(x.shape[0], self.anchor_num, 8, x.shape[2], x.shape[3]).cuda() + out_conf1 = out_conf.view(x.shape[0], self.anchor_num, 2, x.shape[2], x.shape[3]).cuda()####### + out_cla1 = out_cla.view(x.shape[0], self.anchor_num, self.fo_class, x.shape[2], x.shape[3]).cuda() + out = torch.cat((out_loc1, out_conf1, out_cla1), 2).cuda() + return out + +class MTR_Head2(nn.Module): + def __init__(self, nC, anchors, stride): + super(MTR_Head2, self).__init__() + self.__anchors = anchors + self.__nA = len(anchors) + self.__nC = nC + self.__stride = stride + + def forward(self, p): + p = p.permute(0, 3, 4, 1, 2) + #print(p.shape) + p_de = self.__decode(p.clone()) + return (p, p_de) + def __decode(self, p): + batch_size, output_size = p.shape[:2] + device = p.device + stride = self.__stride + anchors = (1.0 * self.__anchors).to(device) + conv_raw_dxdy = p[:, :, :, :, 0:2] + conv_raw_dwdh = p[:, :, :, :, 2:4] + conv_raw_a = p[:, :, :, :, 4:8] + conv_raw_r = p[:, :, :, :, 8:9] + conv_raw_conf = p[:, :, :, :, 9:10] + conv_raw_prob = p[:, :, :, :, 10:] + y = torch.arange(0, output_size).unsqueeze(1).repeat(1, output_size) + x = torch.arange(0, output_size).unsqueeze(0).repeat(output_size, 1) + grid_xy = torch.stack([x, y], dim=-1) + grid_xy = 
grid_xy.unsqueeze(0).unsqueeze(3).repeat(batch_size, 1, 1, 3, 1).float().to(device) + #pred_xy = (torch.sigmoid(conv_raw_dxdy) + grid_xy) * stride + pred_xy = (torch.sigmoid(conv_raw_dxdy)*1.05 - ((1.05-1)/2) + grid_xy) * stride + pred_wh = (torch.exp(conv_raw_dwdh) * anchors) * stride + pred_xywh = torch.cat([pred_xy, pred_wh], dim=-1) + #pred_a = torch.sigmoid(conv_raw_a) + #pred_r = torch.sigmoid(conv_raw_r) + #pred_a = F.relu6(conv_raw_a + 3, inplace=True)/6 + pred_a = (torch.clamp(torch.sigmoid(conv_raw_a), 0.011, 1) - 0.01) / (1 - 0.01) + pred_r = F.relu6(conv_raw_r + 3, inplace=True)/6 + + + maskr = pred_r + zero = torch.zeros_like(maskr) + one = torch.ones_like(maskr) + maskr = torch.where(maskr > 0.8, zero, one) + pred_a[:, :, :, :, 0:1] = pred_a[:, :, :, :, 0:1]*maskr + pred_a[:, :, :, :, 1:2] = pred_a[:, :, :, :, 1:2] * maskr + pred_a[:, :, :, :, 2:3] = pred_a[:, :, :, :, 2:3] * maskr + pred_a[:, :, :, :, 3:4] = pred_a[:, :, :, :, 3:4] * maskr + + pred_conf = torch.sigmoid(conv_raw_conf) + pred_prob = torch.sigmoid(conv_raw_prob) + pred_bbox = torch.cat([pred_xywh, pred_a, pred_r, pred_conf, pred_prob], dim=-1) + return pred_bbox.view(-1, 5 + 5 + self.__nC) if not self.training else pred_bbox diff --git a/modelR/layers/DWT_IDWT_Functions.py b/modelR/layers/DWT_IDWT_Functions.py new file mode 100644 index 0000000..f8af40e --- /dev/null +++ b/modelR/layers/DWT_IDWT_Functions.py @@ -0,0 +1,168 @@ +# Copyright (c) 2019, Adobe Inc. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike +# 4.0 International Public License. To view a copy of this license, visit +# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode. + +""" +自定义pytorch函数,实现一维、二维、三维张量的DWT和IDWT,未考虑边界延拓 +只有当图像行列数都是偶数,且重构滤波器组低频分量长度为2时,才能精确重构,否则在边界处有误差。 +""" +import torch +from torch.autograd import Function + +class DWTFunction_1D(Function): + @staticmethod + def forward(ctx, input, matrix_Low, matrix_High): + ctx.save_for_backward(matrix_Low, matrix_High) + L = torch.matmul(input, matrix_Low.t()) + H = torch.matmul(input, matrix_High.t()) + return L, H + @staticmethod + def backward(ctx, grad_L, grad_H): + matrix_L, matrix_H = ctx.saved_variables + grad_input = torch.add(torch.matmul(grad_L, matrix_L), torch.matmul(grad_H, matrix_H)) + return grad_input, None, None + + +class IDWTFunction_1D(Function): + @staticmethod + def forward(ctx, input_L, input_H, matrix_L, matrix_H): + ctx.save_for_backward(matrix_L, matrix_H) + output = torch.add(torch.matmul(input_L, matrix_L), torch.matmul(input_H, matrix_H)) + return output + @staticmethod + def backward(ctx, grad_output): + matrix_L, matrix_H = ctx.saved_variables + grad_L = torch.matmul(grad_output, matrix_L.t()) + grad_H = torch.matmul(grad_output, matrix_H.t()) + return grad_L, grad_H, None, None + + +class DWTFunction_2D(Function): + @staticmethod + def forward(ctx, input, matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1): + ctx.save_for_backward(matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1) + L = torch.matmul(matrix_Low_0, input) + H = torch.matmul(matrix_High_0, input) + LL = torch.matmul(L, matrix_Low_1) + LH = torch.matmul(L, matrix_High_1) + HL = torch.matmul(H, matrix_Low_1) + HH = torch.matmul(H, matrix_High_1) + return LL, LH, HL, HH + @staticmethod + def backward(ctx, grad_LL, grad_LH, grad_HL, grad_HH): + matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1 = ctx.saved_variables + grad_L = torch.add(torch.matmul(grad_LL, matrix_Low_1.t()), torch.matmul(grad_LH, 
matrix_High_1.t())) + grad_H = torch.add(torch.matmul(grad_HL, matrix_Low_1.t()), torch.matmul(grad_HH, matrix_High_1.t())) + grad_input = torch.add(torch.matmul(matrix_Low_0.t(), grad_L), torch.matmul(matrix_High_0.t(), grad_H)) + return grad_input, None, None, None, None + + +class DWTFunction_2D_tiny(Function): + @staticmethod + def forward(ctx, input, matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1): + ctx.save_for_backward(matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1) + L = torch.matmul(matrix_Low_0, input) + LL = torch.matmul(L, matrix_Low_1) + return LL + @staticmethod + def backward(ctx, grad_LL): + matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1 = ctx.saved_variables + grad_L = torch.matmul(grad_LL, matrix_Low_1.t()) + grad_input = torch.matmul(matrix_Low_0.t(), grad_L) + return grad_input, None, None, None, None + + +class IDWTFunction_2D(Function): + @staticmethod + def forward(ctx, input_LL, input_LH, input_HL, input_HH, + matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1): + ctx.save_for_backward(matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1) + L = torch.add(torch.matmul(input_LL, matrix_Low_1.t()), torch.matmul(input_LH, matrix_High_1.t())) + H = torch.add(torch.matmul(input_HL, matrix_Low_1.t()), torch.matmul(input_HH, matrix_High_1.t())) + output = torch.add(torch.matmul(matrix_Low_0.t(), L), torch.matmul(matrix_High_0.t(), H)) + return output + @staticmethod + def backward(ctx, grad_output): + matrix_Low_0, matrix_Low_1, matrix_High_0, matrix_High_1 = ctx.saved_variables + grad_L = torch.matmul(matrix_Low_0, grad_output) + grad_H = torch.matmul(matrix_High_0, grad_output) + grad_LL = torch.matmul(grad_L, matrix_Low_1) + grad_LH = torch.matmul(grad_L, matrix_High_1) + grad_HL = torch.matmul(grad_H, matrix_Low_1) + grad_HH = torch.matmul(grad_H, matrix_High_1) + return grad_LL, grad_LH, grad_HL, grad_HH, None, None, None, None + + +class DWTFunction_3D(Function): + @staticmethod + def forward(ctx, input, + matrix_Low_0, matrix_Low_1, matrix_Low_2, + matrix_High_0, matrix_High_1, matrix_High_2): + ctx.save_for_backward(matrix_Low_0, matrix_Low_1, matrix_Low_2, + matrix_High_0, matrix_High_1, matrix_High_2) + L = torch.matmul(matrix_Low_0, input) + H = torch.matmul(matrix_High_0, input) + LL = torch.matmul(L, matrix_Low_1).transpose(dim0 = 2, dim1 = 3) + LH = torch.matmul(L, matrix_High_1).transpose(dim0 = 2, dim1 = 3) + HL = torch.matmul(H, matrix_Low_1).transpose(dim0 = 2, dim1 = 3) + HH = torch.matmul(H, matrix_High_1).transpose(dim0 = 2, dim1 = 3) + LLL = torch.matmul(matrix_Low_2, LL).transpose(dim0 = 2, dim1 = 3) + LLH = torch.matmul(matrix_Low_2, LH).transpose(dim0 = 2, dim1 = 3) + LHL = torch.matmul(matrix_Low_2, HL).transpose(dim0 = 2, dim1 = 3) + LHH = torch.matmul(matrix_Low_2, HH).transpose(dim0 = 2, dim1 = 3) + HLL = torch.matmul(matrix_High_2, LL).transpose(dim0 = 2, dim1 = 3) + HLH = torch.matmul(matrix_High_2, LH).transpose(dim0 = 2, dim1 = 3) + HHL = torch.matmul(matrix_High_2, HL).transpose(dim0 = 2, dim1 = 3) + HHH = torch.matmul(matrix_High_2, HH).transpose(dim0 = 2, dim1 = 3) + return LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH + + @staticmethod + def backward(ctx, grad_LLL, grad_LLH, grad_LHL, grad_LHH, + grad_HLL, grad_HLH, grad_HHL, grad_HHH): + matrix_Low_0, matrix_Low_1, matrix_Low_2, matrix_High_0, matrix_High_1, matrix_High_2 = ctx.saved_variables + grad_LL = torch.add(torch.matmul(matrix_Low_2.t(), grad_LLL.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), grad_HLL.transpose(dim0 = 2, dim1 = 
3))).transpose(dim0 = 2, dim1 = 3) + grad_LH = torch.add(torch.matmul(matrix_Low_2.t(), grad_LLH.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), grad_HLH.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + grad_HL = torch.add(torch.matmul(matrix_Low_2.t(), grad_LHL.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), grad_HHL.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + grad_HH = torch.add(torch.matmul(matrix_Low_2.t(), grad_LHH.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), grad_HHH.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + grad_L = torch.add(torch.matmul(grad_LL, matrix_Low_1.t()), torch.matmul(grad_LH, matrix_High_1.t())) + grad_H = torch.add(torch.matmul(grad_HL, matrix_Low_1.t()), torch.matmul(grad_HH, matrix_High_1.t())) + grad_input = torch.add(torch.matmul(matrix_Low_0.t(), grad_L), torch.matmul(matrix_High_0.t(), grad_H)) + return grad_input, None, None, None, None, None, None, None, None + + +class IDWTFunction_3D(Function): + @staticmethod + def forward(ctx, input_LLL, input_LLH, input_LHL, input_LHH, + input_HLL, input_HLH, input_HHL, input_HHH, + matrix_Low_0, matrix_Low_1, matrix_Low_2, + matrix_High_0, matrix_High_1, matrix_High_2): + ctx.save_for_backward(matrix_Low_0, matrix_Low_1, matrix_Low_2, + matrix_High_0, matrix_High_1, matrix_High_2) + input_LL = torch.add(torch.matmul(matrix_Low_2.t(), input_LLL.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), input_HLL.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + input_LH = torch.add(torch.matmul(matrix_Low_2.t(), input_LLH.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), input_HLH.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + input_HL = torch.add(torch.matmul(matrix_Low_2.t(), input_LHL.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), input_HHL.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + input_HH = torch.add(torch.matmul(matrix_Low_2.t(), input_LHH.transpose(dim0 = 2, dim1 = 3)), torch.matmul(matrix_High_2.t(), input_HHH.transpose(dim0 = 2, dim1 = 3))).transpose(dim0 = 2, dim1 = 3) + input_L = torch.add(torch.matmul(input_LL, matrix_Low_1.t()), torch.matmul(input_LH, matrix_High_1.t())) + input_H = torch.add(torch.matmul(input_HL, matrix_Low_1.t()), torch.matmul(input_HH, matrix_High_1.t())) + output = torch.add(torch.matmul(matrix_Low_0.t(), input_L), torch.matmul(matrix_High_0.t(), input_H)) + return output + @staticmethod + def backward(ctx, grad_output): + matrix_Low_0, matrix_Low_1, matrix_Low_2, matrix_High_0, matrix_High_1, matrix_High_2 = ctx.saved_variables + grad_L = torch.matmul(matrix_Low_0, grad_output) + grad_H = torch.matmul(matrix_High_0, grad_output) + grad_LL = torch.matmul(grad_L, matrix_Low_1).transpose(dim0 = 2, dim1 = 3) + grad_LH = torch.matmul(grad_L, matrix_High_1).transpose(dim0 = 2, dim1 = 3) + grad_HL = torch.matmul(grad_H, matrix_Low_1).transpose(dim0 = 2, dim1 = 3) + grad_HH = torch.matmul(grad_H, matrix_High_1).transpose(dim0 = 2, dim1 = 3) + grad_LLL = torch.matmul(matrix_Low_2, grad_LL).transpose(dim0 = 2, dim1 = 3) + grad_LLH = torch.matmul(matrix_Low_2, grad_LH).transpose(dim0 = 2, dim1 = 3) + grad_LHL = torch.matmul(matrix_Low_2, grad_HL).transpose(dim0 = 2, dim1 = 3) + grad_LHH = torch.matmul(matrix_Low_2, grad_HH).transpose(dim0 = 2, dim1 = 3) + grad_HLL = torch.matmul(matrix_High_2, grad_LL).transpose(dim0 = 2, dim1 = 3) + grad_HLH = torch.matmul(matrix_High_2, grad_LH).transpose(dim0 = 2, dim1 
= 3) + grad_HHL = torch.matmul(matrix_High_2, grad_HL).transpose(dim0 = 2, dim1 = 3) + grad_HHH = torch.matmul(matrix_High_2, grad_HH).transpose(dim0 = 2, dim1 = 3) + return grad_LLL, grad_LLH, grad_LHL, grad_LHH, grad_HLL, grad_HLH, grad_HHL, grad_HHH, None, None, None, None, None, None diff --git a/modelR/layers/__init__.py b/modelR/layers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelR/layers/activations.py b/modelR/layers/activations.py new file mode 100644 index 0000000..95caa5c --- /dev/null +++ b/modelR/layers/activations.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class Swish(nn.Module): # + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + +class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() + @staticmethod + def forward(x): + # return x * F.hardsigmoid(x) # for torchscript and CoreML + return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX + +class MemoryEfficientSwish(nn.Module): + class F(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x * torch.sigmoid(x) + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + return grad_output * (sx * (1 + x * (1 - sx))) + def forward(self, x): + return self.F.apply(x) + +class Mish(nn.Module): + @staticmethod + def forward(x): + return x * F.softplus(x).tanh() + +class MemoryEfficientMish(nn.Module): + class F(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + fx = F.softplus(x).tanh() + return grad_output * (fx + x * sx * (1 - fx * fx)) + def forward(self, x): + return self.F.apply(x) + +class FReLU(nn.Module): + def __init__(self, c1, k=3): # ch_in, kernel + super().__init__() + self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1) + self.bn = nn.BatchNorm2d(c1) + + def forward(self, x): + return torch.max(x, self.bn(self.conv(x))) \ No newline at end of file diff --git a/modelR/layers/attention_blocks.py b/modelR/layers/attention_blocks.py new file mode 100644 index 0000000..dabd6b9 --- /dev/null +++ b/modelR/layers/attention_blocks.py @@ -0,0 +1,190 @@ +import torch.nn as nn +import torch + +class hsigmoid(nn.Module): + def forward(self,x): + out=x*nn.ReLU6(x+3,inplace=True)/6 + return out + +class SELayer(nn.Module): + """SENet + """ + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + #nn.Conv2d(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0, bias=False), + #nn.BatchNorm2d(channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + #nn.Conv2d(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False), + #nn.BatchNorm2d(channel), + nn.Sigmoid(), + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y.expand_as(x) + +class NonLocalBlock(nn.Module): + """Non-local Network + """ + def __init__(self, channel): + super(NonLocalBlock, self).__init__() + self.inter_channel = channel // 2 + self.conv_phi = nn.Conv2d(in_channels=channel, 
out_channels=self.inter_channel, kernel_size=1, stride=1,padding=0, bias=False) + self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False) + self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False) + self.softmax = nn.Softmax(dim=1) + self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False) + + def forward(self, x): + # [N, C, H , W] + b, c, h, w = x.size() + # [N, C/2, H * W] + x_phi = self.conv_phi(x).view(b, c, -1) + # [N, H * W, C/2] + x_theta = self.conv_theta(x).view(b, c, -1).permute(0, 2, 1).contiguous() + x_g = self.conv_g(x).view(b, c, -1).permute(0, 2, 1).contiguous() + # [N, H * W, H * W] + mul_theta_phi = torch.matmul(x_theta, x_phi) + mul_theta_phi = self.softmax(mul_theta_phi) + # [N, H * W, C/2] + mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g) + # [N, C/2, H, W] + mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w) + # [N, C, H , W] + mask = self.conv_mask(mul_theta_phi_g) + out = mask + x + return out + +class ContextBlock(nn.Module): + """GCNet + """ + def __init__(self,inplanes,ratio,pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() + valid_fusion_types = ['channel_add', 'channel_mul'] + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + + self.inplanes = inplanes + self.ratio = ratio + self.planes = int(inplanes * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.inplanes, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.inplanes, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) + else: + self.channel_mul_conv = None + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if 
self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + return out + +class SpatialCGNL(nn.Module): + """Spatial CGNL block with dot production kernel for image classfication. + """ + def __init__(self, inplanes, planes, use_scale=False, groups=None): + self.use_scale = use_scale + self.groups = groups + super(SpatialCGNL, self).__init__() + self.t = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + self.p = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + self.g = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + self.z = nn.Conv2d(planes, inplanes, kernel_size=1, stride=1, groups=self.groups, bias=False) + self.gn = nn.GroupNorm(num_groups=self.groups, num_channels=inplanes) + + def kernel(self, t, p, g, b, c, h, w): + t = t.view(b, 1, c * h * w) + p = p.view(b, 1, c * h * w) + g = g.view(b, c * h * w, 1) + att = torch.bmm(p, g) + if self.use_scale: + att = att.div((c*h*w)**0.5) + + x = torch.bmm(att, t) + x = x.view(b, c, h, w) + return x + + def forward(self, x): + residual = x + t = self.t(x) + p = self.p(x) + g = self.g(x) + b, c, h, w = t.size() + if self.groups and self.groups > 1: + _c = int(c / self.groups) + ts = torch.split(t, split_size_or_sections=_c, dim=1) + ps = torch.split(p, split_size_or_sections=_c, dim=1) + gs = torch.split(g, split_size_or_sections=_c, dim=1) + _t_sequences = [] + for i in range(self.groups): + _x = self.kernel(ts[i], ps[i], gs[i], + b, _c, h, w) + _t_sequences.append(_x) + x = torch.cat(_t_sequences, dim=1) + else: + x = self.kernel(t, p, g, + b, c, h, w) + x = self.z(x) + x = self.gn(x) + residual + return x diff --git a/modelR/layers/blur_pool.py b/modelR/layers/blur_pool.py new file mode 100644 index 0000000..339863f --- /dev/null +++ b/modelR/layers/blur_pool.py @@ -0,0 +1,125 @@ +import torch +import torch.nn.parallel +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from ..layers.convolutions import Convolutional + +class Downsample(nn.Module): + def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): + super(Downsample, self).__init__() + self.filt_size = filt_size + self.pad_off = pad_off + self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))] + self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes] + self.stride = stride + self.off = int((self.stride-1)/2.) 
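+        # BlurPool-style anti-aliased downsampling (as in "Making Convolutional
+        # Networks Shift-Invariant Again"): a fixed binomial kernel (the outer
+        # product of [1., 2., 1.] with itself for filt_size=3) is normalised,
+        # registered as a buffer and applied depthwise (groups = input channels)
+        # after padding, before subsampling by `stride`.
+        # Shape sketch (illustrative): Downsample(channels=64, filt_size=3, stride=2)
+        # maps (N, 64, 56, 56) -> (N, 64, 28, 28).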
+ self.channels = channels + + # print('Filter size [%i]'%filt_size) + if(self.filt_size==1): + a = np.array([1.,]) + elif(self.filt_size==2): + a = np.array([1., 1.]) + elif(self.filt_size==3): + a = np.array([1., 2., 1.]) + elif(self.filt_size==4): + a = np.array([1., 3., 3., 1.]) + elif(self.filt_size==5): + a = np.array([1., 4., 6., 4., 1.]) + elif(self.filt_size==6): + a = np.array([1., 5., 10., 10., 5., 1.]) + elif(self.filt_size==7): + a = np.array([1., 6., 15., 20., 15., 6., 1.]) + + filt = torch.Tensor(a[:,None]*a[None,:]) + filt = filt/torch.sum(filt) + self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1))) + + self.pad = get_pad_layer(pad_type)(self.pad_sizes) + + def forward(self, inp): + if(self.filt_size==1): + if(self.pad_off==0): + return inp[:,:,::self.stride,::self.stride] + else: + return self.pad(inp)[:,:,::self.stride,::self.stride] + else: + return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) + +def get_pad_layer(pad_type): + if(pad_type in ['refl','reflect']): + PadLayer = nn.ReflectionPad2d + elif(pad_type in ['repl','replicate']): + PadLayer = nn.ReplicationPad2d + elif(pad_type=='zero'): + PadLayer = nn.ZeroPad2d + else: + print('Pad type [%s] not recognized'%pad_type) + return PadLayer + + +class Downsample1D(nn.Module): + def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): + super(Downsample1D, self).__init__() + self.filt_size = filt_size + self.pad_off = pad_off + self.pad_sizes = [int(1. * (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))] + self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes] + self.stride = stride + self.off = int((self.stride - 1) / 2.) + self.channels = channels + + # print('Filter size [%i]' % filt_size) + if(self.filt_size == 1): + a = np.array([1., ]) + elif(self.filt_size == 2): + a = np.array([1., 1.]) + elif(self.filt_size == 3): + a = np.array([1., 2., 1.]) + elif(self.filt_size == 4): + a = np.array([1., 3., 3., 1.]) + elif(self.filt_size == 5): + a = np.array([1., 4., 6., 4., 1.]) + elif(self.filt_size == 6): + a = np.array([1., 5., 10., 10., 5., 1.]) + elif(self.filt_size == 7): + a = np.array([1., 6., 15., 20., 15., 6., 1.]) + + filt = torch.Tensor(a) + filt = filt / torch.sum(filt) + self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1))) + + self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes) + + def forward(self, inp): + if(self.filt_size == 1): + if(self.pad_off == 0): + return inp[:, :, ::self.stride] + else: + return self.pad(inp)[:, :, ::self.stride] + else: + return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) + +def get_pad_layer_1d(pad_type): + if(pad_type in ['refl', 'reflect']): + PadLayer = nn.ReflectionPad1d + elif(pad_type in ['repl', 'replicate']): + PadLayer = nn.ReplicationPad1d + elif(pad_type == 'zero'): + PadLayer = nn.ZeroPad1d + else: + print('Pad type [%s] not recognized' % pad_type) + return PadLayer + +class downs(nn.Module): + def __init__(self, filters_in, filters_out, groups=4): + super(downs, self).__init__() + self.__pw = Convolutional(filters_in=filters_in, filters_out=filters_out, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__ds = Downsample(filt_size=3, stride=2, channels=filters_out) + + def forward(self, x): + x = self.__pw(x) + out = self.__ds(x) + return out diff --git a/modelR/layers/conv_blocks.py b/modelR/layers/conv_blocks.py new file mode 100644 index 0000000..a0224fd --- /dev/null +++ 
b/modelR/layers/conv_blocks.py @@ -0,0 +1,64 @@ +import torch.nn as nn +from ..layers.convolutions import Convolutional + +class Residual_block(nn.Module): + def __init__(self, filters_in, filters_out, filters_medium): + super(Residual_block, self).__init__() + self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_medium, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__conv2 = Convolutional(filters_in=filters_medium, filters_out=filters_out, kernel_size=3, + stride=1, pad=1, norm="bn", activate="leaky") + + def forward(self, x): + r = self.__conv1(x) + r = self.__conv2(r) + out = x + r + return out + +class InvertedResidual_block(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual_block, self).__init__() + self.__stride = stride + hidden_dim = int(inp * expand_ratio) + self.use_res_connect = self.__stride == 1 and inp==oup + if expand_ratio==1: + self.__conv = nn.Sequential( + Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3, + stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"), + Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1, + stride=1, pad=0, norm="bn") + ) + else: + self.__conv = nn.Sequential( + Convolutional(filters_in=inp, filters_out=hidden_dim, kernel_size=1, + stride=1, pad=0, norm="bn", activate="relu6"), + Convolutional(filters_in=hidden_dim, filters_out=hidden_dim, kernel_size=3, + stride=self.__stride, pad=1, groups=hidden_dim, norm="bn", activate="relu6"), + Convolutional(filters_in=hidden_dim, filters_out=oup, kernel_size=1, + stride=1, pad=0, norm="bn") + ) + + def forward(self, x): + if self.use_res_connect: + return x + self.__conv(x) + else: + return self.__conv(x) + +class Residual_block_CSP(nn.Module): + def __init__(self, filters_in): + super(Residual_block_CSP, self).__init__() + self.__conv1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__conv2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=1, norm="bn", activate="leaky") + + def forward(self, x): + r = self.__conv1(x) + r = self.__conv2(r) + out = x + r + return out + + + + + diff --git a/modelR/layers/convolutions.py b/modelR/layers/convolutions.py new file mode 100644 index 0000000..aa53cab --- /dev/null +++ b/modelR/layers/convolutions.py @@ -0,0 +1,262 @@ +from .activations import * +from modelR.plugandplay.DynamicConv import Dynamic_conv2d +from modelR.plugandplay.CondConv import CondConv2d, route_func +from modelR.layers.deform_conv_v2 import DeformConv2d + +norm_name = {"bn": nn.BatchNorm2d} +activate_name = { + "relu": nn.ReLU, + "leaky": nn.LeakyReLU, + "relu6": nn.ReLU6, + "Mish": Mish, + "Swish": Swish, + "MEMish": MemoryEfficientMish, + "MESwish": MemoryEfficientSwish, + "FReLu": FReLU +} + +class Convolutional(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, dila=1, norm=None, activate=None): + super(Convolutional, self).__init__() + self.norm = norm + self.activate = activate + self.__conv = nn.Conv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size, + stride=stride, padding=pad, bias=not norm, groups=groups, dilation=dila) + if norm: + assert norm in norm_name.keys() + if norm == "bn": + self.__norm = norm_name[norm](num_features=filters_out) + if activate: + assert activate in activate_name.keys() + if activate == "leaky": + self.__activate = 
activate_name[activate](negative_slope=0.1, inplace=True) + if activate == "relu": + self.__activate = activate_name[activate](inplace=True) + if activate == "relu6": + self.__activate = activate_name[activate](inplace=True) + if activate == "Mish": + self.__activate = activate_name[activate]() + if activate == "Swish": + self.__activate = activate_name[activate]() + if activate == "MEMish": + self.__activate = activate_name[activate]() + if activate == "MESwish": + self.__activate = activate_name[activate]() + if activate == "FReLu": + self.__activate = activate_name[activate]() + + def forward(self, x): + x = self.__conv(x) + if self.norm: + x = self.__norm(x) + if self.activate: + x = self.__activate(x) + return x + +class DeConvolutional(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size=4, stride=2, pad=1, output_pad=0, groups=1, dila=1, norm=None, activate=None): + super(DeConvolutional, self).__init__() + self.norm = norm + self.activate = activate + if kernel_size == 4: + pad = 1 + output_pad = 0 + elif kernel_size == 3: + pad = 1 + output_pad = 1 + elif kernel_size == 2: + pad = 0 + output_pad = 0 + self.__deconv = nn.ConvTranspose2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size, stride=stride, padding=pad, output_padding=output_pad) + if norm: + assert norm in norm_name.keys() + if norm == "bn": + self.__norm = norm_name[norm](num_features=filters_out) + if activate: + assert activate in activate_name.keys() + if activate == "leaky": + self.__activate = activate_name[activate](negative_slope=0.1, inplace=True) + if activate == "relu": + self.__activate = activate_name[activate](inplace=True) + if activate == "relu6": + self.__activate = activate_name[activate](inplace=True) + if activate == "Mish": + self.__activate = Mish() + if activate == "Swish": + self.__activate = Swish() + if activate == "MEMish": + self.__activate = MemoryEfficientMish() + if activate == "MESwish": + self.__activate = MemoryEfficientSwish() + if activate == "FReLu": + self.__activate = FReLU() + + def forward(self, x): + x = self.__deconv(x) + if self.norm: + x = self.__norm(x) + if self.activate: + x = self.__activate(x) + return x + + +class Separable_Conv(nn.Module): + def __init__(self, filters_in, filters_out, stride, norm="bn", activate="relu6"): + super(Separable_Conv, self).__init__() + + self.__dw = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=stride, pad=1, groups=filters_in, norm=norm, activate=activate) + + self.__pw = Convolutional(filters_in=filters_in, filters_out=filters_out, kernel_size=1, + stride=1, pad=0, norm=norm, activate=activate) + + def forward(self, x): + return self.__pw(self.__dw(x)) + + +class Deformable_Convolutional(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size, stride, pad, groups=1, norm=None, activate=None): + super(Deformable_Convolutional, self).__init__() + self.norm = norm + self.activate = activate + self.__dcn = DeformConv2d(inc=filters_in, outc=filters_out, kernel_size=kernel_size, padding=pad, stride=stride, bias=None, modulation=True) + #DCN(filters_in, filters_out, kernel_size=kernel_size, stride=stride, padding=pad, deformable_groups=groups).cuda() + if norm: + assert norm in norm_name.keys() + if norm == "bn": + self.__norm = norm_name[norm](num_features=filters_out) + if activate: + assert activate in activate_name.keys() + if activate == "leaky": + self.__activate = activate_name[activate](negative_slope=0.1, inplace=True) + if activate == "relu": 
+ self.__activate = activate_name[activate](inplace=True) + if activate == "relu6": + self.__activate = activate_name[activate](inplace=True) + if activate == "Mish": + self.__activate = Mish() + if activate == "Swish": + self.__activate = Swish() + if activate == "MEMish": + self.__activate = MemoryEfficientMish() + if activate == "MESwish": + self.__activate = MemoryEfficientSwish() + if activate == "FReLu": + self.__activate = FReLU() + + def forward(self, x): + x = self.__dcn(x) + if self.norm: + x = self.__norm(x) + if self.activate: + x = self.__activate(x) + return x + +class Separable_Conv_dila(nn.Module): + def __init__(self, filters_in, filters_out, stride, pad, dila): + super(Separable_Conv_dila, self).__init__() + + self.__dw = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, stride=stride, + pad=pad, groups=filters_in, dila=dila, norm="bn", activate="relu6") + #self.__se=SELayer(filters_in) + self.__pw = Convolutional(filters_in=filters_in, filters_out=filters_out, kernel_size=1, stride=1, + pad=0, norm="bn", activate="relu6") + + def channel_shuffle(self, features, groups=2): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % groups == 0) + channels_per_group = num_channels // groups + # reshape + features = features.view(batchsize, groups, channels_per_group, height, width) + features = torch.transpose(features, 1, 2).contiguous() + # flatten + features = features.view(batchsize, -1, height, width) + return features + + def forward(self, x): + #return self.__pw(self.__se(self.__dw(x))) + out = self.__pw(self.__dw(x)) + #out = self.channel_shuffle(out) + return out + + +class Cond_Convolutional(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size, stride=1, pad=0, dila=1, groups=1, bias=True, num_experts=1, norm=None, activate=None): + + super(Cond_Convolutional, self).__init__() + self.norm = norm + self.activate = activate + self.__conv = CondConv2d(in_channels=filters_in, out_channels=filters_out, kernel_size=kernel_size, + stride=stride, padding=pad, dilation=dila, groups=groups, bias=bias, num_experts=num_experts) + self.__routef = route_func(filters_in, num_experts) + if norm: + assert norm in norm_name.keys() + if norm == "bn": + self.__norm = norm_name[norm](num_features=filters_out) + if activate: + assert activate in activate_name.keys() + if activate == "leaky": + self.__activate = activate_name[activate](negative_slope=0.1, inplace=True) + if activate == "relu": + self.__activate = activate_name[activate](inplace=True) + if activate == "relu6": + self.__activate = activate_name[activate](inplace=True) + if activate == "Mish": + self.__activate = Mish() + if activate == "Swish": + self.__activate = Swish() + if activate == "MEMish": + self.__activate = MemoryEfficientMish() + if activate == "MESwish": + self.__activate = MemoryEfficientSwish() + if activate == "FReLu": + self.__activate = FReLU() + + def forward(self, x): + routef = self.__routef(x) + x = self.__conv(x,routef) + if self.norm: + x = self.__norm(x) + if self.activate: + x = self.__activate(x) + return x + + +class Dynamic_Convolutional(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size, stride=1, pad=0, dila=1, groups=1, bias=True, K=4, temperature=34, norm=None, activate=None): + + super(Dynamic_Convolutional, self).__init__() + self.norm = norm + self.activate = activate + self.__conv = Dynamic_conv2d(in_planes=filters_in, out_planes=filters_out, kernel_size=kernel_size, + ratio=0.25, stride=stride, 
padding=pad, dilation=dila, groups=groups, bias=bias, K=K, temperature=temperature, init_weight=True) + if norm: + assert norm in norm_name.keys() + if norm == "bn": + self.__norm = norm_name[norm](num_features=filters_out) + if activate: + assert activate in activate_name.keys() + if activate == "leaky": + self.__activate = activate_name[activate](negative_slope=0.1, inplace=True) + if activate == "relu": + self.__activate = activate_name[activate](inplace=True) + if activate == "relu6": + self.__activate = activate_name[activate](inplace=True) + if activate == "Mish": + self.__activate = Mish() + if activate == "Swish": + self.__activate = Swish() + if activate == "MEMish": + self.__activate = MemoryEfficientMish() + if activate == "MESwish": + self.__activate = MemoryEfficientSwish() + if activate == "FReLu": + self.__activate = FReLU() + + def forward(self, x): + x = self.__conv(x) + if self.norm: + x = self.__norm(x) + if self.activate: + x = self.__activate(x) + return x diff --git a/modelR/layers/deconvolutions.py b/modelR/layers/deconvolutions.py new file mode 100644 index 0000000..7d8cc8c --- /dev/null +++ b/modelR/layers/deconvolutions.py @@ -0,0 +1,221 @@ +# +# Modified by Peize Sun +# Contact: sunpeize@foxmail.com +# +# Copyright (c) https://github.com/FateScript/CenterNet-better +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# + +import math +import torch +import torch.nn as nn +import fvcore.nn.weight_init as weight_init +from detectron2.layers import Conv2d, DeformConv, ModulatedDeformConv + + +class DCNDeconvLayer(nn.Module): + + def __init__(self, in_planes, out_planes, deconv_kernel,deconv_stride=2, deconv_pad=1,deconv_out_pad=0, modulate_deform=True, num_groups=1, deform_num_groups=1,dilation=1): + super(DCNDeconvLayer, self).__init__() + self.deform_modulated = modulate_deform + if modulate_deform: + deform_conv_op = ModulatedDeformConv + # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size + offset_channels = 27 + else: + deform_conv_op = DeformConv + offset_channels = 18 + + self.dcn_offset = Conv2d( + in_planes, + offset_channels * deform_num_groups, + kernel_size=3, + stride=1, + padding=1 * dilation, + dilation=dilation + ) + self.dcn = deform_conv_op( + in_planes, + out_planes, + kernel_size=3, + stride=1, + padding=1 * dilation, + bias=False, + groups=num_groups, + dilation=dilation, + deformable_groups=deform_num_groups + ) + for layer in [self.dcn]: + weight_init.c2_msra_fill(layer) + + nn.init.constant_(self.dcn_offset.weight, 0) + nn.init.constant_(self.dcn_offset.bias, 0) + + self.dcn_bn = nn.BatchNorm2d(out_planes) + self.up_sample = nn.ConvTranspose2d( + in_channels=out_planes, + out_channels=out_planes, + kernel_size=deconv_kernel, + stride=deconv_stride, padding=deconv_pad, + output_padding=deconv_out_pad, + bias=False, + ) + self._deconv_init() + self.up_bn = nn.BatchNorm2d(out_planes) + self.relu = nn.ReLU() + + def forward(self, x): + out = x + if self.deform_modulated: + offset_mask = self.dcn_offset(out) + offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) + offset = torch.cat((offset_x, offset_y), dim=1) + mask = mask.sigmoid() + out = self.dcn(out, offset, mask) + else: + offset = self.dcn_offset(out) + out = self.dcn(out, offset) + x = out + + x = self.dcn_bn(x) + x = self.relu(x) + x = self.up_sample(x) + x = self.up_bn(x) + x = self.relu(x) + return x + + def _deconv_init(self): + w = self.up_sample.weight.data + f = math.ceil(w.size(2) / 2) + c = (2 * f - 1 - f % 2) / (2. 
* f) + for i in range(w.size(2)): + for j in range(w.size(3)): + w[0, 0, i, j] = \ + (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) + for c in range(1, w.size(0)): + w[c, 0, :, :] = w[0, 0, :, :] + + +class CNDeconvLayer(nn.Module): + + def __init__( + self, + in_planes, out_planes, deconv_kernel, + deconv_stride=2, deconv_pad=1, deconv_out_pad=0, + num_groups=1, dilation=1, + modulate_deform=True # not used + ): + super(CNDeconvLayer, self).__init__() + + self.conv = Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=1, + padding=1, + dilation=dilation, + ) + + for layer in [self.conv]: + weight_init.c2_msra_fill(layer) + + self.bn = nn.BatchNorm2d(out_planes) + self.up_sample = nn.ConvTranspose2d( + in_channels=out_planes, + out_channels=out_planes, + kernel_size=deconv_kernel, + stride=deconv_stride, padding=deconv_pad, + output_padding=deconv_out_pad, + bias=False, + ) + self._deconv_init() + self.up_bn = nn.BatchNorm2d(out_planes) + self.relu = nn.ReLU() + + def forward(self, x): + + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + x = self.up_sample(x) + x = self.up_bn(x) + x = self.relu(x) + + return x + + def _deconv_init(self): + w = self.up_sample.weight.data + f = math.ceil(w.size(2) / 2) + c = (2 * f - 1 - f % 2) / (2. * f) + for i in range(w.size(2)): + for j in range(w.size(3)): + w[0, 0, i, j] = \ + (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) + for c in range(1, w.size(0)): + w[c, 0, :, :] = w[0, 0, :, :] + + +class CenternetDeconv(nn.Module): + """ + The head used in CenterNet for object classification and box regression. + It has three subnet, with a common structure but separate parameters. + """ + + def __init__(self, cfg, backbone_shape): + super(CenternetDeconv, self).__init__() + channels = cfg.MODEL.OneNet.DECONV_CHANNEL + deconv_kernel = cfg.MODEL.OneNet.DECONV_KERNEL + modulate_deform = cfg.MODEL.OneNet.MODULATE_DEFORM + in_features = cfg.MODEL.OneNet.IN_FEATURES + + if cfg.MODEL.OneNet.DCN: + DeconvLayer = DCNDeconvLayer + else: + DeconvLayer = CNDeconvLayer + + self.deconv1 = DeconvLayer( + channels[0], channels[1], + deconv_kernel=deconv_kernel[0], + modulate_deform=modulate_deform, + ) + + self.lateral_conv1 = nn.Conv2d( + backbone_shape[in_features[-2]].channels, + channels[1], + kernel_size=1, stride=1, padding=0) + + self.deconv2 = DeconvLayer( + channels[1], channels[2], + deconv_kernel=deconv_kernel[1], + modulate_deform=modulate_deform, + ) + self.lateral_conv2 = nn.Conv2d( + backbone_shape[in_features[-3]].channels, + channels[2], + kernel_size=1, stride=1, padding=0) + + self.deconv3 = DeconvLayer( + channels[2], channels[3], + deconv_kernel=deconv_kernel[2], + modulate_deform=modulate_deform, + ) + self.lateral_conv3 = nn.Conv2d( + backbone_shape[in_features[-4]].channels, + channels[3], + kernel_size=1, stride=1, padding=0) + + self.output_conv = nn.Conv2d( + channels[3], channels[3], + kernel_size=3, stride=1, padding=1) + + def forward(self, features_list): + + x = features_list[-1] + x = self.deconv1(x) + x = self.lateral_conv1(features_list[-2]) + x + x = self.deconv2(x) + x = self.lateral_conv2(features_list[-3]) + x + x = self.deconv3(x) + x = self.lateral_conv3(features_list[-4]) + x + x = self.output_conv(x) + return x \ No newline at end of file diff --git a/modelR/layers/deform_conv_v2.py b/modelR/layers/deform_conv_v2.py new file mode 100644 index 0000000..d233346 --- /dev/null +++ b/modelR/layers/deform_conv_v2.py @@ -0,0 +1,284 @@ +import torch +from torch import nn + +class DeformConv2d(nn.Module): + 
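+    # Usage sketch (editorial note, not in the original file; argument names are taken from __init__ below):
+    #     dcn = DeformConv2d(inc=64, outc=128, kernel_size=3, padding=1, stride=1, modulation=True)
+    #     y = dcn(x)   # x: (N, 64, H, W) -> y: (N, 128, H, W); offsets and modulation masks are predicted internally by p_conv / m_conv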
def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=True): + """ + Args: + modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2). + """ + super(DeformConv2d, self).__init__() + self.kernel_size = kernel_size + self.padding = padding + self.stride = stride + self.zero_padding = nn.ZeroPad2d(padding) + self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) + + self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.p_conv.weight, 0) + self.p_conv.register_backward_hook(self._set_lr) + + self.modulation = modulation + if modulation: + self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + nn.init.constant_(self.m_conv.weight, 0) + self.m_conv.register_backward_hook(self._set_lr) + + @staticmethod + def _set_lr(module, grad_input, grad_output): + grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input))) + grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output))) + + def forward(self, x): + offset = self.p_conv(x) + if self.modulation: + m = torch.sigmoid(self.m_conv(x)) + + dtype = offset.data.type() + ks = self.kernel_size + N = offset.size(1) // 2 + + if self.padding: + x = self.zero_padding(x) + + # (b, 2N, h, w) + p = self._get_p(offset, dtype) + + # (b, h, w, 2N) + p = p.contiguous().permute(0, 2, 3, 1) + q_lt = p.detach().floor() + q_rb = q_lt + 1 + + q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1) + q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1) + + # clip p + p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) + + # bilinear kernel (b, h, w, N) + g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) + g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:])) + g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) + g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) + + # (b, c, h, w, N) + x_q_lt = self._get_x_q(x, q_lt, N) + x_q_rb = self._get_x_q(x, q_rb, N) + x_q_lb = self._get_x_q(x, q_lb, N) + x_q_rt = self._get_x_q(x, q_rt, N) + + # (b, c, h, w, N) + x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ + g_rb.unsqueeze(dim=1) * x_q_rb + \ + g_lb.unsqueeze(dim=1) * x_q_lb + \ + g_rt.unsqueeze(dim=1) * x_q_rt + + # modulation + if self.modulation: + m = m.contiguous().permute(0, 2, 3, 1) + m = m.unsqueeze(dim=1) + m = torch.cat([m for _ in range(x_offset.size(1))], dim=1) + x_offset *= m + + x_offset = self._reshape_x_offset(x_offset, ks) + out = self.conv(x_offset) + + return out + + def _get_p_n(self, N, dtype): + p_n_x, p_n_y = torch.meshgrid( + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1)) + # (2N, 1) + p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0) + p_n = p_n.view(1, 2*N, 1, 1).type(dtype) + + return p_n + + def _get_p_0(self, h, w, N, dtype): + p_0_x, p_0_y = torch.meshgrid( + torch.arange(1, h*self.stride+1, self.stride), + torch.arange(1, 
w*self.stride+1, self.stride)) + p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype) + + return p_0 + + def _get_p(self, offset, dtype): + N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) + + # (1, 2N, 1, 1) + p_n = self._get_p_n(N, dtype) + # (1, 2N, h, w) + p_0 = self._get_p_0(h, w, N, dtype) + p = p_0 + p_n + offset + return p + + def _get_x_q(self, x, q, N): + b, h, w, _ = q.size() + padded_w = x.size(3) + c = x.size(1) + # (b, c, h*w) + x = x.contiguous().view(b, c, -1) + + # (b, h, w, N) + index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y + # (b, c, h*w*N) + index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) + + x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) + + return x_offset + + @staticmethod + def _reshape_x_offset(x_offset, ks): + b, c, h, w, N = x_offset.size() + x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) + x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) + + return x_offset + + +class DeformConv2d_offset(nn.Module): + def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None): + """ + Args: + modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2). + """ + super(DeformConv2d_offset, self).__init__() + self.kernel_size = kernel_size + self.padding = padding + self.stride = stride + self.zero_padding = nn.ZeroPad2d(padding) + self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias) + + #self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + #nn.init.constant_(self.p_conv.weight, 0) + #self.p_conv.register_backward_hook(self._set_lr) + + #self.modulation = modulation + #if modulation: + #self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride) + #nn.init.constant_(self.m_conv.weight, 0) + #self.m_conv.register_backward_hook(self._set_lr) + + @staticmethod + def _set_lr(module, grad_input, grad_output): + grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input))) + grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output))) + + def forward(self, x, offset, modulation): + #offset = self.p_conv(x) + #if self.modulation: + #m = torch.sigmoid(self.m_conv(x)) + m = modulation + #print("aaaaaaaaaaaaaa",offset.shape,m.shape) + dtype = offset.data.type() + ks = self.kernel_size + N = offset.size(1) // 2 + + if self.padding: + x = self.zero_padding(x) + + # (b, 2N, h, w) + p = self._get_p(offset, dtype) + + # (b, h, w, 2N) + p = p.contiguous().permute(0, 2, 3, 1) + q_lt = p.detach().floor() + q_rb = q_lt + 1 + + q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long() + q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1) + q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1) + + # clip p + p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1) + + # bilinear kernel (b, h, w, N) + g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:])) + g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., 
N:].type_as(p) - p[..., N:])) + g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:])) + g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:])) + + # (b, c, h, w, N) + x_q_lt = self._get_x_q(x, q_lt, N) + x_q_rb = self._get_x_q(x, q_rb, N) + x_q_lb = self._get_x_q(x, q_lb, N) + x_q_rt = self._get_x_q(x, q_rt, N) + + # (b, c, h, w, N) + x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \ + g_rb.unsqueeze(dim=1) * x_q_rb + \ + g_lb.unsqueeze(dim=1) * x_q_lb + \ + g_rt.unsqueeze(dim=1) * x_q_rt + + # modulation + #if self.modulation: + m = m.contiguous().permute(0, 2, 3, 1) + m = m.unsqueeze(dim=1) + m = torch.cat([m for _ in range(x_offset.size(1))], dim=1) + x_offset *= m + + x_offset = self._reshape_x_offset(x_offset, ks) + out = self.conv(x_offset) + + return out + + def _get_p_n(self, N, dtype): + p_n_x, p_n_y = torch.meshgrid( + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1), + torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1)) + # (2N, 1) + p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0) + p_n = p_n.view(1, 2*N, 1, 1).type(dtype) + + return p_n + + def _get_p_0(self, h, w, N, dtype): + p_0_x, p_0_y = torch.meshgrid( + torch.arange(1, h*self.stride+1, self.stride), + torch.arange(1, w*self.stride+1, self.stride)) + p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1) + p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype) + + return p_0 + + def _get_p(self, offset, dtype): + N, h, w = offset.size(1)//2, offset.size(2), offset.size(3) + + # (1, 2N, 1, 1) + p_n = self._get_p_n(N, dtype) + # (1, 2N, h, w) + p_0 = self._get_p_0(h, w, N, dtype) + p = p_0 + p_n + offset + return p + + def _get_x_q(self, x, q, N): + b, h, w, _ = q.size() + padded_w = x.size(3) + c = x.size(1) + # (b, c, h*w) + x = x.contiguous().view(b, c, -1) + + # (b, h, w, N) + index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y + # (b, c, h*w*N) + index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1) + + x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N) + + return x_offset + + @staticmethod + def _reshape_x_offset(x_offset, ks): + b, c, h, w, N = x_offset.size() + x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1) + x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks) + + return x_offset \ No newline at end of file diff --git a/modelR/layers/msr_blocks.py b/modelR/layers/msr_blocks.py new file mode 100644 index 0000000..31756e4 --- /dev/null +++ b/modelR/layers/msr_blocks.py @@ -0,0 +1,158 @@ +import torch +import torch.nn as nn +from dropblock import DropBlock2D, LinearScheduler +from ..layers.convolutions import * + +class MSR_Convset_L(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_L, self).__init__() + self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, + stride=1, pad=1, norm="bn", activate="leaky") + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=2, dila=2, norm="bn", activate="leaky") + self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=4, dila=4, norm="bn", 
activate="leaky") + self.__dw3 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=6, dila=6, norm="bn", activate="leaky") + self.__pw1 = Convolutional(filters_in=filters_in*4, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="Mish") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., + stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = self.__dw1(pw0) + dw2 = self.__dw2(pw0)+dw1 + dw3 = self.__dw3(pw0)+dw2 + cat = torch.cat((pw0, dw1, dw2, dw3),1) + pw1 = self.__pw1(cat) + return pw1 + +class MSR_Convset_M(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_M, self).__init__() + self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, + stride=1, pad=1, norm="bn", activate="leaky") + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=1, dila=1, norm="bn", activate="leaky") + self.__dw2 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, + stride=1, pad=2, dila=2, norm="bn", activate="leaky") + self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="Mish") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., + stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = self.__dw1(pw0) + dw2 = self.__dw2(pw0)+dw1 + cat = torch.cat((dw1, dw2),1) + pw1 = self.__pw1(cat) + return pw1 + +class MSR_Convset_S(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_S, self).__init__() + self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, pad=1, norm="bn", activate="leaky") + #self.__dw0 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, pad=1, dila=1, groups=1, bias=False, type='tri_sw', norm="bn", activate='leaky') + + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, stride=1, + pad=0, norm="bn", activate="leaky") + self.__dw1 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, + pad=1, dila=1, norm="bn", activate="leaky") + self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, stride=1, + pad=0, norm="bn", activate="leaky") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = self.__dw1(pw0) + pw1 = self.__pw1(dw1) + return pw1 + +class MSR_Convset_L_R(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_L_R, self).__init__() + self.__dw0 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, + pad=1, dila=1, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__dw1 = Directional_Dynamic_Convolutional(filters_in=filters_in, 
filters_out=filters_in, kernel_size=3, stride=1, + pad=2, dila=2, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__dw2 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, stride=1, + pad=4, dila=4, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__dw3 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, stride=1, + pad=6, dila=6, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw1 = Convolutional(filters_in=filters_in*4, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="MEMish") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., + stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = self.__dw1(pw0) + dw2 = self.__dw2(pw0)+dw1 + dw3 = self.__dw3(pw0)+dw2 + cat = torch.cat((pw0, dw1, dw2, dw3),1) + pw1 = self.__pw1(cat) + return pw1 + +class MSR_Convset_M_R(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_M_R, self).__init__() + self.__dw0 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, + pad=1, dila=1, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__dw1 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, stride=1, + pad=1, dila=1, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__dw2 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=3, stride=1, + pad=2, dila=2, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, + stride=1, pad=0, norm="bn", activate="MEMish") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., + stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = self.__dw1(pw0) + dw2 = self.__dw2(pw0)+dw1 + cat = torch.cat((dw1, dw2),1) + pw1 = self.__pw1(cat) + return pw1 + +class MSR_Convset_S_R(nn.Module): + def __init__(self, filters_in): + super(MSR_Convset_S_R, self).__init__() + #self.__dw0 = Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, pad=1, norm="bn", activate="leaky") + self.__dw0 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, + pad=1, dila=1, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw0 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, stride=1, + pad=0, norm="bn", activate="leaky") + self.__dw1 = Directional_Dynamic_Convolutional(filters_in=filters_in, filters_out=filters_in*2, kernel_size=3, stride=1, + pad=1, dila=1, groups=2, bias=False, type='tri', norm="bn", activate='leaky') + self.__pw1 = Convolutional(filters_in=filters_in*2, filters_out=filters_in, kernel_size=1, stride=1, + pad=0, norm="bn", activate="leaky") + self.__drop = LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., stop_value=0.1, nr_steps=5) + + def forward(self, x): + dw0 = self.__dw0(x) + dw0 = self.__drop(dw0) + pw0 = self.__pw0(dw0) + dw1 = 
self.__dw1(pw0) + pw1 = self.__pw1(dw1) + return pw1 \ No newline at end of file diff --git a/modelR/layers/multiscale_fusion_blocks.py b/modelR/layers/multiscale_fusion_blocks.py new file mode 100644 index 0000000..f775525 --- /dev/null +++ b/modelR/layers/multiscale_fusion_blocks.py @@ -0,0 +1,279 @@ +import torch +import torch.nn as nn +from ..layers.convolutions import Convolutional, Separable_Conv_dila, Separable_Conv, Deformable_Convolutional +import torch.nn.functional as F +from ..layers.attention_blocks import SELayer + +class SPP(nn.Module): + def __init__(self, depth=512): + super(SPP,self).__init__() + self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) + self.__maxpool9 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4) + self.__maxpool13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6) + self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) + + def forward(self, x): + maxpool5 = self.__maxpool5(x) + maxpool9 = self.__maxpool9(x) + maxpool13 = self.__maxpool13(x) + cat_maxpool = torch.cat([x, maxpool5, maxpool9, maxpool13], dim=1) + SPP = self.__outconv(cat_maxpool) + return SPP + +class SPP_rec(nn.Module): + def __init__(self, depth=512): + super(SPP_rec,self).__init__() + self.__maxpool5 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) + self.__maxpool9 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4) + self.__maxpool13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6) + + self.__maxpool5w = nn.MaxPool2d(kernel_size=(5,1), stride=1, padding=(2,0)) + self.__maxpool5h = nn.MaxPool2d(kernel_size=(1,5), stride=1, padding=(0,2)) + self.__maxpool9w = nn.MaxPool2d(kernel_size=(9,3), stride=1, padding=(4,1)) + self.__maxpool9h = nn.MaxPool2d(kernel_size=(3,9), stride=1, padding=(1,4)) + self.__maxpool13w = nn.MaxPool2d(kernel_size=(13,5), stride=1, padding=(6,2)) + self.__maxpool13h = nn.MaxPool2d(kernel_size=(5,13), stride=1, padding=(2,6)) + + self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) + + def forward(self, x): + maxpool5 = self.__maxpool5(x) + self.__maxpool5h(x) + self.__maxpool5w(x) + maxpool9 = self.__maxpool9(x) + self.__maxpool9h(x) + self.__maxpool9w(x) + maxpool13 = self.__maxpool13(x) + self.__maxpool13h(x) + self.__maxpool13w(x) + cat_maxpool = torch.cat([x, maxpool5, maxpool9, maxpool13], dim=1) + SPP_rec = self.__outconv(cat_maxpool) + return SPP_rec + +class ASPP_se(nn.Module): + def __init__(self, in_channel=1280, depth=512): + super(ASPP_se,self).__init__() + self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1) + self.__dilaconv5 = nn.Conv2d(in_channel, depth, 3, 1, padding=2, dilation=2) + self.__dilaconv9 = nn.Conv2d(in_channel, depth, 3, 1, padding=4, dilation=4) + self.__dilaconv13 = nn.Conv2d(in_channel, depth, 3, 1, padding=6, dilation=6) + self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) + self.__se = SELayer(depth) + + + def forward(self, x): + dilaconv1 = self.__dilaconv1(x) + dilaconv5 = self.__dilaconv5(x) + dilaconv9 = self.__dilaconv9(x) + dilaconv13 = self.__dilaconv13(x) + cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1) + ASPP_se = self.__se(self.__outconv(cat_dilaconv)) + return ASPP_se + +class ASPP(nn.Module): + def __init__(self, in_channel=1280, depth=512): + super(ASPP,self).__init__() + self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1) + self.__dilaconv5 = nn.Conv2d(in_channel, depth, 3, 1, padding=2, dilation=2) + self.__dilaconv9 = nn.Conv2d(in_channel, depth, 3, 1, padding=4, dilation=4) + self.__dilaconv13 = nn.Conv2d(in_channel, depth, 3, 1, padding=6, dilation=6) + 
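+        # The 1x1 branch and the three dilated 3x3 branches (rates 2, 4, 6) are concatenated to
+        # depth*4 channels in forward(); the 1x1 conv defined next projects them back to `depth`.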
self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) + + def forward(self, x): + dilaconv1 = self.__dilaconv1(x) + dilaconv5 = self.__dilaconv5(x) + dilaconv9 = self.__dilaconv9(x) + dilaconv13 = self.__dilaconv13(x) + cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1) + ASPP = self.__outconv(cat_dilaconv) + return ASPP + +class Sparable_ASPP(nn.Module): + def __init__(self, in_channel=1280, depth=512): + super(Sparable_ASPP,self).__init__() + self.__dilaconv1 = nn.Conv2d(in_channel, depth, 1, 1) + self.__dilaconv5 = Separable_Conv_dila(in_channel, depth, 1, pad=2, dila=2) + self.__dilaconv9 = Separable_Conv_dila(in_channel, depth, 1, pad=4, dila=4) + self.__dilaconv13 = Separable_Conv_dila(in_channel, depth, 1, pad=6, dila=6) + self.__outconv = nn.Conv2d(depth * 4, depth, 1, 1) + + def forward(self, x): + dilaconv1 = self.__dilaconv1(x) + dilaconv5 = self.__dilaconv5(x) + dilaconv9 = self.__dilaconv9(x) + dilaconv13 = self.__dilaconv13(x) + cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1) + ASPP = self.__outconv(cat_dilaconv) + return ASPP + +class Sparable_ASPP_se(nn.Module): + def __init__(self, in_channel=1024, depth=512): + super(Sparable_ASPP_se,self).__init__() + self.__dilaconv1 = Separable_Conv(in_channel, depth, 1) + self.__dilaconv5 = Separable_Conv_dila(depth, depth, 1, pad=2, dila=2) + self.__dilaconv9 = Separable_Conv_dila(depth, depth//2, 1, pad=4, dila=4) + self.__dilaconv13 = Separable_Conv_dila(depth, depth//2, 1, pad=6, dila=6) + self.__outconv = nn.Conv2d(depth * 3, depth, 1, 1) + #self.__outconv = Convolutional(filters_in=depth * 3, filters_out=depth, kernel_size=1, stride=1, pad=0, norm='bn', activate='leaky') + self.__se = SELayer(depth) + + def forward(self, x): + dilaconv1 = self.__dilaconv1(x) + dilaconv5 = self.__dilaconv5(dilaconv1) + dilaconv9 = self.__dilaconv9(dilaconv1) + dilaconv13 = self.__dilaconv13(dilaconv1) + cat_dilaconv = torch.cat([dilaconv1, dilaconv5, dilaconv9, dilaconv13], dim=1) + ASPP_se = self.__se(self.__outconv(cat_dilaconv)) + #ASPP_se = self.__outconv(cat_dilaconv) + return ASPP_se + +class ASFF(nn.Module): + def __init__(self, level, vis=False): + super(ASFF, self).__init__() + self.level = level + self.dim = [512,256,128] + self.inter_dim = self.dim[self.level] + if level == 0: + self.stride_level_1 = Convolutional(256, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6') + self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6') + self.expand = Convolutional(self.inter_dim, 1024, 3, 1, pad=1, norm='bn', activate='relu6') + elif level == 1: + self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.stride_level_2 = Convolutional(128, self.inter_dim, 3, 2, pad=1, norm='bn', activate='relu6') + self.expand = Convolutional(self.inter_dim, 512, 3, 1, pad=1, norm='bn', activate='relu6') + elif level == 2: + self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.compress_level_1 = Convolutional(256, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.expand = Convolutional(self.inter_dim, 256, 3, 1, pad=1, norm='bn', activate='relu6') + compress_c = 16 + self.weight_level_0 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_level_1 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_level_2 = Convolutional(self.inter_dim, 
compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_levels = nn.Conv2d(compress_c * 3, 3, kernel_size=1, stride=1, padding=0) + self.vis = vis + + def forward(self, x_level_0, x_level_1, x_level_2): + if self.level == 0: + level_0_resized = x_level_0 + level_1_resized = self.stride_level_1(x_level_1) + level_2_downsampled_inter = F.max_pool2d(x_level_2, 3, stride=2, padding=1) + level_2_resized = self.stride_level_2(level_2_downsampled_inter) + elif self.level == 1: + level_0_compressed = self.compress_level_0(x_level_0) + level_0_resized = F.interpolate(level_0_compressed, scale_factor=2, mode='nearest') + level_1_resized = x_level_1 + level_2_resized = self.stride_level_2(x_level_2) + elif self.level == 2: + level_0_compressed = self.compress_level_0(x_level_0) + level_0_resized = F.interpolate(level_0_compressed, scale_factor=4, mode='nearest') + level_1_compressed = self.compress_level_1(x_level_1) + level_1_resized = F.interpolate(level_1_compressed, scale_factor=2, mode='nearest') + level_2_resized = x_level_2 + + level_0_weight_v = self.weight_level_0(level_0_resized) + level_1_weight_v = self.weight_level_1(level_1_resized) + level_2_weight_v = self.weight_level_2(level_2_resized) + levels_weight_v = torch.cat((level_0_weight_v, level_1_weight_v, level_2_weight_v), 1) + levels_weight = self.weight_levels(levels_weight_v) + levels_weight = F.softmax(levels_weight, dim=1) + + fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] + \ + level_1_resized * levels_weight[:, 1:2, :, :] + \ + level_2_resized * levels_weight[:, 2:, :, :] + + out = self.expand(fused_out_reduced) + + if self.vis: + return out, levels_weight, fused_out_reduced.sum(dim=1) + else: + return out + +class ASFF_Mobile(nn.Module): + def __init__(self, level, vis=False): + super(ASFF_Mobile, self).__init__() + self.level = level + self.dim = [512,256,128] + self.inter_dim = self.dim[self.level] + if level == 0: + self.stride_level_1 = Separable_Conv(256, self.inter_dim, 2) + self.stride_level_2 = Separable_Conv(128, self.inter_dim, 2) + self.expand = Separable_Conv(self.inter_dim, 1024, 1) + elif level == 1: + self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.stride_level_2 = Separable_Conv(128, self.inter_dim, 2) + self.expand = Separable_Conv(self.inter_dim, 512, 1) + elif level == 2: + self.compress_level_0 = Convolutional(512, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.compress_level_1 = Convolutional(256, self.inter_dim, 1, 1, pad=0, norm='bn', activate='relu6') + self.expand = Separable_Conv(self.inter_dim, 256, 1) + compress_c = 16 + self.weight_level_0 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_level_1 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_level_2 = Convolutional(self.inter_dim, compress_c, 1, 1, pad=0, norm='bn', activate='relu6') + self.weight_levels = nn.Conv2d(compress_c * 3, 3, kernel_size=1, stride=1, padding=0) + self.vis = vis + + def forward(self, x_level_0, x_level_1, x_level_2): + if self.level == 0: + level_0_resized = x_level_0 + level_1_resized = self.stride_level_1(x_level_1) + level_2_downsampled_inter = F.max_pool2d(x_level_2, 3, stride=2, padding=1) + level_2_resized = self.stride_level_2(level_2_downsampled_inter) + elif self.level == 1: + level_0_compressed = self.compress_level_0(x_level_0) + level_0_resized = F.interpolate(level_0_compressed, scale_factor=2, 
mode='nearest') + level_1_resized = x_level_1 + level_2_resized = self.stride_level_2(x_level_2) + elif self.level == 2: + level_0_compressed = self.compress_level_0(x_level_0) + level_0_resized = F.interpolate(level_0_compressed, scale_factor=4, mode='nearest') + level_1_compressed = self.compress_level_1(x_level_1) + level_1_resized = F.interpolate(level_1_compressed, scale_factor=2, mode='nearest') + level_2_resized = x_level_2 + + level_0_weight_v = self.weight_level_0(level_0_resized) + level_1_weight_v = self.weight_level_1(level_1_resized) + level_2_weight_v = self.weight_level_2(level_2_resized) + levels_weight_v = torch.cat((level_0_weight_v, level_1_weight_v, level_2_weight_v), 1) + levels_weight = self.weight_levels(levels_weight_v) + levels_weight = F.softmax(levels_weight, dim=1) + + fused_out_reduced = level_0_resized * levels_weight[:, 0:1, :, :] + \ + level_1_resized * levels_weight[:, 1:2, :, :] + \ + level_2_resized * levels_weight[:, 2:, :, :] + + out = self.expand(fused_out_reduced) + + if self.vis: + return out, levels_weight, fused_out_reduced.sum(dim=1) + else: + return out + +class FeatureAdaption(nn.Module): + def __init__(self, in_ch, out_ch, n_anchors): + super(FeatureAdaption, self).__init__() + self.sep=False + self.conv_offset = nn.Conv2d(in_channels=2*n_anchors, out_channels=2*9*n_anchors, groups = n_anchors, kernel_size=1,stride=1,padding=0) + self.dconv = Deformable_Convolutional(filters_in=in_ch, filters_out=out_ch, kernel_size=3, stride=1, pad=1, groups=n_anchors) + + def forward(self, input, wh_pred): + wh_pred_new = wh_pred.detach() + offset = self.conv_offset(wh_pred_new) + out = self.dconv(input, offset) + return out + +class Features_Fusion(nn.Module): + def __init__(self, in_channels, out_channels, r=16): + super(Features_Fusion,self).__init__() + self.out_channels = out_channels + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.conv_fc1 = Convolutional(in_channels, in_channels // r, kernel_size=1, stride=1, pad=0, norm='bn', activate='leaky') + self.conv_fc2 = nn.Conv2d(in_channels // r, out_channels * 2, kernel_size=1, padding=0, bias=False) + self.softmax = nn.Softmax(dim=2) + + + def forward(self, x1, x2): + batch_size = x1.size(0) + x_mix = torch.add(x1,x2) # 逐元素相加生成 混合特征U + x_avg = self.avg_pool(x_mix) + x_fcout = self.conv_fc2(self.conv_fc1(x_avg)) # 先降维,后升维,结果中前一半通道值为a,后一半为b + x_reshape = x_fcout.reshape(batch_size, self.out_channels, 2, -1) # 调整形状,变为两个全连接层的值 + x_softmax = self.softmax(x_reshape) # 使得两个全连接层对应位置进行softmax + w1 = x_softmax[:, :, 0:1,:] #将tensor按照指定维度切分成2个tensor块 + w2 = x_softmax[:, :, 1:2,:] + out = x1*w1 + x2*w2 # 两个加权后的特征 逐元素相加 + return out \ No newline at end of file diff --git a/modelR/layers/np_attention_blocks.py b/modelR/layers/np_attention_blocks.py new file mode 100644 index 0000000..75a8978 --- /dev/null +++ b/modelR/layers/np_attention_blocks.py @@ -0,0 +1,145 @@ +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from ..layers.convolutions import Convolutional + +def sobel_kernel(channel_in, channel_out, theta): + sobel_kernel0 = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype='float32') + sobel_kernel0 = sobel_kernel0.reshape((1, 1, 3, 3)) + sobel_kernel0 = Variable(torch.from_numpy(sobel_kernel0)) + sobel_kernel0 = sobel_kernel0.repeat(channel_out, channel_in, 1, 1).float() + sobel_kernel0 = sobel_kernel0.cuda()*theta.view(-1, 1, 1, 1).cuda() + + sobel_kernel45 = np.array([[2, 1, 0], [1, 0, -1], [0, -1, -2]], dtype='float32') + 
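+    # 45-degree kernel: handled like the 0-degree kernel above -- reshape to (1, 1, 3, 3), tile to
+    # (channel_out, channel_in, 3, 3), and scale by the learnable per-channel theta.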
sobel_kernel45 = sobel_kernel45.reshape((1, 1, 3, 3)) + sobel_kernel45 = Variable(torch.from_numpy(sobel_kernel45)) + sobel_kernel45 = sobel_kernel45.repeat(channel_out, channel_in, 1, 1).float() + sobel_kernel45 = sobel_kernel45.cuda()*theta.view(-1, 1, 1, 1).cuda() + + sobel_kernel90 = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]], dtype='float32') + sobel_kernel90 = sobel_kernel90.reshape((1, 1, 3, 3)) + sobel_kernel90 = Variable(torch.from_numpy(sobel_kernel90)) + sobel_kernel90 = sobel_kernel90.repeat(channel_out, channel_in, 1, 1).float() + sobel_kernel90 = sobel_kernel90.cuda()*theta.view(-1, 1, 1, 1).cuda() + + sobel_kernel135 = np.array([[0, -1, -2], [1, 0, -1], [2, 1, 0]], dtype='float32') + sobel_kernel135 = sobel_kernel135.reshape((1, 1, 3, 3)) + sobel_kernel135 = Variable(torch.from_numpy(sobel_kernel135)) + sobel_kernel135 = sobel_kernel135.repeat(channel_out, channel_in, 1, 1).float() + sobel_kernel135 = sobel_kernel135.cuda()*theta.view(-1, 1, 1, 1).cuda() + + return sobel_kernel0, sobel_kernel45, sobel_kernel90, sobel_kernel135 + +class Sobel_conv(nn.Module): + def __init__(self, channel_in, channel_out, alpha=0.5, sigma=4, stride=1, padding=1): + super(Sobel_conv, self).__init__() + self.channel_in = channel_in + self.channel_out = channel_out + self.stride = stride + self.padding = padding + self.sigma = sigma + self.alpha = alpha + self.__conv_weight = Convolutional(channel_out * 4, 4, kernel_size=1, stride=1, pad=0, norm='bn', activate='leaky') + self.theta = nn.Parameter(torch.sigmoid(torch.randn(channel_out) * 1.0) + self.alpha, requires_grad=True) + + def forward(self, x): + # [channel_out, channel_in, kernel, kernel] + kernel0, kernel45, kernel90, kernel135 = sobel_kernel(self.channel_in, self.channel_out, self.theta) + kernel0 = kernel0.float() + kernel45 = kernel45.float() + kernel90 = kernel90.float() + kernel135 = kernel135.float() + + out0 = F.conv2d(x, kernel0, stride=self.stride, padding=self.padding) + out45 = F.conv2d(x, kernel45, stride=self.stride, padding=self.padding) + out90 = F.conv2d(x, kernel90, stride=self.stride, padding=self.padding) + out135 = F.conv2d(x, kernel135, stride=self.stride, padding=self.padding) + + out_cat = torch.cat((out0, out45, out90, out135),1) + out_cat_conv = self.__conv_weight(out_cat) + out_weight = F.softmax(out_cat_conv, dim=1) + + out = torch.abs(out0)* out_weight[:,0:1,:,:] + torch.abs(out45)*out_weight[:,1:2,:,:]\ + + torch.abs(out90)*out_weight[:,2:3,:,:] + torch.abs(out135)*out_weight[:,3:,:,:] + out = (out * self.sigma) + return out + +class Sobel_Edge_Block(nn.Module): + def __init__(self, channel_in, alpha=0.5, sigma=4): + super(Sobel_Edge_Block, self).__init__() + self.__down0 = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2)) + self.__sobelconv = Sobel_conv(channel_in, channel_in, alpha, sigma) + self.__down1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.__conv0 = Convolutional(channel_in, 2, kernel_size=1, stride=1, pad=0, norm='bn', activate='Mish') + + def forward(self, x): + x_down0 = self.__down0(x) + x_sobel = self.__sobelconv(x_down0) + x_down1 = self.__down1(self.__down1(x_sobel)) + x_conv0 = self.__conv0(x_down1) + return x_conv0 + +class NPAttention(nn.Module): + def __init__(self, inplanes, planes, use_scale=False, groups=None): + self.use_scale = use_scale + self.groups = groups + + super(NPAttention, self).__init__() + # conv theta + self.t = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + self.softmax = nn.Softmax(dim=2) + self.t_mask = nn.Conv2d(2, 1, kernel_size=1, 
stride=1, bias=False) + # conv phi + self.p = nn.Conv2d(inplanes, planes//2, kernel_size=1, stride=1, bias=False) + self.p1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) + self.p2 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) + # conv g + self.g = nn.Conv2d(inplanes, planes//2, kernel_size=1, stride=1, bias=False) + self.g1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) + self.g2 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) + # conv z + self.z = nn.Conv2d(planes, inplanes, kernel_size=1, stride=1, groups=self.groups, bias=False) + self.gn = nn.GroupNorm(num_groups=self.groups, num_channels=inplanes) + + def kernel(self, t, p, g, b, c, h, w): + #The linear kernel (dot production) + t = t.view(b, c, h * w) + t = self.softmax(t) + t = t.view(b, 1, c * h * w) + p = p.view(b, 1, c * h * w) + g = g.view(b, c * h * w, 1) + att = torch.bmm(p, g) + if self.use_scale: + att = att.div((c*h*w)**0.5) + x = torch.bmm(att, t) + x = x.view(b, c, h, w) + return x + + def forward(self, x, mask): + residual = x + t = self.t(x) + t_mask = self.t_mask(mask) + t = t*t_mask+t + p = self.p(x) + p = torch.cat((p,self.p1(p),self.p2(p)),1) + g = self.g(x) + g = torch.cat((g,self.g1(g),self.g2(g)),1) + b, c, h, w = t.size() + if self.groups and self.groups > 1: + _c = int(c / self.groups) + ts = torch.split(t, split_size_or_sections=_c, dim=1) + ps = torch.split(p, split_size_or_sections=_c, dim=1) + gs = torch.split(g, split_size_or_sections=_c, dim=1) + _t_sequences = [] + for i in range(self.groups): + _x = self.kernel(ts[i], ps[i], gs[i], b, _c, h, w) + _t_sequences.append(_x) + x = torch.cat(_t_sequences, dim=1) + else: + x = self.kernel(t, p, g, b, c, h, w) + x = self.z(x) + xout = self.gn(x) + out = xout + residual + return out \ No newline at end of file diff --git a/modelR/layers/shuffle_blocks.py b/modelR/layers/shuffle_blocks.py new file mode 100644 index 0000000..b1b7d02 --- /dev/null +++ b/modelR/layers/shuffle_blocks.py @@ -0,0 +1,187 @@ +from .activations import * +from ..layers.convolutions import Convolutional, Cond_Convolutional +import math +import numpy as np +class Shuffle_new(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size=3 ,c_tag=0.5, groups=3, dila=1): + super(Shuffle_new, self).__init__() + self.left_part = round(c_tag * filters_in) + self.right_part = filters_out - self.left_part + self.__dw = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=kernel_size, stride=1, pad=(kernel_size-1)//2, groups=self.right_part, dila=dila, norm="bn") + self.__pw1 = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky") + self.groups = groups + + def channel_shuffle(self, features): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % self.groups == 0) + channels_per_group = num_channels // self.groups + features = features.view(batchsize, self.groups, channels_per_group, height, width)# reshape + features = torch.transpose(features, 1, 2).contiguous() + features = features.view(batchsize, -1, height, width)# flatten + return features + + def forward(self, x): + left = x[:, :self.left_part, :, :].contiguous() + right = x[:, self.left_part:, :, :].contiguous() + right = self.__dw(right) + right = self.__pw1(right) + cat = torch.cat((left, right), 1) + out = self.channel_shuffle(cat) + return out + +class Shuffle_Cond_RFA(nn.Module): + def __init__(self, filters_in, filters_out, c_tag=0.5, groups=3, dila_r=4, dila_l=6): + 
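+        # Splits the input channels at round(c_tag * filters_in); each half passes through a
+        # conditional depthwise 3x3 conv with its own dilation rate (dila_l / dila_r) followed by a
+        # 1x1 pointwise conv, and the two branches are summed in forward().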
super(Shuffle_Cond_RFA, self).__init__() + self.left_part = round(c_tag * filters_in) + self.right_part = filters_out - self.left_part + self.__dw_right = Cond_Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=3, + stride=1, pad=dila_r, groups=self.right_part, dila=dila_r, bias=True, norm="bn") + self.__pw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + + self.__dw_left = Cond_Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=3, + stride=1, pad=dila_l, groups=self.right_part, dila=dila_l, bias=True, norm="bn") + self.__pw1_left = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + #self.groups = groups + + def forward(self, x): + left = x[:, :self.left_part, :, :].contiguous() + right = x[:, self.left_part:, :, :].contiguous() + left = self.__dw_left(left) + left = self.__pw1_left(left) + right = self.__dw_right(right) + right = self.__pw_right(right) + #cat = torch.cat((left, right), 1) + #out = self.channel_shuffle(cat) + return left+right + +class Shuffle_new_s(nn.Module): + def __init__(self, filters_in, filters_out, kernel_size=3 ,c_tag=0.5, groups=3, dila=1): + super(Shuffle_new_s, self).__init__() + self.__dw = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=kernel_size, stride=1, pad=(kernel_size-1)//2, groups=filters_in, dila=dila, norm="bn") + self.__pw1 = Convolutional(filters_in=filters_in, filters_out=filters_in, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky") + self.groups = groups + + def channel_shuffle(self, features): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % self.groups == 0) + channels_per_group = num_channels // self.groups + features = features.view(batchsize, self.groups, channels_per_group, height, width)# reshape + features = torch.transpose(features, 1, 2).contiguous() + features = features.view(batchsize, -1, height, width)# flatten + return features + def forward(self, x): + right = self.__dw(x) + right = self.__pw1(right) + cat = torch.cat((x, right), 1) + out = self.channel_shuffle(cat) + return out + +class Shuffle_RFA(nn.Module): + def __init__(self, filters_in, filters_out, c_tag=0.5, groups=3, dila_r=4, dila_l=6): + super(Shuffle_RFA, self).__init__() + self.left_part = round(c_tag * filters_in) + self.right_part = filters_out - self.left_part + self.__dw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=3, + stride=1, pad=dila_r, groups=self.right_part, dila=dila_r, norm="bn") + self.__pw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="relu") + + self.__dw_left = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=3, + stride=1, pad=dila_l, groups=self.right_part, dila=dila_l, norm="bn") + self.__pw1_left = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="relu") + + self.groups = groups + + def channel_shuffle(self, features): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % self.groups == 0) + channels_per_group = num_channels // self.groups + features = features.view(batchsize, self.groups, channels_per_group, height, width)# reshape + features = 
torch.transpose(features, 1, 2).contiguous() + features = features.view(batchsize, -1, height, width)# flatten + return features + def forward(self, x): + left = x[:, :self.left_part, :, :].contiguous() + right = x[:, self.left_part:, :, :].contiguous() + left = self.__dw_left(left) + left = self.__pw1_left(left) + right = self.__dw_right(right) + right = self.__pw_right(right) + cat = torch.cat((left, right), 1) + out = self.channel_shuffle(cat) + return out + +class DRF3(nn.Module): + def __init__(self, filters_in, filters_out, c_tag=0.5, groups=3): + super(DRF3, self).__init__() + self.left_part = round(c_tag * filters_in) + self.right_part = filters_out - self.left_part + #self.__dw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=5, stride=1, pad=dila_r*2, groups=self.right_part, dila=dila_r, norm="bn") + self.__right_weight = nn.Parameter(torch.Tensor(self.right_part,1,3,3), requires_grad=True)#torch.rand(self.right_part,self.right_part,5,5) + self.__bn = nn.BatchNorm2d(self.right_part,affine=True) + self.__pw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__globpool = nn.AdaptiveAvgPool2d(1) + self.__fc = Convolutional(1,1,1,1,0,norm='bn',activate="leaky") + self.groups = groups + + def channel_shuffle(self, features): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % self.groups == 0) + channels_per_group = num_channels // self.groups + features = features.view(batchsize, self.groups, channels_per_group, height, width)# reshape + features = torch.transpose(features, 1, 2).contiguous() + features = features.view(batchsize, -1, height, width)# flatten + return features + def forward(self, x): + left = x[:, :self.left_part, :, :].contiguous() + right = x[:, self.left_part:, :, :].contiguous() + fc = self.__fc(self.__globpool(right[:, 0:1, :, :])) + #print(fc.shape) + fcc = fc.detach().cpu() + #print(fcc.shape) + rfa = round(torch.sigmoid(torch.sum(fcc)).item() * 2 + 1) + right = self.__bn(F.conv2d(right, self.__right_weight, stride=1, padding=rfa, dilation=rfa, groups=self.right_part)) #self.__dw_right(right) + right = self.__pw_right(right) + cat = torch.cat((left, right), 1) + out = self.channel_shuffle(cat) + return out + +class DRF5(nn.Module): + def __init__(self, filters_in, filters_out, c_tag=0.5, groups=3): + super(DRF5, self).__init__() + self.left_part = round(c_tag * filters_in) + self.right_part = filters_out - self.left_part + #self.__dw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=5, stride=1, pad=dila_r*2, groups=self.right_part, dila=dila_r, norm="bn") + self.__right_weight = nn.Parameter(torch.Tensor(self.right_part,1,5,5), requires_grad=True)#torch.rand(self.right_part,self.right_part,5,5) + self.__bn = nn.BatchNorm2d(self.right_part,affine=True) + self.__pw_right = Convolutional(filters_in=self.right_part, filters_out=self.right_part, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__globpool = nn.AdaptiveAvgPool2d(1) + self.__fc = Convolutional(1,1,1,1,0,norm='bn',activate="leaky") + self.groups = groups + + def channel_shuffle(self, features): + batchsize, num_channels, height, width = features.data.size() + assert (num_channels % self.groups == 0) + channels_per_group = num_channels // self.groups + features = features.view(batchsize, self.groups, channels_per_group, height, width)# reshape + features = 
torch.transpose(features, 1, 2).contiguous()
+        features = features.view(batchsize, -1, height, width)# flatten
+        return features
+    def forward(self, x):
+        left = x[:, :self.left_part, :, :].contiguous()
+        right = x[:, self.left_part:, :, :].contiguous()
+        fc = self.__fc(self.__globpool(right[:, 0:1, :, :]))
+        #print(fc.shape)
+        fcc = fc.detach().cpu()
+        #print(fcc.shape)
+        rfa = round(torch.sigmoid(torch.sum(fcc)).item() * 2 + 1)
+        right = self.__bn(F.conv2d(right, self.__right_weight, stride=1, padding=2*rfa, dilation=rfa, groups=self.right_part)) #self.__dw_right(right)
+        right = self.__pw_right(right)
+        cat = torch.cat((left, right), 1)
+        out = self.channel_shuffle(cat)
+        return out
diff --git a/modelR/layers/utils.py b/modelR/layers/utils.py
new file mode 100644
index 0000000..def0cfc
--- /dev/null
+++ b/modelR/layers/utils.py
@@ -0,0 +1,54 @@
+import argparse
+import random
+import math
+from PIL import Image
+import numpy as np
+
+import torch
+
+
+def str2bool(v):
+    if v.lower() in ['true', '1']:
+        return True
+    elif v.lower() in ['false', '0']:
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
+
+def count_params(model):
+    return sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+
+def accuracy(output, target, topk=(1,)):
+    """Computes the accuracy over the k top predictions for the specified values of k"""
+    with torch.no_grad():
+        maxk = max(topk)
+        batch_size = target.size(0)
+
+        _, pred = output.topk(maxk, 1, True, True)
+        pred = pred.t()
+        correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+        res = []
+        for k in topk:
+            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
+            res.append(correct_k.mul_(100.0 / batch_size))
+        return res
diff --git a/modelR/layers/wt_layer.py b/modelR/layers/wt_layer.py
new file mode 100644
index 0000000..c9f6b9e
--- /dev/null
+++ b/modelR/layers/wt_layer.py
@@ -0,0 +1,762 @@
+"""
+Custom PyTorch layers implementing DWT and IDWT for 1D, 2D and 3D tensors; boundary extension is not handled.
+Reconstruction is exact only when the image height and width are both even and the low-pass reconstruction filter has length 2; otherwise there is an error at the borders.
+"""
+import numpy as np
+import math
+from torch.nn import Module
+from .DWT_IDWT_Functions import *
+import pywt
+
+__all__ = ['DWT_1D', 'IDWT_1D', 'DWT_2D', 'IDWT_2D', 'DWT_3D', 'IDWT_3D', 'DWT_2D_tiny']
+class DWT_1D(Module):
+    """
+    input: the 1D data to be decomposed -- (N, C, Length)
+    output: lfc -- (N, C, Length/2)
+            hfc -- (N, C, Length/2)
+    """
+    def __init__(self, wavename):
+        """
+        1D discrete wavelet transform (DWT) for sequence decomposition
+        Decomposes a sequence into its low- and high-frequency components.
+        :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'.
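+        Example (editorial sketch, not from the original code; assumes an even Length and a
+        pywt wavelet such as 'haar'). Note that get_matrix() builds the transform matrices on
+        CUDA when it is available, so the input should live on the same device:
+            dwt = DWT_1D(wavename='haar')
+            lfc, hfc = dwt(x)    # x: (N, C, Length) -> lfc, hfc: (N, C, Length/2)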
+ """ + super(DWT_1D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.rec_lo + self.band_high = wavelet.rec_hi + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = self.input_height + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_h = matrix_h[:,(self.band_length_half-1):end] + matrix_g = matrix_g[:,(self.band_length_half-1):end] + if torch.cuda.is_available(): + self.matrix_low = torch.Tensor(matrix_h).cuda() + self.matrix_high = torch.Tensor(matrix_g).cuda() + else: + self.matrix_low = torch.Tensor(matrix_h) + self.matrix_high = torch.Tensor(matrix_g) + + def forward(self, input): + """ + input_low_frequency_component = \mathcal{L} * input + input_high_frequency_component = \mathcal{H} * input + :param input: the data to be decomposed + :return: the low-frequency and high-frequency components of the input data + """ + assert len(input.size()) == 3 + self.input_height = input.size()[-1] + self.get_matrix() + return DWTFunction_1D.apply(input, self.matrix_low, self.matrix_high) + + +class IDWT_1D(Module): + """ + input: lfc -- (N, C, Length/2) + hfc -- (N, C, Length/2) + output: the original data -- (N, C, Length) + """ + def __init__(self, wavename): + """ + 1D inverse DWT (IDWT) for sequence reconstruction + 用于序列重构的一维离散小波逆变换 IDWT + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. 
+ """ + super(IDWT_1D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.dec_lo + self.band_high = wavelet.dec_hi + self.band_low.reverse() + self.band_high.reverse() + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + generating the matrices: \mathcal{L}, \mathcal{H} + 生成变换矩阵 + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = self.input_height + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_h = matrix_h[:,(self.band_length_half-1):end] + matrix_g = matrix_g[:,(self.band_length_half-1):end] + if torch.cuda.is_available(): + self.matrix_low = torch.Tensor(matrix_h).cuda() + self.matrix_high = torch.Tensor(matrix_g).cuda() + else: + self.matrix_low = torch.Tensor(matrix_h) + self.matrix_high = torch.Tensor(matrix_g) + + def forward(self, L, H): + """ + :param L: the low-frequency component of the original data + :param H: the high-frequency component of the original data + :return: the original data + """ + assert len(L.size()) == len(H.size()) == 3 + self.input_height = L.size()[-1] + H.size()[-1] + self.get_matrix() + return IDWTFunction_1D.apply(L, H, self.matrix_low, self.matrix_high) + + +class DWT_2D_tiny(Module): + """ + input: the 2D data to be decomposed -- (N, C, H, W) + output -- lfc: (N, C, H/2, W/2) + #hfc_lh: (N, C, H/2, W/2) + #hfc_hl: (N, C, H/2, W/2) + #hfc_hh: (N, C, H/2, W/2) + ############## + DWT_2D_tiny only outputs the low-frequency component, which is used in WaveCNet; + ############# + the all four components could be get using DWT_2D, which is used in WaveUNet. + """ + def __init__(self, wavename): + """ + 2D discrete wavelet transform (DWT) for 2D image decomposition + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. 
+ """ + super(DWT_2D_tiny, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.rec_lo + self.band_high = wavelet.rec_hi + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = np.max((self.input_height, self.input_width)) + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + matrix_h_0 = matrix_h[0:(math.floor(self.input_height / 2)), 0:(self.input_height + self.band_length - 2)] + matrix_h_1 = matrix_h[0:(math.floor(self.input_width / 2)), 0:(self.input_width + self.band_length - 2)] + + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_g_0 = matrix_g[0:(self.input_height - math.floor(self.input_height / 2)),0:(self.input_height + self.band_length - 2)] + matrix_g_1 = matrix_g[0:(self.input_width - math.floor(self.input_width / 2)),0:(self.input_width + self.band_length - 2)] + + matrix_h_0 = matrix_h_0[:,(self.band_length_half-1):end] + matrix_h_1 = matrix_h_1[:,(self.band_length_half-1):end] + matrix_h_1 = np.transpose(matrix_h_1) + matrix_g_0 = matrix_g_0[:,(self.band_length_half-1):end] + matrix_g_1 = matrix_g_1[:,(self.band_length_half-1):end] + matrix_g_1 = np.transpose(matrix_g_1) + + if torch.cuda.is_available(): + self.matrix_low_0 = torch.Tensor(matrix_h_0).cuda() + self.matrix_low_1 = torch.Tensor(matrix_h_1).cuda() + self.matrix_high_0 = torch.Tensor(matrix_g_0).cuda() + self.matrix_high_1 = torch.Tensor(matrix_g_1).cuda() + else: + self.matrix_low_0 = torch.Tensor(matrix_h_0) + self.matrix_low_1 = torch.Tensor(matrix_h_1) + self.matrix_high_0 = torch.Tensor(matrix_g_0) + self.matrix_high_1 = torch.Tensor(matrix_g_1) + + + + def forward(self, input): + """ + input_lfc = \mathcal{L} * input * \mathcal{L}^T + #input_hfc_lh = \mathcal{H} * input * \mathcal{L}^T + #input_hfc_hl = \mathcal{L} * input * \mathcal{H}^T + #input_hfc_hh = \mathcal{H} * input * \mathcal{H}^T + :param input: the 2D data to be decomposed + :return: the low-frequency component of the input 2D data + """ + assert len(input.size()) == 4 + self.input_height = input.size()[-2] + self.input_width = input.size()[-1] + self.get_matrix() + return DWTFunction_2D_tiny.apply(input, self.matrix_low_0, self.matrix_low_1, self.matrix_high_0, self.matrix_high_1) + + +class DWT_2D(Module): + """ + input: the 2D data to be decomposed -- (N, C, H, W) + output -- lfc: (N, C, H/2, W/2) + hfc_lh: (N, C, H/2, W/2) + hfc_hl: (N, C, H/2, W/2) + hfc_hh: (N, C, H/2, W/2) + """ + def __init__(self, wavename): + """ + 2D discrete wavelet transform (DWT) for 2D image decomposition + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. 
+ """ + super(DWT_2D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.rec_lo + self.band_high = wavelet.rec_hi + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = np.max((self.input_height, self.input_width)) + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + matrix_h_0 = matrix_h[0:(math.floor(self.input_height / 2)), 0:(self.input_height + self.band_length - 2)] + matrix_h_1 = matrix_h[0:(math.floor(self.input_width / 2)), 0:(self.input_width + self.band_length - 2)] + + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_g_0 = matrix_g[0:(self.input_height - math.floor(self.input_height / 2)),0:(self.input_height + self.band_length - 2)] + matrix_g_1 = matrix_g[0:(self.input_width - math.floor(self.input_width / 2)),0:(self.input_width + self.band_length - 2)] + + matrix_h_0 = matrix_h_0[:,(self.band_length_half-1):end] + matrix_h_1 = matrix_h_1[:,(self.band_length_half-1):end] + matrix_h_1 = np.transpose(matrix_h_1) + matrix_g_0 = matrix_g_0[:,(self.band_length_half-1):end] + matrix_g_1 = matrix_g_1[:,(self.band_length_half-1):end] + matrix_g_1 = np.transpose(matrix_g_1) + + if torch.cuda.is_available(): + self.matrix_low_0 = torch.Tensor(matrix_h_0).cuda() + self.matrix_low_1 = torch.Tensor(matrix_h_1).cuda() + self.matrix_high_0 = torch.Tensor(matrix_g_0).cuda() + self.matrix_high_1 = torch.Tensor(matrix_g_1).cuda() + else: + self.matrix_low_0 = torch.Tensor(matrix_h_0) + self.matrix_low_1 = torch.Tensor(matrix_h_1) + self.matrix_high_0 = torch.Tensor(matrix_g_0) + self.matrix_high_1 = torch.Tensor(matrix_g_1) + + def forward(self, input): + """ + input_lfc = \mathcal{L} * input * \mathcal{L}^T + input_hfc_lh = \mathcal{H} * input * \mathcal{L}^T + input_hfc_hl = \mathcal{L} * input * \mathcal{H}^T + input_hfc_hh = \mathcal{H} * input * \mathcal{H}^T + :param input: the 2D data to be decomposed + :return: the low-frequency and high-frequency components of the input 2D data + """ + assert len(input.size()) == 4 + self.input_height = input.size()[-2] + self.input_width = input.size()[-1] + self.get_matrix() + return DWTFunction_2D.apply(input, self.matrix_low_0, self.matrix_low_1, self.matrix_high_0, self.matrix_high_1) + + +class IDWT_2D(Module): + """ + input: lfc -- (N, C, H/2, W/2) + hfc_lh -- (N, C, H/2, W/2) + hfc_hl -- (N, C, H/2, W/2) + hfc_hh -- (N, C, H/2, W/2) + output: the original 2D data -- (N, C, H, W) + """ + def __init__(self, wavename): + """ + 2D inverse DWT (IDWT) for 2D image reconstruction + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. 
+ """ + super(IDWT_2D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.dec_lo + self.band_low.reverse() + self.band_high = wavelet.dec_hi + self.band_high.reverse() + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = np.max((self.input_height, self.input_width)) + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + matrix_h_0 = matrix_h[0:(math.floor(self.input_height / 2)), 0:(self.input_height + self.band_length - 2)] + matrix_h_1 = matrix_h[0:(math.floor(self.input_width / 2)), 0:(self.input_width + self.band_length - 2)] + + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_g_0 = matrix_g[0:(self.input_height - math.floor(self.input_height / 2)),0:(self.input_height + self.band_length - 2)] + matrix_g_1 = matrix_g[0:(self.input_width - math.floor(self.input_width / 2)),0:(self.input_width + self.band_length - 2)] + + matrix_h_0 = matrix_h_0[:,(self.band_length_half-1):end] + matrix_h_1 = matrix_h_1[:,(self.band_length_half-1):end] + matrix_h_1 = np.transpose(matrix_h_1) + matrix_g_0 = matrix_g_0[:,(self.band_length_half-1):end] + matrix_g_1 = matrix_g_1[:,(self.band_length_half-1):end] + matrix_g_1 = np.transpose(matrix_g_1) + if torch.cuda.is_available(): + self.matrix_low_0 = torch.Tensor(matrix_h_0).cuda() + self.matrix_low_1 = torch.Tensor(matrix_h_1).cuda() + self.matrix_high_0 = torch.Tensor(matrix_g_0).cuda() + self.matrix_high_1 = torch.Tensor(matrix_g_1).cuda() + else: + self.matrix_low_0 = torch.Tensor(matrix_h_0) + self.matrix_low_1 = torch.Tensor(matrix_h_1) + self.matrix_high_0 = torch.Tensor(matrix_g_0) + self.matrix_high_1 = torch.Tensor(matrix_g_1) + + def forward(self, LL, LH, HL, HH): + """ + recontructing the original 2D data + the original 2D data = \mathcal{L}^T * lfc * \mathcal{L} + + \mathcal{H}^T * hfc_lh * \mathcal{L} + + \mathcal{L}^T * hfc_hl * \mathcal{H} + + \mathcal{H}^T * hfc_hh * \mathcal{H} + :param LL: the low-frequency component + :param LH: the high-frequency component, hfc_lh + :param HL: the high-frequency component, hfc_hl + :param HH: the high-frequency component, hfc_hh + :return: the original 2D data + """ + assert len(LL.size()) == len(LH.size()) == len(HL.size()) == len(HH.size()) == 4 + self.input_height = LL.size()[-2] + HH.size()[-2] + self.input_width = LL.size()[-1] + HH.size()[-1] + self.get_matrix() + return IDWTFunction_2D.apply(LL, LH, HL, HH, self.matrix_low_0, self.matrix_low_1, self.matrix_high_0, self.matrix_high_1) + + +class DWT_3D(Module): + """ + input: the 3D data to be decomposed -- (N, C, D, H, W) + output: lfc -- (N, C, D/2, H/2, W/2) + hfc_llh -- (N, C, D/2, H/2, W/2) + hfc_lhl -- (N, C, D/2, H/2, W/2) + hfc_lhh -- (N, C, D/2, H/2, W/2) + hfc_hll -- (N, C, D/2, H/2, W/2) + hfc_hlh -- (N, C, D/2, H/2, W/2) + hfc_hhl -- (N, C, D/2, H/2, W/2) + hfc_hhh -- (N, C, D/2, H/2, W/2) + """ + def __init__(self, wavename): 
+ """ + 3D discrete wavelet transform (DWT) for 3D data decomposition + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. + """ + super(DWT_3D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.rec_lo + self.band_high = wavelet.rec_hi + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = np.max((self.input_height, self.input_width)) + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + matrix_h_0 = matrix_h[0:(math.floor(self.input_height / 2)), 0:(self.input_height + self.band_length - 2)] + matrix_h_1 = matrix_h[0:(math.floor(self.input_width / 2)), 0:(self.input_width + self.band_length - 2)] + matrix_h_2 = matrix_h[0:(math.floor(self.input_depth / 2)), 0:(self.input_depth + self.band_length - 2)] + + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_g_0 = matrix_g[0:(self.input_height - math.floor(self.input_height / 2)),0:(self.input_height + self.band_length - 2)] + matrix_g_1 = matrix_g[0:(self.input_width - math.floor(self.input_width / 2)),0:(self.input_width + self.band_length - 2)] + matrix_g_2 = matrix_g[0:(self.input_depth - math.floor(self.input_depth / 2)),0:(self.input_depth + self.band_length - 2)] + + matrix_h_0 = matrix_h_0[:,(self.band_length_half-1):end] + matrix_h_1 = matrix_h_1[:,(self.band_length_half-1):end] + matrix_h_1 = np.transpose(matrix_h_1) + matrix_h_2 = matrix_h_2[:,(self.band_length_half-1):end] + + matrix_g_0 = matrix_g_0[:,(self.band_length_half-1):end] + matrix_g_1 = matrix_g_1[:,(self.band_length_half-1):end] + matrix_g_1 = np.transpose(matrix_g_1) + matrix_g_2 = matrix_g_2[:,(self.band_length_half-1):end] + if torch.cuda.is_available(): + self.matrix_low_0 = torch.Tensor(matrix_h_0).cuda() + self.matrix_low_1 = torch.Tensor(matrix_h_1).cuda() + self.matrix_low_2 = torch.Tensor(matrix_h_2).cuda() + self.matrix_high_0 = torch.Tensor(matrix_g_0).cuda() + self.matrix_high_1 = torch.Tensor(matrix_g_1).cuda() + self.matrix_high_2 = torch.Tensor(matrix_g_2).cuda() + else: + self.matrix_low_0 = torch.Tensor(matrix_h_0) + self.matrix_low_1 = torch.Tensor(matrix_h_1) + self.matrix_low_2 = torch.Tensor(matrix_h_2) + self.matrix_high_0 = torch.Tensor(matrix_g_0) + self.matrix_high_1 = torch.Tensor(matrix_g_1) + self.matrix_high_2 = torch.Tensor(matrix_g_2) + + def forward(self, input): + """ + :param input: the 3D data to be decomposed + :return: the eight components of the input data, one low-frequency and seven high-frequency components + """ + assert len(input.size()) == 5 + self.input_depth = input.size()[-3] + self.input_height = input.size()[-2] + self.input_width = input.size()[-1] + self.get_matrix() + return DWTFunction_3D.apply(input, self.matrix_low_0, self.matrix_low_1, self.matrix_low_2, + self.matrix_high_0, self.matrix_high_1, self.matrix_high_2) + + +class IDWT_3D(Module): + """ + input: lfc -- (N, C, D/2, H/2, W/2) 
+ hfc_llh -- (N, C, D/2, H/2, W/2) + hfc_lhl -- (N, C, D/2, H/2, W/2) + hfc_lhh -- (N, C, D/2, H/2, W/2) + hfc_hll -- (N, C, D/2, H/2, W/2) + hfc_hlh -- (N, C, D/2, H/2, W/2) + hfc_hhl -- (N, C, D/2, H/2, W/2) + hfc_hhh -- (N, C, D/2, H/2, W/2) + output: the original 3D data -- (N, C, D, H, W) + """ + def __init__(self, wavename): + """ + 3D inverse DWT (IDWT) for 3D data reconstruction + :param wavename: pywt.wavelist(); in the paper, 'chx.y' denotes 'biorx.y'. + """ + super(IDWT_3D, self).__init__() + wavelet = pywt.Wavelet(wavename) + self.band_low = wavelet.dec_lo + self.band_high = wavelet.dec_hi + self.band_low.reverse() + self.band_high.reverse() + assert len(self.band_low) == len(self.band_high) + self.band_length = len(self.band_low) + assert self.band_length % 2 == 0 + self.band_length_half = math.floor(self.band_length / 2) + + def get_matrix(self): + """ + 生成变换矩阵 + generating the matrices: \mathcal{L}, \mathcal{H} + :return: self.matrix_low = \mathcal{L}, self.matrix_high = \mathcal{H} + """ + L1 = np.max((self.input_height, self.input_width)) + L = math.floor(L1 / 2) + matrix_h = np.zeros( ( L, L1 + self.band_length - 2 ) ) + matrix_g = np.zeros( ( L1 - L, L1 + self.band_length - 2 ) ) + end = None if self.band_length_half == 1 else (-self.band_length_half+1) + + index = 0 + for i in range(L): + for j in range(self.band_length): + matrix_h[i, index+j] = self.band_low[j] + index += 2 + matrix_h_0 = matrix_h[0:(math.floor(self.input_height / 2)), 0:(self.input_height + self.band_length - 2)] + matrix_h_1 = matrix_h[0:(math.floor(self.input_width / 2)), 0:(self.input_width + self.band_length - 2)] + matrix_h_2 = matrix_h[0:(math.floor(self.input_depth / 2)), 0:(self.input_depth + self.band_length - 2)] + + index = 0 + for i in range(L1 - L): + for j in range(self.band_length): + matrix_g[i, index+j] = self.band_high[j] + index += 2 + matrix_g_0 = matrix_g[0:(self.input_height - math.floor(self.input_height / 2)),0:(self.input_height + self.band_length - 2)] + matrix_g_1 = matrix_g[0:(self.input_width - math.floor(self.input_width / 2)),0:(self.input_width + self.band_length - 2)] + matrix_g_2 = matrix_g[0:(self.input_depth - math.floor(self.input_depth / 2)),0:(self.input_depth + self.band_length - 2)] + + matrix_h_0 = matrix_h_0[:,(self.band_length_half-1):end] + matrix_h_1 = matrix_h_1[:,(self.band_length_half-1):end] + matrix_h_1 = np.transpose(matrix_h_1) + matrix_h_2 = matrix_h_2[:,(self.band_length_half-1):end] + + matrix_g_0 = matrix_g_0[:,(self.band_length_half-1):end] + matrix_g_1 = matrix_g_1[:,(self.band_length_half-1):end] + matrix_g_1 = np.transpose(matrix_g_1) + matrix_g_2 = matrix_g_2[:,(self.band_length_half-1):end] + if torch.cuda.is_available(): + self.matrix_low_0 = torch.Tensor(matrix_h_0).cuda() + self.matrix_low_1 = torch.Tensor(matrix_h_1).cuda() + self.matrix_low_2 = torch.Tensor(matrix_h_2).cuda() + self.matrix_high_0 = torch.Tensor(matrix_g_0).cuda() + self.matrix_high_1 = torch.Tensor(matrix_g_1).cuda() + self.matrix_high_2 = torch.Tensor(matrix_g_2).cuda() + else: + self.matrix_low_0 = torch.Tensor(matrix_h_0) + self.matrix_low_1 = torch.Tensor(matrix_h_1) + self.matrix_low_2 = torch.Tensor(matrix_h_2) + self.matrix_high_0 = torch.Tensor(matrix_g_0) + self.matrix_high_1 = torch.Tensor(matrix_g_1) + self.matrix_high_2 = torch.Tensor(matrix_g_2) + + def forward(self, LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH): + """ + :param LLL: the low-frequency component, lfc + :param LLH: the high-frequency componetn, hfc_llh + :param LHL: the high-frequency componetn, 
hfc_lhl + :param LHH: the high-frequency componetn, hfc_lhh + :param HLL: the high-frequency componetn, hfc_hll + :param HLH: the high-frequency componetn, hfc_hlh + :param HHL: the high-frequency componetn, hfc_hhl + :param HHH: the high-frequency componetn, hfc_hhh + :return: the original 3D input data + """ + assert len(LLL.size()) == len(LLH.size()) == len(LHL.size()) == len(LHH.size()) == 5 + assert len(HLL.size()) == len(HLH.size()) == len(HHL.size()) == len(HHH.size()) == 5 + self.input_depth = LLL.size()[-3] + HHH.size()[-3] + self.input_height = LLL.size()[-2] + HHH.size()[-2] + self.input_width = LLL.size()[-1] + HHH.size()[-1] + self.get_matrix() + return IDWTFunction_3D.apply(LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH, + self.matrix_low_0, self.matrix_low_1, self.matrix_low_2, + self.matrix_high_0, self.matrix_high_1, self.matrix_high_2) + + +if __name__ == '__main__': + from datetime import datetime + from torch.autograd import gradcheck + wavelet = pywt.Wavelet('bior1.1') + h = wavelet.rec_lo + g = wavelet.rec_hi + h_ = wavelet.dec_lo + g_ = wavelet.dec_hi + h_.reverse() + g_.reverse() + + """ + image_full_name = '/home/li-qiufu/Pictures/standard_test_images/lena_color_512.tif' + image = cv2.imread(image_full_name, flags = 1) + image = image[0:512,0:512,:] + print(image.shape) + height, width, channel = image.shape + #image = image.reshape((1,height,width)) + t0 = datetime.now() + for index in range(100): + m0 = DWT_2D(band_low = h, band_high = g) + image_tensor = torch.Tensor(image) + image_tensor.unsqueeze_(dim = 0) + print('image_re shape: {}'.format(image_tensor.size())) + image_tensor.transpose_(1,3) + print('image_re shape: {}'.format(image_tensor.size())) + image_tensor.transpose_(2,3) + print('image_re shape: {}'.format(image_tensor.size())) + image_tensor.requires_grad = False + LL, LH, HL, HH = m0(image_tensor) + matrix_low_0 = torch.Tensor(m0.matrix_low_0) + matrix_low_1 = torch.Tensor(m0.matrix_low_1) + matrix_high_0 = torch.Tensor(m0.matrix_high_0) + matrix_high_1 = torch.Tensor(m0.matrix_high_1) + + #image_tensor.requires_grad = True + #input = (image_tensor.double(), matrix_low_0.double(), matrix_low_1.double(), matrix_high_0.double(), matrix_high_1.double()) + #test = gradcheck(DWTFunction_2D.apply, input) + #print(test) + #print(LL.requires_grad) + #print(LH.requires_grad) + #print(HL.requires_grad) + #print(HH.requires_grad) + #LL.requires_grad = True + #input = (LL.double(), LH.double(), HL.double(), HH.double(), matrix_low_0.double(), matrix_low_1.double(), matrix_high_0.double(), matrix_high_1.double()) + #test = gradcheck(IDWTFunction_2D.apply, input) + #print(test) + + m1 = IDWT_2D(band_low = h_, band_high = g_) + image_re = m1(LL,LH,HL,HH) + t1 = datetime.now() + image_re.transpose_(2,3) + image_re.transpose_(1,3) + image_re_np = image_re.detach().numpy() + print('image_re shape: {}'.format(image_re_np.shape)) + + image_zero = image - image_re_np[0] + print(np.max(image_zero), np.min(image_zero)) + print(image_zero[:,8]) + print('taking {} secondes'.format(t1 - t0)) + cv2.imshow('reconstruction', image_re_np[0]/255) + cv2.imshow('image_zero', image_zero/255) + cv2.waitKey(0) + """ + """ + image_full_name = '/home/liqiufu/Pictures/standard_test_images/lena_color_512.tif' + image = cv2.imread(image_full_name, flags = 1) + image = image[0:512,0:512,:] + print(image.shape) + image_3d = np.concatenate((image, image, image, image, image, image), axis = 2) + print(image_3d.shape) + image_tensor = torch.Tensor(image_3d) + #image_tensor = 
image_tensor.transpose(dim0 = 2, dim1 = 1) + #image_tensor = image_tensor.transpose(dim0 = 1, dim1 = 0) + image_tensor.unsqueeze_(dim = 0) + image_tensor.unsqueeze_(dim = 0) + t0 = datetime.now() + for index in range(10): + m0 = DWT_3D(wavename = 'haar') + print('image_re shape: {}'.format(image_tensor.size())) + image_tensor.requires_grad = False + LLL, LLH, LHL, LHH, HLL, HLH, HHL, HHH = m0(image_tensor) + matrix_low_0 = torch.Tensor(m0.matrix_low_0) + matrix_low_1 = torch.Tensor(m0.matrix_low_1) + matrix_low_2 = torch.Tensor(m0.matrix_low_2) + matrix_high_0 = torch.Tensor(m0.matrix_high_0) + matrix_high_1 = torch.Tensor(m0.matrix_high_1) + matrix_high_2 = torch.Tensor(m0.matrix_high_2) + + #image_tensor.requires_grad = True + #input = (image_tensor.double(), matrix_low_0.double(), matrix_low_1.double(), matrix_low_2.double(), + # matrix_high_0.double(), matrix_high_1.double(), matrix_high_2.double()) + #test = gradcheck(DWTFunction_3D.apply, input) + #print('testing dwt3d -- {}'.format(test)) + #LLL.requires_grad = True + #input = (LLL.double(), LLH.double(), LHL.double(), LHH.double(), + # HLL.double(), HLH.double(), HHL.double(), HHH.double(), + # matrix_low_0.double(), matrix_low_1.double(), matrix_low_2.double(), + # matrix_high_0.double(), matrix_high_1.double(), matrix_high_2.double()) + #test = gradcheck(IDWTFunction_3D.apply, input) + #print('testing idwt3d -- {}'.format(test)) + + m1 = IDWT_3D(wavename = 'haar') + image_re = m1(LLL,LLH,LHL,LHH,HLL,HLH,HHL,HHH) + t1 = datetime.now() + image_re.squeeze_(dim = 0) + image_re.squeeze_(dim = 0) + #image_re.transpose_(0,1) + #image_re.transpose_(1,2) + image_re_np = image_re.detach().numpy() + print('image_re shape: {}'.format(image_re_np.shape)) + + image_zero = image - image_re_np[:,:,0:3] + print(np.max(image_zero), np.min(image_zero)) + #print(image_zero[:,8,0]) + print('taking {} secondes'.format(t1 - t0)) + cv2.imshow('reconstruction', image_re_np[:,:,0:3]/255) + cv2.imshow('image_zero', image_zero/255) + cv2.waitKey(0) + """ + + """ + import matplotlib.pyplot as plt + import numpy as np + vector_np = np.array(list(range(1280)))#.reshape((128,1)) + + print(vector_np.shape) + t0 = datetime.now() + for index in range(100): + vector = torch.Tensor(vector_np) + vector.unsqueeze_(dim = 0) + vector.unsqueeze_(dim = 0) + m0 = DWT_1D(band_low = h, band_high = g) + L, H = m0(vector) + + #matrix_low = torch.Tensor(m0.matrix_low) + #matrix_high = torch.Tensor(m0.matrix_high) + #vector.requires_grad = True + #input = (vector.double(), matrix_low.double(), matrix_high.double()) + #test = gradcheck(DWTFunction_1D.apply, input) + #print('testing 1D-DWT: {}'.format(test)) + #print(L.requires_grad) + #print(H.requires_grad) + #L.requires_grad = True + #H.requires_grad = True + #input = (L.double(), H.double(), matrix_low.double(), matrix_high.double()) + #test = gradcheck(IDWTFunction_1D.apply, input) + #print('testing 1D-IDWT: {}'.format(test)) + + m1 = IDWT_1D(band_low = h_, band_high = g_) + vector_re = m1(L, H) + t1 = datetime.now() + vector_re_np = vector_re.detach().numpy() + print('image_re shape: {}'.format(vector_re_np.shape)) + + vector_zero = vector_np - vector_re_np.reshape(vector_np.shape) + print(np.max(vector_zero), np.min(vector_zero)) + print(vector_zero[:8]) + print('taking {} secondes'.format(t1 - t0)) + """ diff --git a/modelR/lodet.py b/modelR/lodet.py new file mode 100644 index 0000000..64b13ef --- /dev/null +++ b/modelR/lodet.py @@ -0,0 +1,41 @@ +import sys +sys.path.append("..") +import torch.nn as nn +from 
modelR.backbones.mobilenetv2 import MobilenetV2
+from modelR.necks.csa_drf_fpn import CSA_DRF_FPN
+from modelR.head.dsc_head import DSC_Head
+from utils.utils_basic import *
+
+class LODet(nn.Module):
+    """
+    Note: in __init__(), the modules must be defined in order, because the weight file stores the parameters in that order.
+    """
+    def __init__(self, pre_weights=None):
+        super(LODet, self).__init__()
+        self.__anchors = torch.FloatTensor(cfg.MODEL["ANCHORS"])
+        self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"])
+        self.__nC = cfg.DATA["NUM"]
+        self.__backnone = MobilenetV2(weight_path=pre_weights, extract_list=["6", "13", "conv"])#"17"
+        self.__neck = CSA_DRF_FPN(fileters_in=[1280, 96, 32])
+        # small
+        self.__head_s = DSC_Head(nC=self.__nC, anchors=self.__anchors[0], stride=self.__strides[0])
+        # medium
+        self.__head_m = DSC_Head(nC=self.__nC, anchors=self.__anchors[1], stride=self.__strides[1])
+        # large
+        self.__head_l = DSC_Head(nC=self.__nC, anchors=self.__anchors[2], stride=self.__strides[2])
+
+    def forward(self, x):
+        out = []
+        x_s, x_m, x_l = self.__backnone(x)
+        x_s, x_m, x_l = self.__neck(x_l, x_m, x_s)
+        out.append(self.__head_s(x_s))
+        out.append(self.__head_m(x_m))
+        out.append(self.__head_l(x_l))
+        if self.training:
+            p, p_d = list(zip(*out))
+            return p, p_d  # small, medium, large
+        else:
+            p, p_d = list(zip(*out))
+            return p, torch.cat(p_d, 0)
+
+
diff --git a/modelR/loss/loss.py b/modelR/loss/loss.py
new file mode 100644
index 0000000..1d9b124
--- /dev/null
+++ b/modelR/loss/loss.py
@@ -0,0 +1,110 @@
+import sys
+sys.path.append("../utils")
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from utils import utils_basic
+import config.cfg_lodet as cfg
+
+class FocalLoss(nn.Module):
+    def __init__(self, gamma=2.0, alpha=1.0, reduction="mean"):
+        super(FocalLoss, self).__init__()
+        self.__gamma = gamma
+        self.__alpha = alpha
+        self.__loss = nn.BCEWithLogitsLoss(reduction=reduction)
+    def forward(self, input, target):
+        loss = self.__loss(input=input, target=target)
+        loss *= self.__alpha * torch.pow(torch.abs(target - torch.sigmoid(input)), self.__gamma)
+        return loss
+
+class Loss(nn.Module):
+    def __init__(self, anchors, strides, iou_threshold_loss=0.5):
+        super(Loss, self).__init__()
+        self.__iou_threshold_loss = iou_threshold_loss
+        self.__strides = strides
+        self.__scale_factor = cfg.SCALE_FACTOR
+        self.__scale_factor_a = cfg.SCALE_FACTOR_A
+
+    def forward(self, p, p_d, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes):
+        strides = self.__strides
+        loss_s, loss_s_iou, loss_s_conf, loss_s_cls, loss_s_a, loss_s_r, loss_s_s = self.__cal_loss_per_layer(p[0], p_d[0], label_sbbox,
+                                                                                                              sbboxes, strides[0])
+        loss_m, loss_m_iou, loss_m_conf, loss_m_cls, loss_m_a, loss_m_r, loss_m_s = self.__cal_loss_per_layer(p[1], p_d[1], label_mbbox,
+                                                                                                              mbboxes, strides[1])
+        loss_l, loss_l_iou, loss_l_conf, loss_l_cls, loss_l_a, loss_l_r, loss_l_s = self.__cal_loss_per_layer(p[2], p_d[2], label_lbbox,
+                                                                                                              lbboxes, strides[2])
+        loss = loss_l + loss_m + loss_s
+        loss_iou = loss_s_iou + loss_m_iou + loss_l_iou
+        loss_conf = loss_s_conf + loss_m_conf + loss_l_conf
+        loss_cls = loss_s_cls + loss_m_cls + loss_l_cls
+        loss_a = loss_s_a + loss_m_a + loss_l_a
+        loss_r = loss_s_r + loss_m_r + loss_l_r
+        loss_s = loss_s_s + loss_m_s + loss_l_s
+        return loss, loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s
+
+    def smooth_l1_loss(self, input, target, beta=1.
/ 9, size_average=True): + n = torch.abs(input - target) + cond = n < beta + loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) + return loss + + def __cal_loss_per_layer(self, p, p_d, label, bboxes, stride): + batch_size, grid = p.shape[:2] + img_size = stride * grid + p_d_xywh = p_d[..., :4] + p_d_a = p_d[..., 4:8] + p_d_r = p_d[..., 8:9] + p_conf = p[..., 9:10] + p_cls = p[..., 10:] + + label_xywh = label[..., :4] + label_a = label[..., 4:8] + label_r = label[...,8:9] + label_s13 = label[...,9:10] + label_s24 = label[..., 10:11] + label_obj_mask = label[..., 11:12] + label_mix = label[..., 12:13] + label_cls = label[..., 13:] + + if cfg.TRAIN["IOU_TYPE"] == 'GIOU': + xiou = utils_basic.GIOU_xywh_torch(p_d_xywh, label_xywh).unsqueeze(-1) + elif cfg.TRAIN["IOU_TYPE"] == 'CIOU': + xiou = utils_basic.CIOU_xywh_torch(p_d_xywh, label_xywh).unsqueeze(-1) + bbox_loss_scale = self.__scale_factor - (self.__scale_factor-1.0) * label_xywh[..., 2:3] * label_xywh[..., 3:4] / (img_size ** 2) + loss_iou = label_obj_mask * bbox_loss_scale * (1.0 - xiou) * label_mix + + #loss r + loss_r = label_obj_mask * self.smooth_l1_loss (p_d_r, label_r) * label_mix * 16 + a_sum = self.smooth_l1_loss(p_d_a, label_a) + a_loss_scale = 1 + (self.__scale_factor_a -1)* (label_xywh[..., 2:3] * label_xywh[...,3:4] / (img_size ** 2)) + loss_a = label_obj_mask * a_sum * label_mix * a_loss_scale + onesa = torch.ones_like(p_d_r) + d13 = p_d_xywh[..., 2:3] * torch.abs(onesa - p_d_a[..., 0:1] - p_d_a[..., 2:3]) + s13 = p_d_xywh[..., 3:4] / torch.sqrt(torch.mul(d13, d13) + torch.mul(p_d_xywh[..., 3:4], p_d_xywh[..., 3:4])) + d24 = p_d_xywh[..., 3:4] * torch.abs(onesa - p_d_a[..., 1:2] - p_d_a[..., 3:4]) + s24 = p_d_xywh[..., 2:3] / torch.sqrt(torch.mul(d24, d24) + torch.mul(p_d_xywh[..., 2:3], p_d_xywh[..., 2:3])) + s1234sum = self.smooth_l1_loss(s13, label_s13)*(1.0/(label_s13+1e-8)) + self.smooth_l1_loss(s24, label_s24)*(1.0/(label_s24+1e-8)) + loss_s = label_obj_mask * s1234sum * label_mix + + FOCAL = FocalLoss(gamma=2, alpha=1.0, reduction="none") + iou = utils_basic.iou_xywh_torch(p_d_xywh.unsqueeze(4), bboxes.unsqueeze(1).unsqueeze(1).unsqueeze(1)) + iou_max = iou.max(-1, keepdim=True)[0] + label_noobj_mask = (1.0 - label_obj_mask) * (iou_max < self.__iou_threshold_loss).float() + + loss_conf = (label_obj_mask * FOCAL(input=p_conf, target=label_obj_mask) + + label_noobj_mask * FOCAL(input=p_conf, target=label_obj_mask)) * label_mix + + # loss classes + BCE = nn.BCEWithLogitsLoss(reduction="none") + loss_cls = label_obj_mask * BCE(input=p_cls, target=label_cls) * label_mix + + loss_iou = (torch.sum(loss_iou)) / batch_size + loss_conf = (torch.sum(loss_conf)) / batch_size + loss_cls = (torch.sum(loss_cls)) / batch_size + loss_a = (torch.sum(loss_a)) / batch_size + loss_r = (torch.sum(loss_r)) / batch_size + loss_s = (torch.sum(loss_s)) / batch_size + + loss = loss_iou + (loss_a + loss_r ) + loss_conf + loss_cls + loss_s + + return loss, loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s diff --git a/modelR/necks/csa_drf_fpn.py b/modelR/necks/csa_drf_fpn.py new file mode 100644 index 0000000..8f0d5b7 --- /dev/null +++ b/modelR/necks/csa_drf_fpn.py @@ -0,0 +1,152 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from dropblock import DropBlock2D, LinearScheduler +from model.layers.convolutions import Convolutional, Deformable_Convolutional +from model.layers.shuffle_blocks import Shuffle_new, Shuffle_Cond_RFA, Shuffle_new_s +import config.cfg_lodet as cfg + +class Upsample(nn.Module): + def 
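# --- Illustrative sketch (not part of the patch): numerical behaviour of the two loss
# building blocks defined above. FocalLoss multiplies the BCE term by
# |target - sigmoid(logit)|**gamma, so well-classified samples are down-weighted;
# smooth_l1_loss is quadratic below beta (default 1/9) and linear above it. The import
# assumes modelR is importable as a package from the repo root.
import torch
from modelR.loss.loss import FocalLoss

focal = FocalLoss(gamma=2.0, alpha=1.0, reduction="none")
logits = torch.tensor([4.0, 0.0, -4.0])        # easy positive, uncertain, hard positive
target = torch.ones(3)
print(focal(logits, target))                   # the easy sample contributes almost nothing

err = torch.tensor([0.05, 1.0])                # same piecewise rule as smooth_l1_loss above
beta = 1.0 / 9
print(torch.where(err < beta, 0.5 * err ** 2 / beta, err - 0.5 * beta))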
__init__(self, scale_factor=1, mode='nearest'): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + +class Route(nn.Module): + def __init__(self): + super(Route, self).__init__() + + def forward(self, x1, x2): + """ + x1 means previous output; x2 means current output + """ + out = torch.cat((x2, x1), dim=1) + return out + +class CSA_DRF_FPN(nn.Module): + def __init__(self, fileters_in, model_size=1): + super(CSA_DRF_FPN, self).__init__() + + fi_0, fi_1, fi_2 = fileters_in + self.__fo = (cfg.DATA["NUM"]+5 +5)*cfg.MODEL["ANCHORS_PER_SCLAE"] + fm_0 = int(1024*model_size) + fm_1 = fm_0//2 + fm_2 = fm_0 // 4 + + self.__dcn2_1 = Deformable_Convolutional(fi_2, fi_2, kernel_size=3, stride=2, pad=1, groups=1) + self.__routdcn2_1 = Route() + + self.__dcn1_0 = Deformable_Convolutional(fi_1+fi_2, fi_1, kernel_size=3, stride=2, pad=1, groups=1) + + self.__routdcn1_0 = Route() + # large + self.__conv_set_0 = nn.Sequential( + Convolutional(filters_in=fi_0 + fi_1, filters_out=fm_0, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky"), + #Shuffle_new(filters_in=fm_0, filters_out=fm_0, groups=8), + Shuffle_Cond_RFA(filters_in=fm_0, filters_out=fm_0, groups=8, dila_l=4, dila_r=6),#, dila_l=4, dila_r=6 + Shuffle_new_s(filters_in=fm_0//2, filters_out=fm_0, groups=8), + ) + self.__conv0_0 = Shuffle_new(filters_in=fm_0, filters_out=fm_0, groups=4) + self.__conv0_1 = Convolutional(filters_in=fm_0, filters_out=self.__fo, kernel_size=1, stride=1, pad=0) + + self.__conv0up1 = nn.Conv2d(fm_0, fm_1, kernel_size=1, stride=1, padding=0) + self.__upsample0_1 = Upsample(scale_factor=2) + + # medium + self.__pw1 = Convolutional(filters_in=fi_2+fi_1, filters_out=fm_1, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky")#, groups=fi_2+fi_1 + self.__shuffle10 = Shuffle_new(filters_in=fm_1, filters_out=fm_1, groups=4) + self.__route0_1 = Route() + self.__conv_set_1 = nn.Sequential( + Convolutional(filters_in=fm_1*2, filters_out=fm_1, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky"), + Shuffle_Cond_RFA(filters_in=fm_1, filters_out=fm_1, groups=4, dila_l=2, dila_r=3),#, dila_l=2, dila_r=3 + #Shuffle_new(filters_in=fm_1, filters_out=fm_1, groups=4), + LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., stop_value=0.1, nr_steps=5), + Shuffle_new_s(filters_in=fm_1//2, filters_out=fm_1, groups=4), + ) + self.__conv1_0 = Shuffle_new(filters_in=fm_1, filters_out=fm_1, groups=4) + self.__conv1_1 = Convolutional(filters_in=fm_1, filters_out=self.__fo, kernel_size=1, stride=1, pad=0) + + self.__conv1up2 = nn.Conv2d(fm_1, fm_2, kernel_size=1, stride=1, padding=0) + self.__upsample1_2 = Upsample(scale_factor=2) + + + # small + #self.__dcn2 = Deformable_Convolutional(fi_2, fi_2, kernel_size=3, stride=1, pad=1, groups=1, norm="bn") + self.__pw2 = Convolutional(filters_in=fi_2, filters_out=fm_2, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky") + self.__shuffle20 = Shuffle_new(filters_in=fm_2, filters_out=fm_2, groups=4) + self.__route1_2 = Route() + self.__conv_set_2 = nn.Sequential( + Convolutional(filters_in=fm_2*2, filters_out=fm_2, kernel_size=1, stride=1, pad=0, norm="bn", activate="leaky"), + Shuffle_new(filters_in=fm_2, filters_out=fm_2, groups=4), + #Shuffle_Cond_RFA(filters_in=fm_2, filters_out=fm_2, groups=4, dila_l=1, dila_r=2), + LinearScheduler(DropBlock2D(block_size=3, drop_prob=0.1), start_value=0., stop_value=0.1, nr_steps=5), + 
Shuffle_new(filters_in=fm_2, filters_out=fm_2, groups=4), + ) + self.__conv2_0 = Shuffle_new(filters_in=fm_2, filters_out=fm_2, groups=4) + self.__conv2_1 = Convolutional(filters_in=fm_2, filters_out=self.__fo, kernel_size=1, stride=1, pad=0) + + self.__initialize_weights() + + + def __initialize_weights(self): + print("**" * 10, "Initing FPN_YOLOV3 weights", "**" * 10) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0, 0.01) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0,0.01) + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + def forward(self, x0, x1, x2): + + dcn2_1 = self.__dcn2_1(x2) + routdcn2_1 = self.__routdcn2_1(x1, dcn2_1) + + dcn1_0 = self.__dcn1_0(routdcn2_1) + routdcn1_0 = self.__routdcn1_0(x0, dcn1_0) + + # large + conv_set_0 = self.__conv_set_0(routdcn1_0) + conv0up1 = self.__conv0up1(conv_set_0) + upsample0_1 = self.__upsample0_1(conv0up1) + + # medium + pw1 = self.__pw1(routdcn2_1) + shuffle10 = self.__shuffle10(pw1) + route0_1 = self.__route0_1(shuffle10,upsample0_1) + conv_set_1 = self.__conv_set_1(route0_1) + + conv1up2 = self.__conv1up2(conv_set_1) + upsample1_2 = self.__upsample1_2(conv1up2) + + # small + pw2 = self.__pw2(x2) + shuffle20 = self.__shuffle20(pw2) + route1_2 = self.__route1_2(shuffle20, upsample1_2) + conv_set_2 = self.__conv_set_2(route1_2) + + out0 = self.__conv0_0(conv_set_0) + out0 = self.__conv0_1(out0) + + out1 = self.__conv1_0(conv_set_1) + out1 = self.__conv1_1(out1) + + out2 = self.__conv2_0(conv_set_2) + out2 = self.__conv2_1(out2) + + return out2, out1, out0 # small, medium, large \ No newline at end of file diff --git a/modelR/necks/msr_fpn.py b/modelR/necks/msr_fpn.py new file mode 100644 index 0000000..58fab9b --- /dev/null +++ b/modelR/necks/msr_fpn.py @@ -0,0 +1,92 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import config.cfg_npmmr as cfg +from ..layers.convolutions import Convolutional, Deformable_Convolutional +from ..layers.msr_blocks import MSR_Convset_L, MSR_Convset_M, MSR_Convset_S, MSR_Convset_L_R, MSR_Convset_M_R, MSR_Convset_S_R +from ..head.mtr_head import MTR_Head1 + +class Upsample(nn.Module): + def __init__(self, scale_factor=1, mode='nearest'): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + + +class Route(nn.Module): + def __init__(self): + super(Route, self).__init__() + + def forward(self, x1, x2): + out = torch.cat((x2, x1), dim=1) + return out + +class MSR_FPN(nn.Module): + def __init__(self, fileters_in): + + super(MSR_FPN, self).__init__() + fi_0, fi_1, fi_2 = fileters_in + self.__fo = cfg.DATA["NUM"] + + self.__conv21down = Deformable_Convolutional(filters_in=fi_2, filters_out=256, kernel_size=3, + stride=2, pad=1, norm="bn", activate="leaky") + self.__route21 = Route() + self.__conv10down = Deformable_Convolutional(filters_in=fi_1 + 256, filters_out=512, kernel_size=3, + stride=2, pad=1, norm="bn", activate="leaky") + + self.__route10 = Route() + self.__conv_set_0 = nn.Sequential( + Convolutional(filters_in=fi_0 + 512, filters_out=512, kernel_size=1, + stride=1, pad=0, norm="bn", activate="Mish"), + MSR_Convset_L(512), + ) + self.__conv01up = Convolutional(filters_in=512, 
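# --- Illustrative sketch (not part of the patch): the per-scale output shapes that the
# CSA_DRF_FPN above produces under the DIOR config (20 classes, 3 anchors per scale,
# strides 8/16/32, 800x800 input). Each anchor predicts 4 box terms, 4 'a' terms, one 'r'
# term, an objectness score and the class scores, hence (NUM + 5 + 5) channels per anchor.
num_classes, anchors_per_scale = 20, 3
fo = (num_classes + 5 + 5) * anchors_per_scale      # 90 output channels per scale
img_size, strides = 800, [8, 16, 32]
for s in strides:
    print(f"stride {s:>2}: ({fo}, {img_size // s}, {img_size // s})")
# stride  8: (90, 100, 100)  -> small objects
# stride 16: (90, 50, 50)    -> medium objects
# stride 32: (90, 25, 25)    -> large objects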
filters_out=256, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__upsample0 = Upsample(scale_factor=2) + self.__route01 = Route() + + self.__conv_set_1 = nn.Sequential( + Convolutional(filters_in=fi_1 + 256 + 256, filters_out=256, kernel_size=1, + stride=1, pad=0, norm="bn", activate="Mish"), + MSR_Convset_M(256), + ) + self.__conv12up = Convolutional(filters_in=256, filters_out=128, kernel_size=1, + stride=1, pad=0, norm="bn", activate="leaky") + self.__upsample1 = Upsample(scale_factor=2) + self.__route12 = Route() + + self.__conv_set_2 = nn.Sequential( + Convolutional(filters_in=fi_2 + 128, filters_out=128, kernel_size=1, + stride=1, pad=0, norm="bn", activate="Mish"), + MSR_Convset_S(128), + ) + + self.__conv0_1 = MTR_Head1(filters_in=512, anchor_num=3, fo_class=self.__fo, temp=False) + self.__conv1_1 = MTR_Head1(filters_in=256, anchor_num=3, fo_class=self.__fo, temp=False) + self.__conv2_1 = MTR_Head1(filters_in=128, anchor_num=3, fo_class=self.__fo, temp=False) + + def forward(self, x0, x1, x2): + conv21down = self.__conv21down(x2) + route21 = self.__route21(x1, conv21down) + conv10down = self.__conv10down(route21) + route10 = self.__route10(x0, conv10down) + conv_set_0 = self.__conv_set_0(route10) + + conv01up = self.__conv01up(conv_set_0) + upsample0 = self.__upsample0(conv01up) + route01 = self.__route01(route21, upsample0) + conv_set_1 = self.__conv_set_1(route01) + + conv12up = self.__conv12up(conv_set_1) + upsample1 = self.__upsample1(conv12up) + route12 = self.__route12(x2, upsample1) + conv_set_2 = self.__conv_set_2(route12) + + out0 = self.__conv0_1(conv_set_0) + out1 = self.__conv1_1(conv_set_1) + out2 = self.__conv2_1(conv_set_2) + + return out2, out1, out0 # small, medium, large \ No newline at end of file diff --git a/modelR/npmmrdet_modelR.py b/modelR/npmmrdet_modelR.py new file mode 100644 index 0000000..0e3adbf --- /dev/null +++ b/modelR/npmmrdet_modelR.py @@ -0,0 +1,149 @@ +import sys +sys.path.append("..") + +import torch.nn as nn +from modelR.backbones.darknet53_npattention import Darknet53_NPAttention +from modelR.backbones.darknet53 import Darknet53 +from modelR.backbones.cspdarknet53__npattention import CSPDarknet53_NPAttention +from modelR.necks.panet_fpn import PANet_FPN +from modelR.necks.msr_fpn import MSR_FPN +from modelR.head.mtr_head_gcn import MTR_Head2 +from modelR.layers.convolutions import Convolutional +from utils.utils_basic import * + +class NPMMRDetR(nn.Module): + """ + Note : int the __init__(), to define the modules should be in order, because of the weight file is order + """ + def __init__(self, init_weights=True): + super(NPMMRDetR, self).__init__() + + self.__anchors = torch.FloatTensor(cfg.MODEL["ANCHORS"]) + self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"]) + self.__nC = cfg.DATA["NUM"] + + self.__backnone = Darknet53_NPAttention() + self.__neck = MSR_FPN(fileters_in=[1024, 512, 256]) + + # small + self.__head_s = MTR_Head2(nC=self.__nC, anchors=self.__anchors[0], stride=self.__strides[0]) + # medium + self.__head_m = MTR_Head2(nC=self.__nC, anchors=self.__anchors[1], stride=self.__strides[1]) + # large + self.__head_l = MTR_Head2(nC=self.__nC, anchors=self.__anchors[2], stride=self.__strides[2]) + + if init_weights: + self.__init_weights() + + + def forward(self, x): + out=[] + x_s, x_m, x_l = self.__backnone(x) + x_s, x_m, x_l= self.__neck(x_l, x_m, x_s) + out.append(self.__head_s(x_s)) + out.append(self.__head_m(x_m)) + out.append(self.__head_l(x_l)) + if self.training: + p, p_d = list(zip(*out)) + return 
p, p_d # smalll, medium, large + else: + p, p_d = list(zip(*out)) + return p, torch.cat(p_d, 0) + + def __init_weights(self): + " Note :nn.Conv2d nn.BatchNorm2d 'initing modes are uniform " + for m in self.modules(): + if isinstance(m, nn.Conv2d): + torch.nn.init.normal_(m.weight.data, 0.0, 0.01) + #torch.nn.init.xavier_normal_(m.weight.data, gain=1) + #torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in', nonlinearity='leaky_relu') + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + elif isinstance(m, nn.BatchNorm2d): + torch.nn.init.constant_(m.weight.data, 1.0) + torch.nn.init.constant_(m.bias.data, 0.0) + print("initing {}".format(m)) + + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0,0.01) + #torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in', nonlinearity='leaky_relu') + if m.bias is not None: + m.bias.data.zero_() + print("initing {}".format(m)) + + def load_darknet_weights(self, weight_file, cutoff=52): + "https://github.com/ultralytics/yolov3/blob/master/models.py" + print("load darknet weights : ", weight_file) + + with open(weight_file, 'rb') as f: + _ = np.fromfile(f, dtype=np.int32, count=5) + weights = np.fromfile(f, dtype=np.float32) + count = 0 + ptr = 0 + for m in self.modules(): + if isinstance(m, Convolutional): + # only initing backbone conv's weights + if count == cutoff: + break + count += 1 + conv_layer = m._Convolutional__conv + if m.norm == "bn": + # Load BN bias, weights, running mean and running variance + bn_layer = m._Convolutional__norm + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias.data) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight.data) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + print("loading weight {}".format(bn_layer)) + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias.data) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. 
weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight.data) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + print("loading weight {}".format(conv_layer)) + +if __name__ == '__main__': + from modelR.get_model_complexity import get_model_complexity_info + #from torchstat import stat + net = NPMMRDetR().cuda() + #print(net) + + #for m in net.modules(): + #modules(): + #if 'Convolutional' in m: + #print("aa",module_list[idx]) + + #if isinstance(m, nn.BatchNorm2d): + #print("aa",m) + + flops, params = get_model_complexity_info(net,(3, 544, 544), as_strings=False, print_per_layer_stat=True) + print('GFlops: %.3fG' % (flops / 1e9)) + print('Params: %.2fM' % (params / 1e6)) + #stat(net.cuda(), (3, 544, 544)) + # + #in_img = torch.randn(1, 3, 544, 544).cuda() + #p, p_d = net(in_img) + #print("Output Size of Each Head (Num_Classes: %d)" % cfg.DATA["NUM"]) + #for i in range(3): + #print(p[i].shape) \ No newline at end of file diff --git a/modelR/plugandplay/ACBlock.py b/modelR/plugandplay/ACBlock.py new file mode 100644 index 0000000..f2c98da --- /dev/null +++ b/modelR/plugandplay/ACBlock.py @@ -0,0 +1,108 @@ +import torch.nn as nn +import torch + + +class CropLayer(nn.Module): + + # E.g., (-1, 0) means this layer should crop the first and last rows of the feature map. And (0, -1) crops the first and last columns + def __init__(self, crop_set): + super(CropLayer, self).__init__() + self.rows_to_crop = - crop_set[0] + self.cols_to_crop = - crop_set[1] + assert self.rows_to_crop >= 0 + assert self.cols_to_crop >= 0 + + def forward(self, input): + return input[:, :, self.rows_to_crop:-self.rows_to_crop, self.cols_to_crop:-self.cols_to_crop] + + +class ACBlock(nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + deploy=False): + super(ACBlock, self).__init__() + self.deploy = deploy + if deploy: + self.fused_conv = nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=(kernel_size, kernel_size), + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=True, + padding_mode=padding_mode) + else: + self.square_conv = nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=(kernel_size, + kernel_size), + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=False, + padding_mode=padding_mode) + self.square_bn = nn.BatchNorm2d(num_features=out_channels) + + center_offset_from_origin_border = padding - kernel_size // 2 + ver_pad_or_crop = (center_offset_from_origin_border + 1, + center_offset_from_origin_border) + hor_pad_or_crop = (center_offset_from_origin_border, + center_offset_from_origin_border + 1) + if center_offset_from_origin_border >= 0: + self.ver_conv_crop_layer = nn.Identity() + ver_conv_padding = ver_pad_or_crop + self.hor_conv_crop_layer = nn.Identity() + hor_conv_padding = hor_pad_or_crop + else: + self.ver_conv_crop_layer = CropLayer(crop_set=ver_pad_or_crop) + ver_conv_padding = (0, 0) + self.hor_conv_crop_layer = CropLayer(crop_set=hor_pad_or_crop) + hor_conv_padding = (0, 0) + self.ver_conv = nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=(3, 1), + stride=stride, + padding=ver_conv_padding, + dilation=dilation, + groups=groups, + bias=False, + padding_mode=padding_mode) + + self.hor_conv = nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=(1, 3), + 
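# --- Illustrative sketch (not part of the patch): the flat float32 layout that
# load_darknet_weights() above steps through with its ptr counter. After a 5-int32
# header, each convolution consumes either 4 * C_out batch-norm values (bias, weight,
# running mean, running variance) or C_out conv biases, followed by
# C_out * C_in * k * k kernel weights.
def darknet_param_count(c_in, c_out, k, with_bn=True):
    """Float32 values one conv block occupies in a darknet .weights file."""
    bn_or_bias = 4 * c_out if with_bn else c_out
    return bn_or_bias + c_out * c_in * k * k

print(darknet_param_count(32, 64, 3))   # 4*64 + 64*32*3*3 = 18688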
stride=stride, + padding=hor_conv_padding, + dilation=dilation, + groups=groups, + bias=False, + padding_mode=padding_mode) + self.ver_bn = nn.BatchNorm2d(num_features=out_channels) + self.hor_bn = nn.BatchNorm2d(num_features=out_channels) + + def forward(self, input): + if self.deploy: + return self.fused_conv(input) + else: + square_outputs = self.square_conv(input) + square_outputs = self.square_bn(square_outputs) + # print(square_outputs.size()) + # return square_outputs + vertical_outputs = self.ver_conv_crop_layer(input) + vertical_outputs = self.ver_conv(vertical_outputs) + vertical_outputs = self.ver_bn(vertical_outputs) + # print(vertical_outputs.size()) + horizontal_outputs = self.hor_conv_crop_layer(input) + horizontal_outputs = self.hor_conv(horizontal_outputs) + horizontal_outputs = self.hor_bn(horizontal_outputs) + # print(horizontal_outputs.size()) + return square_outputs + vertical_outputs + horizontal_outputs diff --git a/modelR/plugandplay/ASPPBlock.py b/modelR/plugandplay/ASPPBlock.py new file mode 100644 index 0000000..a2ac71a --- /dev/null +++ b/modelR/plugandplay/ASPPBlock.py @@ -0,0 +1,80 @@ +import torch.nn as nn +import torch + + +class SeparableConv2d(nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + dilation=1, + bias=False): + super(SeparableConv2d, self).__init__() + + self.conv1 = nn.Conv2d(in_channels, + in_channels, + kernel_size, + stride, + padding, + dilation, + groups=in_channels, + bias=bias) + self.pointwise = nn.Conv2d(in_channels, + out_channels, + 1, + 1, + 0, + 1, + 1, + bias=bias) + + def forward(self, x): + x = self.conv1(x) + x = self.pointwise(x) + return x + + +class ASPP(nn.Module): + def __init__(self, inplanes, planes, rate): + super(ASPP, self).__init__() + self.rate = rate + if rate == 1: + kernel_size = 1 + padding = 0 + else: + kernel_size = 3 + padding = rate + #self.conv1 = nn.Conv2d(planes, planes, kernel_size=3, bias=False,padding=1) + self.conv1 = SeparableConv2d(planes, planes, 3, 1, 1) + self.bn1 = nn.BatchNorm2d(planes) + self.relu1 = nn.ReLU() + + # self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=kernel_size, + # stride=1, padding=padding, dilation=rate, bias=False) + self.atrous_convolution = SeparableConv2d(inplanes, planes, + kernel_size, 1, padding, + rate) + self.bn = nn.BatchNorm2d(planes) + self.relu = nn.ReLU() + + self._init_weight() + + def forward(self, x): + x = self.atrous_convolution(x) + x = self.bn(x) + #x = self.relu(x) + if self.rate != 1: + x = self.conv1(x) + x = self.bn1(x) + x = self.relu1(x) + return x + + def _init_weight(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + torch.nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() diff --git a/modelR/plugandplay/Activations.py b/modelR/plugandplay/Activations.py new file mode 100644 index 0000000..1b94e32 --- /dev/null +++ b/modelR/plugandplay/Activations.py @@ -0,0 +1,30 @@ +import torch +import torch.nn.functional as F + + +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + ctx.save_for_backward(i) + return i * torch.sigmoid(i) + + @staticmethod + def backward(ctx, grad_output): + sigmoid_i = torch.sigmoid(ctx.saved_variables[0]) + return grad_output * (sigmoid_i * (1 + ctx.saved_variables[0] * + (1 - sigmoid_i))) + + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +class Swish(nn.Module): + def 
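# --- Illustrative sketch (not part of the patch): the ACBlock above sums a square k x k
# branch with 3x1 and 1x3 branches whose padding/cropping keeps all three outputs the
# same size, so the block is a drop-in replacement for a plain convolution. The import
# assumes modelR is importable as a package.
import torch
from modelR.plugandplay.ACBlock import ACBlock

block = ACBlock(in_channels=32, out_channels=64, kernel_size=3, padding=1)
x = torch.randn(2, 32, 56, 56)
print(block(x).shape)   # expected: torch.Size([2, 64, 56, 56]), same as Conv2d(32, 64, 3, padding=1)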
forward(self, x): + return x.mul_(torch.sigmoid(x)) + + +class Mish(nn.Module): # https://github.com/digantamisra98/Mish + def forward(self, x): + return x.mul_(F.softplus(x).tanh()) diff --git a/modelR/plugandplay/BlazeBlock.py b/modelR/plugandplay/BlazeBlock.py new file mode 100644 index 0000000..9a40750 --- /dev/null +++ b/modelR/plugandplay/BlazeBlock.py @@ -0,0 +1,70 @@ +from torch import nn +import torch.nn.functional as F + +''' +https://blog.csdn.net/yiran103/article/details/100063021 +''' + +class BlazeBlock(nn.Module): + def __init__(self, inp, oup1, oup2=None, stride=1, kernel_size=5): + super(BlazeBlock, self).__init__() + self.stride = stride + assert stride in [1, 2] + + self.use_double_block = oup2 is not None + self.use_pooling = self.stride != 1 + + if self.use_double_block: + self.channel_pad = oup2 - inp + else: + self.channel_pad = oup1 - inp + + padding = (kernel_size - 1) // 2 + + self.conv1 = nn.Sequential( + # dw + nn.Conv2d(inp, inp, kernel_size=kernel_size, stride=stride, + padding=padding, groups=inp, bias=True), + nn.BatchNorm2d(inp), + # pw-linear + nn.Conv2d(inp, oup1, 1, 1, 0, bias=True), + nn.BatchNorm2d(oup1), + ) + self.act = nn.ReLU(inplace=True) + + if self.use_double_block: + self.conv2 = nn.Sequential( + nn.ReLU(inplace=True), + # dw + nn.Conv2d(oup1, oup1, kernel_size=kernel_size, + stride=1, padding=padding, groups=oup1, bias=True), + nn.BatchNorm2d(oup1), + # pw-linear + nn.Conv2d(oup1, oup2, 1, 1, 0, bias=True), + nn.BatchNorm2d(oup2), + ) + + if self.use_pooling: + self.mp = nn.MaxPool2d(kernel_size=self.stride, stride=self.stride) + + def forward(self, x): + h = self.conv1(x) + if self.use_double_block: + h = self.conv2(h) + + # skip connection + if self.use_pooling: + x = self.mp(x) + if self.channel_pad > 0: + x = F.pad(x, (0, 0, 0, 0, 0, self.channel_pad), 'constant', 0) + return self.act(h + x) + + +def initialize(module): + # original implementation is unknown + if isinstance(module, nn.Conv2d): + nn.init.kaiming_normal_(module.weight.data) + nn.init.constant_(module.bias.data, 0) + elif isinstance(module, nn.BatchNorm2d): + nn.init.constant_(module.weight.data, 1) + nn.init.constant_(module.bias.data, 0) diff --git a/modelR/plugandplay/BorderAlignmentModule.py b/modelR/plugandplay/BorderAlignmentModule.py new file mode 100644 index 0000000..2191e88 --- /dev/null +++ b/modelR/plugandplay/BorderAlignmentModule.py @@ -0,0 +1,44 @@ +# source: https://github.com/Megvii-BaseDetection/BorderDet/blob/master/cvpods/layers/border_align.py +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
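# --- Illustrative sketch (not part of the patch): a stride-2 BlazeBlock halves the
# spatial size with its depthwise conv, max-pools the skip path to match, and zero-pads
# the skip channels up to the output width. The import assumes modelR is importable
# as a package.
import torch
from modelR.plugandplay.BlazeBlock import BlazeBlock

block = BlazeBlock(inp=24, oup1=48, stride=2)   # single (non-double) block
x = torch.randn(1, 24, 64, 64)
print(block(x).shape)                            # expected: torch.Size([1, 48, 32, 32])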
+from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from cvpods import _C + + +class _BorderAlign(Function): + @staticmethod + def forward(ctx, input, boxes, wh, pool_size): + output = _C.border_align_forward(input, boxes, wh, pool_size) + ctx.pool_size = pool_size + ctx.save_for_backward(input, boxes, wh) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + pool_size = ctx.pool_size + input, boxes, wh = ctx.saved_tensors + grad_input = _C.border_align_backward( + grad_output, input, boxes, wh, pool_size) + return grad_input, None, None, None + + +border_align = _BorderAlign.apply + + +class BorderAlign(nn.Module): + def __init__(self, pool_size): + super(BorderAlign, self).__init__() + self.pool_size = pool_size + + def forward(self, feature, boxes): + feature = feature.contiguous() + boxes = boxes.contiguous() + wh = (boxes[:, :, 2:] - boxes[:, :, :2]).contiguous() + output = border_align(feature, boxes, wh, self.pool_size) + return output + + def __repr__(self): + tmpstr = self.__class__.__name__ + return tmpstr \ No newline at end of file diff --git a/modelR/plugandplay/CondConv.py b/modelR/plugandplay/CondConv.py new file mode 100644 index 0000000..d60251c --- /dev/null +++ b/modelR/plugandplay/CondConv.py @@ -0,0 +1,89 @@ +import functools +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class route_func(nn.Module): + r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference + https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf + Args: + c_in (int): Number of channels in the input image + num_experts (int): Number of experts for mixture. Default: 1 + """ + + def __init__(self, c_in, num_experts): + super(route_func, self).__init__() + self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) + self.fc = nn.Linear(c_in, num_experts) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + x = self.sigmoid(x) + return x + + +class CondConv2d(nn.Module): + r"""CondConv: Conditionally Parameterized Convolutions for Efficient Inference + https://papers.nips.cc/paper/8412-condconv-conditionally-parameterized-convolutions-for-efficient-inference.pdf + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` + num_experts (int): Number of experts for mixture. 
Default: 1 + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride=1, padding=0, dilation=1, groups=1, bias=True, + num_experts=1): + super(CondConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.num_experts = num_experts + + self.weight = nn.Parameter(torch.Tensor(num_experts, out_channels, in_channels // groups, kernel_size, kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(num_experts, out_channels)) + else: + self.register_parameter('bias', None) + + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, x, routing_weight): + b, c_in, h, w = x.size() + k, c_out, c_in, kh, kw = self.weight.size() + x = x.view(1, -1, h, w) + weight = self.weight.view(k, -1) + + combined_weight = torch.mm(routing_weight, weight).view(-1, c_in, kh, kw) + + if self.bias is not None: + combined_bias = torch.mm(routing_weight, self.bias).view(-1) + output = F.conv2d( + x, weight=combined_weight, bias=combined_bias, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * b) + else: + output = F.conv2d( + x, weight=combined_weight, bias=None, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * b) + + output = output.view(b, c_out, output.size(-2), output.size(-1)) + return output \ No newline at end of file diff --git a/modelR/plugandplay/ContextGating.py b/modelR/plugandplay/ContextGating.py new file mode 100644 index 0000000..1c524a7 --- /dev/null +++ b/modelR/plugandplay/ContextGating.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn +import math + +''' +Learnable pooling with Context Gating for video classification +arXiv:1706.06905v2 +source: https://github.com/ekingedik/loupe-pytorch/blob/master/loupe_pytorch.py +''' +class GatingContext(nn.Module): + def __init__(self, dim, add_batch_norm=True): + super(GatingContext, self).__init__() + self.dim = dim + self.add_batch_norm = add_batch_norm + self.gating_weights = nn.Parameter(nn.init.normal_( + torch.empty(dim, dim), mean=0, std=(1 / math.sqrt(dim)),)) + self.sigmoid = nn.Sigmoid() + if add_batch_norm: + self.gating_biases = None + self.batch_norm = nn.BatchNorm1d(dim) + else: + self.gating_biases = nn.Parameter(torch.nn.init.normal_( + torch.empty(dim), mean=0, std=(1 / math.sqrt(dim)),)) + self.batch_norm = None + + def forward(self, x): + gates = torch.matmul(x, self.gating_weights) + if self.add_batch_norm: + gates = self.batch_norm(gates) + else: + gates = gates + self.gating_biases + gates = self.sigmoid(gates) + activation = x * gates + return activation diff --git a/modelR/plugandplay/DGC.py b/modelR/plugandplay/DGC.py new file mode 100644 index 0000000..4311ce6 --- /dev/null +++ b/modelR/plugandplay/DGC.py @@ -0,0 +1,125 @@ +# source:https://github.com/zhuogege1943/dgc/blob/439fde259c/layers.py +# arxiv:https://arxiv.org/abs/2007.04242 + +import torch +import torch.nn as nn +import torch.nn.functional as F + +class DynamicMultiHeadConv(nn.Module): + global_progress = 0.0 + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, heads=4, squeeze_rate=16, gate_factor=0.25): + super(DynamicMultiHeadConv, self).__init__() + self.norm = 
nn.BatchNorm2d(in_channels) + self.relu = nn.ReLU(inplace=True) + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.in_channels = in_channels + self.out_channels = out_channels + self.heads = heads + self.squeeze_rate = squeeze_rate + self.gate_factor = gate_factor + self.stride = stride + self.padding = padding + self.dilation = dilation + self.is_pruned = True + self.register_buffer('_inactive_channels', torch.zeros(1)) + + ### Check if arguments are valid + assert self.in_channels % self.heads == 0, \ + "head number can not be divided by input channels" + assert self.out_channels % self.heads == 0, \ + "head number can not be divided by output channels" + assert self.gate_factor <= 1.0, "gate factor is greater than 1" + + for i in range(self.heads): + self.__setattr__('headconv_%1d' % i, + HeadConv(in_channels, out_channels // self.heads, squeeze_rate, + kernel_size, stride, padding, dilation, 1, gate_factor)) + + def forward(self, x): + """ + The code here is just a coarse implementation. + The forward process can be quite slow and memory consuming, need to be optimized. + """ + if self.training: + progress = DynamicMultiHeadConv.global_progress + # gradually deactivate input channels + if progress < 3.0 / 4 and progress > 1.0 / 12: + self.inactive_channels = round(self.in_channels * (1 - self.gate_factor) * 3.0 / 2 * (progress - 1.0 / 12)) + elif progress >= 3.0 / 4: + self.inactive_channels = round(self.in_channels * (1 - self.gate_factor)) + + _lasso_loss = 0.0 + + x = self.norm(x) + x = self.relu(x) + + x_averaged = self.avg_pool(x) + x_mask = [] + weight = [] + for i in range(self.heads): + i_x, i_lasso_loss= self.__getattr__('headconv_%1d' % i)(x, x_averaged, self.inactive_channels) + x_mask.append(i_x) + weight.append(self.__getattr__('headconv_%1d' % i).conv.weight) + _lasso_loss = _lasso_loss + i_lasso_loss + + x_mask = torch.cat(x_mask, dim=1) # batch_size, 4 x C_in, H, W + weight = torch.cat(weight, dim=0) # 4 x C_out, C_in, k, k + + out = F.conv2d(x_mask, weight, None, self.stride, + self.padding, self.dilation, self.heads) + b, c, h, w = out.size() + out = out.view(b, self.heads, c // self.heads, h, w) + out = out.transpose(1, 2).contiguous().view(b, c, h, w) + return [out, _lasso_loss] + + @property + def inactive_channels(self): + return int(self._inactive_channels[0]) + + @inactive_channels.setter + def inactive_channels(self, val): + self._inactive_channels.fill_(val) + +class HeadConv(nn.Module): + def __init__(self, in_channels, out_channels, squeeze_rate, kernel_size, stride=1, + padding=0, dilation=1, groups=1, gate_factor=0.25): + super(HeadConv, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, + padding, dilation, groups=1, bias=False) + self.target_pruning_rate = gate_factor + if in_channels < 80: + squeeze_rate = squeeze_rate // 2 + self.fc1 = nn.Linear(in_channels, in_channels // squeeze_rate, bias=False) + self.relu_fc1 = nn.ReLU(inplace=True) + self.fc2 = nn.Linear(in_channels // squeeze_rate, in_channels, bias=True) + self.relu_fc2 = nn.ReLU(inplace=True) + + nn.init.kaiming_normal_(self.fc1.weight) + nn.init.kaiming_normal_(self.fc2.weight) + nn.init.constant_(self.fc2.bias, 1.0) + + def forward(self, x, x_averaged, inactive_channels): + b, c, _, _ = x.size() + x_averaged = x_averaged.view(b, c) + y = self.fc1(x_averaged) + y = self.relu_fc1(y) + y = self.fc2(y) + + + mask = self.relu_fc2(y) # b, c + _lasso_loss = mask.mean() + + mask_d = mask.detach() + mask_c = mask + + if inactive_channels > 0: + mask_c = 
mask.clone() + topk_maxmum, _ = mask_d.topk(inactive_channels, dim=1, largest=False, sorted=False) + clamp_max, _ = topk_maxmum.max(dim=1, keepdim=True) + mask_index = mask_d.le(clamp_max) + mask_c[mask_index] = 0 + + mask_c = mask_c.view(b, c, 1, 1) + x = x * mask_c.expand_as(x) + return x, _lasso_loss diff --git a/modelR/plugandplay/DOConv.py b/modelR/plugandplay/DOConv.py new file mode 100644 index 0000000..52daa5b --- /dev/null +++ b/modelR/plugandplay/DOConv.py @@ -0,0 +1,148 @@ +# coding=utf-8 +# arxiv: https://arxiv.org/abs/2006.12030 +# source:https://github.com/yangyanli/DO-Conv/blob/master/do_conv_pytorch.py + +import math +import torch +import numpy as np +from torch.nn import init +from itertools import repeat +from torch.nn import functional as F +from torch._six import container_abcs +from torch._jit_internal import Optional +from torch.nn.parameter import Parameter +from torch.nn.modules.module import Module + + +class DOConv2d(Module): + """ + DOConv2d can be used as an alternative for torch.nn.Conv2d. + The interface is similar to that of Conv2d, with one exception: + 1. D_mul: the depth multiplier for the over-parameterization. + Note that the groups parameter switchs between DO-Conv (groups=1), + DO-DConv (groups=in_channels), DO-GConv (otherwise). + """ + __constants__ = ['stride', 'padding', 'dilation', 'groups', + 'padding_mode', 'output_padding', 'in_channels', + 'out_channels', 'kernel_size', 'D_mul'] + __annotations__ = {'bias': Optional[torch.Tensor]} + + def __init__(self, in_channels, out_channels, kernel_size, D_mul=None, stride=1, + padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): + super(DOConv2d, self).__init__() + + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + dilation = _pair(dilation) + + if in_channels % groups != 0: + raise ValueError('in_channels must be divisible by groups') + if out_channels % groups != 0: + raise ValueError('out_channels must be divisible by groups') + valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'} + if padding_mode not in valid_padding_modes: + raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format( + valid_padding_modes, padding_mode)) + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.padding_mode = padding_mode + self._padding_repeated_twice = tuple(x for x in self.padding for _ in range(2)) + + #################################### Initailization of D & W ################################### + M = self.kernel_size[0] + N = self.kernel_size[1] + self.D_mul = M * N if D_mul is None or M * N <= 1 else D_mul + self.W = Parameter(torch.Tensor(out_channels, in_channels // groups, self.D_mul)) + init.kaiming_uniform_(self.W, a=math.sqrt(5)) + + if M * N > 1: + self.D = Parameter(torch.Tensor(in_channels, M * N, self.D_mul)) + init_zero = np.zeros([in_channels, M * N, self.D_mul], dtype=np.float32) + self.D.data = torch.from_numpy(init_zero) + + eye = torch.reshape(torch.eye(M * N, dtype=torch.float32), (1, M * N, M * N)) + D_diag = eye.repeat((in_channels, 1, self.D_mul // (M * N))) + if self.D_mul % (M * N) != 0: # the cases when D_mul > M * N + zeros = torch.zeros([in_channels, M * N, self.D_mul % (M * N)]) + self.D_diag = Parameter(torch.cat([D_diag, zeros], dim=2), requires_grad=False) + else: # the case when D_mul = M * N + self.D_diag = Parameter(D_diag, 
requires_grad=False) + ################################################################################################## + + if bias: + self.bias = Parameter(torch.Tensor(out_channels)) + fan_in, _ = init._calculate_fan_in_and_fan_out(self.W) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + else: + self.register_parameter('bias', None) + + def extra_repr(self): + s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}' + ', stride={stride}') + if self.padding != (0,) * len(self.padding): + s += ', padding={padding}' + if self.dilation != (1,) * len(self.dilation): + s += ', dilation={dilation}' + if self.groups != 1: + s += ', groups={groups}' + if self.bias is None: + s += ', bias=False' + if self.padding_mode != 'zeros': + s += ', padding_mode={padding_mode}' + return s.format(**self.__dict__) + + def __setstate__(self, state): + super(DOConv2d, self).__setstate__(state) + if not hasattr(self, 'padding_mode'): + self.padding_mode = 'zeros' + + def _conv_forward(self, input, weight): + if self.padding_mode != 'zeros': + return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode), + weight, self.bias, self.stride, + _pair(0), self.dilation, self.groups) + return F.conv2d(input, weight, self.bias, self.stride, + self.padding, self.dilation, self.groups) + + def forward(self, input): + M = self.kernel_size[0] + N = self.kernel_size[1] + DoW_shape = (self.out_channels, self.in_channels // self.groups, M, N) + if M * N > 1: + ######################### Compute DoW ################# + # (input_channels, D_mul, M * N) + D = self.D + self.D_diag + W = torch.reshape(self.W, (self.out_channels // self.groups, self.in_channels, self.D_mul)) + + # einsum outputs (out_channels // groups, in_channels, M * N), + # which is reshaped to + # (out_channels, in_channels // groups, M, N) + DoW = torch.reshape(torch.einsum('ims,ois->oim', D, W), DoW_shape) + ####################################################### + else: + # in this case D_mul == M * N + # reshape from + # (out_channels, in_channels // groups, D_mul) + # to + # (out_channels, in_channels // groups, M, N) + DoW = torch.reshape(self.W, DoW_shape) + return self._conv_forward(input, DoW) + + +def _ntuple(n): + def parse(x): + if isinstance(x, container_abcs.Iterable): + return x + return tuple(repeat(x, n)) + + return parse + + +_pair = _ntuple(2) \ No newline at end of file diff --git a/modelR/plugandplay/DepthWiseConv.py b/modelR/plugandplay/DepthWiseConv.py new file mode 100644 index 0000000..62ac559 --- /dev/null +++ b/modelR/plugandplay/DepthWiseConv.py @@ -0,0 +1,23 @@ +import torch.nn as nn + + +class DWConv(nn.Module): + def __init__(self, in_plane, out_plane): + super(DWConv, self).__init__() + self.depth_conv = nn.Conv2d(in_channels=in_plane, + out_channels=in_plane, + kernel_size=3, + stride=1, + padding=1, + groups=in_plane) + self.point_conv = nn.Conv2d(in_channels=in_plane, + out_channels=out_plane, + kernel_size=1, + stride=1, + padding=0, + groups=1) + + def forward(self, x): + x = self.depth_conv(x) + x = self.point_conv(x) + return x diff --git a/modelR/plugandplay/DynamicConv.py b/modelR/plugandplay/DynamicConv.py new file mode 100644 index 0000000..14493c7 --- /dev/null +++ b/modelR/plugandplay/DynamicConv.py @@ -0,0 +1,94 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +# source: https://github.com/kaijieshi7/Dynamic-convolution-Pytorch/blob/master/dynamic_conv.py +# zhihu: https://zhuanlan.zhihu.com/p/142381725 +# zhihu: 
https://zhuanlan.zhihu.com/p/208519425 +class attention2d(nn.Module): + def __init__(self, in_planes, ratios, K, temperature, init_weight=True): + super(attention2d, self).__init__() + assert temperature%3==1 + self.avgpool = nn.AdaptiveAvgPool2d(1) + if in_planes!=3: + hidden_planes = int(in_planes*ratios) + else: + hidden_planes = K + self.fc1 = nn.Conv2d(in_planes, hidden_planes, 1, bias=False) + self.fc2 = nn.Conv2d(hidden_planes, K, 1, bias=False) + self.temperature = temperature + if init_weight: + self._initialize_weights() + + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def updata_temperature(self): + if self.temperature!=1: + self.temperature -=3 + print('Change temperature to:', str(self.temperature)) + + + def forward(self, x): + x = self.avgpool(x) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x).view(x.size(0), -1) + return F.softmax(x/self.temperature, 1) + + +class Dynamic_conv2d(nn.Module): + def __init__(self, in_planes, out_planes, kernel_size, ratio=0.25, stride=1, padding=0, dilation=1, groups=1, bias=True, K=4,temperature=34, init_weight=True): + super(Dynamic_conv2d, self).__init__() + assert in_planes%groups==0 + self.in_planes = in_planes + self.out_planes = out_planes + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.dilation = dilation + self.groups = groups + self.bias = bias + self.K = K + self.attention = attention2d(in_planes, ratio, K, temperature) + + self.weight = nn.Parameter(torch.Tensor(K, out_planes, in_planes//groups, kernel_size, kernel_size), requires_grad=True) + if bias: + self.bias = nn.Parameter(torch.Tensor(K, out_planes)) + else: + self.bias = None + if init_weight: + self._initialize_weights() + + #TODO 初始化 + def _initialize_weights(self): + for i in range(self.K): + nn.init.kaiming_uniform_(self.weight[i]) + + + def update_temperature(self): + self.attention.updata_temperature() + + def forward(self, x):#将batch视作维度变量,进行组卷积,因为组卷积的权重是不同的,动态卷积的权重也是不同的 + softmax_attention = self.attention(x) + batch_size, in_planes, height, width = x.size() + x = x.view(1, -1, height, width)# 变化成一个维度进行组卷积 + weight = self.weight.view(self.K, -1) + + # 动态卷积的权重的生成, 生成的是batch_size个卷积参数(每个参数不同) + aggregate_weight = torch.mm(softmax_attention, weight).view(-1, self.in_planes//self.groups, self.kernel_size, self.kernel_size) + if self.bias is not None: + aggregate_bias = torch.mm(softmax_attention, self.bias).view(-1) + output = F.conv2d(x, weight=aggregate_weight, bias=aggregate_bias, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups*batch_size) + else: + output = F.conv2d(x, weight=aggregate_weight, bias=None, stride=self.stride, padding=self.padding, + dilation=self.dilation, groups=self.groups * batch_size) + + output = output.view(batch_size, self.out_planes, output.size(-2), output.size(-1)) + return output \ No newline at end of file diff --git a/modelR/plugandplay/EffNetBlock.py b/modelR/plugandplay/EffNetBlock.py new file mode 100644 index 0000000..cefdd25 --- /dev/null +++ b/modelR/plugandplay/EffNetBlock.py @@ -0,0 +1,56 @@ +''' +EffNet: AN EFFICIENT STRUCTURE FOR CONVOLUTIONAL NEURAL NETWORKS +Implementation in Pytorch of Effnet. 
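+
+A rough usage sketch (illustrative, not from the original file; assumes `import torch`
+at the call site). The default 4096-d linear layer matches a 3x32x32 input, since each
+of the three blocks halves H and W and the last block outputs 256 channels:
+    net = EffNet(nb_classes=10)
+    logits = net(torch.zeros(1, 3, 32, 32))
+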
+https://arxiv.org/abs/1801.06434 +''' +import torch.nn as nn + +class Flatten(nn.Module): + def forward(self, x): + x = x.view(x.size()[0], -1) + return x + +class EffNet(nn.Module): + + def __init__(self, nb_classes=10, include_top=True, weights=None): + super(EffNet, self).__init__() + + self.block1 = self.make_layers(32, 64) + self.block2 = self.make_layers(64, 128) + self.block3 = self.make_layers(128, 256) + self.flatten = Flatten() + self.linear = nn.Linear(4096, nb_classes) + self.include_top = include_top + self.weights = weights + + def make_layers(self, ch_in, ch_out): + layers = [ + nn.Conv2d(3, ch_in, kernel_size=(1,1), stride=(1,1), bias=False, padding=0, dilation=(1,1)) if ch_in ==32 else nn.Conv2d(ch_in, ch_in, kernel_size=(1,1),stride=(1,1), bias=False, padding=0, dilation=(1,1)) , + self.make_post(ch_in), + # DepthWiseConvolution2D + nn.Conv2d(ch_in, 1 * ch_in, groups=ch_in, kernel_size=(1, 3),stride=(1,1), padding=(0,1), bias=False, dilation=(1,1)), + self.make_post(ch_in), + nn.MaxPool2d(kernel_size=(2,1), stride=(2,1)), + # DepthWiseConvolution2D + nn.Conv2d(ch_in, 1 * ch_in, groups=ch_in, kernel_size=(3, 1), stride=(1,1), padding=(1,0), bias=False, dilation=(1,1)), + self.make_post(ch_in), + nn.Conv2d(ch_in, ch_out, kernel_size=(1, 2), stride=(1, 2), bias=False, padding=(0,0), dilation=(1,1)), + self.make_post(ch_out), + ] + return nn.Sequential(*layers) + + def make_post(self, ch_in): + layers = [ + nn.LeakyReLU(0.3), + nn.BatchNorm2d(ch_in, momentum=0.99) + ] + return nn.Sequential(*layers) + + def forward(self, x): + x = self.block1(x) + x = self.block2(x) + x = self.block3(x) + if self.include_top: + x = self.flatten(x) + x = self.linear(x) + return x \ No newline at end of file diff --git a/modelR/plugandplay/FPT/FPT.py b/modelR/plugandplay/FPT/FPT.py new file mode 100644 index 0000000..ee72d61 --- /dev/null +++ b/modelR/plugandplay/FPT/FPT.py @@ -0,0 +1,102 @@ +#https://github.com/ZHANGDONG-NJUST/FPT/blob/ffdbf3de67ba9e811f05c800c64e4ea855cc0dae/lib/modeling/FPT.py +import torch +import math +import torch.nn as nn +import torch.nn.functional as F + +# from torch.nn import DataParallel # or your customized DataParallel module +# from sync_batchnorm import SynchronizedBatchNorm1d, patch_replication_callback + +from modeling.self_trans import SelfTrans +from modeling.rendering_trans import RenderTrans +from modeling.grounding_trans import GroundTrans +import nn as mynn +from dropblock import DropBlock2D + +class FPT(nn.Module): + def __init__(self, feature_dim, with_norm='none', upsample_method='bilinear'): + super(FPT, self).__init__() + self.feature_dim = feature_dim + assert upsample_method in ['nearest', 'bilinear'] + def interpolate(input): + return F.interpolate(input, scale_factor=2, mode=upsample_method, align_corners=False if upsample_method == 'bilinear' else None) + self.fpn_upsample = interpolate + assert with_norm in ['group_norm', 'batch_norm', 'none'] + if with_norm == 'batch_norm': + norm = nn.BatchNorm2d + elif with_norm == 'group_norm': + def group_norm(num_channels): + return nn.GroupNorm(32, num_channels) + norm = group_norm + self.st_p5 = SelfTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.st_p4 = SelfTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.st_p3 = SelfTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.st_p2 = SelfTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, 
bn_layer=True) + self.gt_p4_p5 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.gt_p3_p4 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.gt_p3_p5 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.gt_p2_p3 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.gt_p2_p4 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.gt_p2_p5 = GroundTrans(in_channels=feature_dim, inter_channels=None, mode='dot', dimension=2, bn_layer=True) + self.rt_p5_p4 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + self.rt_p5_p3 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + self.rt_p5_p2 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + self.rt_p4_p3 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + self.rt_p4_p2 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + self.rt_p3_p2 = RenderTrans(channels_high=feature_dim, channels_low=feature_dim, upsample=False) + drop_block = DropBlock2D(block_size=3, drop_prob=0.2) + + if with_norm != 'none': + self.fpn_p5_1x1 = nn.Sequential(*[nn.Conv2d(2048, feature_dim, 1, bias=False), norm(feature_dim)]) + self.fpn_p4_1x1 = nn.Sequential(*[nn.Conv2d(1024, feature_dim, 1, bias=False), norm(feature_dim)]) + self.fpn_p3_1x1 = nn.Sequential(*[nn.Conv2d(512, feature_dim, 1, bias=False), norm(feature_dim)]) + self.fpn_p2_1x1 = nn.Sequential(*[nn.Conv2d(256, feature_dim, 1, bias=False), norm(feature_dim)]) + + self.fpt_p5 = nn.Sequential(*[nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1, bias=False), norm(feature_dim)]) + self.fpt_p4 = nn.Sequential(*[nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1, bias=False), norm(feature_dim)]) + self.fpt_p3 = nn.Sequential(*[nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1, bias=False), norm(feature_dim)]) + self.fpt_p2 = nn.Sequential(*[nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1, bias=False), norm(feature_dim)]) + else: + self.fpn_p5_1x1 = nn.Conv2d(2048, feature_dim, 1) + self.fpn_p4_1x1 = nn.Conv2d(1024, feature_dim, 1) + self.fpn_p3_1x1 = nn.Conv2d(512, feature_dim, 1) + self.fpn_p2_1x1 = nn.Conv2d(256, feature_dim, 1) + + self.fpt_p5 = nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1) + self.fpt_p4 = nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1) + self.fpt_p3 = nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1) + self.fpt_p2 = nn.Conv2d(feature_dim*5, feature_dim, 3, padding=1) + + self.initialize() + + def initialize(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight.data, a=1) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, res2, res3, res4, res5): + fpn_p5_1 = self.fpn_p5_1x1(res5) + fpn_p4_1 = self.fpn_p4_1x1(res4) + fpn_p3_1 = self.fpn_p3_1x1(res3) + fpn_p2_1 = self.fpn_p2_1x1(res2) + fpt_p5_out = torch.cat((self.st_p5(fpn_p5_1), self.rt_p5_p4(fpn_p5_1, fpn_p4_1), + self.rt_p5_p3(fpn_p5_1,fpn_p3_1), self.rt_p5_p2(fpn_p5_1,fpn_p2_1), fpn_p5_1), 1) + fpt_p4_out = torch.cat((self.st_p4(fpn_p4_1), self.rt_p4_p3(fpn_p4_1, fpn_p3_1), + self.rt_p4_p2(fpn_p4_1,fpn_p2_1), self.gt_p4_p5(fpn_p4_1,fpn_p5_1), fpn_p4_1), 1) + fpt_p3_out = torch.cat((self.st_p3(fpn_p3_1), self.rt_p3_p2(fpn_p3_1, fpn_p2_1), + 
self.gt_p3_p4(fpn_p3_1,fpn_p4_1), self.gt_p3_p5(fpn_p3_1,fpn_p5_1), fpn_p3_1), 1) + fpt_p2_out = torch.cat((self.st_p2(fpn_p2_1), self.gt_p2_p3(fpn_p2_1, fpn_p3_1), + self.gt_p2_p4(fpn_p2_1,fpn_p4_1), self.gt_p2_p5(fpn_p2_1,fpn_p5_1), fpn_p2_1), 1) + fpt_p5 = self.fpt_p5(fpt_p5_out) + fpt_p4 = self.fpt_p4(fpt_p4_out) + fpt_p3 = self.fpt_p3(fpt_p3_out) + fpt_p2 = self.fpt_p2(fpt_p2_out) + ''' + fpt_p5 = drop_block(self.fpt_p5(fpt_p5_out)) + fpt_p4 = drop_block(self.fpt_p4(fpt_p4_out)) + fpt_p3 = drop_block(self.fpt_p3(fpt_p3_out)) + fpt_p2 = drop_block(self.fpt_p2(fpt_p2_out)) + ''' + return fpt_p2, fpt_p3, fpt_p4, fpt_p5 \ No newline at end of file diff --git a/modelR/plugandplay/FPT/grounding_trans.py b/modelR/plugandplay/FPT/grounding_trans.py new file mode 100644 index 0000000..8e11bbd --- /dev/null +++ b/modelR/plugandplay/FPT/grounding_trans.py @@ -0,0 +1,105 @@ +import torch +from torch import nn +from torch.nn import functional as F +class GroundTrans(nn.Module): + def __init__(self, in_channels, inter_channels=None, mode='dot', dimension=2, bn_layer=True): + super(GroundTrans, self).__init__() + assert dimension in [1, 2, 3] + if mode not in ['gaussian', 'embedded', 'dot', 'concatenate']: + raise ValueError('`mode` must be one of `gaussian`, `embedded`, `dot` or `concatenate`') + + self.mode = mode + self.dimension = dimension + + self.in_channels = in_channels + self.inter_channels = inter_channels + + if self.inter_channels is None: + self.inter_channels = in_channels // 2 + + if dimension == 3: + conv_nd = nn.Conv3d + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + bn = nn.BatchNorm3d + + elif dimension == 2: + conv_nd = nn.Conv2d + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + bn = nn.BatchNorm2d + + else: + conv_nd = nn.Conv1d + max_pool_layer = nn.MaxPool1d(kernel_size=(2)) + bn = nn.BatchNorm1d + + self.g = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + + if bn_layer: + self.W_z = nn.Sequential( + conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels, kernel_size=1), + bn(self.in_channels) + ) + nn.init.constant_(self.W_z[1].weight, 0) + nn.init.constant_(self.W_z[1].bias, 0) + else: + self.W_z = conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels, kernel_size=1) + + nn.init.constant_(self.W_z.weight, 0) + nn.init.constant_(self.W_z.bias, 0) + + if self.mode == "embedded" or self.mode == "dot" or self.mode == "concatenate": + self.theta = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + self.phi = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + + if self.mode == "concatenate": + self.W_f = nn.Sequential( + nn.Conv2d(in_channels=self.inter_channels * 2, out_channels=1, kernel_size=1), + nn.ReLU() + ) + + def forward(self, x_low, x_high): + """ + args + x: (N, C, T, H, W) for dimension=3; (N, C, H, W) for dimension 2; (N, C, T) for dimension 1 + """ + batch_size = x_low.size(0) + g_x = self.g(x_high).view(batch_size, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + if self.mode == "gaussian": + theta_x = x_low.view(batch_size, self.in_channels, -1) + phi_x = x_high.view(batch_size, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + f = torch.matmul(theta_x, phi_x) + + elif self.mode == "embedded" or self.mode == "dot": + theta_x = self.theta(x_low).view(batch_size, self.inter_channels, -1) + phi_x = self.phi(x_high).view(batch_size, self.inter_channels, -1) + theta_x = theta_x.permute(0, 
2, 1) + f = torch.matmul(theta_x, phi_x) + + elif self.mode == "concatenate": + theta_x = self.theta(x_low).view(batch_size, self.inter_channels, -1, 1) + phi_x = self.phi(x_high).view(batch_size, self.inter_channels, 1, -1) + + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + concat = torch.cat([theta_x, phi_x], dim=1) + f = self.W_f(concat) + f = f.view(f.size(0), f.size(2), f.size(3)) + + + if self.mode == "gaussian" or self.mode == "embedded": + f_div_C = F.softmax(f, dim=-1) + elif self.mode == "dot" or self.mode == "concatenate": + N = f.size(-1) # number of position in x + f_div_C = f / N + y = torch.matmul(f_div_C, g_x) + + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, self.inter_channels, *x_low.size()[2:]) + + z = self.W_z(y) + return z \ No newline at end of file diff --git a/modelR/plugandplay/FPT/rendering_trans.py b/modelR/plugandplay/FPT/rendering_trans.py new file mode 100644 index 0000000..cebe12e --- /dev/null +++ b/modelR/plugandplay/FPT/rendering_trans.py @@ -0,0 +1,44 @@ +import torch +from torch import nn +from torch.nn import functional as F + +class RenderTrans(nn.Module): + def __init__(self, channels_high, channels_low, upsample=True): + super(RenderTrans, self).__init__() + self.upsample = upsample + + self.conv3x3 = nn.Conv2d(channels_high, channels_high, kernel_size=3, padding=1, bias=False) + self.bn_low = nn.BatchNorm2d(channels_high) + + self.conv1x1 = nn.Conv2d(channels_low, channels_high, kernel_size=1, padding=0, bias=False) + self.bn_high = nn.BatchNorm2d(channels_high) + + if upsample: + self.conv_upsample = nn.ConvTranspose2d(channels_low, channels_high, kernel_size=4, stride=2, padding=1, bias=False) + self.bn_upsample = nn.BatchNorm2d(channels_high) + else: + self.conv_reduction = nn.Conv2d(channels_low, channels_high, kernel_size=1, padding=0, bias=False) + self.bn_reduction = nn.BatchNorm2d(channels_high) + self.relu = nn.ReLU(inplace=True) + self.conv_cat = nn.Conv2d(channels_high*2, channels_high, kernel_size=1, padding=0, bias=False) + + def forward(self, x_high, x_low): + b, c, h, w = x_low.shape + x_low_gp = nn.AvgPool2d(x_low.shape[2:])(x_low).view(len(x_low), c, 1, 1) + x_low_gp = self.conv1x1(x_low_gp) + x_low_gp = self.bn_low(x_low_gp) + x_low_gp = self.relu(x_low_gp) + + x_high_mask = self.conv3x3(x_high) + x_high_mask = self.bn_high(x_high_mask) + + x_att = x_high_mask * x_low_gp + if self.upsample: + out = self.relu( + self.bn_upsample(self.conv_upsample(x_high)) + x_att) + # self.conv_cat(torch.cat([self.bn_upsample(self.conv_upsample(x_high)), x_att], dim=1)) + else: + out = self.relu( + self.bn_reduction(self.conv_reduction(x_high)) + x_att) + # # self.conv_cat(torch.cat([self.bn_reduction(self.conv_reduction(x_high)), x_att], dim=1)) + return out \ No newline at end of file diff --git a/modelR/plugandplay/FPT/self_trans.py b/modelR/plugandplay/FPT/self_trans.py new file mode 100644 index 0000000..92b99aa --- /dev/null +++ b/modelR/plugandplay/FPT/self_trans.py @@ -0,0 +1,138 @@ +import torch +from torch import nn +from torch.nn import functional as F + +class SelfTrans(nn.Module): + def __init__(self, in_channels, inter_channels=None, mode='dot', dimension=2, bn_layer=True): + # def __init__(self, in_channels, inter_channels=None, mode='dot', dimension=2, bn_layer=True, n_mix, d_k): + super(SelfTrans, self).__init__() + assert dimension in [1, 2, 3] + if mode not in ['gaussian', 'embedded', 'dot', 'concatenate']: + raise ValueError('`mode` must be one 
of `gaussian`, `embedded`, `dot` or `concatenate`') + self.mode = mode + self.dimension = dimension + self.in_channels = in_channels + self.inter_channels = inter_channels + # the channel size is reduced to half inside the block + if self.inter_channels is None: + self.inter_channels = in_channels // 2 + if dimension == 3: + conv_nd = nn.Conv3d + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + bn = nn.BatchNorm3d + elif dimension == 2: + conv_nd = nn.Conv2d + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + bn = nn.BatchNorm2d + else: + conv_nd = nn.Conv1d + max_pool_layer = nn.MaxPool1d(kernel_size=(2)) + bn = nn.BatchNorm1d + self.g = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + + if bn_layer: + self.W_z = nn.Sequential( + conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels, kernel_size=1), + bn(self.in_channels) + ) + nn.init.constant_(self.W_z[1].weight, 0) + nn.init.constant_(self.W_z[1].bias, 0) + else: + self.W_z = conv_nd(in_channels=self.inter_channels, out_channels=self.in_channels, kernel_size=1) + + nn.init.constant_(self.W_z.weight, 0) + nn.init.constant_(self.W_z.bias, 0) + + if self.mode == "embedded" or self.mode == "dot" or self.mode == "concatenate": + self.theta = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + self.phi = conv_nd(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1) + + if self.mode == "concatenate": + self.W_f = nn.Sequential( + nn.Conv2d(in_channels=self.inter_channels * 2, out_channels=1, kernel_size=1), + nn.ReLU() + ) + def forward(self, x): + """ + args + x: (N, C, T, H, W) for dimension=3; (N, C, H, W) for dimension 2; (N, C, T) for dimension 1 + """ + batch_size = x.size(0) + g_x = self.g(x).view(batch_size, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + if self.mode == "gaussian": + theta_x = x.view(batch_size, self.in_channels, -1) + phi_x = x.view(batch_size, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + f = torch.matmul(theta_x, phi_x) + + elif self.mode == "embedded" or self.mode == "dot": + theta_x = self.theta(x).view(batch_size, self.inter_channels, -1) + phi_x = self.phi(x).view(batch_size, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + f = torch.matmul(theta_x, phi_x) + + elif self.mode == "concatenate": + theta_x = self.theta(x).view(batch_size, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(batch_size, self.inter_channels, 1, -1) + + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + concat = torch.cat([theta_x, phi_x], dim=1) + f = self.W_f(concat) + f = f.view(f.size(0), f.size(2), f.size(3)) + + if self.mode == "gaussian" or self.mode == "embedded": + f_div_C = F.softmax(f, dim=-1) + # f_div_C = MixtureOfSoftMax(n_mix=n_mix, d_k=d_k) + + elif self.mode == "dot" or self.mode == "concatenate": + N = f.size(-1) + f_div_C = f / N + y = torch.matmul(f_div_C, g_x) + + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, self.inter_channels, *x.size()[2:]) + + W_y = self.W_z(y) + z = W_y + x + return z + +class MixtureOfSoftMax(nn.Module): + def __init__(self, n_mix, d_k, attn_dropout=0.1): + super(MixtureOfSoftMax, self).__init__() + self.temperature = np.power(d_k, 0.5) + self.n_mix = n_mix + self.att_drop = attn_dropout + self.dropout = nn.Dropout(attn_dropout) + self.softmax1 = nn.Softmax(dim=1) + self.softmax2 = nn.Softmax(dim=2) + self.d_k = d_k + if n_mix > 1: + self.weight 
= nn.Parameter(torch.Tensor(n_mix, d_k)) + std = np.power(n_mix, -0.5) + self.weight.data.uniform_(-std, std) + + def forward(self, qt, kt, vt): + B, d_k, N = qt.size() + m = self.n_mix + assert d_k == self.d_k + d = d_k // m + if m > 1: + bar_qt = torch.mean(qt, 2, True) + pi = self.softmax1(torch.matmul(self.weight, bar_qt)).view(B*m, 1, 1) + q = qt.view(B*m, d, N).transpose(1, 2) + N2 = kt.size(2) + kt = kt.view(B*m, d, N2) + v = vt.transpose(1, 2) + attn = torch.bmm(q, kt) + attn = attn / self.temperature + attn = self.softmax2(attn) + attn = self.dropout(attn) + if m > 1: + attn = (attn * pi).view(B, m, N, N2).sum(1) + output = torch.bmm(attn, v) + return output, attn \ No newline at end of file diff --git a/modelR/plugandplay/FuseBNConv.py b/modelR/plugandplay/FuseBNConv.py new file mode 100644 index 0000000..ccc050d --- /dev/null +++ b/modelR/plugandplay/FuseBNConv.py @@ -0,0 +1,27 @@ +import torch + +def fuse_conv_and_bn(conv, bn): + # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + with torch.no_grad(): + # init + fusedconv = torch.nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + bias=True) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) + + # prepare spatial bias + if conv.bias is not None: + b_conv = conv.bias + else: + b_conv = torch.zeros(conv.weight.size(0)) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv \ No newline at end of file diff --git a/modelR/plugandplay/GhostModule.py b/modelR/plugandplay/GhostModule.py new file mode 100644 index 0000000..98172d4 --- /dev/null +++ b/modelR/plugandplay/GhostModule.py @@ -0,0 +1,40 @@ +import torch.nn as nn +import math +import torch + +class GhostModule(nn.Module): + def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True): + super(GhostModule, self).__init__() + self.oup = oup + init_channels = math.ceil(oup / ratio) + new_channels = init_channels*(ratio-1) + + self.primary_conv = nn.Sequential( + nn.Conv2d(inp, init_channels, kernel_size, + stride, kernel_size//2, bias=False), + nn.BatchNorm2d(init_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + self.cheap_operation = nn.Sequential( + nn.Conv2d(init_channels, new_channels, dw_size, 1, + dw_size//2, groups=init_channels, bias=False), + nn.BatchNorm2d(new_channels), + nn.ReLU(inplace=True) if relu else nn.Sequential(), + ) + + def forward(self, x): + x1 = self.primary_conv(x) + x2 = self.cheap_operation(x1) + out = torch.cat([x1, x2], dim=1) + return out[:, :self.oup, :, :] + + +if __name__ == "__main__": + model = GhostModule(128, 256, 3) + + in_tensor = torch.zeros((2, 128, 64, 64)) + + out_tensor = model(in_tensor) + + print(out_tensor.shape) \ No newline at end of file diff --git a/modelR/plugandplay/MixedDepthwiseConv.py b/modelR/plugandplay/MixedDepthwiseConv.py new file mode 100644 index 0000000..1b368ee --- /dev/null +++ b/modelR/plugandplay/MixedDepthwiseConv.py @@ -0,0 +1,31 @@ +import numpy as np +import torch +import torch.nn as nn + + +class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 + def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, 
dilation=1, bias=True, method='equal_params'): + super(MixConv2d, self).__init__() + + groups = len(k) + if method == 'equal_ch': # equal channels per group + i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices + ch = [(i == g).sum() for g in range(groups)] + else: # 'equal_params': equal parameter count per group + b = [out_ch] + [0] * groups + a = np.eye(groups + 1, groups, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch, + out_channels=ch[g], + kernel_size=k[g], + stride=stride, + padding=k[g] // 2, # 'same' pad + dilation=dilation, + bias=bias) for g in range(groups)]) + + def forward(self, x): + return torch.cat([m(x) for m in self.m], 1) diff --git a/modelR/plugandplay/PSConv.py b/modelR/plugandplay/PSConv.py new file mode 100644 index 0000000..b6b112b --- /dev/null +++ b/modelR/plugandplay/PSConv.py @@ -0,0 +1,63 @@ +# arxiv: https://arxiv.org/abs/2007.06191 +# source: https://github.com/d-li14/PSConv/blob/fefe40d998/mmdet/models/utils/psconv.py + +import torch +import torch.nn as nn + + +class PSConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, parts=4, bias=False): + super(PSConv2d, self).__init__() + self.gwconv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, dilation, dilation, groups=parts, bias=bias) + self.gwconv_shift = nn.Conv2d(in_channels, out_channels, kernel_size, stride, 2 * dilation, 2 * dilation, groups=parts, bias=bias) + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias) + + def backward_hook(grad): + out = grad.clone() + out[self.mask] = 0 + return out + + self.mask = torch.zeros(self.conv.weight.shape).byte().cuda() + _in_channels = in_channels // parts + _out_channels = out_channels // parts + for i in range(parts): + self.mask[i * _out_channels: (i + 1) * _out_channels, i * _in_channels: (i + 1) * _in_channels, : , :] = 1 + self.mask[(i + parts//2)%parts * _out_channels: ((i + parts//2)%parts + 1) * _out_channels, i * _in_channels: (i + 1) * _in_channels, :, :] = 1 + self.conv.weight.data[self.mask] = 0 + self.conv.weight.register_hook(backward_hook) + + def forward(self, x): + x1, x2 = x.chunk(2, dim=1) + x_shift = self.gwconv_shift(torch.cat((x2, x1), dim=1)) + return self.gwconv(x) + self.conv(x) + x_shift + + +# PSConv-based Group Convolution +class PSGConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1, parts=4, bias=False): + super(PSGConv2d, self).__init__() + self.gwconv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, dilation, dilation, groups=groups * parts, bias=bias) + self.gwconv_shift = nn.Conv2d(in_channels, out_channels, kernel_size, stride, 2 * dilation, 2 * dilation, groups=groups * parts, bias=bias) + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=bias) + + def backward_hook(grad): + out = grad.clone() + out[self.mask] = 0 + return out + + self.mask = torch.zeros(self.conv.weight.shape).byte().cuda() + _in_channels = in_channels // (groups * parts) + _out_channels = out_channels // (groups * parts) + for i in range(parts): + for j in range(groups): + self.mask[(i + j * groups) * _out_channels: (i + j * groups + 1) * _out_channels, i * _in_channels: (i + 1) * _in_channels, : , :] = 1 + self.mask[((i 
+ parts // 2) % parts + j * groups) * _out_channels: ((i + parts // 2) % parts + j * groups + 1) * _out_channels, i * _in_channels: (i + 1) * _in_channels, :, :] = 1 + self.conv.weight.data[self.mask] = 0 + self.conv.weight.register_hook(backward_hook) + self.groups = groups + + def forward(self, x): + x_split = (z.chunk(2, dim=1) for z in x.chunk(self.groups, dim=1)) + x_merge = torch.cat(tuple(torch.cat((x2, x1), dim=1) for (x1, x2) in x_split), dim=1) + x_shift = self.gwconv_shift(x_merge) + return self.gwconv(x) + self.conv(x) + x_shift \ No newline at end of file diff --git a/modelR/plugandplay/PSPModule.py b/modelR/plugandplay/PSPModule.py new file mode 100644 index 0000000..e9a0f8c --- /dev/null +++ b/modelR/plugandplay/PSPModule.py @@ -0,0 +1,28 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F + +class PSPModule(nn.Module): + def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)): + super().__init__() + self.stages = [] + self.stages = nn.ModuleList( + [self._make_stage(features, size) for size in sizes]) + self.bottleneck = nn.Conv2d(features * (len(sizes) + 1), + out_features, + kernel_size=1) + self.relu = nn.ReLU() + + def _make_stage(self, features, size): + prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) + conv = nn.Conv2d(features, features, kernel_size=1, bias=False) + return nn.Sequential(prior, conv) + + def forward(self, feats): + h, w = feats.size(2), feats.size(3) + priors = [ + F.upsample(input=stage(feats), size=(h, w), mode='bilinear') + for stage in self.stages + ] + [feats] + bottle = self.bottleneck(torch.cat(priors, 1)) + return self.relu(bottle) \ No newline at end of file diff --git a/modelR/plugandplay/PyConv.py b/modelR/plugandplay/PyConv.py new file mode 100644 index 0000000..a71250f --- /dev/null +++ b/modelR/plugandplay/PyConv.py @@ -0,0 +1,45 @@ +# arxiv: https://arxiv.org/abs/2006.11538 +# source: https://github.com/iduta/pyconv/blob/master/models/pyconvresnet.py +import torch +import torch.nn as nn +import os + + +class PyConv2d(nn.Module): + """PyConv2d with padding (general case). Applies a 2D PyConv over an input signal composed of several input planes. + Args: + in_channels (int): Number of channels in the input image + out_channels (list): Number of channels for each pyramid level produced by the convolution + pyconv_kernels (list): Spatial size of the kernel for each pyramid level + pyconv_groups (list): Number of blocked connections from input channels to output channels for each pyramid level + stride (int or tuple, optional): Stride of the convolution. Default: 1 + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. 
Default: ``False`` + Example:: + >>> # PyConv with two pyramid levels, kernels: 3x3, 5x5 + >>> m = PyConv2d(in_channels=64, out_channels=[32, 32], pyconv_kernels=[3, 5], pyconv_groups=[1, 4]) + >>> input = torch.randn(4, 64, 56, 56) + >>> output = m(input) + >>> # PyConv with three pyramid levels, kernels: 3x3, 5x5, 7x7 + >>> m = PyConv2d(in_channels=64, out_channels=[16, 16, 32], pyconv_kernels=[3, 5, 7], pyconv_groups=[1, 4, 8]) + >>> input = torch.randn(4, 64, 56, 56) + >>> output = m(input) + """ + def __init__(self, in_channels, out_channels, pyconv_kernels, pyconv_groups, stride=1, dilation=1, bias=False): + super(PyConv2d, self).__init__() + + assert len(out_channels) == len(pyconv_kernels) == len(pyconv_groups) + + self.pyconv_levels = [None] * len(pyconv_kernels) + for i in range(len(pyconv_kernels)): + self.pyconv_levels[i] = nn.Conv2d(in_channels, out_channels[i], kernel_size=pyconv_kernels[i], + stride=stride, padding=pyconv_kernels[i] // 2, groups=pyconv_groups[i], + dilation=dilation, bias=bias) + self.pyconv_levels = nn.ModuleList(self.pyconv_levels) + + def forward(self, x): + out = [] + for level in self.pyconv_levels: + out.append(level(x)) + + return torch.cat(out, 1) \ No newline at end of file diff --git a/modelR/plugandplay/ReceptiveFieldModule.py b/modelR/plugandplay/ReceptiveFieldModule.py new file mode 100644 index 0000000..cdfa6d2 --- /dev/null +++ b/modelR/plugandplay/ReceptiveFieldModule.py @@ -0,0 +1,214 @@ +import torch.nn as nn +import torch + + +class BasicConv(nn.Module): + def __init__(self, + in_planes, + out_planes, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + relu=True, + bn=True, + bias=False): + super(BasicConv, self).__init__() + self.out_channels = out_planes + self.conv = nn.Conv2d(in_planes, + out_planes, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.bn = nn.BatchNorm2d( + out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None + self.relu = nn.ReLU(inplace=True) if relu else None + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.relu is not None: + x = self.relu(x) + return x + +class BasicRFB(nn.Module): + ''' + [rfb] + filters = 128 + stride = 1 or 2 + scale = 1.0 + ''' + def __init__(self, in_planes, out_planes, stride=1, scale=0.1, visual=1): + super(BasicRFB, self).__init__() + self.scale = scale + self.out_channels = out_planes + inter_planes = in_planes // 8 + self.branch0 = nn.Sequential( + BasicConv(in_planes, + 2 * inter_planes, + kernel_size=1, + stride=stride), + BasicConv(2 * inter_planes, + 2 * inter_planes, + kernel_size=3, + stride=1, + padding=visual, + dilation=visual, + relu=False)) + self.branch1 = nn.Sequential( + BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), + BasicConv(inter_planes, + 2 * inter_planes, + kernel_size=(3, 3), + stride=stride, + padding=(1, 1)), + BasicConv(2 * inter_planes, + 2 * inter_planes, + kernel_size=3, + stride=1, + padding=visual + 1, + dilation=visual + 1, + relu=False)) + self.branch2 = nn.Sequential( + BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), + BasicConv(inter_planes, (inter_planes // 2) * 3, + kernel_size=3, + stride=1, + padding=1), + BasicConv((inter_planes // 2) * 3, + 2 * inter_planes, + kernel_size=3, + stride=stride, + padding=1), + BasicConv(2 * inter_planes, + 2 * inter_planes, + kernel_size=3, + stride=1, + padding=2 * visual + 1, + dilation=2 * visual + 1, + relu=False)) + + self.ConvLinear = 
BasicConv(6 * inter_planes, + out_planes, + kernel_size=1, + stride=1, + relu=False) + self.shortcut = BasicConv(in_planes, + out_planes, + kernel_size=1, + stride=stride, + relu=False) + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + + out = torch.cat((x0, x1, x2), 1) + out = self.ConvLinear(out) + short = self.shortcut(x) + out = out * self.scale + short + out = self.relu(out) + + return out + + +class BasicRFB_small(nn.Module): + ''' + [rfbs] + filters = 128 + stride=1 or 2 + scale = 1.0 + ''' + def __init__(self, in_planes, out_planes, stride=1, scale=0.1): + super(BasicRFB_small, self).__init__() + self.scale = scale + self.out_channels = out_planes + inter_planes = in_planes // 4 + + self.branch0 = nn.Sequential( + BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), + BasicConv(inter_planes, + inter_planes, + kernel_size=3, + stride=1, + padding=1, + relu=False)) + self.branch1 = nn.Sequential( + BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), + BasicConv(inter_planes, + inter_planes, + kernel_size=(3, 1), + stride=1, + padding=(1, 0)), + BasicConv(inter_planes, + inter_planes, + kernel_size=3, + stride=1, + padding=3, + dilation=3, + relu=False)) + self.branch2 = nn.Sequential( + BasicConv(in_planes, inter_planes, kernel_size=1, stride=1), + BasicConv(inter_planes, + inter_planes, + kernel_size=(1, 3), + stride=stride, + padding=(0, 1)), + BasicConv(inter_planes, + inter_planes, + kernel_size=3, + stride=1, + padding=3, + dilation=3, + relu=False)) + self.branch3 = nn.Sequential( + BasicConv(in_planes, inter_planes // 2, kernel_size=1, stride=1), + BasicConv(inter_planes // 2, (inter_planes // 4) * 3, + kernel_size=(1, 3), + stride=1, + padding=(0, 1)), + BasicConv((inter_planes // 4) * 3, + inter_planes, + kernel_size=(3, 1), + stride=stride, + padding=(1, 0)), + BasicConv(inter_planes, + inter_planes, + kernel_size=3, + stride=1, + padding=5, + dilation=5, + relu=False)) + + self.ConvLinear = BasicConv(4 * inter_planes, + out_planes, + kernel_size=1, + stride=1, + relu=False) + self.shortcut = BasicConv(in_planes, + out_planes, + kernel_size=1, + stride=stride, + relu=False) + self.relu = nn.ReLU(inplace=False) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + + out = torch.cat((x0, x1, x2, x3), 1) + out = self.ConvLinear(out) + short = self.shortcut(x) + out = out * self.scale + short + out = self.relu(out) + + return out diff --git a/modelR/plugandplay/SPConv.py b/modelR/plugandplay/SPConv.py new file mode 100644 index 0000000..a805924 --- /dev/null +++ b/modelR/plugandplay/SPConv.py @@ -0,0 +1,57 @@ +import torch.nn as nn +import torch +''' +https://github.com/qiulinzhang/SPConv.pytorch +''' + +class SPConv_3x3(nn.Module): + def __init__(self, inplanes, outplanes, stride=1, ratio=0.5, reduction=16): + super(SPConv_3x3, self).__init__() + self.inplanes_3x3 = int(inplanes*ratio) + self.inplanes_1x1 = inplanes - self.inplanes_3x3 + self.outplanes_3x3 = int(outplanes*ratio) + self.outplanes_1x1 = outplanes - self.outplanes_3x3 + self.outplanes = outplanes + self.stride = stride + + self.gwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=3, stride=self.stride, + padding=1, groups=2, bias=False) + self.pwc = nn.Conv2d(self.inplanes_3x3, self.outplanes, kernel_size=1, bias=False) + + self.conv1x1 = nn.Conv2d(self.inplanes_1x1, self.outplanes,kernel_size=1) + self.avgpool_s2_1 = 
nn.AvgPool2d(kernel_size=2,stride=2)
+        self.avgpool_s2_3 = nn.AvgPool2d(kernel_size=2, stride=2)
+        self.avgpool_add_1 = nn.AdaptiveAvgPool2d(1)
+        self.avgpool_add_3 = nn.AdaptiveAvgPool2d(1)
+        self.bn1 = nn.BatchNorm2d(self.outplanes)
+        self.bn2 = nn.BatchNorm2d(self.outplanes)
+        self.ratio = ratio
+        self.groups = int(1/self.ratio)
+
+    def forward(self, x):
+        b, c, _, _ = x.size()
+
+        x_3x3 = x[:, :int(c*self.ratio), :, :]
+        x_1x1 = x[:, int(c*self.ratio):, :, :]
+        out_3x3_gwc = self.gwc(x_3x3)
+        if self.stride == 2:
+            x_3x3 = self.avgpool_s2_3(x_3x3)
+        out_3x3_pwc = self.pwc(x_3x3)
+        out_3x3 = out_3x3_gwc + out_3x3_pwc
+        out_3x3 = self.bn1(out_3x3)
+        out_3x3_ratio = self.avgpool_add_3(out_3x3).squeeze()
+
+        # use avgpool first to reduce information loss
+        if self.stride == 2:
+            x_1x1 = self.avgpool_s2_1(x_1x1)
+
+        out_1x1 = self.conv1x1(x_1x1)
+        out_1x1 = self.bn2(out_1x1)
+        out_1x1_ratio = self.avgpool_add_1(out_1x1).squeeze()
+
+        out_31_ratio = torch.stack((out_3x3_ratio, out_1x1_ratio), 2)
+        out_31_ratio = nn.Softmax(dim=2)(out_31_ratio)
+        out = out_1x1 * (out_31_ratio[:, :, 1].view(b, self.outplanes, 1, 1).expand_as(out_1x1)) \
+              + out_3x3 * (out_31_ratio[:, :, 0].view(b, self.outplanes, 1, 1).expand_as(out_3x3))
+
+        return out
\ No newline at end of file
diff --git a/modelR/plugandplay/SSHContextModule.py b/modelR/plugandplay/SSHContextModule.py
new file mode 100644
index 0000000..42aaf1d
--- /dev/null
+++ b/modelR/plugandplay/SSHContextModule.py
@@ -0,0 +1,44 @@
+import torch
+import torch.nn as nn
+
+'''
+arxiv: 1708.03979
+SSH: Single Stage Headless Face Detector
+'''
+
+class Conv3x3BNReLU(nn.Module):
+    def __init__(self, in_channel, out_channel):
+        super(Conv3x3BNReLU, self).__init__()
+        self.conv3x3 = nn.Conv2d(in_channel, out_channel, 3, 1, 1)
+        self.bn = nn.BatchNorm2d(out_channel)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        return self.relu(self.bn(self.conv3x3(x)))
+
+
+class SSHContextModule(nn.Module):
+    def __init__(self, in_channel):
+        super(SSHContextModule, self).__init__()
+        self.stem = Conv3x3BNReLU(in_channel, in_channel//2)
+        self.branch1_conv3x3 = Conv3x3BNReLU(in_channel//2, in_channel//2)
+        self.branch2_conv3x3_1 = Conv3x3BNReLU(in_channel//2, in_channel//2)
+        self.branch2_conv3x3_2 = Conv3x3BNReLU(in_channel//2, in_channel//2)
+
+    def forward(self, x):
+        x = self.stem(x)
+        # branch1
+        x1 = self.branch1_conv3x3(x)
+        # branch2
+        x2 = self.branch2_conv3x3_1(x)
+        x2 = self.branch2_conv3x3_2(x2)
+        # concat
+        # print(x1.shape, x2.shape)
+        return torch.cat([x1, x2], dim=1)
+
+
+if __name__ == "__main__":
+    in_tensor = torch.zeros((6, 64, 128, 128))
+    module = SSHContextModule(64)
+    out_tensor = module(in_tensor)
+    print(out_tensor.shape)
diff --git a/modelR/plugandplay/SematicEmbbedBlock.py b/modelR/plugandplay/SematicEmbbedBlock.py
new file mode 100644
index 0000000..b5d4e05
--- /dev/null
+++ b/modelR/plugandplay/SematicEmbbedBlock.py
@@ -0,0 +1,20 @@
+import torch.nn as nn
+
+"""
+https://zhuanlan.zhihu.com/p/76378871
+arxiv: 1804.03821
+ExFuse
+"""
+
+class SematicEmbbedBlock(nn.Module):
+    def __init__(self, high_in_plane, low_in_plane, out_plane):
+        super(SematicEmbbedBlock, self).__init__()
+        self.conv3x3 = nn.Conv2d(high_in_plane, out_plane, 3, 1, 1)
+        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
+
+        self.conv1x1 = nn.Conv2d(low_in_plane, out_plane, 1)
+
+    def forward(self, high_x, low_x):
+        high_x = self.upsample(self.conv3x3(high_x))
+        low_x = self.conv1x1(low_x)
+        return high_x * low_x
\ No newline at end of file
diff --git
a/modelR/plugandplay/SlimConv.py b/modelR/plugandplay/SlimConv.py new file mode 100644 index 0000000..2131afb --- /dev/null +++ b/modelR/plugandplay/SlimConv.py @@ -0,0 +1,54 @@ +import torch.nn as nn +import torch +''' +https://arxiv.org/pdf/2003.07469.pdf +''' + +class slim_conv_3x3(nn.Module): + + def __init__(self, in_planes, stride, groups, dilation): + super(slim_conv_3x3, self).__init__() + self.stride = stride + + reduce_1 = 2 + reduce_2 = 4 + + self.conv2_2 = nn.Sequential(nn.Conv2d(in_planes//reduce_1, in_planes//reduce_2, kernel_size=1, bias=False), + nn.BatchNorm2d(in_planes//reduce_2), + nn.ReLU(inplace=True), + nn.Conv2d(in_planes // reduce_2, in_planes // reduce_2, kernel_size=3, + stride=stride, groups=groups, padding=dilation, bias=False, dilation=dilation), + nn.BatchNorm2d(in_planes // reduce_2)) + + self.conv2_1 = nn.Sequential(nn.Conv2d(in_planes//reduce_1, in_planes//reduce_1, kernel_size=3, stride=stride, groups=groups, padding=dilation, bias=False, dilation=dilation), + nn.BatchNorm2d(in_planes//reduce_1)) + + self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 32, kernel_size=1, bias=False), + nn.BatchNorm2d(in_planes // 32), + nn.ReLU(inplace=True), + nn.Conv2d(in_planes // 32, + in_planes, kernel_size=1), + nn.Sigmoid()) + self.pool = nn.AdaptiveAvgPool2d(1) + + def forward(self, x): + out = x + b, c, h, _ = out.size() + + w = self.pool(out) + w = self.fc(w) + w_f = torch.flip(w, [1]) + + out1 = w*out + out2 = w_f*out + fs1 = torch.split(out1, c // 2, 1) + fs2 = torch.split(out2, c // 2, 1) + + ft1 = fs1[0] + fs1[1] + ft2 = fs2[0] + fs2[1] + + out2_1 = self.conv2_1(ft1) + out2_2 = self.conv2_2(ft2) + + out = torch.cat((out2_1, out2_2), 1) + return out diff --git a/modelR/plugandplay/StripPooling.py b/modelR/plugandplay/StripPooling.py new file mode 100644 index 0000000..ecb960c --- /dev/null +++ b/modelR/plugandplay/StripPooling.py @@ -0,0 +1,140 @@ +import torch +import torch.nn as nn + +import torch.nn.functional as F + +''' +https://www.cnblogs.com/YongQiVisionIMAX/p/12630769.html +https://github.com/Andrew-Qibin/SPNet/blob/master/models/spnet.py +''' + + +class StripPooling(nn.Module): + def __init__(self, in_channels, pool_size, norm_layer, up_kwargs): + super(StripPooling, self).__init__() + self.pool1 = nn.AdaptiveAvgPool2d(pool_size[0]) + self.pool2 = nn.AdaptiveAvgPool2d(pool_size[1]) + + self.pool3 = nn.AdaptiveAvgPool2d((1, None)) + self.pool4 = nn.AdaptiveAvgPool2d((None, 1)) + + inter_channels = int(in_channels/4) + + self.conv1_1 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.ReLU(True)) + self.conv1_2 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, bias=False), + norm_layer(inter_channels), + nn.ReLU(True)) + + self.conv2_0 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), + norm_layer(inter_channels)) + self.conv2_1 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), + norm_layer(inter_channels)) + self.conv2_2 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), + norm_layer(inter_channels)) + self.conv2_3 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (1, 3), 1, (0, 1), bias=False), + norm_layer(inter_channels)) + self.conv2_4 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, (3, 1), 1, (1, 0), bias=False), + norm_layer(inter_channels)) + self.conv2_5 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), + norm_layer(inter_channels), + 
nn.ReLU(True)) + self.conv2_6 = nn.Sequential(nn.Conv2d(inter_channels, inter_channels, 3, 1, 1, bias=False), + norm_layer(inter_channels), + nn.ReLU(True)) + self.conv3 = nn.Sequential(nn.Conv2d(inter_channels*2, in_channels, 1, bias=False), + norm_layer(in_channels)) + # bilinear interpolate options + self._up_kwargs = up_kwargs + + def forward(self, x): + _, _, h, w = x.size() + x1 = self.conv1_1(x) + x2 = self.conv1_2(x) + + x2_1 = self.conv2_0(x1) + + x2_2 = F.interpolate(self.conv2_1(self.pool1(x1)), + (h, w), **self._up_kwargs) + x2_3 = F.interpolate(self.conv2_2(self.pool2(x1)), + (h, w), **self._up_kwargs) + x2_4 = F.interpolate(self.conv2_3(self.pool3(x2)), + (h, w), **self._up_kwargs) + x2_5 = F.interpolate(self.conv2_4(self.pool4(x2)), + (h, w), **self._up_kwargs) + + x1 = self.conv2_5(F.relu_(x2_1 + x2_2 + x2_3)) + x2 = self.conv2_6(F.relu_(x2_5 + x2_4)) + out = self.conv3(torch.cat([x1, x2], dim=1)) + + return F.relu_(x + out) + + +class PyramidPooling(nn.Module): + """ + Reference: + Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* + """ + + def __init__(self, in_channels, norm_layer, up_kwargs): + super(PyramidPooling, self).__init__() + self.pool1 = nn.AdaptiveAvgPool2d(1) + self.pool2 = nn.AdaptiveAvgPool2d(2) + self.pool3 = nn.AdaptiveAvgPool2d(3) + self.pool4 = nn.AdaptiveAvgPool2d(6) + + out_channels = int(in_channels/4) + self.conv1 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.conv2 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.conv3 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.conv4 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + # bilinear interpolate options + self._up_kwargs = up_kwargs + + def forward(self, x): + _, _, h, w = x.size() + feat1 = F.interpolate(self.conv1(self.pool1(x)), + (h, w), **self._up_kwargs) + feat2 = F.interpolate(self.conv2(self.pool2(x)), + (h, w), **self._up_kwargs) + feat3 = F.interpolate(self.conv3(self.pool3(x)), + (h, w), **self._up_kwargs) + feat4 = F.interpolate(self.conv4(self.pool4(x)), + (h, w), **self._up_kwargs) + return torch.cat((x, feat1, feat2, feat3, feat4), 1) + + +class SPHead(nn.Module): + def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): + super(SPHead, self).__init__() + inter_channels = in_channels // 2 + self.trans_layer = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 1, 1, 0, bias=False), + norm_layer(inter_channels), + nn.ReLU(True) + ) + self.strip_pool1 = StripPooling( + inter_channels, (20, 12), norm_layer, up_kwargs) + self.strip_pool2 = StripPooling( + inter_channels, (20, 12), norm_layer, up_kwargs) + self.score_layer = nn.Sequential(nn.Conv2d(inter_channels, inter_channels // 2, 3, 1, 1, bias=False), + norm_layer(inter_channels // 2), + nn.ReLU(True), + nn.Dropout2d(0.1, False), + nn.Conv2d(inter_channels // 2, out_channels, 1)) + + def forward(self, x): + x = self.trans_layer(x) + x = self.strip_pool1(x) + x = self.strip_pool2(x) + x = self.score_layer(x) + return x + diff --git a/modelR/plugandplay/ULSAM.py b/modelR/plugandplay/ULSAM.py new file mode 100644 index 0000000..f13ad14 --- /dev/null +++ b/modelR/plugandplay/ULSAM.py @@ -0,0 +1,122 @@ +# source:https://github.com/Nandan91/ULSAM/blob/master/ulsam.py +# arxiv: https://arxiv.org/abs/2006.15102 +import torch +import 
torch.nn as nn + +torch.set_default_tensor_type(torch.cuda.FloatTensor) + + +class SubSpace(nn.Module): + """ + Subspace class. + ... + Attributes + ---------- + nin : int + number of input feature volume. + Methods + ------- + __init__(nin) + initialize method. + forward(x) + forward pass. + """ + + def __init__(self, nin): + super(SubSpace, self).__init__() + self.conv_dws = nn.Conv2d( + nin, nin, kernel_size=1, stride=1, padding=0, groups=nin + ) + self.bn_dws = nn.BatchNorm2d(nin, momentum=0.9) + self.relu_dws = nn.ReLU(inplace=False) + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) + + self.conv_point = nn.Conv2d( + nin, 1, kernel_size=1, stride=1, padding=0, groups=1 + ) + self.bn_point = nn.BatchNorm2d(1, momentum=0.9) + self.relu_point = nn.ReLU(inplace=False) + + self.softmax = nn.Softmax(dim=2) + + def forward(self, x): + out = self.conv_dws(x) + out = self.bn_dws(out) + out = self.relu_dws(out) + + out = self.maxpool(x) + + out = self.conv_point(out) + out = self.bn_point(out) + out = self.relu_point(out) + + m, n, p, q = out.shape + out = self.softmax(out.view(m, n, -1)) + out = out.view(m, n, p, q) + + out = out.expand(x.shape[0], x.shape[1], x.shape[2], x.shape[3]) + + out = torch.mul(out, x) + + out = out + x + + return out + + +class ULSAM(nn.Module): + """ + Grouped Attention Block having multiple (num_splits) Subspaces. + ... + Attributes + ---------- + nin : int + number of input feature volume. + nout : int + number of output feature maps + h : int + height of a input feature map + w : int + width of a input feature map + num_splits : int + number of subspaces + Methods + ------- + __init__(nin) + initialize method. + forward(x) + forward pass. + """ + + def __init__(self, nin, nout, h, w, num_splits): + super(ULSAM, self).__init__() + + assert nin % num_splits == 0 + + self.nin = nin + self.nout = nout + self.h = h + self.w = w + self.num_splits = num_splits + + self.subspaces = nn.ModuleList( + [SubSpace(int(self.nin / self.num_splits)) for i in range(self.num_splits)] + ) + + def forward(self, x): + group_size = int(self.nin / self.num_splits) + + # split at batch dimension + sub_feat = torch.chunk(x, self.num_splits, dim=1) + + out = [] + for idx, l in enumerate(self.subspaces): + out.append(self.subspaces[idx](sub_feat[idx])) + + out = torch.cat(out, dim=1) + + return out + + +# for debug +# print(ULSAM(64, 64, 112, 112, 4)) \ No newline at end of file diff --git a/modelR/plugandplay/UsefullTools.py b/modelR/plugandplay/UsefullTools.py new file mode 100644 index 0000000..816a0b7 --- /dev/null +++ b/modelR/plugandplay/UsefullTools.py @@ -0,0 +1,103 @@ +import torch.nn.functional as F +import torch.nn as nn +import torch + +class Flatten(nn.Module): + # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions + def forward(self, x): + return x.view(x.size(0), -1) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super(Concat, self).__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class FeatureConcat(nn.Module): + def __init__(self, layers): + super(FeatureConcat, self).__init__() + self.layers = layers # layer indices + self.multiple = len(layers) > 1 # multiple layers flag + + def forward(self, x, outputs): + return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] + +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + 
super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + return F.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1) + +class GramMatrix(nn.Module): + r""" Gram Matrix for a 4D convolutional featuremaps as a mini-batch + .. math:: + \mathcal{G} = \sum_{h=1}^{H_i}\sum_{w=1}^{W_i} \mathcal{F}_{h,w}\mathcal{F}_{h,w}^T + """ + def forward(self, y): + (b, ch, h, w) = y.size() + features = y.view(b, ch, w * h) + features_t = features.transpose(1, 2) + gram = features.bmm(features_t) / (ch * h * w) + return gram + +class View(nn.Module): + """Reshape the input into different size, an inplace operator, support + SelfParallel mode. + """ + def __init__(self, *args): + super(View, self).__init__() + if len(args) == 1 and isinstance(args[0], torch.Size): + self.size = args[0] + else: + self.size = torch.Size(args) + + def forward(self, input): + return input.view(self.size) + +class Sum(nn.Module): + def __init__(self, dim, keep_dim=False): + super(Sum, self).__init__() + self.dim = dim + self.keep_dim = keep_dim + + def forward(self, input): + return input.sum(self.dim, self.keep_dim) + +class Mean(nn.Module): + def __init__(self, dim, keep_dim=False): + super(Mean, self).__init__() + self.dim = dim + self.keep_dim = keep_dim + + def forward(self, input): + return input.mean(self.dim, self.keep_dim) + +class Normalize(nn.Module): + r"""Performs :math:`L_p` normalization of inputs over specified dimension. + Does: + .. math:: + v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)} + for each subtensor v over dimension dim of input. Each subtensor is + flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix + norm. + With default arguments normalizes over the second dimension with Euclidean + norm. + Args: + p (float): the exponent value in the norm formulation. Default: 2 + dim (int): the dimension to reduce. 
Default: 1 + """ + def __init__(self, p=2, dim=1): + super(Normalize, self).__init__() + self.p = p + self.dim = dim + + def forward(self, x): + return F.normalize(x, self.p, self.dim, eps=1e-8) + diff --git a/modelR/plugandplay/WeightedFeatureFusion.py b/modelR/plugandplay/WeightedFeatureFusion.py new file mode 100644 index 0000000..49cb700 --- /dev/null +++ b/modelR/plugandplay/WeightedFeatureFusion.py @@ -0,0 +1,34 @@ +import torch.nn as nn +import torch + +# using in EffcientDet +class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, layers, weight=False): + super(WeightedFeatureFusion, self).__init__() + self.layers = layers # layer indices + self.weight = weight # apply weights boolean + self.n = len(layers) + 1 # number of layers + if weight: + self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights + + def forward(self, x, outputs): + # Weights + if self.weight: + w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1) + x = x * w[0] + + # Fusion + nx = x.shape[1] # input channels + for i in range(self.n - 1): + a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add + na = a.shape[1] # feature channels + + # Adjust channels + if nx == na: # same shape + x = x + a + elif nx > na: # slice input + x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a + else: # slice feature + x = x + a[:, :nx] + + return x \ No newline at end of file diff --git a/modelR/plugandplay/__init__.py b/modelR/plugandplay/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelR/plugandplay/attentions/BAM/bam.py b/modelR/plugandplay/attentions/BAM/bam.py new file mode 100644 index 0000000..6790aa9 --- /dev/null +++ b/modelR/plugandplay/attentions/BAM/bam.py @@ -0,0 +1,87 @@ +import torch +import math +import torch.nn as nn +import torch.nn.functional as F + + +class Flatten(nn.Module): + def forward(self, x): + return x.view(x.size(0), -1) + + +class ChannelGate(nn.Module): + def __init__(self, gate_channel, reduction_ratio=16, num_layers=1): + super(ChannelGate, self).__init__() + self.gate_c = nn.Sequential() + self.gate_c.add_module('flatten', Flatten()) + + gate_channels = [gate_channel] # eg 64 + gate_channels += [gate_channel // reduction_ratio] * num_layers # eg 4 + gate_channels += [gate_channel] # 64 + # gate_channels: [64, 4, 4] + + for i in range(len(gate_channels) - 2): + self.gate_c.add_module( + 'gate_c_fc_%d' % i, + nn.Linear(gate_channels[i], gate_channels[i + 1])) + self.gate_c.add_module('gate_c_bn_%d' % (i + 1), + nn.BatchNorm1d(gate_channels[i + 1])) + self.gate_c.add_module('gate_c_relu_%d' % (i + 1), nn.ReLU()) + + self.gate_c.add_module('gate_c_fc_final', + nn.Linear(gate_channels[-2], gate_channels[-1])) + + def forward(self, x): + avg_pool = F.avg_pool2d(x, x.size(2), stride=x.size(2)) + return self.gate_c(avg_pool).unsqueeze(2).unsqueeze(3).expand_as(x) + + +class SpatialGate(nn.Module): + def __init__(self, + gate_channel, + reduction_ratio=16, + dilation_conv_num=2, + dilation_val=4): + super(SpatialGate, self).__init__() + self.gate_s = nn.Sequential() + + self.gate_s.add_module( + 'gate_s_conv_reduce0', + nn.Conv2d(gate_channel, + gate_channel // reduction_ratio, + kernel_size=1)) + self.gate_s.add_module('gate_s_bn_reduce0', + nn.BatchNorm2d(gate_channel // reduction_ratio)) + self.gate_s.add_module('gate_s_relu_reduce0', nn.ReLU()) + + # 进行多个空洞卷积,丰富感受野 + for i in 
range(dilation_conv_num): + self.gate_s.add_module( + 'gate_s_conv_di_%d' % i, + nn.Conv2d(gate_channel // reduction_ratio, + gate_channel // reduction_ratio, + kernel_size=3, + padding=dilation_val, + dilation=dilation_val)) + self.gate_s.add_module( + 'gate_s_bn_di_%d' % i, + nn.BatchNorm2d(gate_channel // reduction_ratio)) + self.gate_s.add_module('gate_s_relu_di_%d' % i, nn.ReLU()) + + self.gate_s.add_module( + 'gate_s_conv_final', + nn.Conv2d(gate_channel // reduction_ratio, 1, kernel_size=1)) + + def forward(self, x): + return self.gate_s(x).expand_as(x) + + +class BAM(nn.Module): + def __init__(self, gate_channel): + super(BAM, self).__init__() + self.channel_att = ChannelGate(gate_channel) + self.spatial_att = SpatialGate(gate_channel) + + def forward(self, x): + att = 1 + F.sigmoid(self.channel_att(x) * self.spatial_att(x)) + return att * x \ No newline at end of file diff --git a/modelR/plugandplay/attentions/BiSeNet/bisemodel.py b/modelR/plugandplay/attentions/BiSeNet/bisemodel.py new file mode 100644 index 0000000..d0fe046 --- /dev/null +++ b/modelR/plugandplay/attentions/BiSeNet/bisemodel.py @@ -0,0 +1,208 @@ +import torch +from torch import nn +from contextpath import build_contextpath +import warnings +warnings.filterwarnings(action='ignore') + + +class ConvBlock(torch.nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1): + super().__init__() + self.conv1 = nn.Conv2d(in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=False) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU() + + def forward(self, input): + x = self.conv1(input) + return self.relu(self.bn(x)) + + +class Spatial_path(torch.nn.Module): + def __init__(self): + super().__init__() + self.convblock1 = ConvBlock(in_channels=3, out_channels=64) + self.convblock2 = ConvBlock(in_channels=64, out_channels=128) + self.convblock3 = ConvBlock(in_channels=128, out_channels=256) + + def forward(self, input): + x = self.convblock1(input) + x = self.convblock2(x) + x = self.convblock3(x) + return x + + +class AttentionRefinementModule(torch.nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) + self.bn = nn.BatchNorm2d(out_channels) + self.sigmoid = nn.Sigmoid() + self.in_channels = in_channels + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + def forward(self, input): + # global average pooling + x = self.avgpool(input) + assert self.in_channels == x.size( + 1), 'in_channels and out_channels should all be {}'.format( + x.size(1)) + x = self.conv(x) + # x = self.sigmoid(self.bn(x)) + x = self.sigmoid(x) + # channels of input and x should be same + x = torch.mul(input, x) + return x + + +class FeatureFusionModule(torch.nn.Module): + def __init__(self, num_classes, in_channels): + super().__init__() + self.in_channels = in_channels + self.convblock = ConvBlock(in_channels=self.in_channels, + out_channels=num_classes, + stride=1) + self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1) + self.sigmoid = nn.Sigmoid() + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + def forward(self, input_1, input_2): + x = torch.cat((input_1, input_2), dim=1) + assert self.in_channels == x.size( + 1), 'in_channels of ConvBlock should be {}'.format(x.size(1)) + feature = self.convblock(x) + x = self.avgpool(feature) + + x 
= self.relu(self.conv1(x)) + x = self.sigmoid(self.conv2(x)) + x = torch.mul(feature, x) + x = torch.add(x, feature) + return x + + +class BiSeNet(torch.nn.Module): + def __init__(self, num_classes, context_path): + super().__init__() + # build spatial path + self.saptial_path = Spatial_path() + + # build context path + self.context_path = build_contextpath(name=context_path) + + # build attention refinement module for resnet 101 + if context_path == 'resnet101': + self.attention_refinement_module1 = AttentionRefinementModule( + 1024, 1024) + self.attention_refinement_module2 = AttentionRefinementModule( + 2048, 2048) + # supervision block + self.supervision1 = nn.Conv2d(in_channels=1024, + out_channels=num_classes, + kernel_size=1) + self.supervision2 = nn.Conv2d(in_channels=2048, + out_channels=num_classes, + kernel_size=1) + # build feature fusion module + self.feature_fusion_module = FeatureFusionModule(num_classes, 3328) + + elif context_path == 'resnet18': + # build attention refinement module for resnet 18 + self.attention_refinement_module1 = AttentionRefinementModule( + 256, 256) + self.attention_refinement_module2 = AttentionRefinementModule( + 512, 512) + # supervision block + self.supervision1 = nn.Conv2d(in_channels=256, + out_channels=num_classes, + kernel_size=1) + self.supervision2 = nn.Conv2d(in_channels=512, + out_channels=num_classes, + kernel_size=1) + # build feature fusion module + self.feature_fusion_module = FeatureFusionModule(num_classes, 1024) + else: + print('Error: unspport context_path network \n') + + # build final convolution + self.conv = nn.Conv2d(in_channels=num_classes, + out_channels=num_classes, + kernel_size=1) + + # self.init_weight() + self.mul_lr = [] + self.mul_lr.append(self.saptial_path) + self.mul_lr.append(self.attention_refinement_module1) + self.mul_lr.append(self.attention_refinement_module2) + self.mul_lr.append(self.supervision1) + self.mul_lr.append(self.supervision2) + self.mul_lr.append(self.feature_fusion_module) + self.mul_lr.append(self.conv) + def forward(self, input): + print("input shape:", input.shape) + # output of spatial path + sx = self.saptial_path(input) + # output of context path + cx1, cx2, tail = self.context_path(input) + cx1 = self.attention_refinement_module1(cx1) + cx2 = self.attention_refinement_module2(cx2) + cx2 = torch.mul(cx2, tail) + # upsampling + cx1 = torch.nn.functional.interpolate(cx1, + size=sx.size()[-2:], + mode='bilinear') + cx2 = torch.nn.functional.interpolate(cx2, + size=sx.size()[-2:], + mode='bilinear') + cx = torch.cat((cx1, cx2), dim=1) + if self.training == True: + cx1_sup = self.supervision1(cx1) + cx2_sup = self.supervision2(cx2) + cx1_sup = torch.nn.functional.interpolate(cx1_sup, + size=input.size()[-2:], + mode='bilinear') + cx2_sup = torch.nn.functional.interpolate(cx2_sup, + size=input.size()[-2:], + mode='bilinear') + + print("sx shape:", sx.shape) + print("cx shape:", cx.shape) + # output of feature fusion module + result = self.feature_fusion_module(sx, cx) + # upsampling + result = torch.nn.functional.interpolate(result, + scale_factor=8, + mode='bilinear') + result = self.conv(result) + if self.training == True: + return result, cx1_sup, cx2_sup + + return result + + +if __name__ == '__main__': + model = BiSeNet(32, 'resnet18') + # model = nn.DataParallel(model) + x = torch.rand(2, 3, 416, 416) + record = model.parameters() + # for key, params in model.named_parameters(): + # if 'bn' in key: + # params.requires_grad = False + # from utils import group_weight + # params_list = [] + 
# for module in model.mul_lr: + # params_list = group_weight(params_list, module, nn.BatchNorm2d, 10) + # params_list = group_weight(params_list, model.context_path, torch.nn.BatchNorm2d, 1) + + # print(model.parameters()) + y = model(x) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/BiSeNet/contextpath.py b/modelR/plugandplay/attentions/BiSeNet/contextpath.py new file mode 100644 index 0000000..0c95c08 --- /dev/null +++ b/modelR/plugandplay/attentions/BiSeNet/contextpath.py @@ -0,0 +1,74 @@ +import torch +from torchvision import models + + +class resnet18(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = models.resnet18(pretrained=False) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + + +class resnet101(torch.nn.Module): + def __init__(self, pretrained=True): + super().__init__() + self.features = models.resnet101(pretrained=False) + self.conv1 = self.features.conv1 + self.bn1 = self.features.bn1 + self.relu = self.features.relu + self.maxpool1 = self.features.maxpool + self.layer1 = self.features.layer1 + self.layer2 = self.features.layer2 + self.layer3 = self.features.layer3 + self.layer4 = self.features.layer4 + + def forward(self, input): + x = self.conv1(input) + x = self.relu(self.bn1(x)) + x = self.maxpool1(x) + feature1 = self.layer1(x) # 1 / 4 + feature2 = self.layer2(feature1) # 1 / 8 + feature3 = self.layer3(feature2) # 1 / 16 + feature4 = self.layer4(feature3) # 1 / 32 + # global average pooling to build tail + tail = torch.mean(feature4, 3, keepdim=True) + tail = torch.mean(tail, 2, keepdim=True) + return feature3, feature4, tail + + +def build_contextpath(name): + model = { + 'resnet18': resnet18(pretrained=True), + 'resnet101': resnet101(pretrained=True) + } + return model[name] + + +if __name__ == '__main__': + # + model_18 = build_contextpath('resnet18') + model_101 = build_contextpath('resnet101') + x = torch.rand(1, 3, 256, 256) + + y_18 = model_18(x) + y_101 = model_101(x) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/CBAM/cbam.py b/modelR/plugandplay/attentions/CBAM/cbam.py new file mode 100644 index 0000000..e85343b --- /dev/null +++ b/modelR/plugandplay/attentions/CBAM/cbam.py @@ -0,0 +1,100 @@ +import torch +import torch.nn as nn + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False) + + +class ChannelAttention(nn.Module): + def __init__(self, in_planes, ratio=4): + super(ChannelAttention, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + + self.sharedMLP = nn.Sequential( + nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(), + nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)) + self.sigmoid = 
nn.Sigmoid() + + def forward(self, x): + avgout = self.sharedMLP(self.avg_pool(x)) + maxout = self.sharedMLP(self.max_pool(x)) + return self.sigmoid(avgout + maxout) + + +class SpatialAttention(nn.Module): + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + assert kernel_size in (3, 7), "kernel size must be 3 or 7" + padding = 3 if kernel_size == 7 else 1 + + self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avgout = torch.mean(x, dim=1, keepdim=True) + maxout, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avgout, maxout], dim=1) + x = self.conv(x) + return self.sigmoid(x) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.ca = ChannelAttention(planes) + self.sa = SpatialAttention() + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + out = self.ca(out) * out # 广播机制 + out = self.sa(out) * out # 广播机制 + + if self.downsample is not None: + print("downsampling") + residual = self.downsample(x) + + print(out.shape, residual.shape) + + out += residual + out = self.relu(out) + + return out + + +if __name__ == "__main__": + downsample = nn.Sequential( + nn.Conv2d(16, 32, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(32)) + + x = torch.ones(3, 16, 32, 32) + + model = BasicBlock(16, 32, stride=1, downsample=downsample) + + print(model(x).shape) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/CCNet/ccnet.py b/modelR/plugandplay/attentions/CCNet/ccnet.py new file mode 100644 index 0000000..410c377 --- /dev/null +++ b/modelR/plugandplay/attentions/CCNet/ccnet.py @@ -0,0 +1,103 @@ +import functools +import time + +import torch +import torch.autograd as autograd +import torch.cuda.comm as comm +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd.function import once_differentiable + + +class CA_Weight(autograd.Function): + @staticmethod + def forward(ctx, t, f): + # Save context + n, c, h, w = t.size() + size = (n, h + w - 1, h, w) + weight = torch.zeros(size, + dtype=t.dtype, + layout=t.layout, + device=t.device) + + _ext.ca_forward_cuda(t, f, weight) + + # Output + ctx.save_for_backward(t, f) + + return weight + + @staticmethod + @once_differentiable + def backward(ctx, dw): + t, f = ctx.saved_tensors + + dt = torch.zeros_like(t) + df = torch.zeros_like(f) + + _ext.ca_backward_cuda(dw.contiguous(), t, f, dt, df) + + _check_contiguous(dt, df) + + return dt, df + + +class CA_Map(autograd.Function): + @staticmethod + def forward(ctx, weight, g): + # Save context + out = torch.zeros_like(g) + _ext.ca_map_forward_cuda(weight, g, out) + + # Output + ctx.save_for_backward(weight, g) + + return out + + @staticmethod + @once_differentiable + def backward(ctx, dout): + weight, g = ctx.saved_tensors + + dw = torch.zeros_like(weight) + dg = torch.zeros_like(g) + + _ext.ca_map_backward_cuda(dout.contiguous(), weight, g, dw, dg) + + _check_contiguous(dw, dg) + + return dw, dg + + +ca_weight = CA_Weight.apply +ca_map = CA_Map.apply + + +class CrissCrossAttention(nn.Module): + """ 
Criss-Cross Attention Module""" + def __init__(self, in_dim): + super(CrissCrossAttention, self).__init__() + self.chanel_in = in_dim + + self.query_conv = nn.Conv2d(in_channels=in_dim, + out_channels=in_dim // 8, + kernel_size=1) + self.key_conv = nn.Conv2d(in_channels=in_dim, + out_channels=in_dim // 8, + kernel_size=1) + self.value_conv = nn.Conv2d(in_channels=in_dim, + out_channels=in_dim, + kernel_size=1) + self.gamma = nn.Parameter(torch.zeros(1)) + + def forward(self, x): + proj_query = self.query_conv(x) + proj_key = self.key_conv(x) + proj_value = self.value_conv(x) + + energy = ca_weight(proj_query, proj_key) + attention = F.softmax(energy, 1) + out = ca_map(attention, proj_value) + out = self.gamma * out + x + + return out diff --git a/modelR/plugandplay/attentions/DANet/DualAttention.py b/modelR/plugandplay/attentions/DANet/DualAttention.py new file mode 100644 index 0000000..f1790c4 --- /dev/null +++ b/modelR/plugandplay/attentions/DANet/DualAttention.py @@ -0,0 +1,85 @@ +########################################################################### +# Created by: CASIA IVA +# Email: jliu@nlpr.ia.ac.cn +# Copyright (c) 2018 +########################################################################### + +# source : https://github.com/junfu1115/DANet/blob/master/encoding/nn/attention.py + +import numpy as np +import torch +import math +from torch.nn import Module, Sequential, Conv2d, ReLU,AdaptiveMaxPool2d, AdaptiveAvgPool2d, \ + NLLLoss, BCELoss, CrossEntropyLoss, AvgPool2d, MaxPool2d, Parameter, Linear, Sigmoid, Softmax, Dropout, Embedding +from torch.nn import functional as F +from torch.autograd import Variable +torch_ver = torch.__version__[:3] + +__all__ = ['PAM_Module', 'CAM_Module'] + + +class PAM_Module(Module): + """ Position attention module""" + #Ref from SAGAN + def __init__(self, in_dim): + super(PAM_Module, self).__init__() + self.chanel_in = in_dim + + self.query_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) + self.key_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) + self.value_conv = Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.gamma = Parameter(torch.zeros(1)) + + self.softmax = Softmax(dim=-1) + def forward(self, x): + """ + inputs : + x : input feature maps( B X C X H X W) + returns : + out : attention value + input feature + attention: B X (HxW) X (HxW) + """ + m_batchsize, C, height, width = x.size() + proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1) + proj_key = self.key_conv(x).view(m_batchsize, -1, width*height) + energy = torch.bmm(proj_query, proj_key) + attention = self.softmax(energy) + proj_value = self.value_conv(x).view(m_batchsize, -1, width*height) + + out = torch.bmm(proj_value, attention.permute(0, 2, 1)) + out = out.view(m_batchsize, C, height, width) + + out = self.gamma*out + x + return out + + +class CAM_Module(Module): + """ Channel attention module""" + def __init__(self, in_dim): + super(CAM_Module, self).__init__() + self.chanel_in = in_dim + + + self.gamma = Parameter(torch.zeros(1)) + self.softmax = Softmax(dim=-1) + def forward(self,x): + """ + inputs : + x : input feature maps( B X C X H X W) + returns : + out : attention value + input feature + attention: B X C X C + """ + m_batchsize, C, height, width = x.size() + proj_query = x.view(m_batchsize, C, -1) + proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max(energy, -1, 
keepdim=True)[0].expand_as(energy)-energy + attention = self.softmax(energy_new) + proj_value = x.view(m_batchsize, C, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(m_batchsize, C, height, width) + + out = self.gamma*out + x + return out \ No newline at end of file diff --git a/modelR/plugandplay/attentions/GCBlock/gcnet.py b/modelR/plugandplay/attentions/GCBlock/gcnet.py new file mode 100644 index 0000000..b149a09 --- /dev/null +++ b/modelR/plugandplay/attentions/GCBlock/gcnet.py @@ -0,0 +1,91 @@ +import torch +from torch import nn + +class ContextBlock(nn.Module): + def __init__(self,inplanes,ratio,pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() + valid_fusion_types = ['channel_add', 'channel_mul'] + + assert pooling_type in ['avg', 'att'] + assert isinstance(fusion_types, (list, tuple)) + assert all([f in valid_fusion_types for f in fusion_types]) + assert len(fusion_types) > 0, 'at least one fusion should be used' + + self.inplanes = inplanes + self.ratio = ratio + self.planes = int(inplanes * ratio) + self.pooling_type = pooling_type + self.fusion_types = fusion_types + + if pooling_type == 'att': + self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) + self.softmax = nn.Softmax(dim=2) + else: + self.avg_pool = nn.AdaptiveAvgPool2d(1) + if 'channel_add' in fusion_types: + self.channel_add_conv = nn.Sequential( + nn.Conv2d(self.inplanes, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) + else: + self.channel_add_conv = None + if 'channel_mul' in fusion_types: + self.channel_mul_conv = nn.Sequential( + nn.Conv2d(self.inplanes, self.planes, kernel_size=1), + nn.LayerNorm([self.planes, 1, 1]), + nn.ReLU(inplace=True), # yapf: disable + nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) + else: + self.channel_mul_conv = None + + + def spatial_pool(self, x): + batch, channel, height, width = x.size() + if self.pooling_type == 'att': + input_x = x + # [N, C, H * W] + input_x = input_x.view(batch, channel, height * width) + # [N, 1, C, H * W] + input_x = input_x.unsqueeze(1) + # [N, 1, H, W] + context_mask = self.conv_mask(x) + # [N, 1, H * W] + context_mask = context_mask.view(batch, 1, height * width) + # [N, 1, H * W] + context_mask = self.softmax(context_mask) + # [N, 1, H * W, 1] + context_mask = context_mask.unsqueeze(-1) + # [N, 1, C, 1] + context = torch.matmul(input_x, context_mask) + # [N, C, 1, 1] + context = context.view(batch, channel, 1, 1) + else: + # [N, C, 1, 1] + context = self.avg_pool(x) + return context + + def forward(self, x): + # [N, C, 1, 1] + context = self.spatial_pool(x) + out = x + if self.channel_mul_conv is not None: + # [N, C, 1, 1] + channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) + out = out * channel_mul_term + if self.channel_add_conv is not None: + # [N, C, 1, 1] + channel_add_term = self.channel_add_conv(context) + out = out + channel_add_term + return out + +if __name__ == "__main__": + in_tensor = torch.ones((12, 64, 128, 128)) + + cb = ContextBlock(inplanes=64, ratio=1./16.,pooling_type='att') + + out_tensor = cb(in_tensor) + + print(in_tensor.shape) + print(out_tensor.shape) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/HRNet/hrnet.py b/modelR/plugandplay/attentions/HRNet/hrnet.py new file mode 100644 index 0000000..bc99c08 --- /dev/null +++ b/modelR/plugandplay/attentions/HRNet/hrnet.py @@ -0,0 +1,549 @@ +# 
------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# Modified by Ke Sun (sunk@mail.ustc.edu.cn) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging +import functools + +import numpy as np + +import torch +import torch.nn as nn +import torch._utils +import torch.nn.functional as F + +BN_MOMENTUM = 0.1 +logger = logging.getLogger(__name__) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HighResolutionModule(nn.Module): + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True): + ''' + 调用: + # 调用高低分辨率交互模块, stage2 为例 + HighResolutionModule(num_branches, # 2 + block, # 'BASIC' + num_blocks, # [4, 4] + num_inchannels, # 上个stage的out channel + num_channels, # [32, 64] + fuse_method, # SUM + reset_multi_scale_output) + ''' + super(HighResolutionModule, self).__init__() + self._check_branches( + # 检查分支数目是否合理 + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels = num_inchannels + # 融合选用相加的方式 + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + # 两个核心部分,一个是branches构建,一个是融合layers构建 + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + + self.relu = 
nn.ReLU(False) + + def _check_branches(self, num_branches, blocks, num_blocks, + num_inchannels, num_channels): + # 分别检查参数是否符合要求,看models.py中的参数,blocks参数冗余了 + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + logger.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, + stride=1): + # 构建一个分支,一个分支重复num_blocks个block + downsample = None + + # 这里判断,如果通道变大(分辨率变小),则使用下采样 + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(num_channels[branch_index] * block.expansion, + momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index], stride, downsample)) + + self.num_inchannels[branch_index] = \ + num_channels[branch_index] * block.expansion + + for i in range(1, num_blocks[branch_index]): + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index])) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + # 通过循环构建多分支,每个分支属于不同的分辨率 + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + if self.num_branches == 1: + return None + + num_branches = self.num_branches # 2 + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + # i代表枚举所有分支 + fuse_layer = [] + for j in range(num_branches): + # j代表处理的当前分支 + if j > i: # 进行上采样,使用最近邻插值 + fuse_layer.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_inchannels[i], + 1, + 1, + 0, + bias=False), + nn.BatchNorm2d(num_inchannels[i], + momentum=BN_MOMENTUM), + nn.Upsample(scale_factor=2**(j-i), mode='nearest'))) + elif j == i: + # 本层不做处理 + fuse_layer.append(None) + else: + conv3x3s = [] + # 进行strided 3x3 conv下采样,如果跨两层,就使用两次strided 3x3 conv + for k in range(i-j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False), + nn.BatchNorm2d(num_outchannels_conv3x3, + momentum=BN_MOMENTUM))) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False), + nn.BatchNorm2d(num_outchannels_conv3x3, + nn.ReLU(False))) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i]=self.branches[i](x[i]) + + x_fuse=[] + for i in 
range(len(self.fuse_layers)): + y=x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y=y + x[j] + else: + y=y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + + # 将fuse以后的多个分支结果保存到list中 + return x_fuse + + +blocks_dict={ + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +class HighResolutionNet(nn.Module): + + def __init__(self, cfg, **kwargs): + super(HighResolutionNet, self).__init__() + + self.conv1=nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn1=nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.conv2=nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn2=nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu=nn.ReLU(inplace=True) + + self.stage1_cfg=cfg['MODEL']['EXTRA']['STAGE1'] + num_channels=self.stage1_cfg['NUM_CHANNELS'][0] + block=blocks_dict[self.stage1_cfg['BLOCK']] + num_blocks=self.stage1_cfg['NUM_BLOCKS'][0] + + self.layer1=self._make_layer(block, 64, num_channels, num_blocks) + stage1_out_channel=block.expansion*num_channels + + self.stage2_cfg=cfg['MODEL']['EXTRA']['STAGE2'] + num_channels=self.stage2_cfg['NUM_CHANNELS'] + block=blocks_dict[self.stage2_cfg['BLOCK']] + num_channels=[ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition1=self._make_transition_layer( + [stage1_out_channel], num_channels) + self.stage2, pre_stage_channels=self._make_stage( + self.stage2_cfg, num_channels) + + self.stage3_cfg=cfg['MODEL']['EXTRA']['STAGE3'] + num_channels=self.stage3_cfg['NUM_CHANNELS'] + block=blocks_dict[self.stage3_cfg['BLOCK']] + num_channels=[ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition2=self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage3, pre_stage_channels=self._make_stage( + self.stage3_cfg, num_channels) + + self.stage4_cfg=cfg['MODEL']['EXTRA']['STAGE4'] + num_channels=self.stage4_cfg['NUM_CHANNELS'] + block=blocks_dict[self.stage4_cfg['BLOCK']] + num_channels=[ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition3=self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage4, pre_stage_channels=self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True) + + # Classification Head + self.incre_modules, self.downsamp_modules, \ + self.final_layer=self._make_head(pre_stage_channels) + + self.classifier=nn.Linear(2048, 1000) + + def _make_head(self, pre_stage_channels): + head_block=Bottleneck + head_channels=[32, 64, 128, 256] + + # Increasing the #channels on each resolution + # from C, 2C, 4C, 8C to 128, 256, 512, 1024 + incre_modules=[] + for i, channels in enumerate(pre_stage_channels): + incre_module=self._make_layer(head_block, + channels, + head_channels[i], + 1, + stride=1) + incre_modules.append(incre_module) + incre_modules=nn.ModuleList(incre_modules) + + # downsampling modules + downsamp_modules=[] + for i in range(len(pre_stage_channels)-1): + in_channels=head_channels[i] * head_block.expansion + out_channels=head_channels[i+1] * head_block.expansion + + downsamp_module=nn.Sequential( + nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1), + nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + downsamp_modules.append(downsamp_module) + downsamp_modules=nn.ModuleList(downsamp_modules) + + final_layer=nn.Sequential( + nn.Conv2d( + in_channels=head_channels[3] * 
head_block.expansion, + out_channels=2048, + kernel_size=1, + stride=1, + padding=0 + ), + nn.BatchNorm2d(2048, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + return incre_modules, downsamp_modules, final_layer + + def _make_transition_layer( + self, num_channels_pre_layer, num_channels_cur_layer): + num_branches_cur=len(num_channels_cur_layer) + num_branches_pre=len(num_channels_pre_layer) + + transition_layers=[] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append(nn.Sequential( + nn.Conv2d(num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, + 1, + 1, + bias=False), + nn.BatchNorm2d( + num_channels_cur_layer[i], momentum=BN_MOMENTUM), + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv3x3s=[] + for j in range(i+1-num_branches_pre): + inchannels=num_channels_pre_layer[-1] + outchannels=num_channels_cur_layer[i] \ + if j == i-num_branches_pre else inchannels + conv3x3s.append(nn.Sequential( + nn.Conv2d( + inchannels, outchannels, 3, 2, 1, bias=False), + nn.BatchNorm2d(outchannels, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + downsample=None + if stride != 1 or inplanes != planes * block.expansion: + downsample=nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers=[] + layers.append(block(inplanes, planes, stride, downsample)) + inplanes=planes * block.expansion + for i in range(1, blocks): + layers.append(block(inplanes, planes)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True): + num_modules=layer_config['NUM_MODULES'] + num_branches=layer_config['NUM_BRANCHES'] + num_blocks=layer_config['NUM_BLOCKS'] + num_channels=layer_config['NUM_CHANNELS'] + block=blocks_dict[layer_config['BLOCK']] + fuse_method=layer_config['FUSE_METHOD'] + + modules=[] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output=False + else: + reset_multi_scale_output=True + + modules.append( + # 调用高低分辨率交互模块, stage2 为例 + HighResolutionModule(num_branches, # 2 + block, # 'BASIC' + num_blocks, # [4, 4] + num_inchannels, # 上个stage的out channel + num_channels, # [32, 64] + fuse_method, # SUM + reset_multi_scale_output) + ) + num_inchannels=modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + + # 使用两个strided 3x3conv进行快速降维 + x=self.relu(self.bn1(self.conv1(x))) + x=self.relu(self.bn2(self.conv2(x))) + + # 构建了一串BasicBlock构成的模块 + x=self.layer1(x) + + # 然后是多个stage,每个stage核心是调用HighResolutionModule模块 + x_list=[] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list=self.stage2(x_list) + + x_list=[] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list=self.stage3(x_list) + + x_list=[] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + 
else: + x_list.append(y_list[i]) + y_list=self.stage4(x_list) + + # 添加分类头,上文中有显示,在分类问题中添加这种头 + # 在其他问题中换用不同的头 + y=self.incre_modules[0](y_list[0]) + for i in range(len(self.downsamp_modules)): + y=self.incre_modules[i+1](y_list[i+1]) + \ + self.downsamp_modules[i](y) + + y=self.final_layer(y) + + if torch._C._get_tracing_state(): + # 在不写C代码的情况下执行forward,直接用python版本 + y=y.flatten(start_dim=2).mean(dim=2) + else: + y=F.avg_pool2d(y, kernel_size=y.size() + [2:]).view(y.size(0), -1) + + y=self.classifier(y) + + return y + + def init_weights(self, pretrained='',): + logger.info('=> init weights from normal distribution') + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + if os.path.isfile(pretrained): + pretrained_dict=torch.load(pretrained) + logger.info('=> loading pretrained model {}'.format(pretrained)) + model_dict=self.state_dict() + pretrained_dict={k: v for k, v in pretrained_dict.items() + if k in model_dict.keys()} + for k, _ in pretrained_dict.items(): + logger.info( + '=> loading {} pretrained model {}'.format(k, pretrained)) + model_dict.update(pretrained_dict) + self.load_state_dict(model_dict) + + +def get_cls_net(config, **kwargs): + model=HighResolutionNet(config, **kwargs) + model.init_weights() + return model diff --git a/modelR/plugandplay/attentions/Non-local/LICENSE b/modelR/plugandplay/attentions/Non-local/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.3.1/demo_MNIST.py b/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.3.1/demo_MNIST.py new file mode 100644 index 0000000..cd29e4f --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.3.1/demo_MNIST.py @@ -0,0 +1,83 @@ +import torch +import torch.utils.data as Data +import torchvision +from lib.network import Network +from torch.autograd import Variable +from torch import nn +import time + + +def calc_acc(x, y): + x = torch.max(x, dim=-1)[1] + accuracy = sum(x == y) / x.size(0) + return accuracy + + +train_data = torchvision.datasets.MNIST(root='./mnist', train=True, + transform=torchvision.transforms.ToTensor(), + download=True) +test_data = torchvision.datasets.MNIST(root='./mnist/', + transform=torchvision.transforms.ToTensor(), + train=False) + +train_loader = Data.DataLoader(dataset=train_data, batch_size=128, shuffle=True) +test_loader = Data.DataLoader(dataset=test_data, batch_size=128, shuffle=False) + +train_batch_num = len(train_loader) +test_batch_num = len(test_loader) + +net = Network() +if torch.cuda.is_available(): + net = nn.DataParallel(net) + net.cuda() + +opt = torch.optim.Adam(net.parameters(), lr=0.001) +loss_func = nn.CrossEntropyLoss() + + +for epoch_index in range(20): + st = time.time() + for train_batch_index, (img_batch, label_batch) in enumerate(train_loader): + img_batch = Variable(img_batch) + label_batch = Variable(label_batch) + + if torch.cuda.is_available(): + img_batch = img_batch.cuda() + label_batch = label_batch.cuda() + + predict = net(img_batch) + acc = calc_acc(predict.cpu().data, label_batch.cpu().data) + loss = loss_func(predict, label_batch) + + net.zero_grad() + loss.backward() + opt.step() + + print('(LR:%f) Time of a epoch:%.4fs' % (opt.param_groups[0]['lr'], time.time()-st)) + + net.eval() + total_loss = 0 + total_acc = 0 + + for test_batch_index, (img_batch, label_batch) in enumerate(test_loader): + img_batch = Variable(img_batch, volatile=True) + label_batch = Variable(label_batch, volatile=True) + + if torch.cuda.is_available(): + img_batch = img_batch.cuda() + label_batch = label_batch.cuda() + + predict = net(img_batch) + acc = calc_acc(predict.cpu().data, label_batch.cpu().data) + loss = loss_func(predict, label_batch) + + total_loss += loss + total_acc += acc + + net.train() + + mean_acc = total_acc / test_batch_num + mean_loss = total_loss / test_batch_num + + print('[Test] epoch[%d/%d] acc:%.4f loss:%.4f\n' + % (epoch_index, 100, mean_acc, mean_loss.data[0])) diff --git a/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.4.1_to_1.1.0/demo_MNIST.py b/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.4.1_to_1.1.0/demo_MNIST.py new file mode 100644 index 0000000..60128d0 --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/Non-Local_pytorch_0.4.1_to_1.1.0/demo_MNIST.py @@ -0,0 +1,83 @@ +import torch +import torch.utils.data as Data +import torchvision +from lib.network import Network +from torch import nn +import time + + +# def calc_acc(x, y): +# x = torch.max(x, dim=-1)[1] +# accuracy = sum(x == y) / x.size(0) +# return accuracy + + +train_data = torchvision.datasets.MNIST(root='./mnist', train=True, + transform=torchvision.transforms.ToTensor(), + download=True) +test_data = torchvision.datasets.MNIST(root='./mnist/', + transform=torchvision.transforms.ToTensor(), + train=False) + +train_loader = Data.DataLoader(dataset=train_data, batch_size=128, shuffle=True) +test_loader = 
Data.DataLoader(dataset=test_data, batch_size=128, shuffle=False)
+
+train_batch_num = len(train_loader)
+test_batch_num = len(test_loader)
+
+net = Network()
+if torch.cuda.is_available():
+    net = nn.DataParallel(net)
+    net.cuda()
+
+opt = torch.optim.Adam(net.parameters(), lr=0.001)
+loss_func = nn.CrossEntropyLoss()
+
+for epoch_index in range(20):
+    st = time.time()
+
+    torch.set_grad_enabled(True)
+    net.train()
+    for train_batch_index, (img_batch, label_batch) in enumerate(train_loader):
+        if torch.cuda.is_available():
+            img_batch = img_batch.cuda()
+            label_batch = label_batch.cuda()
+
+        predict = net(img_batch)
+        # acc = calc_acc(predict.cpu().data, label_batch.cpu().data)
+        loss = loss_func(predict, label_batch)
+
+        net.zero_grad()
+        loss.backward()
+        opt.step()
+
+    print('(LR:%f) Time of a epoch:%.4fs' % (opt.param_groups[0]['lr'], time.time()-st))
+
+    torch.set_grad_enabled(False)
+    net.eval()
+    total_loss = []
+    total_acc = 0
+    total_sample = 0
+
+    for test_batch_index, (img_batch, label_batch) in enumerate(test_loader):
+        if torch.cuda.is_available():
+            img_batch = img_batch.cuda()
+            label_batch = label_batch.cuda()
+
+        predict = net(img_batch)
+        loss = loss_func(predict, label_batch)
+
+        predict = predict.argmax(dim=1)
+        acc = (predict == label_batch).sum()
+
+        total_loss.append(loss)
+        total_acc += acc
+        total_sample += img_batch.size(0)
+
+    net.train()
+
+    mean_acc = total_acc.item() * 1.0 / total_sample
+    mean_loss = sum(total_loss) / total_loss.__len__()
+
+    print('[Test] epoch[%d/%d] acc:%.4f%% loss:%.4f\n'
+          % (epoch_index, 20, mean_acc * 100, mean_loss.item()))
diff --git a/modelR/plugandplay/attentions/Non-local/README.md b/modelR/plugandplay/attentions/Non-local/README.md
new file mode 100644
index 0000000..6eda6bc
--- /dev/null
+++ b/modelR/plugandplay/attentions/Non-local/README.md
@@ -0,0 +1,79 @@
+# Non-local_pytorch
+- Implementation of the [**Non-local Neural Block**](https://arxiv.org/abs/1711.07971).
+
+## Statement
+- You can find different kinds of non-local blocks in **lib/**.
+
+- You can **visualize** the Non-local Attention Map by following the **Running Steps** shown below.
+
+- The code is tested on the MNIST dataset. You can select the type of non-local block in **lib/network.py**.
+
+- If there is something wrong in my code, please contact me, thanks!
+
+## Environment
+- python 3.7.3
+- pytorch 1.2.0
+- opencv 3.4.2
+
+## Visualization
+1. In the **first** Non-local Layer.
+
+![](nl_map_vis/nl_map_1/37.png) ![](nl_map_vis/nl_map_1/44.png) ![](nl_map_vis/nl_map_1/46.png) ![](nl_map_vis/nl_map_1/110.png) ![](nl_map_vis/nl_map_1/161.png)
+
+2. In the **second** Non-local Layer.
+
+![](nl_map_vis/nl_map_2/1.png) ![](nl_map_vis/nl_map_2/8.png) ![](nl_map_vis/nl_map_2/10.png) ![](nl_map_vis/nl_map_2/18.png) ![](nl_map_vis/nl_map_2/38.png)
+
+
+## Running Steps
+1. Select the type of non-local block in **lib/network.py**.
+   ```
+   from lib.non_local_concatenation import NONLocalBlock2D
+   from lib.non_local_gaussian import NONLocalBlock2D
+   from lib.non_local_embedded_gaussian import NONLocalBlock2D
+   from lib.non_local_dot_product import NONLocalBlock2D
+   ```
+2. Run **demo_MNIST_train.py** with one or multiple GPUs to train the Network. The weights will then be saved in **weights/**.
+   ```
+   CUDA_VISIBLE_DEVICES=0,1 python demo_MNIST_train.py
+   ```
+3. Run **nl_map_save.py** to save the NL_MAP of one test sample in **nl_map_vis/**.
+   ```
+   CUDA_VISIBLE_DEVICES=0,1 python nl_map_save.py
+   ```
+4. Go into **nl_map_vis/** and run **nl_map_vis.py** to visualize the NL_MAP.
 (Tip: if the Non-local type you select is **non_local_concatenation** or **non_local_dot_product**, which have no Softmax operation, you may need to normalize the NL_MAP in the visualization code.)
+   ```
+   python nl_map_vis.py
+   ```
+
+## Update Records
+1. Figure out how to implement the **concatenation** type, and add the code to **lib/**.
+
+2. Fix the bug in **lib/non_local.py** (old version) when using multi-GPU. Someone shared the
+reason with me; you can find it [here](https://github.com/pytorch/pytorch/issues/8637).
+
+3. Fix the error of 3D pooling in **lib/non_local.py** (old version). Thanks to
+[**protein27**](https://github.com/AlexHex7/Non-local_pytorch/issues/17) for pointing it out.
+
+4. For convenience, split **lib/non_local.py** into four Python files, and move the
+old versions (**lib/non_local.py** and **lib/non_local_simple_version.py**) into
+**lib/backup/**.
+
+5. Modify the code to support pytorch 0.4.1, and move the code supporting pytorch 0.3.1
+to **Non-Local_pytorch_0.3.1/**.
+
+6. Test the code with pytorch 1.1.0 and it works.
+
+7. Move the code supporting pytorch 0.4.1 and 1.1.0 to **Non-Local_pytorch_0.4.1_to_1.1.0/** (in fact, it should also support pytorch 1.2.0).
+
+8. To visualize the NL_MAP, some code has been slightly modified. **nl_map_save.py** is added to save the NL_MAP of both Non-local layers for one test sample, and **nl_map_vis/nl_map_vis.py** is added to visualize it. Besides, the code supports pytorch 1.2.0.
+
+
+## Todo
+- Experiments on Charades dataset.
+- Experiments on COCO dataset.
+
+
+## Related Repositories
+1. [**Non-local ResNet-50 TSM**](https://github.com/MIT-HAN-LAB/temporal-shift-module)
+([**Paper**](https://arxiv.org/abs/1811.08383)) on the Kinetics dataset. They report that their model achieves
+**75.6% on Kinetics**, which is even higher than Non-local ResNet-50 I3D
+([**Here**](https://github.com/AlexHex7/Non-local_pytorch/issues/23)).
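+
+## Minimal Usage Sketch
+A rough, untested sketch (not part of the original repo) of how a `NONLocalBlock2D` might be dropped into a small MNIST CNN. It assumes the block takes the channel count as its first argument and preserves the input shape, as the imports under **Running Steps** suggest; adapt it to whatever **lib/network.py** actually defines.
+```
+import torch
+from torch import nn
+from lib.non_local_embedded_gaussian import NONLocalBlock2D  # or any of the other three variants
+
+class TinyNonLocalNet(nn.Module):           # hypothetical example, not the repo's Network
+    def __init__(self):
+        super().__init__()
+        self.features = nn.Sequential(
+            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
+            NONLocalBlock2D(32),            # assumed: first argument is in_channels
+            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
+        )
+        self.classifier = nn.Linear(64 * 7 * 7, 10)
+
+    def forward(self, x):
+        return self.classifier(self.features(x).flatten(1))
+
+x = torch.randn(2, 1, 28, 28)               # MNIST-sized input
+print(TinyNonLocalNet()(x).shape)           # expected: torch.Size([2, 10])
+```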
\ No newline at end of file diff --git a/modelR/plugandplay/attentions/Non-local/demo_MNIST_train.py b/modelR/plugandplay/attentions/Non-local/demo_MNIST_train.py new file mode 100644 index 0000000..1af26f0 --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/demo_MNIST_train.py @@ -0,0 +1,81 @@ +import torch +import torch.utils.data as Data +import torchvision +from lib.network import Network +from torch import nn +import time + + +train_data = torchvision.datasets.MNIST(root='./mnist', train=True, + transform=torchvision.transforms.ToTensor(), + download=True) +test_data = torchvision.datasets.MNIST(root='./mnist/', + transform=torchvision.transforms.ToTensor(), + train=False) + +train_loader = Data.DataLoader(dataset=train_data, batch_size=128, shuffle=True) +test_loader = Data.DataLoader(dataset=test_data, batch_size=128, shuffle=False) + +train_batch_num = len(train_loader) +test_batch_num = len(test_loader) + +net = Network() +if torch.cuda.is_available(): + net = nn.DataParallel(net) + net.cuda() + +opt = torch.optim.Adam(net.parameters(), lr=0.001) +loss_func = nn.CrossEntropyLoss() + +for epoch_index in range(10): + st = time.time() + + torch.set_grad_enabled(True) + net.train() + for train_batch_index, (img_batch, label_batch) in enumerate(train_loader): + if torch.cuda.is_available(): + img_batch = img_batch.cuda() + label_batch = label_batch.cuda() + + predict = net(img_batch) + loss = loss_func(predict, label_batch) + + net.zero_grad() + loss.backward() + opt.step() + + print('(LR:%f) Time of a epoch:%.4fs' % (opt.param_groups[0]['lr'], time.time()-st)) + + torch.set_grad_enabled(False) + net.eval() + total_loss = [] + total_acc = 0 + total_sample = 0 + + for test_batch_index, (img_batch, label_batch) in enumerate(test_loader): + if torch.cuda.is_available(): + img_batch = img_batch.cuda() + label_batch = label_batch.cuda() + + predict = net(img_batch) + loss = loss_func(predict, label_batch) + + predict = predict.argmax(dim=1) + acc = (predict == label_batch).sum() + + total_loss.append(loss) + total_acc += acc + total_sample += img_batch.size(0) + + net.train() + + mean_acc = total_acc.item() * 1.0 / total_sample + mean_loss = sum(total_loss) / total_loss.__len__() + + print('[Test] epoch[%d/%d] acc:%.4f%% loss:%.4f\n' + % (epoch_index, 10, mean_acc * 100, mean_loss.item())) + +# weight_path = 'weights/net.pth' +# print('Save Net weights to', weight_path) +# net.cpu() +# torch.save(net.state_dict(), weight_path) diff --git a/modelR/plugandplay/attentions/Non-local/nl_map_save.py b/modelR/plugandplay/attentions/Non-local/nl_map_save.py new file mode 100644 index 0000000..85d1306 --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/nl_map_save.py @@ -0,0 +1,39 @@ +import torch +import torch.utils.data as Data +import torchvision +from lib.network import Network +from torch import nn +import numpy as np + + +test_data = torchvision.datasets.MNIST(root='./mnist/', + transform=torchvision.transforms.ToTensor(), + train=False) + +test_loader = iter(Data.DataLoader(dataset=test_data, batch_size=1, shuffle=False)) + +net = Network() +if torch.cuda.is_available(): + net = nn.DataParallel(net) + net.cuda() + +net.load_state_dict(torch.load('weights/net.pth')) + + +img_batch, label_batch = test_loader.__next__() +img_batch = img_batch.cuda() +label_batch = label_batch.cuda() + +torch.set_grad_enabled(False) +net.eval() + +_, nl_mep_list = net.module.forward_with_nl_map(img_batch) + +# (b, h1*w1, h2*w2) +nl_map_1 = nl_mep_list[0].cpu().numpy() +nl_map_2 = 
nl_mep_list[1].cpu().numpy() + +img = torchvision.transforms.ToPILImage()(img_batch.cpu()[0]) +img.save('nl_map_vis/sample.png') +np.save('nl_map_vis/nl_map_1', nl_map_1) +np.save('nl_map_vis/nl_map_2', nl_map_2) diff --git a/modelR/plugandplay/attentions/Non-local/nl_map_vis/nl_map_vis.py b/modelR/plugandplay/attentions/Non-local/nl_map_vis/nl_map_vis.py new file mode 100644 index 0000000..ff14370 --- /dev/null +++ b/modelR/plugandplay/attentions/Non-local/nl_map_vis/nl_map_vis.py @@ -0,0 +1,53 @@ +""" +(tips: if the Non-local type you select is **non_local_concatenation** +or **non_local_dot_product** (without Softmax operation), +you may need to normalize NL_MAP in the visualize code) +""" +import numpy as np +import cv2 +import math +import os + + +def vis_nl_map(img_path, nl_map_path, vis_size=(56, 56)): + dst_dir = nl_map_path.split('.')[0] + if not os.path.exists(dst_dir): + os.mkdir(dst_dir) + + img = cv2.imread(img_path, 1) + img = cv2.resize(img, dsize=vis_size) + h, w, c = img.shape + + nl_map_1 = np.load(nl_map_path)[0] + + total_region, nl_map_length = nl_map_1.shape + region_per_row = round(math.sqrt(total_region)) + size_of_region = round(w / region_per_row) + + nl_map_size = round(math.sqrt(nl_map_length)) + + for index in range(total_region): + img_draw = img.copy() + nl_map = nl_map_1[index] + nl_map = nl_map.reshape(nl_map_size, nl_map_size) + nl_map = cv2.resize(nl_map, dsize=(h, w)) + + nl_map = np.uint8(nl_map * 255) + + heat_img = cv2.applyColorMap(nl_map, cv2.COLORMAP_JET) + heat_img = cv2.cvtColor(heat_img, cv2.COLOR_BGR2RGB) + img_add = cv2.addWeighted(img_draw, 0.3, heat_img, 0.7, 0) + + x0 = index // region_per_row * size_of_region + x1 = x0 + size_of_region + + y0 = index % region_per_row * size_of_region + y1 = y0 + size_of_region + + cv2.rectangle(img_add, (y0, x0), (y1, x1), (255, 0, 0), 1) + cv2.imwrite('%s/%d.png' % (dst_dir, index), cv2.cvtColor(img_add, cv2.COLOR_BGR2RGB)) + + +if __name__ == '__main__': + vis_nl_map(img_path='sample.png', nl_map_path='nl_map_1.npy', vis_size=(56, 56)) + vis_nl_map(img_path='sample.png', nl_map_path='nl_map_2.npy', vis_size=(56, 56)) diff --git a/modelR/plugandplay/attentions/ObjectContextualRepresentation/OCRModule.py b/modelR/plugandplay/attentions/ObjectContextualRepresentation/OCRModule.py new file mode 100644 index 0000000..2134257 --- /dev/null +++ b/modelR/plugandplay/attentions/ObjectContextualRepresentation/OCRModule.py @@ -0,0 +1,176 @@ +# source: https://github.com/IndigoAI/SemanticSegmentation/blob/master/HRNet/OCR.py +import torch +import torch.nn as nn +import torch._utils +import torch.nn.functional as F +from OCRForClothes.HRNet.batchnorm import SynchronizedBatchNorm2d +BatchNorm2d = SynchronizedBatchNorm2d + +import warnings +warnings.filterwarnings("ignore") + + +class ModuleHelper: + + @staticmethod + def BNReLU(num_features, bn_type=None, **kwargs): + return nn.Sequential( + BatchNorm2d(num_features, **kwargs), + nn.ReLU() + ) + + @staticmethod + def BatchNorm2d(*args, **kwargs): + return BatchNorm2d + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class SpatialGather_Module(nn.Module): + """ + Aggregate the context features according to the initial + predicted probability distribution. + Employ the soft-weighted method to aggregate the context. 
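+    Given feats of shape (N, C, H, W) and per-pixel class probabilities probs of shape
+    (N, K, H, W), the output has shape (N, C, K, 1): one soft-pooled feature vector per
+    object region (class), later fed to the object attention block as the proxy input.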
+ """ + def __init__(self, cls_num=0, scale=1): + super(SpatialGather_Module, self).__init__() + self.cls_num = cls_num + self.scale = scale + + def forward(self, feats, probs): + # feats - blue cube + batch_size, c, h, w = probs.size(0), probs.size(1), probs.size(2), probs.size(3) + probs = probs.view(batch_size, c, -1) + feats = feats.view(batch_size, feats.size(1), -1) + feats = feats.permute(0, 2, 1) # batch x hw x c + probs = F.softmax(self.scale * probs, dim=2)# batch x k x hw + # probs - m~ + ocr_context = torch.matmul(probs, feats)\ + .permute(0, 2, 1).unsqueeze(3)# batch x k x c + # ocr_context - f_proxy + return ocr_context + + +class _ObjectAttentionBlock(nn.Module): + ''' + The basic implementation for object context block + Input: + N X C X H X W + Parameters: + in_channels : the dimension of the input feature map + key_channels : the dimension after the key/query transform + scale : choose the scale to downsample the input feature maps (save memory cost) + bn_type : specify the bn type + Return: + N X C X H X W + ''' + def __init__(self, + in_channels, + key_channels, + scale=1, + bn_type=None): + super(_ObjectAttentionBlock, self).__init__() + self.scale = scale + self.in_channels = in_channels + self.key_channels = key_channels + self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) + self.f_pixel = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_object = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_down = nn.Sequential( + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.key_channels, bn_type=bn_type), + ) + self.f_up = nn.Sequential( + nn.Conv2d(in_channels=self.key_channels, out_channels=self.in_channels, + kernel_size=1, stride=1, padding=0, bias=False), + ModuleHelper.BNReLU(self.in_channels, bn_type=bn_type), + ) + + def forward(self, x, proxy): + batch_size, h, w = x.size(0), x.size(2), x.size(3) + if self.scale > 1: + x = self.pool(x) + + query = self.f_pixel(x).view(batch_size, self.key_channels, -1) # phi(x) + query = query.permute(0, 2, 1) + key = self.f_object(proxy).view(batch_size, self.key_channels, -1) # psi(f) + value = self.f_down(proxy).view(batch_size, self.key_channels, -1) # delta + value = value.permute(0, 2, 1) + + sim_map = torch.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) # w + + # add bg context ... 
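+        # sim_map: (N, H*W, K) pixel-to-region affinities, softmax-normalised over the K regions.
+        # value:   (N, K, key_channels) region descriptors from f_down(proxy).
+        # The matmul below therefore gives every pixel a weighted mix of the K object-context
+        # vectors, which f_up then projects back to in_channels.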
+ context = torch.matmul(sim_map, value) # sum(w * delta) + context = context.permute(0, 2, 1).contiguous() + context = context.view(batch_size, self.key_channels, *x.size()[2:]) + context = self.f_up(context) # ro + if self.scale > 1: + context = F.interpolate(input=context, size=(h, w), mode='bilinear', align_corners=ALIGN_CORNERS) + + return context + + +class ObjectAttentionBlock2D(_ObjectAttentionBlock): + def __init__(self, + in_channels, + key_channels, + scale=1, + bn_type=None): + super(ObjectAttentionBlock2D, self).__init__(in_channels, + key_channels, + scale, + bn_type=bn_type) + + +class SpatialOCR_Module(nn.Module): + """ + Implementation of the OCR module: + We aggregate the global object representation to update the representation for each pixel. + """ + def __init__(self, + in_channels, + key_channels, + out_channels, + scale=1, + dropout=0.1, + bn_type=None): + super(SpatialOCR_Module, self).__init__() + self.object_context_block = ObjectAttentionBlock2D(in_channels, + key_channels, + scale, + bn_type) + _in_channels = 2 * in_channels + + self.conv_bn_dropout = nn.Sequential( + nn.Conv2d(_in_channels, out_channels, kernel_size=1, padding=0, bias=False), + ModuleHelper.BNReLU(out_channels, bn_type=bn_type), + nn.Dropout2d(dropout) + ) + + + def forward(self, feats, proxy_feats): + context = self.object_context_block(feats, proxy_feats) # ro + + output = self.conv_bn_dropout(torch.cat([context, feats], 1)) # last cube + + return output \ No newline at end of file diff --git a/modelR/plugandplay/attentions/ResNeSt/SplitAttention.py b/modelR/plugandplay/attentions/ResNeSt/SplitAttention.py new file mode 100644 index 0000000..16c6fe9 --- /dev/null +++ b/modelR/plugandplay/attentions/ResNeSt/SplitAttention.py @@ -0,0 +1,90 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU +from torch.nn.modules.utils import _pair + +__all__ = ['SplAtConv2d'] + +class SplAtConv2d(Module): + """Split-Attention Conv2d + """ + def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), + dilation=(1, 1), groups=1, bias=True, + radix=2, reduction_factor=4, + rectify=False, rectify_avg=False, norm_layer=None, + dropblock_prob=0.0, **kwargs): + super(SplAtConv2d, self).__init__() + padding = _pair(padding) + self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) + self.rectify_avg = rectify_avg + inter_channels = max(in_channels*radix//reduction_factor, 32) + self.radix = radix + self.cardinality = groups + self.channels = channels + self.dropblock_prob = dropblock_prob + if self.rectify: + from rfconv import RFConv2d + self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, + groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) + else: + self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, + groups=groups*radix, bias=bias, **kwargs) + self.use_bn = norm_layer is not None + if self.use_bn: + self.bn0 = norm_layer(channels*radix) + self.relu = ReLU(inplace=True) + self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) + if self.use_bn: + self.bn1 = norm_layer(inter_channels) + self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) + if dropblock_prob > 0.0: + self.dropblock = DropBlock2D(dropblock_prob, 3) + self.rsoftmax = rSoftMax(radix, groups) + + def forward(self, x): + x = self.conv(x) + if self.use_bn: + x = self.bn0(x) + if self.dropblock_prob > 0.0: + x = 
self.dropblock(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + if self.radix > 1: + splited = torch.split(x, rchannel//self.radix, dim=1) + gap = sum(splited) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + if self.use_bn: + gap = self.bn1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = torch.split(atten, rchannel//self.radix, dim=1) + out = sum([att*split for (att, split) in zip(attens, splited)]) + else: + out = atten * x + return out.contiguous() + +class rSoftMax(nn.Module): + def __init__(self, radix, cardinality): + super().__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x \ No newline at end of file diff --git a/modelR/plugandplay/attentions/SE/senet.py b/modelR/plugandplay/attentions/SE/senet.py new file mode 100644 index 0000000..e9b43ed --- /dev/null +++ b/modelR/plugandplay/attentions/SE/senet.py @@ -0,0 +1,18 @@ +import torch.nn as nn + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel//reduction,bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel//reduction,channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b,c,h,w = x.size() + y = self.avgpool(x).view(b,c) + y = self.fc(y).view(b,c,1,1) + return x * y.expand_as(x) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/SK/sknet.py b/modelR/plugandplay/attentions/SK/sknet.py new file mode 100644 index 0000000..86538a9 --- /dev/null +++ b/modelR/plugandplay/attentions/SK/sknet.py @@ -0,0 +1,68 @@ +import torch.nn as nn +import torch + +class SKConv(nn.Module): + def __init__(self, features, WH, M, G, r, stride=1, L=32): + """ Constructor + Args: + features: input channel dimensionality. + WH: input spatial dimensionality, used for GAP kernel size. + M: the number of branchs. + G: num of convolution groups. + r: the radio for compute d, the length of z. + stride: stride, default 1. + L: the minimum dim of the vector z in paper, default 32. 
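+        Note: with stride=1 the output keeps the input shape (N, features, H, W); the M
+        branch outputs are fused by a learned soft attention over the branches.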
+ """ + super(SKConv, self).__init__() + d = max(int(features / r), L) + self.M = M + self.features = features + self.convs = nn.ModuleList([]) + for i in range(M): + self.convs.append( + nn.Sequential( + nn.Conv2d(features, + features, + kernel_size=3 + i * 2, + stride=stride, + padding=1 + i, + groups=G), nn.BatchNorm2d(features), + nn.ReLU(inplace=False))) + # self.gap = nn.AvgPool2d(int(WH/stride)) + print("D:", d) + self.fc = nn.Linear(features, d) + self.fcs = nn.ModuleList([]) + for i in range(M): + self.fcs.append(nn.Linear(d, features)) + self.softmax = nn.Softmax(dim=1) + + def forward(self, x): + for i, conv in enumerate(self.convs): + fea = conv(x).unsqueeze_(dim=1) + if i == 0: + feas = fea + else: + feas = torch.cat([feas, fea], dim=1) + fea_U = torch.sum(feas, dim=1) + # fea_s = self.gap(fea_U).squeeze_() + fea_s = fea_U.mean(-1).mean(-1) + fea_z = self.fc(fea_s) + for i, fc in enumerate(self.fcs): + print(i, fea_z.shape) + vector = fc(fea_z).unsqueeze_(dim=1) + print(i, vector.shape) + if i == 0: + attention_vectors = vector + else: + attention_vectors = torch.cat([attention_vectors, vector], + dim=1) + attention_vectors = self.softmax(attention_vectors) + attention_vectors = attention_vectors.unsqueeze(-1).unsqueeze(-1) + fea_v = (feas * attention_vectors).sum(dim=1) + return fea_v + +if __name__ == "__main__": + t = torch.ones((32, 256, 24,24)) + sk = SKConv(256,WH=1,M=2,G=1,r=2) + out = sk(t) + print(out.shape) \ No newline at end of file diff --git a/modelR/plugandplay/attentions/scSE/cSE.py b/modelR/plugandplay/attentions/scSE/cSE.py new file mode 100644 index 0000000..d54ec7a --- /dev/null +++ b/modelR/plugandplay/attentions/scSE/cSE.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn + + +class cSE(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.Conv_Squeeze = nn.Conv2d(in_channels, + in_channels // 2, + kernel_size=1, + bias=False) + self.Conv_Excitation = nn.Conv2d(in_channels // 2, + in_channels, + kernel_size=1, + bias=False) + self.norm = nn.Sigmoid() + + def forward(self, U): + z = self.avgpool(U) # shape: [bs, c, h, w] to [bs, c, 1, 1] + z = self.Conv_Squeeze(z) # shape: [bs, c/2, 1, 1] + z = self.Conv_Excitation(z) # shape: [bs, c, 1, 1] + z = self.norm(z) + return U * z.expand_as(U) + + +if __name__ == "__main__": + bs, c, h, w = 10, 3, 64, 64 + in_tensor = torch.ones(bs, c, h, w) + + c_se = cSE(c) + print("in shape:", in_tensor.shape) + out_tensor = c_se(in_tensor) + print("out shape:", out_tensor.shape) diff --git a/modelR/plugandplay/attentions/scSE/sSE.py b/modelR/plugandplay/attentions/scSE/sSE.py new file mode 100644 index 0000000..17a21f2 --- /dev/null +++ b/modelR/plugandplay/attentions/scSE/sSE.py @@ -0,0 +1,24 @@ +import torch +import torch.nn as nn + + +class sSE(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.Conv1x1 = nn.Conv2d(in_channels, 1, kernel_size=1, bias=False) + self.norm = nn.Sigmoid() + + def forward(self, U): + q = self.Conv1x1(U) # U:[bs,c,h,w] to q:[bs,1,h,w] + q = self.norm(q) + return U * q # 广播机制 + + +if __name__ == "__main__": + bs, c, h, w = 10, 3, 64, 64 + in_tensor = torch.ones(bs, c, h, w) + + s_se = sSE(c) + print("in shape:", in_tensor.shape) + out_tensor = s_se(in_tensor) + print("out shape:", out_tensor.shape) diff --git a/modelR/plugandplay/attentions/scSE/scSE.py b/modelR/plugandplay/attentions/scSE/scSE.py new file mode 100644 index 0000000..b6cedd3 --- /dev/null +++ b/modelR/plugandplay/attentions/scSE/scSE.py @@ 
-0,0 +1,49 @@ +import torch +import torch.nn as nn + + +class sSE(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.Conv1x1 = nn.Conv2d(in_channels, 1, kernel_size=1, bias=False) + self.norm = nn.Sigmoid() + + def forward(self, U): + q = self.Conv1x1(U) # U:[bs,c,h,w] to q:[bs,1,h,w] + q = self.norm(q) + return U * q # 广播机制 + +class cSE(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.Conv_Squeeze = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1, bias=False) + self.Conv_Excitation = nn.Conv2d(in_channels//2, in_channels, kernel_size=1, bias=False) + self.norm = nn.Sigmoid() + + def forward(self, U): + z = self.avgpool(U)# shape: [bs, c, h, w] to [bs, c, 1, 1] + z = self.Conv_Squeeze(z) # shape: [bs, c/2] + z = self.Conv_Excitation(z) # shape: [bs, c] + z = self.norm(z) + return U * z.expand_as(U) + +class csSE(nn.Module): + def __init__(self, in_channels): + super().__init__() + self.cSE = cSE(in_channels) + self.sSE = sSE(in_channels) + + def forward(self, U): + U_sse = self.sSE(U) + U_cse = self.cSE(U) + return U_cse+U_sse + +if __name__ == "__main__": + bs, c, h, w = 10, 3, 64, 64 + in_tensor = torch.ones(bs, c, h, w) + + cs_se = csSE(c) + print("in shape:",in_tensor.shape) + out_tensor = cs_se(in_tensor) + print("out shape:", out_tensor.shape) diff --git a/modelR/plugandplay/resnet_se_dca.py b/modelR/plugandplay/resnet_se_dca.py new file mode 100644 index 0000000..5c4841f --- /dev/null +++ b/modelR/plugandplay/resnet_se_dca.py @@ -0,0 +1,261 @@ +# source: https://github.com/eccv2020-4574/DCANet/blob/master/models/resnet/resnet_se_dca.py +# arxiv: https://arxiv.org/abs/2007.05099 + +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +from torch.nn.parameter import Parameter +import torch +import torch.nn.functional as F +from torch.nn import init +from torch.autograd import Variable +from collections import OrderedDict +import math + + +__all__ = ['dca_se_resnet18', 'dca_se_resnet34', 'dca_se_resnet50', 'dca_se_resnet101', 'dca_se_resnet152'] + +class CSELayer(nn.Module): + def __init__(self,in_channel, channel, reduction = 16): + super(CSELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace = True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid() + ) + if in_channel != channel: + self.att_fc = nn.Sequential( + nn.Linear(in_channel, channel), + nn.LayerNorm(channel), + nn.ReLU(inplace=True) + ) + self.conv = nn.Sequential( + nn.Conv2d(2, 1, kernel_size=1), + nn.LayerNorm(channel), + nn.ReLU(inplace=True) + ) + + + def forward(self, x): + b, c, _, _ = x[0].size() + gap = self.avg_pool(x[0]).view(b, c) + if x[1] is None: + all_att = self.fc(gap) + else: + pre_att = self.att_fc(x[1]) if hasattr(self, 'att_fc') else x[1] + all_att = torch.cat((gap.view(b, 1, 1, c), pre_att.view(b, 1, 1, c)), dim=1) + all_att = self.conv(all_att).view(b, c) + all_att = self.fc(all_att) + return {0: x[0] * all_att.view(b, c, 1, 1), 1: gap*all_att} + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, 
downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + self.se = CSELayer(inplanes,planes) + + def forward(self, x): + identity = x[0] + + out = self.conv1(x[0]) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.se({0:out,1:x[1]}) + + if self.downsample is not None: + identity = self.downsample(x[0]) + + out_x = out[0] + identity + out_x = self.relu(out_x) + out_att = out[1] + + return {0: out_x,1:out_att} + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.se = CSELayer(inplanes,planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x[0] + + out = self.conv1(x[0]) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + out = self.se({0:out,1:x[1]}) + + if self.downsample is not None: + identity = self.downsample(x[0]) + + out_x = out[0] + identity + out_x = self.relu(out_x) + out_att = out[1] + + return {0: out_x, 1: out_att} + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): + super(ResNet, self).__init__() + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + att = None + x = {0: x, 1: att} + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x[0]) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def dca_se_resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def dca_se_resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def dca_se_resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + return model + + +def dca_se_resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + return model + + +def dca_se_resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + return model + + + + +def demo(): + net = dca_se_resnet50(num_classes=1000) + y = net(torch.randn(2, 3, 224,224)) + print(y.size()) + +# demo() \ No newline at end of file diff --git a/predictionR/voc/comp4_det_test_baseball-diamond.txt b/predictionR/voc/comp4_det_test_baseball-diamond.txt new file mode 100644 index 0000000..4c1a60c --- /dev/null +++ b/predictionR/voc/comp4_det_test_baseball-diamond.txt @@ -0,0 +1,7 @@ +P0045__1__0___1306 0.3896 1106 84 1292 257 1125 425 944 251 +P0045__1__0___1306 0.3889 782 559 934 718 790 850 642 691 +P0045__1__0___1306 0.3644 773 543 941 688 797 838 634 693 +P0045__1__0___1306 0.3414 1201 496 1356 656 1209 795 1058 634 +P0045__1__0___1306 0.3316 713 174 872 325 724 456 567 307 +P0045__1__0___1306 0.3140 816 527 961 686 815 821 675 661 +P0045__1__0___1306 0.2110 591 1016 763 1212 580 1369 407 1174 diff --git a/test.py b/test.py new file mode 100644 index 0000000..0cbfdcb --- /dev/null +++ b/test.py @@ -0,0 +1,146 @@ +import utils.gpu as gpu +from modelR.lodet import LODet +from tensorboardX import SummaryWriter +from evalR.evaluator import Evaluator +import argparse +import os +import config.cfg_lodet as cfg +from utils.visualize import * + +import time +import logging +from utils.utils_coco import * +from utils.log import Logger +import cv2 +from eval.coco_eval import COCOEvaluator +import torch.backends.cudnn as cudnn +class Tester(object): + def __init__(self, weight_path=None, gpu_id=0, visiual=None, eval=False): + self.img_size = cfg.TEST["TEST_IMG_SIZE"] + self.__num_class = cfg.DATA["NUM"] + self.__conf_threshold = cfg.TEST["CONF_THRESH"] + self.__nms_threshold = cfg.TEST["NMS_THRESH"] + self.__device = gpu.select_device(gpu_id, force_cpu=False) + self.__multi_scale_test = cfg.TEST["MULTI_SCALE_TEST"] + self.__flip_test = cfg.TEST["FLIP_TEST"] + self.__classes = cfg.DATA["CLASSES"] + + self.__visiual = visiual + self.__eval = eval + self.__model = LODet().to(self.__device) # Single GPU + + net_model = LODet() + if torch.cuda.device_count() >1: ## Multi GPUs + print("Let's use", torch.cuda.device_count(), "GPUs!") + net_model = torch.nn.DataParallel(net_model) ## Multi GPUs + self.__model = net_model.to(self.__device) + elif torch.cuda.device_count() ==1: + self.__model = net_model.to(self.__device) + + self.__load_model_weights(weight_path) + + self.__evalter = Evaluator(self.__model, visiual=False) + + def __load_model_weights(self, weight_path): + print("loading weight file from : {}".format(weight_path)) + weight = os.path.join(weight_path) + chkpt = torch.load(weight, map_location=self.__device) + self.__model.load_state_dict(chkpt) #['model'] + #print("loading weight file is done") + del chkpt + + def test(self): + global logger + logger.info("***********Start Evaluation****************") + + if self.__visiual: + imgs = os.listdir(self.__visiual) + for v in imgs: + path = os.path.join(self.__visiual, v) + #print("test images : {}".format(path)) + img = cv2.imread(path) + assert img is not None + bboxes_prd = self.__evalter.get_bbox(img) + if bboxes_prd.shape[0] != 0: + boxes = bboxes_prd[..., :4] + class_inds = bboxes_prd[..., 5].astype(np.int32) + scores = bboxes_prd[..., 4] + visualize_boxes(image=img, boxes=boxes, labels=class_inds, probs=scores, class_labels=self.__classes) + path = os.path.join(cfg.PROJECT_PATH, "prediction/imgs_all/{}".format(v)) + cv2.imwrite(path, img) + #print("saved images : 
{}".format(path)) + + mAP = 0 + if self.__eval and cfg.TEST["EVAL_TYPE"] == 'VOC': + with torch.no_grad(): + start = time.time() + APs, inference_time = Evaluator(self.__model).APs_voc(self.__multi_scale_test, self.__flip_test) + + for i in APs: + print("{} --> AP : {}".format(i, APs[i])) + mAP += APs[i] + mAP = mAP / self.__num_class + logger.info('mAP:{}'.format(mAP)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('test/VOCmAP', mAP) + end = time.time() + logger.info("Test cost time:{:.4f}s".format(end - start)) + #print('mAP:%g' % (mAP)) + #print("inference time : {:.2f} ms".format(inference_time)) + + elif self.__eval and cfg.TEST["EVAL_TYPE"] == 'COCO': + with torch.no_grad(): + start = time.time() + evaluator = COCOEvaluator(data_dir=cfg.DATA_PATH, + img_size=cfg.TEST["TEST_IMG_SIZE"], + confthre=cfg.TEST["CONF_THRESH"], + nmsthre=cfg.TEST["NMS_THRESH"]) + ap50_95, ap50, inference_time = evaluator.evaluate(self.__model) + logger.info('ap50_95:{} | ap50:{}'.format(ap50_95, ap50)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('test/COCOAP50', ap50) + writer.add_scalar('test/COCOAP50_95', ap50_95) + end = time.time() + logger.info("Test cost time:{:.4f}s".format(end - start)) + + elif self.__eval and cfg.TEST["EVAL_TYPE"] == 'BOTH': + with torch.no_grad(): + start = time.time() + APs, inference_time = Evaluator(self.__model).APs_voc(self.__multi_scale_test, self.__flip_test) + for i in APs: + print("{} --> mAP : {}".format(i, APs[i])) + mAP += APs[i] + mAP = mAP / self.__num_class + logger.info('mAP:{}'.format(mAP)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('test/VOCmAP', mAP) + end = time.time() + logger.info("Test cost time:{:.4f}s".format(end - start)) + start = time.time() + evaluator = COCOEvaluator(data_dir=cfg.DATA_PATH, + img_size=cfg.TEST["TEST_IMG_SIZE"], + confthre=cfg.TEST["CONF_THRESH"], + nmsthre=cfg.TEST["NMS_THRESH"]) + ap50_95, ap50, inference_time = evaluator.evaluate(self.__model) + logger.info('ap50_95:{} | ap50:{}'.format(ap50_95, ap50)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('test/COCOAP50', ap50) + writer.add_scalar('test/COCOAP50_95', ap50_95) + end = time.time() + logger.info("Test cost time:{:.4f}s".format(end - start)) + +if __name__ == "__main__": + global logger + parser = argparse.ArgumentParser() + parser.add_argument('--weight_path', type=str, default='weight/best.pt', help='weight file path') + parser.add_argument('--log_val_path', type=str, default='log/', help='weight file path') + parser.add_argument('--visiual', type=str, default=None, help='test data path or None') + parser.add_argument('--eval', action='store_true', default=True, help='eval flag') + parser.add_argument('--gpu_id', type=int, default=0, help='gpu id') + parser.add_argument('--log_path', type=str, default='log/', help='log path') + opt = parser.parse_args() + writer = SummaryWriter(logdir=opt.log_path + '/event') + logger = Logger(log_file_name=opt.log_val_path + '/log_coco_test.txt', log_level=logging.DEBUG, + logger_name='NPMMRDet').get_log() + + Tester(weight_path=opt.weight_path, gpu_id=opt.gpu_id, eval=opt.eval, visiual=opt.visiual).test() \ No newline at end of file diff --git a/trainR.py b/trainR.py new file mode 100644 index 0000000..f44be36 --- /dev/null +++ b/trainR.py @@ -0,0 +1,205 @@ +import logging +import argparse +import torch.optim as optim +from torch.utils.data import DataLoader +from tensorboardX 
import SummaryWriter +import dataloadR.datasets as data +import utils.gpu as gpu +from utils import cosine_lr_scheduler +from utils.log import Logger +from model.lodet import LODet +from model.loss.loss import Loss +from eval.evaluator import * +from eval.coco_eval import COCOEvaluator +from torch.cuda.amp import autocast as autocast + +class Trainer(object): + def __init__(self, weight_path, resume, gpu_id): + init_seeds(0) + self.prune=0 + self.sr=True + self.device = gpu.select_device(gpu_id) + print(self.device) + self.start_epoch = 0 + self.best_mAP = 0. + self.epochs = cfg.TRAIN["EPOCHS"] + self.weight_path = weight_path + self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"] + if self.multi_scale_train: print('Using multi scales training') + else: print('train img size is {}'.format(cfg.TRAIN["TRAIN_IMG_SIZE"])) + + self.train_dataset = data.Construct_Dataset(anno_file_type="train", img_size=cfg.TRAIN["TRAIN_IMG_SIZE"]) + self.train_dataloader = DataLoader(self.train_dataset, + batch_size=cfg.TRAIN["BATCH_SIZE"], + num_workers=cfg.TRAIN["NUMBER_WORKERS"], + shuffle=True, + pin_memory=True) + + net_model = LODet() + if torch.cuda.device_count() >1: ## multi GPUs + print("Let's use", torch.cuda.device_count(), "GPUs!") + net_model = torch.nn.DataParallel(net_model) + self.model = net_model.to(self.device) + elif torch.cuda.device_count() ==1: + self.model = net_model.to(self.device) ## Single GPU + + #self.optimizer = optim.SGD(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"], momentum=cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"]) + self.optimizer = optim.Adam(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"]) + + + self.criterion = Loss(anchors=cfg.MODEL["ANCHORS"], strides=cfg.MODEL["STRIDES"], + iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"]) + + if resume: + self.__load_model_weights(weight_path) + + self.scheduler = cosine_lr_scheduler.CosineDecayLR(self.optimizer, + T_max=self.epochs*len(self.train_dataloader), + lr_init=cfg.TRAIN["LR_INIT"], + lr_min=cfg.TRAIN["LR_END"], + warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(self.train_dataloader)) + + def __load_model_weights(self, weight_path): + last_weight = os.path.join(os.path.split(weight_path)[0], "last.pt") + chkpt = torch.load(last_weight, map_location=self.device) + self.model.load_state_dict(chkpt['model'])#, False + self.start_epoch = chkpt['epoch'] + 1 + if chkpt['optimizer'] is not None: + self.optimizer.load_state_dict(chkpt['optimizer']) + self.best_mAP = chkpt['best_mAP'] + del chkpt + + def __save_model_weights(self, epoch, mAP): + if mAP > self.best_mAP: + self.best_mAP = mAP + best_weight = os.path.join(os.path.split(self.weight_path)[0], "best.pt") + last_weight = os.path.join(os.path.split(self.weight_path)[0], "last.pt") + chkpt = {'epoch': epoch, + 'best_mAP': self.best_mAP, + 'model': self.model.state_dict(), + 'optimizer': self.optimizer.state_dict()} + torch.save(chkpt, last_weight,_use_new_zipfile_serialization=False) + if self.best_mAP == mAP: + torch.save(chkpt['model'], best_weight,_use_new_zipfile_serialization=False) + if epoch > 0 and epoch % 5 == 0: + torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt'%epoch)) + del chkpt + + def __save_model_weights1(self, epoch, mAP): + if mAP > self.best_mAP: + self.best_mAP = mAP + best_weight = os.path.join(os.path.split(self.weight_path)[0], "best1.pt") + last_weight = os.path.join(os.path.split(self.weight_path)[0], "last1.pt") + chkpt = {'epoch': epoch, + 'best_mAP': self.best_mAP, + 'model': 
self.model.state_dict(), + 'optimizer': self.optimizer.state_dict()} + torch.save(chkpt, last_weight,_use_new_zipfile_serialization=False) + torch.save(chkpt['model'], best_weight, _use_new_zipfile_serialization=False) + torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt'%epoch)) + del chkpt + + def train(self): + global writer + logger.info(self.model) + logger.info(" Training start! Img size:{:d}, Batchsize:{:d}, Number of workers:{:d}".format( + cfg.TRAIN["TRAIN_IMG_SIZE"], cfg.TRAIN["BATCH_SIZE"], cfg.TRAIN["NUMBER_WORKERS"])) + logger.info(" Train datasets number is : {}".format(len(self.train_dataset))) + + for epoch in range(self.start_epoch, self.epochs): + start = time.time() + self.model.train() + + mloss = torch.zeros(4) + mAP = 0 + self.__save_model_weights1(epoch, mAP) + for i, (imgs, label_sbbox, label_mbbox, label_lbbox, + sbboxes, mbboxes, lbboxes) in enumerate(self.train_dataloader): + + self.scheduler.step(len(self.train_dataloader)*epoch + i) + imgs = imgs.to(self.device) + label_sbbox = label_sbbox.to(self.device) + label_mbbox = label_mbbox.to(self.device) + label_lbbox = label_lbbox.to(self.device) + sbboxes = sbboxes.to(self.device) + mbboxes = mbboxes.to(self.device) + lbboxes = lbboxes.to(self.device) + p, p_d = self.model(imgs) + + loss, loss_iou, loss_conf, loss_cls = self.criterion(p, p_d, label_sbbox, label_mbbox, + label_lbbox, sbboxes, mbboxes, lbboxes) + self.optimizer.zero_grad() + + loss.backward() + self.optimizer.step() + + loss_items = torch.tensor([loss_iou, loss_conf, loss_cls, loss]) + mloss = (mloss * i + loss_items) / (i + 1) + + if i % 50 == 0: + logger.info( + " Epoch:[{:3}/{}] Batch:[{:3}/{}] Img_size:[{:3}] Loss:{:.4f} " + "Loss_IoU:{:.4f} | Loss_Conf:{:.4f} | Loss_Cls:{:.4f} LR:{:g}".format( + epoch, self.epochs, i, len(self.train_dataloader) - 1, self.train_dataset.img_size, + mloss[3], mloss[0], mloss[1], mloss[2], self.optimizer.param_groups[0]['lr'] + )) + writer.add_scalar('loss_iou', mloss[0], len(self.train_dataloader) + * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('loss_conf', mloss[1], len(self.train_dataloader) + * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('loss_cls', mloss[2], len(self.train_dataloader) + * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('train_loss', mloss[3], len(self.train_dataloader) + * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + + if self.multi_scale_train and (i+1) % 10 == 0: + self.train_dataset.img_size = random.choice(range( + cfg.TRAIN["MULTI_TRAIN_RANGE"][0], cfg.TRAIN["MULTI_TRAIN_RANGE"][1], + cfg.TRAIN["MULTI_TRAIN_RANGE"][2])) * 32 + + if epoch >= 30 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'VOC': + logger.info("===== Validate =====".format(epoch, self.epochs)) + with torch.no_grad(): + APs, inference_time = Evaluator(self.model).APs_voc() + for i in APs: + logger.info("{} --> mAP : {}".format(i, APs[i])) + mAP += APs[i] + mAP = mAP / self.train_dataset.num_classes + logger.info("mAP : {}".format(mAP)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('mAP', mAP, epoch) + + elif epoch >= 30 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'COCO': + logger.info("===== Validate =====".format(epoch, self.epochs)) + with torch.no_grad(): + evaluator = COCOEvaluator(data_dir=cfg.DATA_PATH, + img_size=cfg.TEST["TEST_IMG_SIZE"], + confthre=cfg.TEST["CONF_THRESH"], + nmsthre=cfg.TEST["NMS_THRESH"]) + ap50_95, ap50, inference_time = evaluator.evaluate(self.model) + mAP = ap50 + 
logger.info('ap50_95:{} | ap50:{}'.format(ap50_95, ap50)) + logger.info("inference time: {:.2f} ms".format(inference_time)) + writer.add_scalar('val/COCOAP50', ap50, epoch) + writer.add_scalar('val/COCOAP50_95', ap50_95, epoch) + + self.__save_model_weights(epoch, mAP) + logger.info('Save weights Done') + logger.info("mAP: {:.3f}".format(mAP)) + end = time.time() + logger.info("Inference time: {:.4f}s".format(end - start)) + + logger.info("Training finished. Best_mAP: {:.3f}%".format(self.best_mAP)) + +if __name__ == "__main__": + global logger, writer + parser = argparse.ArgumentParser() + parser.add_argument('--weight_path', type=str, default='weight/mobilenetv2_1.0-0c6065bc.pth', + help='weight file path') #default=None + parser.add_argument('--resume', action='store_true',default=False, help='resume training flag') + parser.add_argument('--gpu_id', type=int, default=0, help='gpu id') + parser.add_argument('--log_path', type=str, default='log/', help='log path') + opt = parser.parse_args() + writer = SummaryWriter(logdir=opt.log_path + '/event') + logger = Logger(log_file_name=opt.log_path + '/log.txt', log_level=logging.DEBUG, logger_name='LODet').get_log() + Trainer(weight_path=opt.weight_path, resume=opt.resume, gpu_id=opt.gpu_id).train() \ No newline at end of file diff --git a/utils/cosine_lr_scheduler.py b/utils/cosine_lr_scheduler.py new file mode 100644 index 0000000..79edeb7 --- /dev/null +++ b/utils/cosine_lr_scheduler.py @@ -0,0 +1,61 @@ +import numpy as np + +class CosineDecayLR(object): + def __init__(self, optimizer, T_max, lr_init, lr_min=0., warmup=0): + """ + a cosine decay scheduler about steps, not epochs. + :param optimizer: ex. optim.SGD + :param T_max: max steps, and steps=epochs * batches + :param lr_max: lr_max is init lr. + :param warmup: in the training begin, the lr is smoothly increase from 0 to lr_init, which means "warmup", + this means warmup steps, if 0 that means don't use lr warmup. 
+ """ + super(CosineDecayLR, self).__init__() + self.__optimizer = optimizer + self.__T_max = T_max + self.__lr_min = lr_min + self.__lr_max = lr_init + self.__warmup = warmup + + + def step(self, t): + if self.__warmup and t < self.__warmup: + lr = self.__lr_max / self.__warmup * t + else: + T_max = self.__T_max - self.__warmup + t = t - self.__warmup + lr = self.__lr_min + 0.5 * (self.__lr_max - self.__lr_min) * (1 + np.cos(t/T_max * np.pi)) + for param_group in self.__optimizer.param_groups: + param_group["lr"] = lr + + +if __name__ == '__main__': + import matplotlib.pyplot as plt + from matplotlib.ticker import FuncFormatter + import math + from model.npmmrdet_model import NPMMRDet + import torch.optim as optim + import config.cfg_npmmrdet_dior as cfg + + net = NPMMRDet() + + optimizer = optim.SGD(net.parameters(), cfg.TRAIN["LR_INIT"], cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"]) + #optimizer = optim.Adam(net.parameters(), lr = cfg.TRAIN["LR_INIT"]) + + scheduler = CosineDecayLR(optimizer, math.ceil(cfg.TRAIN["EPOCHS"]/cfg.TRAIN["BATCH_SIZE"])*cfg.TRAIN["TRAIN_IMG_NUM"], + cfg.TRAIN["LR_INIT"], cfg.TRAIN["LR_END"], cfg.TRAIN["WARMUP_EPOCHS"]*cfg.TRAIN["TRAIN_IMG_NUM"]) + + y = [] + for t in range(math.ceil(cfg.TRAIN["EPOCHS"]/cfg.TRAIN["BATCH_SIZE"])): + for i in range(cfg.TRAIN["TRAIN_IMG_NUM"]): + scheduler.step(cfg.TRAIN["TRAIN_IMG_NUM"]*t+i) + y.append(optimizer.param_groups[0]['lr']) + + print(y) + plt.figure() + plt.plot(y, label='LambdaLR') + plt.xlabel('steps') + plt.ylabel('LR') + plt.tight_layout() + plt.savefig("../prediction/lr.png", dpi=600) + plt.show() \ No newline at end of file diff --git a/utils/gpu.py b/utils/gpu.py new file mode 100644 index 0000000..32f40a2 --- /dev/null +++ b/utils/gpu.py @@ -0,0 +1,23 @@ +import torch +import torch.backends.cudnn as cudnn +def select_device(id, force_cpu=False): + cuda = False if force_cpu else torch.cuda.is_available() + cudnn.benchmark = True + device = torch.device('cuda:{}'.format(id) if cuda else 'cpu') + #device = torch.cuda.set_device(0 if cuda else 'cpu') + if not cuda: + print('Using CPU') + if cuda: + #device = torch.cuda.set_device(id) + c = 1024 ** 2 # bytes to MB + ng = torch.cuda.device_count() + x = [torch.cuda.get_device_properties(i) for i in range(ng)] + print("Using CUDA device0 _CudaDeviceProperties(name='%s', total_memory=%dMB)" % + (x[0].name, x[0].total_memory / c)) + if ng > 0: + # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID + for i in range(1, ng): + print(" device%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % + (i, x[i].name, x[i].total_memory / c)) + + return device \ No newline at end of file diff --git a/utils/heatmap.py b/utils/heatmap.py new file mode 100644 index 0000000..7b6e7bd --- /dev/null +++ b/utils/heatmap.py @@ -0,0 +1,60 @@ +import cv2 +import math +import random +import numpy as np +import os + +def Show_Heatmap(beta, img=None): + cv2.namedWindow('img') + cv2.namedWindow('img1') + if img is None: + img = cv2.imread(os.path.join("VOCdevkit\VOC2007\JPEGImages/000001.jpg"), 1) # the same input image + + h,w,c = img.shape + img1 = img.copy() + img = np.float32(img) / 255 + + (height, width) = beta.shape[1:] + h1 = int(math.sqrt(height)) + w1 = int(math.sqrt(width)) + + for i in range(height): + img_show = img1.copy() + h2 = int(i / w1) + w2 = int(i % h1) + + mask = np.zeros((h1, w1), dtype=np.float32) + mask[h2, w2] = 1 + mask = cv2.resize(mask, (w, h)) + mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2) + mskd = img_show * mask + color = (random.random(), 
random.random(), random.random()) + clmsk = np.ones(mask.shape) * mask + clmsk[:, :, 0] = clmsk[:, :, 0] * color[0] * 256 + clmsk[:, :, 1] = clmsk[:, :, 1] * color[1] * 256 + clmsk[:, :, 2] = clmsk[:, :, 2] * color[2] * 256 + img_show = img_show + 0.8 * clmsk - 0.8 * mskd + + cam = beta[0, i, :] + cam = cam.view(h1, w1).data.cpu().numpy() + cam = cv2.resize(cam, (w, h)) + cam = cam - np.min(cam) + cam = cam / np.max(cam) + # cam = 1 / (1 + np.exp(-cam)) + + heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET) + heatmap = np.float32(heatmap) / 255 + cam = heatmap + np.float32(img) + cam = cam - np.min(cam) + cam = cam / np.max(cam) + cam = np.uint8(255 * (cam)) + cv2.imwrite('att.jpg', cam) + cv2.imwrite('img.jpg', np.uint8(img_show)) + cv2.imshow('img', cam) + cv2.imshow('img1', np.uint8(img_show)) + k = cv2.waitKey(0) + if k & 0xFF == ord('q'): + cv2.destroyAllWindows() + exit(0) + + diff --git a/utils/log.py b/utils/log.py new file mode 100644 index 0000000..ae044de --- /dev/null +++ b/utils/log.py @@ -0,0 +1,23 @@ +import logging + + +class Logger(object): + def __init__(self,log_file_name,log_level,logger_name): + # firstly, create a logger + self.__logger = logging.getLogger(logger_name) + self.__logger.setLevel(log_level) + # secondly, create a handler + file_handler = logging.FileHandler(log_file_name) + console_handler = logging.StreamHandler() + # thirdly, define the output form of handler + formatter = logging.Formatter( + '[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s ' + ) + file_handler.setFormatter(formatter) + console_handler.setFormatter(formatter) + # finally, add the Hander to logger + self.__logger.addHandler(file_handler) + self.__logger.addHandler(console_handler) + + def get_log(self): + return self.__logger \ No newline at end of file diff --git a/utils/num_of_works_set.py b/utils/num_of_works_set.py new file mode 100644 index 0000000..02cdd65 --- /dev/null +++ b/utils/num_of_works_set.py @@ -0,0 +1,23 @@ +import time +import torch.utils.data as d +import torchvision +import torchvision.transforms as transforms + +if __name__ == '__main__': + BATCH_SIZE = 100 + transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,))]) + train_set = torchvision.datasets.MNIST('\mnist', download=True, train=True, transform=transform) + + # data loaders + train_loader = d.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True) + + for num_workers in range(20): + train_loader = d.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=num_workers) + # training ... 
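+        # Times one full pass over the MNIST DataLoader for each num_workers
+        # value (0-19); the setting with the lowest wall-clock time printed
+        # below is the one worth using on this machine.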
+ start = time.time() + for epoch in range(1): + for step, (batch_x, batch_y) in enumerate(train_loader): + pass + end = time.time() + print('num_workers is {} and it took {} seconds'.format(num_workers, end - start)) \ No newline at end of file diff --git a/utils/utils_basic.py b/utils/utils_basic.py new file mode 100644 index 0000000..6bf04d4 --- /dev/null +++ b/utils/utils_basic.py @@ -0,0 +1,527 @@ +#coding=utf-8 +import os +import sys +sys.path.append("..") +import math +import numpy as np +import random +import torch +import config.cfg_lodet as cfg +from shapely.geometry import Polygon, MultiPoint # 多边形 + +def init_seeds(seed=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + os.environ['PYTHONHASHSEED'] = str(seed) + +def xyxy2xywh(x): + # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + + y[:, 0] = (x[:, 0] + x[:, 2]) / 2.0 + y[:, 1] = (x[:, 1] + x[:, 3]) / 2.0 + y[:, 2] = x[:, 2] - x[:, 0] + y[:, 3] = x[:, 3] - x[:, 1] + return y + +def xywh2xyxy(x): + # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] + y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + +def iou_xywh_numpy(boxes1, boxes2): + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + return 1.0 * inter_area / union_area + +def iou_xyxy_numpy(boxes1, boxes2): + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + return 1.0 * inter_area / union_area + +def diou_xyxy_numpy(boxes1, boxes2): + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + # 计算出boxes1和boxes2相交部分的左上角坐标、右下角坐标 + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + # 计算出boxes1和boxes2相交部分的宽、高 + # 因为两个boxes没有交集时,(right_down - left_up) < 0,所以maximum可以保证当两个boxes没有交集时,它们之间的iou为0 + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + IOU = 1.0 * 
inter_area / union_area + + enclose_left_up = np.minimum(boxes1[..., :2], boxes2[..., :2]) + enclose_right_down = np.maximum(boxes1[..., 2:], boxes2[..., 2:]) + enclose_section = np.maximum(enclose_right_down - enclose_left_up, np.zeros_like(enclose_right_down)) + enclose_c2 = np.power(enclose_section[..., 0], 2) + np.power(enclose_section[..., 1], 2) + + boxes1 = np.concatenate((0.5 * (boxes1[..., 0:1] + boxes1[..., 2:3]), 0.5 * (boxes1[..., 1:2] + boxes1[..., 3:]), + (boxes1[..., 2:3] - boxes1[..., 0:1]), (boxes1[..., 3:] - boxes1[..., 1:2])), axis=-1) + boxes2 = np.concatenate((0.5 * (boxes2[..., 0:1] + boxes2[..., 2:3]), 0.5 * (boxes2[..., 1:2] + boxes2[..., 3:]), + (boxes2[..., 2:3] - boxes2[..., 0:1]), (boxes2[..., 3:] - boxes2[..., 1:2])), axis=-1) + + p2 = np.power(boxes1[..., 0] - boxes2[..., 0], 2) + np.power(boxes1[..., 1] - boxes2[..., 1], 2) + + DIOU = IOU - 1.0 * p2 / enclose_c2 + return DIOU + +def polygen_iou_xy4_numpy(boxes1, boxes2): + #print(boxes2.shape) + num = boxes2.shape[0] + if num == 0: + iou_out = [] + else: + boxes1 = boxes1.reshape(-1, 4, 2) + boxes2 = boxes2.reshape(-1, 4, 2) + #print(boxes1.shape, boxes2.shape) + iou = np.zeros(num) + for i in range(0, num): + #print("num",num,i) + poly1 = Polygon(boxes1[0,:,:]).convex_hull + #print("uuuuuuu,",boxes2.shape) + poly2 = Polygon(boxes2[i,:,:]).convex_hull + union_poly = np.concatenate((boxes1[0,:,:], boxes2[i,:,:]),axis=0) + if poly1.intersects(poly2): # 如果两四边形相交 + inter_area = poly1.intersection(poly2).area # 相交面积 + union_area = MultiPoint(union_poly).convex_hull.area + iou[i] = float(inter_area) / union_area + iou_out = iou + return np.array(iou_out) + +def polygen_iou_xy4_numpy_eval(boxes1, boxes2): + boxes1 = boxes1.reshape( 4, 2) + boxes2 = boxes2.reshape(4, 2) + poly1 = Polygon(boxes1).convex_hull + poly2 = Polygon(boxes2).convex_hull + union_poly = np.concatenate((boxes1, boxes2),axis=0) + iou=0 + if poly1.intersects(poly2): # 如果两四边形相交 + inter_area = poly1.intersection(poly2).area # 相交面积 + union_area = MultiPoint(union_poly).convex_hull.area + iou = float(inter_area) / union_area + return iou + +def polygen_iou_xy4_numpy1(boxes1, boxes2):############loss + size1 = boxes1.shape + num = size1[0]*size1[1]*size1[2]*size1[3] + boxes1 = boxes1.cpu().detach().numpy() + boxes2 = boxes2.cpu().detach().numpy() + boxes1 = boxes1.reshape(-1, 4, 2) + boxes2 = boxes2.reshape(-1, 4, 2) + iou = np.zeros(num) + for i in range(0, num): + poly1 = Polygon(boxes1[i,:,:]).convex_hull + poly2 = Polygon(boxes2[i,:,:]).convex_hull + union_poly = np.concatenate((boxes1[i,:,:], boxes2[i,:,:])) + if poly1.intersects(poly2): # 如果两四边形相交 + inter_area = poly1.intersection(poly2).area # 相交面积 + union_area = MultiPoint(union_poly).convex_hull.area + iou[i] = float(inter_area) / union_area + iou_out = iou.reshape((size1[0],size1[1],size1[2],size1[3])) + return torch.tensor(iou_out) + +def polygen_iou_xy4_torch(boxes1, boxes2): + size1 = boxes1.shape + num = size1[0]*size1[1]*size1[2]*size1[3] + #boxes1 = boxes1.cpu().detach().numpy() + #boxes2 = boxes2.cpu().detach().numpy() + boxes1 = boxes1.view(-1, 4, 2) + print(boxes1.shape) + boxes2 = boxes2.view(-1, 4, 2) + iou = torch.zeros(num) + for i in range(0, num): + poly1 = Polygon(boxes1[i,:,:]).convex_hull + poly2 = Polygon(boxes2[i,:,:]).convex_hull + union_poly = torch.cat((boxes1[i,:,:], boxes2[i,:,:])) + if poly1.intersects(poly2): # 如果两四边形不相交 + inter_area = poly1.intersection(poly2).area # 相交面积 + union_area = MultiPoint(union_poly).convex_hull.area + iou[i] = float(inter_area) / 
union_area + iou_out = iou.view(size1[0],size1[1],size1[2],size1[3]) + return iou_out + +def polygen_iou_xy4_torch1(boxes1, boxes2): + import cv2 + + size1 = boxes1.shape + num = size1[0]*size1[1]*size1[2]*size1[3] + #boxes1 = boxes1.cpu().detach().numpy() + #boxes2 = boxes2.cpu().detach().numpy() + boxes1 = boxes1.view(-1, 4, 2).unsqueeze(1) + boxes2 = boxes2.view(-1, 4, 2).unsqueeze(1) + + im = torch.zeros(num, 10, 10) + im1 = torch.zeros(num, 10, 10) + + original_grasp_mask = cv2.fillPoly(im, boxes2, 255) + print(original_grasp_mask.shape) + prediction_grasp_mask = cv2.fillPoly(im1, boxes2, 255) + masked_and = cv2.bitwise_and(original_grasp_mask, prediction_grasp_mask, mask=im) + masked_or = cv2.bitwise_or(original_grasp_mask, prediction_grasp_mask) + + or_area = torch.sum(torch.float32(torch.gt(masked_or, 0))) + and_area = torch.sum(torch.float32(torch.gt(masked_and, 0))) + IOU = and_area / or_area + + iou = torch.zeros(num) + for i in range(0, num): + poly1 = Polygon(boxes1[i,:,:]).convex_hull + poly2 = Polygon(boxes2[i,:,:]).convex_hull + union_poly = torch.cat((boxes1[i,:,:], boxes2[i,:,:])) + if poly1.intersects(poly2): # 如果两四边形不相交 + inter_area = poly1.intersection(poly2).area # 相交面积 + union_area = MultiPoint(union_poly).convex_hull.area + iou[i] = float(inter_area) / union_area + iou_out = iou.view(size1[0],size1[1],size1[2],size1[3]) + return iou_out + + +def iou_xyxy_torch(boxes1, boxes2): + """ + :param boxes1: boxes1和boxes2的shape可以不相同,但是需要满足广播机制,且需要是Tensor + :param boxes2: 且需要保证最后一维为坐标维,以及坐标的存储结构为(xmin, ymin, xmax, ymax) + :return: 返回boxes1和boxes2的IOU,IOU的shape为boxes1和boxes2广播后的shape[:-1] + """ + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + # 计算出boxes1与boxes1相交部分的左上角坐标、右下角坐标 + left_up = torch.max(boxes1[..., :2], boxes2[..., :2]) + right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:]) + + # 因为两个boxes没有交集时,(right_down - left_up) < 0,所以maximum可以保证当两个boxes没有交集时,它们之间的iou为0 + inter_section = torch.max(right_down - left_up, torch.zeros_like(right_down)) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + IOU = 1.0 * inter_area / union_area + return IOU + +def iou_xywh_torch(boxes1, boxes2): + """ + :param boxes1: boxes1和boxes2的shape可以不相同,但是需要满足广播机制,且需要是Tensor + :param boxes2: 且需要保证最后一维为坐标维,以及坐标的存储结构为(x, y, w, h) + :return: 返回boxes1和boxes2的IOU,IOU的shape为boxes1和boxes2广播后的shape[:-1] + """ + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + # 分别计算出boxes1和boxes2的左上角坐标、右下角坐标 + # 存储结构为(xmin, ymin, xmax, ymax),其中(xmin,ymin)是bbox的左上角坐标,(xmax,ymax)是bbox的右下角坐标 + boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1) + boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1) + + # 计算出boxes1与boxes1相交部分的左上角坐标、右下角坐标 + left_up = torch.max(boxes1[..., :2], boxes2[..., :2]) + right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:]) + + # 因为两个boxes没有交集时,(right_down - left_up) < 0,所以maximum可以保证当两个boxes没有交集时,它们之间的iou为0 + inter_section = torch.max(right_down - left_up, torch.zeros_like(right_down)) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + IOU = 1.0 * inter_area / union_area + return IOU + +def GIOU_xywh_torch(boxes1, boxes2): + """ + https://arxiv.org/abs/1902.09630 + 
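+    Computes GIoU = IoU - (area(C) - area(U)) / area(C), where C is the smallest
+    enclosing axis-aligned box of the two inputs and U is their union.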
boxes1(boxes2)' shape is [..., (x,y,w,h)].The size is for original image. + """ + # xywh->xyxy + boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1) + boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1) + + boxes1 = torch.cat([torch.min(boxes1[..., :2], boxes1[..., 2:]), + torch.max(boxes1[..., :2], boxes1[..., 2:])], dim=-1) + boxes2 = torch.cat([torch.min(boxes2[..., :2], boxes2[..., 2:]), + torch.max(boxes2[..., :2], boxes2[..., 2:])], dim=-1) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + inter_left_up = torch.max(boxes1[..., :2], boxes2[..., :2]) + inter_right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:]) + inter_section = torch.max(inter_right_down - inter_left_up, torch.zeros_like(inter_right_down)) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + IOU = 1.0 * inter_area / union_area + + enclose_left_up = torch.min(boxes1[..., :2], boxes2[..., :2]) + enclose_right_down = torch.max(boxes1[..., 2:], boxes2[..., 2:]) + enclose_section = torch.max(enclose_right_down - enclose_left_up, torch.zeros_like(enclose_right_down)) + enclose_area = enclose_section[..., 0] * enclose_section[..., 1] + + GIOU = IOU - 1.0 * (enclose_area - union_area) / enclose_area + return GIOU + +#DIOU和CIOU +def CIOU_xywh_torch1(boxes1, boxes2): + # xywh->xyxy + p2 = torch.pow(boxes1[..., 0] - boxes2[..., 0], 2) + torch.pow(boxes1[..., 1] - boxes2[..., 1], 2) + + # 增加av。分母boxes2[..., 3]可能为0,所以加上除0保护防止nan。 + atan1 = torch.atan(boxes1[..., 2] / boxes1[..., 3]) + temp_a = torch.where(boxes2[..., 3] > 0.0, boxes2[..., 3], boxes2[..., 3] + 1.0) + atan2 = torch.atan(boxes2[..., 2] / temp_a) + v = 4.0 * torch.pow(atan1 - atan2, 2) / (math.pi ** 2) + + + boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1) + boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1) + + boxes1 = torch.cat([torch.min(boxes1[..., :2], boxes1[..., 2:]), + torch.max(boxes1[..., :2], boxes1[..., 2:])], dim=-1) + boxes2 = torch.cat([torch.min(boxes2[..., :2], boxes2[..., 2:]), + torch.max(boxes2[..., :2], boxes2[..., 2:])], dim=-1) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + inter_left_up = torch.max(boxes1[..., :2], boxes2[..., :2]) # 内框的左上 + inter_right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:]) + inter_section = torch.max(inter_right_down - inter_left_up, torch.zeros_like(inter_right_down)) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + IOU = 1.0 * inter_area / union_area + + enclose_left_up = torch.min(boxes1[..., :2], boxes2[..., :2]) + enclose_right_down = torch.max(boxes1[..., 2:], boxes2[..., 2:]) + enclose_section = torch.max(enclose_right_down - enclose_left_up, torch.zeros_like(enclose_right_down)) + enclose_c2 = torch.pow(enclose_section[..., 0], 2) + torch.pow(enclose_section[..., 1], 2) + + alpha = v / (1 - IOU + v) + CIOU = IOU - 1.0 * p2 / enclose_c2 - 1.0 * alpha * v + + return CIOU + +def CIOU_xywh_torch(boxes1,boxes2): + + #cal CIOU of two boxes or batch boxes + #:param 
boxes1: [x, y, w, h] (center format) or a batch [..., (x, y, w, h)]
+    #:param boxes2: [x, y, w, h] (center format), same conventions as boxes1
+    #:return: CIoU of boxes1 and boxes2
+
+    # xywh->xyxy
+    boxes1 = torch.cat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
+                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], dim=-1)
+    boxes2 = torch.cat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
+                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], dim=-1)
+
+    boxes1 = torch.cat([torch.min(boxes1[..., :2], boxes1[..., 2:]),
+                        torch.max(boxes1[..., :2], boxes1[..., 2:])], dim=-1)
+    boxes2 = torch.cat([torch.min(boxes2[..., :2], boxes2[..., 2:]),
+                        torch.max(boxes2[..., :2], boxes2[..., 2:])], dim=-1)
+
+    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+
+    inter_left_up = torch.max(boxes1[..., :2], boxes2[..., :2])
+    inter_right_down = torch.min(boxes1[..., 2:], boxes2[..., 2:])
+    inter_section = torch.max(inter_right_down - inter_left_up, torch.zeros_like(inter_right_down))
+    inter_area = inter_section[..., 0] * inter_section[..., 1]
+    union_area = boxes1_area + boxes2_area - inter_area
+    ious = 1.0 * inter_area / union_area
+
+    # cal outer boxes
+    outer_left_up = torch.min(boxes1[..., :2], boxes2[..., :2])
+    outer_right_down = torch.max(boxes1[..., 2:], boxes2[..., 2:])
+    outer = torch.max(outer_right_down - outer_left_up, torch.zeros_like(inter_right_down))
+    # outer_diagonal_line = torch.pow(outer[...,0]+outer[...,1])
+    outer_diagonal_line = torch.pow(outer[..., 0], 2) + torch.pow(outer[..., 1], 2)
+    # outer_diagonal_line = torch.sum(torch.pow(outer, 2), axis=-1)
+
+    # cal center distance
+    boxes1_center = (boxes1[..., :2] + boxes1[...,2:]) * 0.5
+    boxes2_center = (boxes2[..., :2] + boxes2[...,2:]) * 0.5
+    center_dis = torch.pow(boxes1_center[...,0]-boxes2_center[...,0], 2) +\
+                 torch.pow(boxes1_center[...,1]-boxes2_center[...,1], 2)
+
+    # cal penalty term
+    # cal width,height
+    boxes1_size = torch.max(boxes1[..., 2:] - boxes1[..., :2], torch.zeros_like(inter_right_down))
+    boxes2_size = torch.max(boxes2[..., 2:] - boxes2[..., :2], torch.zeros_like(inter_right_down))
+    v = (4 / (math.pi ** 2)) * torch.pow(
+            torch.atan((boxes1_size[...,0]/torch.clamp(boxes1_size[...,1],min = 1e-6))) -
+            torch.atan((boxes2_size[..., 0] / torch.clamp(boxes2_size[..., 1],min = 1e-6))), 2)
+    alpha = v / (1-ious+v)
+
+    #cal ciou
+    cious = ious - (center_dis / outer_diagonal_line + alpha*v)
+    return cious
+
+
+def nms(bboxes, score_threshold, iou_threshold, sigma=0.3):
+    """
+    :param bboxes:
+        Assuming N bboxes have a score above score_threshold, bboxes has shape (N, 6),
+        stored as (xmin, ymin, xmax, ymax, score, class). The coordinates are relative
+        to the original input image, score = conf * prob, and class is the index of the
+        bbox's category.
+    :return: best_bboxes
+        Assuming N bboxes remain after NMS, best_bboxes has shape (N, 6) with the same
+        storage format and conventions as the input.
+    """
+    classes_in_img = list(set(bboxes[:, 5].astype(np.int32)))
+    best_bboxes = []
+    scale_factor = cfg.SCALE_FACTOR
+    for cls in classes_in_img:
+        cls_mask = (bboxes[:, 5].astype(np.int32) == cls)
+        cls_bboxes = bboxes[cls_mask]
+        while len(cls_bboxes) > 0:
+            max_ind = np.argmax(cls_bboxes[:, 4])  # pick the highest-scoring box
+            best_bbox = cls_bboxes[max_ind]
+            best_bboxes.append(best_bbox)
+            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
+            #####################################
+            iou = iou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
+            ####################################
+            #scale_factor 
= 1.0 * best_bbox[..., 2:3] * best_bbox[..., 3:4] / (cfg.TEST["TEST_IMG_SIZE"] ** 2) + + method = cfg.TEST["NMS_METHODS"] + weight = np.ones((len(iou),), dtype=np.float32) + if method == 'NMS': + iou_mask = iou > iou_threshold + weight[iou_mask] = 0.0 + elif method == 'SOFT_NMS': + weight = np.exp(-(1.0 * iou ** 2 / sigma)) + elif method == 'NMS_DIOU': + diou = diou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) + iou_mask = diou > iou_threshold + weight[iou_mask] = 0.0 + #elif method == 'NMS_DIOU_SCALE': + #iou_mask = scale_factor-(scale_factor-1.0)*diou > iou_threshold + #weight[iou_mask] = 0.0 + + cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight + score_mask = cls_bboxes[:, 4] > score_threshold + cls_bboxes = cls_bboxes[score_mask] + return np.array(best_bboxes) + + +def nms_glid(bboxes, score_threshold, iou_threshold, sigma=0.3): + """ + :param bboxes: + 假设有N个bbox的score大于score_threshold,那么bboxes的shape为(N, 6),存储格式为(xmin, ymin, xmax, ymax, score, class) + 其中(xmin, ymin, xmax, ymax)的大小都是相对于输入原图的,score = conf * prob,class是bbox所属类别的索引号 + :return: best_bboxes + 假设NMS后剩下N个bbox,那么best_bboxes的shape为(N, 6),存储格式为(xmin, ymin, xmax, ymax, score, class) + 其中(xmin, ymin, xmax, ymax)的大小都是相对于输入原图的,score = conf * prob,class是bbox所属类别的索引号 + """ + ######[coors(0:4), coors_rota(4:8), scores[:, np.newaxis](12), classes[:, np.newaxis]](13) + + classes_in_img = list(set(bboxes[:, 9].astype(np.int32))) + best_bboxes = [] + scale_factor = cfg.SCALE_FACTOR + for cls in classes_in_img: + cls_mask = (bboxes[:, 9].astype(np.int32) == cls) + cls_bboxes = bboxes[cls_mask] + + while len(cls_bboxes) > 0: + max_ind = np.argmax(cls_bboxes[:, 8])#取分数最大的 + best_bbox = cls_bboxes[max_ind] + best_bboxes.append(best_bbox) + cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) + + xmin = best_bbox[np.newaxis, 0:1] + ymin = best_bbox[np.newaxis, 1:2] + xmax = best_bbox[np.newaxis, 2:3] + ymax = best_bbox[np.newaxis, 3:4] + a1 = best_bbox[np.newaxis, 4:5] + a2 = best_bbox[np.newaxis, 5:6] + a3 = best_bbox[np.newaxis, 5:6] + a4 = best_bbox[np.newaxis, 6:7] + x1 = a1*(xmax-xmin)+xmin + y1 = ymin + x2 = a2*(ymax-ymin)+ymin + y2 = xmax + x3 = xmax-a3*(xmax-xmin) + y3 = ymax + x4 = xmin + y4 = ymax-a4*(ymax-ymin) + best_bbox_r = np.concatenate((x1,y1,x2,y2,x3,y3,x4,y4),axis=-1) + + + xminl = cls_bboxes[:, 0:1] + yminl = cls_bboxes[:, 1:2] + xmaxl = cls_bboxes[:, 2:3] + ymaxl = cls_bboxes[:, 3:4] + a1l = cls_bboxes[:, 4:5] + a2l = cls_bboxes[:, 5:6] + a3l = cls_bboxes[:, 5:6] + a4l = cls_bboxes[:, 6:7] + x1l = a1l*(xmaxl-xminl)+xminl + y1l = yminl + x2l = a2l*(ymaxl-yminl)+yminl + y2l = xmaxl + x3l = xmaxl-a3l*(xmaxl-xminl) + y3l = ymaxl + x4l = xminl + y4l = ymaxl-a4l*(ymaxl-yminl) + cls_bboxes_r = np.concatenate((x1l,y1l,x2l,y2l,x3l,y3l,x4l,y4l),axis=-1) + #print(cls_bboxes_r.shape) + iou = polygen_iou_xy4_numpy(best_bbox_r[np.newaxis, :8], cls_bboxes_r[:, :8]) + #print(np.maximum(iou,0.7)) + #iou = iou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) + #################################### + #scale_factor = 1.0 * best_bbox[..., 2:3] * best_bbox[..., 3:4] / (cfg.TEST["TEST_IMG_SIZE"] ** 2) + method = cfg.TEST["NMS_METHODS"] + weight = np.ones((len(iou),), dtype=np.float32) + if method == 'NMS': + iou_mask = iou > iou_threshold + weight[iou_mask] = 0.0 + elif method == 'SOFT_NMS': + weight = np.exp(-(1.0 * iou ** 2 / sigma)) + elif method == 'NMS_DIOU': + diou = diou_xyxy_numpy(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) + iou_mask = diou > iou_threshold + weight[iou_mask] = 0.0 + #elif method 
== 'NMS_DIOU_SCALE': + #iou_mask = scale_factor-(scale_factor-1.0)*diou > iou_threshold + #weight[iou_mask] = 0.0 + + cls_bboxes[:, 8] = cls_bboxes[:, 8] * weight + score_mask = cls_bboxes[:, 8] > score_threshold + cls_bboxes = cls_bboxes[score_mask] + return np.array(best_bboxes) diff --git a/utils/utils_coco.py b/utils/utils_coco.py new file mode 100644 index 0000000..e992f62 --- /dev/null +++ b/utils/utils_coco.py @@ -0,0 +1,270 @@ +from __future__ import division +import torch +import numpy as np +import cv2 + +def nms(bbox, thresh, score=None, limit=None): + """Suppress bounding boxes according to their IoUs and confidence scores. + Args: + bbox (array): Bounding boxes to be transformed. The shape is + :math:`(R, 4)`. :math:`R` is the number of bounding boxes. + thresh (float): Threshold of IoUs. + score (array): An array of confidences whose shape is :math:`(R,)`. + limit (int): The upper bound of the number of the output bounding + boxes. If it is not specified, this method selects as many + bounding boxes as possible. + Returns: + array: + An array with indices of bounding boxes that are selected. \ + They are sorted by the scores of bounding boxes in descending \ + order. \ + The shape of this array is :math:`(K,)` and its dtype is\ + :obj:`numpy.int32`. Note that :math:`K \\leq R`. + + from: https://github.com/chainer/chainercv + """ + if len(bbox) == 0: + return np.zeros((0,), dtype=np.int32) + + if score is not None: + order = score.argsort()[::-1] + bbox = bbox[order] + bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1) + + selec = np.zeros(bbox.shape[0], dtype=bool) + for i, b in enumerate(bbox): + tl = np.maximum(b[:2], bbox[selec, :2]) + br = np.minimum(b[2:], bbox[selec, 2:]) + area = np.prod(br - tl, axis=1) * (tl < br).all(axis=1) + + iou = area / (bbox_area[i] + bbox_area[selec] - area) + if (iou >= thresh).any(): + continue + + selec[i] = True + if limit is not None and np.count_nonzero(selec) >= limit: + break + + selec = np.where(selec)[0] + if score is not None: + selec = order[selec] + return selec.astype(np.int32) + + +def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): + + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [None for _ in range(len(prediction))] + for i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1) + class_pred = class_pred[0] + conf_mask = (image_pred[:, 4] * class_pred >= conf_thre).squeeze() + image_pred = image_pred[conf_mask] + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get detections with higher confidence scores than the threshold + ind = (image_pred[:, 5:] * image_pred[:, 4][:, None] >= conf_thre).nonzero() + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat(( + image_pred[ind[:, 0], :5], + image_pred[ind[:, 0], 5 + ind[:, 1]].unsqueeze(1), + ind[:, 1].float().unsqueeze(1) + ), 1) + # Iterate through all predicted classes + unique_labels = detections[:, -1].cpu().unique() + if prediction.is_cuda: + unique_labels = unique_labels.cuda() + for c in unique_labels: + # Get the detections with 
the particular class + detections_class = detections[detections[:, -1] == c] + nms_in = detections_class.cpu().numpy() + nms_out_index = nms( + nms_in[:, :4], nms_thre, score=nms_in[:, 4]*nms_in[:, 5]) + detections_class = detections_class[nms_out_index] + if output[i] is None: + output[i] = detections_class + else: + output[i] = torch.cat((output[i], detections_class)) + + return output + + +def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): + if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: + raise IndexError + + # top left + if xyxy: + tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) + # bottom right + br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + else: + tl = torch.max((bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2)) + # bottom right + br = torch.min((bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2)) + + area_a = torch.prod(bboxes_a[:, 2:], 1) + area_b = torch.prod(bboxes_b[:, 2:], 1) + en = (tl < br).type(tl.type()).prod(dim=2) + area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) + return area_i / (area_a[:, None] + area_b - area_i) + + +def label2box(labels, info_img, maxsize, lrflip): + + h, w, nh, nw, dx, dy = info_img + x1 = labels[:, 1] / w + y1 = labels[:, 2] / h + x2 = (labels[:, 1] + labels[:, 3]) / w + y2 = (labels[:, 2] + labels[:, 4]) / h + labels[:, 1] = (((x1 + x2) / 2) * nw + dx) / maxsize + labels[:, 2] = (((y1 + y2) / 2) * nh + dy) / maxsize + labels[:, 3] = labels[:, 3] * nw / w / maxsize + labels[:, 4] = labels[:, 4] * nh / h / maxsize + if lrflip: + labels[:, 1] = 1 - labels[:, 1] + return labels + + +def box2label(box, info_img): + + h, w, nh, nw, dx, dy = info_img + y1, x1, y2, x2 = box + box_h = ((y2 - y1) / nh) * h + box_w = ((x2 - x1) / nw) * w + y1 = ((y1 - dy) / nh) * h + x1 = ((x1 - dx) / nw) * w + label = [y1, x1, y1 + box_h, x1 + box_w] + return label + + +def preprocess(img, imgsize, jitter, random_placing=False): + h, w, _ = img.shape + img = img[:, :, ::-1] + assert img is not None + + if jitter > 0: + # add jitter + dw = jitter * w + dh = jitter * h + new_ar = (w + np.random.uniform(low=-dw, high=dw))\ + / (h + np.random.uniform(low=-dh, high=dh)) + else: + new_ar = w / h + + if new_ar < 1: + nh = imgsize + nw = nh * new_ar + else: + nw = imgsize + nh = nw / new_ar + nw, nh = int(nw), int(nh) + + if random_placing: + dx = int(np.random.uniform(imgsize - nw)) + dy = int(np.random.uniform(imgsize - nh)) + else: + dx = (imgsize - nw) // 2 + dy = (imgsize - nh) // 2 + + img = cv2.resize(img, (nw, nh)) + sized = np.ones((imgsize, imgsize, 3), dtype=np.uint8) * 127 + sized[dy:dy+nh, dx:dx+nw, :] = img + + info_img = (h, w, nh, nw, dx, dy) + return sized, info_img + +def rand_scale(s): + """ + calculate random scaling factor + Args: + s (float): range of the random scale. + Returns: + random scaling factor (float) whose range is + from 1 / s to s . + """ + scale = np.random.uniform(low=1, high=s) + if np.random.rand() > 0.5: + return scale + return 1 / scale + +def random_distort(img, hue, saturation, exposure): + """ + perform random distortion in the HSV color space. + Args: + img (numpy.ndarray): input image whose shape is :math:`(H, W, C)`. + Values range from 0 to 255. + hue (float): random distortion parameter. + saturation (float): random distortion parameter. + exposure (float): random distortion parameter. 
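+    Note: hue is shifted additively in normalized HSV space (with wrap-around),
+    while saturation and exposure are multiplied by factors drawn via rand_scale,
+    e.g. random_distort(img, hue=0.1, saturation=1.5, exposure=1.5) (illustrative values).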
+ Returns: + img (numpy.ndarray) + """ + dhue = np.random.uniform(low=-hue, high=hue) + dsat = rand_scale(saturation) + dexp = rand_scale(exposure) + + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + img = np.asarray(img, dtype=np.float32) / 255. + img[:, :, 1] *= dsat + img[:, :, 2] *= dexp + H = img[:, :, 0] + dhue + + if dhue > 0: + H[H > 1.0] -= 1.0 + else: + H[H < 0.0] += 1.0 + + img[:, :, 0] = H + img = (img * 255).clip(0, 255).astype(np.uint8) + img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB) + img = np.asarray(img, dtype=np.float32) + + return img + + +def get_coco_label_names(): + """ + COCO label names and correspondence between the model's class index and COCO class index. + Returns: + coco_label_names (tuple of str) : all the COCO label names including background class. + coco_class_ids (list of int) : index of 80 classes that are used in 'instance' annotations + coco_cls_colors (np.ndarray) : randomly generated color vectors used for box visualization + + """ + coco_label_names = ('background', # class zero + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', + 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', + 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', + 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', + 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', + 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', + 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' + ) + coco_class_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, + 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + coco_cls_colors = np.random.randint(128, 255, size=(80, 3)) + + return coco_label_names, coco_class_ids, coco_cls_colors diff --git a/utils/visDOTAnew.py b/utils/visDOTAnew.py new file mode 100644 index 0000000..19bb5d2 --- /dev/null +++ b/utils/visDOTAnew.py @@ -0,0 +1,421 @@ +import collections +import numpy as np +import PIL.Image as Image +import PIL.ImageColor as ImageColor +import PIL.ImageDraw as ImageDraw +import PIL.ImageFont as ImageFont + +_TITLE_LEFT_MARGIN = 10 +_TITLE_TOP_MARGIN = 10 + +''' +STANDARD_COLORS = [ + 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', + 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', + 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', + 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', + 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', + 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', + 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', + 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', + 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 
'LightBlue', + 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', + 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', + 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', + 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', + 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', + 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', + 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', + 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', + 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', + 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', + 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', + 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', + 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', + 'WhiteSmoke', 'Yellow', 'YellowGreen' +] +''' + +''' +STANDARD_COLORS = ['royalblue','limegreen', 'silver','darkseagreen','red','mediumvioletred','tomato','springgreen','Snow','paleturquoise', +'turquoise', 'crimson','orangered','forestgreen','LightSlateGrey']''' + +STANDARD_COLORS = [ +'red','orangered','tomato','lightcoral', +'fuchsia','gold','orange','khaki', +'limegreen', 'forestgreen','springgreen','paleturquoise', +'turquoise','dodgerblue','royalblue','slateblue', +'orchid','crimson','mediumvioletred','pink' +] +''' +STANDARD_COLORS = ['silver', +'lightcoral','royalblue','orange', +'limegreen', 'pink','springgreen', +'gold','turquoise', +'orchid' +] +''' +''' +STANDARD_COLORS = ['silver', +'dodgerblue','lightcoral','darkorange', +'forestgreen', 'crimson','mediumspringgreen', +'darkseagreen','gold', +'magenta' +] +''' + + + +#['pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor'] +def visualize_boxes(image, boxes, labels, probs, class_labels): + + category_index = {} + for id_, label_name in enumerate(class_labels): + category_index[id_] = {"name": label_name} + image=visualize_boxes_and_labels_on_image_array(image, boxes, labels, probs, category_index) + return image + +def visualize_boxes_and_labels_on_image_array( + image, + boxes, + classes, + scores, + category_index, + instance_masks=None, + instance_boundaries=None, + use_normalized_coordinates=False, + max_boxes_to_draw=3000, + min_score_thresh=.5, + agnostic_mode=False, + line_thickness=5, + groundtruth_box_visualization_color='black', + skip_scores=False, + skip_labels=False): + """Overlay labeled boxes on an image with formatted scores and label names. + + This function groups boxes that correspond to the same location + and creates a display string for each detection and overlays these + on the image. Note that this function modifies the image in place, and returns + that same image. + + Args: + image: uint8 numpy array with shape (img_height, img_width, 3) + boxes: a numpy array of shape [N, 4] + classes: a numpy array of shape [N]. Note that class indices are 1-based, + and match the keys in the label map. + scores: a numpy array of shape [N] or None. If scores=None, then + this function assumes that the boxes to be plotted are groundtruth + boxes and plot all boxes as black with no classes or scores. + category_index: a dict containing category dictionaries (each holding + category index `id` and category name `name`) keyed by category indices. 
+ instance_masks: a numpy array of shape [N, image_height, image_width] with + values ranging between 0 and 1, can be None. + instance_boundaries: a numpy array of shape [N, image_height, image_width] + with values ranging between 0 and 1, can be None. + use_normalized_coordinates: whether boxes is to be interpreted as + normalized coordinates or not. + max_boxes_to_draw: maximum number of boxes to visualize. If None, draw + all boxes. + min_score_thresh: minimum score threshold for a box to be visualized + agnostic_mode: boolean (default: False) controlling whether to evaluate in + class-agnostic mode or not. This mode will display scores but ignore + classes. + line_thickness: integer (default: 4) controlling line width of the boxes. + groundtruth_box_visualization_color: box color for visualizing groundtruth + boxes + skip_scores: whether to skip score when drawing a single detection + skip_labels: whether to skip label when drawing a single detection + + Returns: + uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes. + """ + # Create a display string (and color) for every box location, group any boxes + # that correspond to the same location. + box_to_display_str_map = collections.defaultdict(list) + box_to_color_map = collections.defaultdict(str) + + box_to_instance_masks_map = {} + box_to_instance_boundaries_map = {} + if not max_boxes_to_draw: + max_boxes_to_draw = boxes.shape[0] + + sorted_ind = np.argsort(-scores) + boxes=boxes[sorted_ind] + scores=scores[sorted_ind] + classes=classes[sorted_ind] + for i in range(min(max_boxes_to_draw, boxes.shape[0])): + if scores is None or scores[i] > min_score_thresh: + box = tuple(boxes[i].tolist()) + if instance_masks is not None: + box_to_instance_masks_map[box] = instance_masks[i] + if instance_boundaries is not None: + box_to_instance_boundaries_map[box] = instance_boundaries[i] + if scores is None: + box_to_color_map[box] = groundtruth_box_visualization_color + else: + display_str = '' + if not skip_labels: + if not agnostic_mode: + if classes[i] in category_index.keys(): + class_name = category_index[classes[i]]['name'] + else: + class_name = 'N/A' + display_str = str(class_name) + if not skip_scores: + if not display_str: + display_str = '{}%'.format(int(100 * scores[i])) + else: + display_str = '{}: {}%'.format(display_str, int(100 * scores[i])) + box_to_display_str_map[box].append(display_str) + if agnostic_mode: + box_to_color_map[box] = 'DarkOrange' + else: + box_to_color_map[box] = STANDARD_COLORS[ + classes[i] % len(STANDARD_COLORS)] + + # Draw all boxes onto image. + for box, color in box_to_color_map.items(): + xmin, ymin, xmax, ymax = box + if instance_masks is not None: + draw_mask_on_image_array( + image, + box_to_instance_masks_map[box], + color=color + ) + if instance_boundaries is not None: + draw_mask_on_image_array( + image, + box_to_instance_boundaries_map[box], + color='red', + alpha=1.0 + ) + draw_bounding_box_on_image_array( + image, + ymin, + xmin, + ymax, + xmax, + color=color, + thickness=line_thickness, + display_str_list=box_to_display_str_map[box], + use_normalized_coordinates=use_normalized_coordinates) + return image + + +def draw_bounding_box_on_image_array(image, + ymin, + xmin, + ymax, + xmax, + color='red', + thickness=4, + display_str_list=(), + use_normalized_coordinates=True): + """Adds a bounding box to an image (numpy array). + + Bounding box coordinates can be specified in either absolute (pixel) or + normalized coordinates by setting the use_normalized_coordinates argument. 
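+    The array is converted to a PIL image, the box is drawn on that copy, and the
+    result is written back into the input array in place via np.copyto.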
+ + Args: + image: a numpy array with shape [height, width, 3]. + ymin: ymin of bounding box. + xmin: xmin of bounding box. + ymax: ymax of bounding box. + xmax: xmax of bounding box. + color: color to draw bounding box. Default is red. + thickness: line thickness. Default value is 4. + display_str_list: list of strings to display in box + (each to be shown on its own line). + use_normalized_coordinates: If True (default), treat coordinates + ymin, xmin, ymax, xmax as relative to the image. Otherwise treat + coordinates as absolute. + """ + image_pil = Image.fromarray(np.uint8(image)).convert('RGB') + draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, + thickness, display_str_list, + use_normalized_coordinates) + np.copyto(image, np.array(image_pil)) + + +def draw_bounding_box_on_image(image, + ymin, + xmin, + ymax, + xmax, + color='red', + thickness=4, + display_str_list=(), + use_normalized_coordinates=True): + """Adds a bounding box to an image. + + Bounding box coordinates can be specified in either absolute (pixel) or + normalized coordinates by setting the use_normalized_coordinates argument. + + Each string in display_str_list is displayed on a separate line above the + bounding box in black text on a rectangle filled with the input 'color'. + If the top of the bounding box extends to the edge of the image, the strings + are displayed below the bounding box. + + Args: + image: a PIL.Image object. + ymin: ymin of bounding box. + xmin: xmin of bounding box. + ymax: ymax of bounding box. + xmax: xmax of bounding box. + color: color to draw bounding box. Default is red. + thickness: line thickness. Default value is 4. + display_str_list: list of strings to display in box + (each to be shown on its own line). + use_normalized_coordinates: If True (default), treat coordinates + ymin, xmin, ymax, xmax as relative to the image. Otherwise treat + coordinates as absolute. + """ + draw = ImageDraw.Draw(image) + im_width, im_height = image.size + if use_normalized_coordinates: + (left, right, top, bottom) = (xmin * im_width, xmax * im_width, + ymin * im_height, ymax * im_height) + else: + (left, right, top, bottom) = (xmin, xmax, ymin, ymax) + draw.line([(left, top), (left, bottom), (right, bottom), + (right, top), (left, top)], width=thickness, fill=color) + ''' + try: + font = ImageFont.truetype('arial.ttf', 24) + except IOError: + font = ImageFont.load_default() + + # If the total height of the display strings added to the top of the bounding + # box exceeds the top of the image, stack the strings below the bounding box + # instead of above. + display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] + # Each display_str has a top and bottom margin of 0.05x. + total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) + + if top > total_display_str_height: + text_bottom = top + else: + text_bottom = bottom + total_display_str_height + # Reverse list and print from bottom to top. + for display_str in display_str_list[::-1]: + text_width, text_height = font.getsize(display_str) + margin = np.ceil(0.05 * text_height) + draw.rectangle( + [(left, text_bottom - text_height - 2 * margin), (left + text_width, + text_bottom)], + fill=color) + draw.text( + (left + margin, text_bottom - text_height - margin), + display_str, + fill='black', + font=font) + text_bottom -= text_height - 2 * margin + ''' + +def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): + """Draws mask on an image. 
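+    The mask is scaled by `alpha`, used as an 8-bit blend mask, and a solid-color
+    layer is composited onto the image in place. Illustrative usage (assuming
+    `image` is a uint8 HxWx3 array):
+
+        mask = np.zeros(image.shape[:2], dtype=np.uint8)
+        mask[50:100, 50:100] = 1
+        draw_mask_on_image_array(image, mask, color='red', alpha=0.4)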
+ + Args: + image: uint8 numpy array with shape (img_height, img_height, 3) + mask: a uint8 numpy array of shape (img_height, img_height) with + values between either 0 or 1. + color: color to draw the keypoints with. Default is red. + alpha: transparency value between 0 and 1. (default: 0.4) + + Raises: + ValueError: On incorrect data type for image or masks. + """ + if image.dtype != np.uint8: + raise ValueError('`image` not of type np.uint8') + if mask.dtype != np.uint8: + raise ValueError('`mask` not of type np.uint8') + if np.any(np.logical_and(mask != 1, mask != 0)): + raise ValueError('`mask` elements should be in [0, 1]') + if image.shape[:2] != mask.shape: + raise ValueError('The image has spatial dimensions %s but the mask has ' + 'dimensions %s' % (image.shape[:2], mask.shape)) + rgb = ImageColor.getrgb(color) + pil_image = Image.fromarray(image) + + solid_color = np.expand_dims( + np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) + pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') + pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L') + pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) + np.copyto(image, np.array(pil_image.convert('RGB'))) + + +if __name__ == '__main__': + import cv2 + + + import os + cateNames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field', + 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court', + 'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter'] + + #cateNames = ['pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor'] + + #cateNames = ['airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge', 'chimney', + # 'dam', 'Expressway-Service-area', 'Expressway-toll-station', 'golffield', 'groundtrackfield', 'harbor', + # 'overpass', 'ship', 'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'vehicle', 'windmill'] + + path = "D:/PyProjects/visdotatest/savep/" + #path = "D:/PyProjects/visdronetest/savep/" # 文件夹目录 + files = os.listdir(path) # 得到文件夹下的所有文件名称 + txts = [] + #pathimg = "D:/Datasets/VisDrone2019/test/input/JPEGImages/" # 文件夹目录 + #pathimgsave = "D:/PyProjects/visdronetest/vis_res/" # 文件夹目录 + + pathimg = "D:/Datasets/DOTA/test/images/" # 文件夹目录 + pathimgsave = "D:/PyProjects/visdotatest/vis_res/" # 文件夹目录 + + for file in files: # 遍历文件夹 + position = path + file # 构造绝对路径,"\\",其中一个'\'为转义符 + #print(position) + + positionimg = pathimg + file.replace('txt','png') # 构造绝对路径,"\\",其中一个'\'为转义符file + #print(positionimg) + positionimgsave = pathimgsave + file.replace('txt', 'png') # 构造绝对路径,"\\",其中一个'\'为转义符file + imshow = cv2.imread(positionimg) + b = [] + l = [] + p = [] + with open(position, "r", encoding='utf-8') as f: # 打开文件 + + #with open('G:/test/vis/1.txt', "r") as f: + for line in f.readlines(): + data = line.split('\t\n') + for str1 in data: + sub_str = str1.split(' ') + if sub_str: + l.append(cateNames.index(sub_str[0])) + p.append(float(sub_str[1])) + aa = [] + + + aa.append(float(sub_str[2])) + aa.append(float(sub_str[3])) + aa.append(float(sub_str[4])) + aa.append(float(sub_str[5])) + + + b.append(aa) + + _boxes = np.array(b) + _labels = np.array(l) + _probs = np.array(p) + #print('lab', _labels) + #print('bbo', _boxes) + #print('pro', _probs.shape) + + visualize_boxes(image=imshow, boxes=_boxes, labels=_labels, probs=_probs, class_labels=cateNames) + + #plt.subplot(111) + #results = imshow[...,::-1] + #plt.imshow(results) + #plt.show() + 
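+        # Each detection file is parsed as "class score x1 y1 x2 y2" per line; the
+        # boxes are drawn above, and the annotated image is written out below as a
+        # PNG with compression level 0.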
#plt.savefig('test2png.jpg', dpi=100) + cv2.imwrite(positionimgsave, imshow, [int(cv2.IMWRITE_PNG_COMPRESSION),0]) + #, [int(cv2.IMWRITE_PNG_COMPRESSION),0] + diff --git a/utils/visualize.py b/utils/visualize.py new file mode 100644 index 0000000..93edc22 --- /dev/null +++ b/utils/visualize.py @@ -0,0 +1,189 @@ +import collections +import numpy as np +import PIL.Image as Image +import PIL.ImageColor as ImageColor +import PIL.ImageDraw as ImageDraw +import PIL.ImageFont as ImageFont + +_TITLE_LEFT_MARGIN = 10 +_TITLE_TOP_MARGIN = 10 +STANDARD_COLORS = [ + 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', + 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', + 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', + 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', + 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', + 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', + 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', + 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', + 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', + 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', + 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', + 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', + 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', + 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', + 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', + 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', + 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', + 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', + 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', + 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', + 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', + 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', + 'WhiteSmoke', 'Yellow', 'YellowGreen' +] + +def visualize_boxes(image, boxes, labels, probs, class_labels): + + category_index = {} + for id_, label_name in enumerate(class_labels): + category_index[id_] = {"name": label_name} + image=visualize_boxes_and_labels_on_image_array(image, boxes, labels, probs, category_index) + return image + +def visualize_boxes_and_labels_on_image_array( + image, + boxes, + classes, + scores, + category_index, + instance_masks=None, + instance_boundaries=None, + use_normalized_coordinates=False, + max_boxes_to_draw=200, + min_score_thresh=.5, + agnostic_mode=False, + line_thickness=4, + groundtruth_box_visualization_color='black', + skip_scores=False, + skip_labels=False): + + box_to_display_str_map = collections.defaultdict(list) + box_to_color_map = collections.defaultdict(str) + box_to_instance_masks_map = {} + box_to_instance_boundaries_map = {} + if not max_boxes_to_draw: + max_boxes_to_draw = boxes.shape[0] + + sorted_ind = np.argsort(-scores) + boxes=boxes[sorted_ind] + scores=scores[sorted_ind] + classes=classes[sorted_ind] + for i in range(min(max_boxes_to_draw, boxes.shape[0])): + if scores is None or scores[i] > min_score_thresh: + box = tuple(boxes[i].tolist()) + if instance_masks is not None: + box_to_instance_masks_map[box] = instance_masks[i] + if instance_boundaries is not None: 
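+                # Boundary masks are stored per box here and later drawn fully
+                # opaque (alpha=1.0) in red by draw_mask_on_image_array in the
+                # drawing loop further below.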
+ box_to_instance_boundaries_map[box] = instance_boundaries[i] + if scores is None: + box_to_color_map[box] = groundtruth_box_visualization_color + else: + display_str = '' + if not skip_labels: + if not agnostic_mode: + if classes[i] in category_index.keys(): + class_name = category_index[classes[i]]['name'] + else: + class_name = 'N/A' + display_str = str(class_name) + if not skip_scores: + if not display_str: + display_str = '{}%'.format(int(100 * scores[i])) + else: + display_str = '{}: {}%'.format(display_str, int(100 * scores[i])) + box_to_display_str_map[box].append(display_str) + if agnostic_mode: + box_to_color_map[box] = 'DarkOrange' + else: + box_to_color_map[box] = STANDARD_COLORS[ + classes[i] % len(STANDARD_COLORS)] + + for box, color in box_to_color_map.items(): + xmin, ymin, xmax, ymax = box + if instance_masks is not None: + draw_mask_on_image_array( + image, + box_to_instance_masks_map[box], + color=color + ) + if instance_boundaries is not None: + draw_mask_on_image_array( + image, + box_to_instance_boundaries_map[box], + color='red', + alpha=1.0 + ) + draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color=color, + thickness=line_thickness, display_str_list=box_to_display_str_map[box], + use_normalized_coordinates=use_normalized_coordinates) + return image + + +def draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color='red', + thickness=4, display_str_list=(), use_normalized_coordinates=True): + + image_pil = Image.fromarray(np.uint8(image)).convert('RGB') + draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, + thickness, display_str_list, + use_normalized_coordinates) + np.copyto(image, np.array(image_pil)) + + +def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color='red', thickness=4, display_str_list=(), use_normalized_coordinates=True): + draw = ImageDraw.Draw(image) + im_width, im_height = image.size + if use_normalized_coordinates: + (left, right, top, bottom) = (xmin * im_width, xmax * im_width, + ymin * im_height, ymax * im_height) + else: + (left, right, top, bottom) = (xmin, xmax, ymin, ymax) + draw.line([(left, top), (left, bottom), (right, bottom), + (right, top), (left, top)], width=thickness, fill=color) + ''' + try: + font = ImageFont.truetype('arial.ttf', 24) + except IOError: + font = ImageFont.load_default() + display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] + total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) + + if top > total_display_str_height: + text_bottom = top + else: + text_bottom = bottom + total_display_str_height + + for display_str in display_str_list[::-1]: + text_width, text_height = font.getsize(display_str) + margin = np.ceil(0.05 * text_height) + draw.rectangle( + [(left, text_bottom - text_height - 2 * margin), (left + text_width, + text_bottom)], + fill=color) + draw.text( + (left + margin, text_bottom - text_height - margin), + display_str, + fill='black', + font=font) + text_bottom -= text_height - 2 * margin + ''' + +def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): + + if image.dtype != np.uint8: + raise ValueError('`image` not of type np.uint8') + if mask.dtype != np.uint8: + raise ValueError('`mask` not of type np.uint8') + if np.any(np.logical_and(mask != 1, mask != 0)): + raise ValueError('`mask` elements should be in [0, 1]') + if image.shape[:2] != mask.shape: + raise ValueError('The image has spatial dimensions %s but the mask has ' + 'dimensions %s' % (image.shape[:2], mask.shape)) + rgb = 
ImageColor.getrgb(color) + pil_image = Image.fromarray(image) + solid_color = np.expand_dims( + np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) + pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') + pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L') + pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) + np.copyto(image, np.array(pil_image.convert('RGB'))) \ No newline at end of file diff --git a/utils/visualizeDOTA.py b/utils/visualizeDOTA.py new file mode 100644 index 0000000..f46a189 --- /dev/null +++ b/utils/visualizeDOTA.py @@ -0,0 +1,389 @@ +import collections +import numpy as np +import PIL.Image as Image +import PIL.ImageColor as ImageColor +import PIL.ImageDraw as ImageDraw +import PIL.ImageFont as ImageFont + +_TITLE_LEFT_MARGIN = 10 +_TITLE_TOP_MARGIN = 10 + +STANDARD_COLORS = [ +'red','orangered','tomato','lightcoral', +'silver','gold','orange','khaki', +'limegreen', 'forestgreen','springgreen','paleturquoise', +'turquoise','dodgerblue','royalblue','slateblue', +'orchid','crimson','mediumvioletred','pink' +] +''' +STANDARD_COLORS = ['silver', +'lightcoral','royalblue','orange', +'limegreen', 'pink','springgreen', +'turquoise','gold', +'orchid' +] +''' +''' +STANDARD_COLORS = ['silver', +'dodgerblue','lightcoral','darkorange', +'forestgreen', 'crimson','mediumspringgreen', +'darkseagreen','gold', +'magenta' +] +''' + + + +#['pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor'] +def visualize_boxes(image, boxes, labels, probs, class_labels): + + category_index = {} + for id_, label_name in enumerate(class_labels): + category_index[id_] = {"name": label_name} + image=visualize_boxes_and_labels_on_image_array(image, boxes, labels, probs, category_index) + return image + +def visualize_boxes_and_labels_on_image_array( + image, + boxes, + classes, + scores, + category_index, + instance_masks=None, + instance_boundaries=None, + use_normalized_coordinates=False, + max_boxes_to_draw=3000, + min_score_thresh=.5, + agnostic_mode=False, + line_thickness=5, + groundtruth_box_visualization_color='black', + skip_scores=False, + skip_labels=False): + """Overlay labeled boxes on an image with formatted scores and label names. + + This function groups boxes that correspond to the same location + and creates a display string for each detection and overlays these + on the image. Note that this function modifies the image in place, and returns + that same image. + + Args: + image: uint8 numpy array with shape (img_height, img_width, 3) + boxes: a numpy array of shape [N, 4] + classes: a numpy array of shape [N]. Note that class indices are 1-based, + and match the keys in the label map. + scores: a numpy array of shape [N] or None. If scores=None, then + this function assumes that the boxes to be plotted are groundtruth + boxes and plot all boxes as black with no classes or scores. + category_index: a dict containing category dictionaries (each holding + category index `id` and category name `name`) keyed by category indices. + instance_masks: a numpy array of shape [N, image_height, image_width] with + values ranging between 0 and 1, can be None. + instance_boundaries: a numpy array of shape [N, image_height, image_width] + with values ranging between 0 and 1, can be None. + use_normalized_coordinates: whether boxes is to be interpreted as + normalized coordinates or not. + max_boxes_to_draw: maximum number of boxes to visualize. If None, draw + all boxes. 
+    min_score_thresh: minimum score threshold for a box to be visualized
+    agnostic_mode: boolean (default: False) controlling whether to evaluate in
+      class-agnostic mode or not. This mode will display scores but ignore
+      classes.
+    line_thickness: integer (default: 5) controlling line width of the boxes.
+    groundtruth_box_visualization_color: box color for visualizing groundtruth
+      boxes.
+    skip_scores: whether to skip score when drawing a single detection
+    skip_labels: whether to skip label when drawing a single detection
+
+  Returns:
+    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
+  """
+  # Create a display string (and color) for every box location, and group any
+  # boxes that correspond to the same location.
+  box_to_display_str_map = collections.defaultdict(list)
+  box_to_color_map = collections.defaultdict(str)
+  box_to_instance_masks_map = {}
+  box_to_instance_boundaries_map = {}
+  if not max_boxes_to_draw:
+    max_boxes_to_draw = boxes.shape[0]
+
+  # Sort detections by descending score. When scores is None the boxes are
+  # groundtruth, so keep their original order instead of sorting.
+  if scores is not None:
+    sorted_ind = np.argsort(-scores)
+    boxes = boxes[sorted_ind]
+    scores = scores[sorted_ind]
+    classes = classes[sorted_ind]
+  for i in range(min(max_boxes_to_draw, boxes.shape[0])):
+    if scores is None or scores[i] > min_score_thresh:
+      box = tuple(boxes[i].tolist())
+      if instance_masks is not None:
+        box_to_instance_masks_map[box] = instance_masks[i]
+      if instance_boundaries is not None:
+        box_to_instance_boundaries_map[box] = instance_boundaries[i]
+      if scores is None:
+        box_to_color_map[box] = groundtruth_box_visualization_color
+      else:
+        display_str = ''
+        if not skip_labels:
+          if not agnostic_mode:
+            if classes[i] in category_index.keys():
+              class_name = category_index[classes[i]]['name']
+            else:
+              class_name = 'N/A'
+            display_str = str(class_name)
+        if not skip_scores:
+          if not display_str:
+            display_str = '{}%'.format(int(100 * scores[i]))
+          else:
+            display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))
+        box_to_display_str_map[box].append(display_str)
+        if agnostic_mode:
+          box_to_color_map[box] = 'DarkOrange'
+        else:
+          box_to_color_map[box] = STANDARD_COLORS[
+              classes[i] % len(STANDARD_COLORS)]
+
+  # Draw all boxes onto image.
+  for box, color in box_to_color_map.items():
+    xmin, ymin, xmax, ymax = box
+    if instance_masks is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_masks_map[box],
+          color=color
+      )
+    if instance_boundaries is not None:
+      draw_mask_on_image_array(
+          image,
+          box_to_instance_boundaries_map[box],
+          color='red',
+          alpha=1.0
+      )
+    draw_bounding_box_on_image_array(
+        image,
+        ymin,
+        xmin,
+        ymax,
+        xmax,
+        color=color,
+        thickness=line_thickness,
+        display_str_list=box_to_display_str_map[box],
+        use_normalized_coordinates=use_normalized_coordinates)
+  return image
+
+
+def draw_bounding_box_on_image_array(image,
+                                     ymin,
+                                     xmin,
+                                     ymax,
+                                     xmax,
+                                     color='red',
+                                     thickness=4,
+                                     display_str_list=(),
+                                     use_normalized_coordinates=True):
+  """Adds a bounding box to an image (numpy array).
+
+  Bounding box coordinates can be specified in either absolute (pixel) or
+  normalized coordinates by setting the use_normalized_coordinates argument.
+
+  Args:
+    image: a numpy array with shape [height, width, 3].
+    ymin: ymin of bounding box.
+    xmin: xmin of bounding box.
+    ymax: ymax of bounding box.
+    xmax: xmax of bounding box.
+    color: color to draw bounding box. Default is red.
+    thickness: line thickness. Default value is 4.
+    display_str_list: list of strings to display in box
+                      (each to be shown on its own line).
+ use_normalized_coordinates: If True (default), treat coordinates + ymin, xmin, ymax, xmax as relative to the image. Otherwise treat + coordinates as absolute. + """ + image_pil = Image.fromarray(np.uint8(image)).convert('RGB') + draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, + thickness, display_str_list, + use_normalized_coordinates) + np.copyto(image, np.array(image_pil)) + + +def draw_bounding_box_on_image(image, + ymin, + xmin, + ymax, + xmax, + color='red', + thickness=4, + display_str_list=(), + use_normalized_coordinates=True): + """Adds a bounding box to an image. + + Bounding box coordinates can be specified in either absolute (pixel) or + normalized coordinates by setting the use_normalized_coordinates argument. + + Each string in display_str_list is displayed on a separate line above the + bounding box in black text on a rectangle filled with the input 'color'. + If the top of the bounding box extends to the edge of the image, the strings + are displayed below the bounding box. + + Args: + image: a PIL.Image object. + ymin: ymin of bounding box. + xmin: xmin of bounding box. + ymax: ymax of bounding box. + xmax: xmax of bounding box. + color: color to draw bounding box. Default is red. + thickness: line thickness. Default value is 4. + display_str_list: list of strings to display in box + (each to be shown on its own line). + use_normalized_coordinates: If True (default), treat coordinates + ymin, xmin, ymax, xmax as relative to the image. Otherwise treat + coordinates as absolute. + """ + draw = ImageDraw.Draw(image) + im_width, im_height = image.size + if use_normalized_coordinates: + (left, right, top, bottom) = (xmin * im_width, xmax * im_width, + ymin * im_height, ymax * im_height) + else: + (left, right, top, bottom) = (xmin, xmax, ymin, ymax) + draw.line([(left, top), (left, bottom), (right, bottom), + (right, top), (left, top)], width=thickness, fill=color) + ''' + try: + font = ImageFont.truetype('arial.ttf', 24) + except IOError: + font = ImageFont.load_default() + + # If the total height of the display strings added to the top of the bounding + # box exceeds the top of the image, stack the strings below the bounding box + # instead of above. + display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] + # Each display_str has a top and bottom margin of 0.05x. + total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) + + if top > total_display_str_height: + text_bottom = top + else: + text_bottom = bottom + total_display_str_height + # Reverse list and print from bottom to top. + for display_str in display_str_list[::-1]: + text_width, text_height = font.getsize(display_str) + margin = np.ceil(0.05 * text_height) + draw.rectangle( + [(left, text_bottom - text_height - 2 * margin), (left + text_width, + text_bottom)], + fill=color) + draw.text( + (left + margin, text_bottom - text_height - margin), + display_str, + fill='black', + font=font) + text_bottom -= text_height - 2 * margin + ''' + +def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): + """Draws mask on an image. + + Args: + image: uint8 numpy array with shape (img_height, img_height, 3) + mask: a uint8 numpy array of shape (img_height, img_height) with + values between either 0 or 1. + color: color to draw the keypoints with. Default is red. + alpha: transparency value between 0 and 1. (default: 0.4) + + Raises: + ValueError: On incorrect data type for image or masks. 
+  """
+  if image.dtype != np.uint8:
+    raise ValueError('`image` not of type np.uint8')
+  if mask.dtype != np.uint8:
+    raise ValueError('`mask` not of type np.uint8')
+  if np.any(np.logical_and(mask != 1, mask != 0)):
+    raise ValueError('`mask` elements should be in [0, 1]')
+  if image.shape[:2] != mask.shape:
+    raise ValueError('The image has spatial dimensions %s but the mask has '
+                     'dimensions %s' % (image.shape[:2], mask.shape))
+  rgb = ImageColor.getrgb(color)
+  pil_image = Image.fromarray(image)
+
+  solid_color = np.expand_dims(
+      np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
+  pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
+  pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
+  pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
+  np.copyto(image, np.array(pil_image.convert('RGB')))
+
+
+if __name__ == '__main__':
+    import cv2
+    import os
+
+    # Class lists for other datasets (DOTA, VisDrone) are kept for reference:
+    #cateNames = ['plane', 'baseball-diamond', 'bridge', 'ground-track-field',
+    #             'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', 'basketball-court',
+    #             'storage-tank', 'soccer-ball-field', 'roundabout', 'harbor', 'swimming-pool', 'helicopter']
+    #cateNames = ['pedestrian','people','bicycle','car','van','truck','tricycle','awning-tricycle','bus','motor']
+
+    cateNames = ['airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge', 'chimney',
+                 'dam', 'Expressway-Service-area', 'Expressway-toll-station', 'golffield', 'groundtrackfield', 'harbor',
+                 'overpass', 'ship', 'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'vehicle', 'windmill']
+
+    path = "D:/PyProjects/visdiortest/savep/"            # directory holding one detection .txt per image
+    #path = "D:/PyProjects/visdronetest/savep/"
+    files = os.listdir(path)                             # all file names in that directory
+    txts = []
+    #pathimg = "D:/Datasets/VisDrone2019/test/input/JPEGImages/"
+    #pathimgsave = "D:/PyProjects/visdronetest/vis_res/"
+
+    pathimg = "D:/Datasets/DIOR/JPEGImages/"             # directory with the source images
+    pathimgsave = "D:/PyProjects/visdiortest/vis_res/"   # directory for the rendered results
+
+    for file in files:                                   # iterate over the result files
+        position = path + file                           # absolute path of the .txt file
+        #print(position)
+        positionimg = pathimg + file.replace('txt', 'jpg')           # matching image path
+        #print(positionimg)
+        positionimgsave = pathimgsave + file.replace('txt', 'jpg')   # output image path
+        imshow = cv2.imread(positionimg)
+        b = []
+        l = []
+        p = []
+        #with open('G:/test/vis/1.txt', "r") as f:
+        with open(position, "r", encoding='utf-8') as f:  # one detection per line:
+            # <class_name> <score> <x1> <y1> <x2> <y2>
+            for line in f.readlines():
+                line = line.strip()
+                if not line:                              # skip empty trailing lines
+                    continue
+                sub_str = line.split(' ')
+                l.append(cateNames.index(sub_str[0]))
+                p.append(float(sub_str[1]))
+                aa = []
+                aa.append(float(sub_str[2]))
+                aa.append(float(sub_str[3]))
+                aa.append(float(sub_str[4]))
+                aa.append(float(sub_str[5]))
+                b.append(aa)
+
+        _boxes = np.array(b)
+        _labels = np.array(l)
+        _probs = np.array(p)
+        #print('lab', _labels)
+        #print('bbo', _boxes)
+        #print('pro', _probs.shape)
+
+        visualize_boxes(image=imshow, boxes=_boxes, labels=_labels, probs=_probs, class_labels=cateNames)
+
+        #plt.subplot(111)
+        #results = imshow[...,::-1]
+        #plt.imshow(results)
+        #plt.show()
+        #plt.savefig('test2png.jpg', dpi=100)
+        cv2.imwrite(positionimgsave, imshow, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])
+
diff --git a/utils/xml2coco.py b/utils/xml2coco.py
new file mode 100644
index 0000000..8124a19
--- /dev/null
+++ b/utils/xml2coco.py
@@ -0,0 +1,181 @@
+import xml.etree.ElementTree as ET
+import os
+import json
+import config.cfg_npmmrdet_dior as cfg
+
+coco = dict()
+coco['images'] = []
+coco['type'] = 'instances'
+coco['annotations'] = []
+coco['categories'] = []
+
+category_set = dict()
+image_set = set()
+
+category_item_id = 0
+# Image ids are assigned as (first image id - 1) + running count. The hard-coded
+# 11725 assumes the DIOR test split, whose first image id is 11726; adjust it for
+# other splits.
+image_id = 11725
+annotation_id = 0
+
+def addCatItem(name):
+    global category_item_id
+    category_item = dict()
+    category_item['supercategory'] = 'none'
+
+    classes = cfg.DATA["CLASSES"]
+    category_item_id = classes.index(name) + 1
+    category_item['id'] = category_item_id
+    category_item['name'] = name
+    coco['categories'].append(category_item)
+    category_set[name] = category_item_id
+    return category_item_id
+
+
+def addImgItem(file_name, size):
+    global image_id
+    if file_name is None:
+        raise Exception('Could not find filename tag in xml file.')
+    if size['width'] is None:
+        raise Exception('Could not find width tag in xml file.')
+    if size['height'] is None:
+        raise Exception('Could not find height tag in xml file.')
+    image_id += 1
+    image_item = dict()
+    image_item['id'] = image_id
+    image_item['file_name'] = file_name
+    image_item['width'] = size['width']
+    image_item['height'] = size['height']
+    coco['images'].append(image_item)
+    image_set.add(file_name)
+    return image_id
+
+
+def addAnnoItem(object_name, image_id, category_id, bbox):
+    global annotation_id
+    annotation_item = dict()
+    annotation_item['segmentation'] = []
+    seg = []
+    # bbox[] is x,y,w,h; the segmentation polygon is simply the four box corners
+    # left_top
+    seg.append(bbox[0])
+    seg.append(bbox[1])
+    # left_bottom
+    seg.append(bbox[0])
+    seg.append(bbox[1] + bbox[3])
+    # right_bottom
+    seg.append(bbox[0] + bbox[2])
+    seg.append(bbox[1] + bbox[3])
+    # right_top
+    seg.append(bbox[0] + bbox[2])
+    seg.append(bbox[1])
+
+    annotation_item['segmentation'].append(seg)
+
+    annotation_item['area'] = bbox[2] * bbox[3]
+    annotation_item['iscrowd'] = 0
+    annotation_item['ignore'] = 0
+    annotation_item['image_id'] = image_id
+    annotation_item['bbox'] = bbox
+    annotation_item['category_id'] = category_id
+    annotation_id += 1
+    annotation_item['id'] = annotation_id
+    coco['annotations'].append(annotation_item)
+
+
+def parseXmlFiles(xml_path):
+    for f in os.listdir(xml_path):
+        if not f.endswith('.xml'):
+            continue
+
+        bndbox = dict()
+        size = dict()
+        current_image_id = None
+        current_category_id = None
+        file_name = None
+        size['width'] = None
+        size['height'] = None
+        size['depth'] = None
+
+        xml_file = os.path.join(xml_path, f)
+        print(xml_file)
+
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+        if root.tag != 'annotation':
+            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
+
+        # elem is <folder>, <filename>, <size> or <object>
+        for elem in root:
+            current_parent = elem.tag
+            current_sub = None
+            object_name = None
+
+            if elem.tag == 'folder':
+                continue
+
+            if elem.tag == 'filename':
+                file_name = elem.text
+                if file_name in image_set:
+                    raise Exception('file_name duplicated')
+
+            # add the image item only after the <size> tag has been parsed
+            elif current_image_id is None and file_name is not None and size['width'] is not None:
+                if file_name not in image_set:
+                    current_image_id = addImgItem(file_name, size)
+                    print('add image with {} and {}'.format(file_name, size))
+                else:
+                    raise Exception('duplicated image: {}'.format(file_name))
+            # subelem is <width>, <height>, <depth>, <name> or <bndbox>
+            for subelem in elem:
+                bndbox['xmin'] = None
+                bndbox['xmax'] = None
+                bndbox['ymin'] = None
+                bndbox['ymax'] = None
+
+                current_sub = subelem.tag
+                if current_parent == 'object' and subelem.tag == 'name':
+                    object_name = subelem.text
+                    if object_name not in category_set:
+                        current_category_id = addCatItem(object_name)
+                    else:
+                        current_category_id = category_set[object_name]
+
+                elif current_parent == 'size':
+                    if size[subelem.tag] is not None:
+                        raise Exception('xml structure broken at size tag.')
+                    size[subelem.tag] = int(subelem.text)
+
+                # option is <xmin>, <ymin>, <xmax> or <ymax>, when subelem is <bndbox>
+                for option in subelem:
+                    if current_sub == 'bndbox':
+                        if bndbox[option.tag] is not None:
+                            raise Exception('xml structure corrupted at bndbox tag.')
+                        bndbox[option.tag] = int(option.text)
+
+                # only add the annotation after the whole <bndbox> tag has been parsed
+                if bndbox['xmin'] is not None:
+                    if object_name is None:
+                        raise Exception('xml structure broken at bndbox tag')
+                    if current_image_id is None:
+                        raise Exception('xml structure broken at bndbox tag')
+                    if current_category_id is None:
+                        raise Exception('xml structure broken at bndbox tag')
+                    bbox = []
+                    # x
+                    bbox.append(bndbox['xmin'])
+                    # y
+                    bbox.append(bndbox['ymin'])
+                    # w
+                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
+                    # h
+                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
+                    print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
+                                                                   bbox))
+                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
+
+if __name__ == '__main__':
+    xml_path = os.path.join(cfg.DATA_PATH, "Annotest/")
+    json_file = os.path.join(cfg.DATA_PATH, "json_gt/", 'test.json')
+    parseXmlFiles(xml_path)
+    with open(json_file, 'w') as fp:
+        json.dump(coco, fp)
\ No newline at end of file
diff --git a/weight/ShuffleNetV2+.Medium.pth b/weight/ShuffleNetV2+.Medium.pth
new file mode 100644
index 0000000..293cb48
Binary files /dev/null and b/weight/ShuffleNetV2+.Medium.pth differ
diff --git a/weight/ShuffleNetV2+.Small.pth b/weight/ShuffleNetV2+.Small.pth
new file mode 100644
index 0000000..af58281
Binary files /dev/null and b/weight/ShuffleNetV2+.Small.pth differ
diff --git a/weight/ShuffleNetV2.1.5x.pth b/weight/ShuffleNetV2.1.5x.pth
new file mode 100644
index 0000000..cca6b6f
Binary files /dev/null and b/weight/ShuffleNetV2.1.5x.pth differ
diff --git a/weight/ghost_93.98.pth b/weight/ghost_93.98.pth
new file mode 100644
index 0000000..26da9f2
Binary files /dev/null and b/weight/ghost_93.98.pth differ
diff --git a/weight/mobilenet_v2_dwt_haar_256_best.pth b/weight/mobilenet_v2_dwt_haar_256_best.pth
new file mode 100644
index 0000000..91afd79
Binary files /dev/null and b/weight/mobilenet_v2_dwt_haar_256_best.pth differ
diff --git a/weight/mobilenetv2_1.0-0c6065bc.pth b/weight/mobilenetv2_1.0-0c6065bc.pth
new file mode 100644
index 0000000..0a66ce9
Binary files /dev/null and b/weight/mobilenetv2_1.0-0c6065bc.pth differ
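
Note on the visualization entry point added in utils/visualizeDOTA.py (and its twin in utils/visualize.py): visualize_boxes expects absolute pixel boxes in (x1, y1, x2, y2) order, since use_normalized_coordinates defaults to False, and it draws into the array in place. A minimal usage sketch, assuming an OpenCV-loaded image; the image path and label list here are hypothetical:

import cv2
import numpy as np
from utils.visualizeDOTA import visualize_boxes

class_labels = ['airplane', 'ship', 'vehicle']      # any label list works
image = cv2.imread('demo.jpg')                      # HxWx3 uint8 array (hypothetical file)
boxes = np.array([[50., 60., 200., 180.]])          # (x1, y1, x2, y2) in pixels
labels = np.array([1])                              # index into class_labels -> 'ship'
probs = np.array([0.91])                            # only scores > 0.5 are drawn by default
visualize_boxes(image=image, boxes=boxes, labels=labels, probs=probs,
                class_labels=class_labels)          # modifies `image` in place
cv2.imwrite('demo_vis.jpg', image)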
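The __main__ block of utils/visualizeDOTA.py reads one prediction file per image from savep/. The producer of those files is not part of this patch, so the per-line format below is inferred from the parsing loop; the output path is hypothetical:

# Hypothetical sketch: write one detection file in the space-separated format
# the parsing loop expects: <class_name> <score> <x1> <y1> <x2> <y2>
sample = [
    ('airplane', 0.97, 104.0, 56.0, 233.5, 181.2),
    ('ship',     0.63, 310.0, 402.0, 355.0, 447.0),
]
with open('D:/PyProjects/visdiortest/savep/00001.txt', 'w', encoding='utf-8') as f:
    for name, score, x1, y1, x2, y2 in sample:
        f.write('{} {} {} {} {} {}\n'.format(name, score, x1, y1, x2, y2))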
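utils/xml2coco.py converts the VOC-style DIOR annotations into the COCO json (test.json) consumed by the COCO evaluator. A small sanity-check sketch, assuming pycocotools is installed and using the DATA_PATH from the config; note the script imports config.cfg_npmmrdet_dior, which must exist alongside config/cfg_lodet.py in the target repository:

import os
from pycocotools.coco import COCO

json_file = os.path.join('/mnt/Datasets/DIOR/', 'json_gt', 'test.json')
coco_gt = COCO(json_file)                     # loads and indexes the generated annotations
print(len(coco_gt.getImgIds()), 'images')
print(len(coco_gt.getAnnIds()), 'annotations')
print([c['name'] for c in coco_gt.loadCats(coco_gt.getCatIds())][:5])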