class LODet(nn.Module):
    """One-stage lightweight detector (HBB variant, modelR/lodet_hbb.py).

    Pipeline: MobileNetV2 backbone -> CSA-DRF FPN neck -> one DSC head per
    detection scale (small / medium / large).

    NOTE: the modules must be created in this exact order because the
    pretrained weight file is ordered to match. The attribute name
    ``__backnone`` (sic) is kept: it is part of the state_dict keys of
    existing checkpoints.
    """

    def __init__(self, pre_weights=None):
        super(LODet, self).__init__()
        # Anchors / strides / class count all come from the global config.
        self.__anchors = torch.FloatTensor(cfg.MODEL["ANCHORS"])
        self.__strides = torch.FloatTensor(cfg.MODEL["STRIDES"])
        self.__nC = cfg.DATA["NUM"]
        self.__backnone = MobilenetV2(weight_path=pre_weights, extract_list=["6", "13", "conv"])  # "17"
        self.__neck = CSA_DRF_FPN(fileters_in=[1280, 96, 32])
        # One detection head per scale.
        self.__head_s = DSC_Head(nC=self.__nC, anchors=self.__anchors[0], stride=self.__strides[0])
        self.__head_m = DSC_Head(nC=self.__nC, anchors=self.__anchors[1], stride=self.__strides[1])
        self.__head_l = DSC_Head(nC=self.__nC, anchors=self.__anchors[2], stride=self.__strides[2])

    def forward(self, x):
        """Run the detector.

        Returns ``(p, p_d)``: in training mode ``p_d`` is the per-scale tuple
        (small, medium, large); in eval mode the decoded predictions of all
        scales are concatenated along dim 0.
        """
        feat_s, feat_m, feat_l = self.__backnone(x)
        feat_s, feat_m, feat_l = self.__neck(feat_l, feat_m, feat_s)
        outputs = [
            self.__head_s(feat_s),
            self.__head_m(feat_m),
            self.__head_l(feat_l),
        ]
        p, p_d = list(zip(*outputs))
        if self.training:
            return p, p_d  # small, medium, large
        return p, torch.cat(p_d, 0)
class FocalLoss(nn.Module):
    """Binary focal loss on logits.

    Wraps ``BCEWithLogitsLoss`` and scales it element-wise by
    ``alpha * |target - sigmoid(input)| ** gamma`` so that easy,
    well-classified examples contribute less to the total.
    """

    def __init__(self, gamma=2.0, alpha=1.0, reduction="mean"):
        super(FocalLoss, self).__init__()
        self.__gamma = gamma
        self.__alpha = alpha
        self.__loss = nn.BCEWithLogitsLoss(reduction=reduction)

    def forward(self, input, target):
        loss = self.__loss(input=input, target=target)
        # Modulating factor -> 0 as sigmoid(input) approaches target.
        loss *= self.__alpha * torch.pow(torch.abs(target - torch.sigmoid(input)), self.__gamma)
        return loss


class Loss(nn.Module):
    """Multi-scale detection loss for the HBB (horizontal-box) trainer.

    Sums per-layer IoU / confidence / classification losses over the small,
    medium and large prediction layers. ``forward`` returns
    ``(loss, loss_iou, loss_conf, loss_cls)``.
    """

    def __init__(self, anchors, strides, iou_threshold_loss=0.5):
        super(Loss, self).__init__()
        # ``anchors`` is accepted for interface compatibility but not used.
        self.__iou_threshold_loss = iou_threshold_loss
        self.__strides = strides
        self.__scale_factor = cfg.SCALE_FACTOR
        self.__scale_factor_a = cfg.SCALE_FACTOR_A

    def forward(self, p, p_d, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes):
        strides = self.__strides
        # BUG FIX: __cal_loss_per_layer returns 7 values
        # (loss, iou, conf, cls, a, r, s) but the original unpacked only 4,
        # raising "ValueError: too many values to unpack" on the first call.
        # The angle/ratio/side terms are already folded into each per-layer
        # total, so the extras are deliberately discarded here.
        loss_s, loss_s_iou, loss_s_conf, loss_s_cls, *_ = self.__cal_loss_per_layer(
            p[0], p_d[0], label_sbbox, sbboxes, strides[0])
        loss_m, loss_m_iou, loss_m_conf, loss_m_cls, *_ = self.__cal_loss_per_layer(
            p[1], p_d[1], label_mbbox, mbboxes, strides[1])
        loss_l, loss_l_iou, loss_l_conf, loss_l_cls, *_ = self.__cal_loss_per_layer(
            p[2], p_d[2], label_lbbox, lbboxes, strides[2])
        loss = loss_l + loss_m + loss_s
        loss_iou = loss_s_iou + loss_m_iou + loss_l_iou
        loss_conf = loss_s_conf + loss_m_conf + loss_l_conf
        loss_cls = loss_s_cls + loss_m_cls + loss_l_cls
        return loss, loss_iou, loss_conf, loss_cls

    def smooth_l1_loss(self, input, target, beta=1. / 9, size_average=True):
        """Element-wise smooth-L1 (Huber) loss with threshold ``beta``.

        ``size_average`` is unused and kept only for signature compatibility;
        no reduction is applied.
        """
        n = torch.abs(input - target)
        cond = n < beta
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
        return loss

    def __cal_loss_per_layer(self, p, p_d, label, bboxes, stride):
        """Compute all loss terms for one prediction layer.

        Layout of the channel axis (assumed from the slicing below — confirm
        against the head): 0:4 box xywh, 4:8 angle offsets, 8:9 ratio,
        9:10 conf logit, 10: class logits; the label tensor additionally
        carries s13/s24 side targets, an objectness mask and a mixup weight.
        Returns (loss, loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s),
        each already divided by batch size.
        """
        batch_size, grid = p.shape[:2]
        img_size = stride * grid
        p_d_xywh = p_d[..., :4]
        p_d_a = p_d[..., 4:8]
        p_d_r = p_d[..., 8:9]
        p_conf = p[..., 9:10]
        p_cls = p[..., 10:]

        label_xywh = label[..., :4]
        label_a = label[..., 4:8]
        label_r = label[..., 8:9]
        label_s13 = label[..., 9:10]
        label_s24 = label[..., 10:11]
        label_obj_mask = label[..., 11:12]
        label_mix = label[..., 12:13]
        label_cls = label[..., 13:]

        if cfg.TRAIN["IOU_TYPE"] == 'GIOU':
            xiou = utils_basic.GIOU_xywh_torch(p_d_xywh, label_xywh).unsqueeze(-1)
        elif cfg.TRAIN["IOU_TYPE"] == 'CIOU':
            xiou = utils_basic.CIOU_xywh_torch(p_d_xywh, label_xywh).unsqueeze(-1)
        else:
            # BUG FIX: previously fell through with `xiou` undefined -> NameError.
            raise ValueError("Unsupported cfg.TRAIN['IOU_TYPE']: {!r}".format(cfg.TRAIN["IOU_TYPE"]))
        # Small boxes get a larger weight (scale factor shrinks with box area).
        bbox_loss_scale = self.__scale_factor - (self.__scale_factor - 1.0) * label_xywh[..., 2:3] * label_xywh[..., 3:4] / (img_size ** 2)
        loss_iou = label_obj_mask * bbox_loss_scale * (1.0 - xiou) * label_mix

        # Ratio loss (weight 16 matches the original implementation).
        loss_r = label_obj_mask * self.smooth_l1_loss(p_d_r, label_r) * label_mix * 16
        a_sum = self.smooth_l1_loss(p_d_a, label_a)
        a_loss_scale = 1 + (self.__scale_factor_a - 1) * (label_xywh[..., 2:3] * label_xywh[..., 3:4] / (img_size ** 2))
        loss_a = label_obj_mask * a_sum * label_mix * a_loss_scale
        # Side-length consistency terms (s13 / s24), weighted by 1/target.
        onesa = torch.ones_like(p_d_r)
        d13 = p_d_xywh[..., 2:3] * torch.abs(onesa - p_d_a[..., 0:1] - p_d_a[..., 2:3])
        s13 = p_d_xywh[..., 3:4] / torch.sqrt(torch.mul(d13, d13) + torch.mul(p_d_xywh[..., 3:4], p_d_xywh[..., 3:4]))
        d24 = p_d_xywh[..., 3:4] * torch.abs(onesa - p_d_a[..., 1:2] - p_d_a[..., 3:4])
        s24 = p_d_xywh[..., 2:3] / torch.sqrt(torch.mul(d24, d24) + torch.mul(p_d_xywh[..., 2:3], p_d_xywh[..., 2:3]))
        s1234sum = self.smooth_l1_loss(s13, label_s13) * (1.0 / (label_s13 + 1e-8)) + self.smooth_l1_loss(s24, label_s24) * (1.0 / (label_s24 + 1e-8))
        loss_s = label_obj_mask * s1234sum * label_mix

        # Constructed per call (not in __init__) so no extra submodule appears
        # in this module's state_dict keys.
        FOCAL = FocalLoss(gamma=2, alpha=1.0, reduction="none")
        iou = utils_basic.iou_xywh_torch(p_d_xywh.unsqueeze(4), bboxes.unsqueeze(1).unsqueeze(1).unsqueeze(1))
        iou_max = iou.max(-1, keepdim=True)[0]
        # Anchors that miss every gt box beyond the IoU threshold are negatives.
        label_noobj_mask = (1.0 - label_obj_mask) * (iou_max < self.__iou_threshold_loss).float()

        loss_conf = (label_obj_mask * FOCAL(input=p_conf, target=label_obj_mask)
                     + label_noobj_mask * FOCAL(input=p_conf, target=label_obj_mask)) * label_mix

        # Classification loss (multi-label BCE on logits).
        BCE = nn.BCEWithLogitsLoss(reduction="none")
        loss_cls = label_obj_mask * BCE(input=p_cls, target=label_cls) * label_mix

        loss_iou = (torch.sum(loss_iou)) / batch_size
        loss_conf = (torch.sum(loss_conf)) / batch_size
        loss_cls = (torch.sum(loss_cls)) / batch_size
        loss_a = (torch.sum(loss_a)) / batch_size
        loss_r = (torch.sum(loss_r)) / batch_size
        loss_s = (torch.sum(loss_s)) / batch_size

        loss = loss_iou + (loss_a + loss_r) + loss_conf + loss_cls + loss_s

        return loss, loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s
class Trainer(object):
    """Training driver for the HBB LODet model (trainHBB.py).

    Builds the dataset/dataloader, model, Adam optimizer, loss and cosine LR
    scheduler; runs the epoch loop with optional multi-scale training;
    periodically evaluates (VOC or COCO protocol) and checkpoints.
    """

    def __init__(self, weight_path, resume, gpu_id):
        init_seeds(0)
        # NOTE(review): pruning/sparsity flags are leftovers from a pruning
        # experiment (see the commented block in train()) and are unused here.
        self.prune = 0
        self.sr = True
        self.device = gpu.select_device(gpu_id)
        print(self.device)
        self.start_epoch = 0
        self.best_mAP = 0.
        self.epochs = cfg.TRAIN["EPOCHS"]
        self.weight_path = weight_path
        self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"]
        if self.multi_scale_train:
            print('Using multi scales training')
        else:
            print('train img size is {}'.format(cfg.TRAIN["TRAIN_IMG_SIZE"]))

        self.train_dataset = data.Construct_Dataset(anno_file_type="train", img_size=cfg.TRAIN["TRAIN_IMG_SIZE"])
        self.train_dataloader = DataLoader(self.train_dataset,
                                           batch_size=cfg.TRAIN["BATCH_SIZE"],
                                           num_workers=cfg.TRAIN["NUMBER_WORKERS"],
                                           shuffle=True,
                                           pin_memory=True)

        net_model = LODet()
        if torch.cuda.device_count() > 1:  # multi GPU
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            net_model = torch.nn.DataParallel(net_model)
            self.model = net_model.to(self.device)
        else:
            # BUG FIX: the original only assigned self.model when
            # device_count() == 1, so CPU-only machines (count 0) crashed
            # later with AttributeError. A plain .to(device) covers both.
            self.model = net_model.to(self.device)

        # SGD variant kept for reference:
        # self.optimizer = optim.SGD(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"],
        #                            momentum=cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"])
        self.optimizer = optim.Adam(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"])

        self.criterion = Loss(anchors=cfg.MODEL["ANCHORS"], strides=cfg.MODEL["STRIDES"],
                              iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"])

        if resume:
            self.__load_model_weights(weight_path)

        self.scheduler = cosine_lr_scheduler.CosineDecayLR(self.optimizer,
                                                           T_max=self.epochs * len(self.train_dataloader),
                                                           lr_init=cfg.TRAIN["LR_INIT"],
                                                           lr_min=cfg.TRAIN["LR_END"],
                                                           warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(self.train_dataloader))

    def __load_model_weights(self, weight_path):
        """Resume model/optimizer/epoch/best-mAP state from `last.pt` located
        next to ``weight_path``."""
        last_weight = os.path.join(os.path.split(weight_path)[0], "last.pt")
        chkpt = torch.load(last_weight, map_location=self.device)
        self.model.load_state_dict(chkpt['model'])  # , False
        self.start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            self.optimizer.load_state_dict(chkpt['optimizer'])
        self.best_mAP = chkpt['best_mAP']
        del chkpt

    def __save_model_weights(self, epoch, mAP):
        """Save `last.pt` every call, `best.pt` when mAP reaches a new best,
        and a `backup_epoch*.pt` every 5 epochs."""
        if mAP > self.best_mAP:
            self.best_mAP = mAP
        best_weight = os.path.join(os.path.split(self.weight_path)[0], "best.pt")
        last_weight = os.path.join(os.path.split(self.weight_path)[0], "last.pt")
        chkpt = {'epoch': epoch,
                 'best_mAP': self.best_mAP,
                 'model': self.model.state_dict(),
                 'optimizer': self.optimizer.state_dict()}
        torch.save(chkpt, last_weight, _use_new_zipfile_serialization=False)
        if self.best_mAP == mAP:
            torch.save(chkpt['model'], best_weight, _use_new_zipfile_serialization=False)
        if epoch > 0 and epoch % 5 == 0:
            torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt' % epoch))
        del chkpt

    def __save_model_weights1(self, epoch, mAP):
        """Debug variant writing `best1.pt`/`last1.pt` plus an unconditional
        backup. Kept for compatibility but no longer called from train()
        (see BUG FIX note there)."""
        if mAP > self.best_mAP:
            self.best_mAP = mAP
        best_weight = os.path.join(os.path.split(self.weight_path)[0], "best1.pt")
        last_weight = os.path.join(os.path.split(self.weight_path)[0], "last1.pt")
        chkpt = {'epoch': epoch,
                 'best_mAP': self.best_mAP,
                 'model': self.model.state_dict(),
                 'optimizer': self.optimizer.state_dict()}
        torch.save(chkpt, last_weight, _use_new_zipfile_serialization=False)
        torch.save(chkpt['model'], best_weight, _use_new_zipfile_serialization=False)
        torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt' % epoch))
        del chkpt

    def train(self):
        logger.info(self.model)
        logger.info(" Training start!  Img size:{:d}, Batchsize:{:d}, Number of workers:{:d}".format(
            cfg.TRAIN["TRAIN_IMG_SIZE"], cfg.TRAIN["BATCH_SIZE"], cfg.TRAIN["NUMBER_WORKERS"]))
        logger.info(" Train datasets number is : {}".format(len(self.train_dataset)))

        for epoch in range(self.start_epoch, self.epochs):
            start = time.time()
            self.model.train()

            mloss = torch.zeros(4)
            mAP = 0
            # BUG FIX: the original called self.__save_model_weights1(epoch, 0)
            # here, at the TOP of every epoch, unconditionally overwriting
            # best1.pt and backup_epoch*.pt with a model that had not yet been
            # trained this epoch (trainR.py carries the same call commented
            # out). The call has been removed.
            for i, (imgs, label_sbbox, label_mbbox, label_lbbox,
                    sbboxes, mbboxes, lbboxes) in enumerate(self.train_dataloader):

                self.scheduler.step(len(self.train_dataloader) * epoch + i)
                imgs = imgs.to(self.device)
                label_sbbox = label_sbbox.to(self.device)
                label_mbbox = label_mbbox.to(self.device)
                label_lbbox = label_lbbox.to(self.device)
                sbboxes = sbboxes.to(self.device)
                mbboxes = mbboxes.to(self.device)
                lbboxes = lbboxes.to(self.device)
                p, p_d = self.model(imgs)

                loss, loss_iou, loss_conf, loss_cls = self.criterion(p, p_d, label_sbbox, label_mbbox,
                                                                     label_lbbox, sbboxes, mbboxes, lbboxes)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Use .item() so the running mean holds plain floats and no
                # autograd graphs are (even transiently) referenced.
                loss_items = torch.tensor([loss_iou.item(), loss_conf.item(), loss_cls.item(), loss.item()])
                mloss = (mloss * i + loss_items) / (i + 1)

                if i % 50 == 0:
                    logger.info(
                        " Epoch:[{:3}/{}] Batch:[{:3}/{}] Img_size:[{:3}] Loss:{:.4f} "
                        "Loss_IoU:{:.4f} | Loss_Conf:{:.4f} | Loss_Cls:{:.4f} LR:{:g}".format(
                            epoch, self.epochs, i, len(self.train_dataloader) - 1, self.train_dataset.img_size,
                            mloss[3], mloss[0], mloss[1], mloss[2], self.optimizer.param_groups[0]['lr']
                        ))
                    # NOTE(review): this global-step formula divides by batch
                    # size, matching the corrected trainR.py hunk in the same
                    # commit — confirm it is the intended x-axis.
                    step = len(self.train_dataloader) / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i
                    writer.add_scalar('loss_iou', mloss[0], step)
                    writer.add_scalar('loss_conf', mloss[1], step)
                    writer.add_scalar('loss_cls', mloss[2], step)
                    writer.add_scalar('train_loss', mloss[3], step)

                # Re-sample the training resolution every 10 batches.
                if self.multi_scale_train and (i + 1) % 10 == 0:
                    self.train_dataset.img_size = random.choice(range(
                        cfg.TRAIN["MULTI_TRAIN_RANGE"][0], cfg.TRAIN["MULTI_TRAIN_RANGE"][1],
                        cfg.TRAIN["MULTI_TRAIN_RANGE"][2])) * 32

            if epoch >= 60 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'VOC':
                logger.info("===== Validate =====")
                with torch.no_grad():
                    APs, inference_time = Evaluator(self.model).APs_voc()
                    for cls_name in APs:
                        logger.info("{} --> mAP : {}".format(cls_name, APs[cls_name]))
                        mAP += APs[cls_name]
                    mAP = mAP / self.train_dataset.num_classes
                    logger.info("mAP : {}".format(mAP))
                    logger.info("inference time: {:.2f} ms".format(inference_time))
                    writer.add_scalar('mAP', mAP, epoch)

            elif epoch >= 60 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'COCO':
                logger.info("===== Validate =====")
                with torch.no_grad():
                    evaluator = COCOEvaluator(data_dir=cfg.DATA_PATH,
                                              img_size=cfg.TEST["TEST_IMG_SIZE"],
                                              confthre=cfg.TEST["CONF_THRESH"],
                                              nmsthre=cfg.TEST["NMS_THRESH"])
                    ap50_95, ap50, inference_time = evaluator.evaluate(self.model)
                    mAP = ap50
                    logger.info('ap50_95:{} | ap50:{}'.format(ap50_95, ap50))
                    logger.info("inference time: {:.2f} ms".format(inference_time))
                    writer.add_scalar('val/COCOAP50', ap50, epoch)
                    writer.add_scalar('val/COCOAP50_95', ap50_95, epoch)

            self.__save_model_weights(epoch, mAP)
            logger.info('Save weights Done')
            logger.info("mAP: {:.3f}".format(mAP))
            end = time.time()
            logger.info("Inference time: {:.4f}s".format(end - start))

        logger.info("Training finished.  Best_mAP: {:.3f}%".format(self.best_mAP))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--weight_path', type=str, default='weight/mobilenetv2_1.0-0c6065bc.pth',
                        help='weight file path')  # default=None
    parser.add_argument('--resume', action='store_true', default=False, help='resume training flag')
    parser.add_argument('--gpu_id', type=int, default=0, help='gpu id')
    parser.add_argument('--log_path', type=str, default='log/', help='log path')
    opt = parser.parse_args()
    writer = SummaryWriter(logdir=opt.log_path + '/event')
    logger = Logger(log_file_name=opt.log_path + '/log.txt', log_level=logging.DEBUG,
                    logger_name='NPMMRDet').get_log()

    Trainer(weight_path=opt.weight_path, resume=opt.resume, gpu_id=opt.gpu_id).train()
batch_size=cfg.TRAIN["BATCH_SIZE"], num_workers=cfg.TRAIN["NUMBER_WORKERS"], - shuffle=True, - pin_memory=True) + shuffle=True) net_model = LODet() if torch.cuda.device_count() >1: ## multi GPUs @@ -43,15 +40,15 @@ def __init__(self, weight_path, resume, gpu_id): elif torch.cuda.device_count() ==1: self.model = net_model.to(self.device) ## Single GPU - #self.optimizer = optim.SGD(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"], momentum=cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"]) - self.optimizer = optim.Adam(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"]) - + self.optimizer = optim.SGD(self.model.parameters(), lr=cfg.TRAIN["LR_INIT"], + momentum=cfg.TRAIN["MOMENTUM"], weight_decay=cfg.TRAIN["WEIGHT_DECAY"]) self.criterion = Loss(anchors=cfg.MODEL["ANCHORS"], strides=cfg.MODEL["STRIDES"], iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"]) if resume: self.__load_model_weights(weight_path) + #self.__save_model_weights_best(160) self.scheduler = cosine_lr_scheduler.CosineDecayLR(self.optimizer, T_max=self.epochs*len(self.train_dataloader), @@ -59,6 +56,21 @@ def __init__(self, weight_path, resume, gpu_id): lr_min=cfg.TRAIN["LR_END"], warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(self.train_dataloader)) + ''' + def __load_model_weights(self, weight_path, resume): + if resume: + last_weight = os.path.join(os.path.split(weight_path)[0], "last.pt") + chkpt = torch.load(last_weight, map_location=self.device) + self.model.load_state_dict(chkpt['model'])#, False + self.start_epoch = chkpt['epoch'] + 1 + if chkpt['optimizer'] is not None: + self.optimizer.load_state_dict(chkpt['optimizer']) + self.best_mAP = chkpt['best_mAP'] + del chkpt + #else: + #self.model.load_darknet_weights(weight_path) ## Single GPU + #self.model.module.load_darknet_weights(weight_path) ## multi GPUs + ''' def __load_model_weights(self, weight_path): last_weight = os.path.join(os.path.split(weight_path)[0], "last.pt") chkpt = torch.load(last_weight, map_location=self.device) @@ -79,24 
+91,21 @@ def __save_model_weights(self, epoch, mAP): 'model': self.model.state_dict(), 'optimizer': self.optimizer.state_dict()} torch.save(chkpt, last_weight,_use_new_zipfile_serialization=False) + if self.best_mAP == mAP: torch.save(chkpt['model'], best_weight,_use_new_zipfile_serialization=False) if epoch > 0 and epoch % 5 == 0: torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt'%epoch)) + # del chkpt - def __save_model_weights1(self, epoch, mAP): - if mAP > self.best_mAP: - self.best_mAP = mAP - best_weight = os.path.join(os.path.split(self.weight_path)[0], "best1.pt") - last_weight = os.path.join(os.path.split(self.weight_path)[0], "last1.pt") + def __save_model_weights_best(self, epoch): + best_weight = os.path.join(os.path.split(self.weight_path)[0], "best.pt") chkpt = {'epoch': epoch, 'best_mAP': self.best_mAP, 'model': self.model.state_dict(), 'optimizer': self.optimizer.state_dict()} - torch.save(chkpt, last_weight,_use_new_zipfile_serialization=False) - torch.save(chkpt['model'], best_weight, _use_new_zipfile_serialization=False) - torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt'%epoch)) + torch.save(chkpt['model'], best_weight,_use_new_zipfile_serialization=False) del chkpt def train(self): @@ -109,12 +118,9 @@ def train(self): for epoch in range(self.start_epoch, self.epochs): start = time.time() self.model.train() + mloss = torch.zeros(7) - mloss = torch.zeros(4) - mAP = 0 - #self.__save_model_weights1(epoch, mAP) - for i, (imgs, label_sbbox, label_mbbox, label_lbbox, - sbboxes, mbboxes, lbboxes) in enumerate(self.train_dataloader): + for i, (imgs, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes) in enumerate(self.train_dataloader): self.scheduler.step(len(self.train_dataloader)*epoch + i) imgs = imgs.to(self.device) @@ -125,39 +131,43 @@ def train(self): mbboxes = mbboxes.to(self.device) lbboxes = lbboxes.to(self.device) p, p_d = self.model(imgs) - - loss, 
loss_iou, loss_conf, loss_cls = self.criterion(p, p_d, label_sbbox, label_mbbox, + loss, loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s = self.criterion(p, p_d, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes) self.optimizer.zero_grad() - loss.backward() self.optimizer.step() - loss_items = torch.tensor([loss_iou, loss_conf, loss_cls, loss]) + loss_items = torch.tensor([loss_iou, loss_conf, loss_cls, loss_a, loss_r, loss_s, loss]) mloss = (mloss * i + loss_items) / (i + 1) - + mAP = 0 if i % 50 == 0: logger.info( " Epoch:[{:3}/{}] Batch:[{:3}/{}] Img_size:[{:3}] Loss:{:.4f} " - "Loss_IoU:{:.4f} | Loss_Conf:{:.4f} | Loss_Cls:{:.4f} LR:{:g}".format( + "Loss_IoU:{:.4f} | Loss_Conf:{:.4f} | Loss_Cls:{:.4f} | Loss_avgA:{:.4f} | Loss_R:{:.4f} | Loss_S:{:.4f} LR:{:g}".format( epoch, self.epochs, i, len(self.train_dataloader) - 1, self.train_dataset.img_size, - mloss[3], mloss[0], mloss[1], mloss[2], self.optimizer.param_groups[0]['lr'] + mloss[6], mloss[0], mloss[1], mloss[2], mloss[3], mloss[4], mloss[5], self.optimizer.param_groups[0]['lr'] )) writer.add_scalar('loss_iou', mloss[0], len(self.train_dataloader) - * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) writer.add_scalar('loss_conf', mloss[1], len(self.train_dataloader) - * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) writer.add_scalar('loss_cls', mloss[2], len(self.train_dataloader) - * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) - writer.add_scalar('train_loss', mloss[3], len(self.train_dataloader) - * (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('loss_a', mloss[3], len(self.train_dataloader) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('loss_r', mloss[4], len(self.train_dataloader) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + writer.add_scalar('loss_s', mloss[5], len(self.train_dataloader) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) + 
writer.add_scalar('train_loss', mloss[6], len(self.train_dataloader) + / (cfg.TRAIN["BATCH_SIZE"]) * epoch + i) if self.multi_scale_train and (i+1) % 10 == 0: self.train_dataset.img_size = random.choice(range( cfg.TRAIN["MULTI_TRAIN_RANGE"][0], cfg.TRAIN["MULTI_TRAIN_RANGE"][1], cfg.TRAIN["MULTI_TRAIN_RANGE"][2])) * 32 - - if epoch >= 30 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'VOC': + self.__save_model_weights(epoch, mAP) + if epoch >= 70 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'VOC': logger.info("===== Validate =====".format(epoch, self.epochs)) with torch.no_grad(): APs, inference_time = Evaluator(self.model).APs_voc() @@ -169,7 +179,7 @@ def train(self): logger.info("inference time: {:.2f} ms".format(inference_time)) writer.add_scalar('mAP', mAP, epoch) - elif epoch >= 30 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'COCO': + elif epoch >= 50 and epoch % 5 == 0 and cfg.TRAIN["EVAL_TYPE"] == 'COCO': logger.info("===== Validate =====".format(epoch, self.epochs)) with torch.no_grad(): evaluator = COCOEvaluator(data_dir=cfg.DATA_PATH, @@ -179,6 +189,7 @@ def train(self): ap50_95, ap50, inference_time = evaluator.evaluate(self.model) mAP = ap50 logger.info('ap50_95:{} | ap50:{}'.format(ap50_95, ap50)) + logger.info("mAP: {:.3f}".format(mAP)) logger.info("inference time: {:.2f} ms".format(inference_time)) writer.add_scalar('val/COCOAP50', ap50, epoch) writer.add_scalar('val/COCOAP50_95', ap50_95, epoch) @@ -194,12 +205,14 @@ def train(self): if __name__ == "__main__": global logger, writer parser = argparse.ArgumentParser() - parser.add_argument('--weight_path', type=str, default='weight/mobilenetv2_1.0-0c6065bc.pth', - help='weight file path') #default=None + parser.add_argument('--weight_path', type=str, default=None#'weight/mobilenetv2_1.0-0c6065bc.pth' + , help='weight file path') #default=None + #parser.add_argument('--weight_path', type=str, default='weight/yolov4.conv.137', help='weight file path') # default=None 
parser.add_argument('--resume', action='store_true',default=False, help='resume training flag') parser.add_argument('--gpu_id', type=int, default=0, help='gpu id') parser.add_argument('--log_path', type=str, default='log/', help='log path') opt = parser.parse_args() writer = SummaryWriter(logdir=opt.log_path + '/event') - logger = Logger(log_file_name=opt.log_path + '/log.txt', log_level=logging.DEBUG, logger_name='LODet').get_log() + logger = Logger(log_file_name=opt.log_path + '/log.txt', log_level=logging.DEBUG, logger_name='NPMMRDet').get_log() + Trainer(weight_path=opt.weight_path, resume=opt.resume, gpu_id=opt.gpu_id).train() \ No newline at end of file