diff --git a/PSOL/PSOL_inference.py b/PSOL/PSOL_inference.py new file mode 100644 index 0000000..f5b9a75 --- /dev/null +++ b/PSOL/PSOL_inference.py @@ -0,0 +1,220 @@ +import os +import sys +import json +import numpy as np +import torch +import torchvision.transforms as transforms +from torch.backends import cudnn +import torch.nn as nn +import torchvision +from PIL import Image +from utils.func import * +from utils.vis import * +from utils.IoU import * +from models.models import choose_locmodel,choose_clsmodel +from utils.augment import * +import argparse + +parser = argparse.ArgumentParser(description='Parameters for PSOL evaluation') +parser.add_argument('--loc-model', metavar='locarg', type=str, default='vgg16',dest='locmodel') +parser.add_argument('--cls-model', metavar='clsarg', type=str, default='vgg16',dest='clsmodel') +parser.add_argument('--input_size',default=256,dest='input_size') +parser.add_argument('--crop_size',default=224,dest='crop_size') +parser.add_argument('--ten-crop', help='tencrop', action='store_true',dest='tencrop') +parser.add_argument('--gpu',help='which gpu to use',default='4',dest='gpu') +parser.add_argument('data',metavar='DIR',help='path to imagenet dataset') + +args = parser.parse_args() +os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu +os.environ['OMP_NUM_THREADS'] = "4" +os.environ['MKL_NUM_THREADS'] = "4" +cudnn.benchmark = True +TEN_CROP = args.tencrop +normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) +transform = transforms.Compose([ + transforms.Resize((args.input_size,args.input_size)), + transforms.CenterCrop(args.crop_size), + transforms.ToTensor(), + normalize +]) +cls_transform = transforms.Compose([ + transforms.Resize((args.input_size,args.input_size)), + transforms.CenterCrop(args.crop_size), + transforms.ToTensor(), + normalize +]) +ten_crop_aug = transforms.Compose([ + transforms.Resize((args.input_size,args.input_size)), + transforms.TenCrop(args.crop_size), + transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])), + transforms.Lambda(lambda crops: torch.stack([normalize(crop) for crop in crops])), +]) +locname = args.locmodel +model = choose_locmodel(locname, True) + +print(model) +model = model.to(0) +model.eval() +clsname = args.clsmodel +cls_model = choose_clsmodel(clsname) +cls_model = cls_model.to(0) +cls_model.eval() + +root = args.data +val_imagedir = os.path.join(root, 'val') + +anno_root = os.path.join(root,'bbox') +val_annodir = os.path.join(anno_root, 'myval') + + +classes = os.listdir(val_imagedir) +classes.sort() +temp_softmax = nn.Softmax() +#print(classes[0]) + + +class_to_idx = {classes[i]:i for i in range(len(classes))} + +result = {} + +accs = [] +accs_top5 = [] +loc_accs = [] +cls_accs = [] +final_cls = [] +final_loc = [] +final_clsloc = [] +final_clsloctop5 = [] +final_ind = [] +for k in range(1000): + cls = classes[k] + + total = 0 + IoUSet = [] + IoUSetTop5 = [] + LocSet = [] + ClsSet = [] + + files = os.listdir(os.path.join(val_imagedir, cls)) + files.sort() + + for (i, name) in enumerate(files): + # raw_img = cv2.imread(os.path.join(imagedir, cls, name)) + now_index = int(name.split('_')[-1].split('.')[0]) + final_ind.append(now_index-1) + xmlfile = os.path.join(val_annodir, cls, name.split('.')[0] + '.xml') + gt_boxes = get_cls_gt_boxes(xmlfile, cls) + if len(gt_boxes)==0: + continue + + raw_img = Image.open(os.path.join(val_imagedir, cls, name)).convert('RGB') + w, h = raw_img.size + + with torch.no_grad(): + img = transform(raw_img) + img 
= torch.unsqueeze(img, 0) + img = img.to(0) + reg_outputs = model(img) + + bbox = to_data(reg_outputs) + bbox = torch.squeeze(bbox) + bbox = bbox.numpy() + if TEN_CROP: + img = ten_crop_aug(raw_img) + img = img.to(0) + vgg16_out = cls_model(img) + vgg16_out = temp_softmax(vgg16_out) + vgg16_out = torch.mean(vgg16_out,dim=0,keepdim=True) + vgg16_out = torch.topk(vgg16_out, 5, 1)[1] + else: + img = cls_transform(raw_img) + img = torch.unsqueeze(img, 0) + img = img.to(0) + vgg16_out = cls_model(img) + vgg16_out = torch.topk(vgg16_out, 5, 1)[1] + vgg16_out = to_data(vgg16_out) + vgg16_out = torch.squeeze(vgg16_out) + vgg16_out = vgg16_out.numpy() + out = vgg16_out + ClsSet.append(out[0]==class_to_idx[cls]) + + #handle resize and centercrop for gt_boxes + for j in range(len(gt_boxes)): + temp_list = list(gt_boxes[j]) + raw_img_i, gt_bbox_i = ResizedBBoxCrop((256,256))(raw_img, temp_list) + raw_img_i, gt_bbox_i = CenterBBoxCrop((224))(raw_img_i, gt_bbox_i) + w, h = raw_img_i.size + + gt_bbox_i[0] = gt_bbox_i[0] * w + gt_bbox_i[2] = gt_bbox_i[2] * w + gt_bbox_i[1] = gt_bbox_i[1] * h + gt_bbox_i[3] = gt_bbox_i[3] * h + + gt_boxes[j] = gt_bbox_i + + w, h = raw_img_i.size + + bbox[0] = bbox[0] * w + bbox[2] = bbox[2] * w + bbox[0] + bbox[1] = bbox[1] * h + bbox[3] = bbox[3] * h + bbox[1] + + max_iou = -1 + for gt_bbox in gt_boxes: + iou = IoU(bbox, gt_bbox) + if iou > max_iou: + max_iou = iou + + LocSet.append(max_iou) + temp_loc_iou = max_iou + if out[0] != class_to_idx[cls]: + max_iou = 0 + + # print(max_iou) + result[os.path.join(cls, name)] = max_iou + IoUSet.append(max_iou) + #cal top5 IoU + max_iou = 0 + for i in range(5): + if out[i] == class_to_idx[cls]: + max_iou = temp_loc_iou + IoUSetTop5.append(max_iou) + #visualization code + ''' + opencv_image = deepcopy(np.array(raw_img_i)) + opencv_image = opencv_image[:, :, ::-1].copy() + for gt_bbox in gt_boxes: + cv2.rectangle(opencv_image, (int(gt_bbox[0]), int(gt_bbox[1])), + (int(gt_bbox[2]), int(gt_bbox[3])), (0, 255, 0), 4) + cv2.rectangle(opencv_image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), + (0, 255, 255), 4) + cv2.imwrite(os.path.join(savepath, str(name) + '.jpg'), np.asarray(opencv_image)) + ''' + cls_loc_acc = np.sum(np.array(IoUSet) > 0.5) / len(IoUSet) + final_clsloc.extend(IoUSet) + cls_loc_acc_top5 = np.sum(np.array(IoUSetTop5) > 0.5) / len(IoUSetTop5) + final_clsloctop5.extend(IoUSetTop5) + loc_acc = np.sum(np.array(LocSet) > 0.5) / len(LocSet) + final_loc.extend(LocSet) + cls_acc = np.sum(np.array(ClsSet))/len(ClsSet) + final_cls.extend(ClsSet) + print('{} cls-loc acc is {}, loc acc is {}, vgg16 cls acc is {}'.format(cls, cls_loc_acc, loc_acc, cls_acc)) + with open('inference_CorLoc.txt', 'a+') as corloc_f: + corloc_f.write('{} {}\n'.format(cls, loc_acc)) + accs.append(cls_loc_acc) + accs_top5.append(cls_loc_acc_top5) + loc_accs.append(loc_acc) + cls_accs.append(cls_acc) + if (k+1) %100==0: + print(k) + + +print(accs) +print('Cls-Loc acc {}'.format(np.mean(accs))) +print('Cls-Loc acc Top 5 {}'.format(np.mean(accs_top5))) + +print('GT Loc acc {}'.format(np.mean(loc_accs))) +print('{} cls acc {}'.format(clsname, np.mean(cls_accs))) +with open('Corloc_result.txt', 'w') as f: + for k in sorted(result.keys()): + f.write('{} {}\n'.format(k, str(result[k]))) diff --git a/PSOL/PSOL_training.py b/PSOL/PSOL_training.py new file mode 100644 index 0000000..b7f8d95 --- /dev/null +++ b/PSOL/PSOL_training.py @@ -0,0 +1,236 @@ +# coding: utf-8 + +# In[1]: + +import time +import os +import random +import math + +import torch +import torchvision 
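+# (descriptive comment, added) PSOL training fits a localization head that regresses a
+# single box (x, y, w, h), normalized to [0, 1], against the DDT pseudo boxes produced by
+# generate_box_imagenet.py, using an MSE loss; class prediction is handled by a separate
+# classifier only at inference time (see PSOL_inference.py).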
+import torch.nn as nn +import torch.optim as optim +import numpy as np +import matplotlib.pyplot as plt +from torch.utils.data import Dataset +from torch.optim import lr_scheduler +from torch.autograd import Variable +from torchvision import datasets, models, transforms +from PIL import Image +from loader.imagenet_loader import ImageNetDataset +from utils.func import * +from utils.IoU import * +from models.models import * +import warnings +warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) +import argparse + +# In[2]: + +### Some utilities + + +# In[3]: + +def compute_reg_acc(preds, targets, theta=0.5): + # preds = box_transform_inv(preds.clone(), im_sizes) + # preds = crop_boxes(preds, im_sizes) + # targets = box_transform_inv(targets.clone(), im_sizes) + IoU = compute_IoU(preds, targets) + # print(preds, targets, IoU) + corr = (IoU >= theta).sum() + return float(corr) / float(preds.size(0)) + + +def compute_cls_acc(preds, targets): + pred = torch.max(preds, 1)[1] + # print(preds, pred) + num_correct = (pred == targets).sum() + return float(num_correct) / float(preds.size(0)) + + +def compute_acc(reg_preds, reg_targets, cls_preds, cls_targets, theta=0.5): + IoU = compute_IoU(reg_preds, reg_targets) + reg_corr = (IoU >= theta) + + pred = torch.max(cls_preds, 1)[1] + cls_corr = (pred == cls_targets) + + corr = (reg_corr & cls_corr).sum() + + return float(corr) / float(reg_preds.size(0)) + + +class AverageMeter(object): + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.cnt = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.cnt += n + self.avg = self.sum / self.cnt + + +# ### Visualize training data + +# In[8]: + +train_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) +]) +test_transfrom = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) +]) +# ### Training + +# In[10]: + +# prepare data +parser = argparse.ArgumentParser(description='Parameters for PSOL evaluation') +parser.add_argument('--loc-model', metavar='locarg', type=str, default='resnet50',dest='locmodel') +parser.add_argument('--input_size',default=256,dest='input_size') +parser.add_argument('--crop_size',default=224,dest='crop_size') +parser.add_argument('--epochs',default=6,dest='epochs') +parser.add_argument('--gpu',help='which gpu to use',default='4,5,6,7',dest='gpu') +parser.add_argument('--ddt_path',help='generated ddt path',default='ImageNet/Projection/VGG16-448',dest="ddt_path") +parser.add_argument('--gt_path',help='validation groundtruth path',default='ImageNet_gt/',dest="gt_path") +parser.add_argument('--save_path',help='model save path',default='ImageNet_checkpoint',dest='save_path') +parser.add_argument('--batch_size',default=256,dest='batch_size') +parser.add_argument('data',metavar='DIR',help='path to imagenet dataset') + + +args = parser.parse_args() +batch_size = args.batch_size +#lr = 1e-3 * (batch_size / 64) +lr = 1e-3 * (batch_size / 256) +# lr = 3e-4 +momentum = 0.9 +weight_decay = 1e-4 +print_freq = 10 +root = args.data +savepath = args.save_path +os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu +os.environ['OMP_NUM_THREADS'] = '20' +os.environ['MKL_NUM_THREADS'] = '20' + +MyTrainData = ImageNetDataset(root=root, ddt_path=args.ddt_path, gt_path=args.gt_path,train=True, input_size=args.input_size,crop_size = args.crop_size, + 
transform=train_transform) +MyTestData = ImageNetDataset(root=root, ddt_path=args.ddt_path, gt_path=args.gt_path, train=False, input_size=args.input_size,crop_size = args.crop_size, + transform=test_transfrom) + + + +train_loader = torch.utils.data.DataLoader(dataset=MyTrainData, + batch_size=batch_size, + shuffle=True, num_workers=20, pin_memory=True) +test_loader = torch.utils.data.DataLoader(dataset=MyTestData, batch_size=batch_size, + num_workers=8, pin_memory=True) +dataloaders = {'train': train_loader, 'test': test_loader} + +# construct model +model = choose_locmodel(args.locmodel) +print(model) +model = torch.nn.DataParallel(model).cuda() +reg_criterion = nn.MSELoss().cuda() +dense1_params = list(map(id, model.module.fc.parameters())) +rest_params = filter(lambda x: id(x) not in dense1_params, model.parameters()) +param_list = [{'params': model.module.fc.parameters(), 'lr': 2 * lr}, + {'params': rest_params,'lr': 1 * lr}] +optimizer = torch.optim.SGD(param_list, lr, momentum=momentum, + weight_decay=weight_decay) +scheduler = lr_scheduler.StepLR(optimizer, step_size=args.epochs, gamma=0.1) +torch.backends.cudnn.benchmark = True +best_model_state = model.state_dict() +best_epoch = -1 +best_acc = 0.0 + +epoch_loss = {'train': [], 'test': []} +epoch_acc = {'train': [], 'test': []} +epochs = args.epochs +lambda_reg = 0 +for epoch in range(epochs): + lambda_reg = 50 + for phase in ('train', 'test'): + reg_accs = AverageMeter() + accs = AverageMeter() + reg_losses = AverageMeter() + batch_time = AverageMeter() + data_time = AverageMeter() + if phase == 'train': + if epoch >0: + scheduler.step() + model.train() + else: + model.eval() + + end = time.time() + cnt = 0 + for ims, labels, boxes in dataloaders[phase]: + data_time.update(time.time() - end) + inputs = Variable(ims.cuda()) + boxes = Variable(boxes.cuda()) + labels = Variable(labels.cuda()) + + optimizer.zero_grad() + + # forward + if phase == 'train': + if 'inception' in args.locmodel: + reg_outputs1,reg_outputs2 = model(inputs) + reg_loss1 = reg_criterion(reg_outputs1, boxes) + reg_loss2 = reg_criterion(reg_outputs2, boxes) + reg_loss = 1 * reg_loss1 + 0.3 * reg_loss2 + reg_outputs = reg_outputs1 + else: + reg_outputs = model(inputs) + reg_loss = reg_criterion(reg_outputs, boxes) + #_,reg_loss = compute_iou(reg_outputs,boxes) + else: + with torch.no_grad(): + reg_outputs = model(inputs) + reg_loss = reg_criterion(reg_outputs, boxes) + loss = lambda_reg * reg_loss + reg_acc = compute_reg_acc(reg_outputs.data.cpu(), boxes.data.cpu()) + + nsample = inputs.size(0) + reg_accs.update(reg_acc, nsample) + reg_losses.update(reg_loss.item(), nsample) + if phase == 'train': + loss.backward() + optimizer.step() + batch_time.update(time.time() - end) + end = time.time() + if cnt % print_freq == 0: + print( + '[{}]\tEpoch: {}/{}\t Iter: {}/{} Time {:.3f} ({:.3f})\t Data {:.3f} ({:.3f})\tLoc Loss: {:.4f}\tLoc Acc: {:.2%}\t'.format( + phase, epoch + 1, epochs, cnt, len(dataloaders[phase]), batch_time.val,batch_time.avg,data_time.val,data_time.avg,lambda_reg * reg_losses.avg, reg_accs.avg)) + cnt += 1 + if phase == 'test' and reg_accs.avg > best_acc: + best_acc = reg_accs.avg + best_epoch = epoch + best_model_state = model.state_dict() + + elapsed_time = time.time() - end + print( + '[{}]\tEpoch: {}/{}\tLoc Loss: {:.4f}\tLoc Acc: {:.2%}\tTime: {:.3f}'.format( + phase, epoch + 1, epochs, lambda_reg * reg_losses.avg, reg_accs.avg,elapsed_time)) + epoch_loss[phase].append(reg_losses.avg) + epoch_acc[phase].append(reg_accs.avg) + + print('[Info] best 
test acc: {:.2%} at {}th epoch'.format(best_acc, best_epoch + 1)) + if not os.path.exists(savepath): + os.makedirs(savepath) + torch.save(model.state_dict(), os.path.join(savepath,'checkpoint_localization_imagenet_ddt_' + args.locmodel + "_" + str(epoch) + '.pth.tar')) + torch.save(best_model_state, os.path.join(savepath,'best_cls_localization_imagenet_ddt_' + args.locmodel + "_" + str(epoch) + '.pth.tar')) + + diff --git a/PSOL/generate_box_imagenet.py b/PSOL/generate_box_imagenet.py new file mode 100644 index 0000000..607aa5b --- /dev/null +++ b/PSOL/generate_box_imagenet.py @@ -0,0 +1,163 @@ +import os +import sys +import cv2 +import json +import numpy as np +from paddle import dtype +import torch +import torchvision.transforms as transforms +from torch.backends import cudnn +from torch.autograd import Variable +import torch.nn as nn +import torchvision +import torchvision.models as models +from PIL import Image +from skimage import measure +# from scipy.misc import imresize +from utils.func import * +from utils.vis import * +from utils.IoU import * +import argparse +from loader.ddt_imagenet_dataset import DDTImageNetDataset + + +parser = argparse.ArgumentParser(description='Parameters for DDT generate box') +parser.add_argument('--input_size',default=448,dest='input_size') +parser.add_argument('--data',default="data/PSOL_imgs",help='path to imagenet dataset') +parser.add_argument('--gpu',help='which gpu to use',default='0,1,2,3',dest='gpu') +parser.add_argument('--output_path',default='data/PSOL_pseudo_box',dest='output_path') +parser.add_argument('--batch_size',default=64,dest='batch_size') +args = parser.parse_args() +os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu +os.environ['OMP_NUM_THREADS'] = "10" +os.environ['MKL_NUM_THREADS'] = "10" +cudnn.benchmark = True +model_ft = models.vgg16(pretrained=True) +model_ft.classifier=nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 2), + ) + + +# create new OrderedDict that does not contain `module.` +model_ft.load_state_dict(torch.load("./checkpoint.pth.tar")['state_dict']) +model = model_ft.features +#removed = list(model.children())[:-1] +#model = torch.nn.Sequential(*removed) +model = torch.nn.DataParallel(model).cuda() +model.eval() +projdir = args.output_path +if not os.path.exists(projdir): + os.makedirs(projdir) + +transform = transforms.Compose([ + transforms.Resize((args.input_size,args.input_size)), + transforms.ToTensor(), + transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) +]) +batch_size = args.batch_size +a = DDTImageNetDataset(root=args.data,batch_size=args.batch_size, transforms=transform) + +# print(classes[0]) + +for class_ind in range(3): + #if class_ind == 10: + # import sys + # sys.exit() + now_class_dict = {} + feature_list = [] + ddt_bbox = {} + with torch.no_grad(): + from tqdm import tqdm + for (input_img,path) in tqdm(a[class_ind]): + input_img = to_variable(input_img) + output = model(input_img) + output = to_data(output) + output = torch.squeeze(output).numpy() + if len(output.shape) == 3: + output = np.expand_dims(output,0) + output = np.transpose(output,(0,2,3,1)) + n,h,w,c = output.shape + for i in range(n): + now_class_dict[path[i]] = output[i,:,:,:] + output = np.reshape(output,(n*h*w,c)) + feature_list.append(output) + X = np.concatenate(feature_list,axis=0) + print ("Before Mean") + mean_matrix = np.mean(X, 0) + print ("AFTER Mean") + X = X - mean_matrix + print("Before PCA") + 
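+        # (added comment) DDT step: sk_pca returns the first principal component of all
+        # pooled conv5 descriptors of this class; each 14x14 feature map is later projected
+        # onto it (np.dot(v, trans_matrix.T)) and the largest positive region of the
+        # resulting heatmap becomes the pseudo bounding box.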
trans_matrix = sk_pca(X, 1) + print("AFTER PCA") + cls = a.label_class_dict[class_ind] + # save json + d = {'mean_matrix': mean_matrix.tolist(), 'trans_matrix': trans_matrix.tolist()} + with open(os.path.join(projdir, '%s_trans.json' % cls), 'w') as f: + json.dump(d, f) + # load json + with open(os.path.join(projdir, '%s_trans.json' % cls), 'r') as f: + t = json.load(f) + mean_matrix = np.array(t['mean_matrix']) + trans_matrix = np.array(t['trans_matrix']) + + print('trans_matrix shape is {}'.format(trans_matrix.shape)) + cnt = 0 + for k,v in now_class_dict.items(): + w = 14 + h = 14 + he = 448 + wi = 448 + v = np.reshape(v,(h * w,512)) + v = v - mean_matrix + + heatmap = np.dot(v, trans_matrix.T) + heatmap = np.reshape(heatmap, (h, w)) + highlight = np.zeros(heatmap.shape) + highlight[heatmap > 0] = 1 + # max component + all_labels = measure.label(highlight) + highlight = np.zeros(highlight.shape) + highlight[all_labels == count_max(all_labels.tolist())] = 1 + + # visualize heatmap + # show highlight in origin image + highlight = np.round(highlight * 255) + highlight_big = cv2.resize(highlight, (he, wi), interpolation=cv2.INTER_NEAREST) + props = measure.regionprops(highlight_big.astype(int)) + + if len(props) == 0: + #print(highlight) + bbox = [0, 0, wi, he] + else: + temp = props[0]['bbox'] + bbox = [temp[1], temp[0], temp[3], temp[2]] + + temp_bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]] + temp_save_box = [x / 448 for x in temp_bbox] + ddt_bbox[os.path.join(cls, k)] = temp_save_box + + highlight_big = np.expand_dims(np.asarray(highlight_big), 2) + highlight_3 = np.concatenate((np.zeros((he, wi, 1)), np.zeros((he, wi, 1))), axis=2) + highlight_3 = np.concatenate((highlight_3, highlight_big), axis=2) + cnt +=1 + if cnt < 100: + savepath = args.output_path+'%s' % cls + if not os.path.exists(savepath): + os.makedirs(savepath) + from PIL import Image + raw_img = Image.open(k).convert("RGB") + raw_img = raw_img.resize((448,448)) + raw_img = np.asarray(raw_img) + raw_img = cv2.cvtColor(raw_img,cv2.COLOR_BGR2RGB) + cv2.rectangle(raw_img, (temp_bbox[0], temp_bbox[1]), + (temp_bbox[2] + temp_bbox[0], temp_bbox[3] + temp_bbox[1]), (255, 0, 0), 4) + save_name = k.split('/')[-1] + cv2.imwrite(os.path.join(savepath, save_name), np.asarray(raw_img)) + with open(os.path.join(projdir, '%s_bbox.json' % cls), 'w') as fp: + json.dump(ddt_bbox, fp) \ No newline at end of file diff --git a/PSOL/generate_box_imagenet_crop.py b/PSOL/generate_box_imagenet_crop.py new file mode 100644 index 0000000..510c73d --- /dev/null +++ b/PSOL/generate_box_imagenet_crop.py @@ -0,0 +1,160 @@ +import os +import sys +import cv2 +import json +import numpy as np +from paddle import dtype +import torch +import torchvision.transforms as transforms +from torch.backends import cudnn +from torch.autograd import Variable +import torch.nn as nn +import torchvision +import torchvision.models as models +from PIL import Image +from skimage import measure +# from scipy.misc import imresize +from utils.func import * +from utils.vis import * +from utils.IoU import * +import argparse +from loader.ddt_imagenet_dataset import DDTImageNetDataset + + +parser = argparse.ArgumentParser(description='Parameters for DDT generate box') +parser.add_argument('--input_size',default=448,dest='input_size') +parser.add_argument('--data',default="data/PSOL_imgs",help='path to imagenet dataset') +parser.add_argument('--gpu',help='which gpu to use',default='0,1,2,3',dest='gpu') 
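+# (added note) This variant mirrors generate_box_imagenet.py but, instead of writing
+# *_bbox.json pseudo boxes, it crops the DDT-predicted region out of each original image
+# and saves the crop under output_path.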
+parser.add_argument('--output_path',default='data/DDT_crop/',dest='output_path') +parser.add_argument('--batch_size',default=64,dest='batch_size') +args = parser.parse_args() +os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu +os.environ['OMP_NUM_THREADS'] = "10" +os.environ['MKL_NUM_THREADS'] = "10" +cudnn.benchmark = True +model_ft = models.vgg16(pretrained=True) +model_ft.classifier=nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 2), + ) + + +# create new OrderedDict that does not contain `module.` +model_ft.load_state_dict(torch.load("./checkpoint.pth.tar")['state_dict']) +model = model_ft.features + +model = torch.nn.DataParallel(model).cuda() +model.eval() +projdir = args.output_path +if not os.path.exists(projdir): + os.makedirs(projdir) + +transform = transforms.Compose([ + transforms.Resize((args.input_size,args.input_size)), + transforms.ToTensor(), + transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) +]) +batch_size = args.batch_size +a = DDTImageNetDataset(root=args.data,batch_size=args.batch_size, transforms=transform) + +# print(classes[0]) + +for class_ind in range(3): + #if class_ind == 10: + # import sys + # sys.exit() + now_class_dict = {} + feature_list = [] + ddt_bbox = {} + with torch.no_grad(): + from tqdm import tqdm + for (input_img,path) in tqdm(a[class_ind]): + input_img = to_variable(input_img) + output = model(input_img) + output = to_data(output) + output = torch.squeeze(output).numpy() + if len(output.shape) == 3: + output = np.expand_dims(output,0) + output = np.transpose(output,(0,2,3,1)) + n,h,w,c = output.shape + for i in range(n): + now_class_dict[path[i]] = output[i,:,:,:] + output = np.reshape(output,(n*h*w,c)) + feature_list.append(output) + X = np.concatenate(feature_list,axis=0) + print ("Before Mean") + mean_matrix = np.mean(X, 0) + print ("AFTER Mean") + X = X - mean_matrix + print("Before PCA") + trans_matrix = sk_pca(X, 1) + print("AFTER PCA") + cls = a.label_class_dict[class_ind] + # save json + d = {'mean_matrix': mean_matrix.tolist(), 'trans_matrix': trans_matrix.tolist()} + with open(os.path.join(projdir, '%s_trans.json' % cls), 'w') as f: + json.dump(d, f) + # load json + with open(os.path.join(projdir, '%s_trans.json' % cls), 'r') as f: + t = json.load(f) + mean_matrix = np.array(t['mean_matrix']) + trans_matrix = np.array(t['trans_matrix']) + + print('trans_matrix shape is {}'.format(trans_matrix.shape)) + cnt = 0 + for k,v in tqdm(now_class_dict.items()): + w = 14 + h = 14 + he = 448 + wi = 448 + v = np.reshape(v,(h * w,512)) + v = v - mean_matrix + + heatmap = np.dot(v, trans_matrix.T) + heatmap = np.reshape(heatmap, (h, w)) + highlight = np.zeros(heatmap.shape) + highlight[heatmap > 0] = 1 + # max component + all_labels = measure.label(highlight) + highlight = np.zeros(highlight.shape) + highlight[all_labels == count_max(all_labels.tolist())] = 1 + + # visualize heatmap + # show highlight in origin image + highlight = np.round(highlight * 255) + highlight_big = cv2.resize(highlight, (he, wi), interpolation=cv2.INTER_NEAREST) + props = measure.regionprops(highlight_big.astype(int)) + + if len(props) == 0: + #print(highlight) + bbox = [0, 0, wi, he] + else: + temp = props[0]['bbox'] + bbox = [temp[1], temp[0], temp[3], temp[2]] + + temp_bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]] + temp_save_box = [x / 448 for x in temp_bbox] + ddt_bbox[os.path.join(cls, k)] = temp_save_box + + highlight_big 
= np.expand_dims(np.asarray(highlight_big), 2) + highlight_3 = np.concatenate((np.zeros((he, wi, 1)), np.zeros((he, wi, 1))), axis=2) + highlight_3 = np.concatenate((highlight_3, highlight_big), axis=2) + cnt +=1 + if 1: + if "train" in cls: + cls = "train" + savepath = args.output_path+'%s' % cls + if not os.path.exists(savepath): + os.makedirs(savepath) + from PIL import Image + raw_img = Image.open(k).convert("RGB") + raw_img = np.asarray(raw_img) + raw_img = raw_img[int(temp_bbox[1]/448*raw_img.shape[0]):int((temp_bbox[3] + temp_bbox[1])/448*raw_img.shape[0]),int(temp_bbox[0]/448*raw_img.shape[1]):int((temp_bbox[2] + temp_bbox[0])/448*raw_img.shape[1])] + raw_img = cv2.cvtColor(raw_img,cv2.COLOR_BGR2RGB) + save_name = k.split('/')[-1] + cv2.imwrite(os.path.join(savepath, save_name), np.asarray(raw_img)) \ No newline at end of file diff --git a/PSOL/loader/__pycache__/ddt_imagenet_dataset.cpython-36.pyc b/PSOL/loader/__pycache__/ddt_imagenet_dataset.cpython-36.pyc new file mode 100644 index 0000000..67f5ef6 Binary files /dev/null and b/PSOL/loader/__pycache__/ddt_imagenet_dataset.cpython-36.pyc differ diff --git a/PSOL/loader/ddt_imagenet_dataset.py b/PSOL/loader/ddt_imagenet_dataset.py new file mode 100644 index 0000000..14c364b --- /dev/null +++ b/PSOL/loader/ddt_imagenet_dataset.py @@ -0,0 +1,78 @@ +from torchvision.datasets import ImageFolder +import torch.utils.data as data +from PIL import Image +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + with Image.open(f) as img: + return img.convert('RGB') +class SubImageDataset(data.Dataset): + def __init__(self, now_list, transforms=None): + self.imgs = now_list + self.transforms = transforms + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is class_index of the target class. + """ + path,label = self.imgs[index] + now_img = pil_loader(path) + if self.transforms is not None: + now_img = self.transforms(now_img) + return now_img,path + + def __len__(self): + return len(self.imgs) + + def __next__(self): + pass +class DDTImageNetDataset(data.Dataset): + def __init__(self, root, transforms=None, batch_size=128,target_transform=None): + self.img_dataset = ImageFolder(root) + self.label_class_dict = {} + for k,v in self.img_dataset.class_to_idx.items(): + self.label_class_dict[v] = k + + from collections import defaultdict + self.class_dict = defaultdict(list) + for i,(location,label) in enumerate(self.img_dataset.imgs): + self.class_dict[label].append((location,label)) + self.all_dataset = [] + for i in range(3): + self.all_dataset.append(SubImageDataset(self.class_dict[i],transforms=transforms)) + self.batch_size = batch_size + self.transforms = transforms + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is class_index of the target class. 
+ """ + now_dataset = self.all_dataset[index] + now_loader = data.DataLoader(now_dataset,batch_size=self.batch_size,shuffle=False,num_workers=8) + for i,(img,path) in enumerate(now_loader): + yield img,path + pass + + def __len__(self): + return len(self.class_dict) + + def __next__(self): + pass +if __name__ == '__main__': + import torchvision + transform = torchvision.transforms.Compose([ + torchvision.transforms.Resize((224,224)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) + ] + ) + a = DDTImageNetDataset(batch_size=2,transforms=transform) + for i in a[0]: + print(i) + import sys + sys.exit() diff --git a/PSOL/loader/imagenet_loader.py b/PSOL/loader/imagenet_loader.py new file mode 100644 index 0000000..6629f67 --- /dev/null +++ b/PSOL/loader/imagenet_loader.py @@ -0,0 +1,197 @@ +import torch.utils.data as data +import torch +import torchvision +from PIL import Image +import os +import os.path +import numpy as np +import json +from torchvision.transforms import functional as F +import warnings +import random +import math +import copy +import numbers +from utils.augment import * +IMG_EXTENSIONS = [ + '.jpg', '.JPG', '.jpeg', '.JPEG', + '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', +] + + +def is_image_file(filename): + return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + +def get_bbox_dict(root): + print('loading from ground truth bbox') + name_idx_dict = {} + with open(os.path.join(root, 'images.txt')) as f: + filelines = f.readlines() + for fileline in filelines: + fileline = fileline.strip('\n').split() + idx, name = fileline[0], fileline[1] + name_idx_dict[name] = idx + + idx_bbox_dict = {} + with open(os.path.join(root, 'bounding_boxes.txt')) as f: + filelines = f.readlines() + for fileline in filelines: + fileline = fileline.strip('\n').split() + idx, bbox = fileline[0], list(map(float, fileline[1:])) + idx_bbox_dict[idx] = bbox + + name_bbox_dict = {} + for name in name_idx_dict.keys(): + name_bbox_dict[name] = idx_bbox_dict[name_idx_dict[name]] + + return name_bbox_dict + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + with Image.open(f) as img: + return img.convert('RGB') + +def default_loader(path): + from torchvision import get_image_backend + #if get_image_backend() == 'accimage': + # return accimage_loader(path) + #else: + return pil_loader(path) + + +def load_train_bbox(label_dict,bbox_dir): + #bbox_dir = 'ImageNet/Projection/VGG16-448' + final_dict = {} + for i in range(1000): + now_name = label_dict[i] + now_json_file = os.path.join(bbox_dir,now_name+"_bbox.json") + with open(now_json_file, 'r') as fp: + name_bbox_dict = json.load(fp) + final_dict[i] = name_bbox_dict + return final_dict +def load_val_bbox(label_dict,all_imgs,gt_location): + #gt_location ='/data/zhangcl/DDT-code/ImageNet_gt' + import scipy.io as sio + gt_label = sio.loadmat(os.path.join(gt_location,'cache_groundtruth.mat')) + locs = [(x[0].split('/')[-1],x[0],x[1]) for x in all_imgs] + locs.sort() + final_bbox_dict = {} + for i in range(len(locs)): + #gt_label['rec'][:,1][0][0][0], if multilabel then get length, for final eval + final_bbox_dict[locs[i][1]] = 
gt_label['rec'][:,i][0][0][0][0][1][0] + return final_bbox_dict +class ImageNetDataset(data.Dataset): + """A generic data loader where the images are arranged in this way: :: + root/dog/xxx.png + root/dog/xxy.png + root/dog/xxz.png + root/cat/123.png + root/cat/nsdf3.png + root/cat/asd932_.png + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + Attributes: + classes (list): List of the class names. + class_to_idx (dict): Dict with items (class_name, class_index). + imgs (list): List of (image path, class_index) tuples + """ + + def __init__(self, root, ddt_path,gt_path, input_size=256, crop_size=224,train=True, transform=None, target_transform=None, loader=default_loader): + from torchvision.datasets import ImageFolder + self.train = train + self.input_size = input_size + self.crop_size = crop_size + self.ddt_path = ddt_path + self.gt_path = gt_path + if self.train: + self.img_dataset = ImageFolder(os.path.join(root,'train')) + else: + self.img_dataset = ImageFolder(os.path.join(root,'val')) + if len(self.img_dataset) == 0: + raise(RuntimeError("Found 0 images in subfolders of: " + root + "\n" + "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) + self.label_class_dict = {} + self.train = train + + for k, v in self.img_dataset.class_to_idx.items(): + self.label_class_dict[v] = k + if self.train: + #load train bbox + self.bbox_dict = load_train_bbox(self.label_class_dict,self.ddt_path) + else: + #load test bbox + self.bbox_dict = load_val_bbox(self.label_class_dict,self.img_dataset.imgs,self.gt_path) + self.img_dataset = self.img_dataset.imgs + + self.transform = transform + self.target_transform = target_transform + self.loader = loader + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is class_index of the target class. 
+ """ + path, target = self.img_dataset[index] + img = self.loader(path) + if self.train: + bbox = self.bbox_dict[target][path] + else: + bbox = self.bbox_dict[path] + w,h = img.size + + bbox = np.array(bbox, dtype='float32') + + #convert from x, y, w, h to x1,y1,x2,y2 + + + + + if self.train: + bbox[0] = bbox[0] + bbox[2] = bbox[0] + bbox[2] + bbox[1] = bbox[1] + bbox[3] = bbox[1] + bbox[3] + bbox[0] = math.ceil(bbox[0] * w) + bbox[2] = math.ceil(bbox[2] * w) + bbox[1] = math.ceil(bbox[1] * h) + bbox[3] = math.ceil(bbox[3] * h) + img_i, bbox_i = RandomResizedBBoxCrop((self.crop_size))(img, bbox) + #img_i, bbox_i = ResizedBBoxCrop((256,256))(img, bbox) + #img_i, bbox_i = RandomBBoxCrop((224))(img_i, bbox_i) + #img_i, bbox_i = ResizedBBoxCrop((320,320))(img, bbox) + #img_i, bbox_i = RandomBBoxCrop((299))(img_i, bbox_i) + img, bbox = RandomHorizontalFlipBBox()(img_i, bbox_i) + #img, bbox = img_i, bbox_i + else: + img_i, bbox_i = ResizedBBoxCrop((self.input_size,self.input_size))(img, bbox) + img, bbox = CenterBBoxCrop((self.crop_size))(img_i, bbox_i) + + + bbox[2] = bbox[2] - bbox[0] + bbox[3] = bbox[3] - bbox[1] + + if self.transform is not None: + img = self.transform(img) + if self.target_transform is not None: + target = self.target_transform(target) + return img, target, bbox + + def __len__(self): + return len(self.img_dataset) + +if __name__ == '__main__': + a =ImageNetDataset('/mnt/ramdisk/ImageNet/val/',train=False) \ No newline at end of file diff --git a/PSOL/models/models.py b/PSOL/models/models.py new file mode 100644 index 0000000..ebd3b6a --- /dev/null +++ b/PSOL/models/models.py @@ -0,0 +1,82 @@ +import torchvision +import torch.nn as nn +from utils.func import * +class VGGGAP(nn.Module): + def __init__(self, pretrained=True, num_classes=200): + super(VGGGAP,self).__init__() + self.features = torchvision.models.vgg16(pretrained=pretrained).features + self.avgpool = nn.AdaptiveAvgPool2d((1,1)) + self.classifier = nn.Sequential((nn.Linear(512,512),nn.ReLU(),nn.Linear(512,4),nn.Sigmoid())) + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = x.view(x.size(0),-1) + x = self.classifier(x) + return x +class VGG16(nn.Module): + def __init__(self, pretrained=True, num_classes=200): + super(VGG16,self).__init__() + self.features = torchvision.models.vgg16(pretrained=pretrained).features + temp_classifier = torchvision.models.vgg16(pretrained=pretrained).classifier + removed = list(temp_classifier.children()) + removed = removed[:-1] + temp_layer = nn.Sequential(nn.Linear(4096,512),nn.ReLU(),nn.Linear(512,4),nn.Sigmoid()) + removed.append(temp_layer) + self.classifier = nn.Sequential(*removed) + def forward(self, x): + x = self.features(x) + x = x.view(x.size(0),-1) + x = self.classifier(x) + return x + +def choose_locmodel(model_name,pretrained=False): + if model_name == 'densenet161': + model = torchvision.models.densenet161(pretrained=True) + + model.classifier = nn.Sequential( + nn.Linear(2208, 512), + nn.ReLU(), + nn.Linear(512, 4), + nn.Sigmoid() + ) + if pretrained: + model = copy_parameters(model, torch.load('densenet161loc.pth.tar')) + elif model_name == 'resnet50': + model = torchvision.models.resnet50(pretrained=True, num_classes=1000) + model.fc = nn.Sequential( + nn.Linear(2048, 512), + nn.ReLU(), + nn.Linear(512, 4), + nn.Sigmoid() + ) + if pretrained: + model = copy_parameters(model, torch.load('resnet50loc.pth.tar')) + elif model_name == 'vgggap': + model = VGGGAP(pretrained=True,num_classes=1000) + if pretrained: + model = copy_parameters(model, 
torch.load('vgggaploc.pth.tar')) + elif model_name == 'vgg16': + model = VGG16(pretrained=True,num_classes=1000) + if pretrained: + model = copy_parameters(model, torch.load('vgg16loc.pth.tar')) + elif model_name == 'inceptionv3': + #need for rollback inceptionv3 official code + pass + else: + raise ValueError('Do not have this model currently!') + return model +def choose_clsmodel(model_name): + if model_name == 'vgg16': + cls_model = torchvision.models.vgg16(pretrained=True) + elif model_name == 'inceptionv3': + cls_model = torchvision.models.inception_v3(pretrained=True, aux_logits=True, transform_input=True) + elif model_name == 'resnet50': + cls_model = torchvision.models.resnet50(pretrained=True) + elif model_name == 'densenet161': + cls_model = torchvision.models.densenet161(pretrained=True) + elif model_name == 'dpn131': + cls_model = torch.hub.load('rwightman/pytorch-dpn-pretrained', 'dpn131', pretrained=True,test_time_pool=True) + elif model_name == 'efficientnetb7': + from efficientnet_pytorch import EfficientNet + cls_model = EfficientNet.from_pretrained('efficientnet-b7') + return cls_model \ No newline at end of file diff --git a/PSOL/utils/IoU.py b/PSOL/utils/IoU.py new file mode 100644 index 0000000..944e760 --- /dev/null +++ b/PSOL/utils/IoU.py @@ -0,0 +1,157 @@ +import numpy as np +import xml.etree.ElementTree as ET +import torch +def get_gt_boxes(xmlfile): + '''get ground-truth bbox from VOC xml file''' + tree = ET.parse(xmlfile) + objs = tree.findall('object') + num_objs = len(objs) + gt_boxes = [] + for obj in objs: + bbox = obj.find('bndbox') + x1 = float(bbox.find('xmin').text)-1 + y1 = float(bbox.find('ymin').text)-1 + x2 = float(bbox.find('xmax').text)-1 + y2 = float(bbox.find('ymax').text)-1 + + gt_boxes.append((x1, y1, x2, y2)) + return gt_boxes + +def get_cls_gt_boxes(xmlfile, cls): + '''get ground-truth bbox from VOC xml file''' + tree = ET.parse(xmlfile) + objs = tree.findall('object') + num_objs = len(objs) + gt_boxes = [] + for obj in objs: + bbox = obj.find('bndbox') + cls_name = obj.find('name').text + #print(cls_name, cls) + if cls_name != cls: + continue + x1 = float(bbox.find('xmin').text)-1 + y1 = float(bbox.find('ymin').text)-1 + x2 = float(bbox.find('xmax').text)-1 + y2 = float(bbox.find('ymax').text)-1 + + gt_boxes.append((x1, y1, x2, y2)) + if len(gt_boxes)==0: + pass + #print('%s bbox = 0'%cls) + + return gt_boxes + +def get_cls_and_gt_boxes(xmlfile, cls,class_to_idx): + '''get ground-truth bbox from VOC xml file''' + tree = ET.parse(xmlfile) + objs = tree.findall('object') + num_objs = len(objs) + gt_boxes = [] + for obj in objs: + bbox = obj.find('bndbox') + cls_name = obj.find('name').text + #print(cls_name, cls) + if cls_name != cls: + continue + x1 = float(bbox.find('xmin').text)-1 + y1 = float(bbox.find('ymin').text)-1 + x2 = float(bbox.find('xmax').text)-1 + y2 = float(bbox.find('ymax').text)-1 + + gt_boxes.append((class_to_idx[cls_name],[x1, y1, x2-x1, y2-y1])) + if len(gt_boxes)==0: + pass + #print('%s bbox = 0'%cls) + + return gt_boxes +def convert_boxes(boxes): + ''' convert the bbox to the format (x1, y1, x2, y2) where x1,y10 + if aarea + barea - inter <=0: + print(a) + print(b) + o = inter / (aarea+barea-inter) + #if w<=0 or h<=0: + # o = 0 + return o + +def to_2d_tensor(inp): + inp = torch.Tensor(inp) + if len(inp.size()) < 2: + inp = inp.unsqueeze(0) + return inp + + +def xywh_to_x1y1x2y2(boxes): + boxes = to_2d_tensor(boxes) + boxes[:, 2] += boxes[:, 0] - 1 + boxes[:, 3] += boxes[:, 1] - 1 + return boxes + + +def 
x1y1x2y2_to_xywh(boxes): + boxes = to_2d_tensor(boxes) + boxes[:, 2] -= boxes[:, 0] - 1 + boxes[:, 3] -= boxes[:, 1] - 1 + return boxes + +def compute_IoU(pred_box, gt_box): + boxes1 = to_2d_tensor(pred_box) + # boxes1 = xywh_to_x1y1x2y2(boxes1) + boxes1[:, 2] = torch.clamp(boxes1[:, 0] + boxes1[:, 2], 0, 1) + boxes1[:, 3] = torch.clamp(boxes1[:, 1] + boxes1[:, 3], 0, 1) + + boxes2 = to_2d_tensor(gt_box) + boxes2[:, 2] = torch.clamp(boxes2[:, 0] + boxes2[:, 2], 0, 1) + boxes2[:, 3] = torch.clamp(boxes2[:, 1] + boxes2[:, 3], 0, 1) + # boxes2 = xywh_to_x1y1x2y2(boxes2) + + intersec = boxes1.clone() + intersec[:, 0] = torch.max(boxes1[:, 0], boxes2[:, 0]) + intersec[:, 1] = torch.max(boxes1[:, 1], boxes2[:, 1]) + intersec[:, 2] = torch.min(boxes1[:, 2], boxes2[:, 2]) + intersec[:, 3] = torch.min(boxes1[:, 3], boxes2[:, 3]) + + def compute_area(boxes): + # in (x1, y1, x2, y2) format + dx = boxes[:, 2] - boxes[:, 0] + dx[dx < 0] = 0 + dy = boxes[:, 3] - boxes[:, 1] + dy[dy < 0] = 0 + return dx * dy + + a1 = compute_area(boxes1) + a2 = compute_area(boxes2) + ia = compute_area(intersec) + assert ((a1 + a2 - ia < 0).sum() == 0) + return ia / (a1 + a2 - ia) \ No newline at end of file diff --git a/PSOL/utils/__pycache__/IoU.cpython-36.pyc b/PSOL/utils/__pycache__/IoU.cpython-36.pyc new file mode 100644 index 0000000..cc8e215 Binary files /dev/null and b/PSOL/utils/__pycache__/IoU.cpython-36.pyc differ diff --git a/PSOL/utils/__pycache__/func.cpython-36.pyc b/PSOL/utils/__pycache__/func.cpython-36.pyc new file mode 100644 index 0000000..adea3f6 Binary files /dev/null and b/PSOL/utils/__pycache__/func.cpython-36.pyc differ diff --git a/PSOL/utils/__pycache__/vis.cpython-36.pyc b/PSOL/utils/__pycache__/vis.cpython-36.pyc new file mode 100644 index 0000000..2167e82 Binary files /dev/null and b/PSOL/utils/__pycache__/vis.cpython-36.pyc differ diff --git a/PSOL/utils/augment.py b/PSOL/utils/augment.py new file mode 100644 index 0000000..c597726 --- /dev/null +++ b/PSOL/utils/augment.py @@ -0,0 +1,279 @@ +from PIL import Image +import copy +import numbers +from torchvision.transforms import functional as F +from .func import * +import random +import warnings +import math + +class RandomHorizontalFlipBBox(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img, bbox): + if random.random() < self.p: + flipbox = copy.deepcopy(bbox) + flipbox[0] = 1-bbox[2] + flipbox[2] = 1-bbox[0] + return F.hflip(img), flipbox + + return img, bbox +class RandomResizedBBoxCrop(object): + """Crop the given PIL Image to random size and aspect ratio. + + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. + + Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, scale=(0.2, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=Image.BILINEAR): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = interpolation + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, bbox, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + + area = img.size[0] * img.size[1] + + for attempt in range(30): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + + i = random.randint(0, img.size[1] - h) # i is y actually + j = random.randint(0, img.size[0] - w) # j is x + + #compute intersection between crop image and bbox + intersec = compute_intersec(i, j, h, w, bbox) + + if intersec[2]-intersec[0]>0 and intersec[3]-intersec[1]>0: + intersec = normalize_intersec(i, j, h, w, intersec) + return i, j, h, w, intersec + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if (in_ratio < min(ratio)): + w = img.size[0] + h = int(round(w / min(ratio))) + elif (in_ratio > max(ratio)): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + + intersec = compute_intersec(i, j, h, w, bbox) + intersec = normalize_intersec(i, j, h, w, intersec) + return i, j, h, w, intersec + + def __call__(self, img, bbox): + """ + Args: + img (PIL Image): Image to be cropped and resized. + + Returns: + PIL Image: Randomly cropped and resized image. + """ + i, j, h, w, crop_bbox = self.get_params(img, bbox, self.scale, self.ratio) + return F.resized_crop(img, i, j, h, w, self.size, self.interpolation), crop_bbox +class RandomBBoxCrop(object): + """Crop the given PIL Image at a random location. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + padding (int or sequence, optional): Optional padding on each border + of the image. Default is None, i.e no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. If a sequence of length 2 is provided, it is used to + pad left/right, top/bottom borders, respectively. + pad_if_needed (boolean): It will pad the image if smaller than the + desired size to avoid raising an exception. Since cropping is done + after padding, the padding seems to be done at a random offset. + fill: Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant + padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. 
+ + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + """ + + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + @staticmethod + def get_params(img, bbox, output_size): + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = img.size + th, tw = output_size + if w == tw and h == th: + return 0, 0, h, w + + i = random.randint(0, h - th) + j = random.randint(0, w - tw) + intersec = compute_intersec(i, j, h, w, bbox) + intersec = normalize_intersec(i, j, h, w, intersec) + return i, j, th, tw, intersec + + def __call__(self, img, bbox): + """ + Args: + img (PIL Image): Image to be cropped. + + Returns: + PIL Image: Cropped image. + """ + + i, j, h, w,crop_bbox = self.get_params(img, bbox, self.size) + + return F.crop(img, i, j, h, w),crop_bbox + + def __repr__(self): + return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding) + +class ResizedBBoxCrop(object): + + def __init__(self, size, interpolation=Image.BILINEAR): + self.size = size + + self.interpolation = interpolation + + @staticmethod + def get_params(img, bbox, size): + #resize to 256 + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + img = copy.deepcopy(img) + ow, oh = w, h + if w < h: + ow = size + oh = int(size*h/w) + else: + oh = size + ow = int(size*w/h) + else: + ow, oh = size[::-1] + w, h = img.size + + + intersec = copy.deepcopy(bbox) + ratew = ow / w + rateh = oh / h + intersec[0] = bbox[0]*ratew + intersec[2] = bbox[2]*ratew + intersec[1] = bbox[1]*rateh + intersec[3] = bbox[3]*rateh + + #intersec = normalize_intersec(i, j, h, w, intersec) + return (oh, ow), intersec + + def __call__(self, img, bbox): + """ + Args: + img (PIL Image): Image to be cropped and resized. + + Returns: + PIL Image: Randomly cropped and resized image. + """ + size, crop_bbox = self.get_params(img, bbox, self.size) + return F.resize(img, self.size, self.interpolation), crop_bbox + + +class CenterBBoxCrop(object): + + def __init__(self, size, interpolation=Image.BILINEAR): + self.size = size + + self.interpolation = interpolation + + @staticmethod + def get_params(img, bbox, size): + #center crop + if isinstance(size, numbers.Number): + output_size = (int(size), int(size)) + + w, h = img.size + th, tw = output_size + + i = int(round((h - th) / 2.)) + j = int(round((w - tw) / 2.)) + + intersec = compute_intersec(i, j, th, tw, bbox) + intersec = normalize_intersec(i, j, th, tw, intersec) + + #intersec = normalize_intersec(i, j, h, w, intersec) + return i, j, th, tw, intersec + + def __call__(self, img, bbox): + """ + Args: + img (PIL Image): Image to be cropped and resized. + + Returns: + PIL Image: Randomly cropped and resized image. 
+ """ + i, j, th, tw, crop_bbox = self.get_params(img, bbox, self.size) + return F.center_crop(img, self.size), crop_bbox + + diff --git a/PSOL/utils/func.py b/PSOL/utils/func.py new file mode 100644 index 0000000..072ef7b --- /dev/null +++ b/PSOL/utils/func.py @@ -0,0 +1,118 @@ +import numpy as np +import matplotlib.pyplot as plt +import torch +import copy +def count_max(x): + count_dict = {} + for xlist in x: + for item in xlist: + if item==0: + continue + if item not in count_dict.keys(): + count_dict[item] = 0 + count_dict[item] += 1 + if count_dict == {}: + return -1 + count_dict = sorted(count_dict.items(), key=lambda d:d[1], reverse=True) + return count_dict[0][0] + + +def sk_pca(X, k): + from sklearn.decomposition import PCA + pca = PCA(k) + pca.fit(X) + vec = pca.components_ + #print(vec.shape) + return vec + +def fld(x1, x2): + x1, x2 = np.mat(x1), np.mat(x2) + n1 = x1.shape[0] + n2 = x2.shape[0] + k = x1.shape[1] + + m1 = np.mean(x1, axis=0) + m2 = np.mean(x2, axis=0) + m = np.mean(np.concatenate((x1, x2), axis=0), axis=0) + print(x1.shape, m1.shape) + + + c1 = np.cov(x1.T) + s1 = c1*(n1-1) + c2 = np.cov(x2.T) + s2 = c2*(n2-1) + Sw = s1/n1 + s2/n2 + print(Sw.shape) + W = np.dot(np.linalg.inv(Sw), (m1-m2).T) + print(W.shape) + W = W / np.linalg.norm(W, 2) + return np.mean(np.dot(x1, W)), np.mean(np.dot(x2, W)), W + +def pca(X, k): + n, m = X.shape + mean = np.mean(X, 0) + #print(mean.shape) + temp = X - mean + conv = np.cov(X.T) + #print(conv.shape) + conv1 = np.cov(temp.T) + #print(conv-conv1) + + w, v = np.linalg.eig(conv) + #print(w.shape) + #print(v.shape) + index = np.argsort(-w) + vec = np.matrix(v.T[index[:k]]) + #print(vec.shape) + + recon = (temp * vec.T)*vec+mean + + #print(X-recon) + return vec +def to_variable(x): + if torch.cuda.is_available(): + x = x.to(0) + return torch.autograd.Variable(x) + +def to_data(x): + if torch.cuda.is_available(): + x = x.cpu() + return x.data + +def copy_parameters(model, pretrained_dict): + model_dict = model.state_dict() + + pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict and pretrained_dict[k].size()==model_dict[k[7:]].size()} + for k, v in pretrained_dict.items(): + print(k) + model_dict.update(pretrained_dict) + model.load_state_dict(model_dict) + return model + + +def compute_intersec(i, j, h, w, bbox): + ''' + intersection box between croped box and GT BBox + ''' + intersec = copy.deepcopy(bbox) + + intersec[0] = max(j, bbox[0]) + intersec[1] = max(i, bbox[1]) + intersec[2] = min(j + w, bbox[2]) + intersec[3] = min(i + h, bbox[3]) + return intersec + + +def normalize_intersec(i, j, h, w, intersec): + ''' + return: normalize into [0, 1] + ''' + + intersec[0] = (intersec[0] - j) / w + intersec[2] = (intersec[2] - j) / w + intersec[1] = (intersec[1] - i) / h + intersec[3] = (intersec[3] - i) / h + return intersec + + + diff --git a/PSOL/utils/nms.py b/PSOL/utils/nms.py new file mode 100644 index 0000000..a20a320 --- /dev/null +++ b/PSOL/utils/nms.py @@ -0,0 +1,36 @@ +import numpy as np + +def nms(boxes, scores, thresh): + if len(boxes)==0: + return [] + boxes = np.array(boxes) + #scores = np.array(scores) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2-x1+1)*(y2-y1+1) + + scores = np.array(scores) + + order = np.argsort(scores)[-100:] + keep_boxes = [] + while order.size > 0: + i = order[-1] + keep_boxes.append(boxes[i]) + + xx1 = np.maximum(x1[i], x1[order[:-1]]) + yy1 = np.maximum(y1[i], y1[order[:-1]]) + xx2 = np.minimum(x2[i], x2[order[:-1]]) + 
yy2 = np.minimum(y2[i], y2[order[:-1]]) + + w = np.maximum(0.0, xx2-xx1+1) + h = np.maximum(0.0, yy2-yy1+1) + inter = w*h + + ovr = inter / (areas[i] + areas[order[:-1]] - inter) + inds = np.where(ovr <= thresh) + order = order[inds] + + return keep_boxes diff --git a/PSOL/utils/vis.py b/PSOL/utils/vis.py new file mode 100644 index 0000000..fb39ae9 --- /dev/null +++ b/PSOL/utils/vis.py @@ -0,0 +1,22 @@ +from __future__ import print_function + +import cv2 +import numpy as np +import os + +_GREEN = (18, 217, 15) +_RED = (15, 18, 217) + +def vis_bbox(img, bbox, color=_GREEN, thick=1): + '''Visualize a bounding box''' + img = img.astype(np.uint8) + (x0, y0, x1, y1) = bbox + cv2.rectangle(img, (int(x0), int(y0)), (int(x1), int(y1)), color, thickness=thick) + return img + +def vis_one_image(img, boxes, color=_GREEN): + for bbox in boxes: + img = vis_bbox(img, (bbox[0], bbox[1], bbox[2], bbox[3]), color) + return img + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/classification_vgg.py b/classification_vgg.py new file mode 100644 index 0000000..8375dcf --- /dev/null +++ b/classification_vgg.py @@ -0,0 +1,492 @@ +import argparse +import os +import random +import shutil +import time +import warnings +from enum import Enum + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.optim +from torch.optim.lr_scheduler import StepLR +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +from torch.utils.data import Subset +os.environ["CUDA_VISIBLE_DEVICES"] = "0" +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', nargs='?', default='imagenet', + help='path to dataset (default: imagenet)') +parser.add_argument('-a', '--arch', metavar='ARCH', default='vgg16', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=1, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=16, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.01, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', 
dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained',default=True, action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', type=int,default=0, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark") + +best_acc1 = 0 + + +def main(): + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! ' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + ngpus_per_node = torch.cuda.device_count() + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + model.classifier=nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 2), + ) + if not torch.cuda.is_available(): + print('using CPU, this will be 
slow') + elif args.distributed: + # For multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. + if args.gpu is not None: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs of the current node. + args.batch_size = int(args.batch_size / ngpus_per_node) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None: + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + # define loss function (criterion), optimizer, and learning rate scheduler + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + optimizer = torch.optim.AdamW(model.parameters(), 3e-4) + + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + scheduler = StepLR(optimizer, step_size=30, gamma=0.1) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + else: + # Map model to be loaded to specified single gpu. 
+ loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + scheduler.load_state_dict(checkpoint['scheduler']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + + # Data loading code + if args.dummy: + print("=> Dummy data is used!") + train_dataset = datasets.FakeData(1281167, (3, 224, 224), 1000, transforms.ToTensor()) + val_dataset = datasets.FakeData(50000, (3, 224, 224), 1000, transforms.ToTensor()) + else: + traindir = "./data/classification" + valdir = "./data/classification" + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.Resize((448,448)), + transforms.ToTensor(), + transforms.ColorJitter(), + transforms.GaussianBlur(3), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.Resize((448,448)), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True) + else: + train_sampler = None + val_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True, sampler=val_sampler) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, args) + + # evaluate on validation set + # acc1 = validate(val_loader, model, criterion, args) + acc1 =1 + scheduler.step() + + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer' : optimizer.state_dict(), + 'scheduler' : scheduler.state_dict() + }, is_best) + + +def train(train_loader, model, criterion, optimizer, epoch, args): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + images = 
images.cuda(args.gpu, non_blocking=True) + if torch.cuda.is_available(): + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 1)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i + 1) + + +def validate(val_loader, model, criterion, args): + + def run_validate(loader, base_progress=0): + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(loader): + i = base_progress + i + if args.gpu is not None: + images = images.cuda(args.gpu, non_blocking=True) + if torch.cuda.is_available(): + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 1)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i + 1) + + batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + progress = ProgressMeter( + len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + run_validate(val_loader) + if args.distributed: + top1.all_reduce() + top5.all_reduce() + + if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)): + aux_val_dataset = Subset(val_loader.dataset, + range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset))) + aux_val_loader = torch.utils.data.DataLoader( + aux_val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + run_validate(aux_val_loader, len(val_loader)) + + progress.display_summary() + + return top1.avg + + +def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): + torch.save(state, filename) + +class Summary(Enum): + NONE = 0 + AVERAGE = 1 + SUM = 2 + COUNT = 3 + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): + self.name = name + self.fmt = fmt + self.summary_type = summary_type + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def all_reduce(self): + device = "cuda" if torch.cuda.is_available() else "cpu" + total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device) + dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False) + self.sum, self.count = total.tolist() + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return 
fmtstr.format(**self.__dict__) + + def summary(self): + fmtstr = '' + if self.summary_type is Summary.NONE: + fmtstr = '' + elif self.summary_type is Summary.AVERAGE: + fmtstr = '{name} {avg:.3f}' + elif self.summary_type is Summary.SUM: + fmtstr = '{name} {sum:.3f}' + elif self.summary_type is Summary.COUNT: + fmtstr = '{name} {count:.3f}' + else: + raise ValueError('invalid summary type %r' % self.summary_type) + + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def display_summary(self): + entries = [" *"] + entries += [meter.summary() for meter in self.meters] + print(' '.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + '/' + fmt.format(num_batches) + ']' + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/create_PSOL_dataset.py b/create_PSOL_dataset.py new file mode 100644 index 0000000..725bd5d --- /dev/null +++ b/create_PSOL_dataset.py @@ -0,0 +1,30 @@ +import os +import shutil +from tqdm import tqdm +import _thread + +# Memory can't hold all the data at once, so we split the data into three parts. +file_lists = os.listdir("data/train_crop_low/") +if os.path.exists("data/PSOL_imgs")==False: + os.mkdir("data/PSOL_imgs") +def func1(): + for i in tqdm(file_lists[:40000]): + if os.path.exists("data/PSOL_imgs/train1")==False: + os.mkdir("data/PSOL_imgs/train1") + shutil.copyfile("data/train_crop_low/"+i,"data/PSOL_imgs/train1/"+i) + print("Done1") +def func2(): + for i in tqdm(file_lists[40000:]): + if os.path.exists("data/PSOL_imgs/train2")==False: + os.mkdir("data/PSOL_imgs/train2") + shutil.copyfile("data/train_crop_low/"+i,"data/PSOL_imgs/train2/"+i) + print("Done2") +def func3(): + for i in os.listdir("data/test_crop"): + if os.path.exists("data/PSOL_imgs/test")==False: + os.mkdir("data/PSOL_imgs/test") + shutil.copyfile("data/test_crop/"+i,"data/PSOL_imgs/test/"+i) + print("Done3") +func1() +func2() +func3() \ No newline at end of file diff --git a/create_classification_dataset.py b/create_classification_dataset.py new file mode 100644 index 0000000..00ffc8b --- /dev/null +++ b/create_classification_dataset.py @@ -0,0 +1,77 @@ +import cv2 +import os +import numpy as np +import random +from tqdm import tqdm +def crop(img, has_id = True): + """ + has_id: whether the returned crop should contain the TikTok id. + """ + if has_id: + # According to our prior knowledge, the TikTok id is in either the top or the bottom part of an image. + if img.shape[1] > img.shape[0]: # if the image is wider than it is tall (cols > rows) + crop10 = img[:int(img.shape[0]/5),:int(img.shape[1]/5*2)] # The TikTok id is not in the horizontal middle part.
+ crop11 = img[:int(img.shape[0]/5),int(img.shape[1]/5*3):] + crop1 = np.concatenate((crop10,crop11),axis=1) + crop20 = img[int(img.shape[0]/5*4):,:int(img.shape[1]/5*2)] + crop21 = img[int(img.shape[0]/5*4):,int(img.shape[1]/5*3):] + crop2 = np.concatenate((crop20,crop21),axis=1) + else: + crop1 = img[:int(img.shape[0]/10),:] + crop2 = img[int(img.shape[0]/10*9):int(img.shape[0]/10*10),:] + return np.concatenate((crop1,crop2)) + else: + if img.shape[1] > img.shape[0]: # if the image is wider than it is tall (cols > rows) + x1 = random.randint(1,3) + x2 = random.randint(1,3) + crop10 = img[int(img.shape[0]/5*(x1)):int(img.shape[0]/5*(x1+1)),:int(img.shape[1]/5*2)] # The TikTok id is not in the horizontal middle part. + crop11 = img[int(img.shape[0]/5*(x1)):int(img.shape[0]/5*(x1+1)),int(img.shape[1]/5*3):] + crop1 = np.concatenate((crop10,crop11),axis=1) + crop20 = img[int(img.shape[0]/5*(x2)):int(img.shape[0]/5*(x2+1)),:int(img.shape[1]/5*2)] + crop21 = img[int(img.shape[0]/5*(x2)):int(img.shape[0]/5*(x2+1)),int(img.shape[1]/5*3):] + crop2 = np.concatenate((crop20,crop21),axis=1) + else: + x1 = random.randint(2,7) + x2 = random.randint(2,7) + + crop1 = img[int(img.shape[0]/10*x1):int(img.shape[0]/10*(x1+1)),:] + crop2 = img[int(img.shape[0]/10*x2):int(img.shape[0]/10*(x2+1)),:] + if random.randint(1,8)<=2: + crop1 = np.zeros_like(crop1) + if random.randint(1,8)<=2: + crop2 = np.zeros_like(crop2) + img = np.concatenate((crop1,crop2)) + return np.concatenate((crop1,crop2)) + +def create_classification_dataset(files): + # Create an ImageNet-style folder dataset for training a classifier that predicts whether an image contains a TikTok id. + for file in tqdm(files): + img = cv2.imread(file) + if "train" in file: # only training-set images are downscaled + img = cv2.resize(img,(int(img.shape[1]/1.7),int(img.shape[0]/1.7))) + file = file.split("/")[-1] + for has_id in [False,True]: + if has_id: + img_has_id = crop(img,has_id) + if os.path.exists("./data/classification/has_id")==False: + os.mkdir("./data/classification/has_id") + cv2.imwrite("./data/classification/has_id/"+file,img_has_id) + else: + img_no_id = crop(img,has_id) + if os.path.exists("./data/classification/no_id")==False: + os.mkdir("./data/classification/no_id") + cv2.imwrite("./data/classification/no_id/"+file,img_no_id) + +file_path = ["./data/test_set_random/","./data/train_set_random/"] +# Randomly choose 20000 images (10000 train + 10000 test) to build the dataset. +files_train = random.sample(os.listdir(file_path[1]),10000) +files_train = [file_path[1]+i for i in files_train] +files_test = random.sample(os.listdir(file_path[0]),10000) +files_test = [file_path[0]+i for i in files_test] +files = files_train+files_test + +if os.path.exists("./data/classification/")==False: + os.mkdir("./data/classification/") +create_classification_dataset(files) \ No newline at end of file diff --git a/crop.py b/crop.py new file mode 100644 index 0000000..94246a7 --- /dev/null +++ b/crop.py @@ -0,0 +1,41 @@ +import cv2 +import os +import numpy as np +from tqdm import tqdm +file_path = ["./data/test_set_random/","./data/train_set_random/"] +def crop(img): + # According to our prior knowledge, the TikTok id is in either the top or the bottom part of an image. + if img.shape[1] > img.shape[0]: # if the image is wider than it is tall (cols > rows) + crop10 = img[:int(img.shape[0]/5),:int(img.shape[1]/5*2)] # The TikTok id is not in the horizontal middle part.
+ crop11 = img[:int(img.shape[0]/5),int(img.shape[1]/5*3):] + crop1 = np.concatenate((crop10,crop11),axis=1) + crop20 = img[int(img.shape[0]/5*4):,:int(img.shape[1]/5*2)] + crop21 = img[int(img.shape[0]/5*4):,int(img.shape[1]/5*3):] + crop2 = np.concatenate((crop20,crop21),axis=1) + else: + crop1 = img[:int(img.shape[0]/10),:] + crop2 = img[int(img.shape[0]/10*9):int(img.shape[0]/10*10),:] + return np.concatenate((crop1,crop2)) +def crop_list(file_path): + files = os.listdir(file_path) + for file in tqdm(files): + img = cv2.imread(file_path+file) + if "train" in file_path: + img_low = cv2.resize(img,(int(img.shape[1]/1.7),int(img.shape[0]/1.7))) + img_high = img + img_low = crop(img_low) + img_high = crop(img_high) + # Training images are saved at two scales: low (downscaled by 1.7) and high (original resolution). + if os.path.exists("./data/train_crop_low/")==False: + os.mkdir("./data/train_crop_low/") + if os.path.exists("./data/train_crop_high/")==False: + os.mkdir("./data/train_crop_high/") + cv2.imwrite("./data/train_crop_low/"+file,img_low) + cv2.imwrite("./data/train_crop_high/"+file,img_high) + else: + img = crop(img) + if os.path.exists("./data/test_crop/")==False: + os.mkdir("./data/test_crop/") + cv2.imwrite("./data/test_crop/"+file,img) +for i in file_path: + crop_list(i) \ No newline at end of file diff --git a/preprocess.sh b/preprocess.sh new file mode 100644 index 0000000..c5ca311 --- /dev/null +++ b/preprocess.sh @@ -0,0 +1,5 @@ +python3 crop.py +python3 create_classification_dataset.py +python3 classification_vgg.py +python3 create_PSOL_dataset.py +python3 PSOL/generate_box_imagenet_crop.py \ No newline at end of file
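
For reference, a minimal usage sketch of the NMS helper added in PSOL/utils/nms.py above. The boxes and scores are made-up illustrative values, and the import assumes the repository root is on PYTHONPATH:

from PSOL.utils.nms import nms

# Three candidate boxes in [x1, y1, x2, y2] pixel coordinates, with one confidence score each.
boxes = [[10, 10, 110, 110],    # box A
         [12, 12, 108, 108],    # nearly identical to box A
         [200, 50, 300, 150]]   # box B, disjoint from A
scores = [0.9, 0.8, 0.7]

# A box is suppressed when its IoU with an already-kept, higher-scoring box exceeds thresh,
# so the second box is dropped here and boxes A and B survive.
kept = nms(boxes, scores, thresh=0.5)
print(kept)

Note that nms returns the surviving boxes themselves (rows of a NumPy array) rather than their indices.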
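
Along the same lines, a hedged sketch of how the two-class VGG trained by classification_vgg.py could be run on a single cropped image. Assumptions: the checkpoint is the default 'checkpoint.pth.tar' written by save_checkpoint; the classifier head was replaced with the 2-way head shown in main_worker; training used a single GPU (args.gpu set), so the saved state_dict has no 'module.' prefixes; class index 0 maps to 'has_id' because ImageFolder sorts the class folders of ./data/classification alphabetically; and 'example_crop.jpg' is a hypothetical input path:

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Rebuild the training-time architecture: torchvision VGG-16 with a 2-way classifier head.
model = models.vgg16()
model.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096), nn.ReLU(True), nn.Dropout(),
    nn.Linear(4096, 4096), nn.ReLU(True), nn.Dropout(),
    nn.Linear(4096, 2),
)
ckpt = torch.load('checkpoint.pth.tar', map_location='cpu')
model.load_state_dict(ckpt['state_dict'])
model.eval()

# Same preprocessing as the validation pipeline: resize to 448x448, then normalize.
tf = transforms.Compose([
    transforms.Resize((448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = tf(Image.open('example_crop.jpg').convert('RGB')).unsqueeze(0)
with torch.no_grad():
    pred = model(img).argmax(1).item()
print('has_id' if pred == 0 else 'no_id')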