import sys
import os
import requests
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from model import mae_model as models_mae

# define the utils

imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])

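# show_image() below inverts this normalization (image * std + mean) and
# rescales to [0, 255] for display.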
def show_image(image, title=''):
    # image is [H, W, 3]
    assert image.shape[2] == 3
    plt.imshow(torch.clamp((image * imagenet_std + imagenet_mean) * 255, 0, 255).int())
    plt.title(title, fontsize=16)
    plt.axis('off')

def prepare_model(chkpt_dir, arch='mae_vit_large_patch8'):
    # build model
    model = models_mae.__dict__[arch](img_size=640)
    # load checkpoint weights
    checkpoint = torch.load(chkpt_dir, map_location='cpu')
    msg = model.load_state_dict(checkpoint['model'], strict=False)
    print(msg)
    return model
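# Note: strict=False lets load_state_dict() skip keys that do not match the
# checkpoint; the printed message lists any missing/unexpected keys.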

def crop_center(pil_img, crop_width, crop_height):
    img_width, img_height = pil_img.size
    return pil_img.crop(((img_width - crop_width) // 2,
                         (img_height - crop_height) // 2,
                         (img_width + crop_width) // 2,
                         (img_height + crop_height) // 2))
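# e.g. on a 2048x1024 Cityscapes frame, crop_center(img, 768, 768) keeps the
# centered box (left, top, right, bottom) = (640, 128, 1408, 896).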

def run_one_image(img, model):
    x = torch.tensor(img)

    # make it batch-like
    x = x.unsqueeze(dim=0)
    x = torch.einsum('nhwc->nchw', x)

    # run MAE
    loss, y, mask = model(x.float(), mask_ratio=0.75)
    y = model.unpatchify(y)
    y = torch.einsum('nchw->nhwc', y).detach().cpu()

    # visualize the mask
    mask = mask.detach()
    mask = mask.unsqueeze(-1).repeat(1, 1, model.patch_embed.patch_size[0] ** 2 * 3)  # (N, H*W, p*p*3)
    mask = model.unpatchify(mask)  # 1 is removing, 0 is keeping
    mask = torch.einsum('nchw->nhwc', mask).detach().cpu()

    x = torch.einsum('nchw->nhwc', x)

    # masked image
    im_masked = x * (1 - mask)

    # MAE reconstruction pasted with visible patches
    im_paste = x * (1 - mask) + y * mask

    # make the plt figure larger
    plt.rcParams['figure.figsize'] = [24, 24]

    plt.subplot(1, 4, 1)
    show_image(x[0], "original")

    plt.subplot(1, 4, 2)
    show_image(im_masked[0], "masked")

    plt.subplot(1, 4, 3)
    show_image(y[0], "reconstruction")

    plt.subplot(1, 4, 4)
    show_image(im_paste[0], "reconstruction + visible")

    plt.show()
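# With the patch-8, 640x640 config from prepare_model(), mask is (N, 6400):
# one 0/1 entry per 8x8 patch, expanded to 8 * 8 * 3 = 192 pixel values
# before unpatchify() maps it back to image shape.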

# load an image
img = Image.open("/home/wangbowen/DATA/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/test/berlin/berlin_000362_000019_leftImg8bit.png")
img = crop_center(img, 768, 768)
img = img.resize((640, 640))
img = np.array(img) / 255.

# normalize by ImageNet mean and std
img = img - imagenet_mean
img = img / imagenet_std

# plt.rcParams['figure.figsize'] = [5, 5]
# show_image(torch.tensor(img))
# plt.show()
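# A hedged alternative if the hard-coded path above is unavailable: fetch any
# RGB test image over HTTP via the already-imported requests (placeholder URL),
# then rerun the crop/resize/normalize steps above:
# img = Image.open(requests.get('https://example.com/test.png', stream=True).raw).convert('RGB')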

# This is an MAE model trained with pixels as targets for visualization (ViT-Large, training mask ratio=0.75)

model_mae_gan = prepare_model('save_model/8_640_mae_pre_checkpoint-179.pth', 'mae_vit_large_patch8')
print('Model loaded.')

# torch.manual_seed(2)
print('MAE with extra GAN loss:')
run_one_image(img, model_mae_gan)
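# Note: assuming the standard MAE random masking, each forward pass samples a
# new mask; uncomment torch.manual_seed(2) above for a reproducible result.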