diff --git a/.github/trailer_0.gif b/.github/trailer_0.gif new file mode 100644 index 0000000..6c8f26b Binary files /dev/null and b/.github/trailer_0.gif differ diff --git a/README.md b/README.md index 0ca7c4c..bceab81 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ # PercepTreeV1 + + Official code repository for the papers:
@@ -33,8 +35,13 @@ All our datasets are made available to increase the adoption of deep learning fo SynthTree43k - A dataset containing 43 000 synthetic images and over 190 000 annotated trees. Includes images, train, test, and validation splits. - OneDrive + A dataset containing 43 000 synthetic images and over 190 000 annotated trees. Includes images, train, test, and validation splits. (84.6 GB) + S3 storage + + + SynthTree43k + Depth images. + soon CanaTree100 @@ -43,6 +50,12 @@ All our datasets are made available to increase the adoption of deep learning fo +The annotation files are already included in the download link, but some users have requested annotations for entire trees: +train_RGB_entire_tree.json, +val_RGB_entire_tree.json, +test_RGB_entire_tree.json. +Beware that, in my experience, training on entire-tree annotations can lower detection performance, although there may be something to gain with models that are not based on an RPN (square ROIs), such as Mask2Former; a registration sketch for these files is given below the Demos section. + ## Pre-trained models Pre-trained model weights are compatible with Detectron2 config files. All models are trained on our synthetic dataset SynthTree43k. @@ -97,6 +110,20 @@ We provide a demo file to try it out. +### Mask R-CNN fine-tuned on real images (`CanaTree100`) + + + + + + + + + + + +
+ Backbone + Description + Download
+ X-101-FPN + Trained on fold 01, good for inference. + model
+ ## Demos Once you have a working Detectron2 and OpenCV installation, running the demo is easy. @@ -110,7 +137,25 @@ Once you have a working Detectron2 and OpenCV installation, running the demo is -Open `demo_video.py` and uncomment the model config corresponding to pre-trained model weights you downloaded previously, comment the others. Default is X-101. - In `demo_video.py`, specify path to the video you want to try it on by setting the `video_path` variable. -The gif below shows how well the models trained on SynthTree43k transfer to real-world, without any fine-tuning on real-world images. -->
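For convenience, here is a minimal sketch of how the entire-tree annotation files mentioned in the Datasets section could be registered with Detectron2 (paths are placeholders; adjust them to wherever you extracted the images and JSON files):

```python
from detectron2.data.datasets import register_coco_instances

# Placeholder paths: point these at your local SynthTree43k images
# and the entire-tree annotation files listed above.
register_coco_instances("synthtree43k_train_entire", {},
                        "annotations/train_RGB_entire_tree.json", "path/to/synthtree/images")
register_coco_instances("synthtree43k_val_entire", {},
                        "annotations/val_RGB_entire_tree.json", "path/to/synthtree/images")
register_coco_instances("synthtree43k_test_entire", {},
                        "annotations/test_RGB_entire_tree.json", "path/to/synthtree/images")
```

The registered names can then be used in `cfg.DATASETS.TRAIN` / `cfg.DATASETS.TEST`, exactly like the `tree_*_set` names registered in `train_synth_RGB.py` below.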
- DINO illustration + DINO illustration
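If you only want to run a downloaded checkpoint on a single image (outside of `demo_video.py`), the pattern below is a minimal sketch; the config file must match the checkpoint you downloaded, and the weight/image paths are placeholders:

```python
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
# Pick the config that matches the downloaded weights (X-101 keypoint R-CNN shown as an example).
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1            # single "tree" class
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5  # kpCP, kpL, kpR, ax1, ax2
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.MODEL.WEIGHTS = "path/to/downloaded/model.pth"  # placeholder

predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("path/to/forest_image.png"))  # placeholder image path
print(len(outputs["instances"]), "trees detected")
```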
+ +# Bibtex +If you find our work helpful for your research, please consider citing the following BibTeX entries. +```bibtex +@article{grondin2022tree, + author = {Grondin, Vincent and Fortin, Jean-Michel and Pomerleau, François and Giguère, Philippe}, + title = {Tree detection and diameter estimation based on deep learning}, + journal = {Forestry: An International Journal of Forest Research}, + year = {2022}, + month = {10}, +} + +@inproceedings{grondin2022training, + title={Training Deep Learning Algorithms on Synthetic Forest Images for Tree Detection}, + author={Grondin, Vincent and Pomerleau, Fran{\c{c}}ois and Gigu{\`e}re, Philippe}, + booktitle={ICRA 2022 Workshop in Innovation in Forestry Robotics: Research and Industry Adoption}, + year={2022} +} +``` diff --git a/train_synth_RGB.py b/train_synth_RGB.py new file mode 100644 index 0000000..2aa84ad --- /dev/null +++ b/train_synth_RGB.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +# Some basic setup: +# Setup detectron2 logger +from detectron2.utils.logger import setup_logger +setup_logger() + +# import some common libraries +import torch; print(torch.__version__) +import os, json, cv2, random +import numpy as np +import time +import datetime + +# import some common detectron2 utilities +from detectron2 import model_zoo +from detectron2.engine import DefaultTrainer +from detectron2.data import build_detection_train_loader +from detectron2.engine import DefaultPredictor +from detectron2.config import get_cfg +from detectron2.utils.visualizer import Visualizer +from detectron2.utils.visualizer import ColorMode +from detectron2.data import MetadataCatalog, DatasetCatalog +from detectron2.structures import BoxMode +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.data import detection_utils as utils +from detectron2.data.datasets.coco import load_coco_json +import detectron2.data.transforms as T +import copy + +from detectron2.evaluation import COCOEvaluator, inference_on_dataset, LVISEvaluator +from detectron2.data import build_detection_test_loader +from detectron2.engine import HookBase +import detectron2.utils.comm as comm +from detectron2.evaluation import inference_context +from detectron2.utils.logger import log_every_n_seconds +from detectron2.data.dataset_mapper import DatasetMapper +from detectron2.engine.hooks import PeriodicWriter + +import albumentations as A +from pycocotools.coco import COCO, maskUtils +import logging +import pandas as pd +from tensorboard import version; print(version.VERSION) +from tqdm import tqdm +from itertools import chain + + +def test_mapper(dataset_dict): + # Implement a mapper, similar to the default DatasetMapper, but with your own customizations + # This mapper uses the power of the albumentations library to apply data augmentation (DA) + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format="BGR") + + # get annotations + bboxes = [ann['bbox'] for ann in dataset_dict['annotations']] + labels = [ann['category_id'] for ann in dataset_dict['annotations']] + keypoints = np.array([ann['keypoints'] for ann in dataset_dict['annotations']]).reshape((-1, 3)) + masks = [maskUtils.decode(ann['segmentation']) for ann in dataset_dict['annotations']] + + # FDA things + # im_name='/home/vince/repos/coco-annotator/datasets/essai_03/image_00000_RGB.png' + # target_image = utils.read_image(im_name, format="BGR") + + # Configure data augmentation -> 
https://albumentations.ai/docs/getting_started/transforms_and_targets/ + transform = A.Compose([ + A.RandomCrop(720, 720, p=0.0), + ], keypoint_params=A.KeypointParams(format='xy', remove_invisible=False), + bbox_params=A.BboxParams(format='coco', label_fields=['bbox_ids'], min_visibility=0.1)) + + transformed = transform(image=image, + masks=masks, + bboxes=bboxes, + keypoints=keypoints, + category_id=labels, + bbox_ids=np.arange(len(bboxes))) + + transformed_image = transformed["image"] + h, w, _ = transformed_image.shape + visible_ids = transformed['bbox_ids'] + transformed_masks = [maskUtils.encode(np.asfortranarray(mask)) for mask in np.array(transformed["masks"])[visible_ids]] + transformed_bboxes = np.array(transformed["bboxes"]) + transformed_keypoints = np.array(transformed['keypoints']).reshape((-1, 5, 3))[visible_ids] # Ideally find a way to retrieve NUM_KEYPOINTS instead of hardcoding + for keypoints in transformed_keypoints: + for keypoint in keypoints: + if keypoint[0] > w or keypoint[0] < 0 or keypoint[1] > h or keypoint[1] < 0: + keypoint[0:2] = [-0.5, -0.5] + keypoint[2] = 0 + + # check if horizontal flip + for keypoints in transformed_keypoints: + if keypoints[1][0] > keypoints[2][0]: + temp_kp = np.copy(keypoints[2]) + keypoints[2] = keypoints[1] + keypoints[1] = temp_kp + + transformed_labels = np.array(transformed['category_id']) + dataset_dict["image"] = torch.as_tensor(transformed_image.transpose(2, 0, 1).astype("float32")) + annos = [ + { + 'iscrowd': 0, + 'bbox': transformed_bboxes[i].tolist(), + 'keypoints': transformed_keypoints[i].tolist(), + 'segmentation': transformed_masks[i], + 'category_id': transformed_labels[i], + 'bbox_mode': BoxMode.XYWH_ABS, + } + for i in range(len(transformed_bboxes)) + ] + dataset_dict['annotations'] = annos + instances = utils.annotations_to_instances(annos, image.shape[:2], mask_format="bitmask") + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict + + +def albumentations_mapper(dataset_dict): + # Implement a mapper, similar to the default DatasetMapper, but with your own customizations + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format="BGR") + + # get annotations + bboxes = [ann['bbox'] for ann in dataset_dict['annotations']] + labels = [ann['category_id'] for ann in dataset_dict['annotations']] + keypoints = np.array([ann['keypoints'] for ann in dataset_dict['annotations']]).reshape((-1, 3)) + masks = [maskUtils.decode(ann['segmentation']) for ann in dataset_dict['annotations']] + + # Configure data augmentation -> https://albumentations.ai/docs/getting_started/transforms_and_targets/ + transform = A.Compose([ + A.HorizontalFlip(p=0.5), + A.RandomCrop(720, 720, p=1.0), + A.RandomBrightnessContrast(p=0.3, brightness_limit=[-0.1, 0.1], contrast_limit=[-0.1, 0.3], brightness_by_max=True), + + A.GaussNoise(p=0.2, var_limit=(10.0, 50.0), mean=0, per_channel=True), + A.GlassBlur(p=0.1, sigma=0.6, max_delta=3, iterations=2, mode='fast'), + A.ISONoise(p=0.2, color_shift=(0.01, 0.05), intensity=(0.1, 0.5)), + + A.HueSaturationValue(p=0.3, sat_shift_limit=0.25, hue_shift_limit=0, val_shift_limit=0), + A.MotionBlur(p=0.2, blur_limit=7), + A.Perspective(p=0.2), + ], keypoint_params=A.KeypointParams(format='xy', remove_invisible=False), + bbox_params=A.BboxParams(format='coco', label_fields=['bbox_ids'], min_visibility=0.1)) + + transformed = transform(image=image, + masks=masks, + bboxes=bboxes, + 
keypoints=keypoints, + category_id=labels, + bbox_ids=np.arange(len(bboxes))) + + transformed_image = transformed["image"] + h, w, _ = transformed_image.shape + visible_ids = transformed['bbox_ids'] + transformed_masks = [maskUtils.encode(np.asfortranarray(mask)) for mask in np.array(transformed["masks"])[visible_ids]] + transformed_bboxes = np.array(transformed["bboxes"]) + transformed_keypoints = np.array(transformed['keypoints']).reshape((-1, 5, 3))[visible_ids] # Ideally find a way to retrieve NUM_KEYPOINTS instead of hardcoding + for keypoints in transformed_keypoints: + for keypoint in keypoints: + if keypoint[0] > w or keypoint[0] < 0 or keypoint[1] > h or keypoint[1] < 0: + keypoint[0:2] = [-0.5, -0.5] + keypoint[2] = 0 + + # check if horizontal flip + for keypoints in transformed_keypoints: + if keypoints[1][0] > keypoints[2][0]: + temp_kp = np.copy(keypoints[2]) + keypoints[2] = keypoints[1] + keypoints[1] = temp_kp + + transformed_labels = np.array(transformed['category_id']) + dataset_dict["image"] = torch.as_tensor(transformed_image.transpose(2, 0, 1).astype("float32")) + annos = [ + { + 'iscrowd': 0, + 'bbox': transformed_bboxes[i].tolist(), + 'keypoints': transformed_keypoints[i].tolist(), + 'segmentation': transformed_masks[i], + 'category_id': transformed_labels[i], + 'bbox_mode': BoxMode.XYWH_ABS, + } + for i in range(len(transformed_bboxes)) + ] + dataset_dict['annotations'] = annos + instances = utils.annotations_to_instances(annos, image.shape[:2], mask_format="bitmask") + dataset_dict["instances"] = utils.filter_empty_instances(instances) + return dataset_dict + + + +# https://github.com/facebookresearch/detectron2/issues/1763 +# https://gilberttanner.com/blog/detectron-2-object-detection-with-pytorch +class MyTrainer(DefaultTrainer): + @classmethod + def build_train_loader(cls, cfg): + return build_detection_train_loader( + cfg, mapper=albumentations_mapper + ) + + @classmethod + def build_test_loader(cls, cfg, dataset_name): + return build_detection_test_loader( + cfg, dataset_name, mapper=test_mapper + ) + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + return COCOEvaluator(dataset_name, ("bbox", "segm", "keypoints"), False, output_dir=output_folder, kpt_oks_sigmas=(.25, .25, .25, .25, .25)) # ("bbox", "segm", "keypoints") + + def build_hooks(self): + hooks = super(MyTrainer, self).build_hooks() + cfg = self.cfg + if len(cfg.DATASETS.TEST) > 0: + loss_eval_hook = LossEvalHook( + cfg.TEST.EVAL_PERIOD, + self.model, + MyTrainer.build_test_loader(cfg, cfg.DATASETS.TEST[0]), + ) + hooks.insert(-1, loss_eval_hook) + + return hooks + + +class LossEvalHook(HookBase): + def __init__(self, eval_period, model, data_loader): + self._model = model + self._period = eval_period + self._data_loader = data_loader + + def _do_loss_eval(self): + # Copying inference_on_dataset from evaluator.py + total = len(self._data_loader) + num_warmup = min(5, total - 1) + + start_time = time.perf_counter() + total_compute_time = 0 + losses = [] + for idx, inputs in enumerate(self._data_loader): + if idx == num_warmup: + start_time = time.perf_counter() + total_compute_time = 0 + start_compute_time = time.perf_counter() + if torch.cuda.is_available(): + torch.cuda.synchronize() + total_compute_time += time.perf_counter() - start_compute_time + iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) + seconds_per_img = total_compute_time / iters_after_start + if 
idx >= num_warmup * 2 or seconds_per_img > 5: + total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start + eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) + log_every_n_seconds( + logging.INFO, + "Loss on Validation done {}/{}. {:.4f} s / img. ETA={}".format( + idx + 1, total, seconds_per_img, str(eta) + ), + n=5, + ) + loss_batch = self._get_loss(inputs) + losses.append(loss_batch) + mean_loss = np.mean(losses) + # self.trainer.storage.put_scalar('validation_loss', mean_loss) + comm.synchronize() + + # return losses + return mean_loss + + def _get_loss(self, data): + # How loss is calculated on train_loop + metrics_dict = self._model(data) + metrics_dict = { + k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) + for k, v in metrics_dict.items() + } + total_losses_reduced = sum(loss for loss in metrics_dict.values()) + return total_losses_reduced + + def after_step(self): + next_iter = int(self.trainer.iter) + 1 + is_final = next_iter == self.trainer.max_iter + if is_final or (self._period > 0 and next_iter % self._period == 0): + mean_loss = self._do_loss_eval() + self.trainer.storage.put_scalars(validation_loss=mean_loss) + print("validation do loss eval", mean_loss) + else: + pass + +# name of the .pth file +model_name = 'your-coco-pretrained-weights.pth' + +img_dir = 'path/to/synthtree/images' + +if __name__ == "__main__": + + torch.cuda.is_available() + + coco_train_filename='./output/train_RGB.json' + coco_val_filename='./output/val_RGB.json' + coco_test_filename='./output/test_RGB.json' + + train_dataset_name="tree_train_set" + val_dataset_name="tree_val_set" + test_dataset_name="tree_test_set" + + logger = setup_logger(name=__name__) + + dicts_train = load_coco_json(coco_train_filename, img_dir, train_dataset_name) + logger.info("Done loading {} samples.".format(len(dicts_train))) + dicts_val = load_coco_json(coco_val_filename, img_dir, val_dataset_name) + logger.info("Done loading {} samples.".format(len(dicts_val))) + dicts_test = load_coco_json(coco_test_filename, img_dir, test_dataset_name) + logger.info("Done loading {} samples.".format(len(dicts_test))) + + for d in ["train_set"]: + DatasetCatalog.register("tree_" + d, lambda d=d: dicts_train) + MetadataCatalog.get("tree_" + d).set(thing_classes=["tree"], keypoint_names=["kpCP", "kpL", "kpR", "ax1", "ax2"], keypoint_flip_map=[]) + + for d in ["val_set"]: + DatasetCatalog.register("tree_" + d, lambda d=d: dicts_val) + MetadataCatalog.get("tree_" + d).set(thing_classes=["tree"], keypoint_names=["kpCP", "kpL", "kpR", "ax1", "ax2"], keypoint_flip_map=[]) + + for d in ["test_set"]: + DatasetCatalog.register("tree_" + d, lambda d=d: dicts_test) + MetadataCatalog.get("tree_" + d).set(thing_classes=["tree"], keypoint_names=["kpCP", "kpL", "kpR", "ax1", "ax2"], keypoint_flip_map=[]) + + + cfg = get_cfg() + # cfg = LazyConfig.load(model_zoo.get_config_file("new_baselines/mask_rcnn_R_101_FPN_400ep_LSJ.py")) + # cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml")) + # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml")) + cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")) + # cfg.merge_from_list(opts) # 'opts' is not defined in this script; enable only if command-line overrides are parsed + cfg.DATASETS.TRAIN = ("tree_train_set",) + cfg.DATASETS.VAL = ("tree_val_set",) + cfg.DATASETS.TEST = ("tree_test_set",) + cfg.DATALOADER.NUM_WORKERS = 8 + # better to load the weights from a COCO model rather than a 
COCO-keypoint model + # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_name) + cfg.INPUT.MASK_FORMAT = "bitmask" + cfg.SOLVER.IMS_PER_BATCH = 4 # 8 + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 + cfg.SOLVER.GAMMA = 0.1 + cfg.SOLVER.STEPS = [10000, 30000] + cfg.SOLVER.BASE_LR = 0.002 # pick a good LR + cfg.SOLVER.MAX_ITER = 60000 + cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 # faster (default: 512) + cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (tree) + cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1 + cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5 + cfg.TEST.KEYPOINT_OKS_SIGMAS = (.25, .25, .25, .25, .25) + cfg.MODEL.BACKBONE.FREEZE_AT = 2 + cfg.SOLVER.CHECKPOINT_PERIOD = 5000 + cfg.TEST.EVAL_PERIOD = 2000 # evaluate every 2000 iterations; set to 0 to disable evaluation during training + cfg.INPUT.MIN_SIZE_TEST = 0 # no resize at test time + + cfg.CUDNN_BENCHMARK = True + cfg.MODEL.MASK_ON = True + cfg.MODEL.KEYPOINT_ON = True + cfg.OUTPUT_DIR = './output' + os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) + + trainer = MyTrainer(cfg) + trainer.resume_or_load(resume=True) + trainer.train() + + metrics_df = pd.read_json(cfg.OUTPUT_DIR + "/metrics.json", orient="records", lines=True) + mdf = metrics_df.sort_values("iteration") + # print(mdf) + + cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 + # cfg.INPUT.MIN_SIZE_TEST = 0 # no resize at test time + + predictor_synth = DefaultPredictor(cfg) + + dir_fold_test = cfg.OUTPUT_DIR + "/eval_0" + os.makedirs(dir_fold_test, exist_ok=True) + evaluator = COCOEvaluator("tree_test_set", cfg, False, output_dir=dir_fold_test) + val_loader = build_detection_test_loader(cfg, "tree_test_set") + print(inference_on_dataset(predictor_synth.model, val_loader, evaluator)) + + + # visualize detections + dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TEST])) + random.shuffle(dicts) + tree_metadata = MetadataCatalog.get("tree_val_set") + for dic in tqdm(dicts): + img = utils.read_image(dic["file_name"], "BGR") + outputs_synth = predictor_synth(img) + v_synth = Visualizer(img[:, :, ::-1], + metadata=tree_metadata, + scale=1, + instance_mode=ColorMode.IMAGE # use ColorMode.IMAGE_BW instead to gray out regions without instances + ) + + # remove keypoints + # outputs_synth["instances"].remove('pred_keypoints') + + out_synth = v_synth.draw_instance_predictions(outputs_synth["instances"].to("cpu")) + + cv2.imshow('predictions', out_synth.get_image()[:, :, ::-1]) + # cv2.imshow('predictions', img) + k = cv2.waitKey(0) + + # exit loop if esc is pressed + if k == 27: + cv2.destroyAllWindows() + break + cv2.destroyAllWindows()
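The `metrics.json` that the script loads into `metrics_df` can also be plotted to compare the training loss with the validation loss logged by `LossEvalHook`. A possible sketch (assuming matplotlib is installed and training has produced `./output/metrics.json`):

```python
import pandas as pd
import matplotlib.pyplot as plt

# metrics.json is written by Detectron2 into cfg.OUTPUT_DIR during training.
mdf = pd.read_json("./output/metrics.json", orient="records", lines=True).sort_values("iteration")

train = mdf.dropna(subset=["total_loss"])
plt.plot(train["iteration"], train["total_loss"], label="total_loss")

# 'validation_loss' is only present if LossEvalHook ran (cfg.TEST.EVAL_PERIOD > 0).
if "validation_loss" in mdf.columns:
    val = mdf.dropna(subset=["validation_loss"])
    plt.plot(val["iteration"], val["validation_loss"], label="validation_loss")

plt.xlabel("iteration")
plt.ylabel("loss")
plt.legend()
plt.show()
```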