diff --git a/.github/pred_synth_to_real.gif b/.github/pred_synth_to_real.gif
new file mode 100644
index 0000000..67a4667
Binary files /dev/null and b/.github/pred_synth_to_real.gif differ
diff --git a/README.md b/README.md
index b2a1d32..959a1e4 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,78 @@
 # PercepTreeV1
-Tree detection in forests based on deep learning.
+Official code repository for the paper "Training Deep Learning Algorithms on Synthetic Forest Images for Tree Detection" [link coming], presented at the ICRA IFRRIA Workshop.
+Version 1 of this project is trained on the synthetic forest dataset `SynthTree43k`, but we will soon release models fine-tuned on real-world images. Plans to release SynthTree43k are underway.
+
+The gif below shows how well models trained on SynthTree43k transfer to the real world, without any fine-tuning on real-world images.
+<div align="center">
+  <img src=".github/pred_synth_to_real.gif" alt="DINO illustration"/>
+</div>
+
+## Dataset
+Soon to be released.
+
+## Pre-trained models
+Pre-trained model weights are compatible with Detectron2 config files.
+All models are trained on our synthetic dataset SynthTree43k.
+We provide demo files to try them out; a minimal loading sketch also follows the table below.
+
+### Mask R-CNN
+
+| Backbone   | Modality | box AP50 | mask AP50 | Download |
+|------------|----------|----------|-----------|----------|
+| R-50-FPN   | RGB      | 87.74    | 69.36     | model    |
+| R-101-FPN  | RGB      | 88.51    | 70.53     | model    |
+| X-101-FPN  | RGB      | 88.91    | 71.07     | model    |
+| R-50-FPN   | Depth    | 89.67    | 70.66     | model    |
+| R-101-FPN  | Depth    | 89.89    | 71.65     | model    |
+| X-101-FPN  | Depth    | 87.41    | 68.19     | model    |
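+
+Below is a minimal sketch of loading a downloaded checkpoint with the matching Detectron2 config. The weight file name and image path are only examples taken from the demo scripts; adapt them to your setup.
+
+```python
+import cv2
+from detectron2 import model_zoo
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+
+cfg = get_cfg()
+cfg.INPUT.MASK_FORMAT = "bitmask"
+# pick the config matching the downloaded backbone (X-101 shown here)
+cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
+cfg.DATASETS.TEST = ()
+cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1            # single class: tree
+cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5
+cfg.MODEL.MASK_ON = True
+cfg.MODEL.WEIGHTS = "./output/X-101_RGB_60k.pth"   # downloaded weights (example name)
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
+
+predictor = DefaultPredictor(cfg)
+outputs = predictor(cv2.imread("./output/image_00000_RGB.png"))
+print(len(outputs["instances"]), "trees detected")
+```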
+
+## Demos
+Once you have a working Detectron2 and OpenCV installation, running the demos is easy.
+
+### Demo on a single image
+- Download the pre-trained model weights and save them in the `/output` folder of your local PercepTreeV1 repo.
+- Open `demo_single_frame.py` and uncomment the model config corresponding to the pre-trained weights you downloaded; comment out the others. The default is X-101. Set `model_name` to the file name of the downloaded weights, e.g. 'X-101_RGB_60k.pth'.
+- In `demo_single_frame.py`, set the `image_path` variable to the image you want to try it on, then run `python demo_single_frame.py`.
+
+### Demo on video
+- Download the pre-trained model weights and save them in the `/output` folder of your local PercepTreeV1 repo.
+- Open `demo_video.py` and uncomment the model config corresponding to the pre-trained weights you downloaded; comment out the others. The default is X-101.
+- In `demo_video.py`, set the `video_path` variable to the video you want to try it on, then run `python demo_video.py`.
diff --git a/demo_single_frame.py b/demo_single_frame.py
new file mode 100755
index 0000000..ac445db
--- /dev/null
+++ b/demo_single_frame.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Test a trained network on a single image.
+"""
+from __future__ import absolute_import
+
+# Setup detectron2 logger
+from detectron2.utils.logger import setup_logger
+setup_logger()
+
+# import some common libraries
+import os
+import cv2
+import torch
+
+# import detectron2 utilities
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.data import MetadataCatalog
+from detectron2.utils.visualizer import Visualizer
+
+
+# local paths to model weights and test image
+model_name = 'X-101_RGB_60k.pth'
+image_path = './output/image_00000_RGB.png'
+
+if __name__ == "__main__":
+    logger = setup_logger(name=__name__)
+    logger.info("CUDA available: %s", torch.cuda.is_available())
+
+    # All configurables are listed in detectron2/config/defaults.py
+    cfg = get_cfg()
+    cfg.INPUT.MASK_FORMAT = "bitmask"
+    # Uncomment the config matching the downloaded weights (default: X-101)
+    cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
+    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml"))
+    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
+    cfg.DATASETS.TRAIN = ()
+    cfg.DATASETS.TEST = ()
+    cfg.DATALOADER.NUM_WORKERS = 8
+    cfg.SOLVER.IMS_PER_BATCH = 8
+    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256  # faster (default: 512)
+    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (tree)
+    cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1
+    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5
+    cfg.MODEL.MASK_ON = True
+
+    cfg.OUTPUT_DIR = './output'
+    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_name)
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
+    # cfg.INPUT.MIN_SIZE_TEST = 0  # no resize at test time
+
+    # set detector
+    predictor_synth = DefaultPredictor(cfg)
+
+    # set metadata: a single "Tree" class with five keypoints
+    tree_metadata = MetadataCatalog.get("my_tree_dataset").set(thing_classes=["Tree"],
+                                                               keypoint_names=["kpCP", "kpL", "kpR", "AX1", "AX2"])
+
+    # inference on a single image, then draw and display the predictions
+    im = cv2.imread(image_path)
+    outputs_pred = predictor_synth(im)
+    v_synth = Visualizer(im[:, :, ::-1],
+                         metadata=tree_metadata,
+                         scale=1,
+                         )
+    out_synth = v_synth.draw_instance_predictions(outputs_pred["instances"].to("cpu"))
+    cv2.imshow('predictions', out_synth.get_image()[:, :, ::-1])
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/demo_video.py b/demo_video.py
new file mode 100755
index 0000000..2c2e97c
--- /dev/null
+++ b/demo_video.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Test a trained network on a video.
+"""
+from __future__ import absolute_import
+
+# Setup detectron2 logger
+from detectron2.utils.logger import setup_logger
+setup_logger()
+
+# import some common libraries
+import os
+import cv2
+import torch
+
+# import detectron2 utilities
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.data import MetadataCatalog
+from detectron2.utils.video_visualizer import VideoVisualizer
+
+
+# local paths to model weights and test video
+model_name = 'X-101_RGB_60k.pth'
+video_path = './output/forest_walk_1min.mp4'
+
+if __name__ == "__main__":
+    logger = setup_logger(name=__name__)
+    logger.info("CUDA available: %s", torch.cuda.is_available())
+
+    # All configurables are listed in detectron2/config/defaults.py
+    cfg = get_cfg()
+    cfg.INPUT.MASK_FORMAT = "bitmask"
+    # Uncomment the config matching the downloaded weights (default: X-101)
+    cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
+    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml"))
+    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
+    cfg.DATASETS.TRAIN = ()
+    cfg.DATASETS.TEST = ()
+    cfg.DATALOADER.NUM_WORKERS = 8
+    cfg.SOLVER.IMS_PER_BATCH = 8
+    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256  # faster (default: 512)
+    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (tree)
+    cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1
+    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5
+    cfg.MODEL.MASK_ON = True
+
+    cfg.OUTPUT_DIR = './output'
+    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_name)
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
+    # cfg.INPUT.MIN_SIZE_TEST = 0  # no resize at test time
+
+    # set detector
+    predictor_synth = DefaultPredictor(cfg)
+
+    # set metadata: a single "Tree" class with five keypoints
+    tree_metadata = MetadataCatalog.get("my_tree_dataset").set(thing_classes=["Tree"],
+                                                               keypoint_names=["kpCP", "kpL", "kpR", "AX1", "AX2"])
+
+    # open the video
+    vcap = cv2.VideoCapture(video_path)
+
+    # get video properties
+    w = int(vcap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(vcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = int(vcap.get(cv2.CAP_PROP_FPS))
+    n_frames = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    # Optional video recorder: uncomment to save the predictions to a file
+    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    # video = cv2.VideoWriter("pred_and_track_00.mp4", fourcc, 5, (w, h))
+
+    # Check if the video opened successfully
+    if not vcap.isOpened():
+        print("Error opening video stream or file")
+
+    vid_vis = VideoVisualizer(metadata=tree_metadata)
+
+    nframes = 0
+    while vcap.isOpened():
+        ret, frame = vcap.read()
+        # if the frame is read correctly, ret is True
+        if not ret:
+            print("Can't receive frame (stream end?). Exiting ...")
+            break
+        # optional crop: by default keep the full frame
+        y = 0
+        # h = 800
+        x = 0
+        # w = 800
+        crop_frame = frame[y:y+h, x:x+w]
+        # cv2.imshow('frame', crop_frame)
+        if cv2.waitKey(1) == ord('q'):
+            break
+
+        # run the detector on every 12th frame only, to keep the demo responsive
+        if nframes % 12 == 0:
+            outputs_pred = predictor_synth(crop_frame)
+            # v_synth = Visualizer(crop_frame[:, :, ::-1],
+            #                      metadata=tree_metadata,
+            #                      scale=1,
+            #                      instance_mode=ColorMode.IMAGE  # remove color from image, better to see instances
+            #                      )
+            out = vid_vis.draw_instance_predictions(crop_frame, outputs_pred["instances"].to("cpu"))
+
+            vid_frame = out.get_image()
+            # video.write(vid_frame)
+            cv2.imshow('frame', vid_frame)
+
+        nframes += 1
+
+    # video.release()
+    vcap.release()
+    cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/output/forest_walk_1min.mp4 b/output/forest_walk_1min.mp4
new file mode 100644
index 0000000..620ccfa
Binary files /dev/null and b/output/forest_walk_1min.mp4 differ
diff --git a/output/image_00000_RGB.png b/output/image_00000_RGB.png
new file mode 100644
index 0000000..4d97a79
Binary files /dev/null and b/output/image_00000_RGB.png differ