Showing 6 changed files with 265 additions and 1 deletion.
README.md
@@ -1,2 +1,78 @@
# PercepTreeV1
Tree detection in forests based on deep learning.

Official code repository for the paper *Training Deep Learning Algorithms on Synthetic Forest Images for Tree Detection* [link coming], presented at the ICRA IFRRIA Workshop.

Version 1 of this project uses the synthetic forest dataset `SynthTree43k`; models fine-tuned on real-world images will be released soon, and plans to release SynthTree43k itself are underway.

The gif below shows how well the models trained on SynthTree43k transfer to real-world footage, without any fine-tuning on real-world images.
<div align="center">
  <img width="100%" alt="Predictions on real-world forest footage" src=".github/pred_synth_to_real.gif">
</div>
## Dataset
Soon to be released.
## Pre-trained models
Pre-trained model weights are compatible with Detectron2 config files.
All models are trained on our synthetic dataset SynthTree43k.
We provide demo files to try them out; a minimal loading sketch also follows the table below.
### Mask R-CNN
<table>
  <tr>
    <th>Backbone</th>
    <th>Modality</th>
    <th>box AP50</th>
    <th>mask AP50</th>
    <th>Download</th>
  </tr>
  <tr>
    <td>R-50-FPN</td>
    <td>RGB</td>
    <td>87.74</td>
    <td>69.36</td>
    <td><a href="https://drive.google.com/file/d/1pnJZ3Vc0SVTn_J8l_pwR4w1LMYnFHzhV/view?usp=sharing">model</a></td>
  </tr>
  <tr>
    <td>R-101-FPN</td>
    <td>RGB</td>
    <td>88.51</td>
    <td>70.53</td>
    <td><a href="https://drive.google.com/file/d/1ApKm914PuKm24kPl0sP7-XgG_Ottx5tJ/view?usp=sharing">model</a></td>
  </tr>
  <tr>
    <td>X-101-FPN</td>
    <td>RGB</td>
    <td>88.91</td>
    <td>71.07</td>
    <td><a href="https://drive.google.com/file/d/1Q5KV5beWVZXK_vlIED1jgpf4XJgN71ky/view?usp=sharing">model</a></td>
  </tr>
  <tr>
    <td>R-50-FPN</td>
    <td>Depth</td>
    <td>89.67</td>
    <td>70.66</td>
    <td><a href="https://drive.google.com/file/d/1bnH7ZSXWoOJx5AkbNeHf_McV46qiKIkY/view?usp=sharing">model</a></td>
  </tr>
  <tr>
    <td>R-101-FPN</td>
    <td>Depth</td>
    <td>89.89</td>
    <td>71.65</td>
    <td><a href="https://drive.google.com/file/d/1DgMscnTIGty7y9-VNcq1zERrevfT3b_L/view?usp=sharing">model</a></td>
  </tr>
  <tr>
    <td>X-101-FPN</td>
    <td>Depth</td>
    <td>87.41</td>
    <td>68.19</td>
    <td><a href="https://drive.google.com/file/d/1rsCbLSvFf2I47FJK4vhhv0du5uCV6zjO/view?usp=sharing">model</a></td>
  </tr>
</table>
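
For reference, a minimal sketch of loading one of the checkpoints above with Detectron2. It assumes Detectron2 is installed and the weights are saved under `./output`, mirroring the demo scripts in this repository; adjust the file name and path to your download:

```python
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
# X-101-FPN config; swap in the R-50/R-101 keypoint config files for the other backbones
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1            # single class: tree
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5  # five tree keypoints
cfg.MODEL.MASK_ON = True
cfg.MODEL.WEIGHTS = "./output/X-101_RGB_60k.pth"  # path to the downloaded checkpoint
predictor = DefaultPredictor(cfg)
```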
## Demos
Once you have a working Detectron2 and OpenCV installation, running the demos is easy.
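If you still need those dependencies, one common route is sketched below; this is an assumption about your setup, and the right Detectron2 build depends on your PyTorch and CUDA versions (see the Detectron2 installation docs):

```
pip install opencv-python
pip install 'git+https://github.com/facebookresearch/detectron2.git'
```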
### Demo on a single image
- Download the pre-trained model weights and save them in the `/output` folder of your local PercepTreeV1 repository.
- Open `demo_single_frame.py` and uncomment the model config corresponding to the pre-trained weights you downloaded; comment out the others (the default is X-101). Set `model_name` to the file name of your downloaded model, e.g. `'X-101_RGB_60k.pth'`.
- In `demo_single_frame.py`, specify the path to the image you want to try it on by setting the `image_path` variable, then run the script as shown below.
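
Assuming the weights and image path are set, the demo is launched from the repository root:

```
python demo_single_frame.py
```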
### Demo on video
- Download the pre-trained model weights and save them in the `/output` folder of your local PercepTreeV1 repository.
- Open `demo_video.py` and uncomment the model config corresponding to the pre-trained weights you downloaded; comment out the others (the default is X-101).
- In `demo_video.py`, specify the path to the video you want to try it on by setting the `video_path` variable, then run the script as shown below.
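
Launched the same way, from the repository root:

```
python demo_video.py
```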
demo_single_frame.py
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test a trained network on a single image
"""
from __future__ import absolute_import

# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import os, cv2
import torch

# import detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer

# local paths to model and image
model_name = 'X-101_RGB_60k.pth'
image_path = './output/image_00000_RGB.png'

if __name__ == "__main__":
    # report whether CUDA is available (Detectron2 uses the GPU by default)
    print("CUDA available:", torch.cuda.is_available())
    logger = setup_logger(name=__name__)

    # All configurables are listed in /repos/detectron2/detectron2/config/defaults.py
    cfg = get_cfg()
    cfg.INPUT.MASK_FORMAT = "bitmask"
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml"))
    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ()
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 8
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256  # faster (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (tree)
    cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1
    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5
    cfg.MODEL.MASK_ON = True

    cfg.OUTPUT_DIR = './output'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_name)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    # cfg.INPUT.MIN_SIZE_TEST = 0  # no resize at test time

    # set detector
    predictor_synth = DefaultPredictor(cfg)

    # set metadata: one thing class (Tree) and five tree keypoints
    tree_metadata = MetadataCatalog.get("my_tree_dataset").set(
        thing_classes=["Tree"],
        keypoint_names=["kpCP", "kpL", "kpR", "AX1", "AX2"],
    )

    # inference on a single image (OpenCV loads BGR; Visualizer expects RGB)
    im = cv2.imread(image_path)
    outputs_pred = predictor_synth(im)
    v_synth = Visualizer(im[:, :, ::-1], metadata=tree_metadata, scale=1)
    out_synth = v_synth.draw_instance_predictions(outputs_pred["instances"].to("cpu"))

    # show predictions until a key is pressed
    cv2.imshow('predictions', out_synth.get_image()[:, :, ::-1])
    cv2.waitKey(0)

    cv2.destroyAllWindows()
demo_video.py
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test trained network on a video
"""
from __future__ import absolute_import

# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import os, cv2
import torch

# import detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.utils.video_visualizer import VideoVisualizer

# model and video variables
model_name = 'X-101_RGB_60k.pth'
video_path = './output/forest_walk_1min.mp4'

if __name__ == "__main__":
    # report whether CUDA is available (Detectron2 uses the GPU by default)
    print("CUDA available:", torch.cuda.is_available())
    logger = setup_logger(name=__name__)

    # All configurables are listed in /repos/detectron2/detectron2/config/defaults.py
    cfg = get_cfg()
    cfg.INPUT.MASK_FORMAT = "bitmask"
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml"))
    # cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ()
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 8
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256  # faster (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class (tree)
    cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 1
    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 5
    cfg.MODEL.MASK_ON = True

    cfg.OUTPUT_DIR = './output'
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, model_name)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    # cfg.INPUT.MIN_SIZE_TEST = 0  # no resize at test time

    # set detector
    predictor_synth = DefaultPredictor(cfg)

    # set metadata: one thing class (Tree) and five tree keypoints
    tree_metadata = MetadataCatalog.get("my_tree_dataset").set(
        thing_classes=["Tree"],
        keypoint_names=["kpCP", "kpL", "kpR", "AX1", "AX2"],
    )

    # open the video (video_path is set at the top of the script)
    vcap = cv2.VideoCapture(video_path)

    # get vcap properties
    w = int(vcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vcap.get(cv2.CAP_PROP_FPS))
    n_frames = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # VIDEO recorder: grab the stats from the source video for the resultant video
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # video = cv2.VideoWriter("pred_and_track_00.mp4", fourcc, 5, (w, h))

    # check if the video opened successfully
    if not vcap.isOpened():
        print("Error opening video stream or file")

    vid_vis = VideoVisualizer(metadata=tree_metadata)

    nframes = 0
    while vcap.isOpened():
        ret, frame = vcap.read()
        # if the frame is read correctly, ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # optional crop region; the defaults keep the full frame
        y = 0
        x = 0
        crop_frame = frame[y:y + h, x:x + w]
        # cv2.imshow('frame', crop_frame)
        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

        # run inference on every 12th frame only (e.g. 5 fps on a 60 fps source)
        if nframes % 12 == 0:
            outputs_pred = predictor_synth(crop_frame)
            # alternative: per-frame Visualizer instead of VideoVisualizer
            # v_synth = Visualizer(crop_frame[:, :, ::-1],
            #                      metadata=tree_metadata,
            #                      scale=1,
            #                      instance_mode=ColorMode.IMAGE,  # remove color from image, better to see instances
            #                      )
            # OpenCV frames are BGR; VideoVisualizer expects RGB
            out = vid_vis.draw_instance_predictions(crop_frame[:, :, ::-1], outputs_pred["instances"].to("cpu"))

            # convert back to BGR for OpenCV display/writing
            vid_frame = out.get_image()[:, :, ::-1]
            # video.write(vid_frame)
            cv2.imshow('frame', vid_frame)

        nframes += 1

    # video.release()
    vcap.release()
    cv2.destroyAllWindows()
Binary file not shown.