Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions examples/bdd100k_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import argparse

from perceptionmetrics.datasets.bdd100k import BDD100KDetectionDataset


def parse_args() -> argparse.Namespace:
    """Parse command-line arguments for the BDD100K detection example.

    Both directory arguments are mandatory.

    :return: parsed arguments
    :rtype: argparse.Namespace
    """
    parser = argparse.ArgumentParser()
    # Both options share the same shape (required string path), so they are
    # registered from a small table instead of repeated add_argument calls.
    for flag, description in (
        ("--images_dir", "Root directory containing train/ and val/ image folders"),
        ("--labels_dir", "Root directory containing train/ and val/ label JSON folders"),
    ):
        parser.add_argument(flag, type=str, required=True, help=description)

    return parser.parse_args()


def main():
    """Load the BDD100K detection dataset and print a short summary."""
    args = parse_args()

    detection_dataset = BDD100KDetectionDataset(
        images_dir=args.images_dir,
        labels_dir=args.labels_dir,
    )

    samples = detection_dataset.dataset
    print(f"Total samples: {len(samples)}")
    # An empty DataFrame would have nothing to aggregate per split.
    if not samples.empty:
        print(f"Splits: {samples['split'].value_counts().to_dict()}")
    print(f"Ontology classes: {list(detection_dataset.ontology.keys())}")


# Script entry point: run the example only when executed directly.
if __name__ == "__main__":
    main()
8 changes: 7 additions & 1 deletion perceptionmetrics/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from perceptionmetrics.datasets.bdd100k import (
BDD100KDetectionDataset,
build_bdd100k_dataset,
)
from perceptionmetrics.datasets.gaia import (
GaiaImageSegmentationDataset,
GaiaLiDARSegmentationDataset,
Expand All @@ -16,13 +20,15 @@
)
from perceptionmetrics.datasets.rugd import RUGDImageSegmentationDataset
from perceptionmetrics.datasets.wildscenes import WildscenesImageSegmentationDataset

try:
from perceptionmetrics.datasets.coco import CocoDataset
except ImportError:
print("COCO dataset dependencies not available")
CocoDataset = None

REGISTRY = {
"bdd100k_image_detection": BDD100KDetectionDataset,
"gaia_image_segmentation": GaiaImageSegmentationDataset,
"gaia_lidar_segmentation": GaiaLiDARSegmentationDataset,
"generic_image_segmentation": GenericImageSegmentationDataset,
Expand All @@ -36,4 +42,4 @@
}

if CocoDataset is not None:
REGISTRY["coco_image_detection"] = CocoDataset
REGISTRY["coco_image_detection"] = CocoDataset
140 changes: 140 additions & 0 deletions perceptionmetrics/datasets/bdd100k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from glob import glob
import json
import logging
import os
from typing import Tuple, List

import pandas as pd

from perceptionmetrics.datasets.detection import ImageDetectionDataset

# BDD100K fixed 10-category ontology.
# NOTE(review): all rgb entries are black placeholders — fill in real
# visualization colors if rendering by class is ever needed.
BDD100K_ONTOLOGY = {
    "car": {"idx": 0, "rgb": [0, 0, 0]},
    "truck": {"idx": 1, "rgb": [0, 0, 0]},
    "bus": {"idx": 2, "rgb": [0, 0, 0]},
    "person": {"idx": 3, "rgb": [0, 0, 0]},
    "rider": {"idx": 4, "rgb": [0, 0, 0]},
    "bike": {"idx": 5, "rgb": [0, 0, 0]},
    "motor": {"idx": 6, "rgb": [0, 0, 0]},
    "traffic light": {"idx": 7, "rgb": [0, 0, 0]},
    "traffic sign": {"idx": 8, "rgb": [0, 0, 0]},
    "train": {"idx": 9, "rgb": [0, 0, 0]},
}


def build_bdd100k_dataset(
    images_dir: str, labels_dir: str
) -> Tuple[pd.DataFrame, dict]:
    """Build dataset DataFrame and ontology from BDD100K directory structure.

    Expected layout::

        <images_dir>/train/<filename>.jpg
        <images_dir>/val/<filename>.jpg
        <labels_dir>/train/<filename>.json
        <labels_dir>/val/<filename>.json

    Images without a matching label file (and missing split directories)
    are skipped with a warning.

    :param images_dir: Root directory containing train/ and val/ image folders
    :type images_dir: str
    :param labels_dir: Root directory containing train/ and val/ label folders
    :type labels_dir: str
    :return: Tuple of (dataset DataFrame with columns [image, annotation, split],
        ontology dict)
    :rtype: Tuple[pd.DataFrame, dict]
    """
    # Copy so callers mutating the returned ontology cannot corrupt the
    # module-level constant.
    ontology = dict(BDD100K_ONTOLOGY)

    rows = []
    for split in ["train", "val"]:
        split_images_dir = os.path.join(images_dir, split)
        if not os.path.isdir(split_images_dir):
            logging.warning(
                "Image split directory not found: %s; skipping.", split_images_dir
            )
            continue

        image_files = glob(os.path.join(split_images_dir, "*.jpg")) + glob(
            os.path.join(split_images_dir, "*.png")
        )
        # Sort for a deterministic row order regardless of filesystem order.
        for image_fname in sorted(image_files):
            image_basename = os.path.basename(image_fname)
            stem = os.path.splitext(image_basename)[0]
            label_fname = os.path.join(labels_dir, split, f"{stem}.json")

            if not os.path.isfile(label_fname):
                logging.warning(
                    "No matching label file for image '%s'; skipping.",
                    image_fname,
                )
                continue

            rows.append(
                {
                    "image": image_fname,
                    "annotation": label_fname,
                    "split": split,
                }
            )

    # Pass the columns explicitly so the schema is stable even when no
    # samples were found: pd.DataFrame([]) would otherwise have no columns
    # and downstream access like dataset["split"] would raise KeyError.
    dataset = pd.DataFrame(rows, columns=["image", "annotation", "split"])
    dataset.attrs = {"ontology": ontology}

    return dataset, ontology


class BDD100KDetectionDataset(ImageDetectionDataset):
    """BDD100K object detection dataset.

    :param images_dir: Root directory containing train/ and val/ image folders
    :type images_dir: str
    :param labels_dir: Root directory containing train/ and val/ label folders
    :type labels_dir: str
    """

    def __init__(self, images_dir: str, labels_dir: str):
        dataset, ontology = build_bdd100k_dataset(images_dir, labels_dir)
        # Paths in the DataFrame are already absolute; dataset_dir is used
        # only by PerceptionDataset base class (must not be None).
        super().__init__(dataset=dataset, dataset_dir=images_dir, ontology=ontology)

    def read_annotation(self, fname: str) -> Tuple[List[List[float]], List[int]]:
        """Read bounding boxes and category indices from a BDD100K per-image JSON.

        Objects without a ``box2d`` key (e.g. lane/poly2d annotations) are
        skipped. Unknown categories not in the BDD100K ontology are skipped
        with a warning. A missing or empty ``frames`` list yields an empty
        result.

        :param fname: Path to the per-image JSON annotation file
        :type fname: str
        :return: Tuple of (boxes as [[x1, y1, x2, y2], ...], category_indices)
        :rtype: Tuple[List[List[float]], List[int]]
        """
        with open(fname, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Build reverse lookup: category name -> index
        cat_to_idx = {name: info["idx"] for name, info in self.ontology.items()}

        boxes: List[List[float]] = []
        category_indices: List[int] = []

        # `.get("frames", [{}])[0]` would raise IndexError on an explicitly
        # empty "frames" list; `or [{}]` covers both missing and empty.
        # NOTE(review): only the first frame is read — assumes one frame per
        # image JSON (true for still-image BDD100K labels); confirm for video.
        frames = data.get("frames") or [{}]
        objects = frames[0].get("objects", [])
        for obj in objects:
            if "box2d" not in obj:
                continue

            category = obj.get("category", "")
            if category not in cat_to_idx:
                logging.warning(
                    "Unknown category '%s' in annotation '%s'; skipping.",
                    category,
                    fname,
                )
                continue

            box = obj["box2d"]
            boxes.append([box["x1"], box["y1"], box["x2"], box["y2"]])
            category_indices.append(cat_to_idx[category])

        return boxes, category_indices
Loading
Loading