Commit 70a17a5

Added scene images for Open Images dataset 🏞️
1 parent 3121649 commit 70a17a5

4 files changed, +216 -3 lines changed

configs/coco_scene_images_transformer.yaml (+2 -2)
@@ -52,7 +52,7 @@ data:
     train:
       target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
       params:
-        data_path: data/coco_annotations_100
+        data_path: data/coco_annotations_100 # substitute with path to full dataset
         split: train
         keys: [image, objects_bbox, file_name]
         no_tokens: 8192
@@ -67,7 +67,7 @@ data:
     validation:
       target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
       params:
-        data_path: data/coco_annotations_100
+        data_path: data/coco_annotations_100 # substitute with path to full dataset
         split: validation
         keys: [image, objects_bbox, file_name]
         no_tokens: 8192
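A quick way to sanity-check the substitution the new comments call for is to load the config and print the resolved paths before launching a run. This is a hedged sketch, not part of the commit; it assumes only the repo's omegaconf dependency.

    # Print the data_path of each split, so a leftover example-subset path
    # (data/coco_annotations_100) is caught before training starts.
    from omegaconf import OmegaConf

    cfg = OmegaConf.load('configs/coco_scene_images_transformer.yaml')
    for split in ('train', 'validation'):
        print(split, cfg.data.params[split].params.data_path)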
configs/open_images_scene_images_transformer.yaml (+81 -0)

@@ -0,0 +1,81 @@
+model:
+  base_learning_rate: 4.5e-06
+  target: taming.models.cond_transformer.Net2NetTransformer
+  params:
+    cond_stage_key: objects_bbox
+    transformer_config:
+      target: taming.modules.transformer.mingpt.GPT
+      params:
+        vocab_size: 8192
+        block_size: 348 # = 256 + 92 = dim(vqgan_latent_space, 16x16) + dim(conditional_builder.embedding_dim)
+        n_layer: 40
+        n_head: 16
+        n_embd: 1408
+        embd_pdrop: 0.1
+        resid_pdrop: 0.1
+        attn_pdrop: 0.1
+    first_stage_config:
+      target: taming.models.vqgan.VQModel
+      params:
+        ckpt_path: /path/to/coco_oi_epoch12.ckpt # https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/
+        embed_dim: 256
+        n_embed: 8192
+        ddconfig:
+          double_z: false
+          z_channels: 256
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 1
+          - 2
+          - 2
+          - 4
+          num_res_blocks: 2
+          attn_resolutions:
+          - 16
+          dropout: 0.0
+        lossconfig:
+          target: taming.modules.losses.DummyLoss
+    cond_stage_config:
+      target: taming.models.dummy_cond_stage.DummyCondStage
+      params:
+        conditional_key: objects_bbox
+
+data:
+  target: main.DataModuleFromConfig
+  params:
+    batch_size: 6
+    num_workers: 12
+    train:
+      target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
+      params:
+        data_path: data/open_images_annotations_100 # substitute with path to full dataset
+        split: train
+        keys: [image, objects_bbox, file_name]
+        no_tokens: 8192
+        target_image_size: 256
+        min_object_area: 0.0001
+        min_objects_per_image: 2
+        max_objects_per_image: 30
+        crop_method: random-2d
+        random_flip: true
+        use_group_parameter: true
+        encode_crop: true
+    validation:
+      target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
+      params:
+        data_path: data/open_images_annotations_100 # substitute with path to full dataset
+        split: validation
+        keys: [image, objects_bbox, file_name]
+        no_tokens: 8192
+        target_image_size: 256
+        min_object_area: 0.0001
+        min_objects_per_image: 2
+        max_objects_per_image: 30
+        crop_method: center
+        random_flip: false
+        use_group_parameter: true
+        encode_crop: true
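For context, a target:/params: block like the ones above is resolved generically; the repo's main.py provides a helper for this (instantiate_from_config). The stand-in below spells the mechanism out. It is a hedged sketch, not part of the commit, and it assumes the config file name shown above.

    import importlib

    from omegaconf import OmegaConf

    def instantiate(node):
        # Split 'pkg.module.Class' into module path and class name,
        # import the module, and call the class with the params block.
        module_path, cls_name = node['target'].rsplit('.', 1)
        cls = getattr(importlib.import_module(module_path), cls_name)
        return cls(**node.get('params', {}))

    cfg = OmegaConf.load('configs/open_images_scene_images_transformer.yaml')
    train_set = instantiate(cfg.data.params.train)  # -> AnnotatedObjectsOpenImages
    print(len(train_set), 'training images')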

taming/data/annotated_objects_dataset.py (+1 -1)
@@ -137,7 +137,7 @@ def clean_up_annotations_and_image_descriptions(self) -> None:
 
     @staticmethod
     def filter_object_number(all_annotations: Dict[str, List[Annotation]], min_object_area: float,
-                            min_objects_per_image: int, max_objects_per_image: int) -> Dict[str, List[Annotation]]:
+                             min_objects_per_image: int, max_objects_per_image: int) -> Dict[str, List[Annotation]]:
         filtered = {}
         for image_id, annotations in all_annotations.items():
             annotations_with_min_area = [a for a in annotations if a.area > min_object_area]

(Whitespace-only change: the wrapped parameter line is re-aligned to the opening parenthesis.)
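The hunk cuts the method off after the area filter, so here is the rest of the logic it implies: keep an image only if the number of sufficiently large boxes lands inside the configured bounds. This is a hedged reconstruction from the visible context and parameter names, not the commit's verbatim code.

    from typing import Dict, List

    from taming.data.helper_types import Annotation

    def filter_object_number(all_annotations: Dict[str, List[Annotation]], min_object_area: float,
                             min_objects_per_image: int, max_objects_per_image: int) -> Dict[str, List[Annotation]]:
        filtered = {}
        for image_id, annotations in all_annotations.items():
            # Drop boxes below the area threshold, then gate on the remaining count.
            annotations_with_min_area = [a for a in annotations if a.area > min_object_area]
            if min_objects_per_image <= len(annotations_with_min_area) <= max_objects_per_image:
                filtered[image_id] = annotations_with_min_area
        return filtered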
taming/data/annotated_objects_open_images.py (+132 -0)

@@ -0,0 +1,132 @@
+from collections import defaultdict
+from csv import DictReader, reader as TupleReader
+from pathlib import Path
+from typing import Dict, List, Any
+import warnings
+
+from taming.data.annotated_objects_dataset import AnnotatedObjectsDataset
+from taming.data.helper_types import Annotation, Category
+from tqdm import tqdm
+
+OPEN_IMAGES_STRUCTURE = {
+    'train': {
+        'top_level': '',
+        'class_descriptions': 'class-descriptions-boxable.csv',
+        'annotations': 'oidv6-train-annotations-bbox.csv',
+        'file_list': 'train-images-boxable.csv',
+        'files': 'train'
+    },
+    'validation': {
+        'top_level': '',
+        'class_descriptions': 'class-descriptions-boxable.csv',
+        'annotations': 'validation-annotations-bbox.csv',
+        'file_list': 'validation-images.csv',
+        'files': 'validation'
+    },
+    'test': {
+        'top_level': '',
+        'class_descriptions': 'class-descriptions-boxable.csv',
+        'annotations': 'test-annotations-bbox.csv',
+        'file_list': 'test-images.csv',
+        'files': 'test'
+    }
+}
+
+
+def load_annotations(descriptor_path: Path, min_object_area: float, category_no_for_id: Dict[str, int]) -> \
+        Dict[str, List[Annotation]]:
+    annotations: Dict[str, List[Annotation]] = defaultdict(list)
+    with open(descriptor_path) as file:
+        reader = DictReader(file)
+        for i, row in tqdm(enumerate(reader), total=14620000, desc='Loading OpenImages annotations'):
+            width = float(row['XMax']) - float(row['XMin'])
+            height = float(row['YMax']) - float(row['YMin'])
+            area = width * height
+            category_id = row['LabelName']
+            if area >= min_object_area and category_id in category_no_for_id:
+                annotations[row['ImageID']].append(
+                    Annotation(
+                        id=i,
+                        image_id=row['ImageID'],
+                        source=row['Source'],
+                        category_id=category_id,
+                        category_no=category_no_for_id[category_id],
+                        confidence=float(row['Confidence']),
+                        bbox=(float(row['XMin']), float(row['YMin']), width, height),
+                        area=area,
+                        is_occluded=bool(int(row['IsOccluded'])),
+                        is_truncated=bool(int(row['IsTruncated'])),
+                        is_group_of=bool(int(row['IsGroupOf'])),
+                        is_depiction=bool(int(row['IsDepiction'])),
+                        is_inside=bool(int(row['IsInside']))
+                    )
+                )
+    if 'train' in str(descriptor_path) and i < 14000000:
+        warnings.warn(f'Running with subset of Open Images. Train dataset has length [{len(annotations)}].')
+    return dict(annotations)
+
+
+def load_image_ids(csv_path: Path) -> List[str]:
+    with open(csv_path) as file:
+        reader = DictReader(file)
+        return [row['image_name'] for row in reader]
+
+
+def load_categories(csv_path: Path) -> Dict[str, Category]:
+    with open(csv_path) as file:
+        reader = TupleReader(file)
+        return {row[0]: Category(id=row[0], name=row[1], super_category=None) for row in reader}
+
+
+class AnnotatedObjectsOpenImages(AnnotatedObjectsDataset):
+    def __init__(self, **kwargs):
+        """
+        @param data_path: is the path to the following folder structure:
+                          open_images/
+                          │   oidv6-train-annotations-bbox.csv
+                          ├── class-descriptions-boxable.csv
+                          ├── oidv6-train-annotations-bbox.csv
+                          ├── test
+                          │   ├── 000026e7ee790996.jpg
+                          │   ├── 000062a39995e348.jpg
+                          │   └── ...
+                          ├── test-annotations-bbox.csv
+                          ├── test-images.csv
+                          ├── train
+                          │   ├── 000002b66c9c498e.jpg
+                          │   ├── 000002b97e5471a0.jpg
+                          │   └── ...
+                          ├── train-images-boxable.csv
+                          ├── validation
+                          │   ├── 0001eeaf4aed83f9.jpg
+                          │   ├── 0004886b7d043cfd.jpg
+                          │   └── ...
+                          ├── validation-annotations-bbox.csv
+                          └── validation-images.csv
+        @param split: one of 'train', 'validation' or 'test'
+        @param target_image_size: desired image size (returns square images)
+        """
+
+        super().__init__(**kwargs)
+
+        self.categories = load_categories(self.paths['class_descriptions'])
+        self.filter_categories()
+        self.setup_category_id_and_number()
+
+        self.image_descriptions = {}
+        annotations = load_annotations(self.paths['annotations'], self.min_object_area, self.category_number)
+        self.annotations = self.filter_object_number(annotations, self.min_object_area, self.min_objects_per_image,
+                                                     self.max_objects_per_image)
+        self.image_ids = list(self.annotations.keys())
+        self.clean_up_annotations_and_image_descriptions()
+
+    def get_path_structure(self) -> Dict[str, str]:
+        if self.split not in OPEN_IMAGES_STRUCTURE:
+            raise ValueError(f'Split [{self.split}] does not exist for Open Images data.')
+        return OPEN_IMAGES_STRUCTURE[self.split]
+
+    def get_image_path(self, image_id: str) -> Path:
+        return self.paths['files'].joinpath(f'{image_id:0>16}.jpg')
+
+    def get_image_description(self, image_id: str) -> Dict[str, Any]:
+        return {'file_path': str(self.get_image_path(image_id))}
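Taken together, the module's loaders can be exercised standalone. Below is a hedged usage sketch (the paths are placeholders following the docstring's folder layout, not values from this commit). Note that XMin/XMax/YMin/YMax in the Open Images CSVs are normalized to [0, 1], so the computed areas are fractions of the image area, which is why the configs set min_object_area to a tiny value like 0.0001.

    from pathlib import Path

    from taming.data.annotated_objects_open_images import load_annotations, load_categories

    root = Path('data/open_images')  # placeholder root matching the docstring tree
    categories = load_categories(root / 'class-descriptions-boxable.csv')
    category_no_for_id = {cid: no for no, cid in enumerate(categories)}  # stable numbering

    annotations = load_annotations(root / 'validation-annotations-bbox.csv',
                                   min_object_area=0.0001,
                                   category_no_for_id=category_no_for_id)
    print(f'{len(annotations)} images with at least one kept box')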
