Skip to content

Commit

Permalink
streaming export for task or job data, and for yolo format
Browse files Browse the repository at this point in the history
  • Loading branch information
Eldies committed Feb 10, 2025
1 parent 74da174 commit 094dc27
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 17 deletions.
39 changes: 24 additions & 15 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1660,11 +1660,8 @@ def __init__(
self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
self._dimension = dimension
self._format_type = format_type

is_video = instance_meta['mode'] == 'interpolation'
ext = ''
if is_video:
ext = TaskFrameProvider.VIDEO_FRAME_EXT
self._instance_data = instance_data
self._include_images = include_images

if dimension == DimensionType.DIM_3D or include_images:
if isinstance(instance_data, TaskData):
Expand All @@ -1678,23 +1675,29 @@ def __init__(
{0: MediaSource(db_task)}
)

dm_items: list[dm.DatasetItem] = []
for frame_data in instance_data.group_by_frame(include_empty=True):
def __iter__(self):
instance_meta = self._instance_data.meta[self._instance_data.META_FIELD]
is_video = instance_meta['mode'] == 'interpolation'
ext = ''
if is_video:
ext = TaskFrameProvider.VIDEO_FRAME_EXT

for frame_data in self._instance_data.group_by_frame(include_empty=True):
dm_media_args = { 'path': frame_data.name + ext }
if dimension == DimensionType.DIM_3D:
if self._dimension == DimensionType.DIM_3D:
dm_media: dm.PointCloud = self._media_provider.get_media_for_frame(
0, frame_data.id, **dm_media_args
)

if not include_images:
if not self._include_images:
dm_media_args["extra_images"] = [
dm.Image.from_file(path=osp.basename(image.path))
for image in dm_media.extra_images
]
dm_media = dm.PointCloud.from_file(**dm_media_args)
else:
dm_media_args['size'] = (frame_data.height, frame_data.width)
if include_images:
if self._include_images:
dm_media: dm.Image = self._media_provider.get_media_for_frame(
0, frame_data.idx, **dm_media_args
)
Expand All @@ -1705,16 +1708,16 @@ def __init__(

dm_attributes = {'frame': frame_data.frame}

if dimension == DimensionType.DIM_2D:
if self._dimension == DimensionType.DIM_2D:
dm_item = dm.DatasetItem(
id=osp.splitext(frame_data.name)[0],
subset=frame_data.subset,
annotations=dm_anno,
media=dm_media,
attributes=dm_attributes,
)
elif dimension == DimensionType.DIM_3D:
if format_type == "sly_pointcloud":
elif self._dimension == DimensionType.DIM_3D:
if self._format_type == "sly_pointcloud":
dm_attributes["name"] = self._user["name"]
dm_attributes["createdAt"] = self._user["createdAt"]
dm_attributes["updatedAt"] = self._user["updatedAt"]
Expand All @@ -1731,9 +1734,10 @@ def __init__(
attributes=dm_attributes,
)

dm_items.append(dm_item)
yield dm_item

self._items = dm_items
# Size of the extractor: delegates to len() of the instance data object that
# __init__ stored in self._instance_data; nothing is computed or cached here.
# NOTE(review): presumably this matches the number of items __iter__ yields
# from group_by_frame(include_empty=True) — confirm against the data class.
def __len__(self):
return len(self._instance_data)

def _read_cvat_anno(self, cvat_frame_anno: CommonData.Frame, labels: list):
categories = self.categories()
Expand All @@ -1747,6 +1751,11 @@ def map_label(name, parent=''): return label_cat.find(name, parent)[0]
return self.convert_annotations(cvat_frame_anno,
label_attrs, map_label, self._format_type, self._dimension)

# Read-only flag that unconditionally returns True.
# NOTE(review): presumably this marks the extractor as a lazy/streaming source
# for Datumaro (its __iter__ yields each dm_item instead of building a list);
# confirm how the dataset class consumes this property.
@property
def is_stream(self) -> bool:
return True


class CVATProjectDataExtractor(dm.DatasetBase, CVATDataExtractorMixin):
def __init__(
self,
Expand Down
7 changes: 5 additions & 2 deletions cvat/apps/dataset_manager/formats/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Callable, Optional

from datumaro.components.annotation import AnnotationType
from datumaro.components.dataset import StreamDataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.project import Dataset
from pyunpack import Archive
Expand Down Expand Up @@ -36,7 +37,8 @@ def _export_common(
**kwargs,
):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset.export(temp_dir, format_name, save_media=save_images, **kwargs)

make_zip_archive(temp_dir, dst_file)
Expand Down Expand Up @@ -109,7 +111,8 @@ def _export_yolo_ultralytics_oriented_boxes(*args, **kwargs):
@exporter(name="Ultralytics YOLO Segmentation", ext="ZIP", version="1.0")
def _export_yolo_ultralytics_segmentation(dst_file, temp_dir, instance_data, *, save_images=False):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset = dataset.transform("masks_to_polygons")
dataset.export(temp_dir, "yolo_ultralytics_segmentation", save_media=save_images)

Expand Down

0 comments on commit 094dc27

Please sign in to comment.