diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 49544d90aa77..87f9244fc537 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -3,7 +3,9 @@ # # SPDX-License-Identifier: MIT +import codecs import io +import json import mimetypes import os import re @@ -21,6 +23,7 @@ from zipfile import ZipFile import django_rq +import json_stream from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db import transaction @@ -596,22 +599,21 @@ def serialize_data(): target_manifest_file = os.path.join(target_dir, self.MANIFEST_FILENAME) if target_dir else self.MANIFEST_FILENAME zip_object.writestr(target_manifest_file, data=JSONRenderer().render(task)) - def _write_annotations(self, zip_object, target_dir=None): + def _write_annotations(self, zip_object: ZipFile, target_dir: Optional[str] = None) -> None: + @json_stream.streamable_list def serialize_annotations(): - job_annotations = [] db_jobs = self._get_db_jobs() db_job_ids = (j.id for j in db_jobs) for db_job_id in db_job_ids: annotations = dm.task.get_job_data(db_job_id) annotations_serializer = LabeledDataSerializer(data=annotations) annotations_serializer.is_valid(raise_exception=True) - job_annotations.append(self._prepare_annotations(annotations_serializer.data, self._label_mapping)) - - return job_annotations + yield self._prepare_annotations(annotations_serializer.data, self._label_mapping) annotations = serialize_annotations() target_annotations_file = os.path.join(target_dir, self.ANNOTATIONS_FILENAME) if target_dir else self.ANNOTATIONS_FILENAME - zip_object.writestr(target_annotations_file, data=JSONRenderer().render(annotations)) + with zip_object.open(target_annotations_file, 'w') as f: + json.dump(annotations, codecs.getwriter('utf-8')(f), separators=(',', ':')) def _export_task(self, zip_obj, target_dir=None): self._write_data(zip_obj, target_dir) diff --git a/cvat/requirements/base.in b/cvat/requirements/base.in index 1172778f8baa..380e066d8893 100644 --- a/cvat/requirements/base.in +++ b/cvat/requirements/base.in @@ -33,6 +33,7 @@ djangorestframework>=3.15.2,<4 drf-spectacular==0.26.2 furl==2.1.0 google-cloud-storage==1.42.0 +json-stream>=2.0 lxml>=5.2.1,<6 natsort==8.0.0 numpy~=1.22.2 diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index feb15d9183ce..c663053ff5ab 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -1,4 +1,4 @@ -# SHA1:9c45ee6ba604552349bcaf41a8f35abbc7c62ddd +# SHA1:02cd495ccf64874404b603b505a50d84acc316cc # # This file is autogenerated by pip-compile-multi # To update, run: @@ -29,7 +29,7 @@ botocore==1.20.112 # s3transfer cachetools==5.5.1 # via google-auth -certifi==2024.12.14 +certifi==2025.1.31 # via # clickhouse-connect # msrest @@ -52,7 +52,7 @@ coreschema==0.0.4 # via coreapi crontab==1.0.1 # via rq-scheduler -cryptography==44.0.0 +cryptography==44.0.1 # via # azure-storage-blob # datumaro @@ -71,7 +71,7 @@ dj-pagination==2.5.0 # via -r cvat/requirements/base.in dj-rest-auth[with-social]==5.0.2 # via -r cvat/requirements/base.in -django==4.2.18 +django==4.2.19 # via # -r cvat/requirements/base.in # dj-rest-auth @@ -119,7 +119,7 @@ easyprocess==1.1 # via pyunpack entrypoint2==1.1 # via pyunpack -fonttools==4.55.8 +fonttools==4.56.0 # via matplotlib freezegun==1.5.1 # via rq-scheduler @@ -142,7 +142,7 @@ google-crc32c==1.6.0 # via google-resumable-media google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos==1.66.0 +googleapis-common-protos==1.67.0 # via google-api-core h5py==3.12.1 # via datumaro @@ -175,7 +175,9 @@ joblib==1.4.2 # nltk # scikit-learn json-stream==2.3.3 - # via datumaro + # via + # -r cvat/requirements/base.in + # datumaro json-stream-rs-tokenizer==0.4.27 # via json-stream jsonschema==4.17.3 @@ -184,7 +186,7 @@ kiwisolver==1.4.7 # via matplotlib limits==4.0.1 # via python-logstash-async -lxml==5.3.0 +lxml==5.3.1 # via # -r cvat/requirements/base.in # datumaro @@ -285,7 +287,7 @@ python3-openid==3.2.0 # via django-allauth python3-saml==1.16.0 # via django-allauth -pytz==2024.2 +pytz==2025.1 # via # clickhouse-connect # pandas