Skip to content

Commit

Permalink
do not keep all annotations in RAM on backup (#9099)
Browse files Browse the repository at this point in the history
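On backup, the annotations of all jobs were collected in memory before
being written to the archive, which can be a problem if the annotations
are large.
This change serializes the annotations job by job and streams them directly
into the backup archive instead of building the full list first.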
  • Loading branch information
Eldies authored Feb 13, 2025
1 parent 56e1953 commit a31a782
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 15 deletions.
14 changes: 8 additions & 6 deletions cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#
# SPDX-License-Identifier: MIT

import codecs
import io
import json
import mimetypes
import os
import re
Expand All @@ -21,6 +23,7 @@
from zipfile import ZipFile

import django_rq
import json_stream
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
Expand Down Expand Up @@ -596,22 +599,21 @@ def serialize_data():
target_manifest_file = os.path.join(target_dir, self.MANIFEST_FILENAME) if target_dir else self.MANIFEST_FILENAME
zip_object.writestr(target_manifest_file, data=JSONRenderer().render(task))

def _write_annotations(self, zip_object, target_dir=None):
def _write_annotations(self, zip_object: ZipFile, target_dir: Optional[str] = None) -> None:
@json_stream.streamable_list
def serialize_annotations():
job_annotations = []
db_jobs = self._get_db_jobs()
db_job_ids = (j.id for j in db_jobs)
for db_job_id in db_job_ids:
annotations = dm.task.get_job_data(db_job_id)
annotations_serializer = LabeledDataSerializer(data=annotations)
annotations_serializer.is_valid(raise_exception=True)
job_annotations.append(self._prepare_annotations(annotations_serializer.data, self._label_mapping))

return job_annotations
yield self._prepare_annotations(annotations_serializer.data, self._label_mapping)

annotations = serialize_annotations()
target_annotations_file = os.path.join(target_dir, self.ANNOTATIONS_FILENAME) if target_dir else self.ANNOTATIONS_FILENAME
zip_object.writestr(target_annotations_file, data=JSONRenderer().render(annotations))
with zip_object.open(target_annotations_file, 'w') as f:
json.dump(annotations, codecs.getwriter('utf-8')(f), separators=(',', ':'))

def _export_task(self, zip_obj, target_dir=None):
self._write_data(zip_obj, target_dir)
Expand Down
1 change: 1 addition & 0 deletions cvat/requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ djangorestframework>=3.15.2,<4
drf-spectacular==0.26.2
furl==2.1.0
google-cloud-storage==1.42.0
json-stream>=2.0
lxml>=5.2.1,<6
natsort==8.0.0
numpy~=1.22.2
Expand Down
20 changes: 11 additions & 9 deletions cvat/requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SHA1:9c45ee6ba604552349bcaf41a8f35abbc7c62ddd
# SHA1:02cd495ccf64874404b603b505a50d84acc316cc
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand Down Expand Up @@ -29,7 +29,7 @@ botocore==1.20.112
# s3transfer
cachetools==5.5.1
# via google-auth
certifi==2024.12.14
certifi==2025.1.31
# via
# clickhouse-connect
# msrest
Expand All @@ -52,7 +52,7 @@ coreschema==0.0.4
# via coreapi
crontab==1.0.1
# via rq-scheduler
cryptography==44.0.0
cryptography==44.0.1
# via
# azure-storage-blob
# datumaro
Expand All @@ -71,7 +71,7 @@ dj-pagination==2.5.0
# via -r cvat/requirements/base.in
dj-rest-auth[with-social]==5.0.2
# via -r cvat/requirements/base.in
django==4.2.18
django==4.2.19
# via
# -r cvat/requirements/base.in
# dj-rest-auth
Expand Down Expand Up @@ -119,7 +119,7 @@ easyprocess==1.1
# via pyunpack
entrypoint2==1.1
# via pyunpack
fonttools==4.55.8
fonttools==4.56.0
# via matplotlib
freezegun==1.5.1
# via rq-scheduler
Expand All @@ -142,7 +142,7 @@ google-crc32c==1.6.0
# via google-resumable-media
google-resumable-media==2.7.2
# via google-cloud-storage
googleapis-common-protos==1.66.0
googleapis-common-protos==1.67.0
# via google-api-core
h5py==3.12.1
# via datumaro
Expand Down Expand Up @@ -175,7 +175,9 @@ joblib==1.4.2
# nltk
# scikit-learn
json-stream==2.3.3
# via datumaro
# via
# -r cvat/requirements/base.in
# datumaro
json-stream-rs-tokenizer==0.4.27
# via json-stream
jsonschema==4.17.3
Expand All @@ -184,7 +186,7 @@ kiwisolver==1.4.7
# via matplotlib
limits==4.0.1
# via python-logstash-async
lxml==5.3.0
lxml==5.3.1
# via
# -r cvat/requirements/base.in
# datumaro
Expand Down Expand Up @@ -285,7 +287,7 @@ python3-openid==3.2.0
# via django-allauth
python3-saml==1.16.0
# via django-allauth
pytz==2024.2
pytz==2025.1
# via
# clickhouse-connect
# pandas
Expand Down

0 comments on commit a31a782

Please sign in to comment.