From 1979ac6971663840b29c68561c1586830fda7022 Mon Sep 17 00:00:00 2001 From: Eg0ra Date: Fri, 13 Dec 2024 13:22:35 +0700 Subject: [PATCH] Improve slow import/export duration --- HISTORY.rst | 6 ++++ docs/installation.rst | 8 +++++ import_export_extensions/apps.py | 7 ++++ import_export_extensions/resources.py | 46 +++++++++++++++++++-------- 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 66bafcd..74d6280 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ ======= History ======= +UNRELEASED +------------------ +* Fix issue with slow export duration (https://github.com/saritasa-nest/django-import-export-extensions/issues/79): + + * Add setting ``STATUS_UPDATE_ROW_COUNT`` which defines how many rows are imported/exported between task status updates; + * Add ability to specify ``status_update_row_count`` for each resource; 1.1.0 (2024-12-06) ------------------ diff --git a/docs/installation.rst b/docs/installation.rst index 3558f15..a816655 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -80,6 +80,14 @@ Mapping file extensions to mime types to import files. By default, it uses the `mimetypes.types_map `_ from Python's mimetypes module. +``STATUS_UPDATE_ROW_COUNT`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Defines how many rows are imported/exported between task status updates. +Updating the status less often speeds up import/export. The default value +is 100. This parameter can be specified separately for each resource by adding +``status_update_row_count`` to its ``Meta``. + Settings from django-import-export ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Additionally, the package supports settings from the original django-import-export package. 
diff --git a/import_export_extensions/apps.py b/import_export_extensions/apps.py index 790fe41..9c531ad 100644 --- a/import_export_extensions/apps.py +++ b/import_export_extensions/apps.py @@ -7,6 +7,8 @@ # Default configuration # Maximum num of rows to be imported DEFAULT_MAX_DATASET_ROWS = 100000 +# Number of imported/exported rows between celery task status updates +DEFAULT_STATUS_UPDATE_ROW_COUNT = 100 class CeleryImportExport(AppConfig): @@ -24,3 +26,8 @@ def ready(self): DEFAULT_MAX_DATASET_ROWS, ) settings.MIME_TYPES_MAP = mimetypes.types_map.copy() + settings.STATUS_UPDATE_ROW_COUNT = getattr( + settings, + "STATUS_UPDATE_ROW_COUNT", + DEFAULT_STATUS_UPDATE_ROW_COUNT, + ) diff --git a/import_export_extensions/resources.py b/import_export_extensions/resources.py index 1310d6c..9fbeff8 100644 --- a/import_export_extensions/resources.py +++ b/import_export_extensions/resources.py @@ -1,14 +1,16 @@ import collections import typing from enum import Enum +from functools import cached_property +from django.conf import settings from django.db.models import QuerySet from django.utils import timezone from django.utils.functional import classproperty from django.utils.translation import gettext_lazy as _ import tablib -from celery import current_task, result +from celery import current_task from django_filters import rest_framework as filters from django_filters.utils import translate_validation from import_export import fields, resources @@ -41,8 +43,19 @@ def __init__( """Remember init kwargs.""" self._filter_kwargs = filter_kwargs self.resource_init_kwargs: dict[str, typing.Any] = kwargs + self.total_objects_count = 0 + self.current_object_number = 0 super().__init__() + @cached_property + def status_update_row_count(self): + """Number of processed rows between celery task status updates.""" + return getattr( + self._meta, + "status_update_row_count", + settings.STATUS_UPDATE_ROW_COUNT, + ) + def get_queryset(self): """Filter export queryset via filterset class.""" 
queryset = super().get_queryset() @@ -194,6 +207,8 @@ def export( """Init task state before exporting.""" if queryset is None: queryset = self.get_queryset() + + queryset = self.filter_export(queryset, **kwargs) self.initialize_task_state( state=TaskState.EXPORTING.name, queryset=queryset, @@ -229,15 +244,15 @@ def initialize_task_state( return if isinstance(queryset, QuerySet): - total = queryset.count() + self.total_objects_count = queryset.count() else: - total = len(queryset) + self.total_objects_count = len(queryset) self._update_current_task_state( state=state, meta=dict( - current=0, - total=total, + current=self.current_object_number, + total=self.total_objects_count, ), ) @@ -254,15 +269,18 @@ def update_task_state( if not current_task or current_task.request.called_directly: return - async_result = result.AsyncResult(current_task.request.get("id")) - - self._update_current_task_state( - state=state, - meta=dict( - current=async_result.result.get("current", 0) + 1, - total=async_result.result.get("total", 0), - ), - ) + self.current_object_number += 1 + if ( + self.current_object_number % self.status_update_row_count == 0 + or self.current_object_number == self.total_objects_count + ): + self._update_current_task_state( + state=state, + meta=dict( + current=self.current_object_number, + total=self.total_objects_count, + ), + ) def _update_current_task_state(self, state: str, meta: dict[str, int]): """Update state of task where resource is executed."""