Delete results immediately, not in two steps (celery#118)

asfaltboy · auvipy · commit 2bacd29fb8fc · 2019-10-28T09:42:50.000+06:00
* Delete results immediately, not in two steps. Takes inspiration from the benchmark test in the related ticket to remove the slowest-running operation in the cleanup task. The same direct deletion operation is done in the built-in database backend of celery: https://github.com/celery/celery/blob/c780e3a954579ee5b7243b9cb7444e44a6398d5b/celery/backends/database/__init__.py#L207-L217 To test, create some records and run the cleanup task: def create_old_results(records=200000, age=5): """ takes 40s to create 200k old records on my machine """ from datetime import datetime, timedelta from django.db import connection from celery import states, uuid from django_celery_results.models import * old_date = datetime.now() - timedelta(days=age) bunch = [TaskResult(task_id=uuid()) for _ in range(records)] created = TaskResult.objects.bulk_create(bunch) TaskResult.objects.filter(id__gte=created[0].id).update(date_done=old_date) Before the modification: > Task celery.backend_cleanup[4cd0ad59-f241-4d3b-b848-45dfc9564725] succeeded in 8.506982273000176s: None After the modification: > Task celery.backend_cleanup[069fe103-f894-4b96-ad1a-720a6bbbec6f] succeeded in 0.3088349770041532s: None Fixes celery#117 * Use the new pytest execution form #dropthedot Ref: https://stackoverflow.com/a/41893170/484127 * Add benchmark for delete_expired in test suite * Remove the TaskResult.hidden field
diff --git a/.travis.yml b/.travis.yml
@@ -27,6 +27,7 @@ matrix:
     - { python: 2.7, env: TOXENV=flakeplus }
     - { python: 3.6, env: TOXENV=pydocstyle }
     - { python: 3.6, env: TOXENV=cov }
+    - { python: 3.6, env: TOXENV=integration }
     # disabled temporarily due to upstream bug
     # https://github.com/celery/sphinx_celery/issues/9
     # - { python: 3.5, env: TOXENV=apicheck }
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 PROJ=django_celery_results
 PGPIDENT="Celery Security Team"
 PYTHON=python
-PYTEST=py.test
+PYTEST=pytest
 GIT=git
 TOX=tox
 ICONV=iconv
@@ -141,7 +141,7 @@ test:
 	$(PYTHON) setup.py test
 
 cov: covbuild
-	(cd $(TESTDIR); py.test -x --cov=django_celery_results --cov-report=html)
+	(cd $(TESTDIR); pytest -x --cov=django_celery_results --cov-report=html)
 
 build:
 	$(PYTHON) setup.py sdist bdist_wheel
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,38 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the ``-B`` / ``--run-benchmarks`` command line switch."""
+    option_settings = {
+        'action': 'store_true',
+        'default': False,
+        'help': 'run benchmarks',
+    }
+    parser.addoption('-B', '--run-benchmarks', **option_settings)
+
+
+def pytest_runtest_setup(item):
+    """
+    Skip tests carrying the "benchmark" marker unless pytest was invoked
+    with --run-benchmarks (or its short form, -B).
+    """
+    benchmarks_enabled = item.config.getoption('--run-benchmarks')
+    marked_as_benchmark = any(item.iter_markers(name="benchmark"))
+
+    # Regular tests, and benchmarks that were asked for, run as usual.
+    if benchmarks_enabled or not marked_as_benchmark:
+        return
+
+    pytest.skip(
+        'need --run-benchmarks to run benchmarks'
+    )
+
+
+def pytest_collection_modifyitems(items):
+    """
+    Add the "benchmark" mark to tests in classes named "benchmark_*".
+    """
+    for item in items:
+        owner = getattr(item, 'cls', None)  # None outside a test class
+        if owner is not None and owner.__name__.startswith("benchmark_"):
+            item.add_marker(pytest.mark.benchmark)
diff --git a/django_celery_results/admin.py b/django_celery_results/admin.py
@@ -22,7 +22,7 @@ class TaskResultAdmin(admin.ModelAdmin):
     date_hierarchy = 'date_done'
     list_display = ('task_id', 'task_name', 'date_done', 'status', 'worker')
     list_filter = ('status', 'date_done', 'task_name',)
-    readonly_fields = ('date_created', 'date_done', 'result', 'hidden', 'meta')
+    readonly_fields = ('date_created', 'date_done', 'result', 'meta')
     search_fields = ('task_name', 'task_id', 'status')
     fieldsets = (
         (None, {
@@ -49,7 +49,6 @@ class TaskResultAdmin(admin.ModelAdmin):
                 'date_created',
                 'date_done',
                 'traceback',
-                'hidden',
                 'meta',
             ),
             'classes': ('extrapretty', 'wide')
diff --git a/django_celery_results/managers.py b/django_celery_results/managers.py
@@ -173,5 +173,4 @@ def get_all_expired(self, expires):
     def delete_expired(self, expires):
         """Delete all expired results."""
         with transaction.atomic():
-            self.get_all_expired(expires).update(hidden=True)
-            self.filter(hidden=True).delete()
+            self.get_all_expired(expires).delete()
diff --git a/django_celery_results/migrations/0007_remove_taskresult_hidden.py b/django_celery_results/migrations/0007_remove_taskresult_hidden.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 2.2.6 on 2019-10-27 11:29
+
+# this file is auto-generated so don't do flake8 on it
+# flake8: noqa
+
+from __future__ import absolute_import, unicode_literals
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    # Drops TaskResult.hidden, the soft-delete flag removed from the model.
+    dependencies = [
+        ('django_celery_results', '0006_taskresult_date_created'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='taskresult',
+            name='hidden',
+        ),
+    ]
diff --git a/django_celery_results/models.py b/django_celery_results/models.py
@@ -75,11 +75,6 @@ class TaskResult(models.Model):
         blank=True, null=True,
         verbose_name=_('Traceback'),
         help_text=_('Text of the traceback if the task generated one'))
-    hidden = models.BooleanField(
-        editable=False, default=False, db_index=True,
-        verbose_name=_('Hidden'),
-        help_text=_('Soft Delete flag that can be used '
-                    'instead of full delete'))
     meta = models.TextField(
         null=True, default=None, editable=False,
         verbose_name=_('Task Meta Information'),
diff --git a/requirements/test.txt b/requirements/test.txt
@@ -1,5 +1,6 @@
 case>=1.3.1
 pytest>=4.3
 pytest-django
+pytest-benchmark
 pytz>dev
 psycopg2cffi
diff --git a/setup.cfg b/setup.cfg
@@ -1,7 +1,10 @@
 [tool:pytest]
-testpaths = t/unit
+testpaths = t/
 python_classes = test_*
-DJANGO_SETTINGS_MODULE=t.proj.settings
+python_files = test_* benchmark_*
+DJANGO_SETTINGS_MODULE = t.proj.settings
+markers =
+    benchmark: mark a test as a benchmark
 
 [flake8]
 # classes can be lowercase, arguments and variables can be uppercase
diff --git a/setup.py b/setup.py
@@ -121,7 +121,7 @@ def reqs(*f):
 
 
 class pytest(setuptools.command.test.test):
-    user_options = [('pytest-args=', 'a', 'Arguments to pass to py.test')]
+    user_options = [('pytest-args=', 'a', 'Arguments to pass to pytest')]
 
     def initialize_options(self):
         setuptools.command.test.test.initialize_options(self)
diff --git a/t/conftest.py b/t/conftest.py
@@ -2,11 +2,8 @@
 
 import pytest
 
-from celery.contrib.pytest import depends_on_current_app
 from celery.contrib.testing.app import TestApp, Trap
 
-__all__ = ['app', 'depends_on_current_app']
-
 
 @pytest.fixture(scope='session', autouse=True)
 def setup_default_app_trap():
diff --git a/t/integration/__init__.py b/t/integration/__init__.py
diff --git a/t/integration/benchmark_models.py b/t/integration/benchmark_models.py
@@ -0,0 +1,75 @@
+from __future__ import absolute_import, unicode_literals
+
+import pytest
+
+from datetime import timedelta
+import time
+
+from django.test import TransactionTestCase
+
+from celery import uuid
+
+from django_celery_results.models import TaskResult
+from django_celery_results.utils import now
+
+RECORDS_COUNT = 100000
+
+
+@pytest.fixture()
+def use_benchmark(request, benchmark):
+    """Attach the ``benchmark`` fixture to the requesting test class."""
+    # unittest-style test methods cannot take fixtures as arguments.
+    request.cls.benchmark = benchmark
+
+
+@pytest.mark.usefixtures('use_benchmark')
+@pytest.mark.usefixtures('depends_on_current_app')
+class benchmark_Models(TransactionTestCase):
+    """Benchmark ``TaskResult.objects.delete_expired`` on a large table."""
+    multi_db = True
+
+    @pytest.fixture(autouse=True)
+    def setup_app(self, app):
+        """Use the Django database result backend with pickle results."""
+        self.app = app
+        self.app.conf.result_serializer = 'pickle'
+        self.app.conf.result_backend = (
+            'django_celery_results.backends:DatabaseBackend')
+
+    def create_many_task_result(self, count):
+        """Bulk-create ``count`` results, print timings, return the rows."""
+        start = time.time()
+        draft_results = [TaskResult(task_id=uuid()) for _ in range(count)]
+        drafted = time.time()
+        results = TaskResult.objects.bulk_create(draft_results)
+        done_creating = time.time()
+        print((
+            'drafting time: {drafting:.2f}\n'
+            'bulk_create time: {done:.2f}\n'
+            '------'
+        ).format(drafting=drafted - start, done=done_creating - drafted))
+        return results
+
+    def setup_records_to_delete(self):
+        """Create RECORDS_COUNT results and age the upper half by 10 days."""
+        self.create_many_task_result(count=RECORDS_COUNT)
+        mid_point = TaskResult.objects.order_by('id')[RECORDS_COUNT // 2]
+        todelete = TaskResult.objects.filter(id__gte=mid_point.id)
+        todelete.update(date_done=now() - timedelta(days=10))
+
+    def test_taskresult_delete_expired(self):
+        """Deleting the expired half must take less than one second."""
+        start = time.time()
+        self.setup_records_to_delete()
+        after_setup = time.time()
+        self.benchmark.pedantic(
+            TaskResult.objects.delete_expired, iterations=1, rounds=1,
+            args=(self.app.conf.result_expires,))
+        done = time.time()
+        assert TaskResult.objects.count() == RECORDS_COUNT // 2
+        print((
+            '------\n'
+            'setup time: {setup:.2f}\n'
+            'bench time: {bench:.2f}\n'
+        ).format(setup=after_setup - start, bench=done - after_setup))
+        assert self.benchmark.stats.stats.max < 1
diff --git a/tox.ini b/tox.ini
@@ -12,6 +12,7 @@ envlist =
     apicheck
     pydocstyle
     cov
+    integration
 
 [travis:env]
 DJANGO =
@@ -29,7 +30,7 @@ deps=
     django21: -r{toxinidir}/requirements/test-django21.txt
     django22: -r{toxinidir}/requirements/test-django22.txt
 
-    cov: -r{toxinidir}/requirements/test-django.txt
+    cov,integration: -r{toxinidir}/requirements/test-django.txt
 
     linkcheck,apicheck: -r{toxinidir}/requirements/docs.txt
     flake8,flakeplus,pydocstyle: -r{toxinidir}/requirements/pkgutils.txt
@@ -38,7 +39,7 @@ recreate = True
 commands =
     pip install -U celery==4.3
     pip install -U kombu==4.5
-    py.test -xv
+    pytest -xv
 
 [testenv:apicheck]
 commands =
@@ -64,4 +65,10 @@ commands =
 usedevelop = true
 commands = pip install -U celery==4.3
            pip install -U kombu==4.5
-           py.test --cov=django_celery_results --cov-report=xml --no-cov-on-fail
+           pytest --cov=django_celery_results --cov-report=xml --no-cov-on-fail
+
+[testenv:integration]
+commands =
+    pip install -U celery==4.3
+    pip install -U kombu==4.5
+    pytest -B -xv