Skip to content

Add async task background worker #4591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: srothh/worker-class-hierarchy
Choose a base branch
from
127 changes: 127 additions & 0 deletions sentry_sdk/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from abc import ABC, abstractmethod
import os
import threading
import asyncio
import inspect

from time import sleep, time
from sentry_sdk._queue import Queue, FullError
Expand Down Expand Up @@ -170,3 +172,128 @@ def _target(self) -> None:
finally:
self._queue.task_done()
sleep(0)


class AsyncWorker(Worker):
def __init__(self, queue_size: int = DEFAULT_QUEUE_SIZE) -> None:
self._queue: asyncio.Queue[Any] = asyncio.Queue(queue_size)
self._task: Optional[asyncio.Task[None]] = None
# Event loop needs to remain in the same process
self._task_for_pid: Optional[int] = None
self._loop: Optional[asyncio.AbstractEventLoop] = None
# Track active callback tasks so they have a strong reference and can be cancelled on kill
self._active_tasks: set[asyncio.Task[None]] = set()

@property
def is_alive(self) -> bool:
if self._task_for_pid != os.getpid():
return False
if not self._task or not self._loop:
return False
return self._loop.is_running() and not self._task.done()

def kill(self) -> None:
if self._task:
try:
self._queue.put_nowait(_TERMINATOR)
except asyncio.QueueFull:
logger.debug("async worker queue full, kill failed")
# Also cancel any active callback tasks
# Avoid modifying the set while cancelling tasks
tasks_to_cancel = set(self._active_tasks)
for task in tasks_to_cancel:
task.cancel()
self._active_tasks.clear()
self._loop = None
self._task = None
self._task_for_pid = None

def start(self) -> None:
if not self.is_alive:
try:
self._loop = asyncio.get_running_loop()
self._task = self._loop.create_task(self._target())
self._task_for_pid = os.getpid()
except RuntimeError:
# There is no event loop running
self._loop = None
self._task = None
self._task_for_pid = None

def full(self) -> bool:
return self._queue.full()

def _ensure_task(self) -> None:
if not self.is_alive:
self.start()

async def _wait_flush(self, timeout: float, callback: Optional[Any] = None) -> None:
if not self._loop or not self._loop.is_running():
return

initial_timeout = min(0.1, timeout)

# Timeout on the join
try:
await asyncio.wait_for(self._queue.join(), timeout=initial_timeout)
except asyncio.TimeoutError:
pending = self._queue.qsize() + 1
logger.debug("%d event(s) pending on flush", pending)
if callback is not None:
callback(pending, timeout)

try:
remaining_timeout = timeout - initial_timeout
await asyncio.wait_for(self._queue.join(), timeout=remaining_timeout)
except asyncio.TimeoutError:
pending = self._queue.qsize() + 1
logger.error("flush timed out, dropped %s events", pending)

async def flush_async(self, timeout: float, callback: Optional[Any] = None) -> None:
logger.debug("background worker got flush request")
if self.is_alive and timeout > 0.0:
await self._wait_flush(timeout, callback)
logger.debug("background worker flushed")

def submit(self, callback: Callable[[], Any]) -> bool:
self._ensure_task()

try:
self._queue.put_nowait(callback)
return True
except asyncio.QueueFull:
return False

async def _target(self) -> None:
while True:
callback = await self._queue.get()
if callback is _TERMINATOR:
self._queue.task_done()
break
# Firing tasks instead of awaiting them allows for concurrent requests
task = asyncio.create_task(self._process_callback(callback))
# Create a strong reference to the task so it can be cancelled on kill
# and does not get garbage collected while running
self._active_tasks.add(task)
task.add_done_callback(self._on_task_complete)
# Yield to let the event loop run other tasks
await asyncio.sleep(0)

async def _process_callback(self, callback: Callable[[], Any]) -> None:
if inspect.iscoroutinefunction(callback):
# Callback is an async coroutine, need to await it
await callback()
else:
# Callback is a sync function, need to call it
callback()

def _on_task_complete(self, task: asyncio.Task[None]) -> None:
try:
task.result()
except Exception:
logger.error("Failed processing job", exc_info=True)
finally:
# Mark the task as done and remove it from the active tasks set
# This happens only after the task has completed
self._queue.task_done()
self._active_tasks.discard(task)
Loading