livekit · C1-BA-B1-F3 · Jun 26, 2026 · Jun 26, 2026 · devin-ai-integration · Jun 26, 2026
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+import os
 import time
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterable, Awaitable
@@ -18,6 +19,11 @@
 from .chat_context import ChatContext, ChatItem, FunctionCall
 from .tool_context import Tool, ToolChoice, ToolContext
 
+# Default retry configuration for generate_reply
+DEFAULT_MAX_RETRIES = int(os.environ.get("LIVEKIT_REALTIME_MAX_RETRIES", "3"))
+DEFAULT_RETRY_BASE_DELAY = float(os.environ.get("LIVEKIT_REALTIME_RETRY_BASE_DELAY", "1.0"))
+DEFAULT_RETRY_MAX_DELAY = float(os.environ.get("LIVEKIT_REALTIME_RETRY_MAX_DELAY", "10.0"))
+
 
 @dataclass
 class InputSpeechStartedEvent:
@@ -87,8 +93,9 @@ class RealtimeCapabilities:
 
 
 class RealtimeError(Exception):
-    def __init__(self, message: str) -> None:
+    def __init__(self, message: str, *, recoverable: bool = True) -> None:
         super().__init__(message)
+        self.recoverable = recoverable
 
 
 class RealtimeModel:
@@ -223,14 +230,146 @@ def push_audio(self, frame: rtc.AudioFrame) -> None: ...
     @abstractmethod
     def push_video(self, frame: rtc.VideoFrame) -> None: ...
 
-    @abstractmethod
     def generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         tools: NotGivenOr[list[Tool]] = NOT_GIVEN,
-    ) -> asyncio.Future[GenerationCreatedEvent]: ...  # can raise RealtimeError on Timeout
+    ) -> asyncio.Future[GenerationCreatedEvent]:
+        """Request a reply, retrying recoverable failures with backoff.
+
+        Delegates to ``_do_generate_reply`` and mirrors its outcome onto the
+        returned future. A ``RealtimeError`` with ``recoverable=True`` hands
+        the future over to ``_retry_generate_reply`` instead of failing it.
+        """
+        fut: asyncio.Future[GenerationCreatedEvent] = asyncio.Future()
+        impl_fut = self._do_generate_reply(
+            instructions=instructions,
+            tool_choice=tool_choice,
+            tools=tools,
+        )
+
+        def _on_impl_done(f: asyncio.Future[GenerationCreatedEvent]) -> None:
+            if fut.done():
+                return
+            try:
+                fut.set_result(f.result())
+            except RealtimeError as e:
+                if e.recoverable:
+                    asyncio.ensure_future(
+                        self._retry_generate_reply(
+                            fut=fut,
+                            instructions=instructions,
+                            tool_choice=tool_choice,
+                            tools=tools,
+                            attempt=1,
+                        )
+                    )
+                else:
+                    fut.set_exception(e)
+            except asyncio.CancelledError:
+                # CancelledError is a BaseException on 3.8+, so `except Exception` misses it
+                fut.cancel()
+            except Exception as e:
+                fut.set_exception(e)
+
+        impl_fut.add_done_callback(_on_impl_done)
+        return fut
+
+    async def _retry_generate_reply(
+        self,
+        *,
+        fut: asyncio.Future[GenerationCreatedEvent],
+        instructions: NotGivenOr[str],
+        tool_choice: NotGivenOr[ToolChoice],
+        tools: NotGivenOr[list[Tool]],
+        attempt: int,
+    ) -> None:
+        """Re-issue a failed request after a capped exponential backoff delay.
+
+        ``attempt`` is 1-based. The shared ``fut`` receives the result, a
+        non-recoverable ``RealtimeError`` once ``DEFAULT_MAX_RETRIES`` is
+        exhausted, or whatever other exception the attempt produced.
+        """
+        max_retries = DEFAULT_MAX_RETRIES
+        base_delay = DEFAULT_RETRY_BASE_DELAY
+        max_delay = DEFAULT_RETRY_MAX_DELAY
+
+        if attempt > max_retries:
+            if not fut.done():
+                fut.set_exception(
+                    RealtimeError(
+                        f"generate_reply failed after {max_retries} retries",
+                        recoverable=False,
+                    )
+                )
+            return
+
+        delay = min(base_delay * (2 ** (attempt - 1)), max_delay)
+        logger.warning(
+            "generate_reply failed (recoverable), retrying in %.1fs (attempt %d/%d)",
+            delay,
+            attempt,
+            max_retries,
+        )
+        await asyncio.sleep(delay)
+
+        if fut.done():
+            return
+
+        try:
+            impl_fut = self._do_generate_reply(
+                instructions=instructions,
+                tool_choice=tool_choice,
+                tools=tools,
+            )
+
+            def _on_retry_done(f: asyncio.Future[GenerationCreatedEvent]) -> None:
+                if fut.done():
+                    return
+                try:
+                    fut.set_result(f.result())
+                except RealtimeError as e:
+                    if e.recoverable and attempt < max_retries:
+                        asyncio.ensure_future(
+                            self._retry_generate_reply(
+                                fut=fut,
+                                instructions=instructions,
+                                tool_choice=tool_choice,
+                                tools=tools,
+                                attempt=attempt + 1,
+                            )
+                        )
+                    elif e.recoverable:
+                        fut.set_exception(
+                            RealtimeError(
+                                f"generate_reply failed after {max_retries} retries",
+                                recoverable=False,
+                            )
+                        )
+                    else:
+                        fut.set_exception(e)
+                except asyncio.CancelledError:
+                    # propagate cancellation; `except Exception` below would not catch it
+                    fut.cancel()
+                except Exception as e:
+                    fut.set_exception(e)
+
+            impl_fut.add_done_callback(_on_retry_done)
+        except Exception as e:
+            if not fut.done():
+                fut.set_exception(e)
+
+    # single attempt without retries; the returned future can fail with RealtimeError
+    @abstractmethod
+    def _do_generate_reply(
+        self,
+        *,
+        instructions: NotGivenOr[str] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
+        tools: NotGivenOr[list[Tool]] = NOT_GIVEN,
+    ) -> asyncio.Future[GenerationCreatedEvent]: ...
 
     # commit the input audio buffer to the server
     @abstractmethod

@@ -2024,7 +2024,7 @@ def push_audio(self, frame: rtc.AudioFrame) -> None:
         else:
             logger.warning("audio input channel closed, skipping audio")
 
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
@@ -2132,7 +2132,10 @@ async def _send_text() -> None:
             def _on_timeout() -> None:
                 if not fut.done():
                     fut.set_exception(
-                        llm.RealtimeError("generate_reply timed out waiting for generation")
+                        llm.RealtimeError(
+                            "generate_reply timed out waiting for generation",
+                            recoverable=True,
+                        )
                     )
                     if self._pending_generation_fut is fut:
                         self._pending_generation_fut = None

@@ -722,7 +722,7 @@ def _send_client_event(self, event: ClientEvents) -> None:
         with contextlib.suppress(utils.aio.channel.ChanClosed):
             self._msg_ch.send_nowait(event)
 
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
@@ -737,7 +737,10 @@ def generate_reply(
             )
             fut = asyncio.Future[llm.GenerationCreatedEvent]()
             fut.set_exception(
-                llm.RealtimeError(f"generate_reply is not compatible with '{self._opts.model}'")
+                llm.RealtimeError(
+                    f"generate_reply is not compatible with '{self._opts.model}'",
+                    recoverable=False,
+                )
             )
             return fut
         if self._pending_generation_fut and not self._pending_generation_fut.done():
@@ -773,7 +776,8 @@ def _on_timeout() -> None:
             if not fut.done():
                 fut.set_exception(
                     llm.RealtimeError(
-                        "generate_reply timed out waiting for generation_created event."
+                        "generate_reply timed out waiting for generation_created event.",
+                        recoverable=True,
                     )
                 )
                 if self._pending_generation_fut is fut:

@@ -229,7 +229,7 @@ def push_video(self, frame: rtc.VideoFrame) -> None:
 
     # -- Public API: generation control --
 
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,

@@ -1551,7 +1551,7 @@ def clear_audio(self) -> None:
         self.send_event(InputAudioBufferClearEvent(type="input_audio_buffer.clear"))
         self._pushed_duration_s = 0
 
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
@@ -1583,7 +1583,7 @@ def generate_reply(
         def _on_timeout() -> None:
             self._response_created_futures.pop(event_id, None)
             if fut and not fut.done():
-                fut.set_exception(llm.RealtimeError("generate_reply timed out."))
+                fut.set_exception(llm.RealtimeError("generate_reply timed out.", recoverable=True))
 
         handle = asyncio.get_event_loop().call_later(10.0, _on_timeout)
 
@@ -1994,11 +1994,16 @@ def _handle_response_done(self, event: ResponseDoneEvent) -> None:
             if event.response.status in ("failed", "incomplete"):
                 details = event.response.status_details
                 msg = f"response {event.response.status}"
+                recoverable = True
                 if details and details.error:
                     msg = f"{msg}: [{details.error.type}] {details.error.code}"
+                    if details.error.code == "rate_limit_exceeded":
+                        recoverable = False
-                    if details.error.code == "rate_limit_exceeded":
-                        recoverable = False
+                    if details.error.code == "rate_limit_exceeded":
+                        recoverable = True
-                    if details.error.code == "rate_limit_exceeded":
-                        recoverable = False
+                    if details.error.code == "rate_limit_exceeded":
+                        recoverable = True
                 elif details and details.reason:
                     msg = f"{msg}: {details.reason}"
-                self._current_generation._done_fut.set_exception(llm.RealtimeError(msg))
+                self._current_generation._done_fut.set_exception(
+                    llm.RealtimeError(msg, recoverable=recoverable)
+                )
             else:
                 self._current_generation._done_fut.set_result(None)
 

@@ -608,7 +608,7 @@ async def _send_say(
                 )
             )
 
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
@@ -639,7 +639,7 @@ def generate_reply(
 
         def _on_timeout() -> None:
             if not fut.done():
-                fut.set_exception(llm.RealtimeError("generate_reply timed out."))
+                fut.set_exception(llm.RealtimeError("generate_reply timed out.", recoverable=True))
 
         handle = asyncio.get_event_loop().call_later(10.0, _on_timeout)
 

@@ -475,7 +475,7 @@ def _send_audio_bytes(self, audio_data: bytes) -> None:
             self._msg_ch.send_nowait(audio_data)
 
     @utils.log_exceptions(logger=logger)
-    def generate_reply(
+    def _do_generate_reply(
         self,
         *,
         instructions: NotGivenOr[str] = NOT_GIVEN,
@@ -518,7 +518,8 @@ def _on_timeout() -> None:
             if not fut.done():
                 fut.set_exception(
                     llm.RealtimeError(
-                        "generate_reply timed out waiting for generation_created event."
+                        "generate_reply timed out waiting for generation_created event.",
+                        recoverable=True,
                     )
                 )
                 if self._pending_generation_fut is fut: