livekit · weiguangli-io · Mar 4, 2026 · devin-ai-integration · Mar 4, 2026
diff --git a/livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py b/livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
@@ -540,7 +540,7 @@ def _process_stream_event(self, data: dict) -> None:
             )
             self._event_ch.send_nowait(interim_event)
 
-        if utterance:
+        if utterance and timed_words:
             if self._last_preflight_start_time == 0.0:
                 self._last_preflight_start_time = start_time
 
@@ -555,12 +555,19 @@ def _process_stream_event(self, data: dict) -> None:
                 len(utterance_words), 1
             )
 
+            # Use the cumulative words text (same as INTERIM) instead of the
+            # chunk-based utterance field.  Both INTERIM and PREFLIGHT events
+            # flow through on_interim_transcript in the framework and are
+            # rendered in replacement mode (is_delta_stream=False).  Using the
+            # chunk-based utterance here would cause the displayed text to
+            # regress/jump when the shorter chunk overwrites the longer
+            # cumulative text for the same segment ID.  See #4779.
             final_event = stt.SpeechEvent(
                 type=stt.SpeechEventType.PREFLIGHT_TRANSCRIPT,
                 alternatives=[
                     stt.SpeechData(
                         language=language,
-                        text=utterance,
+                        text=interim_text,
                         start_time=self._last_preflight_start_time,
                         end_time=end_time,
                         words=utterance_words,
-                        text=interim_text,
-                        start_time=self._last_preflight_start_time,
-                        end_time=end_time,
-                        words=utterance_words,
+                        text=interim_text,
+                        start_time=self._last_preflight_start_time,
+                        end_time=end_time,
+                        words=timed_words,
-                        text=interim_text,
-                        start_time=self._last_preflight_start_time,
-                        end_time=end_time,
-                        words=utterance_words,
+                        text=interim_text,
+                        start_time=self._last_preflight_start_time,
+                        end_time=end_time,
+                        words=timed_words,