livekit · IanSteno · Mar 4, 2026 · Mar 4, 2026
diff --git a/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py b/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py
@@ -129,7 +129,9 @@ def __init__(
             base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
             streaming_latency (NotGivenOr[int]): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
             inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
-            auto_mode (bool): Reduces latency by disabling chunk schedule and buffers. Sentence tokenizer will be used to synthesize one sentence at a time. Defaults to True.
+            auto_mode (bool): Reduces latency by disabling the chunk schedule and buffers.
+                A sentence tokenizer is used to synthesize one sentence at a time.
+                Defaults to True, or to False when ``chunk_length_schedule`` is provided.
             word_tokenizer (NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer when auto_mode=False, `livekit.agents.tokenize.blingfire.SentenceTokenizer` otherwise.
             enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
             enable_logging (bool): Enable logging of the request. When set to false, zero retention mode will be used. Defaults to True.
@@ -160,7 +162,7 @@ def __init__(
             )
 
         if not is_given(auto_mode):
-            auto_mode = True
+            auto_mode = not is_given(chunk_length_schedule)
 
         if not is_given(word_tokenizer):
             word_tokenizer = (
@@ -497,6 +499,32 @@ class _TTSOptions:
     pronunciation_dictionary_locators: NotGivenOr[list[PronunciationDictionaryLocator]]
 
 
+def _build_context_init_packet(opts: _TTSOptions, *, context_id: str) -> dict[str, Any]:
+    """Build the first message sent on the websocket when a new context is opened."""
+    packet: dict[str, Any] = {
+        "text": " ",
+        "voice_settings": (
+            _strip_nones(dataclasses.asdict(opts.voice_settings))
+            if is_given(opts.voice_settings)
+            else {}
+        ),
+        "context_id": context_id,
+    }
+    if is_given(opts.chunk_length_schedule):
+        packet["generation_config"] = {
+            "chunk_length_schedule": opts.chunk_length_schedule,
+        }
+    if is_given(opts.pronunciation_dictionary_locators):
+        packet["pronunciation_dictionary_locators"] = [
+            {
+                "pronunciation_dictionary_id": entry.pronunciation_dictionary_id,
+                "version_id": entry.version_id,
+            }
+            for entry in opts.pronunciation_dictionary_locators
+        ]
+    return packet
+
+
 @dataclass
 class _SynthesizeContent:
     context_id: str
@@ -595,24 +623,10 @@ async def _send_loop(self) -> None:
                     is_new_context = msg.context_id not in self._active_contexts
 
                     if is_new_context:
-                        voice_settings = (
-                            _strip_nones(dataclasses.asdict(self._opts.voice_settings))
-                            if is_given(self._opts.voice_settings)
-                            else {}
+                        init_pkt = _build_context_init_packet(
+                            self._opts,
+                            context_id=msg.context_id,
                         )
-                        init_pkt: dict[str, Any] = {
-                            "text": " ",
-                            "voice_settings": voice_settings,
-                            "context_id": msg.context_id,
-                        }
-                        if is_given(self._opts.pronunciation_dictionary_locators):
-                            init_pkt["pronunciation_dictionary_locators"] = [
-                                {
-                                    "pronunciation_dictionary_id": locator.pronunciation_dictionary_id,
-                                    "version_id": locator.version_id,
-                                }
-                                for locator in self._opts.pronunciation_dictionary_locators
-                            ]
                         await self._ws.send_json(init_pkt)
                         self._active_contexts.add(msg.context_id)
 

diff --git a/tests/test_plugin_elevenlabs_tts.py b/tests/test_plugin_elevenlabs_tts.py
@@ -0,0 +1,68 @@
+"""Unit tests for ElevenLabs TTS plugin configuration behavior."""
+
+from livekit.plugins.elevenlabs import tts as elevenlabs_tts
+
+
+def test_auto_mode_defaults_to_true_without_chunk_length_schedule() -> None:
+    """With neither auto_mode nor a chunk schedule passed, auto_mode resolves to True."""
+    plugin = elevenlabs_tts.TTS(api_key="test-key")
+    assert plugin._opts.auto_mode is True
+
+
+def test_auto_mode_defaults_to_false_with_chunk_length_schedule() -> None:
+    """Passing only a chunk schedule makes auto_mode resolve to False."""
+    plugin = elevenlabs_tts.TTS(api_key="test-key", chunk_length_schedule=[120, 160, 250, 290])
+    assert plugin._opts.auto_mode is False
+
+
+def test_auto_mode_respects_explicit_value_with_chunk_length_schedule() -> None:
+    """An explicitly passed auto_mode is kept even alongside a chunk schedule."""
+    plugin = elevenlabs_tts.TTS(
+        api_key="test-key",
+        chunk_length_schedule=[120, 160, 250, 290],
+        auto_mode=True,
+    )
+    assert plugin._opts.auto_mode is True
+
+
+def test_build_context_init_packet_includes_generation_config() -> None:
+    """A configured chunk schedule is forwarded under ``generation_config``."""
+    plugin = elevenlabs_tts.TTS(
+        api_key="test-key", chunk_length_schedule=[80, 120], auto_mode=False
+    )
+    init_pkt = elevenlabs_tts._build_context_init_packet(  # pyright: ignore[reportPrivateUsage]
+        plugin._opts, context_id="ctx-1"
+    )
+
+    assert init_pkt["text"] == " "
+    assert init_pkt["context_id"] == "ctx-1"
+    assert init_pkt["generation_config"] == {"chunk_length_schedule": [80, 120]}
+
+
+def test_build_context_init_packet_omits_generation_config_when_not_set() -> None:
+    """Without a chunk schedule the packet carries no ``generation_config`` key."""
+    plugin = elevenlabs_tts.TTS(api_key="test-key")
+    init_pkt = elevenlabs_tts._build_context_init_packet(  # pyright: ignore[reportPrivateUsage]
+        plugin._opts, context_id="ctx-2"
+    )
+
+    assert "generation_config" not in init_pkt
+
+
+def test_build_context_init_packet_includes_pronunciation_dictionaries() -> None:
+    """Each configured locator is serialized as an id/version mapping in the packet."""
+    locator = elevenlabs_tts.PronunciationDictionaryLocator(
+        pronunciation_dictionary_id="dict-1",
+        version_id="v1",
+    )
+    plugin = elevenlabs_tts.TTS(api_key="test-key", pronunciation_dictionary_locators=[locator])
+    init_pkt = elevenlabs_tts._build_context_init_packet(  # pyright: ignore[reportPrivateUsage]
+        plugin._opts, context_id="ctx-3"
+    )
+
+    assert init_pkt["pronunciation_dictionary_locators"] == [
+        {
+            "pronunciation_dictionary_id": "dict-1",
+            "version_id": "v1",
+        }
+    ]