Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ def __init__(
base_url (NotGivenOr[str]): Custom base URL for the API. Optional.
streaming_latency (NotGivenOr[int]): Optimize for streaming latency, defaults to 0 - disabled. 4 for max latency optimizations. deprecated
inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
auto_mode (bool): Reduces latency by disabling chunk schedule and buffers. Sentence tokenizer will be used to synthesize one sentence at a time. Defaults to True.
auto_mode (bool): Reduces latency by disabling chunk schedule and buffers.
Sentence tokenizer will be used to synthesize one sentence at a time.
Defaults to True unless ``chunk_length_schedule`` is provided.
word_tokenizer (NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer]): Tokenizer for processing text. Defaults to basic WordTokenizer when auto_mode=False, `livekit.agents.tokenize.blingfire.SentenceTokenizer` otherwise.
enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
enable_logging (bool): Enable logging of the request. When set to false, zero retention mode will be used. Defaults to True.
Expand Down Expand Up @@ -160,7 +162,7 @@ def __init__(
)

if not is_given(auto_mode):
auto_mode = True
auto_mode = not is_given(chunk_length_schedule)

if not is_given(word_tokenizer):
word_tokenizer = (
Expand Down Expand Up @@ -497,6 +499,32 @@ class _TTSOptions:
pronunciation_dictionary_locators: NotGivenOr[list[PronunciationDictionaryLocator]]


def _build_context_init_packet(opts: _TTSOptions, *, context_id: str) -> dict[str, Any]:
    """Assemble the initial packet for a newly opened synthesis context.

    The packet always carries a single-space ``text``, the ``voice_settings``
    mapping (empty when no voice settings were given) and the ``context_id``.
    A ``generation_config`` entry holding ``chunk_length_schedule`` and a
    ``pronunciation_dictionary_locators`` list are added only when the
    corresponding option is given.

    Args:
        opts: TTS options to read the optional settings from.
        context_id: Identifier of the context this packet initializes.

    Returns:
        The packet as a plain dictionary.
    """
    if is_given(opts.voice_settings):
        # Drop unset (None) fields from the serialized settings.
        settings = _strip_nones(dataclasses.asdict(opts.voice_settings))
    else:
        settings = {}

    packet: dict[str, Any] = {
        "text": " ",
        "voice_settings": settings,
        "context_id": context_id,
    }

    schedule = opts.chunk_length_schedule
    if is_given(schedule):
        packet["generation_config"] = {"chunk_length_schedule": schedule}

    locators = opts.pronunciation_dictionary_locators
    if is_given(locators):
        packet["pronunciation_dictionary_locators"] = [
            {
                "pronunciation_dictionary_id": entry.pronunciation_dictionary_id,
                "version_id": entry.version_id,
            }
            for entry in locators
        ]

    return packet


@dataclass
class _SynthesizeContent:
context_id: str
Expand Down Expand Up @@ -595,24 +623,10 @@ async def _send_loop(self) -> None:
is_new_context = msg.context_id not in self._active_contexts

if is_new_context:
voice_settings = (
_strip_nones(dataclasses.asdict(self._opts.voice_settings))
if is_given(self._opts.voice_settings)
else {}
init_pkt = _build_context_init_packet(
self._opts,
context_id=msg.context_id,
)
init_pkt: dict[str, Any] = {
"text": " ",
"voice_settings": voice_settings,
"context_id": msg.context_id,
}
if is_given(self._opts.pronunciation_dictionary_locators):
init_pkt["pronunciation_dictionary_locators"] = [
{
"pronunciation_dictionary_id": locator.pronunciation_dictionary_id,
"version_id": locator.version_id,
}
for locator in self._opts.pronunciation_dictionary_locators
]
await self._ws.send_json(init_pkt)
self._active_contexts.add(msg.context_id)

Expand Down
64 changes: 64 additions & 0 deletions tests/test_plugin_elevenlabs_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Unit tests for ElevenLabs TTS plugin configuration behavior."""

from livekit.plugins.elevenlabs import tts as elevenlabs_tts


def test_auto_mode_defaults_to_true_without_chunk_length_schedule() -> None:
    """auto_mode resolves to True when no chunk_length_schedule is passed."""
    engine = elevenlabs_tts.TTS(api_key="test-key")
    resolved = engine._opts.auto_mode
    assert resolved is True


def test_auto_mode_defaults_to_false_with_chunk_length_schedule() -> None:
    """auto_mode resolves to False when only a chunk_length_schedule is passed."""
    schedule = [120, 160, 250, 290]
    engine = elevenlabs_tts.TTS(api_key="test-key", chunk_length_schedule=schedule)
    resolved = engine._opts.auto_mode
    assert resolved is False


def test_auto_mode_respects_explicit_value_with_chunk_length_schedule() -> None:
    """An explicit auto_mode=True is kept even when a chunk_length_schedule is passed."""
    kwargs = {
        "api_key": "test-key",
        "chunk_length_schedule": [120, 160, 250, 290],
        "auto_mode": True,
    }
    engine = elevenlabs_tts.TTS(**kwargs)
    assert engine._opts.auto_mode is True


def test_build_context_init_packet_includes_generation_config() -> None:
    """A given chunk_length_schedule appears under generation_config in the packet."""
    engine = elevenlabs_tts.TTS(
        api_key="test-key",
        chunk_length_schedule=[80, 120],
        auto_mode=False,
    )
    build_packet = elevenlabs_tts._build_context_init_packet  # pyright: ignore[reportPrivateUsage]
    init_pkt = build_packet(engine._opts, context_id="ctx-1")

    assert init_pkt["text"] == " "
    assert init_pkt["context_id"] == "ctx-1"
    expected_config = {"chunk_length_schedule": [80, 120]}
    assert init_pkt["generation_config"] == expected_config


def test_build_context_init_packet_omits_generation_config_when_not_set() -> None:
    """Without a chunk_length_schedule the packet has no generation_config key."""
    engine = elevenlabs_tts.TTS(api_key="test-key")
    build_packet = elevenlabs_tts._build_context_init_packet  # pyright: ignore[reportPrivateUsage]
    init_pkt = build_packet(engine._opts, context_id="ctx-2")

    assert "generation_config" not in init_pkt


def test_build_context_init_packet_includes_pronunciation_dictionaries() -> None:
    """Given locators are serialized into the packet as id/version dictionaries."""
    locator = elevenlabs_tts.PronunciationDictionaryLocator(
        pronunciation_dictionary_id="dict-1",
        version_id="v1",
    )
    engine = elevenlabs_tts.TTS(
        api_key="test-key",
        pronunciation_dictionary_locators=[locator],
    )
    build_packet = elevenlabs_tts._build_context_init_packet  # pyright: ignore[reportPrivateUsage]
    init_pkt = build_packet(engine._opts, context_id="ctx-3")

    expected_locators = [
        {
            "pronunciation_dictionary_id": "dict-1",
            "version_id": "v1",
        }
    ]
    assert init_pkt["pronunciation_dictionary_locators"] == expected_locators