Add option to enable Claude Thinking #1721

Draft · wants to merge 5 commits into base: 0.x
@@ -63,6 +63,7 @@ class LLMOptions:
     temperature: float | None
     parallel_tool_calls: bool | None
     tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] | None
+    thinking: anthropic.types.ThinkingConfig | None = None
     caching: Literal["ephemeral"] | None = None
     """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""

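For context, this is the extended-thinking payload the new `thinking` field forwards to the Messages API. A minimal sketch against the raw `anthropic` SDK (model name and token numbers are illustrative):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

# Extended thinking is enabled by a config carrying a token budget;
# the request's max_tokens must be strictly greater than budget_tokens.
response = client.messages.create(
    model="claude-3-7-sonnet-latest",  # illustrative; any thinking-capable model
    max_tokens=4096,
    thinking={"type": "enabled", "budget_tokens": 2048},
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
)
```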
@@ -79,6 +80,7 @@ def __init__(
         temperature: float | None = None,
         parallel_tool_calls: bool | None = None,
         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
+        thinking: anthropic.types.ThinkingConfig | None = None,
         caching: Literal["ephemeral"] | None = None,
     ) -> None:
         """
@@ -117,6 +119,7 @@ def __init__(
             parallel_tool_calls=parallel_tool_calls,
             tool_choice=tool_choice,
             caching=caching,
+            thinking=thinking,
         )
         self._client = client or anthropic.AsyncClient(
             api_key=api_key,
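And this is roughly how the option would be set from user code once merged. A usage sketch, assuming the plugin import path `livekit.plugins.anthropic` and that a plain dict is accepted for the `thinking` config:

```python
from livekit.plugins import anthropic as anthropic_plugin

# Hypothetical wiring: the config is stored on LLMOptions above and
# forwarded to messages.create() by chat() below.
llm = anthropic_plugin.LLM(
    model="claude-3-7-sonnet-latest",
    thinking={"type": "enabled", "budget_tokens": 1024},
)
```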
@@ -141,8 +144,9 @@ def chat(
         temperature: float | None = None,
         n: int | None = 1,
         parallel_tool_calls: bool | None = None,
-        tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]]
-        | None = None,
+        tool_choice: (
+            Union[ToolChoice, Literal["auto", "required", "none"]] | None
+        ) = None,
     ) -> "LLMStream":
         if temperature is None:
             temperature = self._opts.temperature
@@ -202,13 +206,37 @@ def chat(
         )
         collaped_anthropic_ctx = _merge_messages(anthropic_ctx)

+        # `max_tokens` must always be greater than `budget_tokens` when thinking is enabled.
+        thinking_opts = self._opts.thinking
+        max_tokens = opts.get(
+            "max_tokens",
+            (
+                thinking_opts.budget_tokens + 1
+                if thinking_opts and thinking_opts.budget_tokens
+                else 1024
+            ),
+        )
+        if (
+            thinking_opts
+            and thinking_opts.budget_tokens
+            and max_tokens <= thinking_opts.budget_tokens
+        ):
+            raise ValueError(
+                f"max_tokens ({max_tokens}) must be greater than thinking.budget_tokens ({thinking_opts.budget_tokens}) if thinking is enabled"
+            )
+
         stream = self._client.messages.create(
-            max_tokens=opts.get("max_tokens", 1024),
+            max_tokens=max_tokens,
             messages=collaped_anthropic_ctx,
-            model=self._opts.model,
-            temperature=temperature or anthropic.NOT_GIVEN,
-            top_k=n or anthropic.NOT_GIVEN,
+            model=str(self._opts.model),
+            temperature=temperature if temperature is not None else anthropic.NOT_GIVEN,
+            top_k=n if n is not None else anthropic.NOT_GIVEN,
             stream=True,
+            thinking=(
+                self._opts.thinking if self._opts.thinking else anthropic.NOT_GIVEN
+            ),
             **opts,
         )

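Two behavioral notes on this hunk: when the caller does not pass `max_tokens`, the default becomes `budget_tokens + 1`, so an unspecified value can never trip the check; and the `or anthropic.NOT_GIVEN` fallbacks are replaced with explicit `is not None` tests, so a legitimate `temperature=0.0` is no longer silently coerced to NOT_GIVEN. A standalone restatement of the budget rule (function name hypothetical):

```python
def resolve_max_tokens(requested: int | None, budget_tokens: int | None) -> int:
    # Default clears the budget by exactly one token when thinking is enabled.
    default = budget_tokens + 1 if budget_tokens else 1024
    max_tokens = requested if requested is not None else default
    if budget_tokens and max_tokens <= budget_tokens:
        raise ValueError(
            f"max_tokens ({max_tokens}) must be greater than "
            f"thinking.budget_tokens ({budget_tokens}) if thinking is enabled"
        )
    return max_tokens

assert resolve_max_tokens(None, 2048) == 2049  # default stays valid
assert resolve_max_tokens(4096, 2048) == 4096  # explicit value accepted
# resolve_max_tokens(2048, 2048) raises ValueError: equal is not enough
```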