Add option to enable Claude Thinking #1721

Draft · wants to merge 5 commits into base: 0.x
@@ -63,6 +63,7 @@ class LLMOptions:
     temperature: float | None
     parallel_tool_calls: bool | None
     tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] | None
+    thinking: anthropic.types.ThinkingConfig | None = None
     caching: Literal["ephemeral"] | None = None
     """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""

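For context, this is the extended-thinking payload the new `thinking` field forwards to the Messages API. A minimal sketch against the raw `anthropic` SDK (model name and token numbers are illustrative):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

# Extended thinking is enabled by a config carrying a token budget;
# the request's max_tokens must be strictly greater than budget_tokens.
response = client.messages.create(
    model="claude-3-7-sonnet-latest",  # illustrative; any thinking-capable model
    max_tokens=4096,
    thinking={"type": "enabled", "budget_tokens": 2048},
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
)
```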
@@ -79,6 +80,7 @@ def __init__(
         temperature: float | None = None,
         parallel_tool_calls: bool | None = None,
         tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
+        thinking: anthropic.types.ThinkingConfig | None = None,
         caching: Literal["ephemeral"] | None = None,
     ) -> None:
         """
@@ -117,6 +119,7 @@ def __init__(
             parallel_tool_calls=parallel_tool_calls,
             tool_choice=tool_choice,
             caching=caching,
+            thinking=thinking,
         )
         self._client = client or anthropic.AsyncClient(
             api_key=api_key,
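And this is roughly how the option would be set from user code once merged. A usage sketch, assuming the plugin import path `livekit.plugins.anthropic` and that a plain dict is accepted for the `thinking` config:

```python
from livekit.plugins import anthropic as anthropic_plugin

# Hypothetical wiring: the config is stored on LLMOptions above and
# forwarded to messages.create() by chat() below.
llm = anthropic_plugin.LLM(
    model="claude-3-7-sonnet-latest",
    thinking={"type": "enabled", "budget_tokens": 1024},
)
```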
@@ -141,8 +144,9 @@ def chat(
         temperature: float | None = None,
         n: int | None = 1,
         parallel_tool_calls: bool | None = None,
-        tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]]
-        | None = None,
+        tool_choice: (
+            Union[ToolChoice, Literal["auto", "required", "none"]] | None
+        ) = None,
     ) -> "LLMStream":
         if temperature is None:
             temperature = self._opts.temperature
@@ -202,13 +206,37 @@ def chat(
         )
         collaped_anthropic_ctx = _merge_messages(anthropic_ctx)

+        # `max_tokens` must always be greater than `budget_tokens` when thinking is enabled.
+        thinking_opts = self._opts.thinking
+        max_tokens = opts.get(
+            "max_tokens",
+            (
+                thinking_opts.budget_tokens + 1
+                if thinking_opts and thinking_opts.budget_tokens
+                else 1024
+            ),
+        )
+        if (
+            thinking_opts
+            and thinking_opts.budget_tokens
+            and max_tokens <= thinking_opts.budget_tokens
+        ):
+            raise ValueError(
+                f"max_tokens ({max_tokens}) must be greater than thinking.budget_tokens ({thinking_opts.budget_tokens}) if thinking is enabled"
+            )
+
         stream = self._client.messages.create(
-            max_tokens=opts.get("max_tokens", 1024),
+            max_tokens=max_tokens,
             messages=collaped_anthropic_ctx,
-            model=self._opts.model,
-            temperature=temperature or anthropic.NOT_GIVEN,
-            top_k=n or anthropic.NOT_GIVEN,
+            model=str(self._opts.model),
+            temperature=temperature if temperature is not None else anthropic.NOT_GIVEN,
+            top_k=n if n is not None else anthropic.NOT_GIVEN,
             stream=True,
+            thinking=(
+                self._opts.thinking if self._opts.thinking else anthropic.NOT_GIVEN
+            ),
             **opts,
         )

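Two behavioral notes on this hunk: when the caller does not pass `max_tokens`, the default becomes `budget_tokens + 1`, so an unspecified value can never trip the check; and the `or anthropic.NOT_GIVEN` fallbacks are replaced with explicit `is not None` tests, so a legitimate `temperature=0.0` is no longer silently coerced to NOT_GIVEN. A standalone restatement of the budget rule (function name hypothetical):

```python
def resolve_max_tokens(requested: int | None, budget_tokens: int | None) -> int:
    # Default clears the budget by exactly one token when thinking is enabled.
    default = budget_tokens + 1 if budget_tokens else 1024
    max_tokens = requested if requested is not None else default
    if budget_tokens and max_tokens <= budget_tokens:
        raise ValueError(
            f"max_tokens ({max_tokens}) must be greater than "
            f"thinking.budget_tokens ({budget_tokens}) if thinking is enabled"
        )
    return max_tokens

assert resolve_max_tokens(None, 2048) == 2049  # default stays valid
assert resolve_max_tokens(4096, 2048) == 4096  # explicit value accepted
# resolve_max_tokens(2048, 2048) raises ValueError: equal is not enough
```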