Skip to content

Commit de9376b

Browse files
committed
feat: init claude3.7 extended thinking mode
1 parent 37752f0 commit de9376b

File tree

4 files changed

+107
-19
lines changed

4 files changed

+107
-19
lines changed
Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,130 @@
1-
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
1+
import logging
2+
from typing import Any, AsyncIterable
3+
4+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, AsyncStream
25
from anthropic._types import NOT_GIVEN
6+
from anthropic.types import (
7+
RawContentBlockDeltaEvent,
8+
RawContentBlockStartEvent,
9+
RawContentBlockStopEvent,
10+
RawMessageDeltaEvent,
11+
RawMessageStartEvent,
12+
RawMessageStopEvent,
13+
RawMessageStreamEvent,
14+
)
315
from fastapi.responses import JSONResponse, StreamingResponse
16+
from pydantic import BaseModel
417

518
from .helpers import log, map_messages, map_resp, map_tools
619

720

821
async def completions(client: AsyncAnthropic | AsyncAnthropicBedrock, input: dict):
    """Proxy an OpenAI-style chat-completions request to the Anthropic API.

    Args:
        client: An initialized async Anthropic (or Bedrock) client.
        input: Incoming request body. Recognized keys: "model", "messages",
            "max_tokens", "temperature", "top_k", "top_p", "tools", "stream".

    Returns:
        A StreamingResponse emitting SSE-style ``data: ...`` lines on success,
        or a JSONResponse carrying the error message on failure.
    """
    # A "-thinking" suffix on the model id selects extended thinking mode;
    # the suffix is stripped before the real model id is sent to Anthropic.
    model = str(input["model"])
    is_thinking = model.endswith("-thinking")
    if is_thinking:
        model = model.removesuffix("-thinking")

    # max_tokens defaults:
    # - 4096 for regular models, so that it works with even the smallest models
    # - 64000 for thinking models - the max for 3.7 Sonnet with extended
    #   thinking mode right now
    max_tokens = input.get("max_tokens", 64000 if is_thinking else 4096)
    if max_tokens is not None:
        max_tokens = int(max_tokens)

    # NOT_GIVEN is a sentinel value, not a type, so the annotation is just Any.
    thinking_config: Any = NOT_GIVEN
    if is_thinking:
        thinking_config = {
            "type": "enabled",
            # Fall back to the thinking default if the request explicitly sent
            # max_tokens=None (round(None / 2) would raise TypeError).
            # TODO: figure out a good percentage of max_tokens to use for thinking
            "budget_tokens": round((max_tokens or 64000) / 2),
        }

    tools = input.get("tools", NOT_GIVEN)
    if tools is not NOT_GIVEN:
        tools = map_tools(tools)

    system, messages = map_messages(input["messages"])

    # Sampling overrides are dropped for thinking models — extended thinking
    # does not accept them (NOTE(review): per Anthropic docs; confirm on SDK
    # upgrades).
    temperature = NOT_GIVEN if is_thinking else input.get("temperature", NOT_GIVEN)
    if temperature is not NOT_GIVEN:
        temperature = float(temperature)

    top_k = NOT_GIVEN if is_thinking else input.get("top_k", NOT_GIVEN)
    if top_k is not NOT_GIVEN:
        top_k = int(top_k)

    top_p = NOT_GIVEN if is_thinking else input.get("top_p", NOT_GIVEN)
    if top_p is not NOT_GIVEN:
        top_p = float(top_p)

    stream = input.get("stream", False)

    log(f"thinking_config: {thinking_config}")
    try:
        response = await client.messages.create(
            thinking=thinking_config,
            max_tokens=max_tokens,
            system=system,
            messages=messages,
            model=model,
            temperature=temperature,
            tools=tools,
            top_k=top_k,
            top_p=top_p,
            stream=stream,
        )
        if stream:
            # Forward every event as it arrives. (Returning from inside an
            # `async for` over the first event would truncate the stream to a
            # single chunk.)
            return StreamingResponse(
                convert_stream(response),
                media_type="application/x-ndjson",
            )

        log(f"Anthropic response: {response.model_dump_json()}")

        mapped_response = map_resp(response)

        log(f"Mapped Anthropic response: {mapped_response.model_dump_json()}")
        # Single response is still framed as one SSE chunk so both branches
        # produce the same wire format.
        return StreamingResponse(
            "data: " + mapped_response.model_dump_json() + "\n\n",
            media_type="application/x-ndjson",
        )
    except Exception as e:
        log(f"Anthropic API error: {e}")
        # Anthropic SDK errors carry a status_code attribute; default to 500.
        return JSONResponse(
            content={"error": str(e)}, status_code=getattr(e, "status_code", 500)
        )
46107

47-
log(f"Anthropic response: {response.model_dump_json()}")
48108

49-
mapped_response = map_resp(response)
109+
def map_event(event: RawMessageStreamEvent) -> BaseModel:
    """Validate a raw Anthropic stream event and pass it through unchanged.

    Every recognized event type is returned as-is; the check only guards
    against event types this code does not know about.

    Args:
        event: A raw message-stream event from the Anthropic SDK.

    Returns:
        The event itself, unchanged.

    Raises:
        ValueError: If the event is not one of the known raw event types.
    """
    # The original if/elif chain returned `event` in every branch, so a single
    # isinstance check against the known types is equivalent.
    known_event_types = (
        RawContentBlockStartEvent,
        RawContentBlockDeltaEvent,
        RawContentBlockStopEvent,
        RawMessageStartEvent,
        RawMessageDeltaEvent,
        RawMessageStopEvent,
    )
    if isinstance(event, known_event_types):
        return event
    raise ValueError(f"Unknown event type: {event}")
50124

51-
log(f"Mapped Anthropic response: {mapped_response.model_dump_json()}")
52125

53-
return StreamingResponse(
54-
"data: " + mapped_response.model_dump_json() + "\n\n",
55-
media_type="application/x-ndjson",
56-
)
126+
async def convert_stream(
    stream: AsyncStream[RawMessageStreamEvent],
) -> AsyncIterable[str]:
    """Yield each raw Anthropic stream event as an SSE-framed JSON line."""
    async for event in stream:
        payload = map_event(event).model_dump_json()
        yield f"data: {payload}\n\n"

anthropic-model-provider/main.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import anthropic.pagination
55
from anthropic import AsyncAnthropic
6+
from anthropic.types import ModelInfo
67
from fastapi import FastAPI, Request
78
from fastapi.responses import JSONResponse, StreamingResponse
89

@@ -15,6 +16,8 @@
1516
app = FastAPI()
1617
uri = "http://127.0.0.1:" + os.environ.get("PORT", "8000")
1718

19+
thinking_models_prefixes = ["claude-3-7-sonnet"]
20+
1821

1922
def log(*args):
2023
if debug:
@@ -40,12 +43,23 @@ async def list_models() -> JSONResponse:
4043
resp: anthropic.pagination.AsyncPage[
4144
anthropic.types.ModelInfo
4245
] = await client.models.list(limit=20)
46+
thinking_models = []
47+
for model in resp.data:
48+
if any(model.id.startswith(m) for m in thinking_models_prefixes):
49+
thinking_models.append(
50+
ModelInfo(
51+
id=model.id + "-thinking",
52+
display_name=model.display_name + " (Thinking)",
53+
created_at=model.created_at,
54+
type="model",
55+
)
56+
)
4357
return JSONResponse(
4458
content={
4559
"object": "list",
4660
"data": [
4761
set_model_usage(model.model_dump(exclude={"created_at"}))
48-
for model in resp.data
62+
for model in resp.data + thinking_models
4963
],
5064
}
5165
)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
fastapi
22
uvicorn[standard]
3-
anthropic==0.43.0
3+
anthropic==0.49.0
44
openai>=1.54.3

anthropic-model-provider/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44
name="anthropic_common",
55
version="0.1",
66
packages=find_packages(include=["anthropic_common"]),
7-
install_requires=["fastapi", "openai", "anthropic>=0.43.0", "openai>=1.35.7"],
7+
install_requires=["fastapi", "openai", "anthropic>=0.49.0", "openai>=1.35.7"],
88
)

0 commit comments

Comments
 (0)