-from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
+import logging
+from typing import Any, AsyncIterable
+
+from anthropic import AsyncAnthropic, AsyncAnthropicBedrock, AsyncStream, NotGiven
 from anthropic._types import NOT_GIVEN
+from anthropic.types import (
+    RawContentBlockDeltaEvent,
+    RawContentBlockStartEvent,
+    RawContentBlockStopEvent,
+    RawMessageDeltaEvent,
+    RawMessageStartEvent,
+    RawMessageStopEvent,
+    RawMessageStreamEvent,
+)
 from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel
 
 from .helpers import log, map_messages, map_resp, map_tools
 
 
 async def completions(client: AsyncAnthropic | AsyncAnthropicBedrock, input: dict):
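+    # Maps an incoming chat-completions style request onto the Anthropic
+    # Messages API and returns the result as a streaming (SSE) response.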
+    is_thinking = False
+
+    model = str(input["model"])
+    if model.endswith("-thinking"):
+        is_thinking = True
+        model = model.removesuffix("-thinking")
+
+    # max_tokens defaults:
+    # - 4096 for regular models, so that it works with even the smallest models
+    # - 64000 for thinking models - the current max for 3.7 Sonnet with extended thinking
+    max_tokens = input.get("max_tokens", 64000 if is_thinking else 4096)
+    if max_tokens is not None:
+        max_tokens = int(max_tokens)
+
+    thinking_config: Any | NotGiven = NOT_GIVEN
+    if is_thinking:
+        thinking_config = {
+            "type": "enabled",
+            "budget_tokens": round(
+                max_tokens / 2
+            ),  # TODO: figure out a good percentage of max_tokens to use for thinking
+        }
+
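+    # For example, a request for model "<base-model>-thinking" with the default
+    # max_tokens=64000 ends up with budget_tokens=32000 reserved for thinking.
+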
     tools = input.get("tools", NOT_GIVEN)
     if tools is not NOT_GIVEN:
         tools = map_tools(tools)
 
     system, messages = map_messages(input["messages"])
 
-    max_tokens = input.get("max_tokens", 1024)
-    if max_tokens is not None:
-        max_tokens = int(max_tokens)
-
-    temperature = input.get("temperature", NOT_GIVEN)
+    temperature = input.get("temperature", NOT_GIVEN) if not is_thinking else NOT_GIVEN
     if temperature is not NOT_GIVEN:
         temperature = float(temperature)
 
-    top_k = input.get("top_k", NOT_GIVEN)
+    top_k = input.get("top_k", NOT_GIVEN) if not is_thinking else NOT_GIVEN
     if top_k is not NOT_GIVEN:
         top_k = int(top_k)
 
-    top_p = input.get("top_p", NOT_GIVEN)
+    top_p = input.get("top_p", NOT_GIVEN) if not is_thinking else NOT_GIVEN
     if top_p is not NOT_GIVEN:
         top_p = float(top_p)
 
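+    # Note: the Anthropic API does not accept temperature/top_k/top_p overrides
+    # together with extended thinking, hence the NOT_GIVEN fallbacks above.
+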
+    stream = input.get("stream", False)
+
+    log(f"thinking_config: {thinking_config}")
     try:
         response = await client.messages.create(
+            thinking=thinking_config,
             max_tokens=max_tokens,
             system=system,
             messages=messages,
-            model=input["model"],
+            model=model,
             temperature=temperature,
             tools=tools,
             top_k=top_k,
             top_p=top_p,
+            stream=stream,
         )
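+        # With stream=True the SDK returns an async stream of raw events
+        # instead of a complete Message object.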
+        if stream:
+            return StreamingResponse(
+                convert_stream(response),
+                media_type="application/x-ndjson",
+            )
+        else:
+            log(f"Anthropic response: {response.model_dump_json()}")
+
+            mapped_response = map_resp(response)
+
+            log(f"Mapped Anthropic response: {mapped_response.model_dump_json()}")
+            return StreamingResponse(
+                "data: " + mapped_response.model_dump_json() + "\n\n",
+                media_type="application/x-ndjson",
+            )
+
     except Exception as e:
+        logging.error(f"Anthropic API error: {e}")
         return JSONResponse(
-            content={"error": str(e)}, status_code=e.__dict__.get("status_code", 500)
+            content={"error": str(e)}, status_code=getattr(e, "status_code", 500)
         )
 
-    log(f"Anthropic response: {response.model_dump_json()}")
 
-    mapped_response = map_resp(response)
+def map_event(event: RawMessageStreamEvent) -> BaseModel:
+    # Every known event type is currently passed through unchanged; the
+    # per-type branches are kept as hooks for mapping individual events later.
+    if isinstance(event, RawContentBlockStartEvent):
+        return event
+    elif isinstance(event, RawContentBlockDeltaEvent):
+        return event
+    elif isinstance(event, RawContentBlockStopEvent):
+        return event
+    elif isinstance(event, RawMessageStartEvent):
+        return event
+    elif isinstance(event, RawMessageDeltaEvent):
+        return event
+    elif isinstance(event, RawMessageStopEvent):
+        return event
+    else:
+        raise ValueError(f"Unknown event type: {event}")
 
-    log(f"Mapped Anthropic response: {mapped_response.model_dump_json()}")
 
-    return StreamingResponse(
-        "data: " + mapped_response.model_dump_json() + "\n\n",
-        media_type="application/x-ndjson",
-    )
+async def convert_stream(
+    stream: AsyncStream[RawMessageStreamEvent],
+) -> AsyncIterable[str]:
+    # Re-emit each raw Anthropic event as one SSE data frame.
+    async for chunk in stream:
+        log(f"Anthropic event: {chunk.model_dump_json()}")
+        yield "data: " + map_event(chunk).model_dump_json() + "\n\n"
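+
+# Illustrative output: each yielded frame looks like
+#   data: {"type": "content_block_delta", "index": 0, "delta": {...}}\n\n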