@@ -1,5 +1,5 @@
 import logging
-from typing import AsyncGenerator, Union
+from typing import Any, AsyncGenerator, Dict, Union
 
 import tiktoken
 from fastapi import FastAPI, HTTPException
@@ -9,15 +9,15 @@
 from .config import ResponseConfig
 from .models import (
     AnthropicChatRequest,
-    AnthropicChatResponse,
     AnthropicStreamDelta,
     AnthropicStreamResponse,
     OpenAIChatRequest,
-    OpenAIChatResponse,
     OpenAIDeltaMessage,
     OpenAIStreamChoice,
     OpenAIStreamResponse,
 )
+from .providers.anthropic import AnthropicProvider
+from .providers.openai import OpenAIProvider
 
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(jsonlogger.JsonFormatter())
@@ -27,6 +27,8 @@
 app = FastAPI(title="Mock LLM Server")
 
 response_config = ResponseConfig()
+openai_provider = OpenAIProvider(response_config)
+anthropic_provider = AnthropicProvider(response_config)
 
 
 def count_tokens(text: str, model: str) -> int:
@@ -80,9 +82,8 @@ async def anthropic_stream_response(
 @app.post("/v1/chat/completions", response_model=None)
 async def openai_chat_completion(
     request: OpenAIChatRequest,
-) -> Union[OpenAIChatResponse, StreamingResponse]:
-    """Handle chat completion requests, supporting
-    both regular and streaming responses."""
+) -> Union[Dict[str, Any], StreamingResponse]:
+    """Handle OpenAI chat completion requests"""
     try:
         logger.info(
             "Received chat completion request",
@@ -92,47 +93,7 @@ async def openai_chat_completion(
                 "stream": request.stream,
             },
         )
-
-        last_message = next(
-            (msg for msg in reversed(request.messages) if msg.role == "user"), None
-        )
-
-        if not last_message:
-            raise HTTPException(
-                status_code=400, detail="No user message found in request"
-            )
-
-        if request.stream:
-            return StreamingResponse(
-                openai_stream_response(last_message.content, request.model),
-                media_type="text/event-stream",
-            )
-
-        response_content = await response_config.get_response_with_lag(
-            last_message.content
-        )
-
-        # Calculate mock token counts
-        prompt_tokens = count_tokens(str(request.messages), request.model)
-        completion_tokens = count_tokens(response_content, request.model)
-        total_tokens = prompt_tokens + completion_tokens
-
-        return OpenAIChatResponse(
-            model=request.model,
-            choices=[
-                {
-                    "index": 0,
-                    "message": {"role": "assistant", "content": response_content},
-                    "finish_reason": "stop",
-                }
-            ],
-            usage={
-                "prompt_tokens": prompt_tokens,
-                "completion_tokens": completion_tokens,
-                "total_tokens": total_tokens,
-            },
-        )
-
+        return await openai_provider.handle_chat_completion(request)
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
         raise HTTPException(
@@ -143,9 +104,8 @@ async def openai_chat_completion(
 @app.post("/v1/messages", response_model=None)
 async def anthropic_chat_completion(
     request: AnthropicChatRequest,
-) -> Union[AnthropicChatResponse, StreamingResponse]:
-    """Handle Anthropic chat completion requests,
-    supporting both regular and streaming responses."""
+) -> Union[Dict[str, Any], StreamingResponse]:
+    """Handle Anthropic chat completion requests"""
     try:
         logger.info(
             "Received Anthropic chat completion request",
@@ -155,41 +115,7 @@ async def anthropic_chat_completion(
                 "stream": request.stream,
             },
         )
-
-        last_message = next(
-            (msg for msg in reversed(request.messages) if msg.role == "user"), None
-        )
-
-        if not last_message:
-            raise HTTPException(
-                status_code=400, detail="No user message found in request"
-            )
-
-        if request.stream:
-            return StreamingResponse(
-                anthropic_stream_response(last_message.content, request.model),
-                media_type="text/event-stream",
-            )
-
-        response_content = await response_config.get_response_with_lag(
-            last_message.content
-        )
-
-        # Calculate mock token counts
-        prompt_tokens = count_tokens(str(request.messages), request.model)
-        completion_tokens = count_tokens(response_content, request.model)
-        total_tokens = prompt_tokens + completion_tokens
-
-        return AnthropicChatResponse(
-            model=request.model,
-            content=[{"type": "text", "text": response_content}],
-            usage={
-                "input_tokens": prompt_tokens,
-                "output_tokens": completion_tokens,
-                "total_tokens": total_tokens,
-            },
-        )
-
+        return await anthropic_provider.handle_chat_completion(request)
     except Exception as e:
         logger.error(f"Error processing request: {str(e)}")
        raise HTTPException(
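Context for the commit: the per-endpoint logic removed above (user-message validation, streaming dispatch, simulated lag, token counting, response assembly) now lives in the new provider classes. Below is a minimal sketch of the shape OpenAIProvider plausibly takes, assuming the moved logic is otherwise unchanged. Only handle_chat_completion, the ResponseConfig constructor argument, and the Dict[str, Any] / StreamingResponse return type are visible in this diff; every other name in the sketch is an assumption.

# Hypothetical reconstruction, not the actual providers/openai.py.
from typing import Any, AsyncGenerator, Dict, Union

from fastapi import HTTPException
from fastapi.responses import StreamingResponse


class OpenAIProvider:
    def __init__(self, config):
        # ResponseConfig instance, as passed in main.py
        self.config = config

    async def handle_chat_completion(
        self, request
    ) -> Union[Dict[str, Any], StreamingResponse]:
        # The validation the endpoint used to perform inline
        last_message = next(
            (msg for msg in reversed(request.messages) if msg.role == "user"), None
        )
        if not last_message:
            raise HTTPException(
                status_code=400, detail="No user message found in request"
            )
        if request.stream:
            return StreamingResponse(
                self._stream(last_message.content, request.model),
                media_type="text/event-stream",
            )
        content = await self.config.get_response_with_lag(last_message.content)
        # Plain dict instead of the removed OpenAIChatResponse model,
        # matching the endpoint's new Dict[str, Any] return annotation
        return {
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": content},
                    "finish_reason": "stop",
                }
            ],
        }

    async def _stream(
        self, content: str, model: str
    ) -> AsyncGenerator[str, None]:
        # Placeholder for a port of the module-level openai_stream_response;
        # the real chunk format presumably serializes OpenAIStreamResponse
        yield f"data: {content}\n\n"
        yield "data: [DONE]\n\n"

AnthropicProvider would follow the same pattern, returning the Anthropic content/usage shape that the removed handler built inline.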