Skip to content

Commit 0ead770

Browse files
committed
performance improvement
1 parent fa14ae8 commit 0ead770

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

src/api/models/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ def validate(self, chat_request: ChatRequest):
2929
pass
3030

3131
@abstractmethod
32-
def chat(self, chat_request: ChatRequest) -> ChatResponse:
32+
async def chat(self, chat_request: ChatRequest) -> ChatResponse:
3333
"""Handle a basic chat completion requests."""
3434
pass
3535

3636
@abstractmethod
37-
def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
37+
async def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
3838
"""Handle a basic chat completion requests with stream response."""
3939
pass
4040

src/api/models/bedrock.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import tiktoken
1313
from botocore.config import Config
1414
from fastapi import HTTPException
15+
from starlette.concurrency import run_in_threadpool
1516

1617
from api.models.base import BaseChatModel, BaseEmbeddingsModel
1718
from api.schema import (
@@ -145,7 +146,7 @@ def validate(self, chat_request: ChatRequest):
145146
detail=error,
146147
)
147148

148-
def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
149+
async def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
149150
"""Common logic for invoke bedrock models"""
150151
if DEBUG:
151152
logger.info("Raw request: " + chat_request.model_dump_json())
@@ -157,9 +158,11 @@ def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
157158

158159
try:
159160
if stream:
160-
response = bedrock_runtime.converse_stream(**args)
161+
# Run the blocking boto3 call in a thread pool
162+
response = await run_in_threadpool(bedrock_runtime.converse_stream, **args)
161163
else:
162-
response = bedrock_runtime.converse(**args)
164+
# Run the blocking boto3 call in a thread pool
165+
response = await run_in_threadpool(bedrock_runtime.converse, **args)
163166
except bedrock_runtime.exceptions.ValidationException as e:
164167
logger.error("Validation Error: " + str(e))
165168
raise HTTPException(status_code=400, detail=str(e))
@@ -171,11 +174,11 @@ def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
171174
raise HTTPException(status_code=500, detail=str(e))
172175
return response
173176

174-
def chat(self, chat_request: ChatRequest) -> ChatResponse:
177+
async def chat(self, chat_request: ChatRequest) -> ChatResponse:
175178
"""Default implementation for Chat API."""
176179

177180
message_id = self.generate_message_id()
178-
response = self._invoke_bedrock(chat_request)
181+
response = await self._invoke_bedrock(chat_request)
179182

180183
output_message = response["output"]["message"]
181184
input_tokens = response["usage"]["inputTokens"]
@@ -194,9 +197,9 @@ def chat(self, chat_request: ChatRequest) -> ChatResponse:
194197
logger.info("Proxy response :" + chat_response.model_dump_json())
195198
return chat_response
196199

197-
def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
200+
async def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
198201
"""Default implementation for Chat Stream API"""
199-
response = self._invoke_bedrock(chat_request, stream=True)
202+
response = await self._invoke_bedrock(chat_request, stream=True)
200203
message_id = self.generate_message_id()
201204
stream = response.get("stream")
202205
for chunk in stream:

src/api/routers/chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ async def chat_completions(
4040
model.validate(chat_request)
4141
if chat_request.stream:
4242
return StreamingResponse(content=model.chat_stream(chat_request), media_type="text/event-stream")
43-
return model.chat(chat_request)
43+
return await model.chat(chat_request)

0 commit comments

Comments
 (0)