Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion fastdeploy/engine/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ class CompletionOutput:

index: int
send_idx: int
- token_ids: list[int]
+ token_ids: list[Any]
+ decode_type: int = 0
logprob: Optional[float] = None
top_logprobs: Optional[LogprobsLists] = None
logprobs: Optional[SampleLogprobs] = None
Expand Down
6 changes: 5 additions & 1 deletion fastdeploy/entrypoints/openai/response_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Any, List, Optional

from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
+ from fastdeploy.utils import api_server_logger


class ChatResponseProcessor:
Expand All @@ -41,6 +42,8 @@ def __init__(
self.eos_token_id = eos_token_id
if decoder_base_url is not None:
self.decoder_client = AsyncTokenizerClient(base_url=decoder_base_url)
+ else:
+ self.decoder_client = None
self._mm_buffer: List[Any] = [] # Buffer for accumulating image token_ids
self._end_image_code_request_output: Optional[Any] = None
self._multipart_buffer = []
Expand Down Expand Up @@ -74,6 +77,7 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
include_stop_str_in_output: Whether or not to include stop strings in the output.
"""
for request_output in request_outputs:
+ api_server_logger.debug(f"request_output {request_output}")
if not self.enable_mm_output:
yield self.data_processor.process_response_dict(
response_dict=request_output,
Expand Down Expand Up @@ -112,7 +116,7 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
yield request_output

elif decode_type == 1:
- self._mm_buffer.extend(token_ids)
+ self._mm_buffer.append(token_ids)
self._end_image_code_request_output = request_output
else:
self.accumulate_token_ids(request_output)
Expand Down
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_response_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ async def test_streaming_text_and_image(self):
image_part = results[1]["outputs"]["multipart"][0]
self.assertEqual(image_part["type"], "image")
self.assertEqual(image_part["url"], "http://image.url/test.png")
- self.assertEqual(results[1]["outputs"]["token_ids"], [[11, 22]])
+ self.assertEqual(results[1]["outputs"]["token_ids"], [[[11, 22]]])

# 第三个 yield:text
text_part = results[2]["outputs"]["multipart"][0]
Expand All @@ -99,7 +99,7 @@ async def test_streaming_buffer_accumulation(self):
]

self.assertEqual(results, [])
- self.assertEqual(self.processor_mm._mm_buffer, [[33, 44]])
+ self.assertEqual(self.processor_mm._mm_buffer, [[[33, 44]]])

async def test_non_streaming_accumulate_and_emit(self):
"""非流式模式:等 eos_token_id 才输出 multipart(text+image)"""
Expand Down
Loading