-
Notifications
You must be signed in to change notification settings - Fork 223
Streaming for Phi4 tool parser #3673
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -102,7 +102,7 @@ Document PartialJsonBuilder::add(const std::string& chunk) { | |||||||
auto beginIt = buffer.begin() + currentPosition; | ||||||||
auto endIt = buffer.end(); | ||||||||
|
||||||||
for (auto it = beginIt; it != endIt; ++it, currentPosition++) { | ||||||||
for (auto it = beginIt; it != endIt && state != IteratorState::END; ++it, ++currentPosition) { | ||||||||
finishedWithEscapeCharacter = false; | ||||||||
char c = *it; | ||||||||
|
||||||||
|
@@ -209,7 +209,12 @@ Document PartialJsonBuilder::add(const std::string& chunk) { | |||||||
|
||||||||
Document doc; | ||||||||
if (state == IteratorState::END && openCloseStack.empty()) { | ||||||||
doc.Parse(buffer.c_str()); | ||||||||
if (currentPosition == buffer.size()) { | ||||||||
doc.Parse(buffer.c_str()); | ||||||||
} else { | ||||||||
doc.Parse(buffer.c_str(), currentPosition); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||
} | ||||||||
|
||||||||
if (doc.HasParseError()) { | ||||||||
throw std::runtime_error("Invalid JSON. Content:\n" + buffer); | ||||||||
} | ||||||||
|
@@ -263,4 +268,16 @@ Document PartialJsonBuilder::add(const std::string& chunk) { | |||||||
} | ||||||||
return doc; | ||||||||
} | ||||||||
|
||||||||
bool PartialJsonBuilder::isComplete() const { | ||||||||
return state == IteratorState::END; | ||||||||
} | ||||||||
|
||||||||
std::string PartialJsonBuilder::getUnprocessedBuffer() const { | ||||||||
if (currentPosition < buffer.size()) { | ||||||||
return buffer.substr(currentPosition); | ||||||||
} | ||||||||
return ""; | ||||||||
} | ||||||||
|
||||||||
} // namespace ovms |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -32,6 +32,87 @@ | |||||||||||
|
||||||||||||
namespace ovms { | ||||||||||||
|
||||||||||||
void Phi4ToolParser::movePostColonContentToUnprocessedBuffer(std::string& chunk) { | ||||||||||||
size_t colonPos = chunk.find(':'); | ||||||||||||
if (colonPos != std::string::npos) { | ||||||||||||
// Store everything after the colon in unprocessedBuffer to process in the next call | ||||||||||||
unprocessedBuffer = chunk.substr(colonPos + 1) + unprocessedBuffer; | ||||||||||||
// Keep everything up to and including the colon | ||||||||||||
chunk = chunk.substr(0, colonPos + 1); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Takes whatever the JSON builder reports as unprocessed (content that appeared after
// the end of the tool call) and puts it at the front of unprocessedBuffer.
void Phi4ToolParser::movePostToolCallEndContentToUnprocessedBuffer() {
    unprocessedBuffer.insert(0, jsonBuilder.getUnprocessedBuffer());
    // Strip every backslash from the buffer: escape characters potentially added by the
    // arguments processing logic must not leak into the next tool call.
    const auto firstRemoved = std::remove(unprocessedBuffer.begin(), unprocessedBuffer.end(), '\\');
    unprocessedBuffer.erase(firstRemoved, unprocessedBuffer.end());
}
|
||||||||||||
void Phi4ToolParser::updateOpenBracesCount(const std::string& chunk) { | ||||||||||||
// Note that this method can be fooled by unclosed braces in string values. | ||||||||||||
// If turns out insufficient, we will need full JSON parsing to track opened/closed braces for arguments. | ||||||||||||
for (char c : chunk) { | ||||||||||||
if (c == '{') { | ||||||||||||
openBracesCount++; | ||||||||||||
} else if (c == '}') { | ||||||||||||
openBracesCount--; | ||||||||||||
if (openBracesCount == 0) { | ||||||||||||
break; // No need to count further if we balanced the braces | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
void Phi4ToolParser::handleEndOfToolCall(std::string& chunk) { | ||||||||||||
// We are at the end of the tool call object, so we add closing quote before the last closing brace | ||||||||||||
size_t lastClosingBrace = chunk.find_last_of('}'); | ||||||||||||
if (lastClosingBrace != std::string::npos) { | ||||||||||||
// Move anything after the last closing brace to unprocessedBuffer, since it's the start of the next tool call or end of the array | ||||||||||||
if (lastClosingBrace + 1 < chunk.size()) { | ||||||||||||
unprocessedBuffer = chunk.substr(lastClosingBrace + 1) + unprocessedBuffer; | ||||||||||||
chunk.erase(lastClosingBrace + 1); | ||||||||||||
} | ||||||||||||
chunk.insert(lastClosingBrace, "\""); | ||||||||||||
} else { | ||||||||||||
// If there is no closing brace, we just add closing quote at the end | ||||||||||||
chunk.append("\""); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Closes the arguments string when generation finishes: the closing quote goes right
// before the last closing brace of `chunk`, or at the very end of `chunk` when it
// contains no closing brace at all.
void Phi4ToolParser::handleGenerationFinish(std::string& chunk) const {
    const size_t bracePos = chunk.rfind('}');
    if (bracePos == std::string::npos) {
        chunk.push_back('"');
        return;
    }
    chunk.insert(bracePos, 1, '"');
}
|
||||||||||||
// Opens the arguments string by inserting a double quote in front of the first
// non-whitespace character of `chunk`. A chunk consisting solely of whitespace
// (or an empty one) gets the quote at its end instead.
void Phi4ToolParser::openArgumentsString(std::string& chunk) const {
    const size_t firstContentPos = chunk.find_first_not_of(" \t\n\r\f\v");
    const size_t quotePos = (firstContentPos == std::string::npos) ? chunk.size() : firstContentPos;
    chunk.insert(quotePos, 1, '"');
}
|
||||||||||||
// Prepares the parser for the next tool call: clears the last JSON document and the
// JSON builder, resets the per-call flags and increments toolCallIndex.
void Phi4ToolParser::clearState() {
    lastJson.Clear();
    jsonBuilder.clear();
    // Start from 1 since the opening brace of the tool call object is counted
    openBracesCount = 1;
    argumentsQuotesOpened = false;
    ++toolCallIndex;
}
|
||||||||||||
void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) { | ||||||||||||
std::vector<std::string> tools; | ||||||||||||
|
||||||||||||
|
@@ -86,8 +167,183 @@ void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t | |||||||||||
} | ||||||||||||
|
||||||||||||
std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) { | ||||||||||||
// Not implemented | ||||||||||||
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4OutputParser::parseChunk is not implemented"); | ||||||||||||
/* | ||||||||||||
Phi4 with vLLM template produces tool calls in the format: | ||||||||||||
functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] | ||||||||||||
|
||||||||||||
Due to the tool call format used by Phi4, we need to track the state of parsing more closely. | ||||||||||||
We have four states: | ||||||||||||
1) AWAITING_START_TAG - we are waiting for the "functools" tag to appear in the chunk | ||||||||||||
2) AWAITING_TOOL_CALLS_OPENING_BRACKET - we have seen "functools" but are waiting for the opening bracket of the array | ||||||||||||
3) AWAITING_TOOL_CALL_OPENING_BRACE - we have seen the opening bracket of the array but are waiting for the opening brace of the next tool call object | ||||||||||||
4) PROCESSING_TOOL_CALL - we are processing the tool call object | ||||||||||||
mzegla marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
|
||||||||||||
To avoid missing any generated content, we use unprocessedBuffer to store any output that is not used in the current state, but might be relevant in the next state. | ||||||||||||
Since tools calls in the array are separated by commas we also need to track when the tool call object ends (no special tag for that). | ||||||||||||
Next challenge, common for all parsers, is to return arguments as string even though model generates them as JSON. | ||||||||||||
We address this by escaping double quotes and adding opening quote at the beginning of arguments and closing quote at the end of arguments. | ||||||||||||
*/ | ||||||||||||
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4ToolParser::parseChunk called with chunk: '{}', finishReason: {}", chunk, static_cast<int>(finishReason)); | ||||||||||||
if (chunk.empty()) { | ||||||||||||
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty chunk for Phi4ToolParser"); | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
|
||||||||||||
// We merge unprocessedBuffer from previous calls with the current chunk to avoid losing any content | ||||||||||||
std::string modifiedChunk = unprocessedBuffer + chunk; | ||||||||||||
unprocessedBuffer.clear(); | ||||||||||||
|
||||||||||||
bool processingArguments = lastJson.HasMember("arguments"); | ||||||||||||
|
||||||||||||
// Before we have 'arguments' in the JSON, we do not want to process both key and value in the same call due to special handling of arguments value. | ||||||||||||
// We look for colon after 'arguments' key and move everything after it to unprocessedBuffer to be processed in the next call. | ||||||||||||
if (!processingArguments) { | ||||||||||||
movePostColonContentToUnprocessedBuffer(modifiedChunk); | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Phase 1: Control the internal state and apply changes to the chunk if needed | ||||||||||||
if (internalState == AWAITING_START_TAG) { | ||||||||||||
// We did not see "functools" yet, so we look for it in the current chunk | ||||||||||||
if (modifiedChunk.find(parsingStartTag) != std::string::npos) { | ||||||||||||
// We found "functools", so we switch to the the state where we are waiting for the opening bracket of the array | ||||||||||||
internalState = AWAITING_TOOL_CALLS_OPENING_BRACKET; | ||||||||||||
if (modifiedChunk.length() > parsingStartTag.length()) { | ||||||||||||
// We have more content in the chunk after "functools", so we process the rest of the chunk in the next state | ||||||||||||
std::string remainingChunk = modifiedChunk.substr(modifiedChunk.find(parsingStartTag) + parsingStartTag.length()); | ||||||||||||
if (remainingChunk.empty()) { | ||||||||||||
return std::nullopt; // Nothing more to process in this chunk | ||||||||||||
} else { | ||||||||||||
return parseChunk(remainingChunk, finishReason); | ||||||||||||
} | ||||||||||||
} else { // modifiedChunk.length() == parsingStartTag.length() as at this state, chunk cannot be smaller | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment indicates this condition should only occur when chunk length equals tag length, but the code structure suggests this could also handle cases where the chunk is shorter than the tag. Consider adding an assertion or clearer logic to handle the case where
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||||||||||||
return std::nullopt; // Nothing more to process in this chunk | ||||||||||||
} | ||||||||||||
} | ||||||||||||
return std::nullopt; | ||||||||||||
} else if (internalState == AWAITING_TOOL_CALLS_OPENING_BRACKET) { | ||||||||||||
// Next chunk after "functools" should start with opening bracket of the array | ||||||||||||
if (modifiedChunk[0] == '[') { | ||||||||||||
mzegla marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
// We found the opening bracket, so we switch to waiting for the opening brace of the first tool call | ||||||||||||
internalState = AWAITING_TOOL_CALL_OPENING_BRACE; | ||||||||||||
|
||||||||||||
// We process the rest of the chunk after the opening bracket | ||||||||||||
std::string remainingChunk = modifiedChunk.substr(1); | ||||||||||||
if (remainingChunk.empty()) { | ||||||||||||
return std::nullopt; // Nothing more to process in this chunk | ||||||||||||
} else { | ||||||||||||
// Process the remaining chunk as part of tool call processing | ||||||||||||
return parseChunk(remainingChunk, finishReason); | ||||||||||||
} | ||||||||||||
} else { | ||||||||||||
// Still waiting for the opening bracket, ignore this chunk | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
} else if (internalState == AWAITING_TOOL_CALL_OPENING_BRACE) { | ||||||||||||
// We are waiting for the opening brace of the tool call object | ||||||||||||
size_t firstOpeningBrace = modifiedChunk.find_first_of('{'); | ||||||||||||
if (firstOpeningBrace != std::string::npos) { | ||||||||||||
internalState = PROCESSING_TOOL_CALL; | ||||||||||||
clearState(); | ||||||||||||
|
||||||||||||
// Process the rest of the chunk after the opening brace (brace included) as part of tool call processing | ||||||||||||
std::string remainingChunk = modifiedChunk.substr(firstOpeningBrace); | ||||||||||||
if (remainingChunk.empty()) { | ||||||||||||
return std::nullopt; // Nothing more to process in this chunk | ||||||||||||
} else { | ||||||||||||
return parseChunk(remainingChunk, finishReason); | ||||||||||||
} | ||||||||||||
} else { | ||||||||||||
// Still waiting for the opening brace, ignore this chunk | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
} else { // internalState == PROCESSING_TOOL_CALL | ||||||||||||
// Remove any newlines to avoid breaking JSON format | ||||||||||||
modifiedChunk.erase(std::remove(modifiedChunk.begin(), modifiedChunk.end(), '\n'), modifiedChunk.end()); | ||||||||||||
|
||||||||||||
// JSON already contains 'arguments' (they cannot be null at this point). Apply modifications to the input chunk if needed to keep the format valid. | ||||||||||||
if (processingArguments) { | ||||||||||||
// Escaping double quotes in the arguments string | ||||||||||||
for (size_t pos = 0; (pos = modifiedChunk.find("\"", pos)) != std::string::npos; pos += 2) { | ||||||||||||
modifiedChunk.insert(pos, "\\"); | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Keep track of opened/closed braces to identify the end of the tool call object. | ||||||||||||
updateOpenBracesCount(modifiedChunk); | ||||||||||||
|
||||||||||||
// When we start collecting arguments, force string type by adding opening quote | ||||||||||||
if (!argumentsQuotesOpened) { | ||||||||||||
openArgumentsString(modifiedChunk); | ||||||||||||
argumentsQuotesOpened = true; | ||||||||||||
} | ||||||||||||
|
||||||||||||
if (finishReason != ov::genai::GenerationFinishReason::NONE) { | ||||||||||||
handleGenerationFinish(modifiedChunk); | ||||||||||||
} else if (openBracesCount == 0) { | ||||||||||||
// If we balanced the braces, we are at the end of the tool call object | ||||||||||||
handleEndOfToolCall(modifiedChunk); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Phase 2: Parse the modified chunk with PartialJsonBuilder and return appropriate delta if possible | ||||||||||||
rapidjson::Document newJson; | ||||||||||||
try { | ||||||||||||
// Otherwise just push the current chunk | ||||||||||||
newJson = jsonBuilder.add(modifiedChunk); | ||||||||||||
} catch (const std::exception& e) { | ||||||||||||
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call chunk partial parse failed: {}", e.what()); | ||||||||||||
// Throwing an error since at this point the JSON is broken and next chunks will not make it right. | ||||||||||||
throw std::runtime_error("Generated tool call structure is not valid"); | ||||||||||||
} | ||||||||||||
|
||||||||||||
rapidjson::Document doc; | ||||||||||||
// Case 1: 'arguments' has just appeared in the current chunk. If so, we return first delta. | ||||||||||||
if (newJson.HasMember("arguments") && !lastJson.HasMember("arguments")) { | ||||||||||||
std::string functionName; | ||||||||||||
if (lastJson.HasMember("name") && lastJson["name"].IsString()) { | ||||||||||||
functionName = lastJson["name"].GetString(); | ||||||||||||
} else if (newJson.HasMember("name") && newJson["name"].IsString()) { | ||||||||||||
// We received big chunk with both full function name and arguments, so we get function name from the new JSON | ||||||||||||
functionName = newJson["name"].GetString(); | ||||||||||||
} else { | ||||||||||||
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call name has not been generated and arguments already started"); | ||||||||||||
throw std::runtime_error("Tool call name is missing in generated output"); | ||||||||||||
mzegla marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||
} | ||||||||||||
// Wrap first delta in {"tool_calls":[{"id":<id>,"type":"function","index":<toolCallIndex>,"function":{"name": <functionName>}}]} | ||||||||||||
doc = wrapFirstDelta(functionName, toolCallIndex); | ||||||||||||
lastJson.CopyFrom(newJson, lastJson.GetAllocator()); | ||||||||||||
Comment on lines
+301
to
+313
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. helper: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this part is common for multiple parsers would you be okay with doing it separately? |
||||||||||||
return doc; | ||||||||||||
// Case 2: 'arguments' already exists in the last JSON, we compute delta and return it. | ||||||||||||
} else if (lastJson.HasMember("arguments")) { | ||||||||||||
rapidjson::Document delta = PartialJsonBuilder::computeDelta(lastJson, newJson); | ||||||||||||
|
||||||||||||
// Handle the case when tool call has finished - store unprocessed output and switch internal state | ||||||||||||
if (jsonBuilder.isComplete()) { | ||||||||||||
movePostToolCallEndContentToUnprocessedBuffer(); | ||||||||||||
// Switch to the state where we are waiting for the opening brace of the next tool call object | ||||||||||||
internalState = AWAITING_TOOL_CALL_OPENING_BRACE; | ||||||||||||
} else { | ||||||||||||
lastJson.CopyFrom(newJson, lastJson.GetAllocator()); | ||||||||||||
} | ||||||||||||
|
||||||||||||
// If delta is empty or contains only null or empty string values, we don't stream anything. | ||||||||||||
if (delta.ObjectEmpty()) { | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
|
||||||||||||
for (auto it = delta.MemberBegin(); it != delta.MemberEnd(); ++it) { | ||||||||||||
if (it->value.IsNull() || (it->value.IsString() && std::string(it->value.GetString()).empty())) { | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Wrap delta in {"tool_calls":[{"index":<toolCallIndex>,"function":<delta>}]} | ||||||||||||
doc = wrapDelta(delta, toolCallIndex); | ||||||||||||
return doc; | ||||||||||||
// Case 3: No 'arguments' exists or just appeared, so we keep building up until we have complete function name | ||||||||||||
} else { | ||||||||||||
lastJson.CopyFrom(newJson, lastJson.GetAllocator()); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
} // namespace ovms |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we are changing the implementation of the partial JSON builder, aren't we missing some unit tests for the partial JSON builder that would uncover those gaps?