Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/llm/io_processing/hermes3/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ std::optional<rapidjson::Document> Hermes3ToolParser::parseChunk(const std::stri
argumentsDelayWindow[0] = argumentsDelayWindow[1];
}

// If we are closing the tool call, we need to add closing quote after the last closing brace that we assume is present in the chunk processed in the last call.
// If we are closing the tool call, we need to add closing quote after the last arguments closing brace that we assume is present in the chunk processed in the last call.
if (modifiedChunk.find(toolCallEndTag) != std::string::npos) {
size_t lastClosingBrace = argumentsDelayWindow[0].find_last_of('}');
if (lastClosingBrace != std::string::npos) {
Expand Down
21 changes: 19 additions & 2 deletions src/llm/io_processing/partial_json_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Document PartialJsonBuilder::add(const std::string& chunk) {
auto beginIt = buffer.begin() + currentPosition;
auto endIt = buffer.end();

for (auto it = beginIt; it != endIt; ++it, currentPosition++) {
for (auto it = beginIt; it != endIt && state != IteratorState::END; ++it, ++currentPosition) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we change the implementation of the partial JSON builder, aren't we missing some unit tests of the partial JSON builder that would uncover those gaps?

finishedWithEscapeCharacter = false;
char c = *it;

Expand Down Expand Up @@ -209,7 +209,12 @@ Document PartialJsonBuilder::add(const std::string& chunk) {

Document doc;
if (state == IteratorState::END && openCloseStack.empty()) {
doc.Parse(buffer.c_str());
if (currentPosition == buffer.size()) {
doc.Parse(buffer.c_str());
} else {
doc.Parse(buffer.c_str(), currentPosition);
Copy link

Copilot AI Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Parse method is being called with a length parameter, but RapidJSON's Parse(const char*, size_t) expects the second parameter to be the length of the string to parse. Consider using buffer.substr(0, currentPosition) or verify that this usage is correct for the intended JSON parsing behavior.

Suggested change
doc.Parse(buffer.c_str(), currentPosition);
std::string jsonSubstring = buffer.substr(0, currentPosition);
doc.Parse(jsonSubstring.c_str(), jsonSubstring.size());

Copilot uses AI. Check for mistakes.

}

if (doc.HasParseError()) {
throw std::runtime_error("Invalid JSON. Content:\n" + buffer);
}
Expand Down Expand Up @@ -263,4 +268,16 @@ Document PartialJsonBuilder::add(const std::string& chunk) {
}
return doc;
}

// Reports whether the builder's iterator state is END, i.e. a complete JSON has been consumed.
bool PartialJsonBuilder::isComplete() const {
    const bool reachedEnd = (IteratorState::END == state);
    return reachedEnd;
}

// Returns a copy of the buffer content located at and after currentPosition.
// An empty string is returned when currentPosition is at or beyond the end of the buffer.
std::string PartialJsonBuilder::getUnprocessedBuffer() const {
    if (currentPosition >= buffer.size()) {
        return std::string();
    }
    return buffer.substr(currentPosition);
}

} // namespace ovms
5 changes: 5 additions & 0 deletions src/llm/io_processing/partial_json_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ class PartialJsonBuilder {
void clear();
// Add new chunk to the buffer and return the current parsed JSON document (incremental parsing)
Document add(const std::string& chunk);
// Check if the current state is END (i.e. we have a complete JSON)
bool isComplete() const;

// Get the unprocessed part of the buffer (from current position to the end)
std::string getUnprocessedBuffer() const;

static Document computeDelta(const Document& previous, const Document& current);
};
Expand Down
260 changes: 258 additions & 2 deletions src/llm/io_processing/phi4/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,87 @@

namespace ovms {

// Splits the chunk right after its first colon.
// The part up to and including the colon stays in `chunk`; the remainder is prepended to
// unprocessedBuffer so that it is handled by the next call. A chunk without a colon is left untouched.
void Phi4ToolParser::movePostColonContentToUnprocessedBuffer(std::string& chunk) {
    const size_t splitAt = chunk.find(':');
    if (splitAt == std::string::npos) {
        return;
    }
    const size_t tailStart = splitAt + 1;
    // Prepend the tail (possibly empty) so that it precedes anything already waiting in the buffer
    unprocessedBuffer.insert(0, chunk, tailStart, std::string::npos);
    // Drop the tail from the chunk, keeping the colon itself
    chunk.erase(tailStart);
}

// Carries over whatever the JSON builder did not consume into unprocessedBuffer.
// The builder's leftover is placed in front of the content already stored in unprocessedBuffer and
// every backslash is stripped from the combined text, since the escape characters added while
// processing arguments are not wanted when moving on to the next tool call.
void Phi4ToolParser::movePostToolCallEndContentToUnprocessedBuffer() {
    std::string leftover = jsonBuilder.getUnprocessedBuffer();
    leftover += unprocessedBuffer;

    const auto newEnd = std::remove(leftover.begin(), leftover.end(), '\\');
    leftover.erase(newEnd, leftover.end());

    unprocessedBuffer.swap(leftover);
}

// Updates openBracesCount with the curly braces found in the chunk.
// Every '{' increments the counter and every '}' decrements it. Scanning stops as soon as the
// counter drops to zero, so braces located after the balancing one are not counted.
void Phi4ToolParser::updateOpenBracesCount(const std::string& chunk) {
    // Note that this method can be fooled by unclosed braces in string values.
    // If it turns out to be insufficient, we will need full JSON parsing to track opened/closed braces for arguments.
    for (const char c : chunk) {
        if (c == '{') {
            ++openBracesCount;
        } else if (c == '}') {
            if (openBracesCount == 0) {
                // Stray closing brace while already balanced. The counter is unsigned, so decrementing
                // it here would wrap around to a huge value and the braces would never balance again.
                break;
            }
            --openBracesCount;
            if (openBracesCount == 0) {
                break;  // No need to count further if we balanced the braces
            }
        }
    }
}

// Closes the arguments string at the end of a tool call object.
// A double quote is inserted right before the last '}' found in the chunk. Anything located after
// that brace is cut from the chunk and prepended to unprocessedBuffer, as it is the start of the
// next tool call or the end of the array. When the chunk has no '}', the quote is appended at the end.
// NOTE(review): the split point is the last '}' in the chunk, which is not necessarily the brace that
// balanced the tool call object - confirm a chunk cannot also carry braces of the following tool call.
void Phi4ToolParser::handleEndOfToolCall(std::string& chunk) {
    const size_t closingBracePos = chunk.rfind('}');
    if (closingBracePos == std::string::npos) {
        // No closing brace at all, so the string is simply terminated at the end of the chunk
        chunk.push_back('"');
        return;
    }

    const size_t tailStart = closingBracePos + 1;
    if (tailStart < chunk.size()) {
        // Keep the trailing content for the next call, ahead of anything already buffered
        unprocessedBuffer.insert(0, chunk, tailStart, std::string::npos);
        chunk.erase(tailStart);
    }
    // The closing quote goes directly in front of the closing brace
    chunk.insert(closingBracePos, 1, '"');
}

// Closes the arguments string when generation has finished.
// The closing double quote is placed right before the last '}' of the chunk, or at the very end
// of the chunk when it contains no closing brace.
void Phi4ToolParser::handleGenerationFinish(std::string& chunk) const {
    const size_t closingBracePos = chunk.rfind('}');
    const size_t quotePos = (closingBracePos == std::string::npos) ? chunk.size() : closingBracePos;
    chunk.insert(quotePos, 1, '"');
}

// Opens the arguments string by inserting a double quote in front of the first
// non-whitespace character of the chunk. A chunk made only of whitespace (or an empty one)
// gets the quote appended at its end instead.
void Phi4ToolParser::openArgumentsString(std::string& chunk) const {
    const size_t firstContentPos = chunk.find_first_not_of(" \t\n\r\f\v");
    const size_t quotePos = (firstContentPos == std::string::npos) ? chunk.size() : firstContentPos;
    chunk.insert(quotePos, 1, '"');
}

// Prepares the parser for the next tool call: advances the tool call index and resets
// the per-call state (arguments quote flag, brace counter, JSON builder and last parsed JSON).
void Phi4ToolParser::clearState() {
    ++toolCallIndex;
    argumentsQuotesOpened = false;
    // The counter restarts at 1 because the opening brace of the tool call object is counted as well
    openBracesCount = 1;
    jsonBuilder.clear();
    // NOTE(review): rapidjson documents Clear() as removing all elements of an array - confirm it
    // behaves as intended when lastJson holds an object.
    lastJson.Clear();
}

void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
std::vector<std::string> tools;

Expand Down Expand Up @@ -86,8 +167,183 @@ void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t
}

std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
// Not implemented
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4OutputParser::parseChunk is not implemented");
/*
Phi4 with vLLM template produces tool calls in the format:
functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]

Due to the tool call format used by Phi4, we need to track the state of parsing more closely.
We have four states:
1) AWAITING_START_TAG - we are waiting for the "functools" tag to appear in the chunk
2) AWAITING_TOOL_CALLS_OPENING_BRACKET - we have seen "functools" but are waiting for the opening bracket of the array
3) AWAITING_TOOL_CALL_OPENING_BRACE - we have seen the opening bracket of the array but are waiting for the opening brace of the next tool call object
4) PROCESSING_TOOL_CALL - we are processing the tool call object

To avoid missing any generated content, we use unprocessedBuffer to store any output that is not used in the current state, but might be relevant in the next state.
Since tool calls in the array are separated by commas, we also need to track when the tool call object ends (no special tag for that).
Next challenge, common for all parsers, is to return arguments as string even though model generates them as JSON.
We address this by escaping double quotes and adding opening quote at the beginning of arguments and closing quote at the end of arguments.
*/
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4ToolParser::parseChunk called with chunk: '{}', finishReason: {}", chunk, static_cast<int>(finishReason));
if (chunk.empty()) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty chunk for Phi4ToolParser");
return std::nullopt;
}

// We merge unprocessedBuffer from previous calls with the current chunk to avoid losing any content
std::string modifiedChunk = unprocessedBuffer + chunk;
unprocessedBuffer.clear();

bool processingArguments = lastJson.HasMember("arguments");

// Before we have 'arguments' in the JSON, we do not want to process both key and value in the same call due to special handling of arguments value.
// We look for colon after 'arguments' key and move everything after it to unprocessedBuffer to be processed in the next call.
if (!processingArguments) {
movePostColonContentToUnprocessedBuffer(modifiedChunk);
}

// Phase 1: Control the internal state and apply changes to the chunk if needed
if (internalState == AWAITING_START_TAG) {
// We did not see "functools" yet, so we look for it in the current chunk
if (modifiedChunk.find(parsingStartTag) != std::string::npos) {
// We found "functools", so we switch to the state where we are waiting for the opening bracket of the array
internalState = AWAITING_TOOL_CALLS_OPENING_BRACKET;
if (modifiedChunk.length() > parsingStartTag.length()) {
// We have more content in the chunk after "functools", so we process the rest of the chunk in the next state
std::string remainingChunk = modifiedChunk.substr(modifiedChunk.find(parsingStartTag) + parsingStartTag.length());
if (remainingChunk.empty()) {
return std::nullopt; // Nothing more to process in this chunk
} else {
return parseChunk(remainingChunk, finishReason);
}
} else { // modifiedChunk.length() == parsingStartTag.length() as at this state, chunk cannot be smaller
Copy link

Copilot AI Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment indicates this condition should only occur when chunk length equals tag length, but the code structure suggests this could also handle cases where the chunk is shorter than the tag. Consider adding an assertion or clearer logic to handle the case where modifiedChunk.length() < parsingStartTag.length().

Suggested change
} else { // modifiedChunk.length() == parsingStartTag.length() as at this state, chunk cannot be smaller
} else { // modifiedChunk.length() == parsingStartTag.length() as at this state, chunk cannot be smaller
if (modifiedChunk.length() < parsingStartTag.length()) {
SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Unexpected state: modifiedChunk.length() < parsingStartTag.length() ({} < {}). This may indicate a logic error or malformed input.", modifiedChunk.length(), parsingStartTag.length());
}

Copilot uses AI. Check for mistakes.

return std::nullopt; // Nothing more to process in this chunk
}
}
return std::nullopt;
} else if (internalState == AWAITING_TOOL_CALLS_OPENING_BRACKET) {
// Next chunk after "functools" should start with opening bracket of the array
if (modifiedChunk[0] == '[') {
// We found the opening bracket, so we switch to waiting for the opening brace of the first tool call
internalState = AWAITING_TOOL_CALL_OPENING_BRACE;

// We process the rest of the chunk after the opening bracket
std::string remainingChunk = modifiedChunk.substr(1);
if (remainingChunk.empty()) {
return std::nullopt; // Nothing more to process in this chunk
} else {
// Process the remaining chunk as part of tool call processing
return parseChunk(remainingChunk, finishReason);
}
} else {
// Still waiting for the opening bracket, ignore this chunk
return std::nullopt;
}
} else if (internalState == AWAITING_TOOL_CALL_OPENING_BRACE) {
// We are waiting for the opening brace of the tool call object
size_t firstOpeningBrace = modifiedChunk.find_first_of('{');
if (firstOpeningBrace != std::string::npos) {
internalState = PROCESSING_TOOL_CALL;
clearState();

// Process the rest of the chunk after the opening brace (brace included) as part of tool call processing
std::string remainingChunk = modifiedChunk.substr(firstOpeningBrace);
if (remainingChunk.empty()) {
return std::nullopt; // Nothing more to process in this chunk
} else {
return parseChunk(remainingChunk, finishReason);
}
} else {
// Still waiting for the opening brace, ignore this chunk
return std::nullopt;
}
} else { // internalState == PROCESSING_TOOL_CALL
// Remove any newlines to avoid breaking JSON format
modifiedChunk.erase(std::remove(modifiedChunk.begin(), modifiedChunk.end(), '\n'), modifiedChunk.end());

// JSON already contains 'arguments' (they cannot be null at this point). Apply modifications to the input chunk if needed to keep the format valid.
if (processingArguments) {
// Escaping double quotes in the arguments string
for (size_t pos = 0; (pos = modifiedChunk.find("\"", pos)) != std::string::npos; pos += 2) {
modifiedChunk.insert(pos, "\\");
}

// Keep track of opened/closed braces to identify the end of the tool call object.
updateOpenBracesCount(modifiedChunk);

// When we start collecting arguments, force string type by adding opening quote
if (!argumentsQuotesOpened) {
openArgumentsString(modifiedChunk);
argumentsQuotesOpened = true;
}

if (finishReason != ov::genai::GenerationFinishReason::NONE) {
handleGenerationFinish(modifiedChunk);
} else if (openBracesCount == 0) {
// If we balanced the braces, we are at the end of the tool call object
handleEndOfToolCall(modifiedChunk);
}
}

// Phase 2: Parse the modified chunk with PartialJsonBuilder and return appropriate delta if possible
rapidjson::Document newJson;
try {
// Otherwise just push the current chunk
newJson = jsonBuilder.add(modifiedChunk);
} catch (const std::exception& e) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call chunk partial parse failed: {}", e.what());
// Throwing an error since at this point the JSON is broken and next chunks will not make it right.
throw std::runtime_error("Generated tool call structure is not valid");
}

rapidjson::Document doc;
// Case 1: 'arguments' has just appeared in the current chunk. If so, we return first delta.
if (newJson.HasMember("arguments") && !lastJson.HasMember("arguments")) {
std::string functionName;
if (lastJson.HasMember("name") && lastJson["name"].IsString()) {
functionName = lastJson["name"].GetString();
} else if (newJson.HasMember("name") && newJson["name"].IsString()) {
// We received big chunk with both full function name and arguments, so we get function name from the new JSON
functionName = newJson["name"].GetString();
} else {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call name has not been generated and arguments already started");
throw std::runtime_error("Tool call name is missing in generated output");
}
// Wrap first delta in {"tool_calls":[{"id":<id>,"type":"function","index":<toolCallIndex>,"function":{"name": <functionName>}}]}
doc = wrapFirstDelta(functionName, toolCallIndex);
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
Comment on lines +301 to +313
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

helper:
sendArgumentsFirstTime?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this part is common for multiple parsers would you be okay with doing it separately?

return doc;
// Case 2: 'arguments' already exists in the last JSON, we compute delta and return it.
} else if (lastJson.HasMember("arguments")) {
rapidjson::Document delta = PartialJsonBuilder::computeDelta(lastJson, newJson);

// Handle the case when tool call has finished - store unprocessed output and switch internal state
if (jsonBuilder.isComplete()) {
movePostToolCallEndContentToUnprocessedBuffer();
// Switch to the state where we are waiting for the opening brace of the next tool call object
internalState = AWAITING_TOOL_CALL_OPENING_BRACE;
} else {
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
}

// If delta is empty or contains only null or empty string values, we don't stream anything.
if (delta.ObjectEmpty()) {
return std::nullopt;
}

for (auto it = delta.MemberBegin(); it != delta.MemberEnd(); ++it) {
if (it->value.IsNull() || (it->value.IsString() && std::string(it->value.GetString()).empty())) {
return std::nullopt;
}
}

// Wrap delta in {"tool_calls":[{"index":<toolCallIndex>,"function":<delta>}]}
doc = wrapDelta(delta, toolCallIndex);
return doc;
// Case 3: No 'arguments' exists or just appeared, so we keep building up until we have complete function name
} else {
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
}
}
return std::nullopt;
}
} // namespace ovms
31 changes: 31 additions & 0 deletions src/llm/io_processing/phi4/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#pragma warning(pop)

#include "src/llm/io_processing/base_output_parser.hpp"
#include "src/llm/io_processing/partial_json_builder.hpp"

namespace ovms {
class Phi4ToolParser : public BaseOutputParser {
Expand All @@ -37,6 +38,36 @@ class Phi4ToolParser : public BaseOutputParser {
const std::string parsingStartTag = "functools";
const std::string parsingEndTag = "";

// Streaming required members

// States of the streaming (parseChunk) state machine.
enum InternalState {
// Waiting for the "functools" start tag to appear in the output
AWAITING_START_TAG,
// Start tag has been seen; waiting for the '[' that opens the array of tool calls
AWAITING_TOOL_CALLS_OPENING_BRACKET,
// Array has been opened (or the previous tool call has ended); waiting for the '{' of the next tool call object
AWAITING_TOOL_CALL_OPENING_BRACE,
// Inside a tool call object; chunks are fed to the JSON builder
PROCESSING_TOOL_CALL
};

InternalState internalState = AWAITING_START_TAG;
rapidjson::Document lastJson;
PartialJsonBuilder jsonBuilder;
// Index to track the current tool call being processed (-1 means no tool call has been started yet)
int toolCallIndex = -1;
// Flag to indicate if double quote has been added at the beginning of arguments
bool argumentsQuotesOpened = false;
std::string unprocessedBuffer;

// Counter of opened braces used to track nested structures while in arguments collection phase
// Starting with 1, since we count the tool call opening brace and expect it to be closed as arguments end
size_t openBracesCount = 1;

void movePostColonContentToUnprocessedBuffer(std::string& chunk);
void movePostToolCallEndContentToUnprocessedBuffer();
void updateOpenBracesCount(const std::string& chunk);
void handleGenerationFinish(std::string& chunk) const;
void handleEndOfToolCall(std::string& chunk);
void openArgumentsString(std::string& chunk) const;
void clearState();

public:
Phi4ToolParser() = delete;
explicit Phi4ToolParser(ov::genai::Tokenizer& tokenizer) :
Expand Down
Loading