Skip to content

Commit e5b46c9

Browse files
committed
expanded holistic test
1 parent 8be071e commit e5b46c9

File tree

3 files changed

+61
-27
lines changed

3 files changed

+61
-27
lines changed

src/llm/io_processing/phi4/tool_parser.cpp

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@
3232

3333
namespace ovms {
3434

35+
void Phi4ToolParser::movePostColonContentToUnprocessedBuffer(std::string& chunk) {
36+
size_t colonPos = chunk.find(':');
37+
if (colonPos != std::string::npos) {
38+
// Store everything after the colon in unprocessedBuffer to process in the next call
39+
unprocessedBuffer = chunk.substr(colonPos + 1) + unprocessedBuffer;
40+
// Keep everything up to and including the colon
41+
chunk = chunk.substr(0, colonPos + 1);
42+
}
43+
}
44+
3545
void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
3646
std::vector<std::string> tools;
3747

@@ -87,6 +97,9 @@ void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t
8797

8898
std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
8999
/*
100+
Phi4 with vLLM template produces tool calls in the format:
101+
functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
102+
90103
Due to the tool call format used by Phi4, we need to track the state of parsing more closely.
91104
We have four states:
92105
1) AWAITING_START_TAG - we are waiting for the "functools" tag to appear in the chunk
@@ -109,6 +122,12 @@ std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string&
109122
std::string modifiedChunk = unprocessedBuffer + chunk;
110123
unprocessedBuffer.clear();
111124

125+
// Before we have 'arguments' in the JSON, we do not want to process both key and value in the same call due to special handling of arguments value.
126+
// We look for colon after 'arguments' key and move everything after it to unprocessedBuffer to be processed in the next call.
127+
if (!lastJson.HasMember("arguments")) {
128+
movePostColonContentToUnprocessedBuffer(modifiedChunk);
129+
}
130+
112131
// Phase 1: Control the internal state and apply changes to the chunk if needed
113132
if (internalState == AWAITING_START_TAG) {
114133
// We did not see "functools" yet, so we look for it in the current chunk
@@ -188,6 +207,9 @@ std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string&
188207
openBracesCount++;
189208
} else if (c == '}') {
190209
openBracesCount--;
210+
if (openBracesCount == 0) {
211+
break; // No need to count further if we balanced the braces
212+
}
191213
}
192214
}
193215

@@ -214,24 +236,17 @@ std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string&
214236
// If we balanced the braces, we are at the end of the tool call object, so we add closing quote before the last closing brace
215237
size_t lastClosingBrace = modifiedChunk.find_last_of('}');
216238
if (lastClosingBrace != std::string::npos) {
239+
// Move anything after the last closing brace to unprocessedBuffer, since it's the start of the next tool call or end of the array
240+
if (lastClosingBrace + 1 < modifiedChunk.size()) {
241+
unprocessedBuffer = modifiedChunk.substr(lastClosingBrace + 1) + unprocessedBuffer;
242+
modifiedChunk.erase(lastClosingBrace + 1);
243+
}
217244
modifiedChunk.insert(lastClosingBrace, "\"");
218245
} else {
219246
// If there is no closing brace, we just add closing quote at the end
220247
modifiedChunk.append("\"");
221248
}
222249
}
223-
} else { // no arguments yet, we need to make sure they are added only as a key
224-
// If 'arguments":' appears in the chunk and there is any non-whitespace content after it, which is not string,
225-
// we add double quote after colon to force string type
226-
size_t argumentsPos = modifiedChunk.find("arguments\":");
227-
if (argumentsPos != std::string::npos) {
228-
// Move everything after 'arguments":' to unprocessedBuffer, so we can add opening quote at the beginning of arguments in the next call
229-
size_t afterArgumentsPos = argumentsPos + std::string("arguments\":").length();
230-
if (afterArgumentsPos < modifiedChunk.length()) {
231-
unprocessedBuffer = modifiedChunk.substr(afterArgumentsPos);
232-
modifiedChunk.erase(afterArgumentsPos);
233-
}
234-
}
235250
}
236251

237252
// Phase 2: Parse the modified chunk with PartialJsonBuilder and return appropriate delta if possible
@@ -240,7 +255,6 @@ std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string&
240255
// Otherwise just push the current chunk
241256
newJson = jsonBuilder.add(modifiedChunk);
242257
} catch (const std::exception& e) {
243-
(void)e; // Suppress unused variable warning on Windows
244258
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call chunk partial parse failed: {}", e.what());
245259
// Throwing an error since at this point the JSON is broken and next chunks will not make it right.
246260
throw std::runtime_error("Generated tool call structure is not valid");
@@ -269,7 +283,7 @@ std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string&
269283

270284
// Handle the case when tool call has finished - store unprocessed output and switch internal state
271285
if (jsonBuilder.isComplete()) {
272-
unprocessedBuffer = jsonBuilder.getUnprocessedBuffer();
286+
unprocessedBuffer = jsonBuilder.getUnprocessedBuffer() + unprocessedBuffer;
273287
// Remove potential escape characters added in arguments processing logic from the unprocessedBuffer as we move to the next tool call
274288
unprocessedBuffer.erase(
275289
std::remove(unprocessedBuffer.begin(), unprocessedBuffer.end(), '\\'),

src/llm/io_processing/phi4/tool_parser.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class Phi4ToolParser : public BaseOutputParser {
5959
// Starting with 1, since we count the tool call opening brace and expect it to be closed as arguments end
6060
size_t openBracesCount = 1;
6161

62+
void movePostColonContentToUnprocessedBuffer(std::string& chunk);
63+
6264
public:
6365
Phi4ToolParser() = delete;
6466
explicit Phi4ToolParser(ov::genai::Tokenizer& tokenizer) :

src/test/llm/output_parsers/phi4_output_parser_test.cpp

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -215,12 +215,15 @@ TEST_F(Phi4OutputParserTest, HolisticStreaming) {
215215
{" \"", ov::genai::GenerationFinishReason::NONE, std::nullopt},
216216
{"arguments\":", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":0,\"function\":{\"name\":\"get_humidity\"}}]}}"},
217217
{" {\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}})"},
218-
{"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]}})"},
219-
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]}})"},
220-
{" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]}})"},
221-
{"Paris", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Paris"}}]}})"},
222-
{"\"}}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"},
218+
{"locations", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"locations"}}]}})"},
219+
{"\": {\"real_cities\": ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\": {\"real_cities\": "}}]}})"},
220+
{" [\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" [\""}}]}})"},
221+
{"Paris\", \"New", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Paris\", \"New"}}]}})"},
222+
{"York\"], \"fictional_cities\": [\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"York\"], \"fictional_cities\": [\""}}]}})"},
223+
{"Cintra\", \"Oxenfurt\"]}}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Cintra\", \"Oxenfurt\"]}}"}}]}})"},
224+
{"}", ov::genai::GenerationFinishReason::NONE, std::nullopt},
223225
{",", ov::genai::GenerationFinishReason::NONE, std::nullopt},
226+
224227
{" {\"", ov::genai::GenerationFinishReason::NONE, std::nullopt},
225228
{"name", ov::genai::GenerationFinishReason::NONE, std::nullopt},
226229
{"\":", ov::genai::GenerationFinishReason::NONE, std::nullopt},
@@ -229,12 +232,27 @@ TEST_F(Phi4OutputParserTest, HolisticStreaming) {
229232
{"_temperature", ov::genai::GenerationFinishReason::NONE, std::nullopt},
230233
{"\",", ov::genai::GenerationFinishReason::NONE, std::nullopt},
231234
{" \"", ov::genai::GenerationFinishReason::NONE, std::nullopt},
235+
// Simulate getting arguments key, value and close of tool call all in one chunk
236+
{"arguments\": {}},", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"get_temperature"}}]}})"},
237+
// Such chunk is broken into parts before and after colon, so along with the next chunk we also process ' {}},' part
238+
239+
// At this point we process ' {}}, {\"' part, but since it's both end and start of tool call, we split it again.
240+
// So in that call we process ' {}}' part and push ', {\"' part to the next call.
241+
{" {\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{}"}}]}})"},
242+
// At this point we process ', {\"name' which can be processed as a whole, no more delay from that point
243+
{"name", ov::genai::GenerationFinishReason::NONE, std::nullopt},
244+
{"\":", ov::genai::GenerationFinishReason::NONE, std::nullopt},
245+
{" \"", ov::genai::GenerationFinishReason::NONE, std::nullopt},
246+
{"get", ov::genai::GenerationFinishReason::NONE, std::nullopt},
247+
{"_temperature", ov::genai::GenerationFinishReason::NONE, std::nullopt},
248+
{"\",", ov::genai::GenerationFinishReason::NONE, std::nullopt},
249+
{" \"", ov::genai::GenerationFinishReason::NONE, std::nullopt},
232250
{"arguments", ov::genai::GenerationFinishReason::NONE, std::nullopt},
233-
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"get_temperature"}}]}})"},
234-
{" {\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\""}}]}})"},
235-
{"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"location"}}]}})"},
236-
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\":"}}]}})"},
237-
{" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":" \""}}]}})"},
251+
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":2,"function":{"name":"get_temperature"}}]}})"},
252+
{" {\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\""}}]}})"},
253+
{"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"location"}}]}})"},
254+
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"\":"}}]}})"},
255+
{" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":" \""}}]}})"},
238256
// Last chunk is added in the for loop below
239257
};
240258

@@ -243,11 +261,11 @@ TEST_F(Phi4OutputParserTest, HolisticStreaming) {
243261
outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*phi4Tokenizer, "phi4", "");
244262
auto chunkToDeltaVecCopy = chunkToDeltaVec;
245263
if (lastFinishReason == ov::genai::GenerationFinishReason::NONE) {
246-
chunkToDeltaVecCopy.push_back({"Paris\"}}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"Paris\"}"}}]}})"});
264+
chunkToDeltaVecCopy.push_back({"Paris\"}}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"Paris\"}"}}]}})"});
247265
} else if (lastFinishReason == ov::genai::GenerationFinishReason::STOP) {
248-
chunkToDeltaVecCopy.push_back({"Paris\"}}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"Paris\"}"}}]}})"});
266+
chunkToDeltaVecCopy.push_back({"Paris\"}}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"Paris\"}"}}]}})"});
249267
} else {
250-
chunkToDeltaVecCopy.push_back({"Par", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"Par"}}]}})"});
268+
chunkToDeltaVecCopy.push_back({"Par", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"Par"}}]}})"});
251269
}
252270
int64_t chunkIteration = -1;
253271
for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVecCopy) {

0 commit comments

Comments
 (0)