Skip to content

Commit 8be071e

Browse files
committed
init
1 parent d8b34a6 commit 8be071e

File tree

6 files changed

+412
-5
lines changed

6 files changed

+412
-5
lines changed

src/llm/io_processing/hermes3/tool_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ std::optional<rapidjson::Document> Hermes3ToolParser::parseChunk(const std::stri
165165
argumentsDelayWindow[0] = argumentsDelayWindow[1];
166166
}
167167

168-
// If we are closing the tool call, we need to add closing quote after the last closing brace that we assume is present in the chunk processed in the last call.
168+
// If we are closing the tool call, we need to add closing quote after the last arguments closing brace that we assume is present in the chunk processed in the last call.
169169
if (modifiedChunk.find(toolCallEndTag) != std::string::npos) {
170170
size_t lastClosingBrace = argumentsDelayWindow[0].find_last_of('}');
171171
if (lastClosingBrace != std::string::npos) {

src/llm/io_processing/partial_json_builder.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Document PartialJsonBuilder::add(const std::string& chunk) {
102102
auto beginIt = buffer.begin() + currentPosition;
103103
auto endIt = buffer.end();
104104

105-
for (auto it = beginIt; it != endIt; ++it, currentPosition++) {
105+
for (auto it = beginIt; it != endIt && state != IteratorState::END; ++it, ++currentPosition) {
106106
finishedWithEscapeCharacter = false;
107107
char c = *it;
108108

@@ -209,7 +209,12 @@ Document PartialJsonBuilder::add(const std::string& chunk) {
209209

210210
Document doc;
211211
if (state == IteratorState::END && openCloseStack.empty()) {
212-
doc.Parse(buffer.c_str());
212+
if (currentPosition == buffer.size()) {
213+
doc.Parse(buffer.c_str());
214+
} else {
215+
doc.Parse(buffer.c_str(), currentPosition);
216+
}
217+
213218
if (doc.HasParseError()) {
214219
throw std::runtime_error("Invalid JSON. Content:\n" + buffer);
215220
}
@@ -264,4 +269,15 @@ Document PartialJsonBuilder::add(const std::string& chunk) {
264269
return doc;
265270
}
266271

272+
bool PartialJsonBuilder::isComplete() const {
273+
return state == IteratorState::END;
274+
}
275+
276+
std::string PartialJsonBuilder::getUnprocessedBuffer() const {
277+
if (currentPosition < buffer.size()) {
278+
return buffer.substr(currentPosition);
279+
}
280+
return "";
281+
}
282+
267283
} // namespace ovms

src/llm/io_processing/partial_json_builder.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ class PartialJsonBuilder {
6969
void clear();
7070
// Add a new chunk to the buffer and return the currently parsed JSON document (incremental parsing)
7171
Document add(const std::string& chunk);
72+
// Check if the current state is END (i.e. we have a complete JSON)
73+
bool isComplete() const;
74+
75+
// Get the unprocessed part of the buffer (from current position to the end)
76+
std::string getUnprocessedBuffer() const;
7277

7378
static Document computeDelta(const Document& previous, const Document& current);
7479
};

src/llm/io_processing/phi4/tool_parser.cpp

Lines changed: 213 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,219 @@ void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t
8686
}
8787

8888
// Streaming entry point: consumes one generated text chunk and returns the next tool call delta, if any.
//
// chunk        - newly generated text; an empty chunk is ignored and yields std::nullopt.
// finishReason - when different from GenerationFinishReason::NONE, the arguments string is closed
//                before the last closing brace found in the processed text.
//
// Returns std::nullopt when there is nothing to stream for this chunk, the document produced by
// wrapFirstDelta() when the "arguments" key has just appeared, or the document produced by wrapDelta()
// for subsequent arguments content.
//
// Throws std::runtime_error when PartialJsonBuilder rejects the chunk or when "arguments" appears
// before any string "name" has been parsed.
//
// The function calls itself recursively to hand the remainder of a chunk over to the next state.
// At that point unprocessedBuffer has already been cleared, so the recursive call sees only the remainder.
std::optional<rapidjson::Document> Phi4ToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
89-
// Not implemented
90-
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4OutputParser::parseChunk is not implemented");
89+
/*
90+
Due to the tool call format used by Phi4, we need to track the state of parsing more closely.
91+
We have four states:
92+
1) AWAITING_START_TAG - we are waiting for the "functools" tag to appear in the chunk
93+
2) AWAITING_TOOL_CALLS_OPENING_BRACKET - we have seen "functools" but are waiting for the opening bracket of the array
94+
3) AWAITING_TOOL_CALL_OPENING_BRACE - we have seen the opening bracket of the array but are waiting for the opening brace of the next tool call object
95+
4) PROCESSING_TOOL_CALL - we are processing the tool call object
96+
97+
To avoid missing any generated content, we use unprocessedBuffer to store any output that is not used in the current state, but might be relevant in the next state.
98+
Since tool calls in the array are separated by commas, we also need to track when the tool call object ends (there is no special tag for that).
99+
The next challenge, common for all parsers, is to return arguments as a string even though the model generates them as JSON.
100+
We address this by escaping double quotes and adding an opening quote at the beginning of arguments and a closing quote at the end of arguments.
101+
*/
102+
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Phi4ToolParser::parseChunk called with chunk: '{}', finishReason: {}", chunk, static_cast<int>(finishReason));
103+
if (chunk.empty()) {
104+
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty chunk for Phi4ToolParser");
105+
return std::nullopt;
106+
}
107+
108+
// We merge unprocessedBuffer from previous calls with the current chunk to avoid losing any content
109+
std::string modifiedChunk = unprocessedBuffer + chunk;
110+
unprocessedBuffer.clear();
111+
112+
// Phase 1: Control the internal state and apply changes to the chunk if needed
113+
if (internalState == AWAITING_START_TAG) {
114+
// We did not see "functools" yet, so we look for it in the current chunk
// NOTE(review): when the tag is not found the chunk is dropped (the buffer was cleared above),
// so a start tag split across two chunks would not be detected - confirm this cannot happen upstream.
115+
if (modifiedChunk.find(parsingStartTag) != std::string::npos) {
116+
// We found "functools", so we switch to the state where we are waiting for the opening bracket of the array
117+
internalState = AWAITING_TOOL_CALLS_OPENING_BRACKET;
118+
if (modifiedChunk.length() > parsingStartTag.length()) {
119+
// The chunk is longer than the tag, so whatever follows "functools" (possibly nothing) is processed in the next state
120+
std::string remainingChunk = modifiedChunk.substr(modifiedChunk.find(parsingStartTag) + parsingStartTag.length());
121+
if (remainingChunk.empty()) {
122+
return std::nullopt;  // Nothing more to process in this chunk
123+
} else {
124+
return parseChunk(remainingChunk, finishReason);
125+
}
126+
} else {  // modifiedChunk.length() == parsingStartTag.length() since the tag was found, so the chunk cannot be shorter
127+
return std::nullopt;  // Nothing more to process in this chunk
128+
}
129+
}
130+
return std::nullopt;
131+
} else if (internalState == AWAITING_TOOL_CALLS_OPENING_BRACKET) {
132+
// Next chunk after "functools" should start with opening bracket of the array
// NOTE(review): only the first character is inspected, so a chunk with leading whitespace
// before '[' is discarded as a whole - confirm the model never emits that.
133+
if (modifiedChunk[0] == '[') {
134+
// We found the opening bracket, so we switch to waiting for the opening brace of the first tool call
135+
internalState = AWAITING_TOOL_CALL_OPENING_BRACE;
136+
137+
// We process the rest of the chunk after the opening bracket
138+
std::string remainingChunk = modifiedChunk.substr(1);
139+
if (remainingChunk.empty()) {
140+
return std::nullopt;  // Nothing more to process in this chunk
141+
} else {
142+
// Process the remaining chunk as part of tool call processing
143+
return parseChunk(remainingChunk, finishReason);
144+
}
145+
} else {
146+
// Still waiting for the opening bracket, ignore this chunk
147+
return std::nullopt;
148+
}
149+
} else if (internalState == AWAITING_TOOL_CALL_OPENING_BRACE) {
150+
// We are waiting for the opening brace of the tool call object
151+
size_t firstOpeningBrace = modifiedChunk.find_first_of('{');
152+
if (firstOpeningBrace != std::string::npos) {
153+
internalState = PROCESSING_TOOL_CALL;
154+
// Clear state for the next tool call
155+
lastJson.Clear();
156+
jsonBuilder.clear();
157+
toolCallIndex++;
158+
argumentsQuotesOpened = false;
159+
openBracesCount = 1;  // Reset to 1 to account for the tool call opening brace that we have just found
160+
161+
// Process the rest of the chunk after the opening brace (brace included) as part of tool call processing
// Anything before the brace (e.g. the comma separating tool calls) is dropped by the substr below.
162+
std::string remainingChunk = modifiedChunk.substr(firstOpeningBrace);
163+
if (remainingChunk.empty()) {
164+
return std::nullopt;  // Nothing more to process in this chunk
165+
} else {
166+
return parseChunk(remainingChunk, finishReason);
167+
}
168+
} else {
169+
// Still waiting for the opening brace, ignore this chunk
170+
return std::nullopt;
171+
}
172+
} else {  // internalState == PROCESSING_TOOL_CALL
173+
// Remove any newlines to avoid breaking JSON format
174+
modifiedChunk.erase(std::remove(modifiedChunk.begin(), modifiedChunk.end(), '\n'), modifiedChunk.end());
175+
176+
// JSON already contains 'arguments' (they cannot be null at this point). Apply modifications to the input chunk if needed to keep the format valid.
177+
if (lastJson.HasMember("arguments")) {
178+
// Escaping double quotes in the arguments string
// Each insertion shifts the quote by one, so advancing by 2 skips both the new backslash and the quote itself.
179+
for (size_t pos = 0; (pos = modifiedChunk.find("\"", pos)) != std::string::npos; pos += 2) {
180+
modifiedChunk.insert(pos, "\\");
181+
}
182+
183+
// Keep track of opened/closed braces to identify the end of the tool call object.
184+
// Note that this method can be fooled by unclosed braces in string values.
185+
// If this turns out to be insufficient, we will need full JSON parsing to track opened/closed braces for arguments.
// NOTE(review): openBracesCount is declared as size_t in the class header and is decremented without a guard,
// so more closing than opening braces would wrap it around instead of going negative - verify.
186+
for (char c : modifiedChunk) {
187+
if (c == '{') {
188+
openBracesCount++;
189+
} else if (c == '}') {
190+
openBracesCount--;
191+
}
192+
}
193+
194+
// When we start collecting arguments, force string type by adding opening quote
195+
if (!argumentsQuotesOpened) {
196+
// Add opening quote before the first non-whitespace character
197+
size_t firstNonWhitespaceCharacter = modifiedChunk.find_first_not_of(" \t\n\r\f\v");
198+
if (firstNonWhitespaceCharacter != std::string::npos) {
199+
modifiedChunk.insert(firstNonWhitespaceCharacter, "\"");
200+
} else {
201+
// If the chunk is all whitespace, just insert quote at the end
202+
modifiedChunk.append("\"");
203+
}
204+
argumentsQuotesOpened = true;
205+
}
206+
207+
if (finishReason != ov::genai::GenerationFinishReason::NONE) {
208+
// If generation has stopped, we look for the closing brace to close the string properly
// The quote is inserted right before the last '}' of the chunk; without any '}' the string is left open.
209+
size_t lastClosingBrace = modifiedChunk.find_last_of('}');
210+
if (lastClosingBrace != std::string::npos) {
211+
modifiedChunk.insert(lastClosingBrace, "\"");
212+
}
213+
} else if (openBracesCount == 0) {
214+
// If we balanced the braces, we are at the end of the tool call object, so we add closing quote before the last closing brace
215+
size_t lastClosingBrace = modifiedChunk.find_last_of('}');
216+
if (lastClosingBrace != std::string::npos) {
217+
modifiedChunk.insert(lastClosingBrace, "\"");
218+
} else {
219+
// If there is no closing brace, we just add closing quote at the end
220+
modifiedChunk.append("\"");
221+
}
222+
}
223+
} else {  // no arguments yet, we need to make sure they are added only as a key
224+
// If 'arguments":' appears in the chunk, only the part up to and including the colon is parsed now.
225+
// The rest is deferred so that the opening quote forcing string type can be added in the next call.
226+
size_t argumentsPos = modifiedChunk.find("arguments\":");
227+
if (argumentsPos != std::string::npos) {
228+
// Move everything after 'arguments":' to unprocessedBuffer, so we can add opening quote at the beginning of arguments in the next call
// NOTE(review): the deferred part is only consumed when parseChunk is called again; if this was the final chunk
// it looks like the buffered arguments would never be streamed - confirm against the caller.
229+
size_t afterArgumentsPos = argumentsPos + std::string("arguments\":").length();
230+
if (afterArgumentsPos < modifiedChunk.length()) {
231+
unprocessedBuffer = modifiedChunk.substr(afterArgumentsPos);
232+
modifiedChunk.erase(afterArgumentsPos);
233+
}
234+
}
235+
}
236+
237+
// Phase 2: Parse the modified chunk with PartialJsonBuilder and return appropriate delta if possible
238+
rapidjson::Document newJson;
239+
try {
240+
// Push the (possibly modified) chunk to the incremental JSON builder
241+
newJson = jsonBuilder.add(modifiedChunk);
242+
} catch (const std::exception& e) {
243+
(void)e;  // Suppress unused variable warning on Windows
244+
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call chunk partial parse failed: {}", e.what());
245+
// Throwing an error since at this point the JSON is broken and next chunks will not make it right.
246+
throw std::runtime_error("Generated tool call structure is not valid");
247+
}
248+
249+
rapidjson::Document doc;
250+
// Case 1: 'arguments' has just appeared in the current chunk. If so, we return first delta.
251+
if (newJson.HasMember("arguments") && !lastJson.HasMember("arguments")) {
252+
std::string functionName;
253+
if (lastJson.HasMember("name") && lastJson["name"].IsString()) {
254+
functionName = lastJson["name"].GetString();
255+
} else if (newJson.HasMember("name") && newJson["name"].IsString()) {
256+
// We received big chunk with both full function name and arguments, so we get function name from the new JSON
257+
functionName = newJson["name"].GetString();
258+
} else {
259+
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call name has not been generated and arguments already started");
260+
throw std::runtime_error("Tool call name is missing in generated output");
261+
}
262+
// Wrap first delta in {"tool_calls":[{"id":<id>,"type":"function","index":<toolCallIndex>,"function":{"name": <functionName>}}]}
263+
doc = wrapFirstDelta(functionName, toolCallIndex);
264+
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
265+
return doc;
266+
// Case 2: 'arguments' already exists in the last JSON, we compute delta and return it.
267+
} else if (lastJson.HasMember("arguments")) {
268+
rapidjson::Document delta = PartialJsonBuilder::computeDelta(lastJson, newJson);
269+
270+
// Handle the case when tool call has finished - store unprocessed output and switch internal state
271+
if (jsonBuilder.isComplete()) {
272+
unprocessedBuffer = jsonBuilder.getUnprocessedBuffer();
273+
// Remove potential escape characters added in arguments processing logic from the unprocessedBuffer as we move to the next tool call
// Note that this strips every backslash in the leftover text, not only the ones inserted above.
274+
unprocessedBuffer.erase(
275+
std::remove(unprocessedBuffer.begin(), unprocessedBuffer.end(), '\\'),
276+
unprocessedBuffer.end());
277+
// Switch to the state where we are waiting for the opening brace of the next tool call object
278+
internalState = AWAITING_TOOL_CALL_OPENING_BRACE;
279+
} else {
280+
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
281+
}
282+
283+
// If delta is empty or contains any null or empty string value, we don't stream anything.
284+
if (delta.ObjectEmpty()) {
285+
return std::nullopt;
286+
}
287+
288+
for (auto it = delta.MemberBegin(); it != delta.MemberEnd(); ++it) {
289+
if (it->value.IsNull() || (it->value.IsString() && std::string(it->value.GetString()).empty())) {
290+
return std::nullopt;
291+
}
292+
}
293+
294+
// Wrap delta in {"tool_calls":[{"index":<toolCallIndex>,"function":<delta>}]}
295+
doc = wrapDelta(delta, toolCallIndex);
296+
return doc;
297+
// Case 3: No 'arguments' exists or just appeared, so we keep building up until we have complete function name
298+
} else {
299+
lastJson.CopyFrom(newJson, lastJson.GetAllocator());
300+
}
301+
}
91302
// Reached only from Case 3 above: nothing to stream yet.
return std::nullopt;
92303
}
93304
} // namespace ovms

src/llm/io_processing/phi4/tool_parser.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,28 @@ class Phi4ToolParser : public BaseOutputParser {
3737
const std::string parsingStartTag = "functools";
3838
const std::string parsingEndTag = "";
3939

40+
// Streaming required members
41+
42+
// States of the streaming tool call parser, listed in the order they are normally visited.
enum InternalState {
43+
// Waiting for the parsing start tag to appear in the generated output
AWAITING_START_TAG,
44+
// Start tag has been seen; waiting for the opening bracket of the tool calls array
AWAITING_TOOL_CALLS_OPENING_BRACKET,
45+
// Array has been opened; waiting for the opening brace of the next tool call object
AWAITING_TOOL_CALL_OPENING_BRACE,
46+
// Inside a tool call object; its content is being parsed incrementally
PROCESSING_TOOL_CALL
47+
};
48+
49+
InternalState internalState = AWAITING_START_TAG;
50+
rapidjson::Document lastJson;
51+
PartialJsonBuilder jsonBuilder;
52+
// Index to track the current tool call being processed (-1 means no tool call has been started yet)
53+
int toolCallIndex = -1;
54+
// Flag to indicate if double quote has been added at the beginning of arguments
55+
bool argumentsQuotesOpened = false;
56+
std::string unprocessedBuffer;
57+
58+
// Counter of currently opened braces, used to track nested structures while in the arguments collection phase
59+
// Starting with 1, since we count the tool call opening brace and expect it to be closed as arguments end
60+
size_t openBracesCount = 1;
61+
4062
public:
4163
Phi4ToolParser() = delete;
4264
explicit Phi4ToolParser(ov::genai::Tokenizer& tokenizer) :

0 commit comments

Comments
 (0)