@@ -86,8 +86,219 @@ void Phi4ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t
86
86
}
87
87
88
88
std::optional<rapidjson::Document> Phi4ToolParser::parseChunk (const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
89
- // Not implemented
90
- SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Phi4OutputParser::parseChunk is not implemented" );
89
+ /*
90
+ Due to the tool call format used by Phi4, we need to track the state of parsing more closely.
91
+ We have four states:
92
+ 1) AWAITING_START_TAG - we are waiting for the "functools" tag to appear in the chunk
93
+ 2) AWAITING_TOOL_CALLS_OPENING_BRACKET - we have seen "functools" but are waiting for the opening bracket of the array
94
+ 3) AWAITING_TOOL_CALL_OPENING_BRACE - we have seen the opening bracket of the array but are waiting for the opening brace of the next tool call object
95
+ 4) PROCESSING_TOOL_CALL - we are processing the tool call object
96
+
97
+ To avoid missing any generated content, we use unprocessedBuffer to store any output that is not used in the current state, but might be relevant in the next state.
98
+ Since tools calls in the array are separated by commas we also need to track when the tool call object ends (no special tag for that).
99
+ Next challenge, common for all parsers, is to return arguments as string even though model generates them as JSON.
100
+ We address this by escaping double quotes and adding opening quote at the beginning of arguments and closing quote at the end of arguments.
101
+ */
102
+ SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Phi4ToolParser::parseChunk called with chunk: '{}', finishReason: {}" , chunk, static_cast <int >(finishReason));
103
+ if (chunk.empty ()) {
104
+ SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Received empty chunk for Phi4ToolParser" );
105
+ return std::nullopt ;
106
+ }
107
+
108
+ // We merge unprocessedBuffer from previous calls with the current chunk to avoid losing any content
109
+ std::string modifiedChunk = unprocessedBuffer + chunk;
110
+ unprocessedBuffer.clear ();
111
+
112
+ // Phase 1: Control the internal state and apply changes to the chunk if needed
113
+ if (internalState == AWAITING_START_TAG) {
114
+ // We did not see "functools" yet, so we look for it in the current chunk
115
+ if (modifiedChunk.find (parsingStartTag) != std::string::npos) {
116
+ // We found "functools", so we switch to the the state where we are waiting for the opening bracket of the array
117
+ internalState = AWAITING_TOOL_CALLS_OPENING_BRACKET;
118
+ if (modifiedChunk.length () > parsingStartTag.length ()) {
119
+ // We have more content in the chunk after "functools", so we process the rest of the chunk in the next state
120
+ std::string remainingChunk = modifiedChunk.substr (modifiedChunk.find (parsingStartTag) + parsingStartTag.length ());
121
+ if (remainingChunk.empty ()) {
122
+ return std::nullopt ; // Nothing more to process in this chunk
123
+ } else {
124
+ return parseChunk (remainingChunk, finishReason);
125
+ }
126
+ } else { // modifiedChunk.length() == parsingStartTag.length() as at this state, chunk cannot be smaller
127
+ return std::nullopt ; // Nothing more to process in this chunk
128
+ }
129
+ }
130
+ return std::nullopt ;
131
+ } else if (internalState == AWAITING_TOOL_CALLS_OPENING_BRACKET) {
132
+ // Next chunk after "functools" should start with opening bracket of the array
133
+ if (modifiedChunk[0 ] == ' [' ) {
134
+ // We found the opening bracket, so we switch to waiting for the opening brace of the first tool call
135
+ internalState = AWAITING_TOOL_CALL_OPENING_BRACE;
136
+
137
+ // We process the rest of the chunk after the opening bracket
138
+ std::string remainingChunk = modifiedChunk.substr (1 );
139
+ if (remainingChunk.empty ()) {
140
+ return std::nullopt ; // Nothing more to process in this chunk
141
+ } else {
142
+ // Process the remaining chunk as part of tool call processing
143
+ return parseChunk (remainingChunk, finishReason);
144
+ }
145
+ } else {
146
+ // Still waiting for the opening bracket, ignore this chunk
147
+ return std::nullopt ;
148
+ }
149
+ } else if (internalState == AWAITING_TOOL_CALL_OPENING_BRACE) {
150
+ // We are waiting for the opening brace of the tool call object
151
+ size_t firstOpeningBrace = modifiedChunk.find_first_of (' {' );
152
+ if (firstOpeningBrace != std::string::npos) {
153
+ internalState = PROCESSING_TOOL_CALL;
154
+ // Clear state for the next tool call
155
+ lastJson.Clear ();
156
+ jsonBuilder.clear ();
157
+ toolCallIndex++;
158
+ argumentsQuotesOpened = false ;
159
+ openBracesCount = 1 ; // Reset to 1 as we count just found opening brace of the tool call
160
+
161
+ // Process the rest of the chunk after the opening brace (brace included) as part of tool call processing
162
+ std::string remainingChunk = modifiedChunk.substr (firstOpeningBrace);
163
+ if (remainingChunk.empty ()) {
164
+ return std::nullopt ; // Nothing more to process in this chunk
165
+ } else {
166
+ return parseChunk (remainingChunk, finishReason);
167
+ }
168
+ } else {
169
+ // Still waiting for the opening brace, ignore this chunk
170
+ return std::nullopt ;
171
+ }
172
+ } else { // internalState == PROCESSING_TOOL_CALL
173
+ // Remove any newlines to avoid breaking JSON format
174
+ modifiedChunk.erase (std::remove (modifiedChunk.begin (), modifiedChunk.end (), ' \n ' ), modifiedChunk.end ());
175
+
176
+ // JSON already contains 'arguments' (they cannot be null at this point). Apply modifications to the input chunk if needed to keep the format valid.
177
+ if (lastJson.HasMember (" arguments" )) {
178
+ // Escaping double quotes in the arguments string
179
+ for (size_t pos = 0 ; (pos = modifiedChunk.find (" \" " , pos)) != std::string::npos; pos += 2 ) {
180
+ modifiedChunk.insert (pos, " \\ " );
181
+ }
182
+
183
+ // Keep track of opened/closed braces to identify the end of the tool call object.
184
+ // Note that this method can be fooled by unclosed braces in string values.
185
+ // If turns out insufficient, we will need full JSON parsing to track opened/closed braces for arguments.
186
+ for (char c : modifiedChunk) {
187
+ if (c == ' {' ) {
188
+ openBracesCount++;
189
+ } else if (c == ' }' ) {
190
+ openBracesCount--;
191
+ }
192
+ }
193
+
194
+ // When we start collecting arguments, force string type by adding opening quote
195
+ if (!argumentsQuotesOpened) {
196
+ // Add opening quote before the first non-whitespace character
197
+ size_t firstNonWhitespaceCharacter = modifiedChunk.find_first_not_of (" \t\n\r\f\v " );
198
+ if (firstNonWhitespaceCharacter != std::string::npos) {
199
+ modifiedChunk.insert (firstNonWhitespaceCharacter, " \" " );
200
+ } else {
201
+ // If the chunk is all whitespace, just insert quote at the end
202
+ modifiedChunk.append (" \" " );
203
+ }
204
+ argumentsQuotesOpened = true ;
205
+ }
206
+
207
+ if (finishReason != ov::genai::GenerationFinishReason::NONE) {
208
+ // If generation has stopped, we look for the closing brace to close the string properly
209
+ size_t lastClosingBrace = modifiedChunk.find_last_of (' }' );
210
+ if (lastClosingBrace != std::string::npos) {
211
+ modifiedChunk.insert (lastClosingBrace, " \" " );
212
+ }
213
+ } else if (openBracesCount == 0 ) {
214
+ // If we balanced the braces, we are at the end of the tool call object, so we add closing quote before the last closing brace
215
+ size_t lastClosingBrace = modifiedChunk.find_last_of (' }' );
216
+ if (lastClosingBrace != std::string::npos) {
217
+ modifiedChunk.insert (lastClosingBrace, " \" " );
218
+ } else {
219
+ // If there is no closing brace, we just add closing quote at the end
220
+ modifiedChunk.append (" \" " );
221
+ }
222
+ }
223
+ } else { // no arguments yet, we need to make sure they are added only as a key
224
+ // If 'arguments":' appears in the chunk and there is any non-whitespace content after it, which is not string,
225
+ // we add double quote after colon to force string type
226
+ size_t argumentsPos = modifiedChunk.find (" arguments\" :" );
227
+ if (argumentsPos != std::string::npos) {
228
+ // Move everything after 'arguments":' to unprocessedBuffer, so we can add opening quote at the beginning of arguments in the next call
229
+ size_t afterArgumentsPos = argumentsPos + std::string (" arguments\" :" ).length ();
230
+ if (afterArgumentsPos < modifiedChunk.length ()) {
231
+ unprocessedBuffer = modifiedChunk.substr (afterArgumentsPos);
232
+ modifiedChunk.erase (afterArgumentsPos);
233
+ }
234
+ }
235
+ }
236
+
237
+ // Phase 2: Parse the modified chunk with PartialJsonBuilder and return appropriate delta if possible
238
+ rapidjson::Document newJson;
239
+ try {
240
+ // Otherwise just push the current chunk
241
+ newJson = jsonBuilder.add (modifiedChunk);
242
+ } catch (const std::exception& e) {
243
+ (void )e; // Suppress unused variable warning on Windows
244
+ SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Tool call chunk partial parse failed: {}" , e.what ());
245
+ // Throwing an error since at this point the JSON is broken and next chunks will not make it right.
246
+ throw std::runtime_error (" Generated tool call structure is not valid" );
247
+ }
248
+
249
+ rapidjson::Document doc;
250
+ // Case 1: 'arguments' has just appeared in the current chunk. If so, we return first delta.
251
+ if (newJson.HasMember (" arguments" ) && !lastJson.HasMember (" arguments" )) {
252
+ std::string functionName;
253
+ if (lastJson.HasMember (" name" ) && lastJson[" name" ].IsString ()) {
254
+ functionName = lastJson[" name" ].GetString ();
255
+ } else if (newJson.HasMember (" name" ) && newJson[" name" ].IsString ()) {
256
+ // We received big chunk with both full function name and arguments, so we get function name from the new JSON
257
+ functionName = newJson[" name" ].GetString ();
258
+ } else {
259
+ SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Tool call name has not been generated and arguments already started" );
260
+ throw std::runtime_error (" Tool call name is missing in generated output" );
261
+ }
262
+ // Wrap first delta in {"tool_calls":[{"id":<id>,"type":"function","index":<toolCallIndex>,"function":{"name": <functionName>}}]}
263
+ doc = wrapFirstDelta (functionName, toolCallIndex);
264
+ lastJson.CopyFrom (newJson, lastJson.GetAllocator ());
265
+ return doc;
266
+ // Case 2: 'arguments' already exists in the last JSON, we compute delta and return it.
267
+ } else if (lastJson.HasMember (" arguments" )) {
268
+ rapidjson::Document delta = PartialJsonBuilder::computeDelta (lastJson, newJson);
269
+
270
+ // Handle the case when tool call has finished - store unprocessed output and switch internal state
271
+ if (jsonBuilder.isComplete ()) {
272
+ unprocessedBuffer = jsonBuilder.getUnprocessedBuffer ();
273
+ // Remove potential escape characters added in arguments processing logic from the unprocessedBuffer as we move to the next tool call
274
+ unprocessedBuffer.erase (
275
+ std::remove (unprocessedBuffer.begin (), unprocessedBuffer.end (), ' \\ ' ),
276
+ unprocessedBuffer.end ());
277
+ // Switch to the state where we are waiting for the opening brace of the next tool call object
278
+ internalState = AWAITING_TOOL_CALL_OPENING_BRACE;
279
+ } else {
280
+ lastJson.CopyFrom (newJson, lastJson.GetAllocator ());
281
+ }
282
+
283
+ // If delta is empty or contains only null or empty string values, we don't stream anything.
284
+ if (delta.ObjectEmpty ()) {
285
+ return std::nullopt ;
286
+ }
287
+
288
+ for (auto it = delta.MemberBegin (); it != delta.MemberEnd (); ++it) {
289
+ if (it->value .IsNull () || (it->value .IsString () && std::string (it->value .GetString ()).empty ())) {
290
+ return std::nullopt ;
291
+ }
292
+ }
293
+
294
+ // Wrap delta in {"tool_calls":[{"index":<toolCallIndex>,"function":<delta>}]}
295
+ doc = wrapDelta (delta, toolCallIndex);
296
+ return doc;
297
+ // Case 3: No 'arguments' exists or just appeared, so we keep building up until we have complete function name
298
+ } else {
299
+ lastJson.CopyFrom (newJson, lastJson.GetAllocator ());
300
+ }
301
+ }
91
302
return std::nullopt ;
92
303
}
93
304
} // namespace ovms
0 commit comments