Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0428b48
Begin
atobiszei Sep 11, 2025
e6f0c21
Checkpoint
atobiszei Sep 15, 2025
58fc69d
WIP
atobiszei Sep 17, 2025
d6cc713
Qwen3Coder bfcl - 0.95 simple/multiple
atobiszei Sep 19, 2025
42d8e4b
Test fixes
atobiszei Sep 22, 2025
3b8b620
Self-review
atobiszei Sep 22, 2025
312eb9f
Self-review p2
atobiszei Sep 22, 2025
198aef9
Spell fix
atobiszei Sep 22, 2025
1b80de7
Streaming working
atobiszei Sep 26, 2025
895330a
Style fixes
atobiszei Sep 26, 2025
c1a9c54
Add handling arguments in chat template in string format
atobiszei Sep 29, 2025
ea14b3b
Fix rebase
atobiszei Sep 30, 2025
145726e
Fix Qwen3CoderBfcl test
atobiszei Sep 30, 2025
47e0087
Unary & stream unification
atobiszei Oct 1, 2025
36bf91d
Refactor cd
atobiszei Oct 1, 2025
8dafe14
Refactor cd2
atobiszei Oct 1, 2025
42d6fec
Logging fixes
atobiszei Oct 1, 2025
55e4cc0
Self-review
atobiszei Oct 2, 2025
de5a859
Review fixes p1
atobiszei Oct 2, 2025
900bb90
Merge branch 'main' into atobisze_qwen3_tool_parser
dtrawins Oct 2, 2025
84ac218
Skip double tool schema parsing
atobiszei Oct 3, 2025
cbe4fab
Create rapidjson utils
atobiszei Oct 3, 2025
e70123f
Extend streaming test with another tool call
atobiszei Oct 3, 2025
1023820
Output parsers build split
atobiszei Oct 3, 2025
830869e
Merge remote-tracking branch 'origin/main' into atobisze_qwen3_tool_p…
atobiszei Oct 3, 2025
33cd9f2
Fix BUILD file
atobiszei Oct 3, 2025
9b8c2a3
Fix unconventional using
atobiszei Oct 6, 2025
e648478
Review fixes
atobiszei Oct 6, 2025
649371d
Add ToolSchemaWrapper
atobiszei Oct 7, 2025
e7bca2d
Apply suggestion from @atobiszei
atobiszei Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
python3-pip \
unzip \
gdb \
tmux \
tree \
vim && \
apt-get clean && \
Expand Down
118 changes: 118 additions & 0 deletions extras/chat_template_examples/chat_template_qwen3coder_instruct.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
{#- Chat template for Qwen3-Coder instruct models: renders the system prompt,
    tool definitions and conversation turns, emitting tool calls in an
    XML-like <tool_call>/<function>/<parameter> format. -#}
{% macro render_extra_keys(json_dict, handled_keys) %}
{#- Renders every key of json_dict not listed in handled_keys as <key>value</key>;
    mappings and non-string sequences are JSON-encoded, other values stringified. -#}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}

{#- Pull an optional leading system message out of the message list. -#}
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}

{#- Default tools to an empty list so the checks below are safe. -#}
{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}

{#- Open the system block: explicit system message, or a default one when tools are present. -#}
{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{#- Render tool definitions inside <tools> as XML-like <function> blocks. -#}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{#- Close the system block if one was opened above. -#}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Render the conversation turns. -#}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{#- NOTE(review): from_json assumes tool_call.arguments is a JSON-encoded string;
    confirm that callers always pass arguments in string form. -#}
{%- if tool_call.arguments is defined %}
{%- set arguments = tool_call.arguments | from_json %}
{%- for args_name, args_value in arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{#- Consecutive tool responses are merged into a single user turn wrapped in <tool_response> blocks. -#}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{#- Open an assistant turn when generation should continue. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
1 change: 1 addition & 0 deletions spelling-whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
8 changes: 2 additions & 6 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ COPTS_OV_TRACE = select({
"//conditions:default": ["-DOV_TRACE=0"],
"//:not_disable_ov_trace" : ["-DOV_TRACE=1"],
})
COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + select({
"//conditions:default": [],
"//:fuzzer_build" : COMMON_FUZZER_COPTS,
})
LINKOPTS_ADJUSTED = COMMON_STATIC_LIBS_LINKOPTS + select({
"//conditions:default": [],
"//:fuzzer_build" : COMMON_FUZZER_LINKOPTS,
Expand Down Expand Up @@ -2692,7 +2688,7 @@ cc_test(
"//conditions:default": [
"//src/llm:genai_servables",
"//src/llm:output_parsers",
":llm_output_parsers_tests",
":test_llm_output_parser_tests",
"//src/test/mediapipe/calculators:mediapipe_test_calculators",
"//src/test/mediapipe/calculators:dependency_free_http_test_calculators",
"@mediapipe//mediapipe/calculators/ovms:ovms_calculator",
Expand Down Expand Up @@ -3113,7 +3109,7 @@ cc_library(
)

cc_library(
name = "llm_output_parsers_tests",
name = "test_llm_output_parser_tests",
linkstatic = 1,
alwayslink = True,
srcs = glob(["test/llm/output_parsers/*_test.cpp"]),
Expand Down
75 changes: 67 additions & 8 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ ovms_cc_library(
name = "openai_request",
hdrs = ["apis/openai_request.hpp"],
srcs = [],
deps = select({
deps = [
"//src/port:rapidjson_document",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
Expand All @@ -90,8 +93,62 @@ ovms_cc_library(
)

ovms_cc_library(
name = "apis_tool_schema_wrapper",
hdrs = ["apis/tool_schema_wrapper.hpp"],
deps = [
"//src/port:rapidjson_document",
],
visibility = ["//visibility:public"],
)
ovms_cc_library(
name = "partial_json_builder",
hdrs = ["io_processing/partial_json_builder.hpp"],
srcs = ["io_processing/partial_json_builder.cpp"],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "io_processing_utils",
hdrs = ["io_processing/utils.hpp"],
srcs = ["io_processing/utils.cpp"],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "io_processing_base_output_parser",
hdrs = ["io_processing/base_output_parser.hpp"],
srcs = ["io_processing/base_output_parser.cpp"],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
":io_processing_utils",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
)
ovms_cc_library(
name = "io_processing_qwen3coder_tool_parser",
hdrs = ["io_processing/qwen3coder/qwen3coder_tool_parser.hpp"],
srcs = ["io_processing/qwen3coder/qwen3coder_tool_parser.cpp"],
deps = [
"//src:libovmslogging",
"//src:libovmsstatus",
"//src/utils:rapidjson_utils",
":io_processing_utils",
":io_processing_base_output_parser",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
visibility = ["//visibility:public"],
)
ovms_cc_library( # TODO split further so we don't have to recompile everything when changing one parser ...
name = "output_parsers",
hdrs = ["io_processing/base_output_parser.hpp",
hdrs = [
"io_processing/hermes3/tool_parser.hpp",
"io_processing/llama3/tool_parser.hpp",
"io_processing/phi4/tool_parser.hpp",
Expand All @@ -101,9 +158,8 @@ ovms_cc_library(
"io_processing/gptoss/tool_parser.hpp",
"io_processing/gptoss/harmony.hpp",
"io_processing/output_parser.hpp",
"io_processing/partial_json_builder.hpp",
"io_processing/utils.hpp"],
srcs = ["io_processing/base_output_parser.cpp",
],
srcs = [
"io_processing/hermes3/tool_parser.cpp",
"io_processing/llama3/tool_parser.cpp",
"io_processing/phi4/tool_parser.cpp",
Expand All @@ -113,18 +169,21 @@ ovms_cc_library(
"io_processing/gptoss/tool_parser.cpp",
"io_processing/gptoss/harmony.cpp",
"io_processing/output_parser.cpp",
"io_processing/partial_json_builder.cpp",
"io_processing/utils.cpp"],
],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
"//src:libovmslogging",
"//src:libovmsstring_utils",
":partial_json_builder",
":io_processing_base_output_parser",
":io_processing_qwen3coder_tool_parser",
":io_processing_utils",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
visibility = ["//visibility:public"],
additional_copts = COPTS_PYTHON
)

ovms_cc_library(
Expand Down
12 changes: 7 additions & 5 deletions src/llm/apis/openai_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,16 @@ absl::Status OpenAIChatCompletionsHandler::parseTools() {
// If we keep the tool, add tool name and schema to the request
auto parametersIt = functionIt->value.GetObject().FindMember("parameters");
if (parametersIt != functionIt->value.GetObject().MemberEnd() && parametersIt->value.IsObject()) {
// now we want to insert to a mapping of
// tool name -> tool schema representations struct
// Dump parameters object to string since this is the schema format expected by GenAI
// Keep the rapidjson::Value object as well to avoid re-parsing in outputParsers
rapidjson::StringBuffer buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
parametersIt->value.Accept(writer);
std::string parametersStr = buffer.GetString();
request.toolNameSchemaMap[nameIt->value.GetString()] = parametersStr;
ToolSchemaWrapper schemaReprs{&parametersIt->value, std::move(parametersStr)};
request.toolNameSchemaMap[nameIt->value.GetString()] = std::move(schemaReprs);
}
}
} else {
Expand Down Expand Up @@ -739,10 +743,8 @@ void OpenAIChatCompletionsHandler::incrementProcessedTokens(size_t numTokens) {

absl::Status OpenAIChatCompletionsHandler::parseRequest(std::optional<uint32_t> maxTokensLimit, uint32_t bestOfLimit, std::optional<uint32_t> maxModelLength, std::optional<std::string> allowedLocalMediaPath) {
absl::Status status = parseCommonPart(maxTokensLimit, bestOfLimit, maxModelLength);

if (status != absl::OkStatus())
return status;

if (endpoint == Endpoint::COMPLETIONS)
status = parseCompletionsPart();
else
Expand All @@ -762,9 +764,9 @@ ParsedOutput OpenAIChatCompletionsHandler::parseOutputIfNeeded(const std::vector
OVMS_PROFILE_FUNCTION();
ParsedOutput parsedOutput;
if (endpoint != Endpoint::CHAT_COMPLETIONS || outputParser == nullptr) {
parsedOutput.content = tokenizer.decode(generatedIds);
parsedOutput.content = this->tokenizer.decode(generatedIds);
} else {
parsedOutput = outputParser->parse(generatedIds, areToolsAvailable());
parsedOutput = outputParser->parse(generatedIds, this->areToolsAvailable());
}
return parsedOutput;
}
Expand Down
4 changes: 3 additions & 1 deletion src/llm/apis/openai_completions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ class OpenAIChatCompletionsHandler {
endpoint(endpoint),
created(creationTime),
tokenizer(tokenizer) {
// TODO we should delay creating output parser until we have request with toolNameSchemaMap parsed
// for now we pass it at construction time, but it must be populated before first use
if (!toolParserName.empty() || !reasoningParserName.empty()) {
outputParser = std::make_unique<OutputParser>(tokenizer, toolParserName, reasoningParserName);
outputParser = std::make_unique<OutputParser>(tokenizer, toolParserName, reasoningParserName, this->request.toolNameSchemaMap);
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/llm/apis/openai_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@
#include <openvino/runtime/tensor.hpp>
#include <openvino/genai/tokenizer.hpp>

#include "src/port/rapidjson_document.hpp"

#include "tool_schema_wrapper.hpp"

namespace ovms {
using ImageHistory = std::vector<std::pair<size_t, ov::Tensor>>;

struct StreamOptions {
bool includeUsage = false;
};

// Class that maps OpenAI request content.
struct OpenAIChatCompletionsRequest {
ov::genai::ChatHistory chatHistory;
Expand Down Expand Up @@ -74,7 +77,7 @@ struct OpenAIChatCompletionsRequest {
// Schema for response_format handling
std::optional<std::string> responseSchema{std::nullopt};
// Map that holds tool names and schemas for their arguments
std::map<std::string, std::string> toolNameSchemaMap;
ToolsSchemas_t toolNameSchemaMap;
// Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice
std::string toolChoice;

Expand Down
31 changes: 31 additions & 0 deletions src/llm/apis/tool_schema_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

// Holds two representations of a single tool's parameter JSON schema:
// the parsed rapidjson value and its serialized string form. Keeping both
// avoids re-parsing the schema in output parsers, while the string form is
// the format expected by GenAI.
#pragma once
#include <map>
#include <string>

#include "src/port/rapidjson_document.hpp"

namespace ovms {
struct ToolSchemaWrapper {
// Non-owning pointer into the request's parsed JSON document (the tool's
// "parameters" object); the document must outlive this wrapper —
// TODO confirm lifetime at all call sites.
rapidjson::Value* rapidjsonRepr;
// The same schema serialized to a string (schema format expected by GenAI).
std::string stringRepr;
};
// Maps tool name -> schema representations of that tool's arguments.
using ToolsSchemas_t = std::map<std::string, ToolSchemaWrapper>;
}  // namespace ovms
Loading