Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0428b48
Begin
atobiszei Sep 11, 2025
e6f0c21
Checkpoint
atobiszei Sep 15, 2025
58fc69d
WIP
atobiszei Sep 17, 2025
d6cc713
Qwen3Coder bfcl - 0.95 simple/multiple
atobiszei Sep 19, 2025
42d8e4b
Test fixes
atobiszei Sep 22, 2025
3b8b620
Self-review
atobiszei Sep 22, 2025
312eb9f
Self-review p2
atobiszei Sep 22, 2025
198aef9
Spell fix
atobiszei Sep 22, 2025
1b80de7
Streaming working
atobiszei Sep 26, 2025
895330a
Style fixes
atobiszei Sep 26, 2025
c1a9c54
Add handling arguments in chat template in string format
atobiszei Sep 29, 2025
ea14b3b
Fix rebase
atobiszei Sep 30, 2025
145726e
Fix Qwen3CoderBfcl test
atobiszei Sep 30, 2025
47e0087
Unary & stream unification
atobiszei Oct 1, 2025
36bf91d
Refactor cd
atobiszei Oct 1, 2025
8dafe14
Refactor cd2
atobiszei Oct 1, 2025
42d6fec
Logging fixes
atobiszei Oct 1, 2025
55e4cc0
Self-review
atobiszei Oct 2, 2025
de5a859
Review fixes p1
atobiszei Oct 2, 2025
900bb90
Merge branch 'main' into atobisze_qwen3_tool_parser
dtrawins Oct 2, 2025
84ac218
Skip double tool schema parsing
atobiszei Oct 3, 2025
cbe4fab
Create rapidjson utils
atobiszei Oct 3, 2025
e70123f
Extend streaming test with another tool call
atobiszei Oct 3, 2025
1023820
Output parsers build split
atobiszei Oct 3, 2025
830869e
Merge remote-tracking branch 'origin/main' into atobisze_qwen3_tool_p…
atobiszei Oct 3, 2025
33cd9f2
Fix BUILD file
atobiszei Oct 3, 2025
9b8c2a3
Fix unconventional using
atobiszei Oct 6, 2025
e648478
Review fixes
atobiszei Oct 6, 2025
649371d
Add ToolSchemaWrapper
atobiszei Oct 7, 2025
e7bca2d
Apply suggestion from @atobiszei
atobiszei Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
python3-pip \
unzip \
gdb \
tmux \
tree \
vim && \
apt-get clean && \
Expand Down
118 changes: 118 additions & 0 deletions extras/chat_template_examples/chat_template_qwen3coder_instruct.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
{#- Chat template for Qwen3-Coder instruct models: renders the system prompt,
    tool definitions and conversation turns, emitting tool calls in an
    XML-like <tool_call>/<function>/<parameter> format. -#}
{% macro render_extra_keys(json_dict, handled_keys) %}
{#- Renders every key of json_dict not listed in handled_keys as <key>value</key>;
    mappings and non-string sequences are JSON-encoded, other values stringified. -#}
{%- if json_dict is mapping %}
{%- for json_key in json_dict if json_key not in handled_keys %}
{%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
{{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
{%- else %}
{{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
{% endmacro %}

{#- Pull an optional leading system message out of the message list. -#}
{%- if messages[0]["role"] == "system" %}
{%- set system_message = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}

{#- Default tools to an empty list so the checks below are safe. -#}
{%- if not tools is defined %}
{%- set tools = [] %}
{%- endif %}

{#- Open the system block: explicit system message, or a default one when tools are present. -#}
{%- if system_message is defined %}
{{- "<|im_start|>system\n" + system_message }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- "<|im_start|>system\nYou are Qwen, a helpful AI assistant that can interact with a computer to solve tasks." }}
{%- endif %}
{%- endif %}
{#- Render tool definitions inside <tools> as XML-like <function> blocks. -#}
{%- if tools is iterable and tools | length > 0 %}
{{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
{%- if tool.description is defined %}
{{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
{%- endif %}
{{- '\n<parameters>' }}
{%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- '\n<parameter>' }}
{{- '\n<name>' ~ param_name ~ '</name>' }}
{%- if param_fields.type is defined %}
{{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
{%- endif %}
{%- if param_fields.description is defined %}
{{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
{%- endif %}
{%- set handled_keys = ['name', 'type', 'description'] %}
{{- render_extra_keys(param_fields, handled_keys) }}
{{- '\n</parameter>' }}
{%- endfor %}
{%- endif %}
{% set handled_keys = ['type', 'properties'] %}
{{- render_extra_keys(tool.parameters, handled_keys) }}
{{- '\n</parameters>' }}
{%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
{{- render_extra_keys(tool, handled_keys) }}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{#- Close the system block if one was opened above. -#}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
{%- else %}
{%- if tools is iterable and tools | length > 0 %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Render the conversation turns. -#}
{%- for message in loop_messages %}
{%- if message.role == "assistant" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
{{- '<|im_start|>' + message.role }}
{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}
{{- '\n' + message.content | trim + '\n' }}
{%- endif %}
{%- for tool_call in message.tool_calls %}
{%- if tool_call.function is defined %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
{#- NOTE(review): from_json assumes tool_call.arguments is a JSON-encoded string;
    confirm that callers always pass arguments in string form. -#}
{%- if tool_call.arguments is defined %}
{%- set arguments = tool_call.arguments | from_json %}
{%- for args_name, args_value in arguments|items %}
{{- '<parameter=' + args_name + '>\n' }}
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
{{- args_value }}
{{- '\n</parameter>\n' }}
{%- endfor %}
{%- endif %}
{{- '</function>\n</tool_call>' }}
{%- endfor %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "user" or message.role == "system" or message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
{#- Consecutive tool responses are merged into a single user turn wrapped in <tool_response> blocks. -#}
{%- elif message.role == "tool" %}
{%- if loop.previtem and loop.previtem.role != "tool" %}
{{- '<|im_start|>user\n' }}
{%- endif %}
{{- '<tool_response>\n' }}
{{- message.content }}
{{- '\n</tool_response>\n' }}
{%- if not loop.last and loop.nextitem.role != "tool" %}
{{- '<|im_end|>\n' }}
{%- elif loop.last %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }}
{%- endif %}
{%- endfor %}
{#- Open an assistant turn when generation should continue. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
1 change: 1 addition & 0 deletions spelling-whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
8 changes: 2 additions & 6 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ COPTS_OV_TRACE = select({
"//conditions:default": ["-DOV_TRACE=0"],
"//:not_disable_ov_trace" : ["-DOV_TRACE=1"],
})
COPTS_ADJUSTED = COMMON_STATIC_LIBS_COPTS + select({
"//conditions:default": [],
"//:fuzzer_build" : COMMON_FUZZER_COPTS,
})
LINKOPTS_ADJUSTED = COMMON_STATIC_LIBS_LINKOPTS + select({
"//conditions:default": [],
"//:fuzzer_build" : COMMON_FUZZER_LINKOPTS,
Expand Down Expand Up @@ -2692,7 +2688,7 @@ cc_test(
"//conditions:default": [
"//src/llm:genai_servables",
"//src/llm:output_parsers",
":llm_output_parsers_tests",
":test_llm_output_parser_tests",
"//src/test/mediapipe/calculators:mediapipe_test_calculators",
"//src/test/mediapipe/calculators:dependency_free_http_test_calculators",
"@mediapipe//mediapipe/calculators/ovms:ovms_calculator",
Expand Down Expand Up @@ -3113,7 +3109,7 @@ cc_library(
)

cc_library(
name = "llm_output_parsers_tests",
name = "test_llm_output_parser_tests",
linkstatic = 1,
alwayslink = True,
srcs = glob(["test/llm/output_parsers/*_test.cpp"]),
Expand Down
75 changes: 67 additions & 8 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ ovms_cc_library(
name = "openai_request",
hdrs = ["apis/openai_request.hpp"],
srcs = [],
deps = select({
deps = [
"//src/port:rapidjson_document",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
Expand All @@ -90,8 +93,62 @@ ovms_cc_library(
)

ovms_cc_library(
name = "apis_tool_schema_wrapper",
hdrs = ["apis/tool_schema_wrapper.hpp"],
deps = [
"//src/port:rapidjson_document",
],
visibility = ["//visibility:public"],
)
ovms_cc_library(
name = "partial_json_builder",
hdrs = ["io_processing/partial_json_builder.hpp"],
srcs = ["io_processing/partial_json_builder.cpp"],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "io_processing_utils",
hdrs = ["io_processing/utils.hpp"],
srcs = ["io_processing/utils.cpp"],
visibility = ["//visibility:public"],
)

ovms_cc_library(
name = "io_processing_base_output_parser",
hdrs = ["io_processing/base_output_parser.hpp"],
srcs = ["io_processing/base_output_parser.cpp"],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
":io_processing_utils",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
)
ovms_cc_library(
name = "io_processing_qwen3coder_tool_parser",
hdrs = ["io_processing/qwen3coder/qwen3coder_tool_parser.hpp"],
srcs = ["io_processing/qwen3coder/qwen3coder_tool_parser.cpp"],
deps = [
"//src:libovmslogging",
"//src:libovmsstatus",
"//src/utils:rapidjson_utils",
":io_processing_utils",
":io_processing_base_output_parser",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
visibility = ["//visibility:public"],
)
ovms_cc_library( # TODO split further so we don't have to recompile everything when changing one parser ...
name = "output_parsers",
hdrs = ["io_processing/base_output_parser.hpp",
hdrs = [
"io_processing/hermes3/tool_parser.hpp",
"io_processing/llama3/tool_parser.hpp",
"io_processing/phi4/tool_parser.hpp",
Expand All @@ -101,9 +158,8 @@ ovms_cc_library(
"io_processing/gptoss/tool_parser.hpp",
"io_processing/gptoss/harmony.hpp",
"io_processing/output_parser.hpp",
"io_processing/partial_json_builder.hpp",
"io_processing/utils.hpp"],
srcs = ["io_processing/base_output_parser.cpp",
],
srcs = [
"io_processing/hermes3/tool_parser.cpp",
"io_processing/llama3/tool_parser.cpp",
"io_processing/phi4/tool_parser.cpp",
Expand All @@ -113,18 +169,21 @@ ovms_cc_library(
"io_processing/gptoss/tool_parser.cpp",
"io_processing/gptoss/harmony.cpp",
"io_processing/output_parser.cpp",
"io_processing/partial_json_builder.cpp",
"io_processing/utils.cpp"],
],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
"//src:libovmslogging",
"//src:libovmsstring_utils",
":partial_json_builder",
":io_processing_base_output_parser",
":io_processing_qwen3coder_tool_parser",
":io_processing_utils",
":apis_tool_schema_wrapper",
] + select({
"//conditions:default": ["//third_party:genai", ":llm_engine"],
"//:not_genai_bin" : [":llm_engine"],
}),
visibility = ["//visibility:public"],
additional_copts = COPTS_PYTHON
)

ovms_cc_library(
Expand Down
12 changes: 7 additions & 5 deletions src/llm/apis/openai_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,16 @@ absl::Status OpenAIChatCompletionsHandler::parseTools() {
// If we keep the tool, add tool name and schema to the request
auto parametersIt = functionIt->value.GetObject().FindMember("parameters");
if (parametersIt != functionIt->value.GetObject().MemberEnd() && parametersIt->value.IsObject()) {
// now we want to insert to a mapping of
// tool name -> tool schema representations struct
// Dump parameters object to string since this is the schema format expected by GenAI
// Keep the rapidjson::Value object as well to avoid re-parsing in outputParsers
rapidjson::StringBuffer buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
parametersIt->value.Accept(writer);
std::string parametersStr = buffer.GetString();
request.toolNameSchemaMap[nameIt->value.GetString()] = parametersStr;
ToolSchemaWrapper schemaReprs{&parametersIt->value, std::move(parametersStr)};
request.toolNameSchemaMap[nameIt->value.GetString()] = std::move(schemaReprs);
}
}
} else {
Expand Down Expand Up @@ -739,10 +743,8 @@ void OpenAIChatCompletionsHandler::incrementProcessedTokens(size_t numTokens) {

absl::Status OpenAIChatCompletionsHandler::parseRequest(std::optional<uint32_t> maxTokensLimit, uint32_t bestOfLimit, std::optional<uint32_t> maxModelLength, std::optional<std::string> allowedLocalMediaPath) {
absl::Status status = parseCommonPart(maxTokensLimit, bestOfLimit, maxModelLength);

if (status != absl::OkStatus())
return status;

if (endpoint == Endpoint::COMPLETIONS)
status = parseCompletionsPart();
else
Expand All @@ -762,9 +764,9 @@ ParsedOutput OpenAIChatCompletionsHandler::parseOutputIfNeeded(const std::vector
OVMS_PROFILE_FUNCTION();
ParsedOutput parsedOutput;
if (endpoint != Endpoint::CHAT_COMPLETIONS || outputParser == nullptr) {
parsedOutput.content = tokenizer.decode(generatedIds);
parsedOutput.content = this->tokenizer.decode(generatedIds);
} else {
parsedOutput = outputParser->parse(generatedIds, areToolsAvailable());
parsedOutput = outputParser->parse(generatedIds, this->areToolsAvailable());
}
return parsedOutput;
}
Expand Down
4 changes: 3 additions & 1 deletion src/llm/apis/openai_completions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ class OpenAIChatCompletionsHandler {
endpoint(endpoint),
created(creationTime),
tokenizer(tokenizer) {
// TODO we should delay creating output parser until we have request with toolNameSchemaMap parsed
// for now we pass it at construction time, but it must be populated before first use
if (!toolParserName.empty() || !reasoningParserName.empty()) {
outputParser = std::make_unique<OutputParser>(tokenizer, toolParserName, reasoningParserName);
outputParser = std::make_unique<OutputParser>(tokenizer, toolParserName, reasoningParserName, this->request.toolNameSchemaMap);
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/llm/apis/openai_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@
#include <openvino/runtime/tensor.hpp>
#include <openvino/genai/tokenizer.hpp>

#include "src/port/rapidjson_document.hpp"

#include "tool_schema_wrapper.hpp"

namespace ovms {
using ImageHistory = std::vector<std::pair<size_t, ov::Tensor>>;

struct StreamOptions {
bool includeUsage = false;
};

// Class that maps OpenAI request content.
struct OpenAIChatCompletionsRequest {
ov::genai::ChatHistory chatHistory;
Expand Down Expand Up @@ -74,7 +77,7 @@ struct OpenAIChatCompletionsRequest {
// Schema for response_format handling
std::optional<std::string> responseSchema{std::nullopt};
// Map that holds tool names and schemas for their arguments
std::map<std::string, std::string> toolNameSchemaMap;
ToolsSchemas_t toolNameSchemaMap;
// Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice
std::string toolChoice;

Expand Down
31 changes: 31 additions & 0 deletions src/llm/apis/tool_schema_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//*****************************************************************************
// Copyright 2025 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

// Holds two representations of a single tool's parameter JSON schema:
// the parsed rapidjson value and its serialized string form. Keeping both
// avoids re-parsing the schema in output parsers, while the string form is
// the format expected by GenAI.
#pragma once
#include <map>
#include <string>

#include "src/port/rapidjson_document.hpp"

namespace ovms {
struct ToolSchemaWrapper {
// Non-owning pointer into the request's parsed JSON document (the tool's
// "parameters" object); the document must outlive this wrapper —
// TODO confirm lifetime at all call sites.
rapidjson::Value* rapidjsonRepr;
// The same schema serialized to a string (schema format expected by GenAI).
std::string stringRepr;
};
// Maps tool name -> schema representations of that tool's arguments.
using ToolsSchemas_t = std::map<std::string, ToolSchemaWrapper>;
}  // namespace ovms
Loading