Skip to content

Commit f0fa10a

Browse files
committed
Qwen3Coder bfcl - 0.95 simple/multiple
0.825 parallel multiple
1 parent 267d1e8 commit f0fa10a

18 files changed

+131
-34
lines changed

src/llm/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ ovms_cc_library(
115115
"@com_github_tencent_rapidjson//:rapidjson",
116116
"//src:libovmslogging",
117117
"//src:libovmsstring_utils",
118+
"openai_request", # FIXME remove
118119
] + select({
119120
"//conditions:default": ["//third_party:genai", ":llm_engine"],
120121
"//:not_genai_bin" : [":llm_engine"],

src/llm/apis/openai_completions.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -739,10 +739,8 @@ void OpenAIChatCompletionsHandler::incrementProcessedTokens(size_t numTokens) {
739739

740740
absl::Status OpenAIChatCompletionsHandler::parseRequest(std::optional<uint32_t> maxTokensLimit, uint32_t bestOfLimit, std::optional<uint32_t> maxModelLength, std::optional<std::string> allowedLocalMediaPath) {
741741
absl::Status status = parseCommonPart(maxTokensLimit, bestOfLimit, maxModelLength);
742-
743742
if (status != absl::OkStatus())
744743
return status;
745-
746744
if (endpoint == Endpoint::COMPLETIONS)
747745
status = parseCompletionsPart();
748746
else
@@ -764,7 +762,7 @@ ParsedOutput OpenAIChatCompletionsHandler::parseOutputIfNeeded(const std::vector
764762
if (endpoint != Endpoint::CHAT_COMPLETIONS || outputParser == nullptr) {
765763
parsedOutput.content = tokenizer.decode(generatedIds);
766764
} else {
767-
parsedOutput = outputParser->parse(generatedIds, areToolsAvailable());
765+
parsedOutput = outputParser->parse(generatedIds, areToolsAvailable(), this->request.toolNameSchemaMap);
768766
}
769767
return parsedOutput;
770768
}
@@ -1058,6 +1056,7 @@ std::string OpenAIChatCompletionsHandler::serializeStreamingChunk(const std::str
10581056
choice.AddMember("logprobs", Value(), allocator);
10591057
if (endpoint == Endpoint::CHAT_COMPLETIONS) {
10601058
if (outputParser != nullptr) {
1059+
// FIXME need tool maps for streaming
10611060
std::optional<Document> delta = outputParser->parseChunk(chunkResponse, areToolsAvailable(), finishReason);
10621061
if (!delta.has_value()) {
10631062
return "";

src/llm/apis/openai_request.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct StreamOptions {
3434
bool includeUsage = false;
3535
};
3636

37+
using ToolsSchemas_t = std::map<std::string, std::string>;
3738
// Class that maps OpenAI request content.
3839
struct OpenAIChatCompletionsRequest {
3940
ov::genai::ChatHistory chatHistory;
@@ -74,7 +75,7 @@ struct OpenAIChatCompletionsRequest {
7475
// Schema for response_format handling
7576
std::optional<std::string> responseSchema{std::nullopt};
7677
// Map that holds tool names and schemas for their arguments
77-
std::map<std::string, std::string> toolNameSchemaMap;
78+
ToolsSchemas_t toolNameSchemaMap;
7879
// Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice
7980
std::string toolChoice;
8081

src/llm/io_processing/base_output_parser.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#pragma warning(pop)
3131

3232
#include "partial_json_builder.hpp"
33+
#include "../apis/openai_request.hpp"
3334

3435
namespace ovms {
3536
struct ToolCall {
@@ -49,6 +50,17 @@ struct ParsedOutput {
4950
std::string reasoning;
5051
};
5152

53+
enum class ParameterType_t {
54+
STRING,
55+
NUMBER,
56+
BOOLEAN,
57+
ARRAY,
58+
OBJECT,
59+
UNKNOWN
60+
};
61+
using ParametersTypeMap_t = std::unordered_map<std::string, ParameterType_t>; // param name -> param type
62+
using ToolsParameterTypeMap_t = std::unordered_map<std::string, ParametersTypeMap_t>; // tool name -> (param name -> param type)
63+
5264
class BaseOutputParser {
5365
protected:
5466
ov::genai::Tokenizer tokenizer;
@@ -79,7 +91,7 @@ class BaseOutputParser {
7991
// Parse model output and extract relevant information to parsedOutput fields. Raw generated tokens are provided as an argument.
8092
// Additionally parsedOutput.content is already filled with decoded content when this method is called, enabling chain of parsing.
8193
// Parser is also responsible for removing extracted part from the parsedOutput.content if necessary.
82-
virtual void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) = 0;
94+
virtual void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t& toolNameSchemaMap) = 0;
8395

8496
// Parse model output chunk in the streaming mode. If in result of processing the chunk we cannot produce meaningful response, we return std::nullopt.
8597
// Otherwise we return a JSON object containing the delta that conforms to OpenAI API.

src/llm/io_processing/hermes3/tool_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
namespace ovms {
3333

34-
void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
34+
void Hermes3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) {
3535
const std::string startTag = "<tool_call>";
3636
const std::string endTag = "</tool_call>";
3737
std::vector<std::string> tools;

src/llm/io_processing/hermes3/tool_parser.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class Hermes3ToolParser : public BaseOutputParser {
5555
explicit Hermes3ToolParser(ov::genai::Tokenizer& tokenizer) :
5656
BaseOutputParser(tokenizer) {}
5757

58-
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
58+
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) override;
5959
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
6060
const std::string& getParsingStartTag() const override {
6161
return parsingStartTag;

src/llm/io_processing/llama3/tool_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#include "../utils.hpp"
3232

3333
namespace ovms {
34-
void Llama3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
34+
void Llama3ToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) {
3535
// TODO: check if we can rely on decoded <|python_tag|> token to be present in the content, so we can drop multiple detokenizations and copies
3636
// and just extract substrings from the content and modify content in-place
3737

src/llm/io_processing/llama3/tool_parser.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class Llama3ToolParser : public BaseOutputParser {
5959
explicit Llama3ToolParser(ov::genai::Tokenizer& tokenizer) :
6060
BaseOutputParser(tokenizer) {}
6161

62-
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
62+
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) override;
6363
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
6464
const std::string& getParsingStartTag() const override {
6565
return parsingStartTag;

src/llm/io_processing/mistral/tool_parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
namespace ovms {
3434

35-
void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) {
35+
void MistralToolParser::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) {
3636
std::vector<std::string> tools;
3737

3838
if (parsedOutput.content.empty() || generatedTokens.size() <= 0) {

src/llm/io_processing/mistral/tool_parser.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class MistralToolParser : public BaseOutputParser {
3939
explicit MistralToolParser(ov::genai::Tokenizer& tokenizer) :
4040
BaseOutputParser(tokenizer) {}
4141

42-
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
42+
void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens, const ToolsSchemas_t&) override;
4343
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
4444
const std::string& getParsingStartTag() const override {
4545
static const std::string toolCallStartTag = "[TOOL_CALLS]";

0 commit comments

Comments (0)