@@ -31,6 +31,7 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t
31
31
const std::string tokenizerPath = " /ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct" ;
32
32
#endif
33
33
34
+ static ovms::ToolsSchemas_t toolsSchemas; // can be empty for llama3
34
35
static std::unique_ptr<ov::genai::Tokenizer> llama3Tokenizer;
35
36
36
37
// Id of the <|python_tag|> which is a special token used to indicate the start of tool calls
@@ -68,7 +69,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
68
69
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
69
70
generatedTokens.insert (generatedTokens.begin (), botTokenId);
70
71
for (bool immediateParsing : {false , true }) {
71
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
72
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
72
73
EXPECT_EQ (parsedOutput.content , " " );
73
74
EXPECT_EQ (parsedOutput.reasoning , " " );
74
75
ASSERT_EQ (parsedOutput.toolCalls .size (), 1 );
@@ -83,7 +84,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
83
84
auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
84
85
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
85
86
for (bool immediateParsing : {false , true }) {
86
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false ) : outputParserWithRegularToolParsing->parse (generatedTokens, false );
87
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, false , toolsSchemas );
87
88
EXPECT_EQ (parsedOutput.content , input);
88
89
EXPECT_EQ (parsedOutput.reasoning , " " );
89
90
ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
@@ -96,7 +97,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
96
97
auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
97
98
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
98
99
for (bool immediateParsing : {false , true }) {
99
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
100
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
100
101
EXPECT_EQ (parsedOutput.content , " " );
101
102
EXPECT_EQ (parsedOutput.reasoning , " " );
102
103
ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
@@ -109,7 +110,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputNoToolsInTheRequest) {
109
110
auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
110
111
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
111
112
for (bool immediateParsing : {false , true }) {
112
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false ) : outputParserWithRegularToolParsing->parse (generatedTokens, false );
113
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, false , toolsSchemas );
113
114
EXPECT_EQ (parsedOutput.content , input);
114
115
EXPECT_EQ (parsedOutput.reasoning , " " );
115
116
}
@@ -122,7 +123,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
122
123
auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
123
124
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
124
125
for (bool immediateParsing : {false , true }) {
125
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
126
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
126
127
EXPECT_EQ (parsedOutput.content , " " );
127
128
EXPECT_EQ (parsedOutput.reasoning , " " );
128
129
ASSERT_EQ (parsedOutput.toolCalls .size (), 3 );
@@ -149,7 +150,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
149
150
auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
150
151
std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
151
152
for (bool immediateParsing : {false , true }) {
152
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
153
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
153
154
EXPECT_EQ (parsedOutput.content , immediateParsing ? " " : " This is a regular model response without tool calls." );
154
155
ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
155
156
EXPECT_EQ (parsedOutput.reasoning , " " );
@@ -168,7 +169,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall)
168
169
generatedTokens.insert (generatedTokens.end (), botTokenId);
169
170
generatedTokens.insert (generatedTokens.end (), generatedToolCallTokens.begin (), generatedToolCallTokens.end ());
170
171
for (bool immediateParsing : {false , true }) {
171
- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
172
+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
172
173
EXPECT_EQ (parsedOutput.content , immediateParsing ? " " : " This is a content part and next will be a tool call." );
173
174
EXPECT_EQ (parsedOutput.reasoning , " " );
174
175
ASSERT_EQ (parsedOutput.toolCalls .size (), immediateParsing ? 0 : 1 );
0 commit comments