@@ -31,6 +31,7 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t
3131const std::string tokenizerPath = " /ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct" ;
3232#endif
3333
34+ static ovms::ToolsSchemas_t toolsSchemas; // can be empty for llama3
3435static std::unique_ptr<ov::genai::Tokenizer> llama3Tokenizer;
3536
3637// Id of the <|python_tag|> special token, which is used to indicate the start of tool calls
@@ -68,7 +69,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
6869 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
6970 generatedTokens.insert (generatedTokens.begin (), botTokenId);
7071 for (bool immediateParsing : {false , true }) {
71- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
72+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
7273 EXPECT_EQ (parsedOutput.content , " " );
7374 EXPECT_EQ (parsedOutput.reasoning , " " );
7475 ASSERT_EQ (parsedOutput.toolCalls .size (), 1 );
@@ -83,7 +84,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
8384 auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
8485 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
8586 for (bool immediateParsing : {false , true }) {
86- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false ) : outputParserWithRegularToolParsing->parse (generatedTokens, false );
87+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, false , toolsSchemas );
8788 EXPECT_EQ (parsedOutput.content , input);
8889 EXPECT_EQ (parsedOutput.reasoning , " " );
8990 ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
@@ -96,7 +97,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
9697 auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
9798 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
9899 for (bool immediateParsing : {false , true }) {
99- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
100+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
100101 EXPECT_EQ (parsedOutput.content , " " );
101102 EXPECT_EQ (parsedOutput.reasoning , " " );
102103 ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
@@ -109,7 +110,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputNoToolsInTheRequest) {
109110 auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
110111 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
111112 for (bool immediateParsing : {false , true }) {
112- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false ) : outputParserWithRegularToolParsing->parse (generatedTokens, false );
113+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, false , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, false , toolsSchemas );
113114 EXPECT_EQ (parsedOutput.content , input);
114115 EXPECT_EQ (parsedOutput.reasoning , " " );
115116 }
@@ -122,7 +123,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
122123 auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
123124 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
124125 for (bool immediateParsing : {false , true }) {
125- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
126+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
126127 EXPECT_EQ (parsedOutput.content , " " );
127128 EXPECT_EQ (parsedOutput.reasoning , " " );
128129 ASSERT_EQ (parsedOutput.toolCalls .size (), 3 );
@@ -149,7 +150,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
149150 auto generatedTensor = llama3Tokenizer->encode (input, ov::genai::add_special_tokens (false )).input_ids ;
150151 std::vector<int64_t > generatedTokens (generatedTensor.data <int64_t >(), generatedTensor.data <int64_t >() + generatedTensor.get_size ());
151152 for (bool immediateParsing : {false , true }) {
152- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
153+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
153154 EXPECT_EQ (parsedOutput.content , immediateParsing ? " " : " This is a regular model response without tool calls." );
154155 ASSERT_EQ (parsedOutput.toolCalls .size (), 0 );
155156 EXPECT_EQ (parsedOutput.reasoning , " " );
@@ -168,7 +169,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall)
168169 generatedTokens.insert (generatedTokens.end (), botTokenId);
169170 generatedTokens.insert (generatedTokens.end (), generatedToolCallTokens.begin (), generatedToolCallTokens.end ());
170171 for (bool immediateParsing : {false , true }) {
171- ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true ) : outputParserWithRegularToolParsing->parse (generatedTokens, true );
172+ ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse (generatedTokens, true , toolsSchemas ) : outputParserWithRegularToolParsing->parse (generatedTokens, true , toolsSchemas );
172173 EXPECT_EQ (parsedOutput.content , immediateParsing ? " " : " This is a content part and next will be a tool call." );
173174 EXPECT_EQ (parsedOutput.reasoning , " " );
174175 ASSERT_EQ (parsedOutput.toolCalls .size (), immediateParsing ? 0 : 1 );
0 commit comments