
Pr 2954 ci branch #3006


Closed
wants to merge 22 commits

Commits
9a9a763
fix: Adapt function call response to return a json string for arguments
Trofleb Jan 24, 2025
8542e2b
feat: Make streaming for tool calling behave the same as the open ai api
Trofleb Jan 24, 2025
c1c4dfb
fix: Allow back arguments in function definition and the correspondin…
Trofleb Feb 4, 2025
983b967
fix: Functioncall is actually a bit different than the deprecated fun…
Trofleb Feb 4, 2025
0ca7af8
feat: serialize function definition with serialize_as_string
drbh Feb 7, 2025
5f03014
fix: bump openapi spec
drbh Feb 10, 2025
dbce04e
fix: adjust streaming tool response
drbh Feb 11, 2025
07c2090
fix: ensure wrapping curly is not included
drbh Feb 11, 2025
40f905d
fix: adjust stream, improve tests and add openai client test
drbh Feb 17, 2025
1529a67
fix: remove snap with incorrect naming
drbh Feb 17, 2025
c215c0d
fix: bump client test expected prefill
drbh Feb 17, 2025
7d17d7c
fix: bump client tests for api changes
drbh Feb 17, 2025
fd611f3
fix: bump integration test deps for openai
drbh Feb 17, 2025
68aa6b1
fix: bump requirements file too
drbh Feb 17, 2025
538456b
fix: only send function name on first stream event
drbh Feb 18, 2025
e1b6d5b
fix: clippy cleanup
drbh Feb 18, 2025
a416ddb
fix: adjust integration tests for openai client dep
drbh Feb 19, 2025
31a536d
feat: refactor chat stream to remove state machine and simplfy logic
drbh Feb 24, 2025
c4cb54c
fix: bump integrations requirements
drbh Feb 24, 2025
a5ddc9d
feat: refactor and simplify chat stream more, bump tests and support …
drbh Feb 25, 2025
330f2e4
feat: improve partial parsing types and add test for balancing and pa…
drbh Feb 26, 2025
efb2005
feat: consolidate streaming and event creation logic and add tests fo…
drbh Feb 27, 2025
5 changes: 0 additions & 5 deletions clients/python/tests/test_client.py
@@ -14,7 +14,6 @@ def test_generate(llama_7b_url, hf_headers):
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
     assert len(response.details.prefill) == 0
-    # assert response.details.prefill[0] == InputToken(id=1, text="<s>", logprob=None)
     assert len(response.details.tokens) == 1
     assert response.details.tokens[0].id == 29918
     assert response.details.tokens[0].text == "_"
@@ -84,10 +83,6 @@ async def test_generate_async(llama_7b_url, hf_headers):
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
     assert len(response.details.prefill) == 0
-    # assert response.details.prefill[0] == InputToken(id=1, text="<s>", logprob=None)
-    # assert response.details.prefill[1] == InputToken(
-    #     id=1243, text="test", logprob=-10.96875
-    # )
     assert len(response.details.tokens) == 1
     assert response.details.tokens[0].id == 29918
     assert response.details.tokens[0].text == "_"
4 changes: 2 additions & 2 deletions clients/python/text_generation/types.py
@@ -67,7 +67,7 @@ class ChoiceDeltaToolCall(BaseModel):
 class ChoiceDelta(BaseModel):
     role: str
     content: Optional[str] = None
-    tool_calls: Optional[ChoiceDeltaToolCall] = None
+    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None


 class Choice(BaseModel):
@@ -464,4 +464,4 @@ class DeployedModel(BaseModel):
     # https://github.com/pydantic/pydantic/issues/9177
     model_config = ConfigDict(protected_namespaces=())
     model_id: str
-    sha: str
+    task: str
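To illustrate the type change above: a minimal sketch, assuming pydantic and a simplified stand-in for ChoiceDeltaToolCall whose fields are placeholders rather than the client's real definitions, showing that a streamed delta now carries a list of tool-call deltas.

from typing import List, Optional

from pydantic import BaseModel


# Simplified stand-in for the client's ChoiceDeltaToolCall; field names here are illustrative.
class ChoiceDeltaToolCall(BaseModel):
    index: int
    id: str
    type: str
    function: dict


class ChoiceDelta(BaseModel):
    role: str
    content: Optional[str] = None
    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None


# A delta from one streamed chunk: "tool_calls" is now a list, as in the OpenAI API.
delta = ChoiceDelta(
    role="assistant",
    tool_calls=[
        {"index": 0, "id": "", "type": "function",
         "function": {"name": "get_current_weather", "arguments": "{\""}}
    ],
)
assert delta.tool_calls[0].function["name"] == "get_current_weather"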
5 changes: 4 additions & 1 deletion docs/openapi.json
@@ -2302,7 +2302,10 @@
 "example": "assistant"
 },
 "tool_calls": {
-"$ref": "#/components/schemas/DeltaToolCall"
+"type": "array",
+"items": {
+"$ref": "#/components/schemas/DeltaToolCall"
+}
 }
 }
 },
2 changes: 2 additions & 0 deletions integration-tests/conftest.py
@@ -269,6 +269,8 @@ def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
 def eq_chat_complete_chunk(
     response: ChatCompletionChunk, other: ChatCompletionChunk
 ) -> bool:
+    if len(response.choices) == 0:
+        return len(other.choices) == 0
     return response.choices[0].delta.content == other.choices[0].delta.content

 def eq_response(response: Response, other: Response) -> bool:

Collaborator comment on the added empty-choices check: Shouldn't we do a tool_call check instead?
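A sketch of what the reviewer's suggestion could look like, assuming OpenAI-style ChatCompletionChunk objects; this is illustrative only and not code from the PR.

def eq_chat_complete_chunk(response, other) -> bool:
    # Usage-only final chunks have no choices; both sides must agree on that.
    if len(response.choices) == 0:
        return len(other.choices) == 0
    delta, other_delta = response.choices[0].delta, other.choices[0].delta
    if delta.tool_calls or other_delta.tool_calls:
        # Compare the streamed tool-call fragments instead of the text content.
        def calls(d):
            return [(c.index, c.function.name, c.function.arguments) for c in (d.tool_calls or [])]
        return calls(delta) == calls(other_delta)
    return delta.content == other_delta.content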
Changes to a snapshot file (filename not shown):
@@ -12,11 +12,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516693,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -32,11 +32,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516693,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -52,11 +52,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516693,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -72,11 +72,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -92,11 +92,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -112,11 +112,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656043,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -132,11 +132,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656044,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -152,11 +152,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656044,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -172,11 +172,11 @@
 "logprobs": null
 }
 ],
-"created": 1726656044,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
@@ -192,11 +192,20 @@
 "logprobs": null
 }
 ],
-"created": 1726656044,
+"created": 1740516694,
 "id": "",
-"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.2.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 },
 {
+"choices": [],
+"created": 1740516694,
+"id": "",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
+"object": "chat.completion.chunk",
+"system_fingerprint": "3.1.1-dev0-native",
+"usage": {
+"completion_tokens": 10,
+"prompt_tokens": 40,
New snapshot file (filename not shown):
@@ -0,0 +1,29 @@
+{
+"choices": [
+{
+"delta": {
+"role": "assistant",
+"tool_calls": [
+{
+"function": {
+"arguments": "\"}",
+"name": "get_current_weather"
+},
+"id": "",
+"index": 0,
+"type": "function"
+}
+]
+},
+"finish_reason": null,
+"index": 0,
+"logprobs": null
+}
+],
+"created": 1739910558,
+"id": "",
+"model": "meta-llama/Llama-3.1-8B-Instruct",
+"object": "chat.completion.chunk",
+"system_fingerprint": "3.1.1-dev0-native",
+"usage": null
+}
Changes to a snapshot file (filename not shown):
@@ -5,22 +5,22 @@
 "index": 0,
 "logprobs": null,
 "message": {
-"content": "I am an AI assistant",
+"content": "I am a helpful assistant!",
 "name": null,
 "role": "assistant",
 "tool_calls": null
 },
 "usage": null
 }
 ],
-"created": 1728497062,
+"created": 1739357385,
 "id": "",
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion",
-"system_fingerprint": "2.4.2-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": {
 "completion_tokens": 23,
-"prompt_tokens": 604,
-"total_tokens": 627
+"prompt_tokens": 494,
+"total_tokens": 517
 }
 }
Changes to a snapshot file (filename not shown):
@@ -2,7 +2,7 @@
 "choices": [
 {
 "delta": {
-"content": " assistant",
+"content": "!",
 "role": "assistant",
 "tool_calls": null
 },
@@ -11,10 +11,10 @@
 "logprobs": null
 }
 ],
-"created": 1728497531,
+"created": 1740432006,
 "id": "",
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.4.2-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 }
Changes to a snapshot file (filename not shown):
@@ -2,7 +2,7 @@
 "choices": [
 {
 "delta": {
-"content": " fans",
+"content": ".",
 "role": "assistant",
 "tool_calls": null
 },
@@ -11,10 +11,10 @@
 "logprobs": null
 }
 ],
-"created": 1728497461,
+"created": 1740432012,
 "id": "",
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.4.2-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 }
Changes to a snapshot file (filename not shown):
@@ -3,25 +3,27 @@
 {
 "delta": {
 "role": "assistant",
-"tool_calls": {
-"function": {
-"arguments": "<|eot_id|>",
-"name": null
-},
-"id": "",
-"index": 0,
-"type": "function"
-}
+"tool_calls": [
+{
+"function": {
+"arguments": "}",
+"name": "get_n_day_weather_forecast"
+},
+"id": "",
+"index": 0,
+"type": "function"
+}
+]
 },
-"finish_reason": "stop",
+"finish_reason": null,
 "index": 0,
 "logprobs": null
 }
 ],
-"created": 1732293254,
+"created": 1739910826,
 "id": "",
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.4.1-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 }
Changes to a snapshot file (filename not shown):
@@ -11,10 +11,10 @@
 "logprobs": null
 }
 ],
-"created": 1729262528,
+"created": 1740433572,
 "id": "",
 "model": "meta-llama/Llama-3.1-8B-Instruct",
 "object": "chat.completion.chunk",
-"system_fingerprint": "2.3.2-dev0-native",
+"system_fingerprint": "3.1.1-dev0-native",
 "usage": null
 }