diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
index 9f2f96d963..932c3aa98e 100644
--- a/src/huggingface_hub/inference/_client.py
+++ b/src/huggingface_hub/inference/_client.py
@@ -463,6 +463,7 @@ def automatic_speech_recognition(
audio: ContentT,
*,
model: Optional[str] = None,
+ extra_body: Optional[Dict] = None,
) -> AutomaticSpeechRecognitionOutput:
"""
Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -473,6 +474,9 @@ def automatic_speech_recognition(
model (`str`, *optional*):
The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. If not provided, the default recommended model for ASR will be used.
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
Returns:
[`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
@@ -493,7 +497,7 @@ def automatic_speech_recognition(
provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
request_parameters = provider_helper.prepare_request(
inputs=audio,
- parameters={},
+ parameters={**(extra_body or {})},
headers=self.headers,
model=model or self.model,
api_key=self.token,
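The ASR docstring gains `extra_body` but, unlike the text-to-image and text-to-speech docstrings below, no usage example. A minimal sketch; the `fal-ai` provider and the `"language"` key are illustrative assumptions, not documented guarantees:
```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # provider choice is illustrative
output = client.automatic_speech_recognition(
    "sample.flac",
    model="openai/whisper-large-v3",
    extra_body={"language": "en"},  # assumed provider-specific parameter
)
print(output.text)
```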
@@ -524,6 +528,7 @@ def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> ChatCompletionOutput: ...
@overload
@@ -549,6 +554,7 @@ def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> Iterable[ChatCompletionStreamOutput]: ...
@overload
@@ -574,6 +580,7 @@ def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
def chat_completion(
@@ -599,6 +606,7 @@ def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
"""
A method for completing conversations using a specified language model.
@@ -613,7 +621,7 @@ def chat_completion(
- Some parameters might not be supported by some providers.
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
Args:
@@ -668,7 +676,9 @@ def chat_completion(
tools (List of [`ChatCompletionInputTool`], *optional*):
A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
provide a list of functions the model may generate JSON inputs for.
-
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
Returns:
[`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
Generated text returned from the server:
@@ -753,7 +763,7 @@ def chat_completion(
print(chunk.choices[0].delta.content)
```
- Example using a third-party provider directly. Usage will be billed on your Together AI account.
+ Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
@@ -763,6 +773,7 @@ def chat_completion(
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
+ ... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
```
@@ -956,6 +967,7 @@ def chat_completion(
"top_p": top_p,
"stream": stream,
"stream_options": stream_options,
+ **(extra_body or {}),
}
request_parameters = provider_helper.prepare_request(
inputs=messages,
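Note on the payload construction above: `extra_body` is spread into the dict last, so provider-specific keys are appended to the standard fields (and would override a standard field on a name clash), while `or {}` keeps `None` safe to unpack. A small sketch of that merge:
```py
base = {"temperature": 0.7, "stream": False}
extra_body = {"safety_model": "Meta-Llama/Llama-Guard-7b"}

print({**base, **(extra_body or {})})
# {'temperature': 0.7, 'stream': False, 'safety_model': 'Meta-Llama/Llama-Guard-7b'}

print({**base, **(None or {})})  # extra_body=None collapses to a no-op
# {'temperature': 0.7, 'stream': False}
```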
@@ -2390,7 +2402,7 @@ def text_to_image(
model: Optional[str] = None,
scheduler: Optional[str] = None,
seed: Optional[int] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> "Image":
"""
Generate an image based on a given text using a specified model.
@@ -2401,6 +2413,10 @@ def text_to_image(
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
prompt (`str`):
The prompt to generate an image from.
@@ -2424,7 +2440,7 @@ def text_to_image(
Override the scheduler with a compatible one.
seed (`int`, *optional*):
Seed for the random number generator.
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
@@ -2490,7 +2506,7 @@ def text_to_image(
>>> image = client.text_to_image(
... "An astronaut riding a horse on the moon.",
... model="black-forest-labs/FLUX.1-schnell",
- ... extra_parameters={"output_quality": 100},
+ ... extra_body={"output_quality": 100},
... )
>>> image.save("astronaut.png")
```
@@ -2506,7 +2522,7 @@ def text_to_image(
"guidance_scale": guidance_scale,
"scheduler": scheduler,
"seed": seed,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
@@ -2526,11 +2542,15 @@ def text_to_video(
num_frames: Optional[float] = None,
num_inference_steps: Optional[int] = None,
seed: Optional[int] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Generate a video based on a given text.
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
prompt (`str`):
The prompt to generate a video from.
@@ -2550,7 +2570,7 @@ def text_to_video(
expense of slower inference.
seed (`int`, *optional*):
Seed for the random number generator.
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
@@ -2598,7 +2618,7 @@ def text_to_video(
"num_frames": num_frames,
"num_inference_steps": num_inference_steps,
"seed": seed,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
@@ -2629,11 +2649,15 @@ def text_to_speech(
top_p: Optional[float] = None,
typical_p: Optional[float] = None,
use_cache: Optional[bool] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Synthesize an audio of a voice pronouncing a given text.
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
text (`str`):
The text to synthesize.
@@ -2687,7 +2711,7 @@ def text_to_speech(
paper](https://hf.co/papers/2202.00666) for more details.
use_cache (`bool`, *optional*):
Whether the model should use the past last key/values attentions to speed up decoding
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Returns:
@@ -2746,7 +2770,7 @@ def text_to_speech(
>>> audio = client.text_to_speech(
... "Hello, my name is Kororo, an awesome text-to-speech model.",
... model="hexgrad/Kokoro-82M",
- ... extra_parameters={"voice": "af_nicole"},
+ ... extra_body={"voice": "af_nicole"},
... )
>>> Path("hello.flac").write_bytes(audio)
```
@@ -2777,7 +2801,7 @@ def text_to_speech(
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
- >>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
+ >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
>>> with open("output.mp3", "wb") as f:
... f.write(audio)
```
@@ -2802,7 +2826,7 @@ def text_to_speech(
"top_p": top_p,
"typical_p": typical_p,
"use_cache": use_cache,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
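Since `extra_parameters` is renamed to `extra_body` across `text_to_image`, `text_to_video`, and `text_to_speech`, existing keyword callers need a one-line update. A before/after sketch reusing the docstring's own example values:
```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="replicate")  # provider choice is illustrative

# Before this change (old keyword, now a TypeError):
# image = client.text_to_image(..., extra_parameters={"output_quality": 100})

# After this change:
image = client.text_to_image(
    "An astronaut riding a horse on the moon.",
    model="black-forest-labs/FLUX.1-schnell",
    extra_body={"output_quality": 100},
)
```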
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
index 5b686edac8..7503f35ac3 100644
--- a/src/huggingface_hub/inference/_generated/_async_client.py
+++ b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -496,6 +496,7 @@ async def automatic_speech_recognition(
audio: ContentT,
*,
model: Optional[str] = None,
+ extra_body: Optional[Dict] = None,
) -> AutomaticSpeechRecognitionOutput:
"""
Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -506,6 +507,9 @@ async def automatic_speech_recognition(
model (`str`, *optional*):
The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. If not provided, the default recommended model for ASR will be used.
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
Returns:
[`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
@@ -527,7 +531,7 @@ async def automatic_speech_recognition(
provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
request_parameters = provider_helper.prepare_request(
inputs=audio,
- parameters={},
+ parameters={**(extra_body or {})},
headers=self.headers,
model=model or self.model,
api_key=self.token,
@@ -558,6 +562,7 @@ async def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> ChatCompletionOutput: ...
@overload
@@ -583,6 +588,7 @@ async def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> AsyncIterable[ChatCompletionStreamOutput]: ...
@overload
@@ -608,6 +614,7 @@ async def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
async def chat_completion(
@@ -633,6 +640,7 @@ async def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
"""
A method for completing conversations using a specified language model.
@@ -647,7 +655,7 @@ async def chat_completion(
- Some parameters might not be supported by some providers.
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
Args:
@@ -702,7 +710,9 @@ async def chat_completion(
tools (List of [`ChatCompletionInputTool`], *optional*):
A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
provide a list of functions the model may generate JSON inputs for.
-
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
Returns:
[`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
Generated text returned from the server:
@@ -790,7 +800,7 @@ async def chat_completion(
print(chunk.choices[0].delta.content)
```
- Example using a third-party provider directly. Usage will be billed on your Together AI account.
+ Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
@@ -800,6 +810,7 @@ async def chat_completion(
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
+ ... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
```
@@ -996,6 +1007,7 @@ async def chat_completion(
"top_p": top_p,
"stream": stream,
"stream_options": stream_options,
+ **(extra_body or {}),
}
request_parameters = provider_helper.prepare_request(
inputs=messages,
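The async client mirrors the sync behavior exactly; a minimal end-to-end sketch, with `safety_model` taken from the Together AI docstring example above:
```py
import asyncio

from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(provider="together")
    out = await client.chat_completion(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
    )
    print(out.choices[0].message.content)

asyncio.run(main())
```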
@@ -2446,7 +2458,7 @@ async def text_to_image(
model: Optional[str] = None,
scheduler: Optional[str] = None,
seed: Optional[int] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> "Image":
"""
Generate an image based on a given text using a specified model.
@@ -2457,6 +2469,10 @@ async def text_to_image(
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
prompt (`str`):
The prompt to generate an image from.
@@ -2480,7 +2496,7 @@ async def text_to_image(
Override the scheduler with a compatible one.
seed (`int`, *optional*):
Seed for the random number generator.
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
@@ -2547,7 +2563,7 @@ async def text_to_image(
>>> image = client.text_to_image(
... "An astronaut riding a horse on the moon.",
... model="black-forest-labs/FLUX.1-schnell",
- ... extra_parameters={"output_quality": 100},
+ ... extra_body={"output_quality": 100},
... )
>>> image.save("astronaut.png")
```
@@ -2563,7 +2579,7 @@ async def text_to_image(
"guidance_scale": guidance_scale,
"scheduler": scheduler,
"seed": seed,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
@@ -2583,11 +2599,15 @@ async def text_to_video(
num_frames: Optional[float] = None,
num_inference_steps: Optional[int] = None,
seed: Optional[int] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Generate a video based on a given text.
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
prompt (`str`):
The prompt to generate a video from.
@@ -2607,7 +2627,7 @@ async def text_to_video(
expense of slower inference.
seed (`int`, *optional*):
Seed for the random number generator.
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
@@ -2655,7 +2675,7 @@ async def text_to_video(
"num_frames": num_frames,
"num_inference_steps": num_inference_steps,
"seed": seed,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
@@ -2686,11 +2706,15 @@ async def text_to_speech(
top_p: Optional[float] = None,
typical_p: Optional[float] = None,
use_cache: Optional[bool] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Synthesize an audio of a voice pronouncing a given text.
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
Args:
text (`str`):
The text to synthesize.
@@ -2744,7 +2768,7 @@ async def text_to_speech(
paper](https://hf.co/papers/2202.00666) for more details.
use_cache (`bool`, *optional*):
Whether the model should use the past last key/values attentions to speed up decoding
- extra_parameters (`Dict[str, Any]`, *optional*):
+ extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Returns:
@@ -2804,7 +2828,7 @@ async def text_to_speech(
>>> audio = client.text_to_speech(
... "Hello, my name is Kororo, an awesome text-to-speech model.",
... model="hexgrad/Kokoro-82M",
- ... extra_parameters={"voice": "af_nicole"},
+ ... extra_body={"voice": "af_nicole"},
... )
>>> Path("hello.flac").write_bytes(audio)
```
@@ -2835,7 +2859,7 @@ async def text_to_speech(
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
- >>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
+ >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
>>> with open("output.mp3", "wb") as f:
... f.write(audio)
```
@@ -2860,7 +2884,7 @@ async def text_to_speech(
"top_p": top_p,
"typical_p": typical_p,
"use_cache": use_cache,
- **(extra_parameters or {}),
+ **(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
index 7e20a8116c..083461f6a9 100644
--- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
+++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -99,7 +99,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
text: str
"""A chunk of text identified by the model"""
- timestamps: List[float]
+ timestamp: List[float]
"""The start and end timestamps corresponding with the text"""
diff --git a/src/huggingface_hub/utils/_http.py b/src/huggingface_hub/utils/_http.py
index 243c060460..b116f78d7b 100644
--- a/src/huggingface_hub/utils/_http.py
+++ b/src/huggingface_hub/utils/_http.py
@@ -576,10 +576,10 @@ def _curlify(request: requests.PreparedRequest) -> str:
if request.body:
body = request.body
if isinstance(body, bytes):
- body = body.decode("utf-8")
+ body = body.decode("utf-8", errors="ignore")
if len(body) > 1000:
body = body[:1000] + " ... [truncated]"
- parts += [("-d", body)]
+ parts += [("-d", body.replace("\n", ""))]
parts += [(None, request.url)]
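Both `_curlify` tweaks are about logging robustness: request bodies can be raw bytes (audio, images) that are not valid UTF-8, and embedded newlines would break the generated one-line curl command. A sketch of the failure mode the change avoids:
```py
body = b"fLaC\x00\x00\x00\x22\xff\xf8...\n"  # binary audio payload, not UTF-8

try:
    body.decode("utf-8")
except UnicodeDecodeError as err:
    print("strict decode fails:", err)

# Patched behavior: decoding always succeeds and the output stays on one line.
printable = body.decode("utf-8", errors="ignore").replace("\n", "")
print(f"curl -d '{printable}' ...")
```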
diff --git a/utils/check_task_parameters.py b/utils/check_task_parameters.py
index d732fd821d..cd95a18a4a 100644
--- a/utils/check_task_parameters.py
+++ b/utils/check_task_parameters.py
@@ -90,7 +90,7 @@
"question", # For QA tasks
"context", # For QA tasks
"labels", # For classification tasks
- "extra_parameters", # For extra parameters
+ "extra_body", # For extra parameters
}
#### NODE VISITORS (READING THE CODE)
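Finally, the checker's ignore-set learns the new name. A hedged sketch (not the script's actual logic) of how such an allowlist is typically consulted when comparing client method signatures against the generated parameter dataclasses:
```py
# Sketch only; check_task_parameters.py may structure this differently.
PARAMETERS_TO_IGNORE = {"question", "context", "labels", "extra_body"}

def unexpected_params(signature_params: set, dataclass_fields: set) -> set:
    """Signature arguments with no counterpart in the task's parameters dataclass."""
    return signature_params - dataclass_fields - PARAMETERS_TO_IGNORE

# `extra_body` is client-only, so it no longer trips the check:
print(unexpected_params({"prompt", "seed", "extra_body"}, {"prompt", "seed"}))  # set()
```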