diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
index 9f2f96d963..932c3aa98e 100644
--- a/src/huggingface_hub/inference/_client.py
+++ b/src/huggingface_hub/inference/_client.py
@@ -463,6 +463,7 @@ def automatic_speech_recognition(
         audio: ContentT,
         *,
         model: Optional[str] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -473,6 +474,9 @@ def automatic_speech_recognition(
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
 
         Returns:
             [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
@@ -493,7 +497,7 @@ def automatic_speech_recognition(
         provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
         request_parameters = provider_helper.prepare_request(
             inputs=audio,
-            parameters={},
+            parameters={**(extra_body or {})},
             headers=self.headers,
             model=model or self.model,
             api_key=self.token,
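Usage sketch (not part of the diff): the ASR hunks above add `extra_body` but no docstring example. A minimal illustration of the new argument, where the `language` key is a hypothetical provider-specific parameter, not part of the documented API:

```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(provider="fal-ai")
>>> # "language" is a hypothetical provider-specific key; check your provider's docs
>>> output = client.automatic_speech_recognition(
...     "sample1.flac",
...     model="openai/whisper-large-v3",
...     extra_body={"language": "en"},
... )
>>> output.text
```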
@@ -524,6 +528,7 @@ def chat_completion(  # type: ignore
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> ChatCompletionOutput: ...
 
     @overload
@@ -549,6 +554,7 @@ def chat_completion(  # type: ignore
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...
 
     @overload
@@ -574,6 +580,7 @@ def chat_completion(
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
 
     def chat_completion(
@@ -599,6 +606,7 @@ def chat_completion(
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
@@ -613,7 +621,7 @@ def chat_completion(
         <Tip>
 
-        Some parameters might not be supported by some providers.
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
 
         </Tip>
 
         Args:
@@ -668,7 +676,9 @@ def chat_completion(
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
             Generated text returned from the server:
@@ -753,7 +763,7 @@ def chat_completion(
             print(chunk.choices[0].delta.content)
         ```
 
-        Example using a third-party provider directly. Usage will be billed on your Together AI account.
+        Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
         ```py
         >>> from huggingface_hub import InferenceClient
         >>> client = InferenceClient(
@@ -763,6 +773,7 @@ def chat_completion(
         >>> client.chat_completion(
         ...     model="meta-llama/Meta-Llama-3-8B-Instruct",
         ...     messages=[{"role": "user", "content": "What is the capital of France?"}],
+        ...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
         ... )
         ```
@@ -956,6 +967,7 @@ def chat_completion(
             "top_p": top_p,
             "stream": stream,
             "stream_options": stream_options,
+            **(extra_body or {}),
         }
         request_parameters = provider_helper.prepare_request(
             inputs=messages,
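Note the merge order in the payload hunk above: `**(extra_body or {})` is spread last, so provider-specific keys ride along with the named parameters and a colliding key wins over the built-in one. A quick sketch of that dict-merge semantics in plain Python:

```py
>>> parameters = {"temperature": 0.7, "stream": False}
>>> extra_body = {"safety_model": "Meta-Llama/Llama-Guard-7b", "temperature": 0.2}
>>> # the rightmost dict wins on key collisions; unknown keys are simply added
>>> {**parameters, **(extra_body or {})}
{'temperature': 0.2, 'stream': False, 'safety_model': 'Meta-Llama/Llama-Guard-7b'}
```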
@@ -2390,7 +2402,7 @@ def text_to_image(
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2401,6 +2413,10 @@ def text_to_image(
 
         </Tip>
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
@@ -2424,7 +2440,7 @@ def text_to_image(
                 Override the scheduler with a compatible one.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2490,7 +2506,7 @@ def text_to_image(
         >>> image = client.text_to_image(
         ...     "An astronaut riding a horse on the moon.",
         ...     model="black-forest-labs/FLUX.1-schnell",
-        ...     extra_parameters={"output_quality": 100},
+        ...     extra_body={"output_quality": 100},
         ... )
         >>> image.save("astronaut.png")
         ```
@@ -2506,7 +2522,7 @@ def text_to_image(
                 "guidance_scale": guidance_scale,
                 "scheduler": scheduler,
                 "seed": seed,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -2526,11 +2542,15 @@ def text_to_video(
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate a video from.
@@ -2550,7 +2570,7 @@ def text_to_video(
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2598,7 +2618,7 @@ def text_to_video(
                 "num_frames": num_frames,
                 "num_inference_steps": num_inference_steps,
                 "seed": seed,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -2629,11 +2649,15 @@ def text_to_speech(
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             text (`str`):
                 The text to synthesize.
@@ -2687,7 +2711,7 @@ def text_to_speech(
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2746,7 +2770,7 @@ def text_to_speech(
         >>> audio = client.text_to_speech(
         ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
         ...     model="hexgrad/Kokoro-82M",
-        ...     extra_parameters={"voice": "af_nicole"},
+        ...     extra_body={"voice": "af_nicole"},
         ... )
         >>> Path("hello.flac").write_bytes(audio)
         ```
@@ -2777,7 +2801,7 @@ def text_to_speech(
         ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
         ...     api_key=...,
         ... )
-        >>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
+        >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
         >>> with open("output.mp3", "wb") as f:
         ...     f.write(audio)
         ```
@@ -2802,7 +2826,7 @@ def text_to_speech(
                 "top_p": top_p,
                 "typical_p": typical_p,
                 "use_cache": use_cache,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
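Unlike `text_to_image` and `text_to_speech`, the `text_to_video` docstring gains no example for the renamed argument; a sketch of what one would look like, where the `fps` key is a hypothetical provider-specific parameter:

```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(provider="fal-ai")
>>> video = client.text_to_video(
...     "An astronaut riding a horse on the moon.",
...     model="tencent/HunyuanVideo",
...     extra_body={"fps": 24},  # hypothetical provider-specific key
... )
>>> with open("astronaut.mp4", "wb") as f:
...     f.write(video)
```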
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
index 5b686edac8..7503f35ac3 100644
--- a/src/huggingface_hub/inference/_generated/_async_client.py
+++ b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -496,6 +496,7 @@ async def automatic_speech_recognition(
         audio: ContentT,
         *,
         model: Optional[str] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -506,6 +507,9 @@ async def automatic_speech_recognition(
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
 
         Returns:
             [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
@@ -527,7 +531,7 @@ async def automatic_speech_recognition(
         provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
         request_parameters = provider_helper.prepare_request(
             inputs=audio,
-            parameters={},
+            parameters={**(extra_body or {})},
             headers=self.headers,
             model=model or self.model,
             api_key=self.token,
@@ -558,6 +562,7 @@ async def chat_completion(  # type: ignore
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> ChatCompletionOutput: ...
 
     @overload
@@ -583,6 +588,7 @@ async def chat_completion(  # type: ignore
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
 
     @overload
@@ -608,6 +614,7 @@ async def chat_completion(
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
 
     async def chat_completion(
@@ -633,6 +640,7 @@ async def chat_completion(
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
@@ -647,7 +655,7 @@ async def chat_completion(
         <Tip>
 
-        Some parameters might not be supported by some providers.
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
 
         </Tip>
 
         Args:
@@ -702,7 +710,9 @@ async def chat_completion(
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
             Generated text returned from the server:
@@ -790,7 +800,7 @@ async def chat_completion(
             print(chunk.choices[0].delta.content)
         ```
 
-        Example using a third-party provider directly. Usage will be billed on your Together AI account.
+        Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
         ```py
         >>> from huggingface_hub import InferenceClient
         >>> client = InferenceClient(
@@ -800,6 +810,7 @@ async def chat_completion(
         >>> client.chat_completion(
         ...     model="meta-llama/Meta-Llama-3-8B-Instruct",
         ...     messages=[{"role": "user", "content": "What is the capital of France?"}],
+        ...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
         ... )
         ```
@@ -996,6 +1007,7 @@ async def chat_completion(
             "top_p": top_p,
             "stream": stream,
             "stream_options": stream_options,
+            **(extra_body or {}),
         }
         request_parameters = provider_helper.prepare_request(
             inputs=messages,
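The generated async client mirrors the sync signatures, so `extra_body` works the same through `AsyncInferenceClient`; a minimal sketch of the docstring's Together example in an async context:

```py
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient(provider="together", api_key="<together_api_key>")
>>> # must be run inside an async context (e.g. via asyncio.run(...))
>>> await client.chat_completion(
...     model="meta-llama/Meta-Llama-3-8B-Instruct",
...     messages=[{"role": "user", "content": "What is the capital of France?"}],
...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
```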
@@ -2446,7 +2458,7 @@ async def text_to_image(
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2457,6 +2469,10 @@ async def text_to_image(
 
         </Tip>
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
@@ -2480,7 +2496,7 @@ async def text_to_image(
                 Override the scheduler with a compatible one.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2547,7 +2563,7 @@ async def text_to_image(
         >>> image = client.text_to_image(
         ...     "An astronaut riding a horse on the moon.",
         ...     model="black-forest-labs/FLUX.1-schnell",
-        ...     extra_parameters={"output_quality": 100},
+        ...     extra_body={"output_quality": 100},
         ... )
         >>> image.save("astronaut.png")
         ```
@@ -2563,7 +2579,7 @@ async def text_to_image(
                 "guidance_scale": guidance_scale,
                 "scheduler": scheduler,
                 "seed": seed,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -2583,11 +2599,15 @@ async def text_to_video(
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate a video from.
@@ -2607,7 +2627,7 @@ async def text_to_video(
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2655,7 +2675,7 @@ async def text_to_video(
                 "num_frames": num_frames,
                 "num_inference_steps": num_inference_steps,
                 "seed": seed,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -2686,11 +2706,15 @@ async def text_to_speech(
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_parameters: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.
 
+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             text (`str`):
                 The text to synthesize.
@@ -2744,7 +2768,7 @@ async def text_to_speech(
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_parameters (`Dict[str, Any]`, *optional*):
+            extra_body (`Dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2804,7 +2828,7 @@ async def text_to_speech(
         >>> audio = client.text_to_speech(
         ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
         ...     model="hexgrad/Kokoro-82M",
-        ...     extra_parameters={"voice": "af_nicole"},
+        ...     extra_body={"voice": "af_nicole"},
         ... )
         >>> Path("hello.flac").write_bytes(audio)
         ```
@@ -2835,7 +2859,7 @@ async def text_to_speech(
         ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
         ...     api_key=...,
         ... )
-        >>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
+        >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
         >>> with open("output.mp3", "wb") as f:
         ...     f.write(audio)
         ```
@@ -2860,7 +2884,7 @@ async def text_to_speech(
                 "top_p": top_p,
                 "typical_p": typical_p,
                 "use_cache": use_cache,
-                **(extra_parameters or {}),
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
index 7e20a8116c..083461f6a9 100644
--- a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
+++ b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -99,7 +99,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
 class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
     text: str
     """A chunk of text identified by the model"""
-    timestamps: List[float]
+    timestamp: List[float]
     """The start and end timestamps corresponding with the text"""
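The type fix above renames the chunk field from `timestamps` to `timestamp`, aligning the dataclass with the key the server actually returns. Downstream code reading chunked ASR output would access it like this (sketch; the printed values are illustrative):

```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient()
>>> output = client.automatic_speech_recognition("sample1.flac")
>>> for chunk in output.chunks or []:  # chunks is Optional, so guard against None
...     print(chunk.text, chunk.timestamp)  # e.g. "Hello world" [0.0, 1.2]
```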
diff --git a/src/huggingface_hub/utils/_http.py b/src/huggingface_hub/utils/_http.py
index 243c060460..b116f78d7b 100644
--- a/src/huggingface_hub/utils/_http.py
+++ b/src/huggingface_hub/utils/_http.py
@@ -576,10 +576,10 @@ def _curlify(request: requests.PreparedRequest) -> str:
     if request.body:
         body = request.body
         if isinstance(body, bytes):
-            body = body.decode("utf-8")
+            body = body.decode("utf-8", errors="ignore")
         if len(body) > 1000:
             body = body[:1000] + " ... [truncated]"
-        parts += [("-d", body)]
+        parts += [("-d", body.replace("\n", ""))]
 
     parts += [(None, request.url)]
diff --git a/utils/check_task_parameters.py b/utils/check_task_parameters.py
index d732fd821d..cd95a18a4a 100644
--- a/utils/check_task_parameters.py
+++ b/utils/check_task_parameters.py
@@ -90,7 +90,7 @@
     "question",  # For QA tasks
     "context",  # For QA tasks
     "labels",  # For classification tasks
-    "extra_parameters",  # For extra parameters
+    "extra_body",  # For extra parameters
 }
 
 
 #### NODE VISITORS (READING THE CODE)
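In `_curlify`, decoding with `errors="ignore"` keeps the debug output from raising on non-UTF-8 request bodies (e.g. raw audio sent to ASR), and stripping newlines keeps the generated `-d` argument on a single line. The effect in isolation:

```py
>>> body = b"\xff\xfeaudio\nbytes"  # not valid UTF-8
>>> body.decode("utf-8", errors="ignore").replace("\n", "")  # new behavior: invalid bytes dropped
'audiobytes'
>>> body.decode("utf-8")  # old behavior: raises
Traceback (most recent call last):
  ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
```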