Skip to content

Commit

Permalink
Merge branch 'dev/header' of github.com:narugo1992/huggingface_hub into dev/header
Browse files Browse the repository at this point in the history
  • Loading branch information
hanouticelina committed Feb 4, 2025
2 parents 62c9307 + 67692f5 commit fe5211f
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 36 deletions.
56 changes: 40 additions & 16 deletions src/huggingface_hub/inference/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ def automatic_speech_recognition(
audio: ContentT,
*,
model: Optional[str] = None,
extra_body: Optional[Dict] = None,
) -> AutomaticSpeechRecognitionOutput:
"""
Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
Expand All @@ -473,6 +474,9 @@ def automatic_speech_recognition(
model (`str`, *optional*):
The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. If not provided, the default recommended model for ASR will be used.
extra_body (`Dict`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Returns:
[`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
Expand All @@ -493,7 +497,7 @@ def automatic_speech_recognition(
provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
request_parameters = provider_helper.prepare_request(
inputs=audio,
parameters={},
parameters={**(extra_body or {})},
headers=self.headers,
model=model or self.model,
api_key=self.token,
Expand Down Expand Up @@ -524,6 +528,7 @@ def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
extra_body: Optional[Dict] = None,
) -> ChatCompletionOutput: ...

@overload
Expand All @@ -549,6 +554,7 @@ def chat_completion( # type: ignore
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
extra_body: Optional[Dict] = None,
) -> Iterable[ChatCompletionStreamOutput]: ...

@overload
Expand All @@ -574,6 +580,7 @@ def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

def chat_completion(
Expand All @@ -599,6 +606,7 @@ def chat_completion(
tools: Optional[List[ChatCompletionInputTool]] = None,
top_logprobs: Optional[int] = None,
top_p: Optional[float] = None,
extra_body: Optional[Dict] = None,
) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
"""
A method for completing conversations using a specified language model.
Expand All @@ -613,7 +621,7 @@ def chat_completion(
</Tip>
<Tip>
Some parameters might not be supported by some providers.
You can pass provider-specific parameters to the model by using the `extra_body` argument.
</Tip>
Args:
Expand Down Expand Up @@ -668,7 +676,9 @@ def chat_completion(
tools (List of [`ChatCompletionInputTool`], *optional*):
A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
provide a list of functions the model may generate JSON inputs for.
extra_body (`Dict`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Returns:
[`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
Generated text returned from the server:
Expand Down Expand Up @@ -753,7 +763,7 @@ def chat_completion(
print(chunk.choices[0].delta.content)
```
Example using a third-party provider directly. Usage will be billed on your Together AI account.
Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
```py
>>> from huggingface_hub import InferenceClient
>>> client = InferenceClient(
Expand All @@ -763,6 +773,7 @@ def chat_completion(
>>> client.chat_completion(
... model="meta-llama/Meta-Llama-3-8B-Instruct",
... messages=[{"role": "user", "content": "What is the capital of France?"}],
... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
... )
```
Expand Down Expand Up @@ -956,6 +967,7 @@ def chat_completion(
"top_p": top_p,
"stream": stream,
"stream_options": stream_options,
**(extra_body or {}),
}
request_parameters = provider_helper.prepare_request(
inputs=messages,
Expand Down Expand Up @@ -2390,7 +2402,7 @@ def text_to_image(
model: Optional[str] = None,
scheduler: Optional[str] = None,
seed: Optional[int] = None,
extra_parameters: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
) -> "Image":
"""
Generate an image based on a given text using a specified model.
Expand All @@ -2401,6 +2413,10 @@ def text_to_image(
</Tip>
<Tip>
You can pass provider-specific parameters to the model by using the `extra_body` argument.
</Tip>
Args:
prompt (`str`):
The prompt to generate an image from.
Expand All @@ -2424,7 +2440,7 @@ def text_to_image(
Override the scheduler with a compatible one.
seed (`int`, *optional*):
Seed for the random number generator.
extra_parameters (`Dict[str, Any]`, *optional*):
extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Expand Down Expand Up @@ -2490,7 +2506,7 @@ def text_to_image(
>>> image = client.text_to_image(
... "An astronaut riding a horse on the moon.",
... model="black-forest-labs/FLUX.1-schnell",
... extra_parameters={"output_quality": 100},
... extra_body={"output_quality": 100},
... )
>>> image.save("astronaut.png")
```
Expand All @@ -2506,7 +2522,7 @@ def text_to_image(
"guidance_scale": guidance_scale,
"scheduler": scheduler,
"seed": seed,
**(extra_parameters or {}),
**(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
Expand All @@ -2526,11 +2542,15 @@ def text_to_video(
num_frames: Optional[float] = None,
num_inference_steps: Optional[int] = None,
seed: Optional[int] = None,
extra_parameters: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Generate a video based on a given text.
<Tip>
You can pass provider-specific parameters to the model by using the `extra_body` argument.
</Tip>
Args:
prompt (`str`):
The prompt to generate a video from.
Expand All @@ -2550,7 +2570,7 @@ def text_to_video(
expense of slower inference.
seed (`int`, *optional*):
Seed for the random number generator.
extra_parameters (`Dict[str, Any]`, *optional*):
extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Expand Down Expand Up @@ -2598,7 +2618,7 @@ def text_to_video(
"num_frames": num_frames,
"num_inference_steps": num_inference_steps,
"seed": seed,
**(extra_parameters or {}),
**(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
Expand Down Expand Up @@ -2629,11 +2649,15 @@ def text_to_speech(
top_p: Optional[float] = None,
typical_p: Optional[float] = None,
use_cache: Optional[bool] = None,
extra_parameters: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
) -> bytes:
"""
Synthesize an audio of a voice pronouncing a given text.
<Tip>
You can pass provider-specific parameters to the model by using the `extra_body` argument.
</Tip>
Args:
text (`str`):
The text to synthesize.
Expand Down Expand Up @@ -2687,7 +2711,7 @@ def text_to_speech(
paper](https://hf.co/papers/2202.00666) for more details.
use_cache (`bool`, *optional*):
Whether the model should use the past last key/values attentions to speed up decoding
extra_parameters (`Dict[str, Any]`, *optional*):
extra_body (`Dict[str, Any]`, *optional*):
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
for supported parameters.
Returns:
Expand Down Expand Up @@ -2746,7 +2770,7 @@ def text_to_speech(
>>> audio = client.text_to_speech(
... "Hello, my name is Kororo, an awesome text-to-speech model.",
... model="hexgrad/Kokoro-82M",
... extra_parameters={"voice": "af_nicole"},
... extra_body={"voice": "af_nicole"},
... )
>>> Path("hello.flac").write_bytes(audio)
```
Expand Down Expand Up @@ -2777,7 +2801,7 @@ def text_to_speech(
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
>>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
>>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
>>> with open("output.mp3", "wb") as f:
... f.write(audio)
```
Expand All @@ -2802,7 +2826,7 @@ def text_to_speech(
"top_p": top_p,
"typical_p": typical_p,
"use_cache": use_cache,
**(extra_parameters or {}),
**(extra_body or {}),
},
headers=self.headers,
model=model or self.model,
Expand Down
Loading

0 comments on commit fe5211f

Please sign in to comment.