-
Notifications
You must be signed in to change notification settings - Fork 633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InferenceClient] Better handling of task parameters #2812
Changes from 10 commits
ea3245e
1aa5558
f8c673b
169205c
305c720
d927057
57d2ae2
bf96fde
9817c12
958a426
c537251
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,6 @@ | |
TextGenerationInputGrammarType, | ||
TextGenerationOutput, | ||
TextGenerationStreamOutput, | ||
TextToImageTargetSize, | ||
TextToSpeechEarlyStoppingEnum, | ||
TokenClassificationAggregationStrategy, | ||
TokenClassificationOutputElement, | ||
|
@@ -474,8 +473,6 @@ def automatic_speech_recognition( | |
model (`str`, *optional*): | ||
The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed | ||
Inference Endpoint. If not provided, the default recommended model for ASR will be used. | ||
parameters (Dict[str, Any], *optional*): | ||
Additional parameters to pass to the model. | ||
Returns: | ||
[`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks. | ||
|
||
|
@@ -2392,9 +2389,8 @@ def text_to_image( | |
guidance_scale: Optional[float] = None, | ||
model: Optional[str] = None, | ||
scheduler: Optional[str] = None, | ||
target_size: Optional[TextToImageTargetSize] = None, | ||
seed: Optional[int] = None, | ||
**kwargs, | ||
extra_parameters: Optional[Dict[str, Any]] = None, | ||
) -> "Image": | ||
""" | ||
Generate an image based on a given text using a specified model. | ||
|
@@ -2426,10 +2422,11 @@ def text_to_image( | |
Defaults to None. | ||
scheduler (`str`, *optional*): | ||
Override the scheduler with a compatible one. | ||
target_size (`TextToImageTargetSize`, *optional*): | ||
The size in pixel of the output image | ||
seed (`int`, *optional*): | ||
Seed for the random number generator. | ||
extra_parameters (`Dict[str, Any]`, *optional*): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have a good example of how to use `extra_parameters`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yep, added in 305c720 |
||
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation | ||
for supported parameters. | ||
|
||
Returns: | ||
`Image`: The generated image. | ||
|
@@ -2482,6 +2479,21 @@ def text_to_image( | |
... ) | ||
>>> image.save("astronaut.png") | ||
``` | ||
|
||
Example using Replicate provider with extra parameters | ||
```py | ||
>>> from huggingface_hub import InferenceClient | ||
>>> client = InferenceClient( | ||
... provider="replicate", # Use replicate provider | ||
... api_key="hf_...", # Pass your HF token | ||
... ) | ||
>>> image = client.text_to_image( | ||
... "An astronaut riding a horse on the moon.", | ||
... model="black-forest-labs/FLUX.1-schnell", | ||
... extra_parameters={"output_quality": 100}, | ||
... ) | ||
>>> image.save("astronaut.png") | ||
``` | ||
""" | ||
provider_helper = get_provider_helper(self.provider, task="text-to-image") | ||
request_parameters = provider_helper.prepare_request( | ||
|
@@ -2493,9 +2505,8 @@ def text_to_image( | |
"num_inference_steps": num_inference_steps, | ||
"guidance_scale": guidance_scale, | ||
"scheduler": scheduler, | ||
"target_size": target_size, | ||
"seed": seed, | ||
**kwargs, | ||
**(extra_parameters or {}), | ||
}, | ||
headers=self.headers, | ||
model=model or self.model, | ||
|
@@ -2515,6 +2526,7 @@ def text_to_video( | |
num_frames: Optional[float] = None, | ||
num_inference_steps: Optional[int] = None, | ||
seed: Optional[int] = None, | ||
extra_parameters: Optional[Dict[str, Any]] = None, | ||
) -> bytes: | ||
""" | ||
Generate a video based on a given text. | ||
|
@@ -2538,6 +2550,9 @@ def text_to_video( | |
expense of slower inference. | ||
seed (`int`, *optional*): | ||
Seed for the random number generator. | ||
extra_parameters (`Dict[str, Any]`, *optional*): | ||
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation | ||
for supported parameters. | ||
|
||
Returns: | ||
`bytes`: The generated video. | ||
|
@@ -2583,6 +2598,7 @@ def text_to_video( | |
"num_frames": num_frames, | ||
"num_inference_steps": num_inference_steps, | ||
"seed": seed, | ||
**(extra_parameters or {}), | ||
}, | ||
headers=self.headers, | ||
model=model or self.model, | ||
|
@@ -2613,6 +2629,7 @@ def text_to_speech( | |
top_p: Optional[float] = None, | ||
typical_p: Optional[float] = None, | ||
use_cache: Optional[bool] = None, | ||
extra_parameters: Optional[Dict[str, Any]] = None, | ||
) -> bytes: | ||
""" | ||
Synthesize an audio of a voice pronouncing a given text. | ||
|
@@ -2670,7 +2687,9 @@ def text_to_speech( | |
paper](https://hf.co/papers/2202.00666) for more details. | ||
use_cache (`bool`, *optional*): | ||
Whether the model should use the past last key/values attentions to speed up decoding | ||
|
||
extra_parameters (`Dict[str, Any]`, *optional*): | ||
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation | ||
for supported parameters. | ||
Returns: | ||
`bytes`: The generated audio. | ||
|
||
|
@@ -2717,6 +2736,20 @@ def text_to_speech( | |
... ) | ||
>>> Path("hello_world.flac").write_bytes(audio) | ||
``` | ||
Example using Replicate provider with extra parameters | ||
```py | ||
>>> from huggingface_hub import InferenceClient | ||
>>> client = InferenceClient( | ||
... provider="replicate", # Use replicate provider | ||
... api_key="hf_...", # Pass your HF token | ||
... ) | ||
>>> audio = client.text_to_speech( | ||
... "Hello, my name is Kokoro, an awesome text-to-speech model.", | ||
... model="hexgrad/Kokoro-82M", | ||
... extra_parameters={"voice": "af_nicole"}, | ||
... ) | ||
>>> Path("hello.flac").write_bytes(audio) | ||
``` | ||
""" | ||
provider_helper = get_provider_helper(self.provider, task="text-to-speech") | ||
request_parameters = provider_helper.prepare_request( | ||
|
@@ -2738,6 +2771,7 @@ def text_to_speech( | |
"top_p": top_p, | ||
"typical_p": typical_p, | ||
"use_cache": use_cache, | ||
**(extra_parameters or {}), | ||
}, | ||
headers=self.headers, | ||
model=model or self.model, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,7 +134,7 @@ def __init__(self): | |
|
||
def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]: | ||
parameters = {k: v for k, v in parameters.items() if v is not None} | ||
if "image_size" not in parameters and "width" in parameters and "height" in parameters: | ||
if "width" in parameters and "height" in parameters: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What if only one is passed, btw? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should be able to send only one if specified; the other one would be set to the default value. I'll fix that. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, actually for fal-ai you either send both, or neither. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are no default values for either of them, according to their documentation. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ok thanks for checking 👍 |
||
parameters["image_size"] = { | ||
"width": parameters.pop("width"), | ||
"height": parameters.pop("height"), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a breaking change but hopefully totally fine
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll mention this in the next release notes! But it should be fine: if users were previously using
text_to_image
with the HF Inference API, this shouldn't be an issue, since all API parameters were exposed as explicit method arguments. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes exactly