huggingface · Wauplin · Feb 12, 2025 · Feb 5, 2025 · Feb 5, 2025 · Feb 5, 2025
diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py
@@ -153,38 +153,38 @@
     "model-index",
     "pipeline_tag",
     "private",
+    "resourceGroup",
     "safetensors",
     "sha",
     "siblings",
     "spaces",
     "tags",
     "transformersInfo",
     "trendingScore",
-    "widgetData",
     "usedStorage",
-    "resourceGroup",
+    "widgetData",
 ]
 
 ExpandDatasetProperty_T = Literal[
     "author",
     "cardData",
     "citation",
     "createdAt",
-    "disabled",
     "description",
+    "disabled",
     "downloads",
     "downloadsAllTime",
     "gated",
     "lastModified",
     "likes",
     "paperswithcode_id",
     "private",
-    "siblings",
+    "resourceGroup",
     "sha",
-    "trendingScore",
+    "siblings",
     "tags",
+    "trendingScore",
     "usedStorage",
-    "resourceGroup",
 ]
 
 ExpandSpaceProperty_T = Literal[
@@ -197,15 +197,15 @@
     "likes",
     "models",
     "private",
+    "resourceGroup",
     "runtime",
     "sdk",
-    "siblings",
     "sha",
+    "siblings",
     "subdomain",
     "tags",
     "trendingScore",
     "usedStorage",
-    "resourceGroup",
 ]
 
 USERNAME_PLACEHOLDER = "hf_user"
@@ -698,6 +698,19 @@ def __init__(self, **kwargs):
         self.last_commit = last_commit
 
 
+@dataclass
+class InferenceProviderMapping:
+    status: Literal["live", "staging"]
+    provider_id: str
+    task: str
+
+    def __init__(self, **kwargs):
+        self.status = kwargs.pop("status")
+        self.provider_id = kwargs.pop("providerId")
+        self.task = kwargs.pop("task")
+        self.__dict__.update(**kwargs)
+
+
 @dataclass
 class ModelInfo:
     """
@@ -740,6 +753,8 @@ class ModelInfo:
             Status of the model on the inference API.
             Warm models are available for immediate use. Cold models will be loaded on first inference call.
             Frozen models are not available in Inference API.
+        inference_provider_mapping (`Dict`, *optional*):
+            Model's inference provider mapping.
         likes (`int`):
             Number of likes of the model.
         library_name (`str`, *optional*):
@@ -785,6 +800,7 @@ class ModelInfo:
     gated: Optional[Literal["auto", "manual", False]]
     gguf: Optional[Dict]
     inference: Optional[Literal["warm", "cold", "frozen"]]
+    inference_provider_mapping: Optional[Dict[str, InferenceProviderMapping]]
     likes: Optional[int]
     library_name: Optional[str]
     tags: Optional[List[str]]
@@ -817,7 +833,15 @@ def __init__(self, **kwargs):
         self.likes = kwargs.pop("likes", None)
         self.library_name = kwargs.pop("library_name", None)
         self.gguf = kwargs.pop("gguf", None)
+
         self.inference = kwargs.pop("inference", None)
+        self.inference_provider_mapping = kwargs.pop("inferenceProviderMapping", None)
+        if self.inference_provider_mapping:
+            self.inference_provider_mapping = {
+                provider: InferenceProviderMapping(**value)
+                for provider, value in self.inference_provider_mapping.items()
+            }
+
         self.tags = kwargs.pop("tags", None)
         self.pipeline_tag = kwargs.pop("pipeline_tag", None)
         self.mask_token = kwargs.pop("mask_token", None)

diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
@@ -133,7 +133,7 @@ class InferenceClient:
             path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
             documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`.
+            Name of the provider to use for inference. Can be "fal-ai"`, `"fireworks-ai"`, `"replicate"`, "sambanova"`, `"together"`, or `"hf-inference"`.
             defaults to hf-inference (Hugging Face Serverless Inference API).
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str` or `bool`, *optional*):
@@ -263,8 +263,9 @@ def post(
                 "`InferenceClient.post` is deprecated and should not be used directly anymore."
             )
         provider_helper = HFInferenceTask(task or "unknown")
-        url = provider_helper.build_url(provider_helper.map_model(model))
-        headers = provider_helper.prepare_headers(headers=self.headers, api_key=self.token)
+        mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+        url = provider_helper._prepare_url(self.token, mapped_model)  # type: ignore[arg-type]
+        headers = provider_helper._prepare_headers(self.headers, self.token)  # type: ignore[arg-type]
         return self._inner_post(
             request_parameters=RequestParameters(
                 url=url,

diff --git a/src/huggingface_hub/inference/_common.py b/src/huggingface_hub/inference/_common.py
@@ -18,7 +18,6 @@
 import io
 import json
 import logging
-from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
@@ -50,12 +49,7 @@
     ValidationError,
 )
 
-from ..utils import (
-    get_session,
-    is_aiohttp_available,
-    is_numpy_available,
-    is_pillow_available,
-)
+from ..utils import get_session, is_aiohttp_available, is_numpy_available, is_pillow_available
 from ._generated.types import ChatCompletionStreamOutput, TextGenerationStreamOutput
 
 
@@ -85,24 +79,6 @@ class RequestParameters:
     headers: Dict[str, Any]
 
 
-class TaskProviderHelper(ABC):
-    """Protocol defining the interface for task-specific provider helpers."""
-
-    @abstractmethod
-    def prepare_request(
-        self,
-        *,
-        inputs: Any,
-        parameters: Dict[str, Any],
-        headers: Dict,
-        model: Optional[str],
-        api_key: Optional[str],
-        extra_payload: Optional[Dict[str, Any]] = None,
-    ) -> RequestParameters: ...
-    @abstractmethod
-    def get_response(self, response: Union[bytes, Dict]) -> Any: ...
-
-
 # Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
 @dataclass
 class ModelStatus:

diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -121,7 +121,7 @@ class AsyncInferenceClient:
             path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
             documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`.
+            Name of the provider to use for inference. Can be "fal-ai"`, `"fireworks-ai"`, `"replicate"`, "sambanova"`, `"together"`, or `"hf-inference"`.
             defaults to hf-inference (Hugging Face Serverless Inference API).
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str` or `bool`, *optional*):
@@ -258,8 +258,9 @@ async def post(
                 "`InferenceClient.post` is deprecated and should not be used directly anymore."
             )
         provider_helper = HFInferenceTask(task or "unknown")
-        url = provider_helper.build_url(provider_helper.map_model(model))
-        headers = provider_helper.prepare_headers(headers=self.headers, api_key=self.token)
+        mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+        url = provider_helper._prepare_url(self.token, mapped_model)  # type: ignore[arg-type]
+        headers = provider_helper._prepare_headers(self.headers, self.token)  # type: ignore[arg-type]
         return await self._inner_post(
             request_parameters=RequestParameters(
                 url=url,

diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py
@@ -1,12 +1,13 @@
 from typing import Dict, Literal
 
-from .._common import TaskProviderHelper
+from ._common import TaskProviderHelper
 from .fal_ai import (
     FalAIAutomaticSpeechRecognitionTask,
     FalAITextToImageTask,
     FalAITextToSpeechTask,
     FalAITextToVideoTask,
 )
+from .fireworks_ai import FireworksAIConversationalTask
 from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask
 from .replicate import ReplicateTask, ReplicateTextToSpeechTask
 from .sambanova import SambanovaConversationalTask
@@ -15,6 +16,7 @@
 
 PROVIDER_T = Literal[
     "fal-ai",
+    "fireworks-ai",
     "hf-inference",
     "replicate",
     "sambanova",
@@ -28,6 +30,9 @@
         "text-to-speech": FalAITextToSpeechTask(),
         "text-to-video": FalAITextToVideoTask(),
     },
+    "fireworks-ai": {
+        "conversational": FireworksAIConversationalTask(),
+    },
     "hf-inference": {
         "text-to-image": HFInferenceTask("text-to-image"),
         "conversational": HFInferenceConversational(),