Skip to content

Commit

Permalink
draft: add hyperbolic support
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaihuang724 committed Feb 5, 2025
1 parent c17a753 commit c507b1e
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 13 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy

HF_HYPERBOLIC_KEY: dummy
browser:
runs-on: ubuntu-latest
timeout-minutes: 10
Expand Down Expand Up @@ -85,7 +85,7 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy

HF_HYPERBOLIC_KEY: dummy
e2e:
runs-on: ubuntu-latest
timeout-minutes: 10
Expand Down Expand Up @@ -152,3 +152,4 @@ jobs:
HF_REPLICATE_KEY: dummy
HF_SAMBANOVA_KEY: dummy
HF_TOGETHER_KEY: dummy
HF_HYPERBOLIC_KEY: dummy
3 changes: 2 additions & 1 deletion packages/inference/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Your access token should be kept private. If you need to protect it in front-end

You can send inference requests to third-party providers with the inference client.

Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz), [Sambanova](https://sambanova.ai) and [Hyperbolic](https://hyperbolic.xyz).

To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
```ts
Expand All @@ -68,6 +68,7 @@ Only a subset of models are supported when requesting third-party providers. You
- [Replicate supported models](./src/providers/replicate.ts)
- [Sambanova supported models](./src/providers/sambanova.ts)
- [Together supported models](./src/providers/together.ts)
- [Hyperbolic supported models](./src/providers/hyperbolic.ts)
- [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)

**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
Expand Down
1 change: 1 addition & 0 deletions packages/inference/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ export { FAL_AI_SUPPORTED_MODEL_IDS } from "./providers/fal-ai";
export { REPLICATE_SUPPORTED_MODEL_IDS } from "./providers/replicate";
export { SAMBANOVA_SUPPORTED_MODEL_IDS } from "./providers/sambanova";
export { TOGETHER_SUPPORTED_MODEL_IDS } from "./providers/together";
export { HYPERBOLIC_SUPPORTED_MODEL_IDS } from "./providers/hyperbolic";
export * from "./types";
export * from "./tasks";
9 changes: 9 additions & 0 deletions packages/inference/src/lib/makeRequestOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { FAL_AI_API_BASE_URL, FAL_AI_SUPPORTED_MODEL_IDS } from "../providers/fa
import { REPLICATE_API_BASE_URL, REPLICATE_SUPPORTED_MODEL_IDS } from "../providers/replicate";
import { SAMBANOVA_API_BASE_URL, SAMBANOVA_SUPPORTED_MODEL_IDS } from "../providers/sambanova";
import { TOGETHER_API_BASE_URL, TOGETHER_SUPPORTED_MODEL_IDS } from "../providers/together";
import { HYPERBOLIC_API_BASE_URL, HYPERBOLIC_SUPPORTED_MODEL_IDS } from "../providers/hyperbolic";
import type { InferenceProvider } from "../types";
import type { InferenceTask, Options, RequestArgs } from "../types";
import { isUrl } from "./isUrl";
Expand Down Expand Up @@ -177,6 +178,8 @@ function mapModel(params: {
return SAMBANOVA_SUPPORTED_MODEL_IDS[task]?.[params.model];
case "together":
return TOGETHER_SUPPORTED_MODEL_IDS[task]?.[params.model];
case "hyperbolic":
return HYPERBOLIC_SUPPORTED_MODEL_IDS[task]?.[params.model];
}
})();

Expand Down Expand Up @@ -243,6 +246,12 @@ function makeUrl(params: {
}
return baseUrl;
}
case "hyperbolic": {
const baseUrl = shouldProxy
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
: HYPERBOLIC_API_BASE_URL;
return `${baseUrl}/v1/chat/completions`;
}
default: {
const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
const url = params.forceTask
Expand Down
46 changes: 46 additions & 0 deletions packages/inference/src/providers/hyperbolic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import type { ProviderMapping } from "./types";

/** Base URL for direct (non-proxied) requests to the Hyperbolic API. */
export const HYPERBOLIC_API_BASE_URL = "https://api.hyperbolic.xyz";

/**
 * Model identifier as expected by the Hyperbolic API.
 * Hyperbolic mostly reuses Hugging Face model IDs verbatim, but a few models
 * use provider-specific aliases (e.g. "SDXL1.0-base", "…-BF16" variants).
 */
type HyperbolicId = string;

/**
 * Per-task mapping from Hugging Face model IDs to Hyperbolic model IDs.
 *
 * See https://app.hyperbolic.xyz/models for the provider's model catalog.
 * NOTE(review): the previous comment linked to Together's docs
 * (docs.together.ai) — a copy-paste leftover from together.ts.
 */
export const HYPERBOLIC_SUPPORTED_MODEL_IDS: ProviderMapping<HyperbolicId> = {
	"text-to-image": {
		"black-forest-labs/FLUX.1-dev": "black-forest-labs/FLUX.1-dev",
		"stabilityai/stable-diffusion-xl-base-1.0": "SDXL1.0-base",
		"stable-diffusion-v1-5/stable-diffusion-v1-5": "stable-diffusion-v1-5/stable-diffusion-v1-5",
		"segmind/SSD-1B": "segmind/SSD-1B",
		"stabilityai/stable-diffusion-2": "stabilityai/stable-diffusion-2",
		"stabilityai/sdxl-turbo": "stabilityai/sdxl-turbo",
	},
	"image-text-to-text": {
		"Qwen/Qwen2-VL-72B-Instruct": "Qwen/Qwen2-VL-72B-Instruct",
		"mistralai/Pixtral-12B-2409": "mistralai/Pixtral-12B-2409",
		"Qwen/Qwen2-VL-7B-Instruct": "Qwen/Qwen2-VL-7B-Instruct",
	},
	"text-generation": {
		"meta-llama/Llama-3.1-405B-BASE-BF16": "meta-llama/Llama-3.1-405B-BASE-BF16",
		"meta-llama/Llama-3.1-405B-BASE-FP8": "meta-llama/Llama-3.1-405B-BASE-FP8",
		"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
	},
	"text-to-audio": {
		"myshell-ai/MeloTTS-English-v3": "myshell-ai/MeloTTS-English-v3",
	},
	conversational: {
		"deepseek-ai/DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
		"deepseek-ai/DeepSeek-R1-Zero": "deepseek-ai/DeepSeek-R1-Zero",
		"deepseek-ai/DeepSeek-V3": "deepseek-ai/DeepSeek-V3",
		"meta-llama/Llama-3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
		"meta-llama/Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct",
		"meta-llama/Llama-3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct-BF16",
		"meta-llama/Meta-Llama-3-70B-Instruct": "meta-llama/Llama-3-70b-BF16",
		"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct-BF16",
		"NousResearch/Hermes-3-Llama-3.1-70B": "NousResearch/Hermes-3-Llama-3.1-70B-BF16",
		"Qwen/Qwen2.5-72B-Instruct": "Qwen/Qwen2.5-72B-Instruct-BF16",
		"Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct-BF16",
		"Qwen/QwQ-32B-Preview": "Qwen/QwQ-32B-Preview-BF16",
	},
};
4 changes: 3 additions & 1 deletion packages/inference/src/tasks/custom/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ export async function request<T>(
}
if (output.error || output.detail) {
throw new Error(JSON.stringify(output.error ?? output.detail));
} else if (typeof output === "object") {
throw new Error(JSON.stringify(output));
} else {
throw new Error(output);
throw new Error(String(output));
}
}
const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined;
Expand Down
9 changes: 8 additions & 1 deletion packages/inference/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,14 @@ export interface Options {

export type InferenceTask = Exclude<PipelineType, "other">;

export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"] as const;
/**
 * Third-party inference providers the client can route requests to;
 * "hf-inference" is Hugging Face's own serverless Inference API.
 */
export const INFERENCE_PROVIDERS = [
	"fal-ai",
	"replicate",
	"sambanova",
	"together",
	"hf-inference",
	"hyperbolic",
] as const;
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];

export interface BaseArgs {
Expand Down
65 changes: 65 additions & 0 deletions packages/inference/test/HfInference.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1080,4 +1080,69 @@ describe.concurrent("HfInference", () => {
);
});
});

describe.concurrent(
"Hyperbolic",
() => {
const client = new HfInference(env.HF_HYPERBOLIC_KEY);

it("chatCompletion - hyperbolic", async () => {
const res = await client.chatCompletion({
model: "meta-llama/Llama-3.2-3B-Instruct",
provider: "hyperbolic",
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
temperature: 0.1,
});

expect(res).toBeDefined();
expect(res.choices).toBeDefined();
expect(res.choices?.length).toBeGreaterThan(0);

if (res.choices && res.choices.length > 0) {
const completion = res.choices[0].message?.content;
expect(completion).toBeDefined();
expect(typeof completion).toBe("string");
expect(completion).toContain("two");
}
});

it("chatCompletion stream", async () => {
const stream = client.chatCompletionStream({
model: "meta-llama/Llama-3.3-70B-Instruct",
provider: "hyperbolic",
messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }],
}) as AsyncGenerator<ChatCompletionStreamOutput>;
let out = "";
for await (const chunk of stream) {
if (chunk.choices && chunk.choices.length > 0) {
out += chunk.choices[0].delta.content;
}
}
expect(out).toContain("2");
});

it("textToImage", async () => {
const res = await client.textToImage({
model: "stabilityai/stable-diffusion-2",
provider: "hyperbolic",
inputs: "award winning high resolution photo of a giant tortoise",
});
expect(res).toBeInstanceOf(Blob);
});

it("textGeneration", async () => {
const res = await client.textGeneration({
model: "meta-llama/Llama-3.1-405B-BASE-FP8",
provider: "hyperbolic",
inputs: "Paris is",
parameters: {
temperature: 0,
max_tokens: 10,
},
});
expect(res).toMatchObject({ generated_text: " city of love" });
});
},
TIMEOUT
);
});
9 changes: 8 additions & 1 deletion packages/tasks/src/inference-providers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
export const INFERENCE_PROVIDERS = ["hf-inference", "fal-ai", "replicate", "sambanova", "together"] as const;
/**
 * Inference providers recognized by the tasks package;
 * "hf-inference" is Hugging Face's own serverless Inference API.
 */
export const INFERENCE_PROVIDERS = [
	"hf-inference",
	"fal-ai",
	"replicate",
	"sambanova",
	"together",
	"hyperbolic",
] as const;

export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];

Expand Down
5 changes: 1 addition & 4 deletions packages/tasks/src/tasks/feature-extraction/spec/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
"properties": {
"inputs": {
"title": "FeatureExtractionInputs",
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
],
"oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
"description": "The text or list of texts to embed."
},
"normalize": {
Expand Down
4 changes: 1 addition & 3 deletions packages/tasks/src/tasks/text-generation/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@ const taskData: TaskDataCustom = {
},
],
models: [
{ description: "A text-generation model trained to follow instructions.",
id: "google/gemma-2-2b-it",
},
{ description: "A text-generation model trained to follow instructions.", id: "google/gemma-2-2b-it" },
{
description: "Smaller variant of one of the most powerful models.",
id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
Expand Down

0 comments on commit c507b1e

Please sign in to comment.