[inference] Fix types for Tool calling (#1367)

julien-c · web-flow · commit a160b10d461d · 2025-04-18T16:40:26.000+02:00
EDIT: ok, ready for review ![tenor](https://github.com/user-attachments/assets/e5871f1a-d42a-449b-9cb1-77aed17f45d8)
diff --git a/README.md b/README.md
@@ -27,7 +27,7 @@ await uploadFile({
   }
 });
 
-// Use HF Inference API, or external Inference Providers!
+// Use all supported Inference Providers!
 
 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -55,7 +55,7 @@ await inference.textToImage({
 
 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.
 
-- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use all supported (serverless) Inference Providers or switch to Inference Endpoints (dedicated) to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -128,18 +128,18 @@ import { InferenceClient } from "@huggingface/inference";
 
 const HF_TOKEN = "hf_...";
 
-const inference = new InferenceClient(HF_TOKEN);
+const client = new InferenceClient(HF_TOKEN);
 
 // Chat completion API
-const out = await inference.chatCompletion({
+const out = await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
 });
 console.log(out.choices[0].message);
 
 // Streaming chat completion API
-for await (const chunk of inference.chatCompletionStream({
+for await (const chunk of client.chatCompletionStream({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
@@ -148,14 +148,14 @@ for await (const chunk of inference.chatCompletionStream({
 }
 
 /// Using a third-party provider:
-await inference.chatCompletion({
+await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512,
   provider: "sambanova", // or together, fal-ai, replicate, cohere …
 })
 
-await inference.textToImage({
+await client.textToImage({
   model: "black-forest-labs/FLUX.1-dev",
   inputs: "a picture of a green bird",
   provider: "fal-ai",
@@ -164,7 +164,7 @@ await inference.textToImage({
 
 
 // You can also omit "model" to use the recommended model for the task
-await inference.translation({
+await client.translation({
   inputs: "My name is Wolfgang and I live in Amsterdam",
   parameters: {
     src_lang: "en",
@@ -173,17 +173,17 @@ await inference.translation({
 });
 
 // pass multimodal files or URLs as inputs
-await inference.imageToText({
+await client.imageToText({
   model: 'nlpconnect/vit-gpt2-image-captioning',
   data: await (await fetch('https://picsum.photos/300/300')).blob(),
 })
 
 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
-const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
-const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });
+const gpt2Client = client.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2Client.textGeneration({ inputs: 'The answer to the universe is' });
 
 // Chat Completion
-const llamaEndpoint = inference.endpoint(
+const llamaEndpoint = client.endpoint(
   "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const out = await llamaEndpoint.chatCompletion({
diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts
@@ -376,7 +376,7 @@ describe.concurrent("InferenceClient", () => {
 				);
 			});
 
-			it("textGeneration - gpt2", async () => {
+			it.skip("textGeneration - gpt2", async () => {
 				expect(
 					await hf.textGeneration({
 						model: "gpt2",
@@ -387,7 +387,7 @@ describe.concurrent("InferenceClient", () => {
 				});
 			});
 
-			it("textGeneration - openai-community/gpt2", async () => {
+			it.skip("textGeneration - openai-community/gpt2", async () => {
 				expect(
 					await hf.textGeneration({
 						model: "openai-community/gpt2",
diff --git a/packages/tasks-gen/scripts/inference-tgi-import.ts b/packages/tasks-gen/scripts/inference-tgi-import.ts
@@ -34,6 +34,13 @@ function toCamelCase(str: string, joiner = "") {
 		.join(joiner);
 }
 
+const OVERRIDES_TYPES_RENAME_PROPERTIES: Record<string, Record<string, string>> = {
+	ChatCompletionInputFunctionDefinition: { arguments: "parameters" },
+};
+const OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE: Record<string, Record<string, unknown>> = {
+	ChatCompletionOutputFunctionDefinition: { arguments: { type: "string" } },
+};
+
 async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
 	console.debug(`✨ Importing`, task, type);
 
@@ -57,6 +64,17 @@ async function _extractAndAdapt(task: string, mainComponentName: string, type: "
 				_scan(item);
 			}
 		} else if (data && typeof data === "object") {
+			/// This next section can be removed once TGI is no longer used as the source of types.
+			if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_RENAME_PROPERTIES) {
+				const [[oldName, newName]] = Object.entries(OVERRIDES_TYPES_RENAME_PROPERTIES[data.title]);
+				data.required = JSON.parse(JSON.stringify(data.required).replaceAll(oldName, newName));
+				data.properties = JSON.parse(JSON.stringify(data.properties).replaceAll(oldName, newName));
+			}
+			if (typeof data.title === "string" && data.title in OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE) {
+				const [[prop, newType]] = Object.entries(OVERRIDES_TYPES_OVERRIDE_PROPERTY_TYPE[data.title]);
+				(data.properties as Record<string, unknown>)[prop] = newType;
+			}
+			/// End of overrides section
 			for (const key of Object.keys(data)) {
 				if (key === "$ref" && typeof data[key] === "string") {
 					// Verify reference exists
diff --git a/packages/tasks/src/tasks/chat-completion/inference.ts b/packages/tasks/src/tasks/chat-completion/inference.ts
@@ -130,9 +130,9 @@ export interface ChatCompletionInputToolCall {
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputFunctionDefinition {
-	arguments: unknown;
 	description?: string;
 	name: string;
+	parameters: unknown;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputGrammarType {
@@ -235,7 +235,7 @@ export interface ChatCompletionOutputToolCall {
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputFunctionDefinition {
-	arguments: unknown;
+	arguments: string;
 	description?: string;
 	name: string;
 	[property: string]: unknown;
diff --git a/packages/tasks/src/tasks/chat-completion/spec/input.json b/packages/tasks/src/tasks/chat-completion/spec/input.json
@@ -275,9 +275,9 @@
 		},
 		"ChatCompletionInputFunctionDefinition": {
 			"type": "object",
-			"required": ["name", "arguments"],
+			"required": ["name", "parameters"],
 			"properties": {
-				"arguments": {},
+				"parameters": {},
 				"description": {
 					"type": "string",
 					"nullable": true
diff --git a/packages/tasks/src/tasks/chat-completion/spec/output.json b/packages/tasks/src/tasks/chat-completion/spec/output.json
@@ -173,7 +173,9 @@
 			"type": "object",
 			"required": ["name", "arguments"],
 			"properties": {
-				"arguments": {},
+				"arguments": {
+					"type": "string"
+				},
 				"description": {
 					"type": "string",
 					"nullable": true

Original file line number	Diff line number	Diff line change
`@@ -130,9 +130,9 @@ export interface ChatCompletionInputToolCall {`
`130`	`130`	`[property: string]: unknown;`
`131`	`131`	`}`
`132`	`132`	`export interface ChatCompletionInputFunctionDefinition {`
`133`		`- arguments: unknown;`
`134`	`133`	`description?: string;`
`135`	`134`	`name: string;`
	`135`	`+ parameters: unknown;`
`136`	`136`	`[property: string]: unknown;`
`137`	`137`	`}`
`138`	`138`	`export interface ChatCompletionInputGrammarType {`
`@@ -235,7 +235,7 @@ export interface ChatCompletionOutputToolCall {`
`235`	`235`	`[property: string]: unknown;`
`236`	`236`	`}`
`237`	`237`	`export interface ChatCompletionOutputFunctionDefinition {`
`238`		`- arguments: unknown;`
	`238`	`+ arguments: string;`
`239`	`239`	`description?: string;`
`240`	`240`	`name: string;`
`241`	`241`	`[property: string]: unknown;`