[Responses API] Structured output (#1586)

Wauplin · web-flow · commit 9c06344a87f3 · 2025-07-02T12:17:34.000+02:00
Built on top of #1576. Based on https://platform.openai.com/docs/guides/structured-outputs Works both with and without streaming. ## Non-stream **Run** ```bash pnpm run example structured_output ``` (the core logic is:) ```js (...) const response = await openai.responses.parse({ model: "Qwen/Qwen2.5-VL-72B-Instruct", provider: "nebius", input: [ { role: "system", content: "You are a helpful math tutor. Guide the user through the solution step by step.", }, { role: "user", content: "how can I solve 8x + 7 = -23" }, ], text: { format: zodTextFormat(MathReasoning, "math_reasoning"), }, }); (...) ``` **Output:** ```js { steps: [ { explanation: 'To solve for x, we need to isolate it on one side of the equation. We start by subtracting 7 from both sides of the equation.', output: '8x + 7 - 7 = -23 - 7' }, { explanation: 'Simplify the equation after performing the subtraction.', output: '8x = -30' }, { explanation: 'Now that we have isolated the term with x, we divide both sides by 8 to get x by itself.', output: '8x / 8 = -30 / 8' }, { explanation: 'Perform the division to find the value of x.', output: 'x = -30 / 8' }, { explanation: 'Simplify the fraction if possible.', output: 'x = -15 / 4' } ], final_answer: 'The solution is x = -15/4 or x = -3.75.' 
} ``` ## Stream **Run** ```bash pnpm run example structured_output_streaming ``` (the core logic is:) ```js const stream = openai.responses .stream({ model: "Qwen/Qwen2.5-VL-72B-Instruct", provider: "nebius", instructions: "Extract the event information.", input: "Alice and Bob are going to a science fair on Friday.", text: { format: zodTextFormat(CalendarEvent, "calendar_event"), }, }) .on("response.refusal.delta", (event) => { process.stdout.write(event.delta); }) .on("response.output_text.delta", (event) => { process.stdout.write(event.delta); }) .on("response.output_text.done", () => { process.stdout.write("\n"); }) .on("response.error", (event) => { console.error(event.error); }); const result = await stream.finalResponse(); console.log(result.output_parsed); ``` **Output:** ```js { "name": "Science Fair", "date": "Friday", "participants": ["Alice", "Bob"] } { name: 'Science Fair', date: 'Friday', participants: [ 'Alice', 'Bob' ] } ```
diff --git a/packages/responses-server/examples/structured_output.js b/packages/responses-server/examples/structured_output.js
@@ -0,0 +1,32 @@
+import OpenAI from "openai";
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+// Example: request a schema-constrained (structured) response without streaming and print the parsed result.
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN }); // client targets the local server; API key is read from the HF_TOKEN env var
+// Schema for a single reasoning step: an explanation and the resulting expression, both strings.
+const Step = z.object({
+	explanation: z.string(),
+	output: z.string(),
+});
+// Schema for the whole answer: an array of steps plus a final answer string.
+const MathReasoning = z.object({
+	steps: z.array(Step),
+	final_answer: z.string(),
+});
+// Send the request; top-level await is used, so this file has to be run as an ES module.
+const response = await openai.responses.parse({
+	model: "Qwen/Qwen2.5-VL-72B-Instruct",
+	provider: "nebius", // forwarded by the server into the chat-completion payload (see routes/responses.ts in this change)
+	input: [
+		{
+			role: "system",
+			content: "You are a helpful math tutor. Guide the user through the solution step by step.",
+		},
+		{ role: "user", content: "how can I solve 8x + 7 = -23" },
+	],
+	text: {
+		format: zodTextFormat(MathReasoning, "math_reasoning"), // presumably builds a json_schema text format named "math_reasoning" from the zod schema — confirm against openai/helpers/zod
+	},
+});
+// Print the output as parsed by the client library (a sample is shown in the commit message).
+console.log(response.output_parsed);
diff --git a/packages/responses-server/examples/structured_output_streaming.js b/packages/responses-server/examples/structured_output_streaming.js
@@ -0,0 +1,36 @@
+import { OpenAI } from "openai";
+import { zodTextFormat } from "openai/helpers/zod";
+import { z } from "zod";
+// Example: stream a schema-constrained response, echo text deltas as they arrive, then print the parsed final result.
+const CalendarEvent = z.object({
+	name: z.string(),
+	date: z.string(),
+	participants: z.array(z.string()),
+});
+// Client targets the local server; the API key is read from the HF_TOKEN environment variable.
+const openai = new OpenAI({ baseURL: "http://localhost:3000/v1", apiKey: process.env.HF_TOKEN });
+const stream = openai.responses
+	.stream({
+		model: "Qwen/Qwen2.5-VL-72B-Instruct",
+		provider: "nebius", // forwarded by the server into the chat-completion payload (see routes/responses.ts in this change)
+		instructions: "Extract the event information.",
+		input: "Alice and Bob are going to a science fair on Friday.",
+		text: {
+			format: zodTextFormat(CalendarEvent, "calendar_event"), // presumably builds a json_schema text format named "calendar_event" from the zod schema — confirm against openai/helpers/zod
+		},
+	})
+	.on("response.refusal.delta", (event) => {
+		process.stdout.write(event.delta); // echo refusal text incrementally
+	})
+	.on("response.output_text.delta", (event) => {
+		process.stdout.write(event.delta); // echo generated text incrementally, without adding newlines
+	})
+	.on("response.output_text.done", () => {
+		process.stdout.write("\n"); // end the streamed line once the text output is complete
+	})
+	.on("response.error", (event) => {
+		console.error(event.error); // errors go to stderr so they do not mix with the streamed text on stdout
+	});
+// Wait for the final response, then print the parsed output (top-level await: run as an ES module).
+const result = await stream.finalResponse();
+console.log(result.output_parsed);
diff --git a/packages/responses-server/src/routes/responses.ts b/packages/responses-server/src/routes/responses.ts
@@ -3,7 +3,11 @@ import { type ValidatedRequest } from "../middleware/validation.js";
 import { type CreateResponseParams } from "../schemas.js";
 import { generateUniqueId } from "../lib/generateUniqueId.js";
 import { InferenceClient } from "@huggingface/inference";
-import type { ChatCompletionInputMessage, ChatCompletionInputMessageChunkType } from "@huggingface/tasks";
+import type {
+	ChatCompletionInputMessage,
+	ChatCompletionInputMessageChunkType,
+	ChatCompletionInput,
+} from "@huggingface/tasks";
 
 import type {
 	Response,
@@ -69,13 +73,28 @@ export const postCreateResponse = async (
 		messages.push({ role: "user", content: req.body.input });
 	}
 
-	const payload = {
+	const payload: ChatCompletionInput = {
 		model: req.body.model,
+		provider: req.body.provider,
 		messages: messages,
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
 		temperature: req.body.temperature,
 		top_p: req.body.top_p,
 		stream: req.body.stream,
+		response_format: req.body.text?.format
+			? {
+					type: req.body.text.format.type,
+					json_schema:
+						req.body.text.format.type === "json_schema"
+							? {
+									description: req.body.text.format.description,
+									name: req.body.text.format.name,
+									schema: req.body.text.format.schema,
+									strict: req.body.text.format.strict,
+							  }
+							: undefined,
+			  }
+			: undefined,
 	};
 
 	const responseObject: Omit<
@@ -225,12 +244,7 @@ export const postCreateResponse = async (
 	}
 
 	try {
-		const chatCompletionResponse = await client.chatCompletion({
-			model: req.body.model,
-			messages: messages,
-			temperature: req.body.temperature,
-			top_p: req.body.top_p,
-		});
+		const chatCompletionResponse = await client.chatCompletion(payload);
 
 		responseObject.status = "completed";
 		responseObject.output = chatCompletionResponse.choices[0].message.content
diff --git a/packages/responses-server/src/schemas.ts b/packages/responses-server/src/schemas.ts
@@ -83,6 +83,7 @@ export const createResponseParamsSchema = z.object({
 		.nullable()
 		.default(null),
 	model: z.string(),
+	provider: z.string().optional(),
 	// previous_response_id: z.string().nullable().default(null),
 	// reasoning: z.object({
 	// 	effort: z.enum(["low", "medium", "high"]).default("medium"),
@@ -91,7 +92,28 @@ export const createResponseParamsSchema = z.object({
 	// store: z.boolean().default(true),
 	stream: z.boolean().default(false),
 	temperature: z.number().min(0).max(2).default(1),
-	// text:
+	text: z
+		.object({
+			format: z.union([
+				z.object({
+					type: z.literal("text"),
+				}),
+				z.object({
+					type: z.literal("json_object"),
+				}),
+				z.object({
+					type: z.literal("json_schema"),
+					name: z
+						.string()
+						.max(64, "Must be at most 64 characters")
+						.regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
+					description: z.string().optional(),
+					schema: z.record(z.any()),
+					strict: z.boolean().default(false),
+				}),
+			]),
+		})
+		.optional(),
 	// tool_choice:
 	// tools:
 	// top_logprobs: z.number().min(0).max(20).nullable().default(null),