Introduce tool_use_behavior on agents #203

Merged · 1 commit · Mar 19, 2025
13 changes: 13 additions & 0 deletions docs/agents.md
@@ -130,3 +130,16 @@ robot_agent = pirate_agent.clone(
    instructions="Write like a robot",
)
```

## Forcing tool use

Supplying a list of tools doesn't always mean the LLM will use a tool. You can force tool use by setting [`ModelSettings.tool_choice`][agents.model_settings.ModelSettings.tool_choice]. Valid values are:

1. `auto`, which allows the LLM to decide whether or not to use a tool.
2. `required`, which requires the LLM to use a tool (but it can intelligently decide which tool).
3. `none`, which requires the LLM to _not_ use a tool.
4. Setting a specific string, e.g. `my_tool`, which requires the LLM to use that specific tool.

!!! note

    If you require tool use, consider setting [`Agent.tool_use_behavior`] to stop the agent
    once a tool output is produced. Otherwise the agent can run in an infinite loop: the LLM
    produces a tool call, the tool result is sent back to the LLM, and because the LLM is
    always forced to use a tool, it produces another tool call, and so on.
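
For illustration, a minimal sketch combining the two settings; it assumes a `get_weather` function tool like the one defined in the examples in this PR:

```python
from agents import Agent, ModelSettings

agent = Agent(
    name="Weather agent",
    instructions="You are a helpful agent.",
    tools=[get_weather],  # assumed: an @function_tool-decorated function
    # Force the model to call a tool on every turn...
    model_settings=ModelSettings(tool_choice="required"),
    # ...and stop after the first tool output instead of looping back to the LLM.
    tool_use_behavior="stop_on_first_tool",
)
```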
99 changes: 99 additions & 0 deletions examples/agent_patterns/forcing_tool_use.py
@@ -0,0 +1,99 @@
from __future__ import annotations

import asyncio
from typing import Any, Literal

from pydantic import BaseModel

from agents import (
    Agent,
    FunctionToolResult,
    ModelSettings,
    RunContextWrapper,
    Runner,
    ToolsToFinalOutputFunction,
    ToolsToFinalOutputResult,
    function_tool,
)

"""
This example shows how to force the agent to use a tool. It uses `ModelSettings(tool_choice="required")`
to force the agent to use any tool.

You can run it with 3 options:
1. `default`: The default behavior, which is to send the tool output to the LLM. In this case,
`tool_choice` is not set, because otherwise it would result in an infinite loop - the LLM would
call the tool, the tool would run and send the results to the LLM, and that would repeat
(because the model is forced to use a tool every time.)
2. `first_tool_result`: The first tool result is used as the final output.
3. `custom`: A custom tool use behavior function is used. The custom function receives all the tool
results, and chooses to use the first tool result to generate the final output.

Usage:
python examples/agent_patterns/forcing_tool_use.py -t default
python examples/agent_patterns/forcing_tool_use.py -t first_tool
python examples/agent_patterns/forcing_tool_use.py -t custom
"""


class Weather(BaseModel):
    city: str
    temperature_range: str
    conditions: str


@function_tool
def get_weather(city: str) -> Weather:
    print("[debug] get_weather called")
    return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind")


async def custom_tool_use_behavior(
    context: RunContextWrapper[Any], results: list[FunctionToolResult]
) -> ToolsToFinalOutputResult:
    weather: Weather = results[0].output
    return ToolsToFinalOutputResult(
        is_final_output=True, final_output=f"{weather.city} is {weather.conditions}."
    )


async def main(tool_use_behavior: Literal["default", "first_tool", "custom"] = "default"):
    if tool_use_behavior == "default":
        behavior: Literal["run_llm_again", "stop_on_first_tool"] | ToolsToFinalOutputFunction = (
            "run_llm_again"
        )
    elif tool_use_behavior == "first_tool":
        behavior = "stop_on_first_tool"
    elif tool_use_behavior == "custom":
        behavior = custom_tool_use_behavior

    agent = Agent(
        name="Weather agent",
        instructions="You are a helpful agent.",
        tools=[get_weather],
        tool_use_behavior=behavior,
        model_settings=ModelSettings(
            tool_choice="required" if tool_use_behavior != "default" else None
        ),
    )

    result = await Runner.run(agent, input="What's the weather in Tokyo?")
    print(result.final_output)


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
"-t",
"--tool-use-behavior",
type=str,
required=True,
choices=["default", "first_tool", "custom"],
help="The behavior to use for tool use. Default will cause tool outputs to be sent to the model. "
"first_tool_result will cause the first tool result to be used as the final output. "
"custom will use a custom tool use behavior function.",
)
args = parser.parse_args()
asyncio.run(main(args.tool_use_behavior))
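
The example above covers the string and callable forms of `tool_use_behavior`. The dict form handled in `src/agents/_run_impl.py` below is not exercised here; a hedged sketch of how it might look (`get_time` is a hypothetical second tool, named only for illustration):

```python
# Sketch of the {"stop_at_tool_names": [...]} form of tool_use_behavior,
# based on the handling in _check_for_final_output_from_tools below.
# `get_time` is a hypothetical second tool.
agent = Agent(
    name="Weather agent",
    instructions="You are a helpful agent.",
    tools=[get_weather, get_time],
    # get_weather's output becomes the final output; results from any
    # other tool are sent back to the LLM as usual.
    tool_use_behavior={"stop_at_tool_names": ["get_weather"]},
)
```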
34 changes: 34 additions & 0 deletions examples/basic/tools.py
@@ -0,0 +1,34 @@
import asyncio

from pydantic import BaseModel

from agents import Agent, Runner, function_tool


class Weather(BaseModel):
    city: str
    temperature_range: str
    conditions: str


@function_tool
def get_weather(city: str) -> Weather:
    print("[debug] get_weather called")
    return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind.")


agent = Agent(
    name="Hello world",
    instructions="You are a helpful agent.",
    tools=[get_weather],
)


async def main():
    result = await Runner.run(agent, input="What's the weather in Tokyo?")
    print(result.final_output)
    # The weather in Tokyo is sunny.


if __name__ == "__main__":
    asyncio.run(main())
6 changes: 5 additions & 1 deletion src/agents/__init__.py
@@ -5,7 +5,7 @@
from openai import AsyncOpenAI

from . import _config
-from .agent import Agent
+from .agent import Agent, ToolsToFinalOutputFunction, ToolsToFinalOutputResult
from .agent_output import AgentOutputSchema
from .computer import AsyncComputer, Button, Computer, Environment
from .exceptions import (
@@ -57,6 +57,7 @@
    ComputerTool,
    FileSearchTool,
    FunctionTool,
    FunctionToolResult,
    Tool,
    WebSearchTool,
    default_tool_error_function,
@@ -137,6 +138,8 @@ def enable_verbose_stdout_logging():

__all__ = [
    "Agent",
    "ToolsToFinalOutputFunction",
    "ToolsToFinalOutputResult",
    "Runner",
    "Model",
    "ModelProvider",
@@ -190,6 +193,7 @@ def enable_verbose_stdout_logging():
"AgentUpdatedStreamEvent",
"StreamEvent",
"FunctionTool",
"FunctionToolResult",
"ComputerTool",
"FileSearchTool",
"Tool",
99 changes: 89 additions & 10 deletions src/agents/_run_impl.py
@@ -1,8 +1,10 @@
from __future__ import annotations

import asyncio
import inspect
from collections.abc import Awaitable
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast

from openai.types.responses import (
    ResponseComputerToolCall,
@@ -25,7 +27,7 @@
from openai.types.responses.response_input_param import ComputerCallOutput
from openai.types.responses.response_reasoning_item import ResponseReasoningItem

-from .agent import Agent
+from .agent import Agent, ToolsToFinalOutputResult
from .agent_output import AgentOutputSchema
from .computer import AsyncComputer, Computer
from .exceptions import AgentsException, ModelBehaviorError, UserError
@@ -48,7 +50,7 @@
from .models.interface import ModelTracing
from .run_context import RunContextWrapper, TContext
from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool
+from .tool import ComputerTool, FunctionTool, FunctionToolResult
from .tracing import (
    SpanError,
    Trace,
@@ -70,6 +72,8 @@ class QueueCompleteSentinel:

QUEUE_COMPLETE_SENTINEL = QueueCompleteSentinel()

_NOT_FINAL_OUTPUT = ToolsToFinalOutputResult(is_final_output=False, final_output=None)


@dataclass
class ToolRunHandoff:
@@ -199,7 +203,7 @@ async def execute_tools_and_side_effects(
                config=run_config,
            ),
        )
-        new_step_items.extend(function_results)
+        new_step_items.extend([result.run_item for result in function_results])
        new_step_items.extend(computer_results)

        # Second, check if there are any handoffs
@@ -216,6 +220,36 @@
                run_config=run_config,
            )

        # Third, we'll check if the tool use should result in a final output
        check_tool_use = await cls._check_for_final_output_from_tools(
            agent=agent,
            tool_results=function_results,
            context_wrapper=context_wrapper,
            config=run_config,
        )

        if check_tool_use.is_final_output:
            # If the output type is str, then let's just stringify it
            if not agent.output_type or agent.output_type is str:
                check_tool_use.final_output = str(check_tool_use.final_output)

            if check_tool_use.final_output is None:
                logger.error(
                    "Model returned a final output of None. Not raising an error because we assume "
                    "you know what you're doing."
                )

            return await cls.execute_final_output(
                agent=agent,
                original_input=original_input,
                new_response=new_response,
                pre_step_items=pre_step_items,
                new_step_items=new_step_items,
                final_output=check_tool_use.final_output,
                hooks=hooks,
                context_wrapper=context_wrapper,
            )

        # Now we can check if the model also produced a final output
        message_items = [item for item in new_step_items if isinstance(item, MessageOutputItem)]

@@ -355,10 +389,10 @@ async def execute_function_tool_calls(
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
-    ) -> list[RunItem]:
+    ) -> list[FunctionToolResult]:
        async def run_single_tool(
            func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
-        ) -> str:
+        ) -> Any:
            with function_span(func_tool.name) as span_fn:
                if config.trace_include_sensitive_data:
                    span_fn.span_data.input = tool_call.arguments
@@ -404,10 +438,14 @@ async def run_single_tool(
        results = await asyncio.gather(*tasks)

        return [
-            ToolCallOutputItem(
-                output=str(result),
-                raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
-                agent=agent,
+            FunctionToolResult(
+                tool=tool_run.function_tool,
+                output=result,
+                run_item=ToolCallOutputItem(
+                    output=result,
+                    raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
+                    agent=agent,
+                ),
            )
            for tool_run, result in zip(tool_runs, results)
        ]
@@ -646,6 +684,47 @@ def stream_step_result_to_queue(
        if event:
            queue.put_nowait(event)

    @classmethod
    async def _check_for_final_output_from_tools(
        cls,
        *,
        agent: Agent[TContext],
        tool_results: list[FunctionToolResult],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
    ) -> ToolsToFinalOutputResult:
        """Determines whether the tool results should be used as the agent's final output."""
        if not tool_results:
            return _NOT_FINAL_OUTPUT

        if agent.tool_use_behavior == "run_llm_again":
            return _NOT_FINAL_OUTPUT
        elif agent.tool_use_behavior == "stop_on_first_tool":
            return ToolsToFinalOutputResult(
                is_final_output=True, final_output=tool_results[0].output
            )
        elif isinstance(agent.tool_use_behavior, dict):
            names = agent.tool_use_behavior.get("stop_at_tool_names", [])
            for tool_result in tool_results:
                if tool_result.tool.name in names:
                    return ToolsToFinalOutputResult(
                        is_final_output=True, final_output=tool_result.output
                    )
            return ToolsToFinalOutputResult(is_final_output=False, final_output=None)
        elif callable(agent.tool_use_behavior):
            if inspect.iscoroutinefunction(agent.tool_use_behavior):
                return await cast(
                    Awaitable[ToolsToFinalOutputResult],
                    agent.tool_use_behavior(context_wrapper, tool_results),
                )
            else:
                return cast(
                    ToolsToFinalOutputResult, agent.tool_use_behavior(context_wrapper, tool_results)
                )
Review comment by @TimoVink (Mar 17, 2025):

Perhaps a try/except/rethrow around the call to the user-provided function with a useful error message?

Was just playing with this branch and even with this function it was stopping after 1 turn.

    def custom_tool_use_behavior(context, results):
        return ToolsToFinalOutputResult(is_final_output=False, final_output=None)

... turns out it was a silly mistake on my part where I had forgotten to import ToolsToFinalOutputResult, but there were no obvious user-visible errors that I could find -- even with the openai.agents logger set to DEBUG. It just looked as if the agent had decided it was done even though is_final_output was False.

Reply from the PR author (Collaborator):

oh weird! Yeah I'll add that. Thanks


logger.error(f"Invalid tool_use_behavior: {agent.tool_use_behavior}")
raise UserError(f"Invalid tool_use_behavior: {agent.tool_use_behavior}")


class TraceCtxManager:
"""Creates a trace only if there is no current trace, and manages the trace lifecycle."""