openai · hiromesh · Mar 12, 2025
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -358,10 +358,11 @@ async def execute_function_tool_calls(
     ) -> list[RunItem]:
         async def run_single_tool(
             func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
-        ) -> str:
+        ) -> tuple[str, bool]:
             with function_span(func_tool.name) as span_fn:
                 if config.trace_include_sensitive_data:
                     span_fn.span_data.input = tool_call.arguments
+                is_error: bool = False  # True once the tool call has raised
                 try:
                     _, _, result = await asyncio.gather(
                         hooks.on_tool_start(context_wrapper, agent, func_tool),
@@ -372,30 +373,32 @@ async def run_single_tool(
                         ),
                         func_tool.on_invoke_tool(context_wrapper, tool_call.arguments),
                     )
-
-                    await asyncio.gather(
-                        hooks.on_tool_end(context_wrapper, agent, func_tool, result),
-                        (
-                            agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result)
-                            if agent.hooks
-                            else _utils.noop_coroutine()
-                        ),
-                    )
                 except Exception as e:
+                    is_error = True
+                    result = f"Error running tool {func_tool.name}: {e}"
                     _utils.attach_error_to_current_span(
                         SpanError(
                             message="Error running tool",
                             data={"tool_name": func_tool.name, "error": str(e)},
                         )
                     )
                     if isinstance(e, AgentsException):
                         raise e
-                    raise UserError(f"Error running tool {func_tool.name}: {e}") from e
+                    logger.warning(result)  # any other failure becomes the tool's output
 
+                # Runs for both outcomes, so the end hooks also receive the error text.
+                await asyncio.gather(
+                    hooks.on_tool_end(context_wrapper, agent, func_tool, result),
+                    (
+                        agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result)
+                        if agent.hooks
+                        else _utils.noop_coroutine()
+                    ),
+                )
                 if config.trace_include_sensitive_data:
                     span_fn.span_data.output = result
-            return result
-
+            return str(result), is_error  # keep the pre-patch str() coercion of the output
+
         tasks = []
         for tool_run in tool_runs:
             function_tool = tool_run.function_tool
@@ -405,9 +408,14 @@ async def run_single_tool(
 
         return [
             ToolCallOutputItem(
-                output=str(result),
-                raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
+                output=result[0],
+                raw_item=ItemHelpers.tool_call_output_item(
+                    tool_run.tool_call, 
+                    result[0], 
+                    is_error=result[1]
+                ),
                 agent=agent,
+                is_error=result[1]
             )
             for tool_run, result in zip(tool_runs, results)
         ]

diff --git a/src/agents/items.py b/src/agents/items.py
@@ -131,6 +131,9 @@ class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutpu
 
     output: str
     """The output of the tool call."""
+
+    is_error: bool = False
+    """Indicates whether this output represents an error during tool execution."""
 
     type: Literal["tool_call_output_item"] = "tool_call_output_item"
 
@@ -236,11 +239,12 @@ def text_message_output(cls, message: MessageOutputItem) -> str:
 
     @classmethod
     def tool_call_output_item(
-        cls, tool_call: ResponseFunctionToolCall, output: str
+        cls, tool_call: ResponseFunctionToolCall, output: str, is_error: bool = False
     ) -> FunctionCallOutput:
-        """Creates a tool call output item from a tool call and its output."""
+        """Creates a tool call output item from a tool call, its output and an error flag."""
         return {
             "call_id": tool_call.call_id,
             "output": output,
             "type": "function_call_output",
+            "is_error": is_error,  # NOTE(review): confirm the API accepts this extra key
         }
diff --git a/tests/test_function_tool.py b/tests/test_function_tool.py
@@ -255,3 +255,23 @@ def custom_sync_error_function(ctx: RunContextWrapper[Any], error: Exception) ->
 
     result = await tool.on_invoke_tool(ctx, '{"a": 1, "b": 2}')
     assert result == "error_ValueError"
+
+
+@pytest.mark.asyncio
+async def test_tool_error_handling_in_run_impl():
+    """A failing tool yields an error string, and output items carry is_error."""
+    from unittest.mock import Mock
+    from agents.items import ItemHelpers
+
+    @function_tool
+    def failing_tool(input_text: str) -> int:
+        """This tool always fails"""
+        raise ValueError(f"Tool execution failed: {input_text}")
+
+    # Invoking the tool is expected to return the error text rather than raise.
+    outcome = await failing_tool.on_invoke_tool(RunContextWrapper({}), '{"input_text": "test"}')
+    assert "Tool execution failed" in outcome, "Result should contain error message"
+
+    fake_call = Mock(call_id="test_id")
+    assert not ItemHelpers.tool_call_output_item(fake_call, "success").get("is_error")
+    assert ItemHelpers.tool_call_output_item(fake_call, "error", is_error=True).get("is_error")