Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: implement screenshot caching via Responses API reference IDs #90

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/agents/models/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
"display_width": tool.computer.dimensions[0],
"display_height": tool.computer.dimensions[1],
}
includes = None
includes = "computer_call_output.output.image_url"

else:
raise UserError(f"Unknown tool type: {type(tool)}, tool")
Expand Down
4 changes: 3 additions & 1 deletion tests/test_openai_responses_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def drag(self, path: list[tuple[int, int]]) -> None:
assert isinstance(converted.includes, list)
# The includes list should contain the include for file search when include_search_results
# is True.
assert converted.includes == ["file_search_call.results"]
assert "file_search_call.results" in converted.includes
# The includes list should also have the include for computer tool screenshots
assert "computer_call_output.output.image_url" in converted.includes
# There should be exactly four converted tool dicts.
assert len(converted.tools) == 4
# Extract types and verify.
Expand Down
53 changes: 53 additions & 0 deletions tests/test_reference_id_screenshots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Tests for the reference ID screenshot mechanism in ComputerAction."""

from unittest.mock import MagicMock, patch

import pytest
from agents import Agent, RunConfig, RunContextWrapper, RunHooks
from agents._run_impl import ComputerAction, ToolRunComputerAction
from agents.items import ToolCallOutputItem
from agents.tool import ComputerTool
from openai.types.responses.response_computer_tool_call import (
ActionClick,
ResponseComputerToolCall,
)
from tests.test_computer_action import LoggingComputer


@pytest.mark.asyncio
@patch("agents.models.openai_provider._openai_shared.get_default_openai_client")
@patch("openai.AsyncOpenAI")
async def test_reference_id_screenshots(mock_openai, mock_get_client):
    """Run a single computer click and inspect the resulting tool output item.

    The test drives ``ComputerAction.execute`` with a ``LoggingComputer`` whose
    screenshot return value is ``"test_screenshot"`` and then checks that:

    * the result is a ``ToolCallOutputItem``,
    * its ``output`` contains the ``data:image/png;base64,`` prefix, and
    * the computer recorded exactly one ``"screenshot"`` call.

    ``mock_openai`` is injected by the ``openai.AsyncOpenAI`` patch and is not
    used directly in the body.
    """
    # Stub out the default client lookup so the test needs no API key.
    mock_get_client.return_value = MagicMock()

    fake_computer = LoggingComputer(screenshot_return="test_screenshot")
    computer_tool = ComputerTool(computer=fake_computer)

    # A minimal, already-completed click request for the computer tool.
    click_call = ResponseComputerToolCall(
        id="tool1",
        type="computer_call",
        action=ActionClick(type="click", x=1, y=2, button="left"),
        call_id="tool1",
        pending_safety_checks=[],
        status="completed",
    )

    result = await ComputerAction.execute(
        agent=Agent(name="test_agent", tools=[computer_tool]),
        action=ToolRunComputerAction(tool_call=click_call, computer_tool=computer_tool),
        hooks=RunHooks(),
        context_wrapper=RunContextWrapper(context=None),
        config=RunConfig(),
    )

    # The output item must carry the screenshot as a PNG data URL.
    assert isinstance(result, ToolCallOutputItem)
    assert "data:image/png;base64," in result.output

    # Exactly one screenshot must have been recorded by the computer.
    screenshot_count = sum(1 for entry in fake_computer.calls if entry[0] == "screenshot")
    assert screenshot_count == 1