Commit 1eeb5df

test(openai-agents): Avoid calling SDK-internal functions (#5437)
1 parent 8f80fa7 commit 1eeb5df

1 file changed: +108 −92 lines


tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 108 additions & 92 deletions
@@ -143,6 +143,83 @@ async def EXAMPLE_STREAMED_RESPONSE(*args, **kwargs):
     )
 
 
+async def EXAMPLE_STREAMED_RESPONSE_WITH_DELTA(*args, **kwargs):
+    yield ResponseCreatedEvent(
+        response=Response(
+            id="chat-id",
+            output=[],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="response-model-id",
+            object="response",
+        ),
+        type="response.created",
+        sequence_number=0,
+    )
+
+    yield ResponseTextDeltaEvent(
+        type="response.output_text.delta",
+        item_id="message-id",
+        output_index=0,
+        content_index=0,
+        delta="Hello",
+        logprobs=[],
+        sequence_number=1,
+    )
+
+    yield ResponseTextDeltaEvent(
+        type="response.output_text.delta",
+        item_id="message-id",
+        output_index=0,
+        content_index=0,
+        delta=" world!",
+        logprobs=[],
+        sequence_number=2,
+    )
+
+    yield ResponseCompletedEvent(
+        response=Response(
+            id="chat-id",
+            output=[
+                ResponseOutputMessage(
+                    id="message-id",
+                    content=[
+                        ResponseOutputText(
+                            annotations=[],
+                            text="Hello world!",
+                            type="output_text",
+                        ),
+                    ],
+                    role="assistant",
+                    status="completed",
+                    type="message",
+                ),
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="response-model-id",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=20,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=5,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=8,
+                ),
+                total_tokens=30,
+            ),
+        ),
+        type="response.completed",
+        sequence_number=3,
+    )
+
+
 @pytest.fixture
 def mock_usage():
     return Usage(
@@ -2692,27 +2769,6 @@ def test_openai_agents_message_truncation(sentry_init, capture_events):
     assert "small message 5" in str(parsed_messages[0])
 
 
-def test_streaming_patches_applied(sentry_init):
-    """
-    Test that the streaming patches are applied correctly.
-    """
-    sentry_init(
-        integrations=[OpenAIAgentsIntegration()],
-        traces_sample_rate=1.0,
-    )
-
-    # Verify that run_streamed is patched (will have __wrapped__ attribute if patched)
-    import agents
-
-    # Check that the method exists and has been modified
-    assert hasattr(agents.run.DEFAULT_AGENT_RUNNER, "run_streamed")
-    assert hasattr(agents.run.AgentRunner, "_run_single_turn_streamed")
-
-    # Verify the patches were applied by checking for our wrapper
-    run_streamed_func = agents.run.DEFAULT_AGENT_RUNNER.run_streamed
-    assert run_streamed_func is not None
-
-
 @pytest.mark.asyncio
 async def test_streaming_span_update_captures_response_data(
     sentry_init, test_agent, mock_usage
@@ -2777,86 +2833,46 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
     Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
     should NOT trigger TTFT.
     """
-    from sentry_sdk.integrations.openai_agents.patches.models import (
-        _create_get_model_wrapper,
+    client = AsyncOpenAI(api_key="z")
+    client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE)
+
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+
+    agent_with_tool = test_agent.clone(
+        model=model,
     )
 
     sentry_init(
         integrations=[OpenAIAgentsIntegration()],
         traces_sample_rate=1.0,
     )
 
-    # Create a mock model with stream_response and get_response
-    class MockModel:
-        model = "gpt-4"
-
-        async def get_response(self, *args, **kwargs):
-            # Not used in this test, but required by the wrapper
-            pass
-
-        async def stream_response(self, *args, **kwargs):
-            # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
-            created_event = MagicMock(spec=["type", "sequence_number"])
-            created_event.type = "response.created"
-            yield created_event
-
-            # Simulate server-side processing delay before first token
-            await asyncio.sleep(0.05)  # 50ms delay
-
-            # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
-            text_delta_event = MagicMock(spec=["delta", "type", "content_index"])
-            text_delta_event.delta = "Hello"
-            text_delta_event.type = "response.output_text.delta"
-            yield text_delta_event
-
-            # Third event: more text content (also has delta, but TTFT already recorded)
-            text_delta_event2 = MagicMock(spec=["delta", "type", "content_index"])
-            text_delta_event2.delta = " world!"
-            text_delta_event2.type = "response.output_text.delta"
-            yield text_delta_event2
-
-            # Final event: ResponseCompletedEvent (has response, no delta)
-            completed_event = MagicMock(spec=["response", "type", "sequence_number"])
-            completed_event.response = MagicMock()
-            completed_event.response.model = "gpt-4"
-            completed_event.response.usage = Usage(
-                requests=1,
-                input_tokens=10,
-                output_tokens=5,
-                total_tokens=15,
-            )
-            completed_event.response.output = []
-            yield completed_event
-
-    # Create a mock original _get_model that returns our mock model
-    def mock_get_model(agent, run_config):
-        return MockModel()
-
-    # Wrap it with our integration wrapper
-    wrapped_get_model = _create_get_model_wrapper(mock_get_model)
-
-    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
-        # Get the wrapped model (this applies the stream_response wrapper)
-        wrapped_model = wrapped_get_model(None, test_agent, MagicMock())
-
-        # Call the wrapped stream_response and consume all events
-        async for _event in wrapped_model.stream_response():
-            pass
-
-        # Verify TTFT is recorded on the chat span (must be inside transaction context)
-        chat_spans = [
-            s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
-        ]
-        assert len(chat_spans) >= 1
-        chat_span = chat_spans[0]
+    with patch.object(
+        model._client.responses,
+        "create",
+        side_effect=EXAMPLE_STREAMED_RESPONSE_WITH_DELTA,
+    ) as _:
+        with sentry_sdk.start_transaction(
+            name="test_ttft", sampled=True
+        ) as transaction:
+            result = agents.Runner.run_streamed(
+                agent_with_tool,
+                "Please use the simple test tool",
+                run_config=test_run_config,
+            )
 
-        assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
-        ttft_value = chat_span._data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
-        # TTFT should be at least 40ms (our simulated delay minus some variance) but reasonable
-        assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"
+            async for event in result.stream_events():
+                pass
+
+            # Verify TTFT is recorded on the chat span (must be inside transaction context)
+            chat_spans = [
+                s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
+            ]
+            assert len(chat_spans) >= 1
+            chat_span = chat_spans[0]
 
-        # Verify streaming flag is set
-        assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True
+            assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
+            assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True
 
 
 @pytest.mark.skipif(
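As context for what the rewritten TTFT test asserts: time-to-first-token should be recorded when the first event carrying a delta arrives, and events without a delta (response.created, response.completed) should not trigger it. The snippet below is a minimal, self-contained sketch of that behaviour, not the SDK's or the integration's implementation; scripted_stream and measure_ttft are hypothetical names, and the plain dict events only stand in for the ResponseTextDeltaEvent / ResponseCreatedEvent objects used in the fixture above.

# Minimal sketch (hypothetical helpers, not SDK code): only the first event
# that carries a "delta" sets TTFT; created/completed events are ignored.
import asyncio
import time


async def scripted_stream():
    # created -> delay -> two text deltas -> completed, mirroring the
    # EXAMPLE_STREAMED_RESPONSE_WITH_DELTA fixture in the diff above.
    yield {"type": "response.created"}
    await asyncio.sleep(0.05)  # simulated server-side delay before the first token
    yield {"type": "response.output_text.delta", "delta": "Hello"}
    yield {"type": "response.output_text.delta", "delta": " world!"}
    yield {"type": "response.completed"}


async def measure_ttft(stream):
    start = time.monotonic()
    ttft = None
    async for event in stream:
        # Events without a delta never set TTFT; the first delta does.
        if ttft is None and event.get("delta"):
            ttft = time.monotonic() - start
    return ttft


if __name__ == "__main__":
    print(asyncio.run(measure_ttft(scripted_stream())))  # roughly 0.05 seconds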
