Skip to content

Commit cd9b9ab

Browse files
authored
Remove redundant weaker tracing assertions (#261)
Following up on #25, this removes uses of `fetch_traces` and `fetch_ordered_spans` in tests where a stronger assertion using `fetch_normalized_spans` already exists. The goal is to move the test suite towards the stronger style as much as possible, since weaker assertions such as [this](https://github.com/openai/openai-agents-python/blob/7a0ca7930e31e5d12f7785dfcb0a2f123739b21a/tests/test_concurrency.py#L59-L61) are still being added in #91. The next step will be to find the remaining uses of `fetch_ordered_spans` that can be replaced.
2 parents 851f0ce + f329619 commit cd9b9ab

5 files changed

+4
-311
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ sync:
55
.PHONY: format
66
format:
77
uv run ruff format
8+
uv run ruff check --fix
89

910
.PHONY: lint
1011
lint:

tests/test_agent_tracing.py

+1-40
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .fake_model import FakeModel
1111
from .test_responses import get_text_message
12-
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
12+
from .testing_processor import fetch_normalized_spans, fetch_traces
1313

1414

1515
@pytest.mark.asyncio
@@ -23,9 +23,6 @@ async def test_single_run_is_single_trace():
2323

2424
await Runner.run(agent, input="first_test")
2525

26-
traces = fetch_traces()
27-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
28-
2926
assert fetch_normalized_spans() == snapshot(
3027
[
3128
{
@@ -45,12 +42,6 @@ async def test_single_run_is_single_trace():
4542
]
4643
)
4744

48-
spans = fetch_ordered_spans()
49-
assert len(spans) == 1, (
50-
f"Got {len(spans)}, but expected 1: the agent span. data:"
51-
f"{[span.span_data for span in spans]}"
52-
)
53-
5445

5546
@pytest.mark.asyncio
5647
async def test_multiple_runs_are_multiple_traces():
@@ -69,9 +60,6 @@ async def test_multiple_runs_are_multiple_traces():
6960
await Runner.run(agent, input="first_test")
7061
await Runner.run(agent, input="second_test")
7162

72-
traces = fetch_traces()
73-
assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
74-
7563
assert fetch_normalized_spans() == snapshot(
7664
[
7765
{
@@ -105,9 +93,6 @@ async def test_multiple_runs_are_multiple_traces():
10593
]
10694
)
10795

108-
spans = fetch_ordered_spans()
109-
assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
110-
11196

11297
@pytest.mark.asyncio
11398
async def test_wrapped_trace_is_single_trace():
@@ -129,9 +114,6 @@ async def test_wrapped_trace_is_single_trace():
129114
await Runner.run(agent, input="second_test")
130115
await Runner.run(agent, input="third_test")
131116

132-
traces = fetch_traces()
133-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
134-
135117
assert fetch_normalized_spans() == snapshot(
136118
[
137119
{
@@ -169,9 +151,6 @@ async def test_wrapped_trace_is_single_trace():
169151
]
170152
)
171153

172-
spans = fetch_ordered_spans()
173-
assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
174-
175154

176155
@pytest.mark.asyncio
177156
async def test_parent_disabled_trace_disabled_agent_trace():
@@ -185,15 +164,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
185164

186165
await Runner.run(agent, input="first_test")
187166

188-
traces = fetch_traces()
189-
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
190167
assert fetch_normalized_spans() == snapshot([])
191168

192-
spans = fetch_ordered_spans()
193-
assert len(spans) == 0, (
194-
f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
195-
)
196-
197169

198170
@pytest.mark.asyncio
199171
async def test_manual_disabling_works():
@@ -206,13 +178,8 @@ async def test_manual_disabling_works():
206178

207179
await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True))
208180

209-
traces = fetch_traces()
210-
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
211181
assert fetch_normalized_spans() == snapshot([])
212182

213-
spans = fetch_ordered_spans()
214-
assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
215-
216183

217184
@pytest.mark.asyncio
218185
async def test_trace_config_works():
@@ -255,9 +222,6 @@ async def test_not_starting_streaming_creates_trace():
255222
break
256223
await asyncio.sleep(0.1)
257224

258-
traces = fetch_traces()
259-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
260-
261225
assert fetch_normalized_spans() == snapshot(
262226
[
263227
{
@@ -277,9 +241,6 @@ async def test_not_starting_streaming_creates_trace():
277241
]
278242
)
279243

280-
spans = fetch_ordered_spans()
281-
assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
282-
283244
# Await the stream to avoid warnings about it not being awaited
284245
async for _ in result.stream_events():
285246
pass

tests/test_responses_tracing.py

-13
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,6 @@ async def dummy_fetch_response(
6464
]
6565
)
6666

67-
spans = fetch_ordered_spans()
68-
assert len(spans) == 1
69-
70-
assert isinstance(spans[0].span_data, ResponseSpanData)
71-
assert spans[0].span_data.response is not None
72-
assert spans[0].span_data.response.id == "dummy-id"
73-
7467

7568
@pytest.mark.allow_call_model_methods
7669
@pytest.mark.asyncio
@@ -164,12 +157,6 @@ async def __aiter__(self):
164157
]
165158
)
166159

167-
spans = fetch_ordered_spans()
168-
assert len(spans) == 1
169-
assert isinstance(spans[0].span_data, ResponseSpanData)
170-
assert spans[0].span_data.response is not None
171-
assert spans[0].span_data.response.id == "dummy-id-123"
172-
173160

174161
@pytest.mark.allow_call_model_methods
175162
@pytest.mark.asyncio

tests/test_tracing_errors.py

+1-87
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
Runner,
1919
TResponseInputItem,
2020
)
21-
from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData
2221

2322
from .fake_model import FakeModel
2423
from .test_responses import (
@@ -28,7 +27,7 @@
2827
get_handoff_tool_call,
2928
get_text_message,
3029
)
31-
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
30+
from .testing_processor import fetch_normalized_spans
3231

3332

3433
@pytest.mark.asyncio
@@ -43,9 +42,6 @@ async def test_single_turn_model_error():
4342
with pytest.raises(ValueError):
4443
await Runner.run(agent, input="first_test")
4544

46-
traces = fetch_traces()
47-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
48-
4945
assert fetch_normalized_spans() == snapshot(
5046
[
5147
{
@@ -74,13 +70,6 @@ async def test_single_turn_model_error():
7470
]
7571
)
7672

77-
spans = fetch_ordered_spans()
78-
assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
79-
80-
generation_span = spans[1]
81-
assert isinstance(generation_span.span_data, GenerationSpanData)
82-
assert generation_span.error, "should have error"
83-
8473

8574
@pytest.mark.asyncio
8675
async def test_multi_turn_no_handoffs():
@@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs():
10695
with pytest.raises(ValueError):
10796
await Runner.run(agent, input="first_test")
10897

109-
traces = fetch_traces()
110-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
111-
11298
assert fetch_normalized_spans() == snapshot(
11399
[
114100
{
@@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs():
146132
]
147133
)
148134

149-
spans = fetch_ordered_spans()
150-
assert len(spans) == 4, (
151-
f"should have agent, generation, tool, generation, got {len(spans)} with data: "
152-
f"{[x.span_data for x in spans]}"
153-
)
154-
155-
last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
156-
assert last_generation_span.error, "should have error"
157-
158135

159136
@pytest.mark.asyncio
160137
async def test_tool_call_error():
@@ -173,9 +150,6 @@ async def test_tool_call_error():
173150
with pytest.raises(ModelBehaviorError):
174151
await Runner.run(agent, input="first_test")
175152

176-
traces = fetch_traces()
177-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
178-
179153
assert fetch_normalized_spans() == snapshot(
180154
[
181155
{
@@ -209,15 +183,6 @@ async def test_tool_call_error():
209183
]
210184
)
211185

212-
spans = fetch_ordered_spans()
213-
assert len(spans) == 3, (
214-
f"should have agent, generation, tool spans, got {len(spans)} with data: "
215-
f"{[x.span_data for x in spans]}"
216-
)
217-
218-
function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
219-
assert function_span.error, "should have error"
220-
221186

222187
@pytest.mark.asyncio
223188
async def test_multiple_handoff_doesnt_error():
@@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error():
255220
result = await Runner.run(agent_3, input="user_message")
256221
assert result.last_agent == agent_1, "should have picked first handoff"
257222

258-
traces = fetch_traces()
259-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
260-
261223
assert fetch_normalized_spans() == snapshot(
262224
[
263225
{
@@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error():
295257
]
296258
)
297259

298-
spans = fetch_ordered_spans()
299-
assert len(spans) == 7, (
300-
f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
301-
f"{[x.span_data for x in spans]}"
302-
)
303-
304260

305261
class Foo(TypedDict):
306262
bar: str
@@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error():
326282
result = await Runner.run(agent_1, input="user_message")
327283
assert result.final_output == Foo(bar="abc")
328284

329-
traces = fetch_traces()
330-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
331-
332285
assert fetch_normalized_spans() == snapshot(
333286
[
334287
{
@@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error():
344297
]
345298
)
346299

347-
spans = fetch_ordered_spans()
348-
assert len(spans) == 2, (
349-
f"should have 1 agent, 1 generation, got {len(spans)} with data: "
350-
f"{[x.span_data for x in spans]}"
351-
)
352-
353300

354301
@pytest.mark.asyncio
355302
async def test_handoffs_lead_to_correct_agent_spans():
@@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
399346
f"should have ended on the third agent, got {result.last_agent.name}"
400347
)
401348

402-
traces = fetch_traces()
403-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
404-
405349
assert fetch_normalized_spans() == snapshot(
406350
[
407351
{
@@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
472416
]
473417
)
474418

475-
spans = fetch_ordered_spans()
476-
assert len(spans) == 12, (
477-
f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
478-
f"{[x.span_data for x in spans]}"
479-
)
480-
481419

482420
@pytest.mark.asyncio
483421
async def test_max_turns_exceeded():
@@ -503,9 +441,6 @@ async def test_max_turns_exceeded():
503441
with pytest.raises(MaxTurnsExceeded):
504442
await Runner.run(agent, input="user_message", max_turns=2)
505443

506-
traces = fetch_traces()
507-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
508-
509444
assert fetch_normalized_spans() == snapshot(
510445
[
511446
{
@@ -538,15 +473,6 @@ async def test_max_turns_exceeded():
538473
]
539474
)
540475

541-
spans = fetch_ordered_spans()
542-
assert len(spans) == 5, (
543-
f"should have 1 agent span, 2 generations, 2 function calls, got "
544-
f"{len(spans)} with data: {[x.span_data for x in spans]}"
545-
)
546-
547-
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
548-
assert agent_span.error, "last agent should have error"
549-
550476

551477
def guardrail_function(
552478
context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@@ -568,9 +494,6 @@ async def test_guardrail_error():
568494
with pytest.raises(InputGuardrailTripwireTriggered):
569495
await Runner.run(agent, input="user_message")
570496

571-
traces = fetch_traces()
572-
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
573-
574497
assert fetch_normalized_spans() == snapshot(
575498
[
576499
{
@@ -594,12 +517,3 @@ async def test_guardrail_error():
594517
}
595518
]
596519
)
597-
598-
spans = fetch_ordered_spans()
599-
assert len(spans) == 2, (
600-
f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
601-
f"{[x.span_data for x in spans]}"
602-
)
603-
604-
agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
605-
assert agent_span.error, "last agent should have error"

0 commit comments

Comments
 (0)