Remove redundant weaker tracing assertions (#261)

rm-openai · web-flow · commit cd9b9ab28d70 · 2025-03-21T10:31:41.000-04:00
Following up on #25, this removes uses of `fetch_traces` and `fetch_ordered_spans` from tests that already make a stronger assertion using `fetch_normalized_spans`. The goal is to move the test suite towards the stronger style as much as possible, since weaker assertions are still being added, such as [this one](https://github.com/openai/openai-agents-python/blob/7a0ca7930e31e5d12f7785dfcb0a2f123739b21a/tests/test_concurrency.py#L59-L61) in #91. This change also adds `uv run ruff check --fix` to the `format` target in the Makefile. The next step will be to find the remaining uses of `fetch_ordered_spans` that can be replaced.
diff --git a/Makefile b/Makefile
@@ -5,6 +5,7 @@ sync:
 .PHONY: format
 format: 
 	uv run ruff format
+	uv run ruff check --fix
 
 .PHONY: lint
 lint: 
diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py
@@ -9,7 +9,7 @@
 
 from .fake_model import FakeModel
 from .test_responses import get_text_message
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans, fetch_traces
 
 
 @pytest.mark.asyncio
@@ -23,9 +23,6 @@ async def test_single_run_is_single_trace():
 
     await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -45,12 +42,6 @@ async def test_single_run_is_single_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1, (
-        f"Got {len(spans)}, but expected 1: the agent span. data:"
-        f"{[span.span_data for span in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_multiple_runs_are_multiple_traces():
@@ -69,9 +60,6 @@ async def test_multiple_runs_are_multiple_traces():
     await Runner.run(agent, input="first_test")
     await Runner.run(agent, input="second_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -105,9 +93,6 @@ async def test_multiple_runs_are_multiple_traces():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
-
 
 @pytest.mark.asyncio
 async def test_wrapped_trace_is_single_trace():
@@ -129,9 +114,6 @@ async def test_wrapped_trace_is_single_trace():
         await Runner.run(agent, input="second_test")
         await Runner.run(agent, input="third_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -169,9 +151,6 @@ async def test_wrapped_trace_is_single_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
-
 
 @pytest.mark.asyncio
 async def test_parent_disabled_trace_disabled_agent_trace():
@@ -185,15 +164,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
 
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
     assert fetch_normalized_spans() == snapshot([])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0, (
-        f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_manual_disabling_works():
@@ -206,13 +178,8 @@ async def test_manual_disabling_works():
 
     await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True))
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
     assert fetch_normalized_spans() == snapshot([])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
-
 
 @pytest.mark.asyncio
 async def test_trace_config_works():
@@ -255,9 +222,6 @@ async def test_not_starting_streaming_creates_trace():
             break
         await asyncio.sleep(0.1)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -277,9 +241,6 @@ async def test_not_starting_streaming_creates_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
-
     # Await the stream to avoid warnings about it not being awaited
     async for _ in result.stream_events():
         pass
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
@@ -64,13 +64,6 @@ async def dummy_fetch_response(
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-
-    assert isinstance(spans[0].span_data, ResponseSpanData)
-    assert spans[0].span_data.response is not None
-    assert spans[0].span_data.response.id == "dummy-id"
-
 
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
@@ -164,12 +157,6 @@ async def __aiter__(self):
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-    assert isinstance(spans[0].span_data, ResponseSpanData)
-    assert spans[0].span_data.response is not None
-    assert spans[0].span_data.response.id == "dummy-id-123"
-
 
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py
@@ -18,7 +18,6 @@
     Runner,
     TResponseInputItem,
 )
-from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData
 
 from .fake_model import FakeModel
 from .test_responses import (
@@ -28,7 +27,7 @@
     get_handoff_tool_call,
     get_text_message,
 )
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans
 
 
 @pytest.mark.asyncio
@@ -43,9 +42,6 @@ async def test_single_turn_model_error():
     with pytest.raises(ValueError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -74,13 +70,6 @@ async def test_single_turn_model_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
-
-    generation_span = spans[1]
-    assert isinstance(generation_span.span_data, GenerationSpanData)
-    assert generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multi_turn_no_handoffs():
@@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs():
     with pytest.raises(ValueError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 4, (
-        f"should have agent, generation, tool, generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
-    assert last_generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_tool_call_error():
@@ -173,9 +150,6 @@ async def test_tool_call_error():
     with pytest.raises(ModelBehaviorError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -209,15 +183,6 @@ async def test_tool_call_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 3, (
-        f"should have agent, generation, tool spans, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
-    assert function_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multiple_handoff_doesnt_error():
@@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error():
     result = await Runner.run(agent_3, input="user_message")
     assert result.last_agent == agent_1, "should have picked first handoff"
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 7, (
-        f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 class Foo(TypedDict):
     bar: str
@@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error():
     result = await Runner.run(agent_1, input="user_message")
     assert result.final_output == Foo(bar="abc")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_handoffs_lead_to_correct_agent_spans():
@@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
         f"should have ended on the third agent, got {result.last_agent.name}"
     )
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 12, (
-        f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_max_turns_exceeded():
@@ -503,9 +441,6 @@ async def test_max_turns_exceeded():
     with pytest.raises(MaxTurnsExceeded):
         await Runner.run(agent, input="user_message", max_turns=2)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -538,15 +473,6 @@ async def test_max_turns_exceeded():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 5, (
-        f"should have 1 agent span, 2 generations, 2 function calls, got "
-        f"{len(spans)} with data: {[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
-
 
 def guardrail_function(
     context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@@ -568,9 +494,6 @@ async def test_guardrail_error():
     with pytest.raises(InputGuardrailTripwireTriggered):
         await Runner.run(agent, input="user_message")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -594,12 +517,3 @@ async def test_guardrail_error():
             }
         ]
     )
-
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py

Original file line number	Diff line number	Diff line change
`@@ -64,13 +64,6 @@ async def dummy_fetch_response(`
`64`	`64`	`]`
`65`	`65`	`)`
`66`	`66`
`67`		`- spans = fetch_ordered_spans()`
`68`		`- assert len(spans) == 1`
`69`		`-`
`70`		`- assert isinstance(spans[0].span_data, ResponseSpanData)`
`71`		`- assert spans[0].span_data.response is not None`
`72`		`- assert spans[0].span_data.response.id == "dummy-id"`
`73`		`-`
`74`	`67`
`75`	`68`	`@pytest.mark.allow_call_model_methods`
`76`	`69`	`@pytest.mark.asyncio`
`@@ -164,12 +157,6 @@ async def __aiter__(self):`
`164`	`157`	`]`
`165`	`158`	`)`
`166`	`159`
`167`		`- spans = fetch_ordered_spans()`
`168`		`- assert len(spans) == 1`
`169`		`- assert isinstance(spans[0].span_data, ResponseSpanData)`
`170`		`- assert spans[0].span_data.response is not None`
`171`		`- assert spans[0].span_data.response.id == "dummy-id-123"`
`172`		`-`
`173`	`160`
`174`	`161`	`@pytest.mark.allow_call_model_methods`
`175`	`162`	`@pytest.mark.asyncio`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,6 @@`
`18`	`18`	`Runner,`
`19`	`19`	`TResponseInputItem,`
`20`	`20`	`)`
`21`		`-from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData`
`22`	`21`
`23`	`22`	`from .fake_model import FakeModel`
`24`	`23`	`from .test_responses import (`
`@@ -28,7 +27,7 @@`
`28`	`27`	`get_handoff_tool_call,`
`29`	`28`	`get_text_message,`
`30`	`29`	`)`
`31`		`-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces`
	`30`	`+from .testing_processor import fetch_normalized_spans`
`32`	`31`
`33`	`32`
`34`	`33`	`@pytest.mark.asyncio`
`@@ -43,9 +42,6 @@ async def test_single_turn_model_error():`
`43`	`42`	`with pytest.raises(ValueError):`
`44`	`43`	`await Runner.run(agent, input="first_test")`
`45`	`44`
`46`		`- traces = fetch_traces()`
`47`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`48`		`-`
`49`	`45`	`assert fetch_normalized_spans() == snapshot(`
`50`	`46`	`[`
`51`	`47`	`{`
`@@ -74,13 +70,6 @@ async def test_single_turn_model_error():`
`74`	`70`	`]`
`75`	`71`	`)`
`76`	`72`
`77`		`- spans = fetch_ordered_spans()`
`78`		`- assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"`
`79`		`-`
`80`		`- generation_span = spans[1]`
`81`		`- assert isinstance(generation_span.span_data, GenerationSpanData)`
`82`		`- assert generation_span.error, "should have error"`
`83`		`-`
`84`	`73`
`85`	`74`	`@pytest.mark.asyncio`
`86`	`75`	`async def test_multi_turn_no_handoffs():`
`@@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs():`
`106`	`95`	`with pytest.raises(ValueError):`
`107`	`96`	`await Runner.run(agent, input="first_test")`
`108`	`97`
`109`		`- traces = fetch_traces()`
`110`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`111`		`-`
`112`	`98`	`assert fetch_normalized_spans() == snapshot(`
`113`	`99`	`[`
`114`	`100`	`{`
`@@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs():`
`146`	`132`	`]`
`147`	`133`	`)`
`148`	`134`
`149`		`- spans = fetch_ordered_spans()`
`150`		`- assert len(spans) == 4, (`
`151`		`- f"should have agent, generation, tool, generation, got {len(spans)} with data: "`
`152`		`- f"{[x.span_data for x in spans]}"`
`153`		`- )`
`154`		`-`
`155`		`- last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]`
`156`		`- assert last_generation_span.error, "should have error"`
`157`		`-`
`158`	`135`
`159`	`136`	`@pytest.mark.asyncio`
`160`	`137`	`async def test_tool_call_error():`
`@@ -173,9 +150,6 @@ async def test_tool_call_error():`
`173`	`150`	`with pytest.raises(ModelBehaviorError):`
`174`	`151`	`await Runner.run(agent, input="first_test")`
`175`	`152`
`176`		`- traces = fetch_traces()`
`177`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`178`		`-`
`179`	`153`	`assert fetch_normalized_spans() == snapshot(`
`180`	`154`	`[`
`181`	`155`	`{`
`@@ -209,15 +183,6 @@ async def test_tool_call_error():`
`209`	`183`	`]`
`210`	`184`	`)`
`211`	`185`
`212`		`- spans = fetch_ordered_spans()`
`213`		`- assert len(spans) == 3, (`
`214`		`- f"should have agent, generation, tool spans, got {len(spans)} with data: "`
`215`		`- f"{[x.span_data for x in spans]}"`
`216`		`- )`
`217`		`-`
`218`		`- function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]`
`219`		`- assert function_span.error, "should have error"`
`220`		`-`
`221`	`186`
`222`	`187`	`@pytest.mark.asyncio`
`223`	`188`	`async def test_multiple_handoff_doesnt_error():`
`@@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error():`
`255`	`220`	`result = await Runner.run(agent_3, input="user_message")`
`256`	`221`	`assert result.last_agent == agent_1, "should have picked first handoff"`
`257`	`222`
`258`		`- traces = fetch_traces()`
`259`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`260`		`-`
`261`	`223`	`assert fetch_normalized_spans() == snapshot(`
`262`	`224`	`[`
`263`	`225`	`{`
`@@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error():`
`295`	`257`	`]`
`296`	`258`	`)`
`297`	`259`
`298`		`- spans = fetch_ordered_spans()`
`299`		`- assert len(spans) == 7, (`
`300`		`- f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "`
`301`		`- f"{[x.span_data for x in spans]}"`
`302`		`- )`
`303`		`-`
`304`	`260`
`305`	`261`	`class Foo(TypedDict):`
`306`	`262`	`bar: str`
`@@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error():`
`326`	`282`	`result = await Runner.run(agent_1, input="user_message")`
`327`	`283`	`assert result.final_output == Foo(bar="abc")`
`328`	`284`
`329`		`- traces = fetch_traces()`
`330`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`331`		`-`
`332`	`285`	`assert fetch_normalized_spans() == snapshot(`
`333`	`286`	`[`
`334`	`287`	`{`
`@@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error():`
`344`	`297`	`]`
`345`	`298`	`)`
`346`	`299`
`347`		`- spans = fetch_ordered_spans()`
`348`		`- assert len(spans) == 2, (`
`349`		`- f"should have 1 agent, 1 generation, got {len(spans)} with data: "`
`350`		`- f"{[x.span_data for x in spans]}"`
`351`		`- )`
`352`		`-`
`353`	`300`
`354`	`301`	`@pytest.mark.asyncio`
`355`	`302`	`async def test_handoffs_lead_to_correct_agent_spans():`
`@@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans():`
`399`	`346`	`f"should have ended on the third agent, got {result.last_agent.name}"`
`400`	`347`	`)`
`401`	`348`
`402`		`- traces = fetch_traces()`
`403`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`404`		`-`
`405`	`349`	`assert fetch_normalized_spans() == snapshot(`
`406`	`350`	`[`
`407`	`351`	`{`
`@@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans():`
`472`	`416`	`]`
`473`	`417`	`)`
`474`	`418`
`475`		`- spans = fetch_ordered_spans()`
`476`		`- assert len(spans) == 12, (`
`477`		`- f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "`
`478`		`- f"{[x.span_data for x in spans]}"`
`479`		`- )`
`480`		`-`
`481`	`419`
`482`	`420`	`@pytest.mark.asyncio`
`483`	`421`	`async def test_max_turns_exceeded():`
`@@ -503,9 +441,6 @@ async def test_max_turns_exceeded():`
`503`	`441`	`with pytest.raises(MaxTurnsExceeded):`
`504`	`442`	`await Runner.run(agent, input="user_message", max_turns=2)`
`505`	`443`
`506`		`- traces = fetch_traces()`
`507`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`508`		`-`
`509`	`444`	`assert fetch_normalized_spans() == snapshot(`
`510`	`445`	`[`
`511`	`446`	`{`
`@@ -538,15 +473,6 @@ async def test_max_turns_exceeded():`
`538`	`473`	`]`
`539`	`474`	`)`
`540`	`475`
`541`		`- spans = fetch_ordered_spans()`
`542`		`- assert len(spans) == 5, (`
`543`		`- f"should have 1 agent span, 2 generations, 2 function calls, got "`
`544`		`- f"{len(spans)} with data: {[x.span_data for x in spans]}"`
`545`		`- )`
`546`		`-`
`547`		`- agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]`
`548`		`- assert agent_span.error, "last agent should have error"`
`549`		`-`
`550`	`476`
`551`	`477`	`def guardrail_function(`
`552`	`478`	`context: RunContextWrapper[Any], agent: Agent[Any], input: str \| list[TResponseInputItem]`
`@@ -568,9 +494,6 @@ async def test_guardrail_error():`
`568`	`494`	`with pytest.raises(InputGuardrailTripwireTriggered):`
`569`	`495`	`await Runner.run(agent, input="user_message")`
`570`	`496`
`571`		`- traces = fetch_traces()`
`572`		`- assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"`
`573`		`-`
`574`	`497`	`assert fetch_normalized_spans() == snapshot(`
`575`	`498`	`[`
`576`	`499`	`{`
`@@ -594,12 +517,3 @@ async def test_guardrail_error():`
`594`	`517`	`}`
`595`	`518`	`]`
`596`	`519`	`)`
`597`		`-`
`598`		`- spans = fetch_ordered_spans()`
`599`		`- assert len(spans) == 2, (`
`600`		`- f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "`
`601`		`- f"{[x.span_data for x in spans]}"`
`602`		`- )`
`603`		`-`
`604`		`- agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]`
`605`		`- assert agent_span.error, "last agent should have error"`