Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/sentry/autopilot/tasks/trace_instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from sentry.seer.explorer.client import SeerExplorerClient
from sentry.seer.explorer.tools import get_trace_waterfall
from sentry.seer.models import SeerPermissionError
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.tasks.base import instrumented_task
from sentry.tasks.llm_issue_detection.detection import TraceMetadataWithSpanCount
from sentry.tasks.llm_issue_detection.trace_data import (
get_project_top_transaction_traces_for_llm_detection,
)
Expand Down Expand Up @@ -151,7 +151,9 @@ def _build_instrumentation_prompt(trace_json: str, project_slug: str) -> str:
)


def sample_trace_for_instrumentation_analysis(project: Project) -> TraceMetadata | None:
def sample_trace_for_instrumentation_analysis(
project: Project,
) -> TraceMetadataWithSpanCount | None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ArthurKnaus heads up that I changed the return value for get_project_top_transaction_traces_for_llm_detection, looks like it shouldn't affect your usage at all 👍 👍 👍

"""
Sample ONE trace for instrumentation analysis.
Uses top transaction sampling with random time offset.
Expand Down
15 changes: 10 additions & 5 deletions src/sentry/tasks/llm_issue_detection/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.seer.signed_seer_api import make_signed_seer_api_request
from sentry.tasks.base import instrumented_task
from sentry.tasks.llm_issue_detection.trace_data import (
get_project_top_transaction_traces_for_llm_detection,
)
from sentry.taskworker.namespaces import issues_tasks
from sentry.utils import json
from sentry.utils.redis import redis_clusters
Expand Down Expand Up @@ -92,8 +89,12 @@ class DetectedIssue(BaseModel):
transaction_name: str


class TraceMetadataWithSpanCount(TraceMetadata):
span_count: int


class IssueDetectionRequest(BaseModel):
traces: list[TraceMetadata]
traces: list[TraceMetadataWithSpanCount]
organization_id: int
project_id: int
org_slug: str
Expand Down Expand Up @@ -242,6 +243,10 @@ def detect_llm_issues_for_project(project_id: int) -> None:
For each deduped transaction, gets first trace_id from the start of time window, which has small random variation.
Sends these trace_ids to seer, which uses get_trace_waterfall to construct an EAPTrace to analyze.
"""
from sentry.tasks.llm_issue_detection.trace_data import ( # circular imports
get_project_top_transaction_traces_for_llm_detection,
)

project = Project.objects.get_from_cache(id=project_id)
organization = project.organization
organization_id = organization.id
Expand Down Expand Up @@ -270,7 +275,7 @@ def detect_llm_issues_for_project(project_id: int) -> None:
sentry_sdk.metrics.count("llm_issue_detection.trace.skipped", skipped)

# Take up to NUM_TRANSACTIONS_TO_PROCESS
traces_to_send: list[TraceMetadata] = [
traces_to_send: list[TraceMetadataWithSpanCount] = [
t for t in evidence_traces if t.trace_id in unprocessed_ids
][:NUM_TRANSACTIONS_TO_PROCESS]

Expand Down
21 changes: 15 additions & 6 deletions src/sentry/tasks/llm_issue_detection/trace_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.snuba.referrer import Referrer
from sentry.snuba.spans_rpc import Spans
from sentry.tasks.llm_issue_detection.detection import TraceMetadataWithSpanCount

logger = logging.getLogger(__name__)

Expand All @@ -25,16 +26,16 @@ def get_valid_trace_ids_by_span_count(
trace_ids: list[str],
snuba_params: SnubaParams,
config: SearchResolverConfig,
) -> set[str]:
) -> dict[str, int]:
"""
Query span counts for all trace_ids in one query.
Return set of trace_ids with valid span counts.
Returns a dict mapping trace_id to span count for traces with valid span counts.

This filters out traces that are too small (lack context) or too large
(exceed LLM context limits) before sending to Seer for analysis.
"""
if not trace_ids:
return set()
return {}

result = Spans.run_table_query(
params=snuba_params,
Expand All @@ -49,7 +50,7 @@ def get_valid_trace_ids_by_span_count(
)

return {
row["trace"]
row["trace"]: row["count()"]
for row in result.get("data", [])
if LOWER_SPAN_LIMIT <= row["count()"] <= UPPER_SPAN_LIMIT
}
Expand All @@ -59,7 +60,7 @@ def get_project_top_transaction_traces_for_llm_detection(
project_id: int,
limit: int,
start_time_delta_minutes: int,
) -> list[TraceMetadata]:
) -> list[TraceMetadataWithSpanCount]:
"""
Get top transactions by total time spent, return one semi-randomly chosen trace per transaction.
Filters traces by span count before returning.
Expand Down Expand Up @@ -156,4 +157,12 @@ def _build_snuba_params(start: datetime) -> SnubaParams:
all_trace_ids, transaction_snuba_params, config
)

return [t for t in trace_metadata if t.trace_id in valid_trace_ids]
return [
TraceMetadataWithSpanCount(
trace_id=t.trace_id,
transaction_name=t.transaction_name,
span_count=valid_trace_ids[t.trace_id],
)
for t in trace_metadata
if t.trace_id in valid_trace_ids
]
14 changes: 7 additions & 7 deletions tests/sentry/tasks/test_llm_issue_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_run_detection_dispatches_sub_tasks(self, mock_apply_async):
@with_feature("organizations:gen-ai-features")
@patch("sentry.tasks.llm_issue_detection.detection.make_signed_seer_api_request")
@patch(
"sentry.tasks.llm_issue_detection.detection.get_project_top_transaction_traces_for_llm_detection"
"sentry.tasks.llm_issue_detection.trace_data.get_project_top_transaction_traces_for_llm_detection"
)
def test_detect_llm_issues_no_transactions(self, mock_get_transactions, mock_seer_request):
mock_get_transactions.return_value = []
Expand Down Expand Up @@ -322,25 +322,25 @@ class TestGetValidTraceIdsBySpanCount:
# All valid
(
{"data": [{"trace": "a", "count()": 50}, {"trace": "b", "count()": 100}]},
{"a", "b"},
{"a": 50, "b": 100},
),
# Some below lower limit
(
{"data": [{"trace": "a", "count()": 10}, {"trace": "b", "count()": 50}]},
{"b"},
{"b": 50},
),
# Some above upper limit
(
{"data": [{"trace": "a", "count()": 50}, {"trace": "b", "count()": 600}]},
{"a"},
{"a": 50},
),
# Empty result
({"data": []}, set()),
({"data": []}, {}),
],
)
@patch("sentry.tasks.llm_issue_detection.trace_data.Spans.run_table_query")
def test_filters_by_span_count(
self, mock_spans_query: Mock, query_result: dict, expected: set
self, mock_spans_query: Mock, query_result: dict, expected: dict[str, int]
) -> None:
mock_spans_query.return_value = query_result
mock_snuba_params = Mock()
Expand All @@ -363,7 +363,7 @@ def setUp(self) -> None:
@patch("sentry.tasks.llm_issue_detection.trace_data.get_valid_trace_ids_by_span_count")
def test_returns_deduped_transaction_traces(self, mock_span_count) -> None:
# Mock span count check to return all traces as valid
mock_span_count.side_effect = lambda trace_ids, *args: set(trace_ids)
mock_span_count.side_effect = lambda trace_ids, *args: {tid: 50 for tid in trace_ids}

trace_id_1 = uuid.uuid4().hex
span1 = self.create_span(
Expand Down
Loading