Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/sentry/autopilot/tasks/trace_instrumentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from sentry.seer.explorer.client import SeerExplorerClient
from sentry.seer.explorer.tools import get_trace_waterfall
from sentry.seer.models import SeerPermissionError
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.tasks.base import instrumented_task
from sentry.tasks.llm_issue_detection.detection import TraceMetadataWithSpanCount
from sentry.tasks.llm_issue_detection.trace_data import (
get_project_top_transaction_traces_for_llm_detection,
)
Expand Down Expand Up @@ -151,7 +151,9 @@ def _build_instrumentation_prompt(trace_json: str, project_slug: str) -> str:
)


def sample_trace_for_instrumentation_analysis(project: Project) -> TraceMetadata | None:
def sample_trace_for_instrumentation_analysis(
project: Project,
) -> TraceMetadataWithSpanCount | None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ArthurKnaus heads up that I changed the return value for get_project_top_transaction_traces_for_llm_detection, looks like it shouldn't affect your usage at all 👍 👍 👍

"""
Sample ONE trace for instrumentation analysis.
Uses top transaction sampling with random time offset.
Expand Down
15 changes: 10 additions & 5 deletions src/sentry/tasks/llm_issue_detection/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.seer.signed_seer_api import make_signed_seer_api_request
from sentry.tasks.base import instrumented_task
from sentry.tasks.llm_issue_detection.trace_data import (
get_project_top_transaction_traces_for_llm_detection,
)
from sentry.taskworker.namespaces import issues_tasks
from sentry.utils import json
from sentry.utils.redis import redis_clusters
Expand Down Expand Up @@ -92,8 +89,12 @@ class DetectedIssue(BaseModel):
transaction_name: str


class TraceMetadataWithSpanCount(TraceMetadata):
span_count: int


class IssueDetectionRequest(BaseModel):
traces: list[TraceMetadata]
traces: list[TraceMetadataWithSpanCount]
organization_id: int
project_id: int
org_slug: str
Expand Down Expand Up @@ -242,6 +243,10 @@ def detect_llm_issues_for_project(project_id: int) -> None:
For each deduped transaction, gets first trace_id from the start of time window, which has small random variation.
Sends these trace_ids to seer, which uses get_trace_waterfall to construct an EAPTrace to analyze.
"""
from sentry.tasks.llm_issue_detection.trace_data import ( # circular imports
get_project_top_transaction_traces_for_llm_detection,
)

project = Project.objects.get_from_cache(id=project_id)
organization = project.organization
organization_id = organization.id
Expand Down Expand Up @@ -270,7 +275,7 @@ def detect_llm_issues_for_project(project_id: int) -> None:
sentry_sdk.metrics.count("llm_issue_detection.trace.skipped", skipped)

# Take up to NUM_TRANSACTIONS_TO_PROCESS
traces_to_send: list[TraceMetadata] = [
traces_to_send: list[TraceMetadataWithSpanCount] = [
t for t in evidence_traces if t.trace_id in unprocessed_ids
][:NUM_TRANSACTIONS_TO_PROCESS]

Expand Down
21 changes: 15 additions & 6 deletions src/sentry/tasks/llm_issue_detection/trace_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sentry.seer.sentry_data_models import TraceMetadata
from sentry.snuba.referrer import Referrer
from sentry.snuba.spans_rpc import Spans
from sentry.tasks.llm_issue_detection.detection import TraceMetadataWithSpanCount

logger = logging.getLogger(__name__)

Expand All @@ -25,16 +26,16 @@ def get_valid_trace_ids_by_span_count(
trace_ids: list[str],
snuba_params: SnubaParams,
config: SearchResolverConfig,
) -> set[str]:
) -> dict[str, int]:
"""
Query span counts for all trace_ids in one query.
Return set of trace_ids with valid span counts.
Returns a dict mapping trace_id to span count for traces with valid span counts.

This filters out traces that are too small (lack context) or too large
(exceed LLM context limits) before sending to Seer for analysis.
"""
if not trace_ids:
return set()
return {}

result = Spans.run_table_query(
params=snuba_params,
Expand All @@ -49,7 +50,7 @@ def get_valid_trace_ids_by_span_count(
)

return {
row["trace"]
row["trace"]: row["count()"]
for row in result.get("data", [])
if LOWER_SPAN_LIMIT <= row["count()"] <= UPPER_SPAN_LIMIT
}
Expand All @@ -59,7 +60,7 @@ def get_project_top_transaction_traces_for_llm_detection(
project_id: int,
limit: int,
start_time_delta_minutes: int,
) -> list[TraceMetadata]:
) -> list[TraceMetadataWithSpanCount]:
"""
Get top transactions by total time spent, return one semi-randomly chosen trace per transaction.
Filters traces by span count before returning.
Expand Down Expand Up @@ -156,4 +157,12 @@ def _build_snuba_params(start: datetime) -> SnubaParams:
all_trace_ids, transaction_snuba_params, config
)

return [t for t in trace_metadata if t.trace_id in valid_trace_ids]
return [
TraceMetadataWithSpanCount(
trace_id=t.trace_id,
transaction_name=t.transaction_name,
span_count=valid_trace_ids[t.trace_id],
)
for t in trace_metadata
if t.trace_id in valid_trace_ids
]
14 changes: 7 additions & 7 deletions tests/sentry/tasks/test_llm_issue_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_run_detection_dispatches_sub_tasks(self, mock_apply_async):
@with_feature("organizations:gen-ai-features")
@patch("sentry.tasks.llm_issue_detection.detection.make_signed_seer_api_request")
@patch(
"sentry.tasks.llm_issue_detection.detection.get_project_top_transaction_traces_for_llm_detection"
"sentry.tasks.llm_issue_detection.trace_data.get_project_top_transaction_traces_for_llm_detection"
)
def test_detect_llm_issues_no_transactions(self, mock_get_transactions, mock_seer_request):
mock_get_transactions.return_value = []
Expand Down Expand Up @@ -322,25 +322,25 @@ class TestGetValidTraceIdsBySpanCount:
# All valid
(
{"data": [{"trace": "a", "count()": 50}, {"trace": "b", "count()": 100}]},
{"a", "b"},
{"a": 50, "b": 100},
),
# Some below lower limit
(
{"data": [{"trace": "a", "count()": 10}, {"trace": "b", "count()": 50}]},
{"b"},
{"b": 50},
),
# Some above upper limit
(
{"data": [{"trace": "a", "count()": 50}, {"trace": "b", "count()": 600}]},
{"a"},
{"a": 50},
),
# Empty result
({"data": []}, set()),
({"data": []}, {}),
],
)
@patch("sentry.tasks.llm_issue_detection.trace_data.Spans.run_table_query")
def test_filters_by_span_count(
self, mock_spans_query: Mock, query_result: dict, expected: set
self, mock_spans_query: Mock, query_result: dict, expected: dict[str, int]
) -> None:
mock_spans_query.return_value = query_result
mock_snuba_params = Mock()
Expand All @@ -363,7 +363,7 @@ def setUp(self) -> None:
@patch("sentry.tasks.llm_issue_detection.trace_data.get_valid_trace_ids_by_span_count")
def test_returns_deduped_transaction_traces(self, mock_span_count) -> None:
# Mock span count check to return all traces as valid
mock_span_count.side_effect = lambda trace_ids, *args: set(trace_ids)
mock_span_count.side_effect = lambda trace_ids, *args: {tid: 50 for tid in trace_ids}

trace_id_1 = uuid.uuid4().hex
span1 = self.create_span(
Expand Down
Loading