Skip to content

Commit 627da95

Browse files
committed
feat: implement query_metrics
query_metrics currently has no implementation, meaning that once a metric is emitted there is no way in Llama Stack to query it from the store. Implement query_metrics for the meta_reference provider in a style similar to `query_traces`, using the trace_store to format a SQL query and execute it; in this case the query parameters are `metric.METRIC_NAME`, `start_time`, and `end_time`. This required client-side changes since the client had no `query_metrics` or any associated resources, so any tests here will fail, but I will provide manual execution logs for the new tests I am adding. Order the metrics by timestamp. Additionally, add `unit` to the `MetricDataPoint` class since this adds much more context to the metric being queried. These metrics can also be aggregated via a `granularity` parameter, pre-defined as a string like `1m`, `1h`, or `1d`, where metrics occurring in the same timespan are aggregated together. Signed-off-by: Charlie Doern <[email protected]>
1 parent 9b70bb9 commit 627da95

File tree

5 files changed

+245
-4
lines changed

5 files changed

+245
-4
lines changed

docs/_static/llama-stack-spec.html

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15846,12 +15846,16 @@
1584615846
"value": {
1584715847
"type": "number",
1584815848
"description": "The numeric value of the metric at this timestamp"
15849+
},
15850+
"unit": {
15851+
"type": "string"
1584915852
}
1585015853
},
1585115854
"additionalProperties": false,
1585215855
"required": [
1585315856
"timestamp",
15854-
"value"
15857+
"value",
15858+
"unit"
1585515859
],
1585615860
"title": "MetricDataPoint",
1585715861
"description": "A single data point in a metric time series."

docs/_static/llama-stack-spec.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11774,10 +11774,13 @@ components:
1177411774
type: number
1177511775
description: >-
1177611776
The numeric value of the metric at this timestamp
11777+
unit:
11778+
type: string
1177711779
additionalProperties: false
1177811780
required:
1177911781
- timestamp
1178011782
- value
11783+
- unit
1178111784
title: MetricDataPoint
1178211785
description: >-
1178311786
A single data point in a metric time series.

llama_stack/apis/telemetry/telemetry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ class MetricDataPoint(BaseModel):
386386

387387
timestamp: int
388388
value: float
389+
unit: str
389390

390391

391392
@json_schema_type

llama_stack/providers/inline/telemetry/meta_reference/telemetry.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# This source code is licensed under the terms described in the LICENSE file in
55
# the root directory of this source tree.
66

7+
import datetime
78
import logging
89
import threading
910
from typing import Any
@@ -149,7 +150,36 @@ async def query_metrics(
149150
query_type: MetricQueryType = MetricQueryType.RANGE,
150151
label_matchers: list[MetricLabelMatcher] | None = None,
151152
) -> QueryMetricsResponse:
152-
raise NotImplementedError("Querying metrics is not implemented")
153+
"""Query metrics from the telemetry store.
154+
155+
Args:
156+
metric_name: The name of the metric to query (e.g., "prompt_tokens")
157+
start_time: Start time as Unix timestamp
158+
end_time: End time as Unix timestamp (defaults to now if None)
159+
granularity: Time granularity for aggregation
160+
query_type: Type of query (RANGE or INSTANT)
161+
label_matchers: Label filters to apply
162+
163+
Returns:
164+
QueryMetricsResponse with metric time series data
165+
"""
166+
# Convert timestamps to datetime objects
167+
start_dt = datetime.datetime.fromtimestamp(start_time, datetime.UTC)
168+
end_dt = datetime.datetime.fromtimestamp(end_time, datetime.UTC) if end_time else None
169+
170+
# Use SQLite trace store if available
171+
if hasattr(self, "trace_store") and self.trace_store:
172+
return await self.trace_store.query_metrics(
173+
metric_name=metric_name,
174+
start_time=start_dt,
175+
end_time=end_dt,
176+
granularity=granularity,
177+
query_type=query_type,
178+
label_matchers=label_matchers,
179+
)
180+
181+
# Fallback to empty response if no trace store
182+
return QueryMetricsResponse(data=[])
153183

154184
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
155185
with self._lock:

llama_stack/providers/utils/telemetry/sqlite_trace_store.py

Lines changed: 205 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,23 @@
55
# the root directory of this source tree.
66

77
import json
8-
from datetime import datetime
8+
from datetime import UTC, datetime
99
from typing import Protocol
1010

1111
import aiosqlite
1212

13-
from llama_stack.apis.telemetry import QueryCondition, Span, SpanWithStatus, Trace
13+
from llama_stack.apis.telemetry import (
14+
MetricDataPoint,
15+
MetricLabel,
16+
MetricLabelMatcher,
17+
MetricQueryType,
18+
MetricSeries,
19+
QueryCondition,
20+
QueryMetricsResponse,
21+
Span,
22+
SpanWithStatus,
23+
Trace,
24+
)
1425

1526

1627
class TraceStore(Protocol):
@@ -29,11 +40,203 @@ async def get_span_tree(
2940
max_depth: int | None = None,
3041
) -> dict[str, SpanWithStatus]: ...
3142

    async def query_metrics(
        self,
        metric_name: str,
        start_time: datetime,
        end_time: datetime | None = None,
        granularity: str | None = "1d",
        query_type: MetricQueryType = MetricQueryType.RANGE,
        label_matchers: list[MetricLabelMatcher] | None = None,
    ) -> QueryMetricsResponse:
        """Query metric time series from the trace store.

        Args:
            metric_name: Name of the metric to query (e.g., "prompt_tokens").
            start_time: Start of the query range.
            end_time: End of the query range (implementations default to now when None).
            granularity: Aggregation bucket size (e.g., "1m", "1h", "1d"); None means no aggregation.
            query_type: RANGE for a time series, INSTANT for a single aggregate.
            label_matchers: Optional label filters to apply.

        Returns:
            QueryMetricsResponse containing the matching metric series.
        """
        ...
3253

3354
class SQLiteTraceStore(TraceStore):
3455
def __init__(self, conn_string: str):
3556
self.conn_string = conn_string
3657

58+
async def query_metrics(
59+
self,
60+
metric_name: str,
61+
start_time: datetime,
62+
end_time: datetime | None = None,
63+
granularity: str | None = None,
64+
query_type: MetricQueryType = MetricQueryType.RANGE,
65+
label_matchers: list[MetricLabelMatcher] | None = None,
66+
) -> QueryMetricsResponse:
67+
"""Query metrics from span events stored in SQLite.
68+
Args:
69+
metric_name: The name of the metric to query (e.g., "prompt_tokens")
70+
start_time: Start time for the query range
71+
end_time: End time for the query range (defaults to now if None)
72+
granularity: Time granularity for aggregation (e.g., "1m", "5m", "1h", "1d")
73+
query_type: Type of query (RANGE or INSTANT)
74+
label_matchers: Label filters to apply
75+
Returns:
76+
QueryMetricsResponse with metric time series data
77+
"""
78+
if end_time is None:
79+
end_time = datetime.now(UTC)
80+
81+
# Build the base query with aggregation
82+
if query_type == MetricQueryType.INSTANT:
83+
# For instant queries, aggregate all data into a single point
84+
query = """
85+
SELECT
86+
se.name,
87+
SUM(CAST(json_extract(se.attributes, '$.value') AS REAL)) as value,
88+
json_extract(se.attributes, '$.unit') as unit,
89+
se.attributes
90+
FROM span_events se
91+
WHERE se.name = ?
92+
AND se.timestamp BETWEEN ? AND ?
93+
"""
94+
else:
95+
# For range queries, aggregate by time buckets based on granularity
96+
if granularity:
97+
time_format = self._get_time_format_for_granularity(granularity)
98+
99+
query = f"""
100+
SELECT
101+
se.name,
102+
SUM(CAST(json_extract(se.attributes, '$.value') AS REAL)) as value,
103+
json_extract(se.attributes, '$.unit') as unit,
104+
se.attributes,
105+
strftime({time_format}, se.timestamp) as bucket_start
106+
FROM span_events se
107+
WHERE se.name = ?
108+
AND se.timestamp BETWEEN ? AND ?
109+
"""
110+
else:
111+
# For no granularity (None), return individual data points
112+
query = """
113+
SELECT
114+
se.name,
115+
json_extract(se.attributes, '$.value') as value,
116+
json_extract(se.attributes, '$.unit') as unit,
117+
se.attributes,
118+
se.timestamp
119+
FROM span_events se
120+
WHERE se.name = ?
121+
AND se.timestamp BETWEEN ? AND ?
122+
"""
123+
124+
params = [f"metric.{metric_name}", start_time.isoformat(), end_time.isoformat()]
125+
126+
# Add label matchers if provided
127+
if label_matchers:
128+
for matcher in label_matchers:
129+
if matcher.operator == "=":
130+
query += f" AND json_extract(se.attributes, '$.{matcher.name}') = ?"
131+
params.append(matcher.value)
132+
elif matcher.operator == "!=":
133+
query += f" AND json_extract(se.attributes, '$.{matcher.name}') != ?"
134+
params.append(matcher.value)
135+
elif matcher.operator == "=~":
136+
query += f" AND json_extract(se.attributes, '$.{matcher.name}') LIKE ?"
137+
params.append(f"%{matcher.value}%")
138+
elif matcher.operator == "!~":
139+
query += f" AND json_extract(se.attributes, '$.{matcher.name}') NOT LIKE ?"
140+
params.append(f"%{matcher.value}%")
141+
142+
if query_type == MetricQueryType.RANGE and granularity:
143+
group_time_format = self._get_time_format_for_granularity(granularity)
144+
query += f" GROUP BY strftime({group_time_format}, se.timestamp), json_extract(se.attributes, '$.unit')"
145+
query += " ORDER BY bucket_start"
146+
elif query_type == MetricQueryType.INSTANT:
147+
query += " GROUP BY json_extract(se.attributes, '$.unit')"
148+
else:
149+
# For range queries without granularity (no aggregation)
150+
query += " ORDER BY se.timestamp"
151+
152+
# Execute query
153+
async with aiosqlite.connect(self.conn_string) as conn:
154+
conn.row_factory = aiosqlite.Row
155+
async with conn.execute(query, params) as cursor:
156+
rows = await cursor.fetchall()
157+
158+
if not rows:
159+
return QueryMetricsResponse(data=[])
160+
161+
# Parse metric data
162+
data_points = []
163+
labels: list[MetricLabel] = []
164+
165+
for row in rows:
166+
# Parse JSON attributes
167+
attributes = json.loads(row["attributes"])
168+
169+
# Extract metric value and unit
170+
value = row["value"]
171+
unit = row["unit"] or ""
172+
173+
# Extract labels from attributes
174+
metric_labels = []
175+
for key, val in attributes.items():
176+
if key not in ["value", "unit"]:
177+
metric_labels.append(MetricLabel(name=key, value=str(val)))
178+
179+
# Create data point
180+
if query_type == MetricQueryType.RANGE and granularity:
181+
# Parse bucket start time for aggregated range queries
182+
try:
183+
bucket_start_raw = row["bucket_start"]
184+
if bucket_start_raw is not None:
185+
bucket_start = datetime.fromisoformat(bucket_start_raw)
186+
else:
187+
# Error out if bucket_start is None
188+
raise ValueError("bucket_start is None - this indicates a query configuration error")
189+
except KeyError as e:
190+
# Error out if bucket_start column doesn't exist in the result
191+
raise ValueError(
192+
"bucket_start column not found in query result when trying to use granularity. Timestamps in the database might be mis-formatted"
193+
) from e
194+
timestamp = int(bucket_start.timestamp())
195+
elif query_type == MetricQueryType.INSTANT:
196+
# Use current time for instant queries
197+
timestamp = int(datetime.now(UTC).timestamp())
198+
else:
199+
# Use original timestamp for non-aggregated queries
200+
# Parse timestamp from database
201+
timestamp_raw = row["timestamp"]
202+
if timestamp_raw is not None:
203+
timestamp_iso = datetime.fromisoformat(timestamp_raw)
204+
else:
205+
raise ValueError("timestamp is None - this indicates a data integrity issue")
206+
timestamp = int(timestamp_iso.timestamp())
207+
208+
data_points.append(
209+
MetricDataPoint(
210+
timestamp=timestamp,
211+
value=value,
212+
unit=unit,
213+
)
214+
)
215+
216+
# Create metric series
217+
metric_series = [MetricSeries(metric=metric_name, labels=labels, values=data_points)]
218+
219+
return QueryMetricsResponse(data=metric_series)
220+
221+
def _get_time_format_for_granularity(self, granularity: str | None) -> str:
222+
"""Get the SQLite strftime format string for a given granularity.
223+
Args:
224+
granularity: Granularity string (e.g., "1m", "5m", "1h", "1d")
225+
Returns:
226+
SQLite strftime format string for the granularity
227+
"""
228+
if granularity is None:
229+
raise ValueError("granularity cannot be None for this method - use separate logic for no aggregation")
230+
231+
if granularity.endswith("d"):
232+
return "'%Y-%m-%d 00:00:00'"
233+
elif granularity.endswith("h"):
234+
return "'%Y-%m-%d %H:00:00'"
235+
elif granularity.endswith("m"):
236+
return "'%Y-%m-%d %H:%M:00'"
237+
else:
238+
return "'%Y-%m-%d %H:00:00'" # Default to hour-level
239+
37240
async def query_traces(
38241
self,
39242
attribute_filters: list[QueryCondition] | None = None,

0 commit comments

Comments
 (0)