Skip to content

Commit 5150864

Browse files
authored
[sqlserver] Emit raw query statements and plans for non prepared statements (#19421)
* emit raw statement text * fix import * emit raw statement event in activity sampling * add raw_query_statement tag * add db instance * emit raw plan events * fix tags * add changelog * conditionally emit raw plan event * lint * fix lint * update config description
1 parent 9c123b9 commit 5150864

File tree

13 files changed

+274
-19
lines changed

13 files changed

+274
-19
lines changed

sqlserver/assets/configuration/spec.yaml

+16
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,22 @@ files:
850850
type: boolean
851851
example: false
852852
display_default: false
853+
- name: collect_raw_query_statement
854+
hidden: true
855+
description: |
856+
Configure the collection of raw query statements in query activity and execution plans.
857+
Raw query statements and execution plans may contain sensitive information in query text.
858+
Enabling this option will allow the collection and ingestion of raw query statements and
859+
execution plans into Datadog. This option is disabled by default.
860+
Note: This option only applies when `dbm` is enabled.
861+
options:
862+
- name: enabled
863+
description: |
864+
Set to `true` to collect the raw query statements.
865+
value:
866+
type: boolean
867+
example: false
868+
display_default: false
853869
- name: log_unobfuscated_queries
854870
hidden: true
855871
description: |

sqlserver/changelog.d/19421.added

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for collecting raw query statements and explain plans when `collect_raw_query_statement.enabled` is true.

sqlserver/datadog_checks/sqlserver/activity.py

+86-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99

1010
from datadog_checks.base import is_affirmative
1111
from datadog_checks.base.utils.db.sql import compute_sql_signature
12-
from datadog_checks.base.utils.db.utils import DBMAsyncJob, default_json_event_encoding, obfuscate_sql_with_metadata
12+
from datadog_checks.base.utils.db.utils import (
13+
DBMAsyncJob,
14+
RateLimitingTTLCache,
15+
default_json_event_encoding,
16+
obfuscate_sql_with_metadata,
17+
)
1318
from datadog_checks.base.utils.serialization import json
1419
from datadog_checks.base.utils.tracking import tracked_method
1520
from datadog_checks.sqlserver.config import SQLServerConfig
@@ -70,13 +75,15 @@
7075
sess.host_name as host_name,
7176
sess.program_name as program_name,
7277
sess.is_user_process as is_user_process,
78+
{input_buffer_columns}
7379
{exec_request_columns}
7480
FROM sys.dm_exec_sessions sess
7581
INNER JOIN sys.dm_exec_connections c
7682
ON sess.session_id = c.session_id
7783
INNER JOIN sys.dm_exec_requests req
7884
ON c.connection_id = req.connection_id
7985
CROSS APPLY sys.dm_exec_sql_text(req.sql_handle) qt
86+
{input_buffer_join}
8087
WHERE
8188
sess.session_id != @@spid AND
8289
sess.status != 'sleeping'
@@ -153,6 +160,12 @@
153160
"context_info",
154161
]
155162

163+
# Extra select-list entries spliced into the activity query when raw statement
# collection is enabled; the input buffer text is exposed as `raw_statement`.
INPUT_BUFFER_COLUMNS = [
    "input_buffer.event_info as raw_statement",
]

# Join clause that provides the `input_buffer` alias used by the columns above.
# OUTER APPLY (rather than CROSS APPLY) keeps the request row even when the
# function returns no input buffer row for it.
INPUT_BUFFER_JOIN = "OUTER APPLY sys.dm_exec_input_buffer(req.session_id, req.request_id) input_buffer"
168+
156169

157170
def _hash_to_hex(hash) -> str:
158171
return binascii.hexlify(hash).decode("utf-8")
@@ -195,6 +208,12 @@ def __init__(self, check, config: SQLServerConfig):
195208
)
196209
self._time_since_last_activity_event = 0
197210

211+
self._collect_raw_query_statement = self._config.collect_raw_query_statement.get("enabled", False)
212+
self._raw_statement_text_cache = RateLimitingTTLCache(
213+
maxsize=self._config.collect_raw_query_statement["cache_max_size"],
214+
ttl=60 * 60 / self._config.collect_raw_query_statement["samples_per_hour_per_query"],
215+
)
216+
198217
def _close_db_conn(self):
199218
pass
200219

@@ -245,12 +264,14 @@ def _append_filter(filter: str) -> str:
245264
return rows
246265

247266
@tracked_method(agent_check_getter=agent_check_getter, track_result_length=True)
248-
def _get_activity(self, cursor, exec_request_columns):
267+
def _get_activity(self, cursor, exec_request_columns, input_buffer_columns, input_buffer_join):
249268
self.log.debug("collecting sql server activity")
250269
query = ACTIVITY_QUERY.format(
251270
exec_request_columns=', '.join(['req.{}'.format(r) for r in exec_request_columns]),
252271
proc_char_limit=self._config.stored_procedure_characters_limit,
253272
tail_text_size=TAIL_TEXT_SIZE,
273+
input_buffer_columns=input_buffer_columns,
274+
input_buffer_join=input_buffer_join,
254275
)
255276
self.log.debug("Running query [%s]", query)
256277
cursor.execute(query)
@@ -293,13 +314,69 @@ def _normalize_queries_and_filter_rows(self, rows, max_bytes_limit):
293314
normalized_rows.append(row)
294315
return normalized_rows
295316

317+
@tracked_method(agent_check_getter=agent_check_getter)
def _rows_to_raw_statement_events(self, rows):
    """
    Yield raw query text ("rqt") events built from normalized activity rows.

    Each row is mutated in place: its ``raw_statement`` key is always removed,
    and ``raw_query_signature`` is added whenever a raw statement was present
    on a row that has a ``query_signature``.

    A row produces no event when it has no ``query_signature``, when it has no
    raw statement, or when ``self._raw_statement_text_cache.acquire`` returns a
    falsy value for the ``(query_signature, raw_query_signature)`` key.

    :param rows: iterable of row dicts from the activity query
    :return: generator of event dicts
    """
    for row in rows:
        # Strip the raw text before any early exit: the same row dicts are
        # serialized into the activity event afterwards, so a row skipped
        # below must not keep carrying the unobfuscated statement.
        raw_statement = row.pop("raw_statement", None)

        query_signature = row.get('query_signature')
        if not query_signature:
            continue

        if not raw_statement:
            self.log.debug("No raw statement found for query_signature=%s", query_signature)
            continue

        raw_query_signature = compute_sql_signature(raw_statement)
        row["raw_query_signature"] = raw_query_signature
        raw_statement_key = (query_signature, raw_query_signature)

        if not self._raw_statement_text_cache.acquire(raw_statement_key):
            continue

        yield {
            "timestamp": time.time() * 1000,
            "host": self._check.resolved_hostname,
            "ddagentversion": datadog_agent.get_version(),
            "ddsource": "sqlserver",
            "dbm_type": "rqt",
            "ddtags": ",".join(self.tags),
            'service': self._config.service,
            "db": {
                "instance": row.get('database_name', None),
                "query_signature": query_signature,
                "raw_query_signature": raw_query_signature,
                "statement": raw_statement,
                "metadata": {
                    # .get for all three keys so one missing metadata field
                    # cannot abort the generator with a KeyError mid-batch.
                    "tables": row.get('dd_tables', None),
                    "commands": row.get('dd_commands', None),
                    "comments": row.get('dd_comments', None),
                },
                "procedure_signature": row.get("procedure_signature"),
                "procedure_name": row.get("procedure_name"),
            },
            "sqlserver": {
                "query_hash": row.get("query_hash"),
                "query_plan_hash": row.get("query_plan_hash"),
            },
        }
362+
296363
def _get_exec_requests_cols_cached(self, cursor, expected_cols):
297364
if self._exec_requests_cols_cached:
298365
return self._exec_requests_cols_cached
299366

300367
self._exec_requests_cols_cached = self._get_available_requests_columns(cursor, expected_cols)
301368
return self._exec_requests_cols_cached
302369

370+
def _get_input_buffer_columns_and_join(self):
371+
input_buffer_columns = ""
372+
input_buffer_join = ""
373+
374+
if self._collect_raw_query_statement:
375+
input_buffer_columns = ", ".join(INPUT_BUFFER_COLUMNS) + ","
376+
input_buffer_join = INPUT_BUFFER_JOIN
377+
378+
return input_buffer_columns, input_buffer_join
379+
303380
def _get_available_requests_columns(self, cursor, all_expected_columns):
304381
cursor.execute("select TOP 0 * from sys.dm_exec_requests")
305382
all_columns = {i[0] for i in cursor.description}
@@ -418,8 +495,14 @@ def collect_activity(self):
418495
with self._check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
419496
connections = self._get_active_connections(cursor)
420497
request_cols = self._get_exec_requests_cols_cached(cursor, DM_EXEC_REQUESTS_COLS)
421-
rows = self._get_activity(cursor, request_cols)
498+
input_buffer_columns, input_buffer_join = self._get_input_buffer_columns_and_join()
499+
rows = self._get_activity(cursor, request_cols, input_buffer_columns, input_buffer_join)
422500
normalized_rows = self._normalize_queries_and_filter_rows(rows, MAX_PAYLOAD_BYTES)
501+
if self._collect_raw_query_statement:
502+
for raw_statement_event in self._rows_to_raw_statement_events(normalized_rows):
503+
self._check.database_monitoring_query_sample(
504+
json.dumps(raw_statement_event, default=default_json_event_encoding)
505+
)
423506
event = self._create_activity_event(normalized_rows, connections)
424507
payload = json.dumps(event, default=default_json_event_encoding)
425508
self._check.database_monitoring_query_activity(payload)

sqlserver/datadog_checks/sqlserver/config.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818
class SQLServerConfig:
1919
def __init__(self, init_config, instance, log):
2020
self.log = log
21-
self.tags: list[str] = self._build_tags(
22-
custom_tags=instance.get('tags', []),
23-
propagate_agent_tags=self._should_propagate_agent_tags(instance, init_config),
24-
)
2521
self.reported_hostname: str = instance.get('reported_hostname')
2622
self.autodiscovery: bool = is_affirmative(instance.get('database_autodiscovery'))
2723
self.autodiscovery_include: list[str] = instance.get('autodiscovery_include', ['.*']) or ['.*']
@@ -107,13 +103,25 @@ def __init__(self, init_config, instance, log):
107103
}
108104
)
109105
)
106+
collect_raw_query_statement_config: dict = instance.get('collect_raw_query_statement', {}) or {}
107+
self.collect_raw_query_statement = {
108+
"enabled": is_affirmative(collect_raw_query_statement_config.get('enabled', False)),
109+
"cache_max_size": int(collect_raw_query_statement_config.get('cache_max_size', 10000)),
110+
"samples_per_hour_per_query": int(collect_raw_query_statement_config.get('samples_per_hour_per_query', 1)),
111+
}
110112
self.log_unobfuscated_queries: bool = is_affirmative(instance.get('log_unobfuscated_queries', False))
111113
self.log_unobfuscated_plans: bool = is_affirmative(instance.get('log_unobfuscated_plans', False))
112114
self.stored_procedure_characters_limit: int = instance.get('stored_procedure_characters_limit', PROC_CHAR_LIMIT)
113115
self.connection_host: str = instance['host']
114116
self.service = instance.get('service') or init_config.get('service') or ''
115117
self.db_fragmentation_object_names = instance.get('db_fragmentation_object_names', []) or []
116118

119+
self.tags: list[str] = self._build_tags(
120+
custom_tags=instance.get('tags', []),
121+
propagate_agent_tags=self._should_propagate_agent_tags(instance, init_config),
122+
additional_tags=["raw_query_statement:enabled"] if self.collect_raw_query_statement["enabled"] else [],
123+
)
124+
117125
def _compile_valid_patterns(self, patterns: list[str]) -> re.Pattern:
118126
valid_patterns = []
119127

@@ -135,7 +143,7 @@ def _compile_valid_patterns(self, patterns: list[str]) -> re.Pattern:
135143
# create unmatchable regex - https://stackoverflow.com/a/1845097/2157429
136144
return re.compile(r'(?!x)x')
137145

138-
def _build_tags(self, custom_tags, propagate_agent_tags):
146+
def _build_tags(self, custom_tags, propagate_agent_tags, additional_tags):
139147
# Clean up tags in case there was a None entry in the instance
140148
# e.g. if the yaml contains tags: but no actual tags
141149
if custom_tags is None:
@@ -151,6 +159,9 @@ def _build_tags(self, custom_tags, propagate_agent_tags):
151159
raise ConfigurationError(
152160
'propagate_agent_tags enabled but there was an error fetching agent tags {}'.format(e)
153161
)
162+
163+
if additional_tags:
164+
tags.extend(additional_tags)
154165
return tags
155166

156167
@staticmethod

sqlserver/datadog_checks/sqlserver/config_models/instance.py

+9
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ class Azure(BaseModel):
4848
fully_qualified_domain_name: Optional[str] = None
4949

5050

51+
# Instance-config model for the `collect_raw_query_statement` option.
# NOTE(review): only `enabled` is declared here, while the check's config code also
# reads `cache_max_size` and `samples_per_hour_per_query` from the same mapping -
# confirm whether those keys are meant to stay undeclared.
class CollectRawQueryStatement(BaseModel):
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        frozen=True,
    )
    enabled: Optional[bool] = None
57+
58+
5159
class CollectSettings(BaseModel):
5260
model_config = ConfigDict(
5361
arbitrary_types_allowed=True,
@@ -344,6 +352,7 @@ class InstanceConfig(BaseModel):
344352
autodiscovery_include: Optional[tuple[str, ...]] = None
345353
aws: Optional[Aws] = None
346354
azure: Optional[Azure] = None
355+
collect_raw_query_statement: Optional[CollectRawQueryStatement] = None
347356
collect_settings: Optional[CollectSettings] = None
348357
command_timeout: Optional[int] = None
349358
connection_string: Optional[str] = None

sqlserver/datadog_checks/sqlserver/statements.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Licensed under a 3-clause BSD style license (see LICENSE)
44

55
import binascii
6+
import copy
67
import math
78
import time
89

@@ -234,6 +235,8 @@ def __init__(self, check, config: SQLServerConfig):
234235
self._last_stats_query_time = None
235236
self._max_query_metrics = self._config.statement_metrics_config.get("max_queries", 250)
236237

238+
self._collect_raw_query_statement = self._config.collect_raw_query_statement.get("enabled", False)
239+
237240
def _init_caches(self):
238241
# full_statement_text_cache: limit the ingestion rate of full statement text events per query_signature
239242
self._full_statement_text_cache = TTLCache(
@@ -585,7 +588,7 @@ def _collect_plans(self, rows, cursor, deadline):
585588
query_signature = None
586589
if 'database_name' in row:
587590
tags += ["db:{}".format(row['database_name'])]
588-
yield {
591+
obfuscated_plan_event = {
589592
"host": self._check.resolved_hostname,
590593
"ddagentversion": datadog_agent.get_version(),
591594
"ddsource": "sqlserver",
@@ -623,3 +626,9 @@ def _collect_plans(self, rows, cursor, deadline):
623626
'total_elapsed_time': row.get('total_elapsed_time', None),
624627
},
625628
}
629+
yield obfuscated_plan_event
630+
if self._collect_raw_query_statement:
631+
raw_plan_event = copy.deepcopy(obfuscated_plan_event)
632+
raw_plan_event["dbm_type"] = "rqp" # raw query plan
633+
raw_plan_event["db"]["plan"]["definition"] = raw_plan
634+
yield raw_plan_event

sqlserver/tests/compose-ha/sql/aoag_primary.sql

+7
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,15 @@ BEGIN
154154
END;
155155
GO
156156

157+
CREATE PROCEDURE fredProcParams @Name nvarchar(8) = NULL AS
158+
BEGIN
159+
SELECT * FROM ϑings WHERE name like @Name;
160+
END;
161+
GO
162+
157163
GRANT EXECUTE on bobProcParams to bob;
158164
GRANT EXECUTE on bobProc to bob;
165+
GRANT EXECUTE on fredProcParams to fred;
159166
GRANT EXECUTE on bobProc to fred;
160167
GO
161168

sqlserver/tests/compose-high-cardinality-windows/setup.sql

+7
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,15 @@ BEGIN
151151
END;
152152
GO
153153

154+
CREATE PROCEDURE fredProcParams @Name nvarchar(8) = NULL AS
155+
BEGIN
156+
SELECT * FROM ϑings WHERE name like @Name;
157+
END;
158+
GO
159+
154160
GRANT EXECUTE on bobProcParams to bob;
155161
GRANT EXECUTE on bobProc to bob;
162+
GRANT EXECUTE on fredProcParams to fred;
156163
GRANT EXECUTE on bobProc to fred;
157164
GO
158165

sqlserver/tests/compose-high-cardinality/setup.sql

+7
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,15 @@ BEGIN
223223
END;
224224
GO
225225

226+
CREATE PROCEDURE fredProcParams @Name nvarchar(8) = NULL AS
227+
BEGIN
228+
SELECT * FROM ϑings WHERE name like @Name;
229+
END;
230+
GO
231+
226232
GRANT EXECUTE on bobProcParams to bob;
227233
GRANT EXECUTE on bobProc to bob;
234+
GRANT EXECUTE on fredProcParams to fred;
228235
GRANT EXECUTE on bobProc to fred;
229236
GO
230237

sqlserver/tests/compose-windows/setup.sql

+8
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,16 @@ BEGIN
152152
SELECT id FROM ϑings WHERE name = @P2;
153153
END;
154154
GO
155+
156+
CREATE PROCEDURE fredProcParams @Name nvarchar(8) = NULL AS
157+
BEGIN
158+
SELECT * FROM ϑings WHERE name like @Name;
159+
END;
160+
GO
161+
155162
GRANT EXECUTE on bobProcParams to bob;
156163
GRANT EXECUTE on bobProc to bob;
164+
GRANT EXECUTE on fredProcParams to fred;
157165
GRANT EXECUTE on bobProc to fred;
158166
GO
159167

sqlserver/tests/compose/setup.sql

+7
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,15 @@ BEGIN
137137
END;
138138
GO
139139

140+
CREATE PROCEDURE fredProcParams @Name nvarchar(8) = NULL AS
141+
BEGIN
142+
SELECT * FROM ϑings WHERE name like @Name;
143+
END;
144+
GO
145+
140146
GRANT EXECUTE on bobProcParams to bob;
141147
GRANT EXECUTE on bobProc to bob;
148+
GRANT EXECUTE on fredProcParams to fred;
142149
GRANT EXECUTE on bobProc to fred;
143150
GO
144151

0 commit comments

Comments
 (0)