Skip to content

Commit 475d23d

Browse files
authored
Fix Azure SQL schema query selection (DataDog#23533)
1 parent 6451848 commit 475d23d

6 files changed

Lines changed: 243 additions & 22 deletions

File tree

sqlserver/changelog.d/23533.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix schema collection on Azure SQL Database and Azure SQL Managed Instance by selecting the schema query based on each database's compatibility level.

sqlserver/datadog_checks/sqlserver/queries.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
# `{}` is replaced with comma-separated ODBC `?` placeholders (values bound via parameters).
88
DB_QUERY = """
99
SELECT
10-
db.database_id AS id, db.name AS name, db.collation_name AS collation, dp.name AS owner
10+
db.database_id AS id, db.name AS name, db.collation_name AS collation, dp.name AS owner,
11+
db.compatibility_level AS compatibility_level
1112
FROM
1213
sys.databases db LEFT JOIN sys.database_principals dp ON db.owner_sid = dp.sid
1314
WHERE db.name IN ({});

sqlserver/datadog_checks/sqlserver/schemas.py

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88
from typing import TYPE_CHECKING, TypedDict
99

1010
from datadog_checks.base.utils.serialization import json
11-
from datadog_checks.sqlserver.utils import construct_use_statement, execute_query
11+
from datadog_checks.sqlserver.utils import construct_use_statement, execute_query, is_azure_database
1212

1313
if TYPE_CHECKING:
1414
from datadog_checks.sqlserver import SQLServer
1515

1616
from datadog_checks.base.utils.db.schemas import SchemaCollector, SchemaCollectorConfig
1717
from datadog_checks.sqlserver.const import (
1818
DEFAULT_SCHEMAS_COLLECTION_INTERVAL,
19+
STATIC_INFO_ENGINE_EDITION,
1920
STATIC_INFO_MAJOR_VERSION,
2021
)
2122
from datadog_checks.sqlserver.queries import (
@@ -32,13 +33,16 @@
3233

3334
KEY_PREFIX = "dbm-schemas-"
3435
KEY_PREFIX_PRE_2017 = "dbm-schemas-pre-2017"
36+
# The modern schema query uses database-scoped JSON output, which requires database compatibility level 130 (SQL Server 2016) or higher.
37+
MINIMUM_JSON_COMPATIBILITY_LEVEL = 130
3538

3639

3740
class DatabaseInfo(TypedDict):
3841
name: str
3942
id: str
4043
collation: str
4144
owner: str
45+
compatibility_level: str
4246

4347

4448
# The schema collector sends lists of DatabaseObjects to the agent
@@ -81,41 +85,73 @@ def __init__(self, check: SQLServer):
8185
)
8286
config.max_tables = check._config.schema_config.get('max_tables', 300)
8387
self._is_2016_or_earlier = None
88+
self._database_compatibility_levels: dict[str, int] = {}
8489
super().__init__(check, config)
8590

86-
def collect_schemas(self):
87-
# We wait until collect is called to check for static information
88-
major_version = int(self._check.static_info_cache.get(STATIC_INFO_MAJOR_VERSION) or 0)
89-
if major_version == 0:
90-
self._check.log.debug("major_version is not available yet, defaulting to 2016 or earlier")
91-
self._is_2016_or_earlier = major_version <= 13
92-
93-
super().collect_schemas()
94-
9591
@property
9692
def kind(self):
9793
return "sqlserver_databases"
9894

99-
def _get_databases(self):
95+
def _get_databases(self) -> list[DatabaseInfo]:
10096
database_names = self._check.get_databases()
10197
with self._check.connection.open_managed_default_connection(KEY_PREFIX):
10298
with self._check.connection.get_managed_cursor(KEY_PREFIX) as cursor:
10399
if not database_names:
104100
return []
105101
placeholders = ",".join(["?"] * len(database_names))
106102
query = DB_QUERY.format(placeholders)
107-
return execute_query(query, cursor, convert_results_to_str=True, parameters=tuple(database_names))
103+
databases = execute_query(query, cursor, convert_results_to_str=True, parameters=tuple(database_names))
104+
self._record_database_compatibility_levels(databases)
105+
return databases
108106

109107
@contextlib.contextmanager
110108
def _get_cursor(self, database_name):
111109
with self._check.connection.open_managed_default_connection(KEY_PREFIX):
112110
with self._check.connection.get_managed_cursor(KEY_PREFIX) as cursor:
113111
switch_db_statement = construct_use_statement(database_name)
114112
cursor.execute(switch_db_statement)
113+
self._is_2016_or_earlier = self._should_use_legacy_schema_query(database_name)
115114
query = self._get_tables_query()
116115
cursor.execute(query)
117116
yield cursor
118117

118+
def _record_database_compatibility_levels(self, databases: list[DatabaseInfo]) -> None:
119+
self._database_compatibility_levels = {}
120+
for database in databases:
121+
self._database_compatibility_levels[database["name"]] = int(database["compatibility_level"])
122+
123+
def _should_use_legacy_schema_query(self, database_name: str) -> bool:
124+
"""
125+
Return whether the current database needs the legacy schema query.
126+
127+
The modern schema query depends on two SQL Server features:
128+
- STRING_AGG, used to build index column lists. On self-managed SQL Server, this requires SQL Server 2017
129+
or later. Azure SQL Database and Azure SQL Managed Instance report ProductMajorVersion 12 while still
130+
supporting STRING_AGG, so only self-managed SQL Server uses this version gate.
131+
- JSON output, used for column, index, and foreign key metadata. This is controlled by each database's
132+
compatibility_level and requires level 130 or higher.
133+
134+
If ProductMajorVersion is missing, use the legacy query because we cannot confirm that STRING_AGG is
135+
available.
136+
"""
137+
engine_edition = self._check.static_info_cache.get(STATIC_INFO_ENGINE_EDITION)
138+
if not is_azure_database(engine_edition):
139+
major_version = int(self._check.static_info_cache.get(STATIC_INFO_MAJOR_VERSION) or 0)
140+
if major_version == 0:
141+
self._check.log.debug("major_version is not available yet, using legacy schema query")
142+
return True
143+
if major_version <= 13:
144+
return True
145+
146+
compatibility_level = self._database_compatibility_levels.get(database_name)
147+
if compatibility_level is None:
148+
self._check.log.debug(
149+
"compatibility_level is not available for SQL Server database %s, using pre-2017 schema query",
150+
database_name,
151+
)
152+
return True
153+
return compatibility_level < MINIMUM_JSON_COMPATIBILITY_LEVEL
154+
119155
def _get_tables_query(self):
120156
limit = int(self._config.max_tables or 1_000_000)
121157

sqlserver/tests/test_metadata.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121
pyodbc = None
2222

2323

24+
def normalize_compatibility_level(actual_payload):
25+
assert actual_payload['compatibility_level'].isdigit()
26+
actual_payload['compatibility_level'] = 'normalized_value'
27+
28+
2429
@pytest.fixture
2530
def dbm_instance(instance_docker):
2631
instance_docker['dbm'] = True
@@ -101,6 +106,7 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
101106
'name': 'datadog_test_schemas_second',
102107
"collation": "SQL_Latin1_General_CP1_CI_AS",
103108
'owner': 'dbo',
109+
'compatibility_level': 'normalized_value',
104110
'schemas': [
105111
{
106112
'name': 'dbo',
@@ -147,6 +153,7 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
147153
'name': 'datadog_test_schemas',
148154
"collation": "SQL_Latin1_General_CP1_CI_AS",
149155
'owner': 'dbo',
156+
'compatibility_level': 'normalized_value',
150157
'schemas': [
151158
{
152159
'name': 'test_schema',
@@ -373,6 +380,8 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
373380
assert db_name in databases_to_find
374381
# ids are environment-dependent
375382
normalize_ids(actual_payload)
383+
# compatibility_level varies by SQL Server version
384+
normalize_compatibility_level(actual_payload)
376385
# index columns may be in any order
377386
normalize_indexes_columns(actual_payload)
378387
matches = deep_compare(actual_payload, expected_data_for_db[db_name])

sqlserver/tests/test_schemas.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515

1616
pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')]
1717

18+
SCHEMA_DATABASE = 'datadog_test_schemas'
19+
1820

1921
@pytest.fixture
2022
def dbm_instance(instance_docker):
23+
instance_docker['database'] = SCHEMA_DATABASE
2124
instance_docker['dbm'] = True
2225
instance_docker['min_collection_interval'] = 0.1
2326
instance_docker['query_samples'] = {'enabled': False}
@@ -47,11 +50,17 @@ def _check(instance: dict, init_config: dict = None):
4750
c.cancel()
4851

4952

53+
def create_schema_collector(check: SQLServer) -> SQLServerSchemaCollector:
54+
collector = SQLServerSchemaCollector(check)
55+
collector._get_databases()
56+
return collector
57+
58+
5059
def test_get_cursor(dbm_instance, integration_check):
5160
check = integration_check(dbm_instance)
52-
collector = SQLServerSchemaCollector(check)
61+
collector = create_schema_collector(check)
5362

54-
with collector._get_cursor('datadog_test_schemas') as cursor:
63+
with collector._get_cursor(SCHEMA_DATABASE) as cursor:
5564
assert cursor is not None
5665
schemas = []
5766
rows = cursor.fetchall_dict()
@@ -65,9 +74,9 @@ def test_get_cursor(dbm_instance, integration_check):
6574

6675
def test_tables(dbm_instance, integration_check):
6776
check = integration_check(dbm_instance)
68-
collector = SQLServerSchemaCollector(check)
77+
collector = create_schema_collector(check)
6978

70-
with collector._get_cursor('datadog_test_schemas') as cursor:
79+
with collector._get_cursor(SCHEMA_DATABASE) as cursor:
7180
assert cursor is not None
7281
tables = []
7382
rows = cursor.fetchall_dict()
@@ -80,13 +89,12 @@ def test_tables(dbm_instance, integration_check):
8089

8190
def test_columns(dbm_instance, integration_check):
8291
check = integration_check(dbm_instance)
83-
collector = SQLServerSchemaCollector(check)
92+
collector = create_schema_collector(check)
8493

85-
with collector._get_cursor('datadog_test_schemas') as cursor:
94+
with collector._get_cursor(SCHEMA_DATABASE) as cursor:
8695
assert cursor is not None
8796
# Assert that at least one row has columns
8897
rows = cursor.fetchall_dict()
89-
print(rows)
9098
assert any(row['columns'] for row in rows)
9199
for row in rows:
92100
if row['columns']:
@@ -101,9 +109,9 @@ def test_columns(dbm_instance, integration_check):
101109

102110
def test_indexes(dbm_instance, integration_check):
103111
check = integration_check(dbm_instance)
104-
collector = SQLServerSchemaCollector(check)
112+
collector = create_schema_collector(check)
105113

106-
with collector._get_cursor('datadog_test_schemas') as cursor:
114+
with collector._get_cursor(SCHEMA_DATABASE) as cursor:
107115
assert cursor is not None
108116
# Assert that at least one row has indexes
109117
rows = cursor.fetchall_dict()

0 commit comments

Comments
 (0)