Fix Azure SQL schema query selection (DataDog#23533)

lu-zhengda · web-flow · commit 475d23dc0d45 · 2026-05-01T20:31:59.000Z
diff --git a/sqlserver/changelog.d/23533.fixed b/sqlserver/changelog.d/23533.fixed
@@ -0,0 +1 @@
+Fix Azure SQL Database and Azure SQL Managed Instance schema collection by using database compatibility level for schema query selection.
diff --git a/sqlserver/datadog_checks/sqlserver/queries.py b/sqlserver/datadog_checks/sqlserver/queries.py
@@ -7,7 +7,8 @@
 # `{}` is replaced with comma-separated ODBC `?` placeholders (values bound via parameters).
 DB_QUERY = """
 SELECT
-    db.database_id AS id, db.name AS name, db.collation_name AS collation, dp.name AS owner
+    db.database_id AS id, db.name AS name, db.collation_name AS collation, dp.name AS owner,
+    db.compatibility_level AS compatibility_level
 FROM
     sys.databases db LEFT JOIN sys.database_principals dp ON db.owner_sid = dp.sid
 WHERE db.name IN ({});
diff --git a/sqlserver/datadog_checks/sqlserver/schemas.py b/sqlserver/datadog_checks/sqlserver/schemas.py
@@ -8,14 +8,15 @@
 from typing import TYPE_CHECKING, TypedDict
 
 from datadog_checks.base.utils.serialization import json
-from datadog_checks.sqlserver.utils import construct_use_statement, execute_query
+from datadog_checks.sqlserver.utils import construct_use_statement, execute_query, is_azure_database
 
 if TYPE_CHECKING:
     from datadog_checks.sqlserver import SQLServer
 
 from datadog_checks.base.utils.db.schemas import SchemaCollector, SchemaCollectorConfig
 from datadog_checks.sqlserver.const import (
     DEFAULT_SCHEMAS_COLLECTION_INTERVAL,
+    STATIC_INFO_ENGINE_EDITION,
     STATIC_INFO_MAJOR_VERSION,
 )
 from datadog_checks.sqlserver.queries import (
@@ -32,13 +33,16 @@
 
 KEY_PREFIX = "dbm-schemas-"
 KEY_PREFIX_PRE_2017 = "dbm-schemas-pre-2017"
+# The modern schema query uses database-scoped JSON output, which requires SQL Server 2016 compatibility.
+MINIMUM_JSON_COMPATIBILITY_LEVEL = 130
 
 
 class DatabaseInfo(TypedDict):
     name: str
     id: str
     collation: str
     owner: str
+    compatibility_level: str
 
 
 # The schema collector sends lists of DatabaseObjects to the agent
@@ -81,41 +85,73 @@ def __init__(self, check: SQLServer):
         )
         config.max_tables = check._config.schema_config.get('max_tables', 300)
         self._is_2016_or_earlier = None
+        self._database_compatibility_levels: dict[str, int] = {}
         super().__init__(check, config)
 
-    def collect_schemas(self):
-        # We wait until collect is called to check for static information
-        major_version = int(self._check.static_info_cache.get(STATIC_INFO_MAJOR_VERSION) or 0)
-        if major_version == 0:
-            self._check.log.debug("major_version is not available yet, defaulting to 2016 or earlier")
-        self._is_2016_or_earlier = major_version <= 13
-
-        super().collect_schemas()
-
     @property
     def kind(self):
         return "sqlserver_databases"
 
-    def _get_databases(self):
+    def _get_databases(self) -> list[DatabaseInfo]:
         database_names = self._check.get_databases()
         with self._check.connection.open_managed_default_connection(KEY_PREFIX):
             with self._check.connection.get_managed_cursor(KEY_PREFIX) as cursor:
                 if not database_names:
                     return []
                 placeholders = ",".join(["?"] * len(database_names))
                 query = DB_QUERY.format(placeholders)
-                return execute_query(query, cursor, convert_results_to_str=True, parameters=tuple(database_names))
+                databases = execute_query(query, cursor, convert_results_to_str=True, parameters=tuple(database_names))
+                self._record_database_compatibility_levels(databases)
+                return databases
 
     @contextlib.contextmanager
     def _get_cursor(self, database_name):
         with self._check.connection.open_managed_default_connection(KEY_PREFIX):
             with self._check.connection.get_managed_cursor(KEY_PREFIX) as cursor:
                 switch_db_statement = construct_use_statement(database_name)
                 cursor.execute(switch_db_statement)
+                self._is_2016_or_earlier = self._should_use_legacy_schema_query(database_name)
                 query = self._get_tables_query()
                 cursor.execute(query)
                 yield cursor
 
+    def _record_database_compatibility_levels(self, databases: list[DatabaseInfo]) -> None:
+        self._database_compatibility_levels = {}
+        for database in databases:
+            self._database_compatibility_levels[database["name"]] = int(database["compatibility_level"])
+
+    def _should_use_legacy_schema_query(self, database_name: str) -> bool:
+        """
+        Return whether the current database needs the legacy schema query.
+
+        The modern schema query depends on two SQL Server features:
+        - STRING_AGG, used to build index column lists. On self-managed SQL Server, this requires SQL Server 2017
+          or later. Azure SQL Database and Azure SQL Managed Instance report ProductMajorVersion 12 while still
+          supporting STRING_AGG, so only self-managed SQL Server uses this version gate.
+        - JSON output, used for column, index, and foreign key metadata. This is controlled by each database's
+          compatibility_level and requires level 130 or higher.
+
+        If ProductMajorVersion is missing, use the legacy query because we cannot confirm that STRING_AGG is
+        available.
+        """
+        engine_edition = self._check.static_info_cache.get(STATIC_INFO_ENGINE_EDITION)
+        if not is_azure_database(engine_edition):
+            major_version = int(self._check.static_info_cache.get(STATIC_INFO_MAJOR_VERSION) or 0)
+            if major_version == 0:
+                self._check.log.debug("major_version is not available yet, using legacy schema query")
+                return True
+            if major_version <= 13:
+                return True
+
+        compatibility_level = self._database_compatibility_levels.get(database_name)
+        if compatibility_level is None:
+            self._check.log.debug(
+                "compatibility_level is not available for SQL Server database %s, using pre-2017 schema query",
+                database_name,
+            )
+            return True
+        return compatibility_level < MINIMUM_JSON_COMPATIBILITY_LEVEL
+
     def _get_tables_query(self):
         limit = int(self._config.max_tables or 1_000_000)
 
diff --git a/sqlserver/tests/test_metadata.py b/sqlserver/tests/test_metadata.py
@@ -21,6 +21,11 @@
     pyodbc = None
 
 
+def normalize_compatibility_level(actual_payload):
+    assert actual_payload['compatibility_level'].isdigit()
+    actual_payload['compatibility_level'] = 'normalized_value'
+
+
 @pytest.fixture
 def dbm_instance(instance_docker):
     instance_docker['dbm'] = True
@@ -101,6 +106,7 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
         'name': 'datadog_test_schemas_second',
         "collation": "SQL_Latin1_General_CP1_CI_AS",
         'owner': 'dbo',
+        'compatibility_level': 'normalized_value',
         'schemas': [
             {
                 'name': 'dbo',
@@ -147,6 +153,7 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
         'name': 'datadog_test_schemas',
         "collation": "SQL_Latin1_General_CP1_CI_AS",
         'owner': 'dbo',
+        'compatibility_level': 'normalized_value',
         'schemas': [
             {
                 'name': 'test_schema',
@@ -373,6 +380,8 @@ def test_collect_schemas(aggregator, dd_run_check, dbm_instance):
         assert db_name in databases_to_find
         # id's are env dependant
         normalize_ids(actual_payload)
+        # compatibility_level varies by SQL Server version
+        normalize_compatibility_level(actual_payload)
         # index columns may be in any order
         normalize_indexes_columns(actual_payload)
         matches = deep_compare(actual_payload, expected_data_for_db[db_name])
diff --git a/sqlserver/tests/test_schemas.py b/sqlserver/tests/test_schemas.py
@@ -15,9 +15,12 @@
 
 pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')]
 
+SCHEMA_DATABASE = 'datadog_test_schemas'
+
 
 @pytest.fixture
 def dbm_instance(instance_docker):
+    instance_docker['database'] = SCHEMA_DATABASE
     instance_docker['dbm'] = True
     instance_docker['min_collection_interval'] = 0.1
     instance_docker['query_samples'] = {'enabled': False}
@@ -47,11 +50,17 @@ def _check(instance: dict, init_config: dict = None):
         c.cancel()
 
 
+def create_schema_collector(check: SQLServer) -> SQLServerSchemaCollector:
+    collector = SQLServerSchemaCollector(check)
+    collector._get_databases()
+    return collector
+
+
 def test_get_cursor(dbm_instance, integration_check):
     check = integration_check(dbm_instance)
-    collector = SQLServerSchemaCollector(check)
+    collector = create_schema_collector(check)
 
-    with collector._get_cursor('datadog_test_schemas') as cursor:
+    with collector._get_cursor(SCHEMA_DATABASE) as cursor:
         assert cursor is not None
         schemas = []
         rows = cursor.fetchall_dict()
@@ -65,9 +74,9 @@ def test_get_cursor(dbm_instance, integration_check):
 
 def test_tables(dbm_instance, integration_check):
     check = integration_check(dbm_instance)
-    collector = SQLServerSchemaCollector(check)
+    collector = create_schema_collector(check)
 
-    with collector._get_cursor('datadog_test_schemas') as cursor:
+    with collector._get_cursor(SCHEMA_DATABASE) as cursor:
         assert cursor is not None
         tables = []
         rows = cursor.fetchall_dict()
@@ -80,13 +89,12 @@ def test_tables(dbm_instance, integration_check):
 
 def test_columns(dbm_instance, integration_check):
     check = integration_check(dbm_instance)
-    collector = SQLServerSchemaCollector(check)
+    collector = create_schema_collector(check)
 
-    with collector._get_cursor('datadog_test_schemas') as cursor:
+    with collector._get_cursor(SCHEMA_DATABASE) as cursor:
         assert cursor is not None
         # Assert that at least one row has columns
         rows = cursor.fetchall_dict()
-        print(rows)
         assert any(row['columns'] for row in rows)
         for row in rows:
             if row['columns']:
@@ -101,9 +109,9 @@ def test_columns(dbm_instance, integration_check):
 
 def test_indexes(dbm_instance, integration_check):
     check = integration_check(dbm_instance)
-    collector = SQLServerSchemaCollector(check)
+    collector = create_schema_collector(check)
 
-    with collector._get_cursor('datadog_test_schemas') as cursor:
+    with collector._get_cursor(SCHEMA_DATABASE) as cursor:
         assert cursor is not None
         # Assert that at least one row has indexes
         rows = cursor.fetchall_dict()
diff --git a/sqlserver/tests/test_unit.py b/sqlserver/tests/test_unit.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Fix Azure SQL Database and Azure SQL Managed Instance schema collection by using database compatibility level for schema query selection.`