Skip to content

Commit d7f8f86

Browse files
authored
[DBM][SQL Server] FCI Metrics without Cluster Name (DataDog#23264)
* updated fci query
* added tests
* removed unused variable
* minor changes to the test
* added changelog
* updated comment
1 parent ebc02a5 commit d7f8f86

3 files changed

Lines changed: 74 additions & 5 deletions

File tree

sqlserver/changelog.d/23264.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update the FCI metric collection so that FCI metrics are still reported when no cluster name is available.

sqlserver/datadog_checks/sqlserver/database_metrics/fci_metrics.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@
1818
status,
1919
is_current_owner
2020
FROM sys.dm_os_cluster_nodes
21-
-- `sys.dm_hadr_cluster` does not have a related column to join on, this cross join will add the
22-
-- `cluster_name` column to every row by multiplying all the rows in the left table against
23-
-- all the rows in the right table. Note, there will only be one row from `sys.dm_hadr_cluster`.
24-
CROSS JOIN (SELECT TOP 1 cluster_name FROM sys.dm_hadr_cluster) AS FC
21+
-- `sys.dm_hadr_cluster` does not have a related column to join on. OUTER APPLY attaches the
22+
-- `cluster_name` column to every row from `sys.dm_os_cluster_nodes`, preserving those rows
23+
-- even when `sys.dm_hadr_cluster` returns no rows (e.g. when Always On is not enabled, in
24+
-- which case `cluster_name` is NULL). Note, there will only be at most one row from
25+
-- `sys.dm_hadr_cluster`.
26+
OUTER APPLY (SELECT TOP 1 cluster_name FROM sys.dm_hadr_cluster) AS FC
2527
""".strip(),
2628
"columns": [
2729
{"name": "node_name", "type": "tag"},
2830
{"name": "status", "type": "tag"},
29-
{"name": "failover_cluster", "type": "tag"},
31+
{"name": "failover_cluster", "type": "tag_not_null"},
3032
{"name": "fci.status", "type": "gauge"},
3133
{"name": "fci.is_current_owner", "type": "gauge"},
3234
],

sqlserver/tests/test_database_metrics.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,72 @@ def execute_query_handler_mocked(query, db=None):
642642
aggregator.assert_metric(metric_name, value=metric_value, tags=expected_tags)
643643

644644

645+
@pytest.mark.integration
646+
@pytest.mark.usefixtures('dd_environment')
647+
def test_sqlserver_fci_metrics_without_cluster_name(
648+
aggregator,
649+
dd_run_check,
650+
init_config,
651+
instance_docker_metrics,
652+
):
653+
"""When sys.dm_hadr_cluster has no rows (HADR not configured), the OUTER APPLY returns NULL
654+
for cluster_name. FCI metrics should still be collected, and the failover_cluster tag should
655+
be omitted thanks to the tag_not_null column type."""
656+
instance_docker_metrics['database_autodiscovery'] = True
657+
instance_docker_metrics['database_metrics'] = {
658+
'fci_metrics': {'enabled': True},
659+
}
660+
661+
# Columns: (node_name, status_description, cluster_name, status, is_current_owner)
662+
mocked_results = [
663+
('node1', 'up', None, 0, 1),
664+
('node2', 'down', None, 1, 0),
665+
]
666+
667+
sqlserver_check = SQLServer(CHECK_NAME, init_config, [instance_docker_metrics])
668+
669+
def execute_query_handler_mocked(query, db=None):
670+
return mocked_results
671+
672+
fci_metrics = SqlserverFciMetrics(
673+
config=sqlserver_check._config,
674+
new_query_executor=sqlserver_check._new_query_executor,
675+
server_static_info=STATIC_SERVER_INFO,
676+
execute_query_handler=execute_query_handler_mocked,
677+
)
678+
679+
sqlserver_check._database_metrics = [fci_metrics]
680+
681+
dd_run_check(sqlserver_check)
682+
683+
base_tags = sqlserver_check._config.tags + [
684+
"database_hostname:{}".format("stubbed.hostname"),
685+
"database_instance:{}".format("stubbed.hostname"),
686+
"dd.internal.resource:database_instance:{}".format("stubbed.hostname"),
687+
"sqlserver_servername:{}".format(sqlserver_check.static_info_cache[STATIC_INFO_SERVERNAME].lower()),
688+
]
689+
690+
metric_names = fci_metrics.metric_names()[0]
691+
692+
for node_name, status, _cluster_name, *metric_values in mocked_results:
693+
# Metrics should still be emitted with node_name and status tags, but without failover_cluster
694+
expected_tags = [
695+
f'node_name:{node_name}',
696+
f'status:{status}',
697+
] + base_tags
698+
699+
for metric_name, metric_value in zip(metric_names, metric_values):
700+
aggregator.assert_metric(metric_name, value=metric_value, tags=expected_tags)
701+
702+
# Verify that no failover_cluster tag was emitted (tag_not_null skips NULL values)
703+
for metric_name in metric_names:
704+
for metric_stub in aggregator.metrics(metric_name):
705+
if f'node_name:{node_name}' in metric_stub.tags:
706+
assert all(not tag.startswith('failover_cluster:') for tag in metric_stub.tags), (
707+
"failover_cluster tag should not be present when cluster_name is NULL"
708+
)
709+
710+
645711
@pytest.mark.integration
646712
@pytest.mark.usefixtures('dd_environment')
647713
@pytest.mark.parametrize('include_primary_log_shipping_metrics', [True, False])

0 commit comments

Comments
 (0)