Skip to content

Commit 3a7bece

Browse files
Update IBM-i service check behavior (DataDog#23986)
* Adjust service check behavior, add new test
* Fix changelog name number issue
* Add new test to increase coverage
1 parent c97cf97 commit 3a7bece

4 files changed

Lines changed: 30 additions & 14 deletions

File tree

ibm_i/assets/service_checks.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@
1111
"critical"
1212
],
1313
"name": "Can Connect",
14-
"description": "Returns `CRITICAL` if the Agent is unable to connect and collect metrics from the monitored IBM i instance, otherwise returns `OK`."
14+
"description": "Returns CRITICAL if the Agent is unable to establish a connection to the monitored IBM i instance, otherwise returns OK."
1515
}
1616
]

ibm_i/changelog.d/23986.changed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Change `ibm_i.can_connect` service check to report OK when connection is successful, even if a query fails or times out. This aligns with the `.can_connect` service checks of our other integrations.

ibm_i/datadog_checks/ibm_i/check.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,9 @@ def __init__(self, name, init_config, instances):
2828
self._connection_string = None
2929
self._subprocess = None
3030
self._query_manager = None
31-
self._current_errors = 0
3231
self.check_initializations.append(self.set_up_query_manager)
3332

3433
def check(self, _):
35-
self._current_errors = 0
36-
3734
try:
3835
self.query_manager.execute()
3936
check_status = AgentCheck.OK
@@ -47,10 +44,6 @@ def check(self, _):
4744
check_status = AgentCheck.CRITICAL
4845
hostname = self.config.hostname if self.config else None
4946

50-
# At least one query failed, set the service check as failing
51-
if self._current_errors:
52-
check_status = AgentCheck.CRITICAL
53-
5447
if check_status is not None:
5548
self.service_check(
5649
self.SERVICE_CHECK_NAME,
@@ -63,10 +56,6 @@ def cancel(self):
6356
# When the check gets cancelled, clean up the connection subprocess.
6457
self._delete_connection_subprocess()
6558

66-
def handle_query_error(self, error):
67-
self._current_errors += 1
68-
return error
69-
7059
@property
7160
def connection_subprocess(self):
7261
if self._subprocess is None:
@@ -253,7 +242,6 @@ def set_up_query_manager(self):
253242
tags=self.config.tags,
254243
queries=query_list,
255244
hostname=hostname,
256-
error_handler=self.handle_query_error,
257245
)
258246
self._query_manager.compile_queries()
259247

ibm_i/tests/test_ibm_i.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,5 +317,32 @@ def test_check_query_error(aggregator, instance):
317317
assert check._query_manager is not None
318318
assert check._query_manager.hostname == "host"
319319
check.check(instance)
320-
aggregator.assert_service_check("ibm_i.can_connect", count=2, status=AgentCheck.CRITICAL)
320+
aggregator.assert_service_check("ibm_i.can_connect", count=2, status=AgentCheck.OK)
321+
aggregator.assert_all_metrics_covered()
322+
323+
324+
def test_connection_failure(aggregator, instance):
325+
check = IbmICheck('ibm_i', {}, [instance])
326+
check.log = mock.MagicMock()
327+
check.load_configuration_models()
328+
329+
with mock.patch('datadog_checks.ibm_i.IbmICheck.set_up_query_manager'):
330+
check.check(instance)
331+
assert check._query_manager is None
332+
aggregator.assert_service_check("ibm_i.can_connect", count=1, status=AgentCheck.CRITICAL)
333+
aggregator.assert_all_metrics_covered()
334+
335+
336+
def test_check_query_manager_execute_error(aggregator, instance):
337+
check = IbmICheck('ibm_i', {}, [instance])
338+
check.log = mock.MagicMock()
339+
check.load_configuration_models()
340+
check._query_manager = mock.MagicMock(hostname="host")
341+
check._query_manager.execute.side_effect = Exception("boom")
342+
343+
with mock.patch('datadog_checks.ibm_i.IbmICheck._delete_connection_subprocess') as delete_conn:
344+
check.check(instance)
345+
346+
delete_conn.assert_called_once()
347+
aggregator.assert_service_check("ibm_i.can_connect", count=1, status=AgentCheck.CRITICAL)
321348
aggregator.assert_all_metrics_covered()

0 commit comments

Comments
 (0)