Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ibm_i/assets/service_checks.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@
"critical"
],
"name": "Can Connect",
"description": "Returns `CRITICAL` if the Agent is unable to connect and collect metrics from the monitored IBM i instance, otherwise returns `OK`."
"description": "Returns CRITICAL if the Agent is unable to establish a connection to the monitored IBM i instance, otherwise returns OK."
}
]
1 change: 1 addition & 0 deletions ibm_i/changelog.d/23986.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Change `ibm_i.can_connect` service check to report OK when the connection is successful, even if a query fails or times out. This aligns with the `.can_connect` service checks of our other integrations.
12 changes: 0 additions & 12 deletions ibm_i/datadog_checks/ibm_i/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,9 @@ def __init__(self, name, init_config, instances):
self._connection_string = None
self._subprocess = None
self._query_manager = None
self._current_errors = 0
self.check_initializations.append(self.set_up_query_manager)

def check(self, _):
self._current_errors = 0

try:
self.query_manager.execute()
check_status = AgentCheck.OK
Expand All @@ -47,10 +44,6 @@ def check(self, _):
check_status = AgentCheck.CRITICAL
hostname = self.config.hostname if self.config else None

# At least one query failed, set the service check as failing
if self._current_errors:
check_status = AgentCheck.CRITICAL

if check_status is not None:
self.service_check(
self.SERVICE_CHECK_NAME,
Expand All @@ -63,10 +56,6 @@ def cancel(self):
# When the check gets cancelled, clean up the connection subprocess.
self._delete_connection_subprocess()

def handle_query_error(self, error):
# Record one more query failure on this check instance, then hand the
# error back to the caller unchanged.
self._current_errors += 1
return error

@property
def connection_subprocess(self):
if self._subprocess is None:
Expand Down Expand Up @@ -253,7 +242,6 @@ def set_up_query_manager(self):
tags=self.config.tags,
queries=query_list,
hostname=hostname,
error_handler=self.handle_query_error,
)
self._query_manager.compile_queries()

Expand Down
29 changes: 28 additions & 1 deletion ibm_i/tests/test_ibm_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,5 +317,32 @@ def test_check_query_error(aggregator, instance):
assert check._query_manager is not None
assert check._query_manager.hostname == "host"
check.check(instance)
aggregator.assert_service_check("ibm_i.can_connect", count=2, status=AgentCheck.CRITICAL)
aggregator.assert_service_check("ibm_i.can_connect", count=2, status=AgentCheck.OK)
aggregator.assert_all_metrics_covered()


def test_connection_failure(aggregator, instance):
# Runs the check with `set_up_query_manager` patched out and expects a single
# CRITICAL `ibm_i.can_connect` service check.
check = IbmICheck('ibm_i', {}, [instance])
# Replace the logger with a mock.
check.log = mock.MagicMock()
check.load_configuration_models()

# Query-manager setup is replaced by a mock while the check runs.
with mock.patch('datadog_checks.ibm_i.IbmICheck.set_up_query_manager'):
check.check(instance)
# No query manager should have been assigned.
assert check._query_manager is None
# Exactly one CRITICAL service check is expected.
aggregator.assert_service_check("ibm_i.can_connect", count=1, status=AgentCheck.CRITICAL)
aggregator.assert_all_metrics_covered()


def test_check_query_manager_execute_error(aggregator, instance):
# Runs the check with a query manager whose `execute()` raises, and expects the
# connection subprocess cleanup to be called once and a single CRITICAL
# `ibm_i.can_connect` service check to be submitted.
check = IbmICheck('ibm_i', {}, [instance])
# Replace the logger with a mock.
check.log = mock.MagicMock()
check.load_configuration_models()
# Inject a mock query manager directly and make its execute() raise.
check._query_manager = mock.MagicMock(hostname="host")
check._query_manager.execute.side_effect = Exception("boom")

# Patch the cleanup method so the call can be observed.
with mock.patch('datadog_checks.ibm_i.IbmICheck._delete_connection_subprocess') as delete_conn:
check.check(instance)

# Cleanup must have been invoked exactly once.
delete_conn.assert_called_once()
aggregator.assert_service_check("ibm_i.can_connect", count=1, status=AgentCheck.CRITICAL)
aggregator.assert_all_metrics_covered()
3 changes: 0 additions & 3 deletions network_path/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,4 @@ datadog.network_path.collector.worker.task_duration.count,gauge,,second,,Duratio
datadog.network_path.collector.worker.task_duration.max,gauge,,second,,Duration of a worker task.,0,network_path,,,
datadog.network_path.collector.worker.task_duration.median,gauge,,second,,Duration of a worker task.,0,network_path,,,
datadog.network_path.collector.workers,gauge,,,,The number of workers used to process pathtests concurrently.,0,network_path,,,
datadog.network_path.path.hops,gauge,,,,The number of hops of the collected pathtrace (traceroute).,0,network_path,,,
datadog.network_path.path.monitored,gauge,,,,Paths monitored count. Make 'sum by {X}' queries to count all the Paths with the tag X,0,network_path,,,
datadog.network_path.path.reachable,gauge,,,,"The value is 1 if the path is reachable, 0 otherwise. Reachability is determined by the status of the destination/target of the pathtest.",0,network_path,,,
datadog.network_path.path.unreachable,gauge,,,,"The value is 1 if the path is unreachable, 0 otherwise. Reachability is determined by the status of the destination/target of the pathtest.",0,network_path,,,
Loading