Skip to content

Commit b40c9eb

Browse files
authored
[AI-5553] Add ha metrics (DataDog#20763)
* add ha metrics * Add changelog * Validate metadata * Add more entries
1 parent 8fb1c21 commit b40c9eb

6 files changed

Lines changed: 54 additions & 1 deletion

File tree

proxmox/changelog.d/20763.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add HA metrics.

proxmox/datadog_checks/proxmox/check.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,22 @@ def _get_vm_hostname(self, vm_id, vm_name, node):
5454
hostname = hostname_json.get("data", {}).get("result", {}).get("host-name", vm_name)
5555
return hostname
5656

57+
def _collect_ha_metrics(self):
58+
ha_response = self.http.get(f"{self.config.proxmox_server}/cluster/ha/status/current")
59+
ha_response_json = ha_response.json()
60+
ha_statuses = ha_response_json.get('data', [])
61+
for ha_status in ha_statuses:
62+
if not ha_status.get('type') == 'quorum':
63+
continue
64+
status = ha_status.get('status')
65+
quorate = ha_status.get('quorate')
66+
status_value = status == "OK"
67+
node = ha_status.get('node')
68+
tags = [f'node_status:{status}']
69+
self.gauge('ha.quorum', status_value, hostname=node, tags=tags)
70+
if isinstance(quorate, int):
71+
self.gauge('ha.quorate', quorate, hostname=node, tags=tags)
72+
5773
def _collect_performance_metrics(self):
5874
metrics_response = self.http.get(f"{self.config.proxmox_server}/cluster/metrics/export")
5975
metrics_response_json = metrics_response.json()
@@ -173,3 +189,4 @@ def check(self, _):
173189

174190
self._collect_resource_metrics()
175191
self._collect_performance_metrics()
192+
self._collect_ha_metrics()

proxmox/metadata.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ proxmox.disk.read,count,,,,,0,proxmox,disk read,,
1515
proxmox.disk.total,gauge,,,,,0,proxmox,disk total,,
1616
proxmox.disk.used,gauge,,,,,0,proxmox,disk used,,
1717
proxmox.disk.write,count,,,,,0,proxmox,disk write,,
18+
proxmox.ha.quorate,gauge,,,,Whether or not the cluster is quorate,0,proxmox,ha quorate,,
19+
proxmox.ha.quorum,gauge,,,,Whether or not the node is in quorum,0,proxmox,ha quorum,,
1820
proxmox.mem,gauge,,,,,0,proxmox,mem,,
1921
proxmox.mem.max,gauge,,,,,0,proxmox,max mem,,
2022
proxmox.mem.total,gauge,,,,,0,proxmox,mem total,,

proxmox/tests/common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
'proxmox.swap.used',
4747
]
4848

49+
# Metric names expected from the HA quorum collection.
HA_METRICS = ['proxmox.ha.quorate', 'proxmox.ha.quorum']
# Alias preserving the original misspelled name so existing references keep working.
HA_METIRCS = HA_METRICS
50+
4951
NODE_RESOURCE_METRICS = set(RESOURCE_METRICS) - {
5052
'proxmox.diskread',
5153
'proxmox.diskwrite',
@@ -77,4 +79,4 @@
7779

7880
STORAGE_PERF_METRICS = {'proxmox.disk.total', 'proxmox.disk.used'}
7981

80-
ALL_METRICS = BASE_METRICS + RESOURCE_METRICS + PERF_METRICS
82+
# Concatenation of the base, resource, performance and HA metric name lists.
ALL_METRICS = BASE_METRICS + RESOURCE_METRICS + PERF_METRICS + HA_METIRCS
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"data": [
3+
{
4+
"status": "OK",
5+
"type": "quorum",
6+
"node": "ip-122-82-3-112",
7+
"quorate": 1,
8+
"id": "quorum"
9+
},
10+
{
11+
"status": "OK",
12+
"type": "lrm",
13+
"node": "ip-122-82-3-112",
14+
"id": "lrm"
15+
},
16+
{
17+
"status": "OK",
18+
"type": "quorum",
19+
"node": "ip-122-82-3-112",
20+
"id": "quorum"
21+
}
22+
]
23+
}

proxmox/tests/test_unit.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,3 +474,11 @@ def test_perf_metrics_error(dd_run_check, caplog, instance):
474474
caplog.set_level(logging.DEBUG)
475475
dd_run_check(check)
476476
assert "Invalid metric entry found; metric name: disk.used, resource id: storage/ip-122-82-3-112" in caplog.text
477+
478+
479+
@pytest.mark.usefixtures('mock_http_get')
def test_ha_metrics(dd_run_check, aggregator, instance):
    """Run the check once and verify both HA gauges are reported for the mocked node."""
    proxmox_check = ProxmoxCheck('proxmox', {}, [instance])
    dd_run_check(proxmox_check)

    expected_host = 'ip-122-82-3-112'
    expected_tags = ['node_status:OK']
    # Same order as the submissions: quorum first, then quorate.
    for metric_name in ('proxmox.ha.quorum', 'proxmox.ha.quorate'):
        aggregator.assert_metric(metric_name, hostname=expected_host, tags=expected_tags)

0 commit comments

Comments
 (0)