Skip to content

Commit 5fec804

Browse files
authored
Feature/extend prometheus exporter (#363)
* feature: Add Grafana dashboard
* feature: Enhance the built-in Prometheus exporter
* Expose PVE node updates (gauge)
* Expose VM metrics
* Show count of PegaProx users
* Show count of currently logged in PegaProx users
1 parent 2617b83 commit 5fec804

4 files changed

Lines changed: 544 additions & 17 deletions

File tree

misc/grafana/README.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,12 @@ PegaProx exposes its metrics over HTTPS on port 5000 under the path /api/metrics
2525
metrics_path: /api/metrics
2626
scheme: https
2727
authorization:
28-
type: Bearer
29-
credentials: pgx_token123token123
28+
type: Bearer
29+
credentials: pgx_token123token123
3030
tls_config:
31-
insecure_skip_verify: true
31+
insecure_skip_verify: true
3232
static_configs:
33-
- targets: ['pegaprox01.int.gyptazy.com:5000']
33+
- targets: ['pegaprox01.int.gyptazy.com:5000']
3434
```
3535

3636
## Grafana Dashboard
Lines changed: 375 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,375 @@
1+
{
2+
"id": null,
3+
"uid": "pegaprox-overview",
4+
"title": "PegaProx Cluster Overview",
5+
"description": "Author: Florian Paul Azim Hoberg (@gyptazy) | Version: 1.1 | Notes: Added guest resource, network, PegaProx user, and node update metrics",
6+
"tags": ["pegaprox", "proxmox"],
7+
"timezone": "browser",
8+
"schemaVersion": 39,
9+
"version": 1,
10+
"refresh": "10s",
11+
"panels": [
12+
{
13+
"type": "stat",
14+
"title": "Connected Clusters",
15+
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
16+
"targets": [
17+
{ "expr": "sum(pegaprox_cluster_connected)", "refId": "A" }
18+
]
19+
},
20+
{
21+
"type": "stat",
22+
"title": "Quorum OK",
23+
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
24+
"targets": [
25+
{ "expr": "min(pegaprox_cluster_quorum_held)", "refId": "A" }
26+
],
27+
"fieldConfig": {
28+
"defaults": {
29+
"mappings": [
30+
{ "type": "value", "options": { "0": { "text": "NO", "color": "red" }, "1": { "text": "YES", "color": "green" } } }
31+
]
32+
}
33+
}
34+
},
35+
{
36+
"type": "stat",
37+
"title": "Active Sessions",
38+
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
39+
"targets": [
40+
{ "expr": "pegaprox_sessions_active", "refId": "A" }
41+
]
42+
},
43+
{
44+
"type": "stat",
45+
"title": "Logged-in Users",
46+
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
47+
"targets": [
48+
{ "expr": "pegaprox_users_logged_in", "refId": "A" }
49+
]
50+
},
51+
{
52+
"type": "stat",
53+
"title": "PegaProx Users",
54+
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
55+
"targets": [
56+
{ "expr": "pegaprox_users_total", "legendFormat": "Total", "refId": "A" },
57+
{ "expr": "pegaprox_users_enabled", "legendFormat": "Enabled", "refId": "B" }
58+
]
59+
},
60+
{
61+
"type": "stat",
62+
"title": "Nodes With Updates",
63+
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
64+
"targets": [
65+
{ "expr": "sum(pegaprox_node_apt_updates_available)", "refId": "A" }
66+
],
67+
"fieldConfig": {
68+
"defaults": {
69+
"thresholds": {
70+
"mode": "absolute",
71+
"steps": [
72+
{ "color": "green", "value": null },
73+
{ "color": "orange", "value": 1 }
74+
]
75+
}
76+
}
77+
}
78+
},
79+
80+
{
81+
"type": "stat",
82+
"title": "VM Running Ratio",
83+
"gridPos": { "x": 0, "y": 4, "w": 8, "h": 4 },
84+
"targets": [
85+
{
86+
"expr": "sum(pegaprox_cluster_vms_running) / sum(pegaprox_cluster_vms_total)",
87+
"refId": "A"
88+
}
89+
],
90+
"fieldConfig": {
91+
"defaults": {
92+
"unit": "percentunit"
93+
}
94+
}
95+
},
96+
97+
{
98+
"type": "bargauge",
99+
"title": "Nodes Online per Cluster",
100+
"gridPos": { "x": 8, "y": 4, "w": 8, "h": 6 },
101+
"targets": [
102+
{
103+
"expr": "pegaprox_cluster_nodes_online",
104+
"legendFormat": "{{cluster}}",
105+
"refId": "A"
106+
}
107+
]
108+
},
109+
{
110+
"type": "bargauge",
111+
"title": "VM Count per Cluster",
112+
"gridPos": { "x": 16, "y": 4, "w": 8, "h": 6 },
113+
"targets": [
114+
{
115+
"expr": "pegaprox_cluster_vms_total",
116+
"legendFormat": "{{cluster}}",
117+
"refId": "A"
118+
}
119+
]
120+
},
121+
122+
{
123+
"type": "timeseries",
124+
"title": "CPU Usage per Node",
125+
"gridPos": { "x": 0, "y": 10, "w": 12, "h": 8 },
126+
"targets": [
127+
{
128+
"expr": "pegaprox_node_cpu_percent",
129+
"legendFormat": "{{cluster}} / {{node}}",
130+
"refId": "A"
131+
}
132+
],
133+
"fieldConfig": {
134+
"defaults": {
135+
"unit": "percent",
136+
"thresholds": {
137+
"mode": "absolute",
138+
"steps": [
139+
{ "color": "green", "value": null },
140+
{ "color": "orange", "value": 70 },
141+
{ "color": "red", "value": 90 }
142+
]
143+
}
144+
}
145+
}
146+
},
147+
{
148+
"type": "timeseries",
149+
"title": "Memory Usage per Node",
150+
"gridPos": { "x": 12, "y": 10, "w": 12, "h": 8 },
151+
"targets": [
152+
{
153+
"expr": "pegaprox_node_mem_percent",
154+
"legendFormat": "{{cluster}} / {{node}}",
155+
"refId": "A"
156+
}
157+
],
158+
"fieldConfig": {
159+
"defaults": {
160+
"unit": "percent"
161+
}
162+
}
163+
},
164+
165+
{
166+
"type": "table",
167+
"title": "Node Status",
168+
"gridPos": { "x": 0, "y": 18, "w": 12, "h": 6 },
169+
"targets": [
170+
{
171+
"expr": "pegaprox_node_online",
172+
"legendFormat": "{{cluster}} / {{node}}",
173+
"refId": "A"
174+
}
175+
]
176+
},
177+
{
178+
"type": "table",
179+
"title": "Node APT Update Availability",
180+
"gridPos": { "x": 12, "y": 18, "w": 12, "h": 6 },
181+
"targets": [
182+
{
183+
"expr": "pegaprox_node_apt_updates_available",
184+
"legendFormat": "{{cluster}} / {{node}}",
185+
"refId": "A"
186+
}
187+
],
188+
"fieldConfig": {
189+
"defaults": {
190+
"mappings": [
191+
{ "type": "value", "options": { "0": { "text": "No updates", "color": "green" }, "1": { "text": "Updates available", "color": "orange" } } }
192+
]
193+
}
194+
}
195+
},
196+
197+
{
198+
"type": "timeseries",
199+
"title": "VM Running vs Total",
200+
"gridPos": { "x": 0, "y": 24, "w": 24, "h": 8 },
201+
"targets": [
202+
{
203+
"expr": "sum(pegaprox_cluster_vms_running)",
204+
"legendFormat": "Running",
205+
"refId": "A"
206+
},
207+
{
208+
"expr": "sum(pegaprox_cluster_vms_total)",
209+
"legendFormat": "Total",
210+
"refId": "B"
211+
}
212+
]
213+
},
214+
{
215+
"type": "bargauge",
216+
"title": "Running Guests by Type",
217+
"gridPos": { "x": 0, "y": 32, "w": 8, "h": 6 },
218+
"targets": [
219+
{
220+
"expr": "sum by (type) (pegaprox_guest_running)",
221+
"legendFormat": "{{type}}",
222+
"refId": "A"
223+
}
224+
]
225+
},
226+
{
227+
"type": "timeseries",
228+
"title": "Guest CPU Usage",
229+
"gridPos": { "x": 8, "y": 32, "w": 8, "h": 6 },
230+
"targets": [
231+
{
232+
"expr": "topk(10, pegaprox_guest_cpu_percent)",
233+
"legendFormat": "{{cluster}} / {{node}} / {{name}}",
234+
"refId": "A"
235+
}
236+
],
237+
"fieldConfig": {
238+
"defaults": {
239+
"unit": "percent"
240+
}
241+
}
242+
},
243+
{
244+
"type": "timeseries",
245+
"title": "Guest Memory Usage",
246+
"gridPos": { "x": 16, "y": 32, "w": 8, "h": 6 },
247+
"targets": [
248+
{
249+
"expr": "topk(10, pegaprox_guest_mem_percent)",
250+
"legendFormat": "{{cluster}} / {{node}} / {{name}}",
251+
"refId": "A"
252+
}
253+
],
254+
"fieldConfig": {
255+
"defaults": {
256+
"unit": "percent"
257+
}
258+
}
259+
},
260+
{
261+
"type": "timeseries",
262+
"title": "Guest Disk Usage",
263+
"gridPos": { "x": 0, "y": 38, "w": 12, "h": 6 },
264+
"targets": [
265+
{
266+
"expr": "topk(10, pegaprox_guest_disk_percent)",
267+
"legendFormat": "{{cluster}} / {{node}} / {{name}}",
268+
"refId": "A"
269+
}
270+
],
271+
"fieldConfig": {
272+
"defaults": {
273+
"unit": "percent"
274+
}
275+
}
276+
},
277+
{
278+
"type": "table",
279+
"title": "Guest Resource Inventory",
280+
"gridPos": { "x": 12, "y": 38, "w": 12, "h": 6 },
281+
"targets": [
282+
{
283+
"expr": "pegaprox_guest_mem_total_bytes",
284+
"legendFormat": "{{cluster}} / {{node}} / {{type}} / {{vmid}} / {{name}}",
285+
"refId": "A"
286+
},
287+
{
288+
"expr": "pegaprox_guest_disk_total_bytes",
289+
"legendFormat": "{{cluster}} / {{node}} / {{type}} / {{vmid}} / {{name}}",
290+
"refId": "B"
291+
}
292+
],
293+
"fieldConfig": {
294+
"defaults": {
295+
"unit": "bytes"
296+
}
297+
}
298+
},
299+
{
300+
"type": "timeseries",
301+
"title": "Guest Network Receive",
302+
"gridPos": { "x": 0, "y": 44, "w": 12, "h": 6 },
303+
"targets": [
304+
{
305+
"expr": "topk(10, rate(pegaprox_guest_network_receive_bytes_total[5m]))",
306+
"legendFormat": "{{cluster}} / {{node}} / {{name}}",
307+
"refId": "A"
308+
}
309+
],
310+
"fieldConfig": {
311+
"defaults": {
312+
"unit": "Bps"
313+
}
314+
}
315+
},
316+
{
317+
"type": "timeseries",
318+
"title": "Guest Network Transmit",
319+
"gridPos": { "x": 12, "y": 44, "w": 12, "h": 6 },
320+
"targets": [
321+
{
322+
"expr": "topk(10, rate(pegaprox_guest_network_transmit_bytes_total[5m]))",
323+
"legendFormat": "{{cluster}} / {{node}} / {{name}}",
324+
"refId": "A"
325+
}
326+
],
327+
"fieldConfig": {
328+
"defaults": {
329+
"unit": "Bps"
330+
}
331+
}
332+
},
333+
334+
{
335+
"type": "stat",
336+
"title": "PBS Connectivity",
337+
"gridPos": { "x": 0, "y": 50, "w": 12, "h": 4 },
338+
"targets": [
339+
{
340+
"expr": "pegaprox_pbs_connected",
341+
"refId": "A"
342+
}
343+
]
344+
},
345+
{
346+
"type": "stat",
347+
"title": "ESXi Connectivity",
348+
"gridPos": { "x": 12, "y": 50, "w": 12, "h": 4 },
349+
"targets": [
350+
{
351+
"expr": "pegaprox_esxi_connected",
352+
"refId": "A"
353+
}
354+
]
355+
}
356+
],
357+
"templating": {
358+
"list": [
359+
{
360+
"name": "cluster",
361+
"type": "query",
362+
"datasource": "Prometheus",
363+
"query": "label_values(pegaprox_cluster_connected, cluster)",
364+
"refresh": 1
365+
},
366+
{
367+
"name": "node",
368+
"type": "query",
369+
"datasource": "Prometheus",
370+
"query": "label_values(pegaprox_node_cpu_percent, node)",
371+
"refresh": 1
372+
}
373+
]
374+
}
375+
}

0 commit comments

Comments
 (0)