Skip to content

Commit 6ca7a77

Browse files
authored
ceph-health-quick-view: alerts and region filter (#56)
Add two new panels to summarize all firing alerts as well as active Ceph warnings and errors across regions. Also add a filter variable to restrict the new panels to one or more specific regions. Signed-off-by: Tatjana Dehler <tatjana.dehler@clyso.com>
1 parent f0ddf4c commit 6ca7a77

3 files changed

Lines changed: 285 additions & 10 deletions

File tree

charts/ceph-operations/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ apiVersion: v2
22
name: ceph-operations
33
description: Ceph operations bundle
44
type: application
5-
version: 1.8.1
5+
version: 1.8.2
66
maintainers:
77
- name: sumitarora2786
88
- name: richardtief

charts/ceph-operations/perses-dashboards-global/ceph-health-quick-view.json

Lines changed: 282 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
{
3434
"header": "Region",
3535
"hide": false,
36-
"name": "region"
36+
"name": "region",
37+
"enableSorting": true,
38+
"sort": "asc"
3739
},
3840
{
3941
"header": "Health Status",
@@ -178,7 +180,7 @@
178180
"kind": "PrometheusTimeSeriesQuery",
179181
"spec": {
180182
"minStep": "",
181-
"query": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\"}) by (region)",
183+
"query": "count(ALERTS{alertstate=\"firing\", alertname=~\"^Ceph.+\"}) by (region)",
182184
"seriesNameFormat": ""
183185
}
184186
}
@@ -191,7 +193,7 @@
191193
"kind": "PrometheusTimeSeriesQuery",
192194
"spec": {
193195
"minStep": "",
194-
"query": "sum(irate(ceph_osd_op_w_in_bytes{}[$__rate_interval])) by (region)",
196+
"query": "sum(irate(ceph_osd_op_w_in_bytes[$__rate_interval])) by (region)",
195197
"seriesNameFormat": ""
196198
}
197199
}
@@ -204,7 +206,7 @@
204206
"kind": "PrometheusTimeSeriesQuery",
205207
"spec": {
206208
"minStep": "",
207-
"query": "sum(irate(ceph_osd_op_r_out_bytes{}[$__rate_interval])) by (region)",
209+
"query": "sum(irate(ceph_osd_op_r_out_bytes[$__rate_interval])) by (region)",
208210
"seriesNameFormat": ""
209211
}
210212
}
@@ -217,7 +219,7 @@
217219
"kind": "PrometheusTimeSeriesQuery",
218220
"spec": {
219221
"minStep": "",
220-
"query": "sum(irate(ceph_osd_op_w{}[$__rate_interval])) by (region)",
222+
"query": "sum(irate(ceph_osd_op_w[$__rate_interval])) by (region)",
221223
"seriesNameFormat": ""
222224
}
223225
}
@@ -230,7 +232,7 @@
230232
"kind": "PrometheusTimeSeriesQuery",
231233
"spec": {
232234
"minStep": "",
233-
"query": "sum(irate(ceph_osd_op_r{}[$__rate_interval])) by (region)",
235+
"query": "sum(irate(ceph_osd_op_r[$__rate_interval])) by (region)",
234236
"seriesNameFormat": ""
235237
}
236238
}
@@ -290,13 +292,257 @@
290292
}
291293
]
292294
}
295+
},
296+
"879f4d7930f54835a4d0202d2681492b": {
297+
"kind": "Panel",
298+
"spec": {
299+
"display": {
300+
"name": "Ceph Health Details"
301+
},
302+
"plugin": {
303+
"kind": "Table",
304+
"spec": {
305+
"columnSettings": [
306+
{
307+
"enableSorting": true,
308+
"header": "Region",
309+
"name": "region",
310+
"sort": "asc",
311+
"width": 50
312+
},
313+
{
314+
"header": "Name",
315+
"name": "name"
316+
},
317+
{
318+
"header": "Instance",
319+
"hide": true,
320+
"name": "instance",
321+
"width": 100
322+
},
323+
{
324+
"header": "Pod",
325+
"hide": true,
326+
"name": "pod"
327+
},
328+
{
329+
"header": "Severity",
330+
"hide": true,
331+
"name": "severity",
332+
"width": "auto"
333+
},
334+
{
335+
"hide": true,
336+
"name": "__name__"
337+
},
338+
{
339+
"hide": true,
340+
"name": "cluster"
341+
},
342+
{
343+
"hide": true,
344+
"name": "cluster_type"
345+
},
346+
{
347+
"hide": true,
348+
"name": "container"
349+
},
350+
{
351+
"hide": true,
352+
"name": "endpoint"
353+
},
354+
{
355+
"hide": true,
356+
"name": "job"
357+
},
358+
{
359+
"hide": true,
360+
"name": "namespace"
361+
},
362+
{
363+
"hide": true,
364+
"name": "prometheus"
365+
},
366+
{
367+
"hide": true,
368+
"name": "service"
369+
},
370+
{
371+
"hide": true,
372+
"name": "timestamp"
373+
},
374+
{
375+
"hide": true,
376+
"name": "value"
377+
}
378+
],
379+
"density": "standard",
380+
"pagination": true
381+
}
382+
},
383+
"queries": [
384+
{
385+
"kind": "TimeSeriesQuery",
386+
"spec": {
387+
"plugin": {
388+
"kind": "PrometheusTimeSeriesQuery",
389+
"spec": {
390+
"query": "ceph_health_detail{region=~\"$region\"} > 0"
391+
}
392+
}
393+
}
394+
}
395+
]
396+
}
397+
},
398+
"CephAlerts": {
399+
"kind": "Panel",
400+
"spec": {
401+
"display": {
402+
"name": "Ceph Alerts"
403+
},
404+
"plugin": {
405+
"kind": "Table",
406+
"spec": {
407+
"columnSettings": [
408+
{
409+
"enableSorting": true,
410+
"header": "Region",
411+
"name": "region",
412+
"sort": "asc",
413+
"width": 50
414+
},
415+
{
416+
"header": "Alert Name",
417+
"name": "alertname",
418+
"width": 80
419+
},
420+
{
421+
"header": "Name",
422+
"name": "name"
423+
},
424+
{
425+
"header": "Node",
426+
"name": "node"
427+
},
428+
{
429+
"header": "Pod",
430+
"hide": true,
431+
"name": "pod"
432+
},
433+
{
434+
"hide": true,
435+
"name": "__name__"
436+
},
437+
{
438+
"hide": true,
439+
"name": "alertstate"
440+
},
441+
{
442+
"hide": true,
443+
"name": "cluster"
444+
},
445+
{
446+
"hide": true,
447+
"name": "cluster_type"
448+
},
449+
{
450+
"hide": true,
451+
"name": "container"
452+
},
453+
{
454+
"hide": true,
455+
"name": "endpoint"
456+
},
457+
{
458+
"hide": true,
459+
"name": "inhibited_by"
460+
},
461+
{
462+
"hide": true,
463+
"name": "instance"
464+
},
465+
{
466+
"hide": true,
467+
"name": "job"
468+
},
469+
{
470+
"hide": true,
471+
"name": "namespace"
472+
},
473+
{
474+
"hide": true,
475+
"name": "prometheus"
476+
},
477+
{
478+
"hide": true,
479+
"name": "service"
480+
},
481+
{
482+
"hide": true,
483+
"name": "severity"
484+
},
485+
{
486+
"hide": true,
487+
"name": "support_group"
488+
},
489+
{
490+
"hide": true,
491+
"name": "timestamp"
492+
},
493+
{
494+
"hide": true,
495+
"name": "type"
496+
},
497+
{
498+
"hide": true,
499+
"name": "value"
500+
},
501+
{
502+
"hide": true,
503+
"name": "oid"
504+
}
505+
],
506+
"density": "standard",
507+
"pagination": true
508+
}
509+
},
510+
"queries": [
511+
{
512+
"kind": "TimeSeriesQuery",
513+
"spec": {
514+
"plugin": {
515+
"kind": "PrometheusTimeSeriesQuery",
516+
"spec": {
517+
"query": "ALERTS{alertstate=\"firing\", alertname=~\"^Ceph.+\", region=~\"$region\"}"
518+
}
519+
}
520+
}
521+
}
522+
]
523+
}
293524
}
294525
},
295526
"layouts": [
296527
{
297528
"kind": "Grid",
298529
"spec": {
530+
"display": {
531+
"title": "",
532+
"collapse": {
533+
"open": true
534+
}
535+
},
299536
"items": [
537+
{
538+
"x": 0,
539+
"y": 14,
540+
"width": 12,
541+
"height": 11,
542+
"content": {
543+
"$ref": "#/spec/panels/CephAlerts"
544+
}
545+
},
300546
{
301547
"x": 0,
302548
"y": 0,
@@ -305,12 +551,41 @@
305551
"content": {
306552
"$ref": "#/spec/panels/0"
307553
}
554+
},
555+
{
556+
"x": 12,
557+
"y": 14,
558+
"width": 12,
559+
"height": 11,
560+
"content": {
561+
"$ref": "#/spec/panels/879f4d7930f54835a4d0202d2681492b"
562+
}
308563
}
309564
]
310565
}
311566
}
312567
],
313-
"variables": [],
568+
"variables": [
569+
{
570+
"kind": "ListVariable",
571+
"spec": {
572+
"display": {
573+
"name": "Region",
574+
"hidden": false
575+
},
576+
"defaultValue": "$__all",
577+
"allowAllValue": true,
578+
"allowMultiple": true,
579+
"plugin": {
580+
"kind": "PrometheusLabelValuesVariable",
581+
"spec": {
582+
"labelName": "region"
583+
}
584+
},
585+
"name": "region"
586+
}
587+
}
588+
],
314589
"duration": "1h",
315590
"refreshInterval": "0s"
316591
}

charts/ceph-operations/plugindefinition.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ kind: ClusterPluginDefinition
66
metadata:
77
name: ceph-operations
88
spec:
9-
version: 1.8.1
9+
version: 1.8.2
1010
displayName: Ceph operations bundle
1111
description: Operations bundle for the Ceph storage backend
1212
docMarkDownUrl: https://raw.githubusercontent.com/cobaltcore-dev/cloud-storage-operations/main/ceph-operations/README.md
1313
icon: https://raw.githubusercontent.com/cobaltcore-dev/cloud-storage-operations/main/ceph-operations/ceph-logo.png
1414
helmChart:
1515
name: ceph-operations
1616
repository: oci://ghcr.io/cobaltcore-dev/cloud-storage-operations/charts
17-
version: 1.8.1
17+
version: 1.8.2
1818
options:
1919
- name: prometheusRules.create
2020
description: Create Prometheus rules

0 commit comments

Comments
 (0)