Skip to content

Commit 58f9ae2

Browse files
committed
Adjust K8up metrics scraping to respect exported namespace label
This is required to make the K8up metrics usable in OpenShift's user workload monitoring (UWM). Without this, the backup namespace is scraped as `exported_namespace`, and `namespace` is the K8up namespace for all timeseries. This makes the metrics useless for UWM users, since UWM automatically restricts all queries to `namespace="<query source namespace>"`, which can't match backups in the source namespace if the backup namespace is exported as `exported_namespace`. The commit also adjusts the component-managed alerting rules to work with the new namespace label setup. Additionally, the commit fixes the expression of the `K8upBackupNotRunning` alert so that it actually fires for individual namespaces.
1 parent 53411c3 commit 58f9ae2

3 files changed

Lines changed: 28 additions & 10 deletions

File tree

class/defaults.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,14 @@ parameters:
163163
severity: critical
164164
K8upBackupNotRunning:
165165
annotations:
166-
message: No K8up jobs were run in {{ $labels.exported_namespace }} within the last 24 hours. Check the operator, there might be a deadlock
167-
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
166+
message: No K8up jobs were run in {{ $labels.namespace }} within the last 24 hours. Check the operator, there might be a deadlock
167+
expr: sum by (namespace) (rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge > 0
168168
for: 1m
169169
labels:
170170
severity: critical
171171
K8upJobStuck:
172172
annotations:
173-
message: Queued K8up jobs in {{ $labels.exported_namespace }} for the last hour.
173+
message: Queued K8up jobs in {{ $labels.namespace }} for the last hour.
174174
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge > 0
175175
for: 1h
176176
labels:

component/monitoring.jsonnet

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,19 @@ local service_monitor = com.namespaced(params.namespace, {
2727
{
2828
interval: '10s',
2929
port: 'http',
30+
// NOTE(sg): This is required to ensure that the backup namespace is
31+
// preserved as label `namespace`. Without this, the scraped metrics
32+
// have the backup namespace as `exported_namespace` and are useless
33+
// for OCP User Workload monitoring users, because UWM only allows
34+
// querying metrics whose `namespace` label matches the alert rule
35+
// source namespace.
36+
honorLabels: true,
37+
// add k8up namespace as label `k8up_namespace`.
38+
relabelings: [ {
39+
action: 'replace',
40+
sourceLabels: [ 'namespace' ],
41+
targetLabel: 'k8up_namespace',
42+
} ],
3043
},
3144
],
3245
selector: {

tests/golden/defaults/backup-k8up/backup-k8up/30_monitoring.yaml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,18 @@ spec:
1313
rules:
1414
- alert: K8upBackupNotRunning
1515
annotations:
16-
message: No K8up jobs were run in {{ $labels.exported_namespace }} within
17-
the last 24 hours. Check the operator, there might be a deadlock
18-
expr: sum(rate(k8up_jobs_total[25h])) == 0 and on(namespace) k8up_schedules_gauge
19-
> 0
16+
message: No K8up jobs were run in {{ $labels.namespace }} within the last
17+
24 hours. Check the operator, there might be a deadlock
18+
expr: sum by (namespace) (rate(k8up_jobs_total[25h])) == 0 and on(namespace)
19+
k8up_schedules_gauge > 0
2020
for: 1m
2121
labels:
2222
severity: critical
2323
syn: 'true'
2424
syn_component: backup-k8up
2525
- alert: K8upJobStuck
2626
annotations:
27-
message: Queued K8up jobs in {{ $labels.exported_namespace }} for the
28-
last hour.
27+
message: Queued K8up jobs in {{ $labels.namespace }} for the last hour.
2928
expr: k8up_jobs_queued_gauge{jobType="backup"} > 0 and on(namespace) k8up_schedules_gauge
3029
> 0
3130
for: 1h
@@ -127,8 +126,14 @@ metadata:
127126
namespace: syn-backup-k8up
128127
spec:
129128
endpoints:
130-
- interval: 10s
129+
- honorLabels: true
130+
interval: 10s
131131
port: http
132+
relabelings:
133+
- action: replace
134+
sourceLabels:
135+
- namespace
136+
targetLabel: k8up_namespace
132137
selector:
133138
matchLabels:
134139
app.kubernetes.io/instance: k8up

0 commit comments

Comments
 (0)