Skip to content

Commit c97299b

Browse files
authored
Merge pull request #324 from stackrox/ebenshet/tenant-memory-alerts-shhh
Reduce RHACSOperatorMemoryUtilizationCritical from critical to warning
2 parents afe5454 + 5e081a6 commit c97299b

2 files changed

Lines changed: 0 additions & 34 deletions

File tree

resources/prometheus/prometheus-rules.yaml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -260,17 +260,6 @@ spec:
260 260
summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is reaching its memory limit.
261 261
description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed.
262 262
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md"
263-
- alert: RHACSTenantWorkloadMemoryUtilizationCritical
264-
expr: |
265-
rhacs_tenants:namespace:pod:container:max_memory_usage_ratio{container=~"central|scanner|indexer|matcher"} >= 0.95
266-
for: 10m
267-
labels:
268-
severity: critical
269-
annotations:
270-
summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is critically reaching its memory limit.
271-
description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed.
272-
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md"
273-
274 263
- name: rhacs-operator
275 264
rules:
276 265
- expr: |

resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,26 +28,3 @@ tests:
28 28
summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' is reaching its memory limit.
29 29
description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' reached 90% of its memory limit and is at risk of being OOM killed.
30 30
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md"
31-
- interval: 1m
32-
input_series:
33-
- series: container_memory_working_set_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="central"}
34-
values: "50+0x10 95+0x10"
35-
- series: container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="central"}
36-
values: "100+0x20"
37-
alert_rule_test:
38-
- eval_time: 1m
39-
alertname: RHACSTenantWorkloadMemoryUtilizationCritical
40-
exp_alerts: []
41-
- eval_time: 21m
42-
alertname: RHACSTenantWorkloadMemoryUtilizationCritical
43-
exp_alerts:
44-
- exp_labels:
45-
alertname: RHACSTenantWorkloadMemoryUtilizationCritical
46-
severity: critical
47-
namespace: rhacs-aaaaaaaaaaaaaaaaaaaa
48-
pod: mypod
49-
container: central
50-
exp_annotations:
51-
description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' reached 95% of its memory limit and is at high risk of being OOM killed.
52-
summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' is critically reaching its memory limit.
53-
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md"

0 commit comments

Comments (0)