Skip to content

Commit 06554f2

Browse files
committed
fix(chart): high-priority alert templating and replica secret config (#181)
Signed-off-by: Philippe Noël <philippemnoel@gmail.com>
1 parent b1d64c2 commit 06554f2

9 files changed

+17 -7 lines changed

charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
4 4
annotations:
5 5
summary: CNPG Cluster critical logical replication errors
6 6
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription has experienced {{ .value }} errors in the last 5 minutes.
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription has experienced {{ .value }} errors in the last 5 minutes.
8 8
9 9
CRITICAL: High error rate indicates persistent replication issues requiring immediate attention. This could lead to significant data inconsistency or complete replication failure. Errors include both apply errors and sync errors. The subscription may stop working if errors continue.
10 10
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLogicalReplicationErrors.md

charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
4 4
annotations:
5 5
summary: CNPG Cluster logical replication errors detected
6 6
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription has experienced {{ .value }} errors.
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription has experienced {{ .value }} errors.
8 8
9 9
This includes both apply errors (during normal replication) and sync errors (during initial table sync). Errors indicate data consistency issues that need immediate attention to prevent data divergence.
10 10
runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/{{ $alert }}.md

charts/cluster/prometheus_rules/cluster-logical_replication_lagging-critical.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
44
annotations:
55
summary: CNPG Cluster critical logical replication lag
66
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is experiencing critical replication lag!
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is experiencing critical replication lag!
88
99
{{- if .labels.lag_type }}
1010
Lag type: {{ .labels.lag_type }}

charts/cluster/prometheus_rules/cluster-logical_replication_lagging.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
44
annotations:
55
summary: CNPG Cluster logical replication lagging
66
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is experiencing replication lag.
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is experiencing replication lag.
88
99
{{- if .labels.lag_type }}
1010
Lag type: {{ .labels.lag_type }}

charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
44
annotations:
55
summary: CNPG Cluster logical replication subscription CRITICAL
66
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is in a critical state.
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is in a critical state.
88
99
CRITICAL: The subscription has been stopped for more than 15 minutes. This will lead to significant data divergence and requires immediate intervention.
1010

charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ alert: {{ $alert }}
44
annotations:
55
summary: CNPG Cluster logical replication subscription stopped
66
description: |-
7-
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is stopped.
7+
CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is stopped.
88
99
Status: {{ .labels.stop_reason }}
1010

charts/cluster/templates/prometheus-rule.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ spec:
1717
{{- $_ := set $dict "value" "{{ $value }}" -}}
1818
{{- $_ := set $dict "namespace" .Release.Namespace -}}
1919
{{- $_ := set $dict "cluster" (include "cluster.fullname" .) -}}
20-
{{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}") -}}
20+
{{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}" "subname" "{{ $labels.subname }}" "lag_type" "{{ $labels.lag_type }}" "stop_reason" "{{ $labels.stop_reason }}") -}}
2121
{{- $_ := set $dict "podSelector" (printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .)) -}}
2222
{{- $_ := set $dict "Values" .Values -}}
2323
{{- $_ := set $dict "Template" .Template -}}

charts/cluster/values.schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,11 +856,17 @@
856856
"passwordSecret": {
857857
"type": "object",
858858
"properties": {
859+
"create": {
860+
"type": "boolean"
861+
},
859862
"key": {
860863
"type": "string"
861864
},
862865
"name": {
863866
"type": "string"
867+
},
868+
"value": {
869+
"type": "string"
864870
}
865871
}
866872
},

charts/cluster/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,8 +568,12 @@ replica:
568568
name: ""
569569
key: ""
570570
passwordSecret:
571+
# -- Whether to create a secret for the password
572+
create: false
571573
name: ""
572574
key: ""
575+
# -- The password value to use when creating the secret
576+
value: ""
573577
##
574578
# Database management configuration
575579
databases: []

0 commit comments

Comments
 (0)