From 06554f2350cb8a2ade7199f7e11ff2cd710f93ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20No=C3=ABl?= <21990816+philippemnoel@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:55:03 -0800 Subject: [PATCH] fix(chart): high-priority alert templating and replica secret config (#181) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Philippe Noël --- .../cluster-logical_replication_errors-critical.yaml | 2 +- .../cluster-logical_replication_errors.yaml | 2 +- .../cluster-logical_replication_lagging-critical.yaml | 2 +- .../cluster-logical_replication_lagging.yaml | 2 +- .../cluster-logical_replication_stopped-critical.yaml | 2 +- .../cluster-logical_replication_stopped.yaml | 2 +- charts/cluster/templates/prometheus-rule.yaml | 2 +- charts/cluster/values.schema.json | 6 ++++++ charts/cluster/values.yaml | 4 ++++ 9 files changed, 17 insertions(+), 7 deletions(-) diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml index 93f469bf9c..e43506e632 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster critical logical replication errors description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription has experienced {{ .value }} errors in the last 5 minutes. + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription has experienced {{ .value }} errors in the last 5 minutes. CRITICAL: High error rate indicates persistent replication issues requiring immediate attention. This could lead to significant data inconsistency or complete replication failure. Errors include both apply errors and sync errors. The subscription may stop working if errors continue. runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLogicalReplicationErrors.md diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml index d8987dc83d..4b89ecdfa6 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster logical replication errors detected description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription has experienced {{ .value }} errors. + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription has experienced {{ .value }} errors. This includes both apply errors (during normal replication) and sync errors (during initial table sync). Errors indicate data consistency issues that need immediate attention to prevent data divergence. runbook_url: https://github.com/cloudnative-pg/charts/blob/main/charts/cluster/docs/runbooks/{{ $alert }}.md diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_lagging-critical.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_lagging-critical.yaml index d80c82627c..0d554e98af 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_lagging-critical.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_lagging-critical.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster critical logical replication lag description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is experiencing critical replication lag! + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is experiencing critical replication lag! {{- if .labels.lag_type }} Lag type: {{ .labels.lag_type }} diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_lagging.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_lagging.yaml index 6994837cde..731bcb3223 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_lagging.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_lagging.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster logical replication lagging description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is experiencing replication lag. + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is experiencing replication lag. {{- if .labels.lag_type }} Lag type: {{ .labels.lag_type }} diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml index 0e2dd72418..3e0a0b5531 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster logical replication subscription CRITICAL description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is in a critical state. + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is in a critical state. CRITICAL: The subscription has been stopped for more than 15 minutes. This will lead to significant data divergence and requires immediate intervention. diff --git a/charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml b/charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml index 8865ae05de..c784d9e7f8 100644 --- a/charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml +++ b/charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml @@ -4,7 +4,7 @@ alert: {{ $alert }} annotations: summary: CNPG Cluster logical replication subscription stopped description: |- - CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ "{{ .subname }}" }}" subscription is stopped. + CloudNativePG Cluster's "{{ .namespace }}/{{ .cluster }}" "{{ .labels.subname }}" subscription is stopped. Status: {{ .labels.stop_reason }} diff --git a/charts/cluster/templates/prometheus-rule.yaml b/charts/cluster/templates/prometheus-rule.yaml index 360b4798de..f92037c0e0 100644 --- a/charts/cluster/templates/prometheus-rule.yaml +++ b/charts/cluster/templates/prometheus-rule.yaml @@ -17,7 +17,7 @@ spec: {{- $_ := set $dict "value" "{{ $value }}" -}} {{- $_ := set $dict "namespace" .Release.Namespace -}} {{- $_ := set $dict "cluster" (include "cluster.fullname" .) -}} - {{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}") -}} + {{- $_ := set $dict "labels" (dict "job" "{{ $labels.job }}" "node" "{{ $labels.node }}" "pod" "{{ $labels.pod }}" "subname" "{{ $labels.subname }}" "lag_type" "{{ $labels.lag_type }}" "stop_reason" "{{ $labels.stop_reason }}") -}} {{- $_ := set $dict "podSelector" (printf "%s-([1-9][0-9]*)$" (include "cluster.fullname" .)) -}} {{- $_ := set $dict "Values" .Values -}} {{- $_ := set $dict "Template" .Template -}} diff --git a/charts/cluster/values.schema.json b/charts/cluster/values.schema.json index 017abe7f53..81e5e4a0a7 100644 --- a/charts/cluster/values.schema.json +++ b/charts/cluster/values.schema.json @@ -856,11 +856,17 @@ "passwordSecret": { "type": "object", "properties": { + "create": { + "type": "boolean" + }, "key": { "type": "string" }, "name": { "type": "string" + }, + "value": { + "type": "string" } } }, diff --git a/charts/cluster/values.yaml b/charts/cluster/values.yaml index 7b02acbabb..3c56339f7e 100644 --- a/charts/cluster/values.yaml +++ b/charts/cluster/values.yaml @@ -568,8 +568,12 @@ replica: name: "" key: "" passwordSecret: + # -- Whether to create a secret for the password + create: false name: "" key: "" + # -- The password value to use when creating the secret + value: "" ## # Database management configuration databases: []