Reject stale master identity when rejoining redis cluster after restart

lmiccini · claude · lmiccini · commit 6464ca2184d2 · 2026-04-14T07:24:17.000+02:00
When the former-master pod restarts after deletion, peer sentinels
respond to "sentinel master redis" immediately but still report the
restarting pod as master (within the down-after-milliseconds window).
Both the redis and sentinel containers would accept this stale answer
and configure themselves as master-of-self, creating a split-brain.

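Consolidate peer discovery, retry, and stale-master detection into a
single wait_for_master() function in common.sh. If a peer sentinel
reports us as master, the function skips that answer, tries the
remaining peers, and then retries (up to SENTINEL_RETRIES sweeps,
default 10, SENTINEL_RETRY_DELAY seconds apart, default 3) until
failover completes and a different master is elected. If no valid
master is found within that budget, it returns non-zero and the caller
falls through to the bootstrap path. This also removes the duplicated
retry logic from both startup scripts.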

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/templates/redis/bin/common.sh b/templates/redis/bin/common.sh
@@ -84,25 +84,40 @@ function remove_pod_label() {
     configure_pod_label $pod "$patch" "(200|422)"
 }
 
-# Contact peer sentinels to discover an existing cluster.
-# Tries each peer pod individually by FQDN (skipping self) to avoid
-# connecting to our own uninitialized sentinel through the headless
-# service DNS, which can resolve to any pod including ourselves.
-# Prints the sentinel master output on success.
-function discover_master() {
+# Wait for a peer sentinel to report a valid master for the cluster.
+# Contacts each peer pod individually by FQDN (skipping self) to avoid
+# the headless service DNS resolving to our own uninitialized sentinel.
+# Answers that are empty or that still name US as master (stale info
+# before down-after-milliseconds triggers failover) are skipped; gives up
+# with status 1 after SENTINEL_RETRIES sweeps, SENTINEL_RETRY_DELAY s apart.
+# Prints only the master address on stdout (FQDN or IP); logs go to stderr.
+function wait_for_master() {
+    local retries=${SENTINEL_RETRIES:-10}
+    local delay=${SENTINEL_RETRY_DELAY:-3}
     local pod_ordinal=${POD_NAME##*-}
     local pod_base=${POD_NAME%-*}
-    local ordinal=0
-    while [ $ordinal -le 9 ]; do
-        if [ "$ordinal" != "$pod_ordinal" ]; then
-            local peer="${pod_base}-${ordinal}.${SVC_FQDN}"
-            local output
-            output=$(timeout ${TIMEOUT} $REDIS_CLI_CMD -h ${peer} -p 26379 sentinel master redis 2>/dev/null) && {
-                echo "$output"
-                return 0
-            }
-        fi
-        ordinal=$((ordinal + 1))
+
+    for i in $(seq 1 $retries); do
+        local ordinal=0
+        while [ $ordinal -le 9 ]; do
+            if [ "$ordinal" != "$pod_ordinal" ]; then
+                local peer="${pod_base}-${ordinal}.${SVC_FQDN}"
+                local output
+                output=$(timeout ${TIMEOUT} $REDIS_CLI_CMD -h ${peer} -p 26379 sentinel master redis 2>/dev/null) && {
+                    local master
+                    master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}')
+                    # Skip empty answers and stale pre-failover ones naming US;
+                    # the self check matches the "POD_NAME." hostname form only.
+                    if [ -n "$master" ] && ! echo "$master" | grep -q "^${POD_NAME}\."; then
+                        echo "$master"
+                        return 0
+                    fi
+                }
+            fi
+            ordinal=$((ordinal + 1))
+        done
+        log "Attempt $i/$retries: no valid master found, retrying in ${delay}s..." >&2
+        sleep $delay
     done
     return 1
 }
diff --git a/templates/redis/bin/start_redis_replication.sh b/templates/redis/bin/start_redis_replication.sh
@@ -6,23 +6,11 @@ generate_configs
 sudo -E kolla_set_configs
 
 # 1. check if a redis cluster is already running by contacting peer sentinels
-#    Try each peer pod individually (skipping self) to avoid the race where
-#    the headless service DNS resolves to our own pod, whose sentinel isn't
-#    ready yet. Retry multiple times so that peers have time to become
-#    reachable while still allowing fresh cluster bootstrap when no sentinel
-#    exists at all.
-SENTINEL_RETRIES=${SENTINEL_RETRIES:-10}
-SENTINEL_RETRY_DELAY=${SENTINEL_RETRY_DELAY:-3}
-for i in $(seq 1 $SENTINEL_RETRIES); do
-    output=$(discover_master)
-    if [ $? -eq 0 ]; then
-        master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}')
-        log "Connecting to the existing Redis cluster (master: ${master})"
-        exec redis-server $REDIS_CONFIG --protected-mode no --replicaof "$master" 6379
-    fi
-    log "Attempt $i/$SENTINEL_RETRIES: could not contact any peer sentinel, retrying in ${SENTINEL_RETRY_DELAY}s..."
-    sleep $SENTINEL_RETRY_DELAY
-done
+master=$(wait_for_master)
+if [ $? -eq 0 ]; then
+    log "Connecting to the existing Redis cluster (master: ${master})"
+    exec redis-server $REDIS_CONFIG --protected-mode no --replicaof "$master" 6379
+fi
 
 # 2. else bootstrap a new cluster (assume we should be the first redis pod)
 if is_bootstrap_pod $POD_NAME; then
diff --git a/templates/redis/bin/start_sentinel.sh b/templates/redis/bin/start_sentinel.sh
@@ -6,24 +6,12 @@ generate_configs
 sudo -E kolla_set_configs
 
 # 1. check if a redis cluster is already running by contacting peer sentinels
-#    Try each peer pod individually (skipping self) to avoid the race where
-#    the headless service DNS resolves to our own pod, whose sentinel isn't
-#    ready yet. Retry multiple times so that peers have time to become
-#    reachable while still allowing fresh cluster bootstrap when no sentinel
-#    exists at all.
-SENTINEL_RETRIES=${SENTINEL_RETRIES:-10}
-SENTINEL_RETRY_DELAY=${SENTINEL_RETRY_DELAY:-3}
-for i in $(seq 1 $SENTINEL_RETRIES); do
-    output=$(discover_master)
-    if [ $? -eq 0 ]; then
-        master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}')
-        log "Connecting to the existing sentinel cluster (master: $master)"
-        echo "sentinel monitor redis ${master} 6379 ${SENTINEL_QUORUM}" >> $SENTINEL_CONFIG
-        exec redis-sentinel $SENTINEL_CONFIG
-    fi
-    log "Attempt $i/$SENTINEL_RETRIES: could not contact any peer sentinel, retrying in ${SENTINEL_RETRY_DELAY}s..."
-    sleep $SENTINEL_RETRY_DELAY
-done
+master=$(wait_for_master)
+if [ $? -eq 0 ]; then
+    log "Connecting to the existing sentinel cluster (master: $master)"
+    echo "sentinel monitor redis ${master} 6379 ${SENTINEL_QUORUM}" >> $SENTINEL_CONFIG
+    exec redis-sentinel $SENTINEL_CONFIG
+fi
 
 # 2. else let the pod's redis server bootstrap a new cluster and monitor it
 # (assume we should be the first redis pod)