|
501 | 501 | identify_announce_ip |
502 | 502 | done |
503 | 503 |
|
| 504 | + QUORUM_FAIL_COUNT=0 |
| 505 | + MAX_QUORUM_FAILURES=${MAX_QUORUM_FAILURES:-5} |
| 506 | + |
504 | 507 | trap "exit 0" TERM |
505 | 508 | while true; do |
506 | 509 | sleep {{ .Values.splitBrainDetection.interval }} |
|
509 | 512 | identify_master |
510 | 513 |
|
511 | 514 | if [ "$MASTER" = "$ANNOUNCE_IP" ]; then |
| 515 | + QUORUM_FAIL_COUNT=0 |
512 | 516 | redis_role |
513 | 517 | if [ "$ROLE" != "master" ]; then |
514 | 518 | echo "waiting for redis to become master" |
|
522 | 526 | fi |
523 | 527 | fi |
524 | 528 | elif [ "${MASTER}" ]; then |
| 529 | + QUORUM_FAIL_COUNT=0 |
525 | 530 | identify_redis_master |
526 | 531 | if [ "$REDIS_MASTER" != "$MASTER" ]; then |
527 | 532 | echo "Redis master and local master are not the same. waiting." |
528 | 533 | sleep {{ .Values.splitBrainDetection.retryInterval }} |
529 | 534 | identify_master |
530 | | - identify_redis_master |
531 | | - echo "Redis master is ${MASTER}, expected master is ${REDIS_MASTER}. No need to reinitialize." |
532 | | - if [ "${REDIS_MASTER}" != "${MASTER}" ]; then |
533 | | - echo "Redis master is ${MASTER}, expected master is ${REDIS_MASTER}, reinitializing" |
534 | | - reinit |
| 535 | + if [ "$MASTER" = "$ANNOUNCE_IP" ]; then |
| 536 | + echo "This pod became master during wait, skipping reinit" |
| 537 | + else |
| 538 | + identify_redis_master |
| 539 | + echo "Redis master is ${MASTER}, expected master is ${REDIS_MASTER}. No need to reinitialize." |
| 540 | + if [ "${REDIS_MASTER}" != "${MASTER}" ]; then |
| 541 | + echo "Redis master is ${MASTER}, expected master is ${REDIS_MASTER}, reinitializing" |
| 542 | + reinit |
| 543 | + fi |
| 544 | + fi |
| 545 | + fi |
| 546 | + else |
| 547 | + QUORUM_FAIL_COUNT=$((QUORUM_FAIL_COUNT + 1)) |
| 548 | + echo "WARNING: Sentinel returned no master (quorum may be broken). Failure count: $QUORUM_FAIL_COUNT/$MAX_QUORUM_FAILURES" |
| 549 | + if [ "$QUORUM_FAIL_COUNT" -ge "$MAX_QUORUM_FAILURES" ]; then |
| 550 | + echo "ERROR: Quorum broken for $MAX_QUORUM_FAILURES consecutive checks. Attempting sentinel reset..." |
| 551 | + if [ "$SENTINEL_PORT" -eq 0 ]; then |
| 552 | + redis-cli -h "${SERVICE}" -p "${SENTINEL_TLS_PORT}" {{ if .Values.sentinel.auth }} -a "${SENTINELAUTH}" --no-auth-warning{{ end }} --tls --cacert /tls-certs/{{ .Values.tls.caCertFile }} {{ if ne (default "yes" .Values.sentinel.authClients) "no"}} --cert /tls-certs/{{ .Values.tls.certFile }} --key /tls-certs/{{ .Values.tls.keyFile }}{{ end }} sentinel reset "${MASTER_GROUP}" || true |
| 553 | + else |
| 554 | + redis-cli -h "${SERVICE}" -p "${SENTINEL_PORT}" {{ if .Values.sentinel.auth }} -a "${SENTINELAUTH}" --no-auth-warning{{ end }} sentinel reset "${MASTER_GROUP}" || true |
535 | 555 | fi |
| 556 | + QUORUM_FAIL_COUNT=0 |
536 | 557 | fi |
537 | 558 | fi |
538 | 559 | done |
|
0 commit comments