Skip to content

Commit 2a76d2b

Browse files
s0m01n3sergmour
authored and committed
External-master initial commit.
Signed-off-by: Sergei Mouravyov <sergei.mouravyov@gmail.com>
1 parent 44479b3 commit 2a76d2b

10 files changed

Lines changed: 272 additions & 5 deletions

File tree

api/redisreplication/v1beta2/redisreplication_types.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,25 @@ import (
66
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
77
)
88

9+
// ExternalMaster defines an external Redis master endpoint for slave-only deployments.
10+
// When set (non-nil), all pods in this deployment become read-replicas of the specified
11+
// external master instead of having a local master elected within the cluster.
12+
// This is useful for cross-cluster replication where the primary cluster runs a full
13+
// RedisReplication deployment and secondary clusters run slave-only deployments.
14+
// Cannot be combined with Sentinel.
15+
// +k8s:deepcopy-gen=true
16+
type ExternalMaster struct {
17+
// Host is the DNS name or IP address of the external Redis master.
18+
// +kubebuilder:validation:MinLength=1
19+
Host string `json:"host"`
20+
// Port is the port of the external Redis master.
21+
// Defaults to 6379 when omitted.
22+
// +kubebuilder:validation:Minimum=1
23+
// +kubebuilder:validation:Maximum=65535
24+
// +kubebuilder:default:=6379
25+
Port *int32 `json:"port,omitempty"`
26+
}
27+
928
type RedisReplicationSpec struct {
1029
Size *int32 `json:"clusterSize"`
1130
KubernetesConfig common.KubernetesConfig `json:"kubernetesConfig"`
@@ -31,6 +50,12 @@ type RedisReplicationSpec struct {
3150
TopologySpreadConstrains []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
3251
HostPort *int `json:"hostPort,omitempty"`
3352
Sentinel *Sentinel `json:"sentinel,omitempty"`
53+
// ExternalMaster configures slave-only mode where all pods replicate from an
54+
// external Redis master residing in another cluster. When enabled, no local
55+
// master is elected, the master-role service is not created, and all
56+
// leader-election / failover logic is skipped.
57+
// Cannot be combined with Sentinel.
58+
ExternalMaster *ExternalMaster `json:"externalMaster,omitempty"`
3459
}
3560

3661
type Sentinel struct {

api/redisreplication/v1beta2/redisreplication_types_helper.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,33 @@
11
package v1beta2
22

3-
import "fmt"
3+
import (
	"fmt"
	"net"
	"strconv"
)
47

58
// EnableSentinel reports whether Sentinel mode is active: the receiver is
// non-nil, Spec.Sentinel is set, and its Size is greater than zero.
func (cr *RedisReplication) EnableSentinel() bool {
	if cr == nil {
		return false
	}
	sentinel := cr.Spec.Sentinel
	if sentinel == nil {
		return false
	}
	return sentinel.Size > 0
}
811

12+
// UseExternalMaster returns true when slave-only mode with an external master is active.
13+
// Presence of a non-nil ExternalMaster spec field activates this mode.
14+
func (cr *RedisReplication) UseExternalMaster() bool {
15+
return cr != nil && cr.Spec.ExternalMaster != nil
16+
}
17+
18+
// GetExternalMasterPort returns the configured external master port, defaulting to 6379.
19+
func (cr *RedisReplication) GetExternalMasterPort() int32 {
20+
if cr.Spec.ExternalMaster != nil && cr.Spec.ExternalMaster.Port != nil {
21+
return *cr.Spec.ExternalMaster.Port
22+
}
23+
return 6379
24+
}
25+
26+
// GetExternalMasterEndpoint returns the "host:port" string of the external master.
27+
func (cr *RedisReplication) GetExternalMasterEndpoint() string {
28+
return cr.Spec.ExternalMaster.Host + ":" + strconv.Itoa(int(cr.GetExternalMasterPort()))
29+
}
30+
931
// SentinelStatefulSet returns the name of the Sentinel StatefulSet, which is
// the resource name with the "-s" suffix appended.
func (cr *RedisReplication) SentinelStatefulSet() string {
	return fmt.Sprintf("%s-s", cr.Name)
}
@@ -25,6 +47,14 @@ func (cr *RedisReplication) MasterService() string {
2547
// GetConnectionInfo returns connection info for clients based on the mode.
2648
// The dnsDomain parameter should be the cluster DNS domain (e.g., "cluster.local").
2749
func (cr *RedisReplication) GetConnectionInfo(dnsDomain string) *ConnectionInfo {
50+
if cr.UseExternalMaster() {
51+
// In slave-only mode return the external master endpoint so ConnectionInfo
52+
// is consistent with the "write endpoint" semantics used in other modes.
53+
return &ConnectionInfo{
54+
Host: cr.Spec.ExternalMaster.Host,
55+
Port: int(cr.GetExternalMasterPort()),
56+
}
57+
}
2858
if cr.EnableSentinel() {
2959
return &ConnectionInfo{
3060
Host: fmt.Sprintf("%s.%s.svc.%s", cr.SentinelHLService(), cr.Namespace, dnsDomain),

api/redisreplication/v1beta2/redisreplication_webhook.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,29 @@ func (r *RedisReplication) validate(_ *RedisReplication) (admission.Warnings, er
8080
}
8181
}
8282

83+
// Validate ExternalMaster configuration.
84+
// Presence of the externalMaster field activates slave-only mode (nil = disabled).
85+
if r.Spec.ExternalMaster != nil {
86+
// Sentinel and ExternalMaster are mutually exclusive:
87+
// ExternalMaster skips all leader-election / failover logic, so there
88+
// is no local master for Sentinel to monitor.
89+
if r.Spec.Sentinel != nil && r.Spec.Sentinel.Size > 0 {
90+
errors = append(errors, field.Invalid(
91+
field.NewPath("spec").Child("externalMaster"),
92+
r.Spec.ExternalMaster,
93+
"externalMaster cannot be combined with sentinel: sentinel requires a local master, "+
94+
"but externalMaster mode creates read-replicas only",
95+
))
96+
}
97+
// Host is mandatory
98+
if r.Spec.ExternalMaster.Host == "" {
99+
errors = append(errors, field.Required(
100+
field.NewPath("spec").Child("externalMaster").Child("host"),
101+
"host must be set when externalMaster is configured",
102+
))
103+
}
104+
}
105+
83106
if len(errors) == 0 {
84107
return nil, nil
85108
}

api/redisreplication/v1beta2/zz_generated.deepcopy.go

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/redis-replication/templates/redis-replication.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,8 @@ spec:
164164
additionalSentinelConfig: {{ .Values.sentinel.additionalSentinelConfig | quote }}
165165
{{- end }}
166166
{{- end }}
167+
{{- if and .Values.externalMaster .Values.externalMaster.enabled }}
168+
externalMaster:
169+
host: {{ required "externalMaster.host is required when externalMaster.enabled is true" .Values.externalMaster.host | quote }}
170+
port: {{ .Values.externalMaster.port | default 6379 }}
171+
{{- end }}

charts/redis-replication/values.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,3 +241,15 @@ sentinel:
241241
# -- Additional raw sentinel.conf lines
242242
# additionalSentinelConfig: |
243243
# sentinel auth-pass mymaster password123
244+
245+
# -- External master configuration for slave-only (cross-cluster) deployments.
246+
# When enabled, all pods become read-replicas of the specified external Redis master.
247+
# Cannot be combined with sentinel.
248+
externalMaster:
249+
# -- Enable slave-only mode. All pods become read-replicas of host:port.
250+
# Cannot be combined with sentinel.
251+
enabled: false
252+
# -- DNS name or IP address of the external Redis master.
253+
host: ""
254+
# -- Port of the external Redis master. Defaults to 6379.
255+
port: 6379

config/crd/bases/redis.redis.opstreelabs.in_redisreplications.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,31 @@ spec:
11921192
- name
11931193
type: object
11941194
type: array
1195+
externalMaster:
1196+
description: |-
1197+
ExternalMaster configures slave-only mode where all pods replicate from an
1198+
external Redis master residing in another cluster. When enabled, no local
1199+
master is elected, the master-role service is not created, and all
1200+
leader-election / failover logic is skipped.
1201+
Cannot be combined with Sentinel.
1202+
properties:
1203+
host:
1204+
description: Host is the DNS name or IP address of the external
1205+
Redis master.
1206+
minLength: 1
1207+
type: string
1208+
port:
1209+
default: 6379
1210+
description: |-
1211+
Port is the port of the external Redis master.
1212+
Defaults to 6379 when omitted.
1213+
format: int32
1214+
maximum: 65535
1215+
minimum: 1
1216+
type: integer
1217+
required:
1218+
- host
1219+
type: object
11951220
hostPort:
11961221
type: integer
11971222
initContainer:

internal/controller/redisreplication/redisreplication_controller.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,18 @@ func (r *Reconciler) sentinelResetIfNeed(ctx context.Context, inst *rrvb2.RedisR
345345
}
346346

347347
func (r *Reconciler) reconcileRedis(ctx context.Context, instance *rrvb2.RedisReplication) (ctrl.Result, error) {
348+
// Slave-only mode: skip all local master-election and failover logic.
349+
// Every pod is configured as a read-replica of the external master.
350+
if instance.UseExternalMaster() {
351+
if err := k8sutils.ConfigureExternalMasterReplication(ctx, r.K8sClient, instance); err != nil {
352+
return intctrlutil.RequeueAfter(ctx, time.Second*60, "failed to configure external master replication")
353+
}
354+
monitoring.RedisReplicationReplicasSizeMismatch.WithLabelValues(instance.Namespace, instance.Name).Set(0)
355+
monitoring.RedisReplicationReplicasSizeCurrent.WithLabelValues(instance.Namespace, instance.Name).Set(float64(*instance.Spec.Size))
356+
monitoring.RedisReplicationReplicasSizeDesired.WithLabelValues(instance.Namespace, instance.Name).Set(float64(*instance.Spec.Size))
357+
return intctrlutil.Reconciled()
358+
}
359+
348360
if instance.EnableSentinel() {
349361
if !r.IsStatefulSetReady(ctx, instance.Namespace, instance.SentinelStatefulSet()) {
350362
return intctrlutil.RequeueAfter(ctx, time.Second*30, "waiting for sentinel statefulset to be ready")
@@ -432,8 +444,26 @@ func (r *Reconciler) reconcileRedis(ctx context.Context, instance *rrvb2.RedisRe
432444
// reconcileStatus update status and label.
433445
func (r *Reconciler) reconcileStatus(ctx context.Context, instance *rrvb2.RedisReplication) (ctrl.Result, error) {
434446
var err error
435-
var realMaster string
436447

448+
// Slave-only mode: no local master to discover.
449+
// Status.MasterNode is set to the external "host:port"; all pods are slaves,
450+
// so UpdateRedisRoleLabel will label them all with redis-role=slave which
451+
// makes the replica service selector resolve correctly.
452+
if instance.UseExternalMaster() {
453+
externalMaster := instance.GetExternalMasterEndpoint()
454+
if err = r.UpdateRedisReplicationMaster(ctx, instance, externalMaster); err != nil {
455+
return intctrlutil.RequeueE(ctx, err, "")
456+
}
457+
labels := common.GetRedisLabels(instance.GetName(), common.SetupTypeReplication, "replication", instance.GetLabels())
458+
if err = r.Healer.UpdateRedisRoleLabel(ctx, instance.GetNamespace(), labels, instance.Spec.KubernetesConfig.ExistingPasswordSecret, instance.Spec.TLS); err != nil {
459+
return intctrlutil.RequeueE(ctx, err, "")
460+
}
461+
// All pods are slaves of the external master.
462+
monitoring.RedisReplicationConnectedSlavesTotal.WithLabelValues(instance.Namespace, instance.Name).Set(float64(*instance.Spec.Size))
463+
return intctrlutil.Reconciled()
464+
}
465+
466+
var realMaster string
437467
masterNodes, err := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "master")
438468
if err != nil {
439469
return intctrlutil.RequeueE(ctx, err, "")

internal/k8sutils/redis-replication.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,13 @@ func CreateReplicationService(ctx context.Context, cr *rrvb2.RedisReplication, c
5656
log.FromContext(ctx).Error(err, "Cannot create additional service for Redis Replication")
5757
return err
5858
}
59-
if err := CreateOrUpdateService(ctx, cr.Namespace, masterObjectMetaInfo, redisReplicationAsOwner(cr), disableMetrics, false, "ClusterIP", common.RedisPort, cl); err != nil {
60-
log.FromContext(ctx).Error(err, "Cannot create master service for Redis")
61-
return err
59+
// In slave-only mode (ExternalMaster enabled) there is no local master pod,
60+
// so the master-role selector service must not be created (req 8).
61+
if !cr.UseExternalMaster() {
62+
if err := CreateOrUpdateService(ctx, cr.Namespace, masterObjectMetaInfo, redisReplicationAsOwner(cr), disableMetrics, false, "ClusterIP", common.RedisPort, cl); err != nil {
63+
log.FromContext(ctx).Error(err, "Cannot create master service for Redis")
64+
return err
65+
}
6266
}
6367
if err := CreateOrUpdateService(ctx, cr.Namespace, replicaObjectMetaInfo, redisReplicationAsOwner(cr), disableMetrics, false, "ClusterIP", common.RedisPort, cl); err != nil {
6468
log.FromContext(ctx).Error(err, "Cannot create replica service for Redis")

internal/k8sutils/redis.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,94 @@ func GetRedisReplicationRealMaster(ctx context.Context, client kubernetes.Interf
814814
return ""
815815
}
816816

817+
// ConfigureExternalMasterReplication configures every pod in the replication StatefulSet
818+
// as a read-replica (slave) of an external Redis master. It is used exclusively in
819+
// slave-only mode (ExternalMaster.Enabled = true).
820+
//
821+
// For each pod the function:
822+
// 1. Checks the current INFO replication output; if the pod is already a slave of the
823+
// correct external master it is skipped to avoid unnecessary resync disruption.
824+
// 2. Issues CONFIG SET masterauth before REPLICAOF so authentication to the external
825+
// master succeeds on the first attempt.
826+
// 3. Issues REPLICAOF <host> <port>.
827+
//
828+
// All operations are idempotent and safe to call on every reconcile loop.
829+
func ConfigureExternalMasterReplication(ctx context.Context, client kubernetes.Interface, cr *rrvb2.RedisReplication) error {
830+
logger := log.FromContext(ctx)
831+
832+
externalHost := cr.Spec.ExternalMaster.Host
833+
externalPort := cr.GetExternalMasterPort()
834+
externalPortStr := strconv.Itoa(int(externalPort))
835+
836+
var pass string
837+
if cr.Spec.KubernetesConfig.ExistingPasswordSecret != nil {
838+
var err error
839+
pass, err = getRedisPassword(
840+
ctx, client, cr.Namespace,
841+
*cr.Spec.KubernetesConfig.ExistingPasswordSecret.Name,
842+
*cr.Spec.KubernetesConfig.ExistingPasswordSecret.Key,
843+
)
844+
if err != nil {
845+
logger.Error(err, "Failed to get Redis password for external master replication")
846+
return err
847+
}
848+
}
849+
850+
replicas := cr.Spec.GetReplicationCounts("replication")
851+
for i := 0; i < int(replicas); i++ {
852+
podName := cr.Name + "-" + strconv.Itoa(i)
853+
redisClient := configureRedisReplicationClient(ctx, client, cr, podName)
854+
defer redisClient.Close()
855+
856+
// Check current replication state; skip pod if already correctly configured
857+
// to avoid unnecessary resync disruption every 30 s.
858+
info, err := redisClient.Info(ctx, "Replication").Result()
859+
if err != nil {
860+
logger.Error(err, "Failed to get replication info, skipping pod", "pod", podName)
861+
continue
862+
}
863+
if isAlreadySlaveOf(info, externalHost, externalPortStr) {
864+
logger.V(1).Info("Pod already replicating from correct external master, skipping",
865+
"pod", podName, "host", externalHost, "port", externalPort)
866+
continue
867+
}
868+
869+
// Set masterauth BEFORE issuing REPLICAOF so the first handshake authenticates.
870+
if pass != "" {
871+
if err := redisClient.ConfigSet(ctx, "masterauth", pass).Err(); err != nil {
872+
logger.Error(err, "Failed to set masterauth on pod", "pod", podName)
873+
return err
874+
}
875+
}
876+
877+
logger.V(1).Info("Configuring pod as slave of external master",
878+
"pod", podName, "host", externalHost, "port", externalPort)
879+
if err := redisClient.SlaveOf(ctx, externalHost, externalPortStr).Err(); err != nil {
880+
logger.Error(err, "Failed to issue REPLICAOF to external master", "pod", podName)
881+
return err
882+
}
883+
}
884+
885+
return nil
886+
}
887+
888+
// isAlreadySlaveOf reports whether an INFO replication output indicates the instance
// is already a slave of the given host:port. Both host and port must match exactly
// (the comparison is a plain, case-sensitive string match), and the role must be
// "slave".
//
// Lines may be terminated by either "\r\n" or "\n", and surrounding whitespace on
// a line is ignored, so the check does not silently fail (and trigger a redundant
// REPLICAOF) when the text does not use CRLF line endings. If a field appears more
// than once, the last occurrence wins. Missing fields count as a mismatch.
func isAlreadySlaveOf(info, host, port string) bool {
	var isSlave, correctHost, correctPort bool
	for _, line := range strings.Split(info, "\n") {
		// TrimSpace removes the "\r" left over from CRLF endings as well as any
		// stray padding around the field.
		line = strings.TrimSpace(line)
		switch {
		case strings.HasPrefix(line, "role:"):
			isSlave = strings.TrimPrefix(line, "role:") == "slave"
		case strings.HasPrefix(line, "master_host:"):
			correctHost = strings.TrimPrefix(line, "master_host:") == host
		case strings.HasPrefix(line, "master_port:"):
			correctPort = strings.TrimPrefix(line, "master_port:") == port
		}
	}
	return isSlave && correctHost && correctPort
}
904+
817905
// SetRedisClusterDynamicConfig applies dynamic configuration to each Redis instance in the cluster
818906
func SetRedisClusterDynamicConfig(ctx context.Context, client kubernetes.Interface, cr *rcvb2.RedisCluster) error {
819907
// Get dynamic configuration

0 commit comments

Comments
 (0)