Skip to content

Commit 1320861

Browse files
authored
Merge pull request rook#17373 from degorenko/fix-mds-standby-wanted
mds: fix incorrect behaviour for CephFS when no active standby
2 parents e80e050 + b3ffac9 commit 1320861

5 files changed

Lines changed: 66 additions & 4 deletions

File tree

pkg/daemon/ceph/client/filesystem.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,23 @@ func getFilesystem(context *clusterd.Context, clusterInfo *ClusterInfo, fsName s
119119
}
120120

121121
// AllowStandbyReplay sets allow_standby_replay and standby_count_wanted on a Ceph filesystem; standby_count_wanted is forced to 0 when standby replay is disabled.
122-
func AllowStandbyReplay(context *clusterd.Context, clusterInfo *ClusterInfo, fsName string, allowStandbyReplay bool) error {
122+
func AllowStandbyReplay(context *clusterd.Context, clusterInfo *ClusterInfo, fsName string, allowStandbyReplay bool, wantedStandbyDaemons int32) error {
123123
logger.Infof("setting allow_standby_replay to %t for filesystem %q", allowStandbyReplay, fsName)
124124
args := []string{"fs", "set", fsName, "allow_standby_replay", strconv.FormatBool(allowStandbyReplay)}
125125
_, err := NewCephCommand(context, clusterInfo, args).Run()
126126
if err != nil {
127127
return errors.Wrapf(err, "failed to set allow_standby_replay to filesystem %s", fsName)
128128
}
129+
// if we don't need standby, set standby_count_wanted=0 to avoid
130+
// getting 'insufficient standby MDS daemons' health issue
131+
if !allowStandbyReplay {
132+
wantedStandbyDaemons = 0
133+
}
134+
args = []string{"fs", "set", fsName, "standby_count_wanted", fmt.Sprint(wantedStandbyDaemons)}
135+
_, err = NewCephCommand(context, clusterInfo, args).Run()
136+
if err != nil {
137+
return errors.Wrapf(err, "failed to set standby_count_wanted to filesystem %s", fsName)
138+
}
129139

130140
return nil
131141
}

pkg/daemon/ceph/client/filesystem_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,49 @@ func TestListSubvolumesInGroup(t *testing.T) {
757757
assert.Empty(t, ret)
758758
})
759759
}
760+
761+
func TestAllowStandbyReplay(t *testing.T) {
762+
cephFSName := "myfs"
763+
newContext := func(standbyWanted bool, standbyCount int32) *clusterd.Context {
764+
t.Helper()
765+
766+
executor := &exectest.MockExecutor{
767+
MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
768+
t.Logf("Command: %s %v", command, args)
769+
if args[0] == "fs" && args[1] == "set" && args[2] == cephFSName {
770+
switch args[3] {
771+
case "allow_standby_replay":
772+
if args[4] == fmt.Sprint(standbyWanted) {
773+
return "", nil
774+
}
775+
case "standby_count_wanted":
776+
if args[4] == fmt.Sprint(standbyCount) {
777+
return "", nil
778+
}
779+
}
780+
}
781+
panic(fmt.Sprintf("unhandled command %q %v", command, args))
782+
},
783+
}
784+
785+
return &clusterd.Context{Executor: executor}
786+
}
787+
788+
t.Run("no standby mds", func(t *testing.T) {
789+
ctx := newContext(false, 0)
790+
err := AllowStandbyReplay(ctx, AdminTestClusterInfo("mycluster"), cephFSName, false, int32(1))
791+
assert.NoError(t, err)
792+
})
793+
794+
t.Run("standby mds", func(t *testing.T) {
795+
ctx := newContext(true, 1)
796+
err := AllowStandbyReplay(ctx, AdminTestClusterInfo("mycluster"), cephFSName, true, int32(1))
797+
assert.NoError(t, err)
798+
})
799+
800+
t.Run("standby mds multiple", func(t *testing.T) {
801+
ctx := newContext(true, 3)
802+
err := AllowStandbyReplay(ctx, AdminTestClusterInfo("mycluster"), cephFSName, true, int32(3))
803+
assert.NoError(t, err)
804+
})
805+
}

pkg/operator/ceph/file/filesystem.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func createFilesystem(
6666
return errors.Wrapf(err, "failed to create filesystem %q", fs.Name)
6767
}
6868
}
69-
if err := cephclient.AllowStandbyReplay(context, clusterInfo, fs.Name, fs.Spec.MetadataServer.ActiveStandby); err != nil {
69+
if err := cephclient.AllowStandbyReplay(context, clusterInfo, fs.Name, fs.Spec.MetadataServer.ActiveStandby, fs.Spec.MetadataServer.ActiveCount); err != nil {
7070
return errors.Wrapf(err, "failed to set allow_standby_replay to filesystem %q", fs.Name)
7171
}
7272

pkg/operator/ceph/file/filesystem_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,8 @@ func fsExecutor(t *testing.T, fsName, configDir string, multiFS bool, createData
215215
return "", nil
216216
} else if slices.Contains(args, "set") && slices.Contains(args, "allow_standby_replay") {
217217
return "", nil
218+
} else if slices.Contains(args, "set") && slices.Contains(args, "standby_count_wanted") {
219+
return "", nil
218220
} else if slices.Contains(args, "config") && slices.Contains(args, "mds_join_fs") {
219221
return "", nil
220222
} else if slices.Contains(args, "flag") && slices.Contains(args, "enable_multiple") {
@@ -296,6 +298,8 @@ func fsExecutor(t *testing.T, fsName, configDir string, multiFS bool, createData
296298
return "", nil
297299
} else if slices.Contains(args, "set") && slices.Contains(args, "allow_standby_replay") {
298300
return "", nil
301+
} else if slices.Contains(args, "set") && slices.Contains(args, "standby_count_wanted") {
302+
return "", nil
299303
} else if slices.Contains(args, "config") && slices.Contains(args, "mds_join_fs") {
300304
return "", nil
301305
} else if slices.Contains(args, "flag") && slices.Contains(args, "enable_multiple") {
@@ -568,6 +572,8 @@ func TestUpgradeFilesystem(t *testing.T) {
568572
return "", nil
569573
} else if slices.Contains(args, "allow_standby_replay") {
570574
return "", nil
575+
} else if slices.Contains(args, "standby_count_wanted") {
576+
return "", nil
571577
}
572578
}
573579
if slices.Contains(args, "versions") {

pkg/operator/ceph/file/mds/mds.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ func (c *Cluster) upgradeMDS() error {
268268
log.NamedInfo(nsName, logger, "upgrading MDS cluster for filesystem %q", c.fs.Name)
269269

270270
// 1. set allow_standby_replay to false (this also sets standby_count_wanted to 0)
271-
if err := cephclient.AllowStandbyReplay(c.context, c.clusterInfo, c.fs.Name, false); err != nil {
271+
if err := cephclient.AllowStandbyReplay(c.context, c.clusterInfo, c.fs.Name, false, 0); err != nil {
272272
return errors.Wrap(err, "failed to setting allow_standby_replay to false")
273273
}
274274

@@ -422,7 +422,7 @@ func finishedWithDaemonUpgrade(context *clusterd.Context, clusterInfo *cephclien
422422
}
423423

424424
// set allow_standby_replay back to the value in the filesystem spec (standby_count_wanted is set alongside it)
425-
if err := cephclient.AllowStandbyReplay(context, clusterInfo, fsName, fs.Spec.MetadataServer.ActiveStandby); err != nil {
425+
if err := cephclient.AllowStandbyReplay(context, clusterInfo, fsName, fs.Spec.MetadataServer.ActiveStandby, fs.Spec.MetadataServer.ActiveCount); err != nil {
426426
return errors.Wrap(err, "failed to set allow_standby_replay to true")
427427
}
428428

0 commit comments

Comments
 (0)