Skip to content

Commit bc937f4

Browse files
authored
bugfix: fix juicefsruntime getting stuck with "worker not ready" (#5103)
* bugfix: fix juicefsruntime getting stuck with "worker not ready" and no worker pod created Signed-off-by: trafalgarzzz <trafalgarz@outlook.com> * fix unit tests Signed-off-by: trafalgarzzz <trafalgarz@outlook.com> --------- Signed-off-by: trafalgarzzz <trafalgarz@outlook.com>
1 parent 261fad7 commit bc937f4

5 files changed

Lines changed: 13 additions & 35 deletions

File tree

pkg/ddc/alluxio/deprecated_label_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func getTestAlluxioEngine(client client.Client, name string, namespace string) *
5353
// - name (string): The name of the resource.
5454
// - suffix (string): The suffix of the label, which is not used in this test case.
5555
//
56-
// This test checks various combinations of `namespace` and `name`,
56+
// This test checks various combinations of `namespace` and `name`,
5757
// and validates whether the generated label name matches the expected output, ensuring the function logic is correct.
5858
func TestAlluxioEngine_GetDeprecatedCommonLabelname(t *testing.T) {
5959
testCases := []struct {

pkg/ddc/juicefs/master.go

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,20 @@ import (
2222

2323
"github.com/fluid-cloudnative/fluid/pkg/utils/kubeclient"
2424

25-
corev1 "k8s.io/api/core/v1"
2625
apierrs "k8s.io/apimachinery/pkg/api/errors"
2726
"k8s.io/client-go/util/retry"
2827

2928
datav1alpha1 "github.com/fluid-cloudnative/fluid/api/v1alpha1"
30-
"github.com/fluid-cloudnative/fluid/pkg/utils"
3129
)
3230

3331
func (j JuiceFSEngine) CheckMasterReady() (ready bool, err error) {
3432
// JuiceFS Runtime has no master role
3533
return true, nil
3634
}
3735

36+
// ShouldSetupMaster checks if a further call of func `SetupMaster` is needed.
37+
// JuiceFS Runtime has no master role, so the function checks runtime.status.WorkerPhase
38+
// to know if juicefs is installed and set up.
3839
func (j JuiceFSEngine) ShouldSetupMaster() (should bool, err error) {
3940
runtime, err := j.getRuntime()
4041
if err != nil {
@@ -50,6 +51,9 @@ func (j JuiceFSEngine) ShouldSetupMaster() (should bool, err error) {
5051
return
5152
}
5253

54+
// SetupMaster installs juicefs components into the cluster.
55+
// JuiceFS Runtime has no master role; func `SetupMaster` is implemented here
56+
// only to keep the same lifecycle as other runtimes (other runtimes may have a master component).
5357
func (j JuiceFSEngine) SetupMaster() (err error) {
5458
workerName := j.getWorkerName()
5559

@@ -58,7 +62,7 @@ func (j JuiceFSEngine) SetupMaster() (err error) {
5862
if err != nil && apierrs.IsNotFound(err) {
5963
//1. Is not found error
6064
j.Log.V(1).Info("SetupMaster", "worker", workerName)
61-
return j.setupMasterInternal()
65+
return j.installJuiceFS()
6266
} else if err != nil {
6367
//2. Other errors
6468
return
@@ -75,26 +79,10 @@ func (j JuiceFSEngine) SetupMaster() (err error) {
7579
}
7680
runtimeToUpdate := runtime.DeepCopy()
7781

78-
runtimeToUpdate.Status.WorkerPhase = datav1alpha1.RuntimePhaseNotReady
79-
replicas := runtimeToUpdate.Spec.Worker.Replicas
80-
if replicas == 0 {
81-
replicas = 1
82-
}
83-
8482
// Init selector for worker
8583
runtimeToUpdate.Status.Selector = j.getWorkerSelectors()
86-
runtimeToUpdate.Status.DesiredWorkerNumberScheduled = replicas
8784
runtimeToUpdate.Status.ValueFileConfigmap = j.getHelmValuesConfigMapName()
8885

89-
if len(runtimeToUpdate.Status.Conditions) == 0 {
90-
runtimeToUpdate.Status.Conditions = []datav1alpha1.RuntimeCondition{}
91-
}
92-
cond := utils.NewRuntimeCondition(datav1alpha1.RuntimeWorkersInitialized, datav1alpha1.RuntimeWorkersInitializedReason,
93-
"The worker is initialized.", corev1.ConditionTrue)
94-
runtimeToUpdate.Status.Conditions =
95-
utils.UpdateRuntimeCondition(runtimeToUpdate.Status.Conditions,
96-
cond)
97-
9886
if !reflect.DeepEqual(runtime.Status, runtimeToUpdate.Status) {
9987
return j.Client.Status().Update(context.TODO(), runtimeToUpdate)
10088
}

pkg/ddc/juicefs/master_internal.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ import (
2929
"github.com/fluid-cloudnative/fluid/pkg/utils/kubeclient"
3030
)
3131

32-
// setup fuse
33-
func (j *JuiceFSEngine) setupMasterInternal() (err error) {
32+
func (j *JuiceFSEngine) installJuiceFS() (err error) {
3433
var (
3534
chartName = utils.GetChartsDirectory() + "/" + common.JuiceFSChart
3635
)

pkg/ddc/juicefs/master_internal_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func TestSetupMasterInternal(t *testing.T) {
153153
if err != nil {
154154
t.Fatal(err.Error())
155155
}
156-
err = engine.setupMasterInternal()
156+
err = engine.installJuiceFS()
157157
if err != nil {
158158
t.Errorf("fail to exec check helm release: %v", err)
159159
}
@@ -164,7 +164,7 @@ func TestSetupMasterInternal(t *testing.T) {
164164
if err != nil {
165165
t.Fatal(err.Error())
166166
}
167-
err = engine.setupMasterInternal()
167+
err = engine.installJuiceFS()
168168
if err == nil {
169169
t.Errorf("fail to catch the error: %v", err)
170170
}
@@ -181,7 +181,7 @@ func TestSetupMasterInternal(t *testing.T) {
181181
if err != nil {
182182
t.Fatal(err.Error())
183183
}
184-
err = engine.setupMasterInternal()
184+
err = engine.installJuiceFS()
185185
if err == nil {
186186
t.Errorf("fail to catch the error")
187187
}
@@ -192,7 +192,7 @@ func TestSetupMasterInternal(t *testing.T) {
192192
if err != nil {
193193
t.Fatal(err.Error())
194194
}
195-
err = engine.setupMasterInternal()
195+
err = engine.installJuiceFS()
196196
fmt.Println(err)
197197
if err != nil {
198198
t.Errorf("fail to install release")

pkg/ddc/juicefs/master_test.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -147,15 +147,6 @@ func TestJuiceFSEngine_SetupMaster(t *testing.T) {
147147
t.Errorf("fail to exec the func with error %v", err)
148148
return
149149
}
150-
juicefsruntime, err := test.engine.getRuntime()
151-
if err != nil {
152-
t.Errorf("fail to get the runtime")
153-
return
154-
}
155-
if juicefsruntime.Status.WorkerPhase == datav1alpha1.RuntimePhaseNone {
156-
t.Errorf("fail to update the runtime")
157-
return
158-
}
159150
}
160151
}
161152

0 commit comments

Comments
 (0)