Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions api/core/v1alpha1/tikv_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ const (
// condition
TiKVCondLeadersEvicted = "LeadersEvicted"
// reason
ReasonNotEvicted = "NotEvicted"
ReasonEvicting = "Evicting"
ReasonEvicted = "Evicted"
ReasonNotEvicted = "NotEvicted"
ReasonEvicting = "Evicting"
ReasonEvicted = "Evicted"
ReasonStoreNotExist = "StoreNotExist"

// Deprecated: replaced by ReasonStoreNotExist
ReasonStoreIsRemoved = "StoreIsRemoved"
)

Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/tikv/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (r *Reconciler) NewRunner(state *tasks.ReconcileContext, reporter task.Task
tasks.TaskOfflineStore(state, r.PDClientManager),
tasks.TaskConfigMap(state, r.Client),
common.TaskPVC[scope.TiKV](state, r.Client, r.VolumeModifierFactory, tasks.PVCNewer()),
tasks.TaskPod(state, r.Client),
tasks.TaskPod(state, r.Client, r.PDClientManager),
tasks.TaskStoreLabels(state, r.Client, r.PDClientManager),
tasks.TaskEvictLeader(state, r.PDClientManager),
common.TaskInstanceConditionSynced[scope.TiKV](state),
Expand Down
7 changes: 5 additions & 2 deletions pkg/controllers/tikv/tasks/ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ import (
type ReconcileContext struct {
State

LeaderEvicting bool
ShouldEvictLeader bool
LeaderEvicting bool

Store *pdv1.Store
PDSynced bool

Store *pdv1.Store

// IsStoreReady will be set only when pd is synced and the store is ok
// It may be outdated so the tikv is healthy only when the pod is also available
// If it's true and the pod is ready but not available,
Expand Down Expand Up @@ -97,6 +99,7 @@ func TaskContextInfoFromPD(state *ReconcileContext, cm pdm.PDClientManager) task
if coreutil.ShouldSuspendCompute(state.Cluster()) {
return task.Complete().With("cluster is suspending")
}

scheduler, err := c.Underlay().GetEvictLeaderScheduler(ctx, state.Store.ID)
if err != nil {
return task.Fail().With("pd is unexpectedly crashed: %v", err)
Expand Down
74 changes: 58 additions & 16 deletions pkg/controllers/tikv/tasks/evict_leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ package tasks

import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/pingcap/tidb-operator/api/v2/core/v1alpha1"
pdv1 "github.com/pingcap/tidb-operator/v2/pkg/timanager/apis/pd/v1"
pdm "github.com/pingcap/tidb-operator/v2/pkg/timanager/pd"
"github.com/pingcap/tidb-operator/v2/pkg/utils/task/v3"
)
Expand All @@ -27,26 +33,33 @@ func TaskEvictLeader(state *ReconcileContext, m pdm.PDClientManager) task.Task {
if !ok {
return task.Wait().With("wait if pd client is not registered")
}
switch {
case !state.PDSynced:
return task.Wait().With("pd is unsynced")
case state.Store == nil:
if state.Store == nil {
if syncLeadersEvictedCond(state.TiKV(), nil, state.LeaderEvicting) {
state.SetStatusChanged()
}
return task.Complete().With("store has been deleted or not created")
case state.Instance().IsOffline() || state.IsPodTerminating():
if !state.LeaderEvicting {
if err := pc.Underlay().BeginEvictLeader(ctx, state.Store.ID); err != nil {
return task.Fail().With("cannot add evict leader scheduler: %v", err)
}
}

if state.ShouldEvictLeader && !state.LeaderEvicting {
if err := pc.Underlay().BeginEvictLeader(ctx, state.Store.ID); err != nil {
return task.Fail().With("cannot add evict leader scheduler: %v", err)
}
return task.Complete().With("ensure evict leader scheduler exists")
default:
if state.LeaderEvicting {
if err := pc.Underlay().EndEvictLeader(ctx, state.Store.ID); err != nil {
return task.Fail().With("cannot remove evict leader scheduler: %v", err)
}
state.LeaderEvicting = true
}

if state.LeaderEvicting && !state.ShouldEvictLeader {
if err := pc.Underlay().EndEvictLeader(ctx, state.Store.ID); err != nil {
return task.Fail().With("cannot remove evict leader scheduler: %v", err)
}
return task.Complete().With("ensure evict leader scheduler doesn't exist")
state.LeaderEvicting = false
}

needUpdate := syncLeadersEvictedCond(state.TiKV(), state.Store, state.LeaderEvicting)
if needUpdate {
state.SetStatusChanged()
}

return task.Complete().With("sync evict leader scheduler, expected: %v, actual: %v", state.ShouldEvictLeader, state.LeaderEvicting)
})
}

Expand All @@ -69,3 +82,32 @@ func TaskEndEvictLeader(state *ReconcileContext, m pdm.PDClientManager) task.Tas
return task.Complete().With(msg)
})
}

// Status of this condition can only transfer as the below
func syncLeadersEvictedCond(tikv *v1alpha1.TiKV, store *pdv1.Store, isEvicting bool) bool {
status := metav1.ConditionFalse
reason := v1alpha1.ReasonNotEvicted
msg := "leaders are not all evicted"
switch {
case store == nil:
status = metav1.ConditionTrue
reason = v1alpha1.ReasonStoreNotExist
msg = "store does not exist"
case isEvicting && store.LeaderCount == 0:
status = metav1.ConditionTrue
reason = v1alpha1.ReasonEvicted
msg = "all leaders are evicted"
case isEvicting:
status = metav1.ConditionFalse
reason = v1alpha1.ReasonEvicting
msg = fmt.Sprintf("not all leaders are evicted, still: %v", store.LeaderCount)
}

return meta.SetStatusCondition(&tikv.Status.Conditions, metav1.Condition{
Type: v1alpha1.TiKVCondLeadersEvicted,
Status: status,
ObservedGeneration: tikv.Generation,
Reason: reason,
Message: msg,
})
}
156 changes: 156 additions & 0 deletions pkg/controllers/tikv/tasks/evict_leader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tasks

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/pingcap/tidb-operator/api/v2/core/v1alpha1"
pdapi "github.com/pingcap/tidb-operator/v2/pkg/pdapi/v1"
stateutil "github.com/pingcap/tidb-operator/v2/pkg/state"
pdv1 "github.com/pingcap/tidb-operator/v2/pkg/timanager/apis/pd/v1"
pdm "github.com/pingcap/tidb-operator/v2/pkg/timanager/pd"
"github.com/pingcap/tidb-operator/v2/pkg/utils/fake"
"github.com/pingcap/tidb-operator/v2/pkg/utils/task/v3"
)

func TestTaskEvictLeader(t *testing.T) {
cases := []struct {
desc string
state *ReconcileContext
expectBegin bool
expectEnd bool
expectEvicting bool
expectedStatus task.Status
}{
{
desc: "begin evict leader when requested",
state: &ReconcileContext{
State: &state{
tikv: fake.FakeObj[v1alpha1.TiKV]("aaa-xxx"),
pod: fake.FakeObj[corev1.Pod]("aaa-tikv-xxx"),
},
ShouldEvictLeader: true,
PDSynced: true,
Store: &pdv1.Store{
ID: "1",
},
},
expectBegin: true,
expectEvicting: true,
expectedStatus: task.SComplete,
},
{
desc: "end evict leader when annotation is absent",
state: &ReconcileContext{
State: &state{
tikv: fake.FakeObj[v1alpha1.TiKV]("aaa-xxx"),
pod: fake.FakeObj[corev1.Pod]("aaa-tikv-xxx"),
},
PDSynced: true,
LeaderEvicting: true,
Store: &pdv1.Store{
ID: "1",
},
},
expectEnd: true,
expectEvicting: false,
expectedStatus: task.SComplete,
},
{
desc: "sync leaders evicted condition when store is absent",
state: &ReconcileContext{
State: &state{
tikv: fake.FakeObj("aaa-xxx", func(obj *v1alpha1.TiKV) *v1alpha1.TiKV {
obj.Generation = 3
return obj
}),
pod: fake.FakeObj[corev1.Pod]("aaa-tikv-xxx"),
},
PDSynced: true,
LeaderEvicting: true,
Store: nil,
},
expectEvicting: true,
expectedStatus: task.SComplete,
},
}

for i := range cases {
c := &cases[i]
t.Run(c.desc, func(tt *testing.T) {
tt.Parallel()

ctrl := gomock.NewController(tt)
mockPDClient := pdm.NewMockPDClient(ctrl)
mockUnderlay := pdapi.NewMockPDClient(ctrl)
mockPDClient.EXPECT().Underlay().Return(mockUnderlay).AnyTimes()
if c.expectBegin {
mockUnderlay.EXPECT().BeginEvictLeader(gomock.Any(), "1").Return(nil)
}
if c.expectEnd {
mockUnderlay.EXPECT().EndEvictLeader(gomock.Any(), "1").Return(nil)
}

s := c.state.State.(*state)
s.IPDClient = &stubPDClientState{client: mockPDClient}

res, done := task.RunTask(context.Background(), TaskEvictLeader(c.state, nil))
assert.Equal(tt, c.expectedStatus, res.Status())
assert.False(tt, done)
assert.Equal(tt, c.expectEvicting, c.state.LeaderEvicting)
if c.state.Store == nil {
cond := findCondition(c.state.TiKV().Status.Conditions, v1alpha1.TiKVCondLeadersEvicted)
require.NotNil(tt, cond)
assert.Equal(tt, metav1.ConditionTrue, cond.Status)
assert.Equal(tt, v1alpha1.ReasonStoreNotExist, cond.Reason)
}
})
}
}

type stubPDClientState struct {
client pdm.PDClient
}

func (s *stubPDClientState) GetPDClient(pdm.PDClientManager) (pdm.PDClient, bool) {
return s.client, true
}

var _ stateutil.IPDClient = (*stubPDClientState)(nil)

type stubPDClientUnavailableState struct{}

func (s *stubPDClientUnavailableState) GetPDClient(pdm.PDClientManager) (pdm.PDClient, bool) {
return nil, false
}

var _ stateutil.IPDClient = (*stubPDClientUnavailableState)(nil)

func findCondition(conds []metav1.Condition, typ string) *metav1.Condition {
for i := range conds {
if conds[i].Type == typ {
return &conds[i]
}
}
return nil
}
28 changes: 3 additions & 25 deletions pkg/controllers/tikv/tasks/offline.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ import (
"context"
"time"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

"github.com/pingcap/tidb-operator/api/v2/core/v1alpha1"
coreutil "github.com/pingcap/tidb-operator/v2/pkg/apiutil/core/v1alpha1"
"github.com/pingcap/tidb-operator/v2/pkg/controllers/common"
"github.com/pingcap/tidb-operator/v2/pkg/runtime/scope"
Expand All @@ -47,18 +42,11 @@ func TaskOfflineStore(state *ReconcileContext, m pdm.PDClientManager) task.Task
// If the store is nil, it means the store has been deleted or not created yet.
// No need to check if leaders are evicted.
if state.Store != nil && isOffline {
var reason string
beginTime := getBeginEvictLeaderTime(tikv)
switch {
case state.LeaderEvicting && state.GetLeaderCount() == 0:
reason = "leaders have been all evicted"
case beginTime != nil && beginTime.Add(defaultLeaderEvictTimeout).Before(time.Now()):
reason = "leader eviction timeout"
}
state.ShouldEvictLeader = true

if reason == "" {
if err := CheckTiKVLeadersEvictedOrTimeout(tikv, defaultLeaderEvictTimeout); err != nil {
return task.Retry(defaultLeaderEvictTimeout+jitter).
With("waiting for leaders evicted or timeout, current leader count: %d", state.GetLeaderCount())
With("waiting for leaders evicted or timeout: %v", err)
}
}

Expand Down Expand Up @@ -87,13 +75,3 @@ func TaskOfflineStore(state *ReconcileContext, m pdm.PDClientManager) task.Task
return task.Complete().With("offline is completed or no need, spec.offline: %v", isOffline)
})
}

// getBeginEvictLeaderTime returns the time when the leader eviction started.
// If the condition is not found or the status is not False, it returns nil.
func getBeginEvictLeaderTime(tikv *v1alpha1.TiKV) *time.Time {
cond := meta.FindStatusCondition(tikv.Status.Conditions, v1alpha1.TiKVCondLeadersEvicted)
if cond != nil && cond.Status == metav1.ConditionFalse {
return ptr.To(cond.LastTransitionTime.Time)
}
return nil
}
Loading
Loading