Skip to content

Commit 1ef027e

Browse files
committed
fix: failed to drain unmanaged clusterqueue
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent 6713343 commit 1ef027e

10 files changed

Lines changed: 89 additions & 48 deletions

File tree

.claude/skills/gpustack-operator-e2e/SKILL.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,43 @@ kubectl -n "$NS" patch nodefeature "${NODE}-gpustack-worker" --type=merge \
244244
kubectl -n default delete instance gpustack-e2e-instance
245245
```
246246

247+
### 4b. Managed toggle — a *second, independent* drain trigger (run when the change touches the ResourceFlavor/Cohort Node-watch)
248+
249+
Excluding a node from management (`gpustack.ai/managed=false`) must drain its single-node
250+
ResourceFlavors with the **same** chain as §4 (flavor `schedule.gpustack.ai/drain=true` → ClusterQueue
251+
`HoldAndDrain` → the InstanceType's running Instances `spec.stop=true`). What is non-obvious is that it
252+
is a *different trigger on a different code path*:
253+
254+
- A §4 capacity reshape changes a *feature label*, so any feature-prefix predicate fires. **A managed
255+
toggle changes only `gpustack.ai/managed`** — no feature label — so it drains **only if** the
256+
`ResourceFlavorReconciler`/`CohortReconciler` Node-watch `UpdateFunc` predicates include
257+
`systemname.ManagedLabelKey` in their `mapx.EqualWithStringPrefix(...)`
258+
(`pkg/worker/controllers/worker/{resourceflavor,cohort}.go`). Missing it is the historical bug: the
259+
flavor is never enqueued or drained, while the ClusterQueue silently recomputes to a misleading
260+
`0/-1` (Active but negative-remaining) quota and the Instance keeps running.
261+
- **Restart masks it.** The `For`-watch start-up resync re-reconciles every ResourceFlavor, so a freshly
262+
(re)started operator drains the orphan regardless of the predicate. Verify against a **continuously
263+
running** operator — do not restart between the toggle and the assertion.
264+
- Toggle via the NodeFeature, not the node (§4 explains why: NFD reverts a direct node label). The unit
265+
cases `unmanaged node drains flavor` / `unmanaged node deletes cohort` only guard the index filter,
266+
**not** the predicate — so this live check is the only guard for the enqueue path.
267+
268+
```bash
269+
NS=gpustack-system; NODE=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
270+
before=$(kubectl get node "$NODE" -o jsonpath='{.metadata.labels.gpustack\.ai/managed}')
271+
272+
# Toggle out of management, then poll the §4 chain (flavor drain → CQ HoldAndDrain → Instance stop).
273+
kubectl -n "$NS" patch nodefeature "${NODE}-gpustack-worker" --type=merge \
274+
-p '{"spec":{"labels":{"gpustack.ai/managed":"false"}}}'
275+
276+
# Restore (skip if doing a full §6 teardown).
277+
kubectl -n "$NS" patch nodefeature "${NODE}-gpustack-worker" --type=merge \
278+
-p "{\"spec\":{\"labels\":{\"gpustack.ai/managed\":\"${before:-true}\"}}}"
279+
```
280+
281+
> Toggling a node that hosts a *running* Instance Stops that Instance, so on a shared cluster pick a node
282+
> whose Instances you can disrupt (or one with none, to assert the flavor/CQ drain alone).
283+
247284
## 5. Optional — simulated accelerator & drain-recycle (accelerated chain)
248285

249286
This exercises the accelerated chain and the drain-recycle behavior (the `ResourceFlavor` tombstone,

pkg/manager/config.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727

2828
type Config struct {
2929
InformerCacheResyncPeriod time.Duration
30-
AggressiveEventFiltering bool
3130
LoopbackKubeConfigPath string
3231
LoopbackKubeRestConfig rest.Config
3332
LoopbackKubeHTTPClient *http.Client
@@ -120,11 +119,10 @@ func (c *Config) Apply(ctx context.Context) (*Manager, error) {
120119
return nil, fmt.Errorf("create controller manager: %w", err)
121120
}
122121
ctrlManager = CtrlManager{
123-
Manager: rawCtrlManager,
124-
aggressiveEventFiltering: c.AggressiveEventFiltering,
125-
options: ctrlMgrOpts,
126-
httpClient: c.LoopbackKubeHTTPClient,
127-
indexedFields: sets.Set[string]{},
122+
Manager: rawCtrlManager,
123+
options: ctrlMgrOpts,
124+
httpClient: c.LoopbackKubeHTTPClient,
125+
indexedFields: sets.Set[string]{},
128126
}
129127
}
130128

pkg/manager/helper.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ type (
2121
// CtrlManager is a wrapper around ctrl.Manager.
2222
CtrlManager struct {
2323
ctrl.Manager
24-
aggressiveEventFiltering bool
25-
options ctrl.Options
26-
httpClient *http.Client
27-
disableController bool
28-
indexedFields sets.Set[string]
24+
options ctrl.Options
25+
httpClient *http.Client
26+
disableController bool
27+
indexedFields sets.Set[string]
2928
}
3029

3130
// RepeatableCtrlFieldIndexer is a wrapper around ctrlcli.FieldIndexer.
@@ -111,11 +110,6 @@ func (m CtrlManager) GetLeaderElectionNamespacedName() types.NamespacedName {
111110
}
112111
}
113112

114-
// AllowAggressiveEventFiltering returns whether aggressive event filtering is allowed.
115-
func (m CtrlManager) AllowAggressiveEventFiltering() bool {
116-
return m.aggressiveEventFiltering
117-
}
118-
119113
// _CtrlManagerSentinel is a ctrlmgr.Runnable implementation for observing
120114
// whether the ctrl.Manager is started.
121115
type _CtrlManagerSentinel struct {

pkg/manager/option.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ type Options struct {
2727
// Control.
2828
GopoolWorkerFactor int
2929
InformerCacheResyncPeriod time.Duration
30-
AggressiveEventFiltering bool
3130

3231
// Connect Kubernetes.
3332
KubeConnTimeout time.Duration
@@ -83,8 +82,6 @@ func (o *Options) AddFlags(fs *pflag.FlagSet, opts ...FlagOption) {
8382
"it is calculated by the number of CPU cores multiplied by this factor.")
8483
fs.DurationVar(&o.InformerCacheResyncPeriod, "informer-cache-resync-period", o.InformerCacheResyncPeriod,
8584
"the period at which the informer's cache is resynced.")
86-
fs.BoolVar(&o.AggressiveEventFiltering, "aggressive-event-filtering", o.AggressiveEventFiltering,
87-
"indicates to reduce event filtering threshold to make the controllers more aggressive to react to the changes of the cluster.")
8885

8986
// Connect Kubernetes.
9087
fs.DurationVar(&o.KubeConnTimeout, "kube-conn-timeout", o.KubeConnTimeout,
@@ -220,7 +217,6 @@ func (o *Options) Complete(ctx context.Context) (*Config, error) {
220217

221218
return &Config{
222219
InformerCacheResyncPeriod: o.InformerCacheResyncPeriod,
223-
AggressiveEventFiltering: o.AggressiveEventFiltering,
224220
LoopbackKubeConfigPath: lpCfgPath,
225221
LoopbackKubeRestConfig: *lpRestCfg,
226222
LoopbackKubeHTTPClient: lpHttpCli,

pkg/worker/controllers/worker/clusterqueue.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,6 @@ func (r *ClusterQueueReconciler) SetupController(ctx context.Context, opts contr
449449
r.APIReader = opts.Manager.GetAPIReader()
450450

451451
dedupWindow := ctrlhandlerx.NewDedupWindow[ctrlreconcile.Request]()
452-
aggressive := opts.Manager.AllowAggressiveEventFiltering()
453452

454453
return ctrl.NewControllerManagedBy(opts.Manager).
455454
Named("clusterqueue").
@@ -510,15 +509,11 @@ func (r *ClusterQueueReconciler) SetupController(ctx context.Context, opts contr
510509
// - updated if its feature labels or allocatable resources have changed.
511510
ctrlpredicate.Funcs{
512511
UpdateFunc: func(e ctrlevent.UpdateEvent) bool {
513-
if aggressive {
514-
return true
515-
}
516-
517512
oldNd, newNd := e.ObjectOld.(*core.Node), e.ObjectNew.(*core.Node)
518513
if newNd.DeletionTimestamp == nil {
519514
// Fire when feature labels have changed.
520515
if !mapx.EqualWithStringPrefix(oldNd.Labels, newNd.Labels,
521-
systemname.LabelPrefix,
516+
systemname.ManagedLabelKey,
522517
nodefeature.FeatureLabelPrefix,
523518
nodefeature.GeneralFeatureLabelPrefix,
524519
nodefeature.AcceleratableFeatureLabelPrefix) {

pkg/worker/controllers/worker/cohort.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@ func (r *CohortReconciler) SetupController(ctx context.Context, opts controller.
195195
r.Client = opts.Manager.GetClient()
196196

197197
dedupWindow := ctrlhandlerx.NewDedupWindow[ctrlreconcile.Request]()
198-
aggressive := opts.Manager.AllowAggressiveEventFiltering()
199198

200199
return ctrl.NewControllerManagedBy(opts.Manager).
201200
Named("cohort").
@@ -211,17 +210,14 @@ func (r *CohortReconciler) SetupController(ctx context.Context, opts controller.
211210
// Trigger reconciliation when a Node is:
212211
// - created.
213212
// - deleted.
214-
// - updated if its feature labels have changed.
213+
// - updated if its managed mark or feature labels have changed.
215214
ctrlpredicate.Funcs{
216215
UpdateFunc: func(e ctrlevent.UpdateEvent) bool {
217-
if aggressive {
218-
return true
219-
}
220-
221216
oldNd, newNd := e.ObjectOld.(*core.Node), e.ObjectNew.(*core.Node)
222217
if newNd.DeletionTimestamp == nil {
223-
// Fire when feature labels have changed.
218+
// Fire when the managed mark or feature labels have changed.
224219
return !mapx.EqualWithStringPrefix(oldNd.Labels, newNd.Labels,
220+
systemname.ManagedLabelKey,
225221
nodefeature.FeatureLabelPrefix,
226222
nodefeature.GeneralFeatureLabelPrefix,
227223
nodefeature.AcceleratableFeatureLabelPrefix)

pkg/worker/controllers/worker/cohort_test.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"gpustack.ai/gpustack/pkg/kubeclients/kubernetes/scheme"
1818
"gpustack.ai/gpustack/pkg/nodefeature"
1919
"gpustack.ai/gpustack/pkg/systemmeta"
20+
"gpustack.ai/gpustack/pkg/systemname"
2021
)
2122

2223
// newInstanceTypeClusterQueue builds an "instancetypes" ClusterQueue that
@@ -57,6 +58,7 @@ func TestCohortReconciler_Reconcile(t *testing.T) {
5758
name string
5859

5960
withNode bool
61+
nodeUnmanaged bool // node present but gpustack.ai/managed=false
6062
withCohort bool
6163
withClusterQueue bool
6264

@@ -82,14 +84,29 @@ func TestCohortReconciler_Reconcile(t *testing.T) {
8284
name: "no node no ClusterQueue deletes cohort",
8385
withCohort: true,
8486
},
87+
{
88+
// A Node exists but is no longer managed (gpustack.ai/managed=false), so
89+
// indexNodeByCohortProfile excludes it. With no ClusterQueue either, the
90+
// cohort is idle and must be deleted. Guards the index's managed filter
91+
// (the path a node leaving management relies on); does NOT exercise the
92+
// Node-watch predicate.
93+
name: "unmanaged node deletes cohort",
94+
withNode: true,
95+
nodeUnmanaged: true,
96+
withCohort: true,
97+
},
8598
}
8699

87100
for _, c := range cases {
88101
c := c
89102
t.Run(c.name, func(t *testing.T) {
90103
var objs []ctrlcli.Object
91104
if c.withNode {
92-
objs = append(objs, newGeneralNode("node-1"))
105+
nd := newGeneralNode("node-1")
106+
if c.nodeUnmanaged {
107+
nd.Labels[systemname.ManagedLabelKey] = "false"
108+
}
109+
objs = append(objs, nd)
93110
}
94111
if c.withCohort {
95112
objs = append(objs, &kueue.Cohort{ObjectMeta: meta.ObjectMeta{Name: cohortName}})

pkg/worker/controllers/worker/nodefeature.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,6 @@ func (r *NodeFeatureReconciler) Reconcile(ctx context.Context, req ctrl.Request)
108108
func (r *NodeFeatureReconciler) SetupController(_ context.Context, opts controller.SetupOptions) error {
109109
r.Client = opts.Manager.GetClient()
110110

111-
aggressive := opts.Manager.AllowAggressiveEventFiltering()
112111
return ctrl.NewControllerManagedBy(opts.Manager).
113112
Named("nodefeature").
114113
For(
@@ -122,10 +121,6 @@ func (r *NodeFeatureReconciler) SetupController(_ context.Context, opts controll
122121
return false
123122
},
124123
UpdateFunc: func(e ctrlevent.UpdateEvent) bool {
125-
if aggressive {
126-
return e.ObjectNew.GetDeletionTimestamp() == nil
127-
}
128-
129124
oldNd, newNd := e.ObjectOld.(*core.Node), e.ObjectNew.(*core.Node)
130125
if newNd.DeletionTimestamp == nil {
131126
// Fire when labels have changed.

pkg/worker/controllers/worker/resourceflavor.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,6 @@ func (r *ResourceFlavorReconciler) SetupController(ctx context.Context, opts con
236236

237237
r.Client = opts.Manager.GetClient()
238238

239-
aggressive := opts.Manager.AllowAggressiveEventFiltering()
240-
241239
return ctrl.NewControllerManagedBy(opts.Manager).
242240
Named("resourceflavor").
243241
For(
@@ -295,17 +293,15 @@ func (r *ResourceFlavorReconciler) SetupController(ctx context.Context, opts con
295293
// Trigger reconciliation when a Node is:
296294
// - created.
297295
// - deleted (so a flavor losing its last Node gets drained).
298-
// - updated if its feature labels or taints have changed.
296+
// - updated if its managed mark, feature labels or taints have
297+
// changed (a node leaving management drains its orphaned flavors).
299298
ctrlpredicate.Funcs{
300299
UpdateFunc: func(e ctrlevent.UpdateEvent) bool {
301-
if aggressive {
302-
return e.ObjectNew.GetDeletionTimestamp() == nil
303-
}
304-
305300
oldNd, newNd := e.ObjectOld.(*core.Node), e.ObjectNew.(*core.Node)
306301
if newNd.DeletionTimestamp == nil {
307-
// Fire when labels have changed.
302+
// Fire when the managed mark or feature labels have changed.
308303
if !mapx.EqualWithStringPrefix(oldNd.Labels, newNd.Labels,
304+
systemname.ManagedLabelKey,
309305
nodefeature.FeatureLabelPrefix,
310306
nodefeature.GeneralFeatureLabelPrefix,
311307
nodefeature.AcceleratableFeatureLabelPrefix) {

pkg/worker/controllers/worker/resourceflavor_test.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ func TestResourceFlavorReconciler_Reconcile(t *testing.T) {
8989
withFlavor bool
9090
flavorDraining bool
9191
withNode bool
92+
nodeUnmanaged bool // node present but gpustack.ai/managed=false
9293

9394
wantExists bool
9495
wantDraining bool // _ResourceFlavorDrainAnnoKey present
@@ -128,6 +129,18 @@ func TestResourceFlavorReconciler_Reconcile(t *testing.T) {
128129
withNode: true,
129130
wantExists: true,
130131
},
132+
{
133+
// A Node exists but is no longer managed (gpustack.ai/managed=false), so
134+
// indexNodeByFlavorProfile excludes it: the flavor is orphaned and must be
135+
// marked draining. Guards the index's managed filter (the path a node
136+
// leaving management relies on); does NOT exercise the Node-watch predicate.
137+
name: "unmanaged node drains flavor",
138+
withFlavor: true,
139+
withNode: true,
140+
nodeUnmanaged: true,
141+
wantExists: true,
142+
wantDraining: true,
143+
},
131144
}
132145

133146
for _, c := range cases {
@@ -138,7 +151,11 @@ func TestResourceFlavorReconciler_Reconcile(t *testing.T) {
138151
objs = append(objs, newNodesResourceFlavor(flavorName, c.flavorDraining))
139152
}
140153
if c.withNode {
141-
objs = append(objs, newGeneralNode("node-1"))
154+
nd := newGeneralNode("node-1")
155+
if c.nodeUnmanaged {
156+
nd.Labels[systemname.ManagedLabelKey] = "false"
157+
}
158+
objs = append(objs, nd)
142159
}
143160
cli := buildFlavorClient(objs...)
144161

0 commit comments

Comments
 (0)