Skip to content

Commit d02c3fa

Browse files
scotwells and claude committed
feat(controller): gate new instances on referenced data behind a feature flag
Instances whose template references ConfigMaps or Secrets now receive a ReferencedData scheduling gate at creation, so the provider does not launch them before their mounted data exists on the cell. Launching ungated would surface as containers missing env vars or mount sources rather than a clear pending state. Stamping is controlled by the new enableReferencedDataGate feature flag, default off: the gate must not be introduced until the cell gate-clearing reconciler and provider gate-honoring are deployed everywhere, otherwise gated instances would stall indefinitely or launch without their data. Templates with no references never receive the gate regardless of the flag.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent 2a06ed4 commit d02c3fa

7 files changed

Lines changed: 88 additions & 8 deletions

File tree

cmd/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,12 @@ func main() {
273273
}
274274

275275
if enableCellControllers {
276+
wdOpts := controller.WorkloadDeploymentReconcilerOptions{
277+
EnableReferencedDataGate: serverConfig.FeatureFlags.EnableReferencedDataGate,
278+
}
276279
if err = (&controller.WorkloadDeploymentReconciler{
277280
NetworkingEnabled: features.FeatureGate.Enabled(features.NetworkingIntegration),
278-
}).SetupWithManager(mgr); err != nil {
281+
}).SetupWithManager(mgr, wdOpts); err != nil {
279282
setupLog.Error(err, "unable to create controller", "controller", "WorkloadDeployment")
280283
os.Exit(1)
281284
}

internal/config/config.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ type WorkloadOperator struct {
3737

3838
Discovery DiscoveryConfig `json:"discovery"`
3939

40+
// FeatureFlags configures optional management-plane feature gates.
41+
FeatureFlags FeatureFlagsConfig `json:"featureFlags,omitempty"`
42+
4043
// ReferencedData configures the ReferencedDataController.
4144
ReferencedData ReferencedDataConfig `json:"referencedData,omitempty"`
4245
}
@@ -60,6 +63,26 @@ type ReferencedDataConfig struct {
6063

6164
// +k8s:deepcopy-gen=true
6265

66+
// FeatureFlagsConfig holds management-plane feature gates. All flags default
67+
// to false (off) unless explicitly enabled, so that new capabilities can be
68+
// merged and deployed safely before the full feature rollout is complete.
69+
type FeatureFlagsConfig struct {
70+
// EnableReferencedDataGate controls whether new Instances receive the
71+
// "ReferencedData" scheduling gate when the workload template references
72+
// ConfigMaps or Secrets.
73+
//
74+
// This gate MUST NOT be enabled until both the cell gate-clearing reconciler
75+
// (Phase 2) and the unikraft provider gate-honoring (Phase 3) are confirmed
76+
// deployed everywhere. Enabling it prematurely will cause gated instances to
77+
// either stall indefinitely (cell not yet clearing) or launch without the
78+
// referenced data mounted (provider not yet honoring gates).
79+
//
80+
// Defaults to false.
81+
EnableReferencedDataGate bool `json:"enableReferencedDataGate,omitempty"`
82+
}
83+
84+
// +k8s:deepcopy-gen=true
85+
6386
type WebhookServerConfig struct {
6487
// Host is the address that the server will listen on.
6588
// Defaults to "" - all addresses.

internal/config/zz_generated.deepcopy.go

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/controller/instancecontrol/scheduling_gates.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ type SchedulingGate string
55
const (
66
NetworkSchedulingGate SchedulingGate = "Network"
77
QuotaSchedulingGate SchedulingGate = "Quota"
8+
9+
// ReferencedDataSchedulingGate is stamped on new instances when the workload
10+
// template references ConfigMaps or Secrets AND the management-plane feature
11+
// flag EnableReferencedDataGate is enabled. It is cleared by the cell
12+
// InstanceReconciler once all expected companion objects are present.
13+
ReferencedDataSchedulingGate SchedulingGate = "ReferencedData"
814
)
915

1016
func (s SchedulingGate) String() string {

internal/controller/instancecontrol/stateful/stateful_control.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"go.datum.net/compute/api/v1alpha"
1515
"go.datum.net/compute/internal/controller/instancecontrol"
16+
"go.datum.net/compute/internal/referenceddata"
1617
)
1718

1819
// Options controls optional behaviours of the stateful instance control strategy.
@@ -22,6 +23,12 @@ type Options struct {
2223
// disabled so that Instances are not blocked waiting for a NetworkBinding.
2324
// Defaults to true.
2425
NetworkingEnabled bool
26+
27+
// EnableReferencedDataGate controls whether new Instances receive the
28+
// ReferencedData scheduling gate when the workload template references
29+
// ConfigMaps or Secrets. Defaults to false. See FeatureFlagsConfig for the
30+
// full safety rationale.
31+
EnableReferencedDataGate bool
2532
}
2633

2734
// Behavior inspired by https://github.com/kubernetes/kubernetes/tree/master/pkg/controller/statefulset
@@ -103,6 +110,16 @@ func (c *statefulControl) GetActions(
103110
{Name: instancecontrol.NetworkSchedulingGate.String()},
104111
}, gates...)
105112
}
113+
114+
// Stamp the ReferencedData gate only when the management-plane feature
115+
// flag is on AND the template actually references ConfigMaps or Secrets.
116+
// The gate must not be inserted before the cell gate-clearing reconciler
117+
// and provider gate-honoring are deployed everywhere — see
118+
// FeatureFlagsConfig.EnableReferencedDataGate for the full rationale.
119+
if c.opts.EnableReferencedDataGate && referenceddata.TemplateReferencesData(deployment.Spec.Template) {
120+
gates = append(gates, v1alpha.SchedulingGate{Name: instancecontrol.ReferencedDataSchedulingGate.String()})
121+
}
122+
106123
desiredInstances[i].Spec.Controller = &v1alpha.InstanceController{
107124
TemplateHash: instanceTemplateHash,
108125
SchedulingGates: gates,

internal/controller/instancecontrol/stateful/stateful_control_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func init() {
2929

3030
func TestFreshDeployment(t *testing.T) {
3131
ctx := context.Background()
32-
control := New()
32+
control := NewWithOptions(Options{})
3333

3434
deployment := getWorkloadDeployment("test-fresh-deploy", 2)
3535

@@ -56,7 +56,7 @@ func TestFreshDeployment(t *testing.T) {
5656
// at creation time and ignores spec changes on an existing pod.
5757
func TestUpdateWithAllReadyInstances(t *testing.T) {
5858
ctx := context.Background()
59-
control := New()
59+
control := NewWithOptions(Options{})
6060

6161
deployment := getWorkloadDeployment("test-deploy", 2)
6262

@@ -82,7 +82,7 @@ func TestUpdateWithAllReadyInstances(t *testing.T) {
8282

8383
func TestScaleUpWithNotReadyInstance(t *testing.T) {
8484
ctx := context.Background()
85-
control := New()
85+
control := NewWithOptions(Options{})
8686

8787
deployment := getWorkloadDeployment("test-deploy", 3)
8888

@@ -112,7 +112,7 @@ func TestScaleUpWithNotReadyInstance(t *testing.T) {
112112

113113
func TestScaleUpWithDeletingReadyInstance(t *testing.T) {
114114
ctx := context.Background()
115-
control := New()
115+
control := NewWithOptions(Options{})
116116

117117
deployment := getWorkloadDeployment("test-deploy", 3)
118118

@@ -139,7 +139,7 @@ func TestScaleUpWithDeletingReadyInstance(t *testing.T) {
139139

140140
func TestScaleDownWithAllReadyInstances(t *testing.T) {
141141
ctx := context.Background()
142-
control := New()
142+
control := NewWithOptions(Options{})
143143

144144
deployment := getWorkloadDeployment("test-deploy", 1)
145145

internal/controller/workloaddeployment_controller.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ type WorkloadDeploymentReconciler struct {
4545
// actively removed if present), and the networking step is treated as
4646
// immediately ready. Defaults to true.
4747
NetworkingEnabled bool
48+
49+
// enableReferencedDataGate mirrors FeatureFlagsConfig.EnableReferencedDataGate.
50+
// When true, new Instances whose template references ConfigMaps or Secrets
51+
// receive the ReferencedData scheduling gate at creation time.
52+
enableReferencedDataGate bool
4853
}
4954

5055
// +kubebuilder:rbac:groups=compute.datumapis.com,resources=workloaddeployments,verbs=get;list;watch;create;update;patch;delete
@@ -113,7 +118,8 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
113118
}
114119

115120
instanceControl := instancecontrolstateful.NewWithOptions(instancecontrolstateful.Options{
116-
NetworkingEnabled: r.NetworkingEnabled,
121+
NetworkingEnabled: r.NetworkingEnabled,
122+
EnableReferencedDataGate: r.enableReferencedDataGate,
117123
})
118124

119125
actions, err := instanceControl.GetActions(ctx, cl.GetScheme(), &deployment, instances.Items)
@@ -549,9 +555,18 @@ func (r *WorkloadDeploymentReconciler) Finalize(ctx context.Context, obj client.
549555
return finalizer.Result{}, errDeploymentHasInstances
550556
}
551557

558+
// WorkloadDeploymentReconcilerOptions configures the WorkloadDeploymentReconciler.
559+
type WorkloadDeploymentReconcilerOptions struct {
560+
// EnableReferencedDataGate mirrors FeatureFlagsConfig.EnableReferencedDataGate.
561+
EnableReferencedDataGate bool
562+
}
563+
552564
// SetupWithManager sets up the controller with the Manager.
553-
func (r *WorkloadDeploymentReconciler) SetupWithManager(mgr mcmanager.Manager) error {
565+
func (r *WorkloadDeploymentReconciler) SetupWithManager(mgr mcmanager.Manager, opts ...WorkloadDeploymentReconcilerOptions) error {
554566
r.mgr = mgr
567+
for _, o := range opts {
568+
r.enableReferencedDataGate = o.EnableReferencedDataGate
569+
}
555570
r.finalizers = finalizer.NewFinalizers()
556571
if err := r.finalizers.Register(workloadControllerFinalizer, r); err != nil {
557572
return fmt.Errorf("failed to register finalizer: %w", err)

0 commit comments

Comments
 (0)