Skip to content

Commit 1450ec4

Browse files
authored
CSPL-4630: add feature gate infrastructure using k8s FeatureGate pattern
Introduce a feature gate registry backed by k8s.io/component-base/featuregate so new capabilities can be gated behind --feature-gates=<Gate>=true|false with Alpha/Beta/GA lifecycle. Migrate the validation webhook toggle from the ENABLE_VALIDATION_WEBHOOK env var to a ValidationWebhook feature gate with backwards-compatible env var support.

- Add pkg/config/featuregates.go registry with SetupFeatureGates and IsFeatureGateEnabled helpers, plus unit tests
- Wire feature gate flags into cmd/main.go and log effective gate states at startup
- Use os.LookupEnv for ENABLE_VALIDATION_WEBHOOK so the deprecation warning fires whenever the env var is set, not just when it equals "true"
- Update kustomization files with SPLUNK_GENERAL_TERMS value
- Add docs/FeatureGates.md with usage and instructions for adding new gates
- Update docs/ValidationWebhook.md with CLI-over-env-var precedence note
1 parent d0c6386 commit 1450ec4

10 files changed

Lines changed: 350 additions & 62 deletions

File tree

cmd/main.go

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"path/filepath"
2626
"time"
2727

28+
"github.com/spf13/pflag"
2829
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
2930

3031
intController "github.com/splunk/splunk-operator/internal/controller"
@@ -89,24 +90,44 @@ func main() {
8990
// TLS certificate configuration for metrics
9091
var metricsCertPath, metricsCertName, metricsCertKey string
9192

92-
flag.StringVar(&logEncoder, "log-encoder", "json", "log encoding ('json' or 'console')")
93-
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
94-
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
93+
pflag.StringVar(&logEncoder, "log-encoder", "json", "log encoding ('json' or 'console')")
94+
pflag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
95+
pflag.BoolVar(&enableLeaderElection, "leader-elect", false,
9596
"Enable leader election for controller manager. "+
9697
"Enabling this will ensure there is only one active controller manager.")
97-
flag.BoolVar(&pprofActive, "pprof", true, "Enable pprof endpoint")
98-
flag.IntVar(&logLevel, "log-level", int(zapcore.InfoLevel), "set log level")
99-
flag.IntVar(&leaseDurationSecond, "lease-duration", leaseDurationSecond, "manager lease duration in seconds")
100-
flag.IntVar(&renewDeadlineSecond, "renew-duration", renewDeadlineSecond, "manager renew duration in seconds")
101-
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metrics endpoint binds to. "+
98+
pflag.BoolVar(&pprofActive, "pprof", true, "Enable pprof endpoint")
99+
pflag.IntVar(&logLevel, "log-level", int(zapcore.InfoLevel), "set log level")
100+
pflag.IntVar(&leaseDurationSecond, "lease-duration", leaseDurationSecond, "manager lease duration in seconds")
101+
pflag.IntVar(&renewDeadlineSecond, "renew-duration", renewDeadlineSecond, "manager renew duration in seconds")
102+
pflag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metrics endpoint binds to. "+
102103
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
103-
flag.BoolVar(&secureMetrics, "metrics-secure", false,
104+
pflag.BoolVar(&secureMetrics, "metrics-secure", false,
104105
"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
105106

106107
// TLS certificate flags for metrics server
107-
flag.StringVar(&metricsCertPath, "metrics-cert-path", "", "The directory that contains the metrics server certificate.")
108-
flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
109-
flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
108+
pflag.StringVar(&metricsCertPath, "metrics-cert-path", "", "The directory that contains the metrics server certificate.")
109+
pflag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
110+
pflag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
111+
112+
config.DefaultMutableFeatureGate.AddFlag(pflag.CommandLine)
113+
114+
opts := zap.Options{
115+
Development: true,
116+
TimeEncoder: zapcore.RFC3339NanoTimeEncoder,
117+
}
118+
opts.BindFlags(flag.CommandLine)
119+
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
120+
pflag.Parse()
121+
122+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
123+
124+
if allGates := config.DefaultMutableFeatureGate.GetAll(); len(allGates) > 0 {
125+
effectiveStates := make(map[string]bool, len(allGates))
126+
for gate := range allGates {
127+
effectiveStates[string(gate)] = config.DefaultMutableFeatureGate.Enabled(gate)
128+
}
129+
setupLog.Info("Feature gates initialized", "gates", effectiveStates)
130+
}
110131

111132
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
112133
// More info:
@@ -147,16 +168,6 @@ func main() {
147168
renewDeadline = time.Duration(renewDeadlineSecond) * time.Second
148169
}
149170

150-
opts := zap.Options{
151-
Development: true,
152-
TimeEncoder: zapcore.RFC3339NanoTimeEncoder,
153-
}
154-
opts.BindFlags(flag.CommandLine)
155-
flag.Parse()
156-
157-
// Logging setup
158-
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
159-
160171
// Configure metrics certificate watcher if metrics certs are provided
161172
var metricsCertWatcher *certwatcher.CertWatcher
162173
if len(metricsCertPath) > 0 {
@@ -280,10 +291,11 @@ func main() {
280291
os.Exit(1)
281292
}
282293

283-
// Setup centralized validation webhook server (opt-in via ENABLE_VALIDATION_WEBHOOK env var, defaults to false)
284-
enableWebhooks := os.Getenv("ENABLE_VALIDATION_WEBHOOK")
285-
if enableWebhooks == "true" {
286-
// Parse optional timeout configurations from environment
294+
if _, ok := os.LookupEnv("ENABLE_VALIDATION_WEBHOOK"); ok {
295+
setupLog.Info("DEPRECATED: ENABLE_VALIDATION_WEBHOOK env var is deprecated and will be removed in a future release; use --feature-gates=ValidationWebhook=true instead")
296+
}
297+
298+
if config.DefaultMutableFeatureGate.Enabled(config.ValidationWebhook) {
287299
readTimeout := 10 * time.Second
288300
if val := os.Getenv("WEBHOOK_READ_TIMEOUT"); val != "" {
289301
if d, err := time.ParseDuration(val); err == nil {
@@ -306,16 +318,15 @@ func main() {
306318
Client: mgr.GetClient(),
307319
})
308320

309-
// Add webhook server as a runnable to the manager
310321
if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
311322
return webhookServer.Start(ctx)
312323
})); err != nil {
313324
setupLog.Error(err, "unable to add webhook server to manager")
314325
os.Exit(1)
315326
}
316-
setupLog.Info("Validation webhook enabled via ENABLE_VALIDATION_WEBHOOK=true")
327+
setupLog.Info("Validation webhook enabled")
317328
} else {
318-
setupLog.Info("Validation webhook disabled (set ENABLE_VALIDATION_WEBHOOK=true to enable)")
329+
setupLog.Info("Validation webhook disabled (set --feature-gates=ValidationWebhook=true to enable)")
319330
}
320331
//+kubebuilder:scaffold:builder
321332

config/default-with-webhook/kustomization-cluster.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Adds namespace to all resources.
22
# Cluster-scoped deployment WITH webhook enabled (opt-in)
33
# Requires cert-manager to be installed in the cluster
4-
namespace: splunk-operator
4+
namespace: splunk-operator
55

66
# Value of this field is prepended to the
77
# names of all resources, e.g. a deployment named
@@ -115,7 +115,7 @@ patches:
115115
patch: |-
116116
- op: add
117117
path: /spec/template/spec/containers/0/env
118-
value:
118+
value:
119119
- name: WATCH_NAMESPACE
120120
value: WATCH_NAMESPACE_VALUE
121121
- name: RELATED_IMAGE_SPLUNK_ENTERPRISE
@@ -124,12 +124,13 @@ patches:
124124
value: splunk-operator
125125
- name: SPLUNK_GENERAL_TERMS
126126
value: SPLUNK_GENERAL_TERMS_VALUE
127-
- name: ENABLE_VALIDATION_WEBHOOK
128-
value: "true"
129127
- name: POD_NAME
130128
valueFrom:
131129
fieldRef:
132130
fieldPath: metadata.name
131+
- op: add
132+
path: /spec/template/spec/containers/0/args/-
133+
value: --feature-gates=ValidationWebhook=true
133134
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
134135
# More info: https://book.kubebuilder.io/reference/metrics
135136
- path: manager_metrics_patch.yaml

config/default-with-webhook/kustomization-namespace.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Adds namespace to all resources.
22
# Namespace-scoped deployment WITH webhook enabled (opt-in)
33
# Requires cert-manager to be installed in the cluster
4-
namespace: splunk-operator
4+
namespace: splunk-operator
55

66
# Value of this field is prepended to the
77
# names of all resources, e.g. a deployment named
@@ -115,7 +115,7 @@ patches:
115115
patch: |-
116116
- op: add
117117
path: /spec/template/spec/containers/0/env
118-
value:
118+
value:
119119
- name: WATCH_NAMESPACE
120120
valueFrom:
121121
fieldRef:
@@ -126,12 +126,13 @@ patches:
126126
value: splunk-operator
127127
- name: SPLUNK_GENERAL_TERMS
128128
value: SPLUNK_GENERAL_TERMS_VALUE
129-
- name: ENABLE_VALIDATION_WEBHOOK
130-
value: "true"
131129
- name: POD_NAME
132130
valueFrom:
133131
fieldRef:
134132
fieldPath: metadata.name
133+
- op: add
134+
path: /spec/template/spec/containers/0/args/-
135+
value: --feature-gates=ValidationWebhook=true
135136
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
136137
# More info: https://book.kubebuilder.io/reference/metrics
137138
- path: manager_metrics_patch.yaml

config/default-with-webhook/kustomization.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Adds namespace to all resources.
22
# Cluster-scoped deployment WITH webhook enabled (opt-in)
33
# Requires cert-manager to be installed in the cluster
4-
namespace: splunk-operator
4+
namespace: splunk-operator
55

66
# Value of this field is prepended to the
77
# names of all resources, e.g. a deployment named
@@ -115,21 +115,22 @@ patches:
115115
patch: |-
116116
- op: add
117117
path: /spec/template/spec/containers/0/env
118-
value:
118+
value:
119119
- name: WATCH_NAMESPACE
120120
value: WATCH_NAMESPACE_VALUE
121121
- name: RELATED_IMAGE_SPLUNK_ENTERPRISE
122122
value: SPLUNK_ENTERPRISE_IMAGE
123123
- name: OPERATOR_NAME
124124
value: splunk-operator
125125
- name: SPLUNK_GENERAL_TERMS
126-
value: WATCH_NAMESPACE_VALUE
127-
- name: ENABLE_VALIDATION_WEBHOOK
128-
value: "true"
126+
value: SPLUNK_GENERAL_TERMS_VALUE
129127
- name: POD_NAME
130128
valueFrom:
131129
fieldRef:
132130
fieldPath: metadata.name
131+
- op: add
132+
path: /spec/template/spec/containers/0/args/-
133+
value: --feature-gates=ValidationWebhook=true
133134
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
134135
# More info: https://book.kubebuilder.io/reference/metrics
135136
- path: manager_metrics_patch.yaml

docs/FeatureGates.md

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# Feature Gates
2+
3+
The Splunk Operator uses the Kubernetes [FeatureGate](https://pkg.go.dev/k8s.io/component-base/featuregate) pattern to control the rollout of new functionality. Feature gates allow new code to be merged to the main branch without being activated in production, giving teams a safe, per-environment opt-in mechanism.
4+
5+
## Usage
6+
7+
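Enable or disable feature gates at operator startup with the `--feature-gates` flag, which takes a comma-separated list of `<Gate>=true|false` pairs (for example, `--feature-gates=GateA=true,GateB=false`):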
8+
9+
```bash
10+
/manager --feature-gates=ValidationWebhook=true
11+
```
12+
13+
## Maturity Lifecycle
14+
15+
| Stage | Default | Can Override | Next Step |
16+
|-----------|---------|-------------|-----------------------------------------|
17+
| **Alpha** | off | Yes | Promote to Beta after validation |
18+
| **Beta** | on | Yes | Promote to GA after sustained stability |
19+
| **GA** | on | No | Remove gate in a future release |
20+
21+
## Current Feature Gates
22+
23+
| Gate | Default | Stage | Since | Description |
24+
|-----------------------|---------|-------|---------|----------------------------------------------------------|
25+
| `ValidationWebhook` | `false` | Alpha | v3.2.0 | Centralized validation webhook server for CR admission |
26+
27+
## Adding a New Feature Gate
28+
29+
Follow these steps:
30+
31+
### 1. Register the gate in `pkg/config/featuregates.go`
32+
33+
Add a constant and an entry in `defaultFeatureGates`:
34+
35+
```go
36+
const (
37+
MyNewFeature featuregate.Feature = "MyNewFeature"
38+
)
39+
40+
var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
41+
// existing gates …
42+
MyNewFeature: {Default: false, PreRelease: featuregate.Alpha},
43+
}
44+
```
45+
46+
### 2. Guard the code path
47+
48+
Check the gate wherever the feature-specific logic runs:
49+
50+
```go
51+
if config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
52+
// feature-specific logic
53+
}
54+
```
55+
56+
This can guard anything — a reconciler code path, a helper function, a webhook handler, an HTTP endpoint, etc.
57+
58+
### Example: Gating a New Controller (CRD)
59+
60+
When the feature gate introduces an entirely new CRD and controller, there are additional steps beyond the basic gate check. All three steps below are **mandatory** for any new CRD behind a feature gate.
61+
62+
#### a. Gate controller registration in `cmd/main.go`
63+
64+
Wrap the `SetupWithManager` call so the controller only starts when the gate is on:
65+
66+
```go
67+
if config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
68+
if err = (&controller.MyNewReconciler{
69+
Client: mgr.GetClient(),
70+
Scheme: mgr.GetScheme(),
71+
}).SetupWithManager(mgr); err != nil {
72+
setupLog.Error(err, "unable to create controller", "controller", "MyNew")
73+
os.Exit(1)
74+
}
75+
}
76+
```
77+
78+
#### b. Add a validating webhook for the gated CRD group
79+
80+
A validating webhook **must** reject CR creation when the gate is off. Without this, users can create resources that no controller will reconcile, leading to silent failures:
81+
82+
```go
83+
func (v *MyNewValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
84+
if !config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
85+
return nil, fmt.Errorf(
86+
"the MyNewFeature feature is not enabled; "+
87+
"set --feature-gates=MyNewFeature=true to activate")
88+
}
89+
return nil, nil
90+
}
91+
```
92+
93+
#### c. Label the CRD manifests
94+
95+
Every gated CRD **must** carry maturity annotations and labels in `config/crd/bases/`. These signal to cluster administrators and tooling which gate controls the CRD and its current stability level:
96+
97+
```yaml
98+
metadata:
99+
annotations:
100+
splunk.com/feature-gate: MyNewFeature
101+
splunk.com/feature-stage: Alpha
102+
labels:
103+
splunk.com/feature-stage: alpha
104+
```
105+
106+
## Promoting a Gate
107+
108+
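- **Alpha → Beta**: In `featuregates.go`, change `PreRelease: featuregate.Alpha` to `PreRelease: featuregate.Beta` and `Default: false` to `Default: true`; update the CRD `splunk.com/feature-stage` annotation to `Beta` and the label to `beta`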
109+
- **Beta → GA**: Set `PreRelease: featuregate.GA` and `LockToDefault: true` in the `FeatureSpec`; update the CRD `splunk.com/feature-stage` annotation to `GA` and the label to `ga`
110+
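- **GA → Removed**: Delete the constant and `FeatureSpec` entry; remove the `if` guard in `cmd/main.go` while keeping the guarded code so it runs unconditionally; remove the CRD annotations/labels and the validating webhook's gate check (or the webhook itself, if rejecting on a disabled gate was its only purpose)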

0 commit comments

Comments
 (0)