@@ -39,13 +39,13 @@ const (
3939// or the device-plugin; the driver is installed separately (host-installed or via an
4040// NVIDIADriver CR) and GPUClusterConfig waits for driver readiness before proceeding.
4141type GPUClusterConfigSpec struct {
42- // DraDriver defines the spec for the NVIDIA DRA driver stack (gpus + computeDomains).
43- DraDriver DraDriverSpec `json:"draDriver"`
42+ // DRADriver defines the spec for the NVIDIA DRA driver stack (gpus + computeDomains).
43+ DRADriver DRADriverSpec `json:"draDriver"`
4444
4545 // DCGM defines the spec for the standalone NVIDIA DCGM hostengine. Disabled by default;
46- // when disabled, dcgm-exporter uses its embedded nv-hostengine.
47- // NOTE: the reused enabled field carries no server-side default, so an omitted enabled
48- // is nil here; the controller is responsible for interpreting nil (see defaults handling).
46+ // when disabled, dcgm-exporter uses its embedded nv-hostengine. NOTE: the reused enabled
47+ // field carries no server-side default and its IsEnabled() treats nil as enabled, so the
48+ // controller must default a nil enabled to disabled here.
4949 DCGM * nvidiav1.DCGMSpec `json:"dcgm,omitempty"`
5050
5151 // DCGMExporter defines the spec for NVIDIA DCGM Exporter. Enabled by default, but the
@@ -64,9 +64,9 @@ type GPUClusterConfigSpec struct {
6464 Daemonsets nvidiav1.DaemonsetsSpec `json:"daemonsets,omitempty"`
6565}
6666
67- // DraDriverSpec defines the spec for the NVIDIA DRA driver stack. There is no top-level
67+ // DRADriverSpec defines the spec for the NVIDIA DRA driver stack. There is no top-level
6868// enabled toggle; enablement is per capability (gpus / computeDomains).
69- type DraDriverSpec struct {
69+ type DRADriverSpec struct {
7070 // NVIDIA DRA driver image repository
7171 // +kubebuilder:validation:Optional
7272 Repository string `json:"repository,omitempty"`
@@ -87,45 +87,60 @@ type DraDriverSpec struct {
8787 // +kubebuilder:validation:Optional
8888 ImagePullSecrets []string `json:"imagePullSecrets,omitempty"`
8989
90+ // FeatureGates is a map of feature gate names to a boolean enabling or disabling each.
91+ // It is rendered as the FEATURE_GATES environment variable on the DRA driver containers.
92+ // +kubebuilder:validation:Optional
93+ FeatureGates map [string ]bool `json:"featureGates,omitempty"`
94+
9095 // GPUs configures the gpu.nvidia.com capability of the DRA driver.
91- GPUs DraDriverGPUsSpec `json:"gpus,omitempty"`
96+ GPUs DRADriverGPUsSpec `json:"gpus,omitempty"`
9297
9398 // ComputeDomains configures the compute-domain capability of the DRA driver.
94- ComputeDomains DraDriverComputeDomainsSpec `json:"computeDomains,omitempty"`
99+ ComputeDomains DRADriverComputeDomainsSpec `json:"computeDomains,omitempty"`
100+ }
101+
102+ // IsGPUsEnabled reports whether the gpus capability of the DRA driver is enabled; a nil GPUs.Enabled is treated as disabled.
103+ func (d * DRADriverSpec ) IsGPUsEnabled () bool {
104+ return d .GPUs .Enabled != nil && * d .GPUs .Enabled
105+ }
106+
107+ // IsComputeDomainsEnabled reports whether the computeDomains capability of the DRA driver is enabled; a nil ComputeDomains.Enabled is treated as disabled.
108+ func (d * DRADriverSpec ) IsComputeDomainsEnabled () bool {
109+ return d .ComputeDomains .Enabled != nil && * d .ComputeDomains .Enabled
95110}
96111
97- // DraDriverGPUsSpec configures the gpus capability of the DRA driver. It maps onto the
112+ // DRADriverGPUsSpec configures the gpus capability of the DRA driver. It maps onto the
98113// gpus container of the upstream kubelet-plugin DaemonSet.
99- type DraDriverGPUsSpec struct {
114+ type DRADriverGPUsSpec struct {
100115 // Enabled indicates if the gpus capability of the DRA driver is enabled.
101116 // +kubebuilder:default=true
102117 Enabled * bool `json:"enabled,omitempty"`
103118
104119 // KubeletPlugin configures the kubelet-plugin workload for the gpus capability.
105- KubeletPlugin DraDriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"`
120+ KubeletPlugin DRADriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"`
106121}
107122
108- // DraDriverComputeDomainsSpec configures the computeDomains capability of the DRA driver.
123+ // DRADriverComputeDomainsSpec configures the computeDomains capability of the DRA driver.
109124// The kubeletPlugin maps onto the computeDomains container of the upstream kubelet-plugin
110125// DaemonSet; the controller is a separate Deployment.
111- type DraDriverComputeDomainsSpec struct {
126+ type DRADriverComputeDomainsSpec struct {
112127 // Enabled indicates if the computeDomains capability of the DRA driver is enabled.
113128 // +kubebuilder:default=true
114129 Enabled * bool `json:"enabled,omitempty"`
115130
116131 // Controller configures the compute-domain controller Deployment.
117- Controller DraDriverControllerSpec `json:"controller,omitempty"`
132+ Controller DRADriverControllerSpec `json:"controller,omitempty"`
118133
119134 // KubeletPlugin configures the kubelet-plugin workload for the computeDomains capability.
120- KubeletPlugin DraDriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"`
135+ KubeletPlugin DRADriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"`
121136}
122137
123- // DraDriverKubeletPluginSpec defines configuration for a DRA driver kubelet-plugin container.
138+ // DRADriverKubeletPluginSpec defines configuration for a DRA driver kubelet-plugin container.
124139// Per-component scheduling fields augment/override the shared daemonsets defaults for this
125140// workload. The gpus and computeDomains kubelet-plugin blocks map onto the two containers of
126141// a single kubelet-plugin DaemonSet, so the renderer reconciles pod-level scheduling when
127142// both blocks set it.
128- type DraDriverKubeletPluginSpec struct {
143+ type DRADriverKubeletPluginSpec struct {
129144 // Optional: List of environment variables
130145 // +kubebuilder:validation:Optional
131146 Env []nvidiav1.EnvVar `json:"env,omitempty"`
@@ -156,10 +171,10 @@ type DraDriverKubeletPluginSpec struct {
156171 Affinity * corev1.Affinity `json:"affinity,omitempty"`
157172}
158173
159- // DraDriverControllerSpec defines configuration for the compute-domain controller Deployment.
174+ // DRADriverControllerSpec defines configuration for the compute-domain controller Deployment.
160175// As a Deployment (not a DaemonSet) it carries its own scheduling configuration rather than
161176// inheriting the shared daemonsets defaults.
162- type DraDriverControllerSpec struct {
177+ type DRADriverControllerSpec struct {
163178 // Optional: List of environment variables
164179 // +kubebuilder:validation:Optional
165180 Env []nvidiav1.EnvVar `json:"env,omitempty"`
0 commit comments