Skip to content

Commit bfecd6c

Browse files
committed
fix: Address review comments
Signed-off-by: Tsubasa Watanabe <w.tsubasa@fujitsu.com>
1 parent 3429d04 commit bfecd6c

10 files changed

Lines changed: 168 additions & 48 deletions

File tree

api/nvidia.com/resource/v1beta1/computedomain.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ const (
2828
ComputeDomainChannelAllocationModeSingle = "Single"
2929
ComputeDomainChannelAllocationModeAll = "All"
3030

31-
ComputeDomainBindingConditions = "IMEXDaemonSettingsDone"
32-
ComputeDomainBindingFailureConditions = "IMEXDaemonSettingsFailed"
31+
ComputeDomainBindingConditions = "ComputeDomainReady"
32+
ComputeDomainBindingFailureConditions = "ComputeDomainNotReady"
3333
)
3434

3535
// +genclient

cmd/compute-domain-controller/computedomain.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"k8s.io/klog/v2"
2929

3030
nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
31+
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
3132
nvinformers "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/nvidia.com/informers/externalversions"
3233
nvlisters "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/nvidia.com/listers/resource/v1beta1"
3334
)
@@ -160,8 +161,10 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
160161
return fmt.Errorf("error starting Node manager: %w", err)
161162
}
162163

163-
if err := m.resourceClaimManager.Start(ctx); err != nil {
164-
return fmt.Errorf("error starting ResourceClaim manager: %w", err)
164+
if featuregates.Enabled(featuregates.ComputeDomainBindingConditions) {
165+
if err := m.resourceClaimManager.Start(ctx); err != nil {
166+
return fmt.Errorf("error starting ResourceClaim manager: %w", err)
167+
}
165168
}
166169

167170
return nil
@@ -177,8 +180,10 @@ func (m *ComputeDomainManager) Stop() error {
177180
if err := m.nodeManager.Stop(); err != nil {
178181
return fmt.Errorf("error stopping Node manager: %w", err)
179182
}
180-
if err := m.resourceClaimManager.Stop(); err != nil {
181-
return fmt.Errorf("error stopping ResourceClaim manager: %w", err)
183+
if featuregates.Enabled(featuregates.ComputeDomainBindingConditions) {
184+
if err := m.resourceClaimManager.Stop(); err != nil {
185+
return fmt.Errorf("error stopping ResourceClaim manager: %w", err)
186+
}
182187
}
183188
if m.cancelContext != nil {
184189
m.cancelContext()

cmd/compute-domain-controller/daemonsetpods.go

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,10 @@ func (m *DaemonSetPodManager) Start(ctx context.Context) (rerr error) {
8585
}
8686
}()
8787

88-
if err := addComputeDomainLabelIndexer[*corev1.Pod](m.informer); err != nil {
89-
return fmt.Errorf("error adding indexer for MultiNodeEnvironment label: %w", err)
88+
if err := addComputeDomainNodePodInexer(m.informer); err != nil {
89+
return fmt.Errorf("error adding ComputeDomain Node indexer: %w", err)
9090
}
9191

92-
m.mutationCache = cache.NewIntegerResourceVersionMutationCache(
93-
klog.Background(),
94-
m.informer.GetStore(),
95-
m.informer.GetIndexer(),
96-
mutationCacheTTL,
97-
true,
98-
)
99-
10092
m.waitGroup.Add(1)
10193
go func() {
10294
defer m.waitGroup.Done()
@@ -124,16 +116,12 @@ func (m *DaemonSetPodManager) List() ([]*corev1.Pod, error) {
124116
}
125117

126118
func (m *DaemonSetPodManager) Get(ctx context.Context, cdUID string, nodeName string) (*corev1.Pod, error) {
127-
pods, err := getByComputeDomainUID[*corev1.Pod](ctx, m.mutationCache, cdUID)
119+
pod, err := getByComputeDomainUIDAndNode(ctx, m.informer.GetIndexer(), cdUID, nodeName)
128120
if err != nil {
129121
return nil, fmt.Errorf("error retrieving DaemonSetPods: %w", err)
130122
}
131-
132-
for _, p := range pods {
133-
if p.Spec.NodeName == nodeName {
134-
return p, nil
135-
}
123+
if pod == nil {
124+
return nil, fmt.Errorf("no DeamonSetPod with name %s matching DomainID %s found", nodeName, cdUID)
136125
}
137-
138-
return nil, fmt.Errorf("no DeamonSetPod with name %s matching DomainID %s found", nodeName, cdUID)
126+
return pod, nil
139127
}

cmd/compute-domain-controller/indexers.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222

23+
corev1 "k8s.io/api/core/v1"
2324
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2425
"k8s.io/client-go/tools/cache"
2526
)
@@ -73,3 +74,41 @@ func getByComputeDomainUID[T1 *T2, T2 any](ctx context.Context, indexer Indexer,
7374

7475
return ds, nil
7576
}
77+
78+
func addComputeDomainNodePodInexer(informer cache.SharedIndexInformer) error {
79+
return informer.AddIndexers(cache.Indexers{
80+
"computeDomainNode": func(obj any) ([]string, error) {
81+
pod, ok := obj.(*corev1.Pod)
82+
if !ok {
83+
return nil, fmt.Errorf("expected a *corev1.Pod but got %T", obj)
84+
}
85+
labels := pod.GetObjectMeta().GetLabels()
86+
if value, exists := labels[computeDomainLabelKey]; exists {
87+
nodeName := pod.Spec.NodeName
88+
if len(nodeName) != 0 {
89+
return []string{fmt.Sprintf("%s/%s", value, nodeName)}, nil
90+
}
91+
}
92+
return nil, nil
93+
},
94+
})
95+
}
96+
97+
func getByComputeDomainUIDAndNode(ctx context.Context, indexer Indexer, cdUID string, nodeName string) (*corev1.Pod, error) {
98+
key := fmt.Sprintf("%s/%s", cdUID, nodeName)
99+
objs, err := indexer.ByIndex("computeDomainNode", key)
100+
if err != nil {
101+
return nil, fmt.Errorf("error getting Pod via ComputeDomain+Node index: %w", err)
102+
}
103+
if len(objs) == 0 {
104+
return nil, nil
105+
}
106+
if len(objs) > 1 {
107+
return nil, fmt.Errorf("multiple DaemonSetPods found for cdUID %s and nodeName %s", cdUID, nodeName)
108+
}
109+
pod, ok := objs[0].(*corev1.Pod)
110+
if !ok {
111+
return nil, fmt.Errorf("failed to cast to *corev1.Pod")
112+
}
113+
return pod, nil
114+
}

cmd/compute-domain-controller/mnsdaemonset.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222

2323
appsv1 "k8s.io/api/apps/v1"
24+
corev1 "k8s.io/api/core/v1"
2425

2526
nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
2627
)
@@ -124,3 +125,7 @@ func (m *MultiNamespaceDaemonSetManager) AssertRemoved(ctx context.Context, cdUI
124125
}
125126
return nil
126127
}
128+
129+
func (m *MultiNamespaceDaemonSetManager) GetDaemonSetPod(ctx context.Context, namspace, cdUID, nodeName string) (*corev1.Pod, error) {
130+
return nil, nil
131+
}

cmd/compute-domain-controller/resourceclaim.go

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func (m *ResourceClaimManager) Stop() error {
159159

160160
func (m *ResourceClaimManager) addOrUpdate(ctx context.Context, obj any) error {
161161
rc, ok := obj.(*resourcev1.ResourceClaim)
162-
if !ok || rc == nil {
162+
if !ok {
163163
return fmt.Errorf("failed to cast object to ResourceClaim")
164164
}
165165

@@ -168,13 +168,22 @@ func (m *ResourceClaimManager) addOrUpdate(ctx context.Context, obj any) error {
168168
}
169169

170170
// Check the allocationResult of ResourceClaim to determine whether it should be monitored.
171-
isEligible, domainID := m.checkClaimEligibleForComputeDomainLabeling(rc)
172-
if !isEligible {
171+
req, err := m.getComputeDomainChannelRequestConfig(rc)
172+
if err != nil {
173+
return fmt.Errorf("error getting config for ComputeDomainChannel request from ResourceClaim %s/%s: %w", rc.Namespace, rc.Name, err)
174+
}
175+
176+
if req == nil {
173177
return nil
174178
}
175179

176-
if domainID == "" {
177-
return fmt.Errorf("matching ResourceClaim %s/%s has no domainID in allocation config", rc.Namespace, rc.Name)
180+
// Get domain id
181+
domainID := req.DomainID
182+
183+
//Check namespace
184+
err = m.AssertComputeDomainNamespace(rc.Namespace, domainID)
185+
if err != nil {
186+
return fmt.Errorf("failed to assert Namespace for computeDomain with domainID %s and ResourceClaim %s/%s: %w", domainID, rc.Namespace, rc.Name, err)
178187
}
179188

180189
currentAllocationTimestamp := ""
@@ -198,12 +207,6 @@ func (m *ResourceClaimManager) addOrUpdate(ctx context.Context, obj any) error {
198207
// Cancel a monitor that has already timed out.
199208
m.cancelTimeoutedMonitor(ctx, rc, rcMonitors, currentAllocationTimestamp)
200209

201-
//Check namespace
202-
err = m.AssertComputeDomainNamespace(rc.Namespace, domainID)
203-
if err != nil {
204-
return fmt.Errorf("failed to assert Namespace for computeDomain with domainID %s and ResourceClaim %s/%s: %w", domainID, rc.Namespace, rc.Name, err)
205-
}
206-
207210
//Check if monitoring already launched and return if yes
208211
if _, loaded := rcMonitors.Load(currentAllocationTimestamp); loaded {
209212
return nil
@@ -300,17 +303,24 @@ func (m *ResourceClaimManager) addOrUpdate(ctx context.Context, obj any) error {
300303
return nil
301304
}
302305

303-
// Checks if ResourceClaim is Eligible for Compute Domain Labeling and returns domainID
304-
func (m *ResourceClaimManager) checkClaimEligibleForComputeDomainLabeling(rc *resourcev1.ResourceClaim) (bool, string) {
305-
var domainID string
306+
// getComputeDomainChannelRequestConfig determines if a ResourceClaim is a monitoring target by filtering
307+
// the allocationResult and returns the config information associated with the target allocationResult.
308+
//
309+
// The processing target must meet the following conditions:
310+
// - The driver is "compute-domain.nvidia.com"
311+
// - The device is a channel device (determined by whether its corresponding config is ComputeDomainChannelConfig)
312+
// - The device has BindingConditions
313+
// - The device is not set any BindingConditions/BindingFailureConditions
314+
func (m *ResourceClaimManager) getComputeDomainChannelRequestConfig(rc *resourcev1.ResourceClaim) (*nvapi.ComputeDomainChannelConfig, error) {
315+
var config *nvapi.ComputeDomainChannelConfig
306316

307317
configs, err := GetOpaqueDeviceConfigs(
308318
nvapi.StrictDecoder,
309319
DriverName,
310320
rc.Status.Allocation.Devices.Config,
311321
)
312322
if err != nil {
313-
return false, ""
323+
return nil, err
314324
}
315325

316326
var configResults []*resourcev1.DeviceRequestAllocationResult
@@ -324,8 +334,8 @@ func (m *ResourceClaimManager) checkClaimEligibleForComputeDomainLabeling(rc *re
324334
if slices.Contains(c.Requests, result.Request) {
325335
if _, ok := c.Config.(*nvapi.ComputeDomainChannelConfig); ok {
326336
configResults = append(configResults, &result)
327-
if domainID == "" {
328-
domainID = c.Config.(*nvapi.ComputeDomainChannelConfig).DomainID
337+
if config == nil {
338+
config = c.Config.(*nvapi.ComputeDomainChannelConfig)
329339
}
330340
break
331341
}
@@ -341,19 +351,19 @@ func (m *ResourceClaimManager) checkClaimEligibleForComputeDomainLabeling(rc *re
341351
for _, cond := range deviceStatus.Conditions {
342352
// Check the device is not set BindingConditions
343353
if cond.Type == nvapi.ComputeDomainBindingConditions && cond.Status == metav1.ConditionTrue {
344-
return false, domainID
354+
return nil, nil
345355
}
346356
// Check the device is not set BindingFailureConditions
347357
if cond.Type == nvapi.ComputeDomainBindingFailureConditions && cond.Status == metav1.ConditionTrue {
348-
return false, domainID
358+
return nil, nil
349359
}
350360
}
351361
}
352362
}
353-
return true, domainID
363+
return config, nil
354364
}
355365
}
356-
return false, domainID
366+
return nil, nil
357367

358368
}
359369

@@ -418,8 +428,8 @@ func (m *ResourceClaimManager) setBindingConditions(ctx context.Context, rc *res
418428
for _, allocationDevice := range rcToUpdate.Status.Allocation.Devices.Results {
419429
device := &resourcev1.AllocatedDeviceStatus{
420430
Driver: allocationDevice.Driver,
421-
Device: allocationDevice.Device,
422431
Pool: allocationDevice.Pool,
432+
Device: allocationDevice.Device,
423433
}
424434
rcToUpdate.Status.Devices = append(rcToUpdate.Status.Devices, *device)
425435
}

cmd/compute-domain-kubelet-plugin/computedomain.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,50 @@ func (m *ComputeDomainManager) isCurrentNodeReadyInClique(ctx context.Context, c
293293
return false
294294
}
295295

296+
func (m *ComputeDomainManager) AssertComputeDomainNamespace(ctx context.Context, claimNamespace, cdUID string) error {
297+
cd, err := m.GetComputeDomain(ctx, cdUID)
298+
if err != nil {
299+
return fmt.Errorf("error getting ComputeDomain: %w", err)
300+
}
301+
if cd == nil {
302+
return fmt.Errorf("ComputeDomain not found: %s", cdUID)
303+
}
304+
305+
if cd.Namespace != claimNamespace {
306+
return fmt.Errorf("the ResourceClaim's namespace is different than the ComputeDomain's namespace")
307+
}
308+
309+
return nil
310+
}
311+
312+
func (m *ComputeDomainManager) AddNodeLabel(ctx context.Context, cdUID string) error {
313+
node, err := m.config.clientsets.Core.CoreV1().Nodes().Get(ctx, m.config.flags.nodeName, metav1.GetOptions{})
314+
if err != nil {
315+
return fmt.Errorf("error retrieving Node: %w", err)
316+
}
317+
318+
currentValue, exists := node.Labels[computeDomainLabelKey]
319+
if exists && currentValue != cdUID {
320+
return fmt.Errorf("label already exists for a different ComputeDomain")
321+
}
322+
323+
if exists && currentValue == cdUID {
324+
return nil
325+
}
326+
327+
newNode := node.DeepCopy()
328+
if newNode.Labels == nil {
329+
newNode.Labels = make(map[string]string)
330+
}
331+
newNode.Labels[computeDomainLabelKey] = cdUID
332+
333+
if _, err = m.config.clientsets.Core.CoreV1().Nodes().Update(ctx, newNode, metav1.UpdateOptions{}); err != nil {
334+
return fmt.Errorf("error updating Node with label: %w", err)
335+
}
336+
337+
return nil
338+
}
339+
296340
// RemoveNodeLabel() attempts removal and returns no error if the label was
297341
// removed or didn't exist in the first place.
298342
func (m *ComputeDomainManager) RemoveNodeLabel(ctx context.Context, cdUID string) error {

cmd/compute-domain-kubelet-plugin/device_state.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,21 @@ func (s *DeviceState) applyComputeDomainChannelConfig(ctx context.Context, confi
487487
return nil, fmt.Errorf("allocation failed: %w", err)
488488
}
489489

490+
if featuregates.Enabled(featuregates.ComputeDomainBindingConditions) {
491+
goto skipAssertNamespaceAddNodeLabel
492+
}
493+
494+
// Create any necessary ComputeDomain channels and gather their CDI container edits.
495+
if err := s.computeDomainManager.AssertComputeDomainNamespace(ctx, claim.Namespace, config.DomainID); err != nil {
496+
return nil, permanentError{fmt.Errorf("error asserting ComputeDomain's namespace: %w", err)}
497+
}
498+
499+
if err := s.computeDomainManager.AddNodeLabel(ctx, config.DomainID); err != nil {
500+
return nil, fmt.Errorf("error adding Node label for ComputeDomain: %w", err)
501+
}
502+
503+
skipAssertNamespaceAddNodeLabel:
504+
490505
if err := s.computeDomainManager.AssertComputeDomainReady(ctx, config.DomainID); err != nil {
491506
return nil, fmt.Errorf("error asserting ComputeDomain Ready: %w", err)
492507
}

cmd/compute-domain-kubelet-plugin/deviceinfo.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121

2222
nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
23+
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
2324
resourceapi "k8s.io/api/resource/v1"
2425
"k8s.io/utils/ptr"
2526
)
@@ -59,8 +60,10 @@ func (d *ComputeDomainChannelInfo) GetDevice() resourceapi.Device {
5960
IntValue: ptr.To(int64(d.ID)),
6061
},
6162
},
62-
BindingConditions: []string{nvapi.ComputeDomainBindingConditions},
63-
BindingFailureConditions: []string{nvapi.ComputeDomainBindingFailureConditions},
63+
}
64+
if featuregates.Enabled(featuregates.ComputeDomainBindingConditions) {
65+
device.BindingConditions = []string{nvapi.ComputeDomainBindingConditions}
66+
device.BindingFailureConditions = []string{nvapi.ComputeDomainBindingFailureConditions}
6467
}
6568
return device
6669
}

pkg/featuregates/featuregates.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ const (
5555
// CrashOnNVLinkFabricErrors causes the kubelet plugin to crash instead of
5656
// falling back to non-fabric mode when NVLink fabric errors are detected.
5757
CrashOnNVLinkFabricErrors featuregate.Feature = "CrashOnNVLinkFabricErrors"
58+
59+
// ComputeDomainBindingConditions enables scheduling of workload pods with channel devices
60+
// to be delayed by DRADeviceBindingConditions until the IMEX Daemon Pods complete their processing.
61+
ComputeDomainBindingConditions featuregate.Feature = "ComputeDomainBindingConditions"
5862
)
5963

6064
// defaultFeatureGates contains the default settings for all project-specific feature gates.
@@ -116,6 +120,13 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{
116120
Version: version.MajorMinor(25, 12),
117121
},
118122
},
123+
ComputeDomainBindingConditions: {
124+
{
125+
Default: false,
126+
PreRelease: featuregate.Alpha,
127+
Version: version.MajorMinor(25, 12),
128+
},
129+
},
119130
}
120131

121132
var (

0 commit comments

Comments
 (0)