1717package main
1818
1919import (
20+ "context"
21+ "encoding/json"
2022 "fmt"
2123 "slices"
2224 "sync"
2325
2426 resourceapi "k8s.io/api/resource/v1"
27+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2528 "k8s.io/apimachinery/pkg/runtime"
29+ metav1apply "k8s.io/client-go/applyconfigurations/meta/v1"
30+ resourceapply "k8s.io/client-go/applyconfigurations/resource/v1"
31+ "k8s.io/klog/v2"
2632 drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1"
2733 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2834
@@ -61,6 +67,7 @@ type DeviceState struct {
6167 cdi * CDIHandler
6268 allocatable AllocatableDevices
6369 checkpointManager checkpointmanager.CheckpointManager
70+ config * Config
6471}
6572
6673func NewDeviceState (config * Config ) (* DeviceState , error ) {
@@ -88,6 +95,7 @@ func NewDeviceState(config *Config) (*DeviceState, error) {
8895 cdi : cdi ,
8996 allocatable : allocatable ,
9097 checkpointManager : checkpointManager ,
98+ config : config ,
9199 }
92100
93101 checkpoints , err := state .checkpointManager .ListCheckpoints ()
@@ -109,7 +117,7 @@ func NewDeviceState(config *Config) (*DeviceState, error) {
109117 return state , nil
110118}
111119
112- func (s * DeviceState ) Prepare (claim * resourceapi.ResourceClaim ) ([]* drapbv1.Device , error ) {
120+ func (s * DeviceState ) Prepare (ctx context. Context , claim * resourceapi.ResourceClaim ) ([]* drapbv1.Device , error ) {
113121 s .Lock ()
114122 defer s .Unlock ()
115123
@@ -125,7 +133,7 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi
125133 return preparedClaims [claimUID ].GetDevices (), nil
126134 }
127135
128- preparedDevices , err := s .prepareDevices (claim )
136+ preparedDevices , err := s .prepareDevices (ctx , claim )
129137 if err != nil {
130138 return nil , fmt .Errorf ("prepare failed: %v" , err )
131139 }
@@ -173,7 +181,7 @@ func (s *DeviceState) Unprepare(claimUID string) error {
173181 return nil
174182}
175183
176- func (s * DeviceState ) prepareDevices (claim * resourceapi.ResourceClaim ) (PreparedDevices , error ) {
184+ func (s * DeviceState ) prepareDevices (ctx context. Context , claim * resourceapi.ResourceClaim ) (PreparedDevices , error ) {
177185 if claim .Status .Allocation == nil {
178186 return nil , fmt .Errorf ("claim not yet allocated" )
179187 }
@@ -196,13 +204,20 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (Prepared
196204 Config : configapi .DefaultGpuConfig (),
197205 })
198206
207+ // build device status
208+ var devicesStatus []* resourceapply.AllocatedDeviceStatusApplyConfiguration
209+
199210 // Look through the configs and figure out which one will be applied to
200211 // each device allocation result based on their order of precedence.
201212 configResultsMap := make (map [runtime.Object ][]* resourceapi.DeviceRequestAllocationResult )
202213 for _ , result := range claim .Status .Allocation .Devices .Results {
203214 if _ , exists := s .allocatable [result .Device ]; ! exists {
204215 return nil , fmt .Errorf ("requested GPU is not allocatable: %v" , result .Device )
205216 }
217+
218+ deviceStatus := s .buildDeviceStatus (& result )
219+ devicesStatus = append (devicesStatus , deviceStatus )
220+
206221 for _ , c := range slices .Backward (configs ) {
207222 if len (c .Requests ) == 0 || slices .Contains (c .Requests , result .Request ) {
208223 configResultsMap [c .Config ] = append (configResultsMap [c .Config ], & result )
@@ -211,6 +226,11 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (Prepared
211226 }
212227 }
213228
229+ klog .Infof ("Adding device attribute to claim %s/%s" , claim .Namespace , claim .Name )
230+ if err := s .applyDeviceStatus (ctx , claim .Namespace , claim .Name , devicesStatus ... ); err != nil {
231+ klog .Warningf ("Failed to update device attributes for claim %s/%s: %v" , claim .Namespace , claim .Name , err )
232+ }
233+
214234 // Normalize, validate, and apply all configs associated with devices that
215235 // need to be prepared. Track container edits generated from applying the
216236 // config to the set of device allocation results.
@@ -380,3 +400,56 @@ func GetOpaqueDeviceConfigs(
380400
381401 return resultConfigs , nil
382402}
403+
404+ func (s * DeviceState ) buildDeviceStatus (res * resourceapi.DeviceRequestAllocationResult ) * resourceapply.AllocatedDeviceStatusApplyConfiguration {
405+ dn := res .Device
406+ deviceInfo := make (map [string ]interface {})
407+
408+ if d , ok := s .allocatable [dn ]; ok {
409+ if d .Attributes != nil {
410+ attributes := d .Attributes
411+
412+ if uuid , ok := attributes ["uuid" ]; ok {
413+ deviceInfo ["uuid" ] = uuid
414+ }
415+ if model , ok := attributes ["model" ]; ok {
416+ deviceInfo ["model" ] = model
417+ }
418+ if driverVersion , ok := attributes ["driverVersion" ]; ok {
419+ deviceInfo ["driverVersion" ] = driverVersion
420+ }
421+ }
422+ }
423+
424+ jsonBytes , err := json .Marshal (deviceInfo )
425+ if err != nil {
426+ klog .Errorf ("Failed to marshal device data: %v" , err )
427+ jsonBytes = []byte ("{}" )
428+ }
429+ data := runtime.RawExtension {
430+ Raw : jsonBytes ,
431+ }
432+ cond := metav1apply .Condition ().
433+ WithType ("Ready" ).
434+ WithStatus (metav1 .ConditionTrue ).
435+ WithReason ("GPUDeviceReady" ).
436+ WithMessage ("GPUDeviceAllocated" ).
437+ WithLastTransitionTime (metav1 .Now ())
438+
439+ return resourceapply .AllocatedDeviceStatus ().
440+ WithDevice (dn ).
441+ WithDriver (res .Driver ).
442+ WithPool (res .Pool ).
443+ WithConditions (cond ).
444+ WithData (data )
445+ }
446+
447+ func (s * DeviceState ) applyDeviceStatus (ctx context.Context , ns , name string , devices ... * resourceapply.AllocatedDeviceStatusApplyConfiguration ) error {
448+ claim := resourceapply .ResourceClaim (name , ns ).
449+ WithStatus (resourceapply .ResourceClaimStatus ().WithDevices (devices ... ))
450+
451+ opts := metav1.ApplyOptions {FieldManager : consts .DriverName , Force : true }
452+
453+ _ , err := s .config .coreclient .ResourceV1 ().ResourceClaims (ns ).ApplyStatus (ctx , claim , opts )
454+ return err
455+ }
0 commit comments