@@ -509,6 +509,16 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.Alloc
509509 ReadOnly : true },
510510 )
511511 }
512+
513+ // If pass-device-specs is enabled, explicitly mount GPU device nodes via kubelet
514+ // This allows containers to access GPU devices without requiring nvidia-container-runtime
515+ // making it compatible with standard OCI runtimes (containerd, docker, etc.)
516+ if config .PassDeviceSpecs {
517+ deviceSpecs := m .GetDeviceSpecs (devreq )
518+ response .Devices = append (response .Devices , deviceSpecs ... )
519+ klog .V (3 ).Infof ("Added %d device specs to allocation response for pod %s" ,
520+ len (deviceSpecs ), current .Name )
521+ }
512522 }
513523 responses .ContainerResponses = append (responses .ContainerResponses , & response )
514524 }
@@ -662,6 +672,103 @@ func (m *NvidiaDevicePlugin) GetContainerDeviceStrArray(c util.ContainerDevices)
662672 return tmp
663673}
664674
675+ // GetDeviceSpecs returns a list of pluginapi.DeviceSpec for the given container devices
676+ // This method is used when PassDeviceSpecs is enabled to explicitly mount GPU device nodes
677+ // via kubelet's Device Plugin API, enabling GPU access without nvidia-container-runtime
678+ func (m * NvidiaDevicePlugin ) GetDeviceSpecs (containerDevices util.ContainerDevices ) []* pluginapi.DeviceSpec {
679+ // Define optional control devices that should be checked for existence before adding
680+ // These devices may not be present on all systems
681+ optionalDevices := map [string ]bool {
682+ "/dev/nvidiactl" : true ,
683+ "/dev/nvidia-uvm" : true ,
684+ "/dev/nvidia-uvm-tools" : true ,
685+ "/dev/nvidia-modeset" : true ,
686+ }
687+
688+ var deviceSpecs []* pluginapi.DeviceSpec
689+ devicePathsMap := make (map [string ]bool ) // Track unique paths to avoid duplicates
690+
691+ // Get all available devices from the cache to lookup device paths by UUID
692+ var allDevices []* Device
693+ if m .migStrategy == "none" {
694+ allDevices = m .deviceCache .GetCache ()
695+ } else if m .migStrategy == "mixed" {
696+ allDevices = m .cachedDevices
697+ }
698+
699+ // For each requested device, find its Device object and extract paths
700+ for _ , containerDevice := range containerDevices {
701+ deviceUUID := containerDevice .UUID
702+
703+ // Handle MIG devices (UUIDs contain "[")
704+ if strings .Contains (deviceUUID , "[" ) {
705+ // For MIG devices, get the actual MIG UUID after template generation
706+ devtype , devindex := util .GetIndexAndTypeFromUUID (deviceUUID )
707+ position , needsReset := m .GenerateMigTemplate (devtype , devindex , containerDevice )
708+ if needsReset {
709+ m .ApplyMigTemplate ()
710+ }
711+ deviceUUID = util .GetMigUUIDFromIndex (deviceUUID , position )
712+ }
713+
714+ // Find the Device object matching this UUID
715+ for _ , device := range allDevices {
716+ if device .ID == deviceUUID {
717+ // Add all paths from this device
718+ for _ , path := range device .Paths {
719+ if ! devicePathsMap [path ] {
720+ devicePathsMap [path ] = true
721+ spec := & pluginapi.DeviceSpec {
722+ ContainerPath : path ,
723+ HostPath : path , // Use same path for both container and host
724+ Permissions : "rw" ,
725+ }
726+ deviceSpecs = append (deviceSpecs , spec )
727+ klog .V (4 ).Infof ("Added device spec for GPU device: %s" , path )
728+ }
729+ }
730+ break
731+ }
732+ }
733+ }
734+
735+ // Add control devices (nvidiactl, nvidia-uvm, etc.) that are shared across all GPUs
736+ // These are required for CUDA to function properly
737+ controlDevicePaths := []string {
738+ "/dev/nvidiactl" ,
739+ "/dev/nvidia-uvm" ,
740+ "/dev/nvidia-uvm-tools" ,
741+ "/dev/nvidia-modeset" ,
742+ }
743+
744+ for _ , path := range controlDevicePaths {
745+ // Skip if already added
746+ if devicePathsMap [path ] {
747+ continue
748+ }
749+
750+ // For optional devices, check if they exist on the host before adding
751+ if optionalDevices [path ] {
752+ if _ , err := os .Stat (path ); err != nil {
753+ klog .V (4 ).Infof ("Skipping optional device %s: not present on host" , path )
754+ continue
755+ }
756+ }
757+
758+ devicePathsMap [path ] = true
759+ spec := & pluginapi.DeviceSpec {
760+ ContainerPath : path ,
761+ HostPath : path ,
762+ Permissions : "rw" ,
763+ }
764+ deviceSpecs = append (deviceSpecs , spec )
765+ klog .V (4 ).Infof ("Added device spec for control device: %s" , path )
766+ }
767+
768+ klog .V (3 ).Infof ("Generated %d device specs for container" , len (deviceSpecs ))
769+ return deviceSpecs
770+ }
771+
665772func (m * NvidiaDevicePlugin ) GenerateMigTemplate (devtype string , devindex int , val util.ContainerDevice ) (int , bool ) {
666773 needsreset := false
667774 position := - 1 // Initialize to an invalid position
0 commit comments