@@ -995,58 +995,19 @@ func configureGpus(config *container.Config, hostConfig *container.HostConfig, v
995995 // AMD: ids are DRI render node paths, e.g., /dev/dri/renderD128
996996 switch vendor {
997997 case host .GpuVendorNvidia :
998- // hostConfig.Resources.DeviceRequests = append(
999- // hostConfig.Resources.DeviceRequests,
1000- // container.DeviceRequest{
1001- // // Request all capabilities to maximize compatibility with all sorts of GPU workloads.
1002- // // Default capabilities: utility, compute.
1003- // // https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.16.0/docker-specialized.html
1004- // Capabilities: [][]string{{"gpu", "utility", "compute", "graphics", "video", "display", "compat32"}},
1005- // DeviceIDs: ids,
1006- // },
1007- // )
1008- for i := range 8 {
1009- devPath := fmt .Sprintf ("/dev/nvidia%d" , i )
1010- hostConfig .Resources .Devices = append (
1011- hostConfig .Resources .Devices ,
1012- container.DeviceMapping {
1013- PathOnHost : devPath ,
1014- PathInContainer : devPath ,
1015- CgroupPermissions : "rwm" ,
1016- },
1017- )
1018- }
1019- hostConfig .Resources .Devices = append (
1020- hostConfig .Resources .Devices ,
1021- container.DeviceMapping {
1022- PathOnHost : "/dev/nvidia-uvm" ,
1023- PathInContainer : "/dev/nvidia-uvm" ,
1024- CgroupPermissions : "rwm" ,
1025- },
1026- )
1027- hostConfig .Resources .Devices = append (
1028- hostConfig .Resources .Devices ,
1029- container.DeviceMapping {
1030- PathOnHost : "/dev/nvidiactl" ,
1031- PathInContainer : "/dev/nvidiactl" ,
1032- CgroupPermissions : "rwm" ,
998+ hostConfig .Resources .DeviceRequests = append (
999+ hostConfig .Resources .DeviceRequests ,
1000+ container.DeviceRequest {
1001+ // Request all capabilities to maximize compatibility with all sorts of GPU workloads.
1002+ // Default capabilities: utility, compute.
1003+ // https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.16.0/docker-specialized.html
1004+ Capabilities : [][]string {{"gpu" , "utility" , "compute" , "graphics" , "video" , "display" , "compat32" }},
1005+ DeviceIDs : ids ,
10331006 },
10341007 )
10351008 hostConfig .Mounts = append (
10361009 hostConfig .Mounts ,
1037- mount.Mount {Type : mount .TypeBind , Source : "/var/lib/nvidia/lib64" , Target : "/usr/local/nvidia/lib64" },
1038- )
1039- hostConfig .Mounts = append (
1040- hostConfig .Mounts ,
1041- mount.Mount {Type : mount .TypeBind , Source : "/var/lib/nvidia/bin" , Target : "/usr/local/nvidia/bin" },
1042- )
1043- hostConfig .Mounts = append (
1044- hostConfig .Mounts ,
1045- mount.Mount {Type : mount .TypeBind , Source : "/var/lib/tcpx/lib64" , Target : "/usr/local/tcpx/lib64" },
1046- )
1047- hostConfig .Mounts = append (
1048- hostConfig .Mounts ,
1049- mount.Mount {Type : mount .TypeBind , Source : "/run/tcpx" , Target : "/run/tcpx" },
1010+ mount.Mount {Type : mount .TypeBind , Source : "/dev/aperture_devices" , Target : "/dev/aperture_devices" },
10501011 )
10511012 case host .GpuVendorAmd :
10521013 // All options are listed here: https://hub.docker.com/r/rocm/pytorch
0 commit comments