Skip to content

Commit 10ee830

Browse files
committed
Add volume information
Signed-off-by: 630445639 <630445639@qq.com>
1 parent ac0d588 commit 10ee830

6 files changed

Lines changed: 32 additions & 18 deletions

File tree

cmd/nvidia/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ var (
1111
mps = flag.Bool("mps", false, "Enable or Disable MPS")
1212
healthCheck = flag.Bool("health-check", false, "Enable or disable Health check")
1313
memoryUnit = flag.String("memory-unit", "GiB", "Set memoryUnit of the GPU Memroy, support 'GiB' and 'MiB'")
14+
mpspipe = flag.String("mps-pipe", "/tmp/nvidia-mps", " pipes and UNIX domain sockets")
1415
)
1516

1617
func main() {
1718
flag.Parse()
1819
log.V(1).Infoln("Start gpushare device plugin")
19-
ngm := nvidia.NewSharedGPUManager(*mps, *healthCheck, translatememoryUnits(*memoryUnit))
20+
ngm := nvidia.NewSharedGPUManager(*mps, *healthCheck, *mpspipe, translatememoryUnits(*memoryUnit))
2021
err := ngm.Run()
2122
if err != nil {
2223
log.Fatalf("Failed due to %v", err)

device-plugin-ds.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ spec:
2525
- gpushare-device-plugin-v2
2626
- -logtostderr
2727
- --v=5
28+
# - --mps-pipe=/root/nvidia-mps // The MPS client and MPS server communicate through this directory. You can change it.
29+
# - --mps=true // Uncomment this line if you want to use MPS.
2830
- --memory-unit=GiB
2931
resources:
3032
limits:

pkg/gpu/nvidia/allocate.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ func (m *NvidiaDevicePlugin) buildErrResponse(reqs *pluginapi.AllocateRequest, p
3434
},
3535
}
3636
if m.mps {
37-
response.Envs[EnvPercentage] = fmt.Sprintf("%d", 100*uint(len(req.DevicesIDs))/getGPUMemory()) //
37+
response.Envs[EnvMPSActiveThreadPercentage] = fmt.Sprintf("%d", 100*uint(len(req.DevicesIDs))/getGPUMemory())
38+
response.Envs[EnvMPSPipeDirectory] = fmt.Sprintf(m.mpspipe)
3839
}
3940
responses.ContainerResponses = append(responses.ContainerResponses, &response)
4041
}
@@ -125,7 +126,13 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
125126
},
126127
}
127128
if m.mps {
128-
response.Envs[EnvPercentage] = fmt.Sprintf("%d", 100*reqGPU/getGPUMemory())
129+
response.Envs[EnvMPSActiveThreadPercentage] = fmt.Sprintf("%d", 100*reqGPU/getGPUMemory())
130+
response.Envs[EnvMPSPipeDirectory] = fmt.Sprintf(m.mpspipe)
131+
mount := pluginapi.Mount{
132+
ContainerPath: m.mpspipe,
133+
HostPath: m.mpspipe,
134+
}
135+
response.Mounts = append(response.Mounts, &mount)
129136
}
130137
responses.ContainerResponses = append(responses.ContainerResponses, &response)
131138
}

pkg/gpu/nvidia/const.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,16 @@ const (
2121
containerLogPathLabelKey = "io.kubernetes.container.logpath"
2222
sandboxIDLabelKey = "io.kubernetes.sandbox.id"
2323

24-
envNVGPU = "NVIDIA_VISIBLE_DEVICES"
25-
EnvResourceIndex = "ALIYUN_COM_GPU_MEM_IDX"
26-
EnvResourceByPod = "ALIYUN_COM_GPU_MEM_POD"
27-
EnvResourceByContainer = "ALIYUN_COM_GPU_MEM_CONTAINER"
28-
EnvResourceByDev = "ALIYUN_COM_GPU_MEM_DEV"
29-
EnvAssignedFlag = "ALIYUN_COM_GPU_MEM_ASSIGNED"
30-
EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
31-
EnvPercentage = "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"
32-
EnvResourceAssignTime = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"
24+
envNVGPU = "NVIDIA_VISIBLE_DEVICES"
25+
EnvResourceIndex = "ALIYUN_COM_GPU_MEM_IDX"
26+
EnvResourceByPod = "ALIYUN_COM_GPU_MEM_POD"
27+
EnvResourceByContainer = "ALIYUN_COM_GPU_MEM_CONTAINER"
28+
EnvResourceByDev = "ALIYUN_COM_GPU_MEM_DEV"
29+
EnvAssignedFlag = "ALIYUN_COM_GPU_MEM_ASSIGNED"
30+
EnvResourceAssumeTime = "ALIYUN_COM_GPU_MEM_ASSUME_TIME"
31+
EnvMPSPipeDirectory = "CUDA_MPS_PIPE_DIRECTORY"
32+
EnvMPSActiveThreadPercentage = "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"
33+
EnvResourceAssignTime = "ALIYUN_COM_GPU_MEM_ASSIGN_TIME"
3334

3435
GiBPrefix = MemoryUnit("GiB")
3536
MiBPrefix = MemoryUnit("MiB")

pkg/gpu/nvidia/gpumanager.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@ import (
1414
type sharedGPUManager struct {
1515
enableMPS bool
1616
healthCheck bool
17+
mpspipe string
1718
}
1819

19-
func NewSharedGPUManager(enableMPS, healthCheck bool, bp MemoryUnit) *sharedGPUManager {
20+
func NewSharedGPUManager(enableMPS, healthCheck bool, mpspipe string, bp MemoryUnit) *sharedGPUManager {
2021
metric = bp
2122
return &sharedGPUManager{
2223
enableMPS: enableMPS,
2324
healthCheck: healthCheck,
25+
mpspipe: mpspipe,
2426
}
2527
}
2628

@@ -61,7 +63,7 @@ L:
6163
devicePlugin.Stop()
6264
}
6365

64-
devicePlugin = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck)
66+
devicePlugin = NewNvidiaDevicePlugin(ngm.enableMPS, ngm.healthCheck, ngm.mpspipe)
6567
if err := devicePlugin.Serve(); err != nil {
6668
log.Warningf("Failed to start device plugin due to %v", err)
6769
} else {

pkg/gpu/nvidia/server.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type NvidiaDevicePlugin struct {
2222
devIndxMap map[uint]string
2323
socket string
2424
mps bool
25+
mpspipe string
2526
healthCheck bool
2627

2728
stop chan struct{}
@@ -32,7 +33,7 @@ type NvidiaDevicePlugin struct {
3233
}
3334

3435
// NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
35-
func NewNvidiaDevicePlugin(mps, healthCheck bool) *NvidiaDevicePlugin {
36+
func NewNvidiaDevicePlugin(mps, healthCheck bool, mpspipe string) *NvidiaDevicePlugin {
3637
devs, devNameMap := getDevices()
3738
devList := []string{}
3839

@@ -54,10 +55,10 @@ func NewNvidiaDevicePlugin(mps, healthCheck bool) *NvidiaDevicePlugin {
5455
devNameMap: devNameMap,
5556
socket: serverSock,
5657
mps: mps,
58+
mpspipe: mpspipe,
5759
healthCheck: healthCheck,
58-
59-
stop: make(chan struct{}),
60-
health: make(chan *pluginapi.Device),
60+
stop: make(chan struct{}),
61+
health: make(chan *pluginapi.Device),
6162
}
6263
}
6364

0 commit comments

Comments
 (0)