@@ -21,18 +21,22 @@ func init() {
2121 kubeInit ()
2222}
2323
24- func buildErrResponse (reqs * pluginapi.AllocateRequest , podReqGPU uint ) * pluginapi.AllocateResponse {
24+ func ( m * NvidiaDevicePlugin ) buildErrResponse (reqs * pluginapi.AllocateRequest , podReqGPU uint ) * pluginapi.AllocateResponse {
2525 responses := pluginapi.AllocateResponse {}
2626 for _ , req := range reqs .ContainerRequests {
2727 response := pluginapi.ContainerAllocateResponse {
2828 Envs : map [string ]string {
29- envNVGPU : fmt .Sprintf ("no-gpu-has-%dMiB -to-run" , podReqGPU ),
29+ envNVGPU : fmt .Sprintf ("no-gpu-has-%dGiB -to-run" , podReqGPU ),
3030 EnvResourceIndex : fmt .Sprintf ("-1" ),
3131 EnvResourceByPod : fmt .Sprintf ("%d" , podReqGPU ),
3232 EnvResourceByContainer : fmt .Sprintf ("%d" , uint (len (req .DevicesIDs ))),
3333 EnvResourceByDev : fmt .Sprintf ("%d" , getGPUMemory ()),
3434 },
3535 }
36+ if m .mps {
37+ response .Envs [EnvMPSActiveThreadPercentage ] = fmt .Sprintf ("%d" , 100 * uint (len (req .DevicesIDs ))/ getGPUMemory ())
38+ response .Envs [EnvMPSPipeDirectory ] = fmt .Sprintf (m .mpspipe )
39+ }
3640 responses .ContainerResponses = append (responses .ContainerResponses , & response )
3741 }
3842 return & responses
@@ -62,7 +66,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
6266 pods , err := getCandidatePods ()
6367 if err != nil {
6468 log .Infof ("invalid allocation requst: Failed to find candidate pods due to %v" , err )
65- return buildErrResponse (reqs , podReqGPU ), nil
69+ return m . buildErrResponse (reqs , podReqGPU ), nil
6670 }
6771
6872 if log .V (4 ) {
@@ -106,7 +110,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
106110 }
107111
108112 if id < 0 {
109- return buildErrResponse (reqs , podReqGPU ), nil
113+ return m . buildErrResponse (reqs , podReqGPU ), nil
110114 }
111115
112116 // 1. Create container requests
@@ -121,6 +125,15 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
121125 EnvResourceByDev : fmt .Sprintf ("%d" , getGPUMemory ()),
122126 },
123127 }
128+ if m .mps {
129+ response .Envs [EnvMPSActiveThreadPercentage ] = fmt .Sprintf ("%d" , 100 * reqGPU / getGPUMemory ())
130+ response .Envs [EnvMPSPipeDirectory ] = fmt .Sprintf (m .mpspipe )
131+ mount := pluginapi.Mount {
132+ ContainerPath : m .mpspipe ,
133+ HostPath : m .mpspipe ,
134+ }
135+ response .Mounts = append (response .Mounts , & mount )
136+ }
124137 responses .ContainerResponses = append (responses .ContainerResponses , & response )
125138 }
126139
@@ -134,25 +147,25 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context,
134147 pod , err := clientset .CoreV1 ().Pods (assumePod .Namespace ).Get (assumePod .Name , metav1.GetOptions {})
135148 if err != nil {
136149 log .Warningf ("Failed due to %v" , err )
137- return buildErrResponse (reqs , podReqGPU ), nil
150+ return m . buildErrResponse (reqs , podReqGPU ), nil
138151 }
139152 newPod = updatePodAnnotations (pod )
140153 _ , err = clientset .CoreV1 ().Pods (newPod .Namespace ).Update (newPod )
141154 if err != nil {
142155 log .Warningf ("Failed due to %v" , err )
143- return buildErrResponse (reqs , podReqGPU ), nil
156+ return m . buildErrResponse (reqs , podReqGPU ), nil
144157 }
145158 } else {
146159 log .Warningf ("Failed due to %v" , err )
147- return buildErrResponse (reqs , podReqGPU ), nil
160+ return m . buildErrResponse (reqs , podReqGPU ), nil
148161 }
149162 }
150163
151164 } else {
152165 log .Warningf ("invalid allocation requst: request GPU memory %d can't be satisfied." ,
153166 podReqGPU )
154167 // return &responses, fmt.Errorf("invalid allocation requst: request GPU memory %d can't be satisfied", reqGPU)
155- return buildErrResponse (reqs , podReqGPU ), nil
168+ return m . buildErrResponse (reqs , podReqGPU ), nil
156169 }
157170
158171 log .Infof ("new allocated GPUs info %v" , & responses )
0 commit comments