Skip to content

Commit d1cd2c1

Browse files
committed
fix: resolve index out of range panic in fetchContainerInfo for Ascend devices
- Pre-allocate bizContainerDevices based on actual container count to prevent out-of-bounds access when annotation device count differs from pod container count - Merge devices from all device types per container instead of overwriting, which previously caused device data loss for multi-device-type pods - Add container index boundary check in DecodePodDevices Ascend branch to align with NVIDIA/Hygon/Metax device handling Fixes #94 Signed-off-by: handong <cygnushan@yunify.com>
1 parent f60dbed commit d1cd2c1

3 files changed

Lines changed: 183 additions & 5 deletions

File tree

server/internal/data/pod.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,20 @@ func (r *podRepo) fetchContainerInfo(pod *corev1.Pod) []*biz.Container {
112112
if err != nil {
113113
return containers
114114
}
115-
bizContainerDevices := []biz.ContainerDevices{}
115+
// Merge devices from all device types per container index.
116+
// pdevices: map[deviceType]PodSingleDevice([]ContainerDevices)
117+
numContainers := len(pod.Spec.Containers)
118+
bizContainerDevices := make([]biz.ContainerDevices, numContainers)
116119
for _, pds := range pdevices {
117-
copier.Copy(&bizContainerDevices, pds)
120+
var bizPds biz.PodSingleDevice
121+
copier.Copy(&bizPds, pds)
122+
for i, cd := range bizPds {
123+
if i < numContainers {
124+
bizContainerDevices[i] = append(bizContainerDevices[i], cd...)
125+
}
126+
}
118127
}
119-
if len(bizContainerDevices) < 1 {
128+
if len(pdevices) == 0 {
120129
return containers
121130
}
122131

server/internal/provider/util/util.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,14 @@ func DecodePodDevices(pod *corev1.Pod, log *log.Helper) (PodDevices, error) {
317317
pd[devType] = make(PodSingleDevice, 0)
318318
switch devType {
319319
case AscendGPUDevice, Ascend310PGPUDevice:
320-
for _, s := range strings.Split(str, OnePodMultiContainerSplitSymbol) {
320+
for i, s := range strings.Split(str, OnePodMultiContainerSplitSymbol) {
321+
if i >= len(pod.Spec.Containers) {
322+
break
323+
}
324+
if s == "" {
325+
pd[devType] = append(pd[devType], ContainerDevices{})
326+
continue
327+
}
321328
cd, err := DecodeNpuContainerDevices(s)
322329
if err != nil {
323330
return PodDevices{}, nil

server/internal/provider/util/util_test.go

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@ limitations under the License.
1616
package util
1717

1818
import (
19-
"github.com/go-kratos/kratos/v2/log"
19+
"os"
20+
"reflect"
2021
"testing"
22+
23+
"github.com/go-kratos/kratos/v2/log"
24+
corev1 "k8s.io/api/core/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2126
)
2227

2328
var inRequestDevices map[string]string
@@ -345,3 +350,160 @@ func Test_DecodePodDevices(t *testing.T) {
345350
})
346351
}
347352
}
353+
354+
func Test_DecodePodDevices_Ascend(t *testing.T) {
355+
// Ensure Ascend device keys are registered in SupportDevices.
356+
SupportDevices["Ascend"] = "hami.io/Ascend910B-devices-allocated"
357+
SupportDevices["Ascend310P"] = "hami.io/Ascend310P-devices-allocated"
358+
359+
logger := log.NewHelper(log.NewStdLogger(os.Stdout))
360+
361+
tests := []struct {
362+
name string
363+
pod *corev1.Pod
364+
want PodDevices
365+
wantErr bool
366+
wantErrMsg string
367+
}{
368+
{
369+
name: "Ascend310P single container single device",
370+
pod: &corev1.Pod{
371+
ObjectMeta: metav1.ObjectMeta{
372+
Annotations: map[string]string{
373+
"hami.io/Ascend310P-devices-allocated": "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019,Ascend310P,6144,100:",
374+
},
375+
},
376+
Spec: corev1.PodSpec{
377+
Containers: []corev1.Container{{Name: "main"}},
378+
},
379+
},
380+
want: PodDevices{
381+
"Ascend310P": {
382+
{
383+
{Idx: 0, UUID: "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
384+
},
385+
},
386+
},
387+
wantErr: false,
388+
},
389+
{
390+
name: "Ascend310P two containers",
391+
pod: &corev1.Pod{
392+
ObjectMeta: metav1.ObjectMeta{
393+
Annotations: map[string]string{
394+
"hami.io/Ascend310P-devices-allocated": "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019,Ascend310P,6144,100:;D7E96E64-214123F1-E8E618E4-AED8030A-E3003039,Ascend310P,6144,100:",
395+
},
396+
},
397+
Spec: corev1.PodSpec{
398+
Containers: []corev1.Container{
399+
{Name: "ctr1"},
400+
{Name: "ctr2"},
401+
},
402+
},
403+
},
404+
want: PodDevices{
405+
"Ascend310P": {
406+
{
407+
{Idx: 0, UUID: "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
408+
},
409+
{
410+
{Idx: 0, UUID: "D7E96E64-214123F1-E8E618E4-AED8030A-E3003039", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
411+
},
412+
},
413+
},
414+
wantErr: false,
415+
},
416+
{
417+
name: "Ascend310P annotation segments exceed container count - should truncate",
418+
pod: &corev1.Pod{
419+
ObjectMeta: metav1.ObjectMeta{
420+
Annotations: map[string]string{
421+
// Two device segments but only one container in spec.
422+
"hami.io/Ascend310P-devices-allocated": "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019,Ascend310P,6144,100:;D7E96E64-214123F1-E8E618E4-AED8030A-E3003039,Ascend310P,6144,100:",
423+
},
424+
},
425+
Spec: corev1.PodSpec{
426+
Containers: []corev1.Container{{Name: "main"}},
427+
},
428+
},
429+
want: PodDevices{
430+
"Ascend310P": {
431+
{
432+
{Idx: 0, UUID: "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
433+
},
434+
},
435+
},
436+
wantErr: false,
437+
},
438+
{
439+
name: "Ascend310P empty segment in middle should produce empty ContainerDevices placeholder",
440+
pod: &corev1.Pod{
441+
ObjectMeta: metav1.ObjectMeta{
442+
Annotations: map[string]string{
443+
"hami.io/Ascend310P-devices-allocated": "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019,Ascend310P,6144,100:;;D7E96E64-214123F1-E8E618E4-AED8030A-E3003039,Ascend310P,6144,100:",
444+
},
445+
},
446+
Spec: corev1.PodSpec{
447+
Containers: []corev1.Container{
448+
{Name: "ctr1"},
449+
{Name: "ctr2"},
450+
{Name: "ctr3"},
451+
},
452+
},
453+
},
454+
want: PodDevices{
455+
"Ascend310P": {
456+
{
457+
{Idx: 0, UUID: "E0766E64-20C0AB59-CC9AB1A4-3778030A-83003019", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
458+
},
459+
{}, // empty segment placeholder
460+
{
461+
{Idx: 0, UUID: "D7E96E64-214123F1-E8E618E4-AED8030A-E3003039", Type: "Ascend310P", Usedmem: 6144, Usedcores: 25},
462+
},
463+
},
464+
},
465+
wantErr: false,
466+
},
467+
{
468+
name: "Ascend310P malformed device should return error",
469+
pod: &corev1.Pod{
470+
ObjectMeta: metav1.ObjectMeta{
471+
Annotations: map[string]string{
472+
"hami.io/Ascend310P-devices-allocated": "bad-format-string",
473+
},
474+
},
475+
Spec: corev1.PodSpec{
476+
Containers: []corev1.Container{{Name: "main"}},
477+
},
478+
},
479+
want: PodDevices{},
480+
wantErr: true,
481+
},
482+
{
483+
name: "empty annotations should return empty PodDevices",
484+
pod: &corev1.Pod{
485+
ObjectMeta: metav1.ObjectMeta{
486+
Annotations: map[string]string{},
487+
},
488+
Spec: corev1.PodSpec{
489+
Containers: []corev1.Container{{Name: "main"}},
490+
},
491+
},
492+
want: PodDevices{},
493+
wantErr: false,
494+
},
495+
}
496+
497+
for _, tt := range tests {
498+
t.Run(tt.name, func(t *testing.T) {
499+
got, err := DecodePodDevices(tt.pod, logger)
500+
if (err != nil) != tt.wantErr {
501+
t.Errorf("DecodePodDevices() error = %v, wantErr %v", err, tt.wantErr)
502+
return
503+
}
504+
if !reflect.DeepEqual(got, tt.want) {
505+
t.Errorf("DecodePodDevices() = %+v, want %+v", got, tt.want)
506+
}
507+
})
508+
}
509+
}

0 commit comments

Comments
 (0)