Skip to content

Commit 4ef4b80

Browse files
Merge pull request #110 from charford/fix/vgpu-monitor-invalid-uuid-panic
vgpu-monitor panics with invalid UTF-8 when collecting per-container metrics during container initialization
2 parents: ab97420 + b2490ce; commit: 4ef4b80

1 file changed

Lines changed: 5 additions & 0 deletions

File tree

cmd/vgpu-monitor/metrics.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import (
2222
"net/http"
2323
"strings"
2424
"time"
25+
"unicode/utf8"
2526

2627
"volcano.sh/k8s-device-plugin/pkg/monitor/nvidia"
2728
"volcano.sh/k8s-device-plugin/pkg/plugin/vgpu/config"
@@ -225,6 +226,10 @@ func (cc ClusterManagerCollector) Collect(ch chan<- prometheus.Metric) {
225226
}
226227
for i := 0; i < c.Info.DeviceNum(); i++ {
227228
uuid := c.Info.DeviceUUID(i)[0:40]
229+
if !utf8.ValidString(uuid) {
230+
klog.Warningf("skipping device %d for pod %s/%s: UUID contains invalid UTF-8 (shared memory not yet initialized)", i, pod.Namespace, pod.Name)
231+
continue
232+
}
228233
memoryTotal := c.Info.DeviceMemoryTotal(i)
229234
memoryLimit := c.Info.DeviceMemoryLimit(i)
230235
memoryContextSize := c.Info.DeviceMemoryContextSize(i)

0 commit comments

Comments (0)