Enrich per-process GPU metrics with container/pod labels

gnurizen · gnurizen · commit 1258b6b6ea71 · 2026-06-23T12:48:47.000-04:00
Attach Kubernetes namespace/pod/container labels to per-process GPU
metrics so they share identity with parca-agent's profiles, enabling
queries like GPU utilization by pod and joins between GPU metrics and
flame graphs on the same pod/container labels. This is the per-process
attribution dcgm-exporter cannot do.

- gpumetrics.LabelResolver: optional interface for resolving extra
  attributes from a host PID; threaded into per-process collection and
  applied once per PID across its utilization samples.
- gpumetrics.ContainerLabelResolver: backs the interface with
  parca-agent's container metadata provider, returning a curated,
  low-cardinality allow-list (namespace, pod, pod_container_name,
  pod_container_id, pod_uid, pod_controller_kind/name).
- main.go builds the resolver from the node name and passes it to the
  producer; resolver construction failure falls back to pid/comm only
  rather than disabling GPU metrics.

NVML reports host PIDs and the metadata provider reads host /proc, so
this assumes the agent runs in the host PID namespace (the standard
deployment).
diff --git a/gpumetrics/enrich.go b/gpumetrics/enrich.go
@@ -0,0 +1,80 @@
+// Copyright 2026 The Parca Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gpumetrics
+
+import (
+	"context"
+
+	"github.com/prometheus/prometheus/model/labels"
+	"go.opentelemetry.io/ebpf-profiler/libpf"
+
+	"github.com/parca-dev/parca-agent/reporter/metadata"
+)
+
+// defaultEnrichmentLabels is the allow-list (used as a set) of container/pod
+// label names attached to per-process GPU metrics. It is intentionally small:
+// each label combination is a distinct time series, so only stable identifiers
+// useful for grouping are kept and verbose or high-churn labels such as
+// pod_container_image and pod_ip are left out. The names match the labels
+// parca-agent attaches to profiles so the two can be joined. Resolvers share
+// this map without copying it, so it must be treated as read-only.
+var defaultEnrichmentLabels = map[string]struct{}{
+	"namespace":           {},
+	"pod":                 {},
+	"pod_container_name":  {},
+	"pod_container_id":    {},
+	"pod_uid":             {},
+	"pod_controller_kind": {},
+	"pod_controller_name": {},
+}
+
+// ContainerLabelResolver enriches per-process GPU metrics with Kubernetes
+// container/pod labels obtained from parca-agent's container metadata provider,
+// keeping only the labels named in allow. It implements LabelResolver.
+type ContainerLabelResolver struct {
+	ctx      context.Context           // kept in the struct because LabelsForPID has no ctx parameter
+	provider metadata.MetadataProvider // source of the per-PID metadata labels
+	allow    map[string]struct{}       // label names that LabelsForPID may return
+}
+
+// NewContainerLabelResolver creates a ContainerLabelResolver for the given
+// Kubernetes node. It constructs parca-agent's container metadata provider,
+// returning that constructor's error unchanged if it fails, and restricts the
+// resolver's output to defaultEnrichmentLabels. ctx is handed to the provider
+// constructor and retained for the per-PID lookups done by LabelsForPID. No
+// cache is added here; the provider presumably caches lookups itself.
+func NewContainerLabelResolver(ctx context.Context, nodeName string) (*ContainerLabelResolver, error) {
+	mp, err := metadata.NewContainerMetadataProvider(ctx, nodeName)
+	if err != nil {
+		return nil, err
+	}
+	resolver := &ContainerLabelResolver{ctx: ctx, provider: mp, allow: defaultEnrichmentLabels}
+	return resolver, nil
+}
+
+// LabelsForPID looks up metadata for the given host PID and returns only the
+// allow-listed labels that have non-empty values. When nothing matches, the
+// result is an empty (non-nil) map and the data point keeps just pid/comm.
+func (r *ContainerLabelResolver) LabelsForPID(pid uint32) map[string]string {
+	builder := labels.NewBuilder(labels.EmptyLabels())
+	r.provider.AddMetadata(r.ctx, libpf.PID(pid), builder)
+	curated := make(map[string]string, len(r.allow))
+	builder.Range(func(lbl labels.Label) {
+		if _, allowed := r.allow[lbl.Name]; !allowed || lbl.Value == "" {
+			return
+		}
+		curated[lbl.Name] = lbl.Value
+	})
+	return curated
+}
diff --git a/gpumetrics/nvidia.go b/gpumetrics/nvidia.go
@@ -65,16 +65,29 @@ const (
 	metricNameGPUUtilizationPercent       = "gpu_utilization_percent"
 )
 
+// LabelResolver supplies extra attributes (e.g. Kubernetes namespace, pod,
+// container) for a process given its host PID. They are copied onto per-process
+// GPU metric data points so these share identity with parca-agent's profiles; a
+// nil or empty map adds nothing. NVML reports host PIDs, so implementations
+// must resolve in the host PID namespace (the standard parca-agent deployment).
+type LabelResolver interface {
+	LabelsForPID(pid uint32) map[string]string
+}
+
 // Producer collects NVIDIA GPU metrics and implements metricexport.Producer.
 type Producer struct {
-	devices []*perDeviceState
+	devices  []*perDeviceState
+	resolver LabelResolver
 }
 
 // NewProducer initializes NVML and enumerates the available NVIDIA devices.
 // It returns an error if NVML is unavailable (e.g. no driver / no GPU), which
 // the caller should treat as "GPU metrics disabled on this node" rather than
 // fatal.
-func NewProducer() (*Producer, error) {
+//
+// resolver may be nil, in which case per-process metrics carry only pid and
+// comm; otherwise its labels are attached to per-process data points.
+func NewProducer(resolver LabelResolver) (*Producer, error) {
 	ret := nvml.Init()
 	if !errors.Is(ret, nvml.SUCCESS) {
 		return nil, fmt.Errorf("failed to initialize NVML library: %s", nvml.ErrorString(ret))
@@ -108,6 +121,7 @@ func NewProducer() (*Producer, error) {
 			uuid:       uuid,
 			index:      i,
 			powerLimit: powerLimit,
+			resolver:   resolver,
 
 			mu: &sync.RWMutex{},
 			lastTimestamp: map[string]uint64{
@@ -119,7 +133,7 @@ func NewProducer() (*Producer, error) {
 			gauges: map[string]pmetric.Gauge{},
 		}
 	}
-	return &Producer{devices: devices}, nil
+	return &Producer{devices: devices, resolver: resolver}, nil
 }
 
 // DeviceCount reports how many NVIDIA devices were enumerated.
@@ -251,6 +265,7 @@ type perDeviceState struct {
 	uuid       string
 	index      int
 	powerLimit uint32
+	resolver   LabelResolver
 
 	mu            *sync.RWMutex
 	lastTimestamp map[string]uint64
@@ -413,6 +428,13 @@ func (ds *perDeviceState) collectMemoryUtilization() error {
 	return nil
 }
 
+// putExtraLabels sets each entry of extra as a string attribute on attrs; a nil or empty extra is a no-op.
+func putExtraLabels(attrs pcommon.Map, extra map[string]string) {
+	for name, value := range extra {
+		attrs.PutStr(name, value)
+	}
+}
+
 func (ds *perDeviceState) collectProcessUtilization() error {
 	util := pmetric.NewGauge()
 	utilMem := pmetric.NewGauge()
@@ -452,12 +474,20 @@ func (ds *perDeviceState) collectProcessUtilization() error {
 			return fmt.Errorf("failed to get process name for %d - pid: %d - %s", ds.index, process.Pid, nvml.ErrorString(ret))
 		}
 
+		// Resolve container/pod labels once per process; they're identical
+		// across this process's utilization samples.
+		var extraLabels map[string]string
+		if ds.resolver != nil {
+			extraLabels = ds.resolver.LabelsForPID(process.Pid)
+		}
+
 		for _, sample := range utilization {
 			dpUtil := util.DataPoints().AppendEmpty()
 			dpUtil.Attributes().PutStr(attributeUUID, ds.uuid)
 			dpUtil.Attributes().PutInt(attributeIndex, int64(ds.index))
 			dpUtil.Attributes().PutInt(attributePID, int64(process.Pid))
 			dpUtil.Attributes().PutStr(attributeComm, processName)
+			putExtraLabels(dpUtil.Attributes(), extraLabels)
 			dpUtil.SetTimestamp(pcommon.Timestamp(ts.UnixNano()))
 			dpUtil.SetIntValue(int64(sample.SmUtil))
 
@@ -466,6 +496,7 @@ func (ds *perDeviceState) collectProcessUtilization() error {
 			dpMem.Attributes().PutInt(attributeIndex, int64(ds.index))
 			dpMem.Attributes().PutInt(attributePID, int64(process.Pid))
 			dpMem.Attributes().PutStr(attributeComm, processName)
+			putExtraLabels(dpMem.Attributes(), extraLabels)
 			dpMem.SetTimestamp(pcommon.Timestamp(ts.UnixNano()))
 			dpMem.SetIntValue(int64(sample.MemUtil))
 		}
diff --git a/main.go b/main.go
@@ -567,7 +567,7 @@ func mainWithExitCode() flags.ExitCode {
 	if f.GpuMetrics.Enable {
 		if grpcConn == nil {
 			log.Warn("--gpu-metrics-enable is set but no remote-store is configured; GPU metrics disabled")
-		} else if producer, err := gpumetrics.NewProducer(); err != nil {
+		} else if producer, err := gpumetrics.NewProducer(newGPULabelResolver(mainCtx, f.Node)); err != nil {
 			log.Warnf("GPU metrics disabled (NVML unavailable): %v", err)
 		} else {
 			log.Infof("GPU metrics enabled: collecting from %d NVIDIA device(s)", producer.DeviceCount())
@@ -704,6 +704,19 @@ func getTracePipe() (*os.File, error) {
 	return nil, os.ErrNotExist
 }
 
+// newGPULabelResolver returns the container/pod label resolver used to enrich
+// per-process GPU metrics. If construction fails, the error is logged as a
+// warning and an untyped nil is returned (never a nil *ContainerLabelResolver
+// wrapped in the interface), so callers' `resolver != nil` checks stay correct.
+func newGPULabelResolver(ctx context.Context, nodeName string) gpumetrics.LabelResolver {
+	containerResolver, err := gpumetrics.NewContainerLabelResolver(ctx, nodeName)
+	if err != nil {
+		log.Warnf("GPU metrics: container label enrichment disabled: %v", err)
+		return nil
+	}
+	return containerResolver
+}
+
 func readTracePipe(ctx context.Context) {
 	tp, err := getTracePipe()
 	if err != nil {