|
| 1 | +// Copyright 2026 The Parca Authors |
| 2 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 3 | +// you may not use this file except in compliance with the License. |
| 4 | +// You may obtain a copy of the License at |
| 5 | +// |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | +// |
| 8 | +// Unless required by applicable law or agreed to in writing, software |
| 9 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 10 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 11 | +// See the License for the specific language governing permissions and |
| 12 | +// limitations under the License. |
| 13 | + |
| 14 | +package gpumetrics |
| 15 | + |
| 16 | +import ( |
| 17 | + "context" |
| 18 | + |
| 19 | + "github.com/prometheus/prometheus/model/labels" |
| 20 | + "go.opentelemetry.io/ebpf-profiler/libpf" |
| 21 | + |
| 22 | + "github.com/parca-dev/parca-agent/reporter/metadata" |
| 23 | +) |
| 24 | + |
// defaultEnrichmentLabels is the allow-list of container/pod label names that
// may be attached to per-process GPU metrics. It is used purely as a set: only
// key membership matters.
//
// The list is kept deliberately short. Every distinct label combination becomes
// its own time series, so only stable, low-churn identifiers that are useful
// for grouping are included, while verbose or high-churn labels (for example
// pod_container_image or pod_ip) are left out to keep cardinality down. The
// names mirror the labels parca-agent already attaches to profiles, so GPU
// metrics and profiles can be joined on the same pod/container identity.
var defaultEnrichmentLabels = map[string]struct{}{
	// Namespace- and pod-level identity.
	"namespace": {},
	"pod":       {},
	"pod_uid":   {},

	// Owning controller of the pod.
	"pod_controller_kind": {},
	"pod_controller_name": {},

	// Container-level identity.
	"pod_container_id":   {},
	"pod_container_name": {},
}
| 41 | + |
| 42 | +// ContainerLabelResolver enriches per-process GPU metrics with Kubernetes |
| 43 | +// container/pod labels, using parca-agent's container metadata provider. It |
| 44 | +// implements LabelResolver. |
| 45 | +type ContainerLabelResolver struct { |
| 46 | + ctx context.Context |
| 47 | + provider metadata.MetadataProvider |
| 48 | + allow map[string]struct{} |
| 49 | +} |
| 50 | + |
| 51 | +// NewContainerLabelResolver builds a resolver backed by the container metadata |
| 52 | +// provider for the given Kubernetes node. The provider maintains its own caches, |
| 53 | +// so per-PID lookups on the hot path are cheap after the first resolution. |
| 54 | +func NewContainerLabelResolver(ctx context.Context, nodeName string) (*ContainerLabelResolver, error) { |
| 55 | + provider, err := metadata.NewContainerMetadataProvider(ctx, nodeName) |
| 56 | + if err != nil { |
| 57 | + return nil, err |
| 58 | + } |
| 59 | + return &ContainerLabelResolver{ |
| 60 | + ctx: ctx, |
| 61 | + provider: provider, |
| 62 | + allow: defaultEnrichmentLabels, |
| 63 | + }, nil |
| 64 | +} |
| 65 | + |
| 66 | +// LabelsForPID returns the curated container/pod labels for a host PID. PIDs |
| 67 | +// that don't belong to a container (or that can't be resolved) yield an empty |
| 68 | +// map, leaving the data point with only its pid/comm attributes. |
| 69 | +func (r *ContainerLabelResolver) LabelsForPID(pid uint32) map[string]string { |
| 70 | + lb := labels.NewBuilder(labels.EmptyLabels()) |
| 71 | + r.provider.AddMetadata(r.ctx, libpf.PID(pid), lb) |
| 72 | + |
| 73 | + out := make(map[string]string, len(r.allow)) |
| 74 | + lb.Range(func(l labels.Label) { |
| 75 | + if _, ok := r.allow[l.Name]; ok && l.Value != "" { |
| 76 | + out[l.Name] = l.Value |
| 77 | + } |
| 78 | + }) |
| 79 | + return out |
| 80 | +} |
0 commit comments