Skip to content

Commit f5c2828

Browse files
committed
Add collector for SR-IOV network Virtual Function statistics
Add a new netvf collector that exposes SR-IOV network VF statistics and configuration via rtnetlink. The collector queries netlink for interfaces with Virtual Functions and exposes per-VF metrics: - node_net_vf_info: VF configuration (MAC, VLAN, link state, spoof check, trust, PCI address) - node_net_vf_{receive,transmit}_{packets,bytes}_total: traffic counters - node_net_vf_{broadcast,multicast}_packets_total: packet type counters - node_net_vf_{receive,transmit}_dropped_total: drop counters All metrics include a pci_address label resolved from the sysfs virtfn symlink, enabling direct correlation with workloads that reference VFs by PCI BDF address (e.g. OpenStack Nova, libvirt, DPDK). The collector is disabled by default and can be enabled with --collector.netvf. PF device filtering is supported via --collector.netvf.device-include/exclude flags. Signed-off-by: Anthony Harivel <aharivel@redhat.com>
1 parent 5b58537 commit f5c2828

File tree

3 files changed

+586
-0
lines changed

3 files changed

+586
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude
106106
hwmon | sensor | --collector.hwmon.sensor-include | --collector.hwmon.sensor-exclude
107107
interrupts | name | --collector.interrupts.name-include | --collector.interrupts.name-exclude
108108
netdev | device | --collector.netdev.device-include | --collector.netdev.device-exclude
109+
netvf | device | --collector.netvf.device-include | --collector.netvf.device-exclude
109110
qdisk | device | --collector.qdisk.device-include | --collector.qdisk.device-exclude
110111
slabinfo | slab-names | --collector.slabinfo.slabs-include | --collector.slabinfo.slabs-exclude
111112
sysctl | all | --collector.sysctl.include | N/A
@@ -202,6 +203,7 @@ logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/So
202203
meminfo\_numa | Exposes memory statistics from `/sys/devices/system/node/node[0-9]*/meminfo`, `/sys/devices/system/node/node[0-9]*/numastat`. | Linux
203204
mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux
204205
network_route | Exposes the routing table as metrics | Linux
206+
netvf | Exposes SR-IOV Virtual Function statistics and configuration from netlink. | Linux
205207
pcidevice | Exposes pci devices' information including their link status and parent devices. | Linux
206208
perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux
207209
processes | Exposes aggregate process statistics from `/proc`. | Linux

collector/netvf_linux.go

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
// Copyright The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
//go:build !nonetvf
15+
16+
package collector
17+
18+
import (
19+
"errors"
20+
"fmt"
21+
"log/slog"
22+
"os"
23+
"path/filepath"
24+
25+
"github.com/alecthomas/kingpin/v2"
26+
"github.com/jsimonetti/rtnetlink/v2"
27+
"github.com/prometheus/client_golang/prometheus"
28+
)
29+
30+
// netvfSubsystem is the subsystem component passed to prometheus.BuildFQName
// for every metric descriptor created by the netvf collector.
const netvfSubsystem = "net_vf"
31+
32+
var (
33+
netvfDeviceInclude = kingpin.Flag("collector.netvf.device-include", "Regexp of PF devices to include (mutually exclusive to device-exclude).").String()
34+
netvfDeviceExclude = kingpin.Flag("collector.netvf.device-exclude", "Regexp of PF devices to exclude (mutually exclusive to device-include).").String()
35+
)
36+
37+
func init() {
38+
registerCollector("netvf", defaultDisabled, NewNetVFCollector)
39+
}
40+
41+
type netvfCollector struct {
42+
logger *slog.Logger
43+
deviceFilter deviceFilter
44+
45+
info *prometheus.Desc
46+
receivePackets *prometheus.Desc
47+
transmitPackets *prometheus.Desc
48+
receiveBytes *prometheus.Desc
49+
transmitBytes *prometheus.Desc
50+
broadcast *prometheus.Desc
51+
multicast *prometheus.Desc
52+
receiveDropped *prometheus.Desc
53+
transmitDropped *prometheus.Desc
54+
}
55+
56+
func NewNetVFCollector(logger *slog.Logger) (Collector, error) {
57+
if *netvfDeviceExclude != "" && *netvfDeviceInclude != "" {
58+
return nil, errors.New("device-exclude & device-include are mutually exclusive")
59+
}
60+
61+
if *netvfDeviceExclude != "" {
62+
logger.Info("Parsed flag --collector.netvf.device-exclude", "flag", *netvfDeviceExclude)
63+
}
64+
65+
if *netvfDeviceInclude != "" {
66+
logger.Info("Parsed flag --collector.netvf.device-include", "flag", *netvfDeviceInclude)
67+
}
68+
69+
return &netvfCollector{
70+
logger: logger,
71+
deviceFilter: newDeviceFilter(*netvfDeviceExclude, *netvfDeviceInclude),
72+
info: prometheus.NewDesc(
73+
prometheus.BuildFQName(namespace, netvfSubsystem, "info"),
74+
"Virtual Function configuration information.",
75+
[]string{"device", "vf", "mac", "vlan", "link_state", "spoof_check", "trust", "pci_address"}, nil,
76+
),
77+
receivePackets: prometheus.NewDesc(
78+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_packets_total"),
79+
"Number of received packets by the VF.",
80+
[]string{"device", "vf", "pci_address"}, nil,
81+
),
82+
transmitPackets: prometheus.NewDesc(
83+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_packets_total"),
84+
"Number of transmitted packets by the VF.",
85+
[]string{"device", "vf", "pci_address"}, nil,
86+
),
87+
receiveBytes: prometheus.NewDesc(
88+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_bytes_total"),
89+
"Number of received bytes by the VF.",
90+
[]string{"device", "vf", "pci_address"}, nil,
91+
),
92+
transmitBytes: prometheus.NewDesc(
93+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_bytes_total"),
94+
"Number of transmitted bytes by the VF.",
95+
[]string{"device", "vf", "pci_address"}, nil,
96+
),
97+
broadcast: prometheus.NewDesc(
98+
prometheus.BuildFQName(namespace, netvfSubsystem, "broadcast_packets_total"),
99+
"Number of broadcast packets received by the VF.",
100+
[]string{"device", "vf", "pci_address"}, nil,
101+
),
102+
multicast: prometheus.NewDesc(
103+
prometheus.BuildFQName(namespace, netvfSubsystem, "multicast_packets_total"),
104+
"Number of multicast packets received by the VF.",
105+
[]string{"device", "vf", "pci_address"}, nil,
106+
),
107+
receiveDropped: prometheus.NewDesc(
108+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_dropped_total"),
109+
"Number of dropped received packets by the VF.",
110+
[]string{"device", "vf", "pci_address"}, nil,
111+
),
112+
transmitDropped: prometheus.NewDesc(
113+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_dropped_total"),
114+
"Number of dropped transmitted packets by the VF.",
115+
[]string{"device", "vf", "pci_address"}, nil,
116+
),
117+
}, nil
118+
}
119+
120+
func (c *netvfCollector) Update(ch chan<- prometheus.Metric) error {
121+
conn, err := rtnetlink.Dial(nil)
122+
if err != nil {
123+
return fmt.Errorf("failed to connect to rtnetlink: %w", err)
124+
}
125+
defer conn.Close()
126+
127+
links, err := conn.Link.ListWithVFInfo()
128+
if err != nil {
129+
return fmt.Errorf("failed to list interfaces with VF info: %w", err)
130+
}
131+
132+
vfCount := 0
133+
for _, link := range links {
134+
if link.Attributes == nil {
135+
continue
136+
}
137+
138+
// Skip interfaces without VFs
139+
if link.Attributes.NumVF == nil || *link.Attributes.NumVF == 0 {
140+
continue
141+
}
142+
143+
device := link.Attributes.Name
144+
145+
// Apply device filter
146+
if c.deviceFilter.ignored(device) {
147+
c.logger.Debug("Ignoring device", "device", device)
148+
continue
149+
}
150+
151+
for _, vf := range link.Attributes.VFInfoList {
152+
vfID := fmt.Sprintf("%d", vf.ID)
153+
154+
// Emit info metric with VF configuration
155+
mac := ""
156+
if vf.MAC != nil {
157+
mac = vf.MAC.String()
158+
}
159+
vlan := fmt.Sprintf("%d", vf.Vlan)
160+
linkState := vfLinkStateString(vf.LinkState)
161+
spoofCheck := fmt.Sprintf("%t", vf.SpoofCheck)
162+
trust := fmt.Sprintf("%t", vf.Trust)
163+
pciAddress := resolveVFPCIAddress(sysFilePath("class"), device, vf.ID)
164+
165+
ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, 1, device, vfID, mac, vlan, linkState, spoofCheck, trust, pciAddress)
166+
167+
// Emit stats metrics if available
168+
if vf.Stats == nil {
169+
c.logger.Debug("VF has no stats", "device", device, "vf", vf.ID)
170+
vfCount++
171+
continue
172+
}
173+
174+
stats := vf.Stats
175+
176+
ch <- prometheus.MustNewConstMetric(c.receivePackets, prometheus.CounterValue, float64(stats.RxPackets), device, vfID, pciAddress)
177+
ch <- prometheus.MustNewConstMetric(c.transmitPackets, prometheus.CounterValue, float64(stats.TxPackets), device, vfID, pciAddress)
178+
ch <- prometheus.MustNewConstMetric(c.receiveBytes, prometheus.CounterValue, float64(stats.RxBytes), device, vfID, pciAddress)
179+
ch <- prometheus.MustNewConstMetric(c.transmitBytes, prometheus.CounterValue, float64(stats.TxBytes), device, vfID, pciAddress)
180+
ch <- prometheus.MustNewConstMetric(c.broadcast, prometheus.CounterValue, float64(stats.Broadcast), device, vfID, pciAddress)
181+
ch <- prometheus.MustNewConstMetric(c.multicast, prometheus.CounterValue, float64(stats.Multicast), device, vfID, pciAddress)
182+
ch <- prometheus.MustNewConstMetric(c.receiveDropped, prometheus.CounterValue, float64(stats.RxDropped), device, vfID, pciAddress)
183+
ch <- prometheus.MustNewConstMetric(c.transmitDropped, prometheus.CounterValue, float64(stats.TxDropped), device, vfID, pciAddress)
184+
185+
vfCount++
186+
}
187+
}
188+
189+
if vfCount == 0 {
190+
return ErrNoData
191+
}
192+
193+
return nil
194+
}
195+
196+
func vfLinkStateString(state rtnetlink.VFLinkState) string {
197+
switch state {
198+
case rtnetlink.VFLinkStateAuto:
199+
return "auto"
200+
case rtnetlink.VFLinkStateEnable:
201+
return "enable"
202+
case rtnetlink.VFLinkStateDisable:
203+
return "disable"
204+
default:
205+
return "unknown"
206+
}
207+
}
208+
209+
// resolveVFPCIAddress returns the PCI BDF address of a VF, taken as the last
// path element of the target of the sysfs symlink
// <sysClassPath>/net/<pfDevice>/device/virtfn<vfID>.
//
// It returns the empty string when the symlink cannot be read (for example
// because it does not exist).
func resolveVFPCIAddress(sysClassPath, pfDevice string, vfID uint32) string {
	pfDeviceDir := filepath.Join(sysClassPath, "net", pfDevice, "device")
	virtfnLink := filepath.Join(pfDeviceDir, fmt.Sprintf("virtfn%d", vfID))

	if target, err := os.Readlink(virtfnLink); err == nil {
		return filepath.Base(target)
	}
	return ""
}
220+
221+
// vfMetrics holds parsed VF metrics for a single VF
222+
type vfMetrics struct {
223+
Device string
224+
VFID uint32
225+
MAC string
226+
Vlan uint32
227+
LinkState string
228+
SpoofCheck bool
229+
Trust bool
230+
PCIAddress string
231+
Stats *rtnetlink.VFStats
232+
}
233+
234+
// parseVFInfo extracts VF information from link messages for testing.
235+
// sysClassPath is the path to the sysfs class directory used to resolve VF PCI addresses.
236+
func parseVFInfo(links []rtnetlink.LinkMessage, filter *deviceFilter, logger *slog.Logger, sysClassPath string) []vfMetrics {
237+
var result []vfMetrics
238+
239+
for _, link := range links {
240+
if link.Attributes == nil {
241+
continue
242+
}
243+
244+
// Skip interfaces without VFs
245+
if link.Attributes.NumVF == nil || *link.Attributes.NumVF == 0 {
246+
continue
247+
}
248+
249+
device := link.Attributes.Name
250+
251+
// Apply device filter
252+
if filter.ignored(device) {
253+
logger.Debug("Ignoring device", "device", device)
254+
continue
255+
}
256+
257+
for _, vf := range link.Attributes.VFInfoList {
258+
mac := ""
259+
if vf.MAC != nil {
260+
mac = vf.MAC.String()
261+
}
262+
263+
result = append(result, vfMetrics{
264+
Device: device,
265+
VFID: vf.ID,
266+
MAC: mac,
267+
Vlan: vf.Vlan,
268+
LinkState: vfLinkStateString(vf.LinkState),
269+
SpoofCheck: vf.SpoofCheck,
270+
Trust: vf.Trust,
271+
PCIAddress: resolveVFPCIAddress(sysClassPath, device, vf.ID),
272+
Stats: vf.Stats,
273+
})
274+
}
275+
}
276+
277+
return result
278+
}

0 commit comments

Comments
 (0)