Skip to content

Commit b5ea870

Browse files
committed
Add collector for SR-IOV Virtual Function statistics
Add a new netvf collector that exposes SR-IOV VF statistics and configuration via rtnetlink.

The collector queries netlink for interfaces with Virtual Functions and exposes per-VF metrics:

- node_net_vf_info: VF configuration (MAC, VLAN, link state, spoof check, trust)
- node_net_vf_{receive,transmit}_{packets,bytes}_total: traffic counters
- node_net_vf_{broadcast,multicast}_packets_total: packet type counters
- node_net_vf_{receive,transmit}_dropped_total: drop counters

The collector is disabled by default and can be enabled with --collector.netvf. PF device filtering is supported via the --collector.netvf.device-include and --collector.netvf.device-exclude flags.

Signed-off-by: Anthony Harivel <aharivel@redhat.com>
1 parent 5b58537 commit b5ea870

File tree

3 files changed

+586
-0
lines changed

3 files changed

+586
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude
106106
hwmon | sensor | --collector.hwmon.sensor-include | --collector.hwmon.sensor-exclude
107107
interrupts | name | --collector.interrupts.name-include | --collector.interrupts.name-exclude
108108
netdev | device | --collector.netdev.device-include | --collector.netdev.device-exclude
109+
netvf | device | --collector.netvf.device-include | --collector.netvf.device-exclude
109110
qdisk | device | --collector.qdisk.device-include | --collector.qdisk.device-exclude
110111
slabinfo | slab-names | --collector.slabinfo.slabs-include | --collector.slabinfo.slabs-exclude
111112
sysctl | all | --collector.sysctl.include | N/A
@@ -202,6 +203,7 @@ logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/So
202203
meminfo\_numa | Exposes memory statistics from `/sys/devices/system/node/node[0-9]*/meminfo`, `/sys/devices/system/node/node[0-9]*/numastat`. | Linux
203204
mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux
204205
network_route | Exposes the routing table as metrics | Linux
206+
netvf | Exposes SR-IOV Virtual Function statistics and configuration from netlink. | Linux
205207
pcidevice | Exposes pci devices' information including their link status and parent devices. | Linux
206208
perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux
207209
processes | Exposes aggregate process statistics from `/proc`. | Linux

collector/netvf_linux.go

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
// Copyright The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
//go:build !nonetvf
15+
16+
package collector
17+
18+
import (
19+
"errors"
20+
"fmt"
21+
"log/slog"
22+
"os"
23+
"path/filepath"
24+
25+
"github.com/alecthomas/kingpin/v2"
26+
"github.com/jsimonetti/rtnetlink/v2"
27+
"github.com/prometheus/client_golang/prometheus"
28+
)
29+
30+
// netvfSubsystem is the subsystem component passed to prometheus.BuildFQName
// for every metric descriptor created by this collector.
const netvfSubsystem = "net_vf"
31+
32+
var (
33+
netvfDeviceInclude = kingpin.Flag("collector.netvf.device-include", "Regexp of PF devices to include (mutually exclusive to device-exclude).").String()
34+
netvfDeviceExclude = kingpin.Flag("collector.netvf.device-exclude", "Regexp of PF devices to exclude (mutually exclusive to device-include).").String()
35+
)
36+
37+
func init() {
38+
registerCollector("netvf", defaultDisabled, NewNetVFCollector)
39+
}
40+
41+
type netvfCollector struct {
42+
logger *slog.Logger
43+
deviceFilter deviceFilter
44+
45+
info *prometheus.Desc
46+
receivePackets *prometheus.Desc
47+
transmitPackets *prometheus.Desc
48+
receiveBytes *prometheus.Desc
49+
transmitBytes *prometheus.Desc
50+
broadcast *prometheus.Desc
51+
multicast *prometheus.Desc
52+
receiveDropped *prometheus.Desc
53+
transmitDropped *prometheus.Desc
54+
}
55+
56+
func NewNetVFCollector(logger *slog.Logger) (Collector, error) {
57+
if *netvfDeviceExclude != "" && *netvfDeviceInclude != "" {
58+
return nil, errors.New("device-exclude & device-include are mutually exclusive")
59+
}
60+
61+
if *netvfDeviceExclude != "" {
62+
logger.Info("Parsed flag --collector.netvf.device-exclude", "flag", *netvfDeviceExclude)
63+
}
64+
65+
if *netvfDeviceInclude != "" {
66+
logger.Info("Parsed flag --collector.netvf.device-include", "flag", *netvfDeviceInclude)
67+
}
68+
69+
return &netvfCollector{
70+
logger: logger,
71+
deviceFilter: newDeviceFilter(*netvfDeviceExclude, *netvfDeviceInclude),
72+
info: prometheus.NewDesc(
73+
prometheus.BuildFQName(namespace, netvfSubsystem, "info"),
74+
"Virtual Function configuration information.",
75+
[]string{"device", "vf", "mac", "vlan", "link_state", "spoof_check", "trust", "pci_address"}, nil,
76+
),
77+
receivePackets: prometheus.NewDesc(
78+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_packets_total"),
79+
"Number of received packets by the VF.",
80+
[]string{"device", "vf", "pci_address"}, nil,
81+
),
82+
transmitPackets: prometheus.NewDesc(
83+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_packets_total"),
84+
"Number of transmitted packets by the VF.",
85+
[]string{"device", "vf", "pci_address"}, nil,
86+
),
87+
receiveBytes: prometheus.NewDesc(
88+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_bytes_total"),
89+
"Number of received bytes by the VF.",
90+
[]string{"device", "vf", "pci_address"}, nil,
91+
),
92+
transmitBytes: prometheus.NewDesc(
93+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_bytes_total"),
94+
"Number of transmitted bytes by the VF.",
95+
[]string{"device", "vf", "pci_address"}, nil,
96+
),
97+
broadcast: prometheus.NewDesc(
98+
prometheus.BuildFQName(namespace, netvfSubsystem, "broadcast_packets_total"),
99+
"Number of broadcast packets received by the VF.",
100+
[]string{"device", "vf", "pci_address"}, nil,
101+
),
102+
multicast: prometheus.NewDesc(
103+
prometheus.BuildFQName(namespace, netvfSubsystem, "multicast_packets_total"),
104+
"Number of multicast packets received by the VF.",
105+
[]string{"device", "vf", "pci_address"}, nil,
106+
),
107+
receiveDropped: prometheus.NewDesc(
108+
prometheus.BuildFQName(namespace, netvfSubsystem, "receive_dropped_total"),
109+
"Number of dropped received packets by the VF.",
110+
[]string{"device", "vf", "pci_address"}, nil,
111+
),
112+
transmitDropped: prometheus.NewDesc(
113+
prometheus.BuildFQName(namespace, netvfSubsystem, "transmit_dropped_total"),
114+
"Number of dropped transmitted packets by the VF.",
115+
[]string{"device", "vf", "pci_address"}, nil,
116+
),
117+
}, nil
118+
}
119+
120+
func (c *netvfCollector) Update(ch chan<- prometheus.Metric) error {
121+
conn, err := rtnetlink.Dial(nil)
122+
if err != nil {
123+
return fmt.Errorf("failed to connect to rtnetlink: %w", err)
124+
}
125+
defer conn.Close()
126+
127+
links, err := conn.Link.ListWithVFInfo()
128+
if err != nil {
129+
return fmt.Errorf("failed to list interfaces with VF info: %w", err)
130+
}
131+
132+
vfCount := 0
133+
for _, link := range links {
134+
if link.Attributes == nil {
135+
continue
136+
}
137+
138+
// Skip interfaces without VFs
139+
if link.Attributes.NumVF == nil || *link.Attributes.NumVF == 0 {
140+
continue
141+
}
142+
143+
device := link.Attributes.Name
144+
145+
// Apply device filter
146+
if c.deviceFilter.ignored(device) {
147+
c.logger.Debug("Ignoring device", "device", device)
148+
continue
149+
}
150+
151+
for _, vf := range link.Attributes.VFInfoList {
152+
vfID := fmt.Sprintf("%d", vf.ID)
153+
154+
// Emit info metric with VF configuration
155+
mac := ""
156+
if vf.MAC != nil {
157+
mac = vf.MAC.String()
158+
}
159+
vlan := fmt.Sprintf("%d", vf.Vlan)
160+
linkState := vfLinkStateString(vf.LinkState)
161+
spoofCheck := fmt.Sprintf("%t", vf.SpoofCheck)
162+
trust := fmt.Sprintf("%t", vf.Trust)
163+
pciAddress := resolveVFPCIAddress(sysFilePath("class"), device, vf.ID)
164+
165+
ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, 1, device, vfID, mac, vlan, linkState, spoofCheck, trust, pciAddress)
166+
167+
// Emit stats metrics if available
168+
if vf.Stats == nil {
169+
c.logger.Debug("VF has no stats", "device", device, "vf", vf.ID)
170+
vfCount++
171+
continue
172+
}
173+
174+
stats := vf.Stats
175+
176+
ch <- prometheus.MustNewConstMetric(c.receivePackets, prometheus.CounterValue, float64(stats.RxPackets), device, vfID, pciAddress)
177+
ch <- prometheus.MustNewConstMetric(c.transmitPackets, prometheus.CounterValue, float64(stats.TxPackets), device, vfID, pciAddress)
178+
ch <- prometheus.MustNewConstMetric(c.receiveBytes, prometheus.CounterValue, float64(stats.RxBytes), device, vfID, pciAddress)
179+
ch <- prometheus.MustNewConstMetric(c.transmitBytes, prometheus.CounterValue, float64(stats.TxBytes), device, vfID, pciAddress)
180+
ch <- prometheus.MustNewConstMetric(c.broadcast, prometheus.CounterValue, float64(stats.Broadcast), device, vfID, pciAddress)
181+
ch <- prometheus.MustNewConstMetric(c.multicast, prometheus.CounterValue, float64(stats.Multicast), device, vfID, pciAddress)
182+
ch <- prometheus.MustNewConstMetric(c.receiveDropped, prometheus.CounterValue, float64(stats.RxDropped), device, vfID, pciAddress)
183+
ch <- prometheus.MustNewConstMetric(c.transmitDropped, prometheus.CounterValue, float64(stats.TxDropped), device, vfID, pciAddress)
184+
185+
vfCount++
186+
}
187+
}
188+
189+
if vfCount == 0 {
190+
return ErrNoData
191+
}
192+
193+
return nil
194+
}
195+
196+
func vfLinkStateString(state rtnetlink.VFLinkState) string {
197+
switch state {
198+
case rtnetlink.VFLinkStateAuto:
199+
return "auto"
200+
case rtnetlink.VFLinkStateEnable:
201+
return "enable"
202+
case rtnetlink.VFLinkStateDisable:
203+
return "disable"
204+
default:
205+
return "unknown"
206+
}
207+
}
208+
209+
// resolveVFPCIAddress returns the PCI address of VF number vfID on pfDevice.
//
// The address is the final path element of the target of the symlink
// <sysClassPath>/net/<pfDevice>/device/virtfn<vfID>. When the symlink cannot
// be read, the empty string is returned.
func resolveVFPCIAddress(sysClassPath, pfDevice string, vfID uint32) string {
	linkName := fmt.Sprintf("virtfn%d", vfID)
	target, err := os.Readlink(filepath.Join(sysClassPath, "net", pfDevice, "device", linkName))
	if err == nil {
		return filepath.Base(target)
	}
	return ""
}
220+
221+
// vfMetrics holds parsed VF metrics for a single VF
222+
type vfMetrics struct {
223+
Device string
224+
VFID uint32
225+
MAC string
226+
Vlan uint32
227+
LinkState string
228+
SpoofCheck bool
229+
Trust bool
230+
PCIAddress string
231+
Stats *rtnetlink.VFStats
232+
}
233+
234+
// parseVFInfo extracts VF information from link messages for testing.
235+
// sysClassPath is the path to the sysfs class directory used to resolve VF PCI addresses.
236+
func parseVFInfo(links []rtnetlink.LinkMessage, filter *deviceFilter, logger *slog.Logger, sysClassPath string) []vfMetrics {
237+
var result []vfMetrics
238+
239+
for _, link := range links {
240+
if link.Attributes == nil {
241+
continue
242+
}
243+
244+
// Skip interfaces without VFs
245+
if link.Attributes.NumVF == nil || *link.Attributes.NumVF == 0 {
246+
continue
247+
}
248+
249+
device := link.Attributes.Name
250+
251+
// Apply device filter
252+
if filter.ignored(device) {
253+
logger.Debug("Ignoring device", "device", device)
254+
continue
255+
}
256+
257+
for _, vf := range link.Attributes.VFInfoList {
258+
mac := ""
259+
if vf.MAC != nil {
260+
mac = vf.MAC.String()
261+
}
262+
263+
result = append(result, vfMetrics{
264+
Device: device,
265+
VFID: vf.ID,
266+
MAC: mac,
267+
Vlan: vf.Vlan,
268+
LinkState: vfLinkStateString(vf.LinkState),
269+
SpoofCheck: vf.SpoofCheck,
270+
Trust: vf.Trust,
271+
PCIAddress: resolveVFPCIAddress(sysClassPath, device, vf.ID),
272+
Stats: vf.Stats,
273+
})
274+
}
275+
}
276+
277+
return result
278+
}

0 commit comments

Comments
 (0)