@@ -20,14 +20,23 @@ import (
2020 "fmt"
2121 "log/slog"
2222 "os"
23+ "path/filepath"
2324 "strconv"
25+ "strings"
2426
2527 "github.com/prometheus/client_golang/prometheus"
2628 "github.com/prometheus/procfs/sysfs"
2729)
2830
// efaVendorID is the PCI vendor ID, in the "0x"-prefixed lowercase hex form
// found in a device's sysfs vendor file, that identifies an AWS Elastic
// Fabric Adapter (EFA).
//
// EFA devices appear under /sys/class/infiniband, yet they leave the IB-spec
// port_xmit_data / port_rcv_data counters unpopulated. Their byte and packet
// totals are exposed in hw_counters/{tx,rx}_{bytes,pkts} as raw values, i.e.
// without the 4-octet scaling the IB counters use.
const efaVendorID = "0x1d0f"
36+
2937type infinibandCollector struct {
3038 fs sysfs.FS
39+ sysPath string
3140 metricDescs map [string ]* prometheus.Desc
3241 logger * slog.Logger
3342 subsystem string
@@ -46,6 +55,7 @@ func NewInfiniBandCollector(logger *slog.Logger) (Collector, error) {
4655 if err != nil {
4756 return nil , fmt .Errorf ("failed to open sysfs: %w" , err )
4857 }
58+ i .sysPath = * sysPath
4959 i .logger = logger
5060
5161 // Detailed description for all metrics.
@@ -115,6 +125,21 @@ func NewInfiniBandCollector(logger *slog.Logger) (Collector, error) {
115125 "rx_read_requests_total" : "The number of received READ requests for the associated QPs." ,
116126 "rx_write_requests_total" : "The number of received WRITE requests for the associated QPs." ,
117127 "rx_icrc_encapsulated_errors_total" : "The number of RoCE packets with ICRC errors. This counter was added in MLNX_OFED 4.4 and kernel 4.19" ,
128+
129+ // EFA-specific hw_counters (vendor 0x1d0f). EFA NICs do not follow the
130+ // IB spec for port_xmit_data / port_rcv_data, so the IB code path leaves
131+ // port_data_*_bytes_total empty. The EFA branch in Update() fills those
132+ // from hw_counters/{tx,rx}_bytes and additionally emits the diagnostic
133+ // counters listed here under the efa_ prefix to avoid clashing with the
134+ // Mellanox-specific hw_counters above.
135+ "efa_rx_drops_total" : "EFA: packets dropped on receive (hw_counters/rx_drops)." ,
136+ "efa_retrans_packets_total" : "EFA: retransmitted packets (hw_counters/retrans_pkts)." ,
137+ "efa_retrans_bytes_total" : "EFA: retransmitted bytes (hw_counters/retrans_bytes)." ,
138+ "efa_retrans_timeout_events_total" : "EFA: retransmit timeout events (hw_counters/retrans_timeout_events)." ,
139+ "efa_unresponsive_remote_events_total" : "EFA: unresponsive remote events (hw_counters/unresponsive_remote_events)." ,
140+ "efa_impaired_remote_conn_events_total" : "EFA: impaired remote connection events (hw_counters/impaired_remote_conn_events)." ,
141+ "efa_rdma_read_bytes_total" : "EFA: RDMA read bytes (hw_counters/rdma_read_bytes)." ,
142+ "efa_rdma_write_bytes_total" : "EFA: RDMA write bytes (hw_counters/rdma_write_bytes)." ,
118143 }
119144
120145 i .metricDescs = make (map [string ]* prometheus.Desc )
@@ -142,6 +167,45 @@ func (c *infinibandCollector) pushCounter(ch chan<- prometheus.Metric, name stri
142167 }
143168}
144169
170+ // isEFADevice reports whether the InfiniBand-class device is an AWS EFA NIC
171+ // by checking its PCI vendor ID (0x1d0f). EFA NICs register under
172+ // /sys/class/infiniband but do not follow the IB spec for byte/packet
173+ // counters, so they need a different read path (hw_counters/).
174+ func (c * infinibandCollector ) isEFADevice (deviceName string ) bool {
175+ path := filepath .Join (c .sysPath , "class" , "infiniband" , deviceName , "device" , "vendor" )
176+ data , err := os .ReadFile (path )
177+ if err != nil {
178+ return false
179+ }
180+ return strings .TrimSpace (string (data )) == efaVendorID
181+ }
182+
183+ // readEFAHWCounter reads a single uint64 counter from
184+ // /sys/class/infiniband/<device>/ports/<port>/hw_counters/<counter>.
185+ // Returns nil if the file is missing or unparseable, so pushCounter can skip
186+ // emitting absent series.
187+ func (c * infinibandCollector ) readEFAHWCounter (deviceName string , port uint , counter string ) * uint64 {
188+ path := filepath .Join (c .sysPath , "class" , "infiniband" , deviceName ,
189+ "ports" , strconv .FormatUint (uint64 (port ), 10 ), "hw_counters" , counter )
190+ data , err := os .ReadFile (path )
191+ if err != nil {
192+ return nil
193+ }
194+ v , err := strconv .ParseUint (strings .TrimSpace (string (data )), 10 , 64 )
195+ if err != nil {
196+ c .logger .Debug ("failed to parse EFA hw_counter" ,
197+ "path" , path , "err" , err )
198+ return nil
199+ }
200+ return & v
201+ }
202+
203+ // pushEFACounter is a convenience wrapper that reads a single hw_counter and
204+ // pushes it as a Prometheus counter if present.
205+ func (c * infinibandCollector ) pushEFACounter (ch chan <- prometheus.Metric , metricName , counterFile , deviceName string , port uint , portStr string ) {
206+ c .pushCounter (ch , metricName , c .readEFAHWCounter (deviceName , port , counterFile ), deviceName , portStr )
207+ }
208+
145209func (c * infinibandCollector ) Update (ch chan <- prometheus.Metric ) error {
146210 devices , err := c .fs .InfiniBandClass ()
147211 if err != nil {
@@ -162,13 +226,42 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
162226 infoValue := 1.0
163227 ch <- prometheus .MustNewConstMetric (infoDesc , prometheus .GaugeValue , infoValue , device .Name , device .BoardID , device .FirmwareVersion , device .HCAType )
164228
229+ // EFA NICs share /sys/class/infiniband layout with IB but use
230+ // hw_counters/ for byte/packet stats (raw values, no IB ×4 scaling).
231+ // Detect once per device so the vendor file is not re-read for every port.
232+ isEFA := c .isEFADevice (device .Name )
233+
165234 for _ , port := range device .Ports {
166235 portStr := strconv .FormatUint (uint64 (port .Port ), 10 )
167236
168237 c .pushMetric (ch , "state_id" , uint64 (port .StateID ), port .Name , portStr , prometheus .GaugeValue )
169238 c .pushMetric (ch , "physical_state_id" , uint64 (port .PhysStateID ), port .Name , portStr , prometheus .GaugeValue )
170239 c .pushMetric (ch , "rate_bytes_per_second" , port .Rate , port .Name , portStr , prometheus .GaugeValue )
171240
241+ if isEFA {
242+ // EFA path: port.Counters (from procfs/sysfs IB-spec parser)
243+ // is empty/zero, so we read hw_counters/ directly and emit
244+ // under the existing port_data_* / port_packets_* metric
245+ // names so existing IB dashboards transparently see EFA data.
246+ c .pushEFACounter (ch , "port_data_transmitted_bytes_total" , "tx_bytes" , port .Name , port .Port , portStr )
247+ c .pushEFACounter (ch , "port_data_received_bytes_total" , "rx_bytes" , port .Name , port .Port , portStr )
248+ c .pushEFACounter (ch , "port_packets_transmitted_total" , "tx_pkts" , port .Name , port .Port , portStr )
249+ c .pushEFACounter (ch , "port_packets_received_total" , "rx_pkts" , port .Name , port .Port , portStr )
250+
251+ // EFA-only diagnostic counters — emitted under efa_* names to
252+ // avoid colliding with IB-spec semantics. Useful for tracking
253+ // fabric retransmissions and unresponsive peers.
254+ c .pushEFACounter (ch , "efa_rx_drops_total" , "rx_drops" , port .Name , port .Port , portStr )
255+ c .pushEFACounter (ch , "efa_retrans_packets_total" , "retrans_pkts" , port .Name , port .Port , portStr )
256+ c .pushEFACounter (ch , "efa_retrans_bytes_total" , "retrans_bytes" , port .Name , port .Port , portStr )
257+ c .pushEFACounter (ch , "efa_retrans_timeout_events_total" , "retrans_timeout_events" , port .Name , port .Port , portStr )
258+ c .pushEFACounter (ch , "efa_unresponsive_remote_events_total" , "unresponsive_remote_events" , port .Name , port .Port , portStr )
259+ c .pushEFACounter (ch , "efa_impaired_remote_conn_events_total" , "impaired_remote_conn_events" , port .Name , port .Port , portStr )
260+ c .pushEFACounter (ch , "efa_rdma_read_bytes_total" , "rdma_read_bytes" , port .Name , port .Port , portStr )
261+ c .pushEFACounter (ch , "efa_rdma_write_bytes_total" , "rdma_write_bytes" , port .Name , port .Port , portStr )
262+ continue
263+ }
264+
172265 c .pushCounter (ch , "legacy_multicast_packets_received_total" , port .Counters .LegacyPortMulticastRcvPackets , port .Name , portStr )
173266 c .pushCounter (ch , "legacy_multicast_packets_transmitted_total" , port .Counters .LegacyPortMulticastXmitPackets , port .Name , portStr )
174267 c .pushCounter (ch , "legacy_data_received_bytes_total" , port .Counters .LegacyPortRcvData64 , port .Name , portStr )
0 commit comments