3535 directionKey = attribute .Key ("direction" )
3636 attrTX = metric .WithAttributes (directionKey .String ("tx" ))
3737 attrRX = metric .WithAttributes (directionKey .String ("rx" ))
38+ attrRead = metric .WithAttributes (directionKey .String ("read" ))
39+ attrWrite = metric .WithAttributes (directionKey .String ("write" ))
3840
3941 // Counters — global totals, no sandbox_id to avoid high cardinality.
4042 fcNetFails = utils .Must (telemetry .GetCounter (fcMeter , telemetry .SandboxFCNetFails ))
@@ -49,12 +51,23 @@ var (
4951 // TX-only: no RX equivalent in Firecracker metrics.
5052 fcNetRateLimiterEventCount = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCNetRateLimiterEventCount ))
5153 fcNetRemainingReqs = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCNetRemainingReqs ))
54+
55+ // Block counters.
56+ fcBlockFails = utils .Must (telemetry .GetCounter (fcMeter , telemetry .SandboxFCBlockFails ))
57+ fcBlockNoAvailBuffer = utils .Must (telemetry .GetCounter (fcMeter , telemetry .SandboxFCBlockNoAvailBuffer ))
58+
59+ // Block histograms.
60+ fcBlockBytes = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockBytes ))
61+ fcBlockCount = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockCount ))
62+ fcBlockRateLimiterThrottled = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockRateLimiterThrottled ))
63+ fcBlockRateLimiterEventCount = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockRateLimiterEventCount ))
64+ fcBlockIOEngineThrottled = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockIOEngineThrottled ))
65+ fcBlockRemainingReqs = utils .Must (telemetry .GetHistogram (fcMeter , telemetry .SandboxFCBlockRemainingReqs ))
5266)
5367
54- // firecrackerNetMetrics holds the Firecracker net metrics fields we care about.
55- // Firecracker serializes SharedIncMetric fields as per-flush deltas (not cumulative totals):
56- // each JSON line contains the increment since the previous flush.
57- // Flush interval defaults to 60 s; additional flushes are triggered by FlushMetrics API calls.
68+ // firecrackerNetMetrics is a subset of Firecracker's NetDeviceMetrics we export via OTEL.
69+ // Full metric list: https://github.com/firecracker-microvm/firecracker/blob/main/docs/metrics.md
70+ // Values are per-flush deltas; flush defaults to 60 s, additional flushes via FlushMetrics API.
5871type firecrackerNetMetrics struct {
5972 // TX
6073 TxBytesCount uint64 `json:"tx_bytes_count"`
@@ -76,13 +89,31 @@ type firecrackerNetMetrics struct {
7689 TapReadFails uint64 `json:"tap_read_fails"`
7790}
7891
// firecrackerBlockMetrics is a subset of Firecracker's BlockDeviceMetrics we export via OTEL.
// Full metric list: https://github.com/firecracker-microvm/firecracker/blob/main/docs/metrics.md
// Values are per-flush deltas. The aggregate "block" key sums over all drives; we only have one (rootfs).
//
// Each field maps one JSON key of the "block" object; the notes on each group
// describe how the metrics reader exports the value.
type firecrackerBlockMetrics struct {
	// Throughput: recorded into the block bytes/count histograms, tagged with
	// direction "read" or "write".
	ReadBytes  uint64 `json:"read_bytes"`
	WriteBytes uint64 `json:"write_bytes"`
	ReadCount  uint64 `json:"read_count"`
	WriteCount uint64 `json:"write_count"`

	// Throttling: RateLimiterEventCount is recorded on every flush; the two
	// *ThrottledEvents values are recorded only when non-zero.
	RateLimiterThrottledEvents uint64 `json:"rate_limiter_throttled_events"`
	RateLimiterEventCount      uint64 `json:"rate_limiter_event_count"`
	IOEngineThrottledEvents    uint64 `json:"io_engine_throttled_events"`

	// Errors: NoAvailBuffer feeds its own counter; ExecuteFails and EventFails
	// are summed into a single block-failure counter. All are added only when non-zero.
	NoAvailBuffer uint64 `json:"no_avail_buffer"`
	ExecuteFails  uint64 `json:"execute_fails"`
	EventFails    uint64 `json:"event_fails"`

	// Recorded into a histogram on every flush.
	RemainingReqsCount uint64 `json:"remaining_reqs_count"`
}
108+
79109// firecrackerMetrics is the top-level structure of one Firecracker metrics JSON line.
80110type firecrackerMetrics struct {
81- Net firecrackerNetMetrics `json:"net"`
111+ Net firecrackerNetMetrics `json:"net"`
112+ Block firecrackerBlockMetrics `json:"block"`
82113}
83114
84115// startMetricsReader opens the metrics FIFO and starts a goroutine that reads
85- // Firecracker metrics lines and exports net device metrics via OTEL.
116+ // Firecracker metrics lines and exports metrics via OTEL.
86117// It must be called before setMetrics so that the FIFO is open for reading
87118// before Firecracker opens the write end in response to PUT /metrics.
88119func (p * Process ) startMetricsReader (ctx context.Context ) {
@@ -204,6 +235,31 @@ func (p *Process) startMetricsReader(ctx context.Context) {
204235 if n .TapReadFails > 0 {
205236 fcNetTapIOFails .Add (ctx , int64 (n .TapReadFails ), attrRX )
206237 }
238+
239+ // Block histograms — values are already per-flush deltas from Firecracker.
240+ b := & m .Block
241+
242+ fcBlockBytes .Record (ctx , int64 (b .ReadBytes ), attrRead )
243+ fcBlockBytes .Record (ctx , int64 (b .WriteBytes ), attrWrite )
244+ fcBlockCount .Record (ctx , int64 (b .ReadCount ), attrRead )
245+ fcBlockCount .Record (ctx , int64 (b .WriteCount ), attrWrite )
246+ fcBlockRateLimiterEventCount .Record (ctx , int64 (b .RateLimiterEventCount ))
247+ fcBlockRemainingReqs .Record (ctx , int64 (b .RemainingReqsCount ))
248+
249+ if b .RateLimiterThrottledEvents > 0 {
250+ fcBlockRateLimiterThrottled .Record (ctx , int64 (b .RateLimiterThrottledEvents ))
251+ }
252+ if b .IOEngineThrottledEvents > 0 {
253+ fcBlockIOEngineThrottled .Record (ctx , int64 (b .IOEngineThrottledEvents ))
254+ }
255+
256+ // Block global error/event counters.
257+ if b .ExecuteFails > 0 || b .EventFails > 0 {
258+ fcBlockFails .Add (ctx , int64 (b .ExecuteFails )+ int64 (b .EventFails ))
259+ }
260+ if b .NoAvailBuffer > 0 {
261+ fcBlockNoAvailBuffer .Add (ctx , int64 (b .NoAvailBuffer ))
262+ }
207263 }
208264
209265 if err := scanner .Err (); err != nil {
0 commit comments