Skip to content

Commit 0c9821b

Browse files
authored
feat: add rootfs usage rate limits and observability (#2349)
1 parent ed8d312 commit 0c9821b

7 files changed

Lines changed: 206 additions & 30 deletions

File tree

packages/orchestrator/pkg/sandbox/fc/client.go

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,19 +205,20 @@ func (c *apiClient) setBootSource(ctx context.Context, kernelArgs string, kernel
205205
return err
206206
}
207207

208-
func (c *apiClient) setRootfsDrive(ctx context.Context, rootfsPath string, ioEngine *string) error {
209-
rootfs := "rootfs"
208+
func (c *apiClient) setRootfsDrive(ctx context.Context, rootfsPath string, ioEngine *string, rateLimiter *models.RateLimiter) error {
209+
driveID := rootfsDriveID
210210

211211
isRootDevice := true
212212
driversConfig := operations.PutGuestDriveByIDParams{
213213
Context: ctx,
214-
DriveID: rootfs,
214+
DriveID: driveID,
215215
Body: &models.Drive{
216-
DriveID: &rootfs,
216+
DriveID: &driveID,
217217
PathOnHost: rootfsPath,
218218
IsRootDevice: &isRootDevice,
219219
IsReadOnly: false,
220220
IoEngine: ioEngine,
221+
RateLimiter: rateLimiter,
221222
},
222223
}
223224

@@ -248,10 +249,10 @@ func buildTokenBucket(b TokenBucketConfig) *models.TokenBucket {
248249
return bucket
249250
}
250251

251-
// buildTxRateLimiter constructs a Firecracker RateLimiter from a TxRateLimiterConfig.
252+
// buildRateLimiter constructs a Firecracker RateLimiter from a RateLimiterConfig.
252253
// Either bucket is omitted when its BucketSize is < 0.
253254
// Returns nil only when both buckets are disabled.
254-
func buildTxRateLimiter(config TxRateLimiterConfig) *models.RateLimiter {
255+
func buildRateLimiter(config RateLimiterConfig) *models.RateLimiter {
255256
ops := buildTokenBucket(config.Ops)
256257
bw := buildTokenBucket(config.Bandwidth)
257258

@@ -266,8 +267,8 @@ func buildTxRateLimiter(config TxRateLimiterConfig) *models.RateLimiter {
266267
// Both buckets are disabled when their BucketSize < 0; if all are disabled an empty
267268
// RateLimiter is sent to reset any limit persisted in a snapshot.
268269
// This always sends a PATCH so snapshot-persisted limits are overwritten.
269-
func (c *apiClient) setTxRateLimit(ctx context.Context, ifaceID string, config TxRateLimiterConfig) error {
270-
limiter := buildTxRateLimiter(config)
270+
func (c *apiClient) setTxRateLimit(ctx context.Context, ifaceID string, config RateLimiterConfig) error {
271+
limiter := buildRateLimiter(config)
271272
if limiter == nil {
272273
limiter = &models.RateLimiter{} // empty = reset
273274
}
@@ -289,6 +290,33 @@ func (c *apiClient) setTxRateLimit(ctx context.Context, ifaceID string, config T
289290
return nil
290291
}
291292

293+
// setDriveRateLimit applies or clears a Firecracker VMM-level block device rate limit.
294+
// Both buckets are disabled when their BucketSize < 0; if all are disabled an empty
295+
// RateLimiter is sent to reset any limit persisted in a snapshot.
296+
// This always sends a PATCH so snapshot-persisted limits are overwritten.
297+
func (c *apiClient) setDriveRateLimit(ctx context.Context, driveID string, config RateLimiterConfig) error {
298+
limiter := buildRateLimiter(config)
299+
if limiter == nil {
300+
limiter = &models.RateLimiter{} // empty = reset
301+
}
302+
303+
params := operations.PatchGuestDriveByIDParams{
304+
Context: ctx,
305+
DriveID: driveID,
306+
Body: &models.PartialDrive{
307+
DriveID: &driveID,
308+
RateLimiter: limiter,
309+
},
310+
}
311+
312+
_, err := c.client.Operations.PatchGuestDriveByID(&params)
313+
if err != nil {
314+
return fmt.Errorf("error setting drive rate limit: %w", err)
315+
}
316+
317+
return nil
318+
}
319+
292320
func (c *apiClient) setNetworkInterface(ctx context.Context, ifaceID string, tapName string, tapMac string, txRateLimiter *models.RateLimiter) error {
293321
networkConfig := operations.PutGuestNetworkInterfaceByIDParams{
294322
Context: ctx,

packages/orchestrator/pkg/sandbox/fc/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ const (
1919

2020
SandboxRootfsFile = "rootfs.ext4"
2121

22+
rootfsDriveID = "rootfs"
23+
2224
entropyBytesSize int64 = 1024 // 1 KB
2325
entropyRefillTime int64 = 100
2426
entropyOneTimeBurst int64 = 0

packages/orchestrator/pkg/sandbox/fc/fc_metrics.go

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ var (
3535
directionKey = attribute.Key("direction")
3636
attrTX = metric.WithAttributes(directionKey.String("tx"))
3737
attrRX = metric.WithAttributes(directionKey.String("rx"))
38+
attrRead = metric.WithAttributes(directionKey.String("read"))
39+
attrWrite = metric.WithAttributes(directionKey.String("write"))
3840

3941
// Counters — global totals, no sandbox_id to avoid high cardinality.
4042
fcNetFails = utils.Must(telemetry.GetCounter(fcMeter, telemetry.SandboxFCNetFails))
@@ -49,12 +51,23 @@ var (
4951
// TX-only: no RX equivalent in Firecracker metrics.
5052
fcNetRateLimiterEventCount = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCNetRateLimiterEventCount))
5153
fcNetRemainingReqs = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCNetRemainingReqs))
54+
55+
// Block counters.
56+
fcBlockFails = utils.Must(telemetry.GetCounter(fcMeter, telemetry.SandboxFCBlockFails))
57+
fcBlockNoAvailBuffer = utils.Must(telemetry.GetCounter(fcMeter, telemetry.SandboxFCBlockNoAvailBuffer))
58+
59+
// Block histograms.
60+
fcBlockBytes = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockBytes))
61+
fcBlockCount = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockCount))
62+
fcBlockRateLimiterThrottled = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockRateLimiterThrottled))
63+
fcBlockRateLimiterEventCount = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockRateLimiterEventCount))
64+
fcBlockIOEngineThrottled = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockIOEngineThrottled))
65+
fcBlockRemainingReqs = utils.Must(telemetry.GetHistogram(fcMeter, telemetry.SandboxFCBlockRemainingReqs))
5266
)
5367

54-
// firecrackerNetMetrics holds the Firecracker net metrics fields we care about.
55-
// Firecracker serializes SharedIncMetric fields as per-flush deltas (not cumulative totals):
56-
// each JSON line contains the increment since the previous flush.
57-
// Flush interval defaults to 60 s; additional flushes are triggered by FlushMetrics API calls.
68+
// firecrackerNetMetrics is a subset of Firecracker's NetDeviceMetrics we export via OTEL.
69+
// Full metric list: https://github.com/firecracker-microvm/firecracker/blob/main/docs/metrics.md
70+
// Values are per-flush deltas; flush defaults to 60 s, additional flushes via FlushMetrics API.
5871
type firecrackerNetMetrics struct {
5972
// TX
6073
TxBytesCount uint64 `json:"tx_bytes_count"`
@@ -76,13 +89,31 @@ type firecrackerNetMetrics struct {
7689
TapReadFails uint64 `json:"tap_read_fails"`
7790
}
7891

92+
// firecrackerBlockMetrics is a subset of Firecracker's BlockDeviceMetrics we export via OTEL.
93+
// Full metric list: https://github.com/firecracker-microvm/firecracker/blob/main/docs/metrics.md
94+
// Values are per-flush deltas. The aggregate "block" key sums over all drives; we only have one (rootfs).
95+
type firecrackerBlockMetrics struct {
96+
ReadBytes uint64 `json:"read_bytes"`
97+
WriteBytes uint64 `json:"write_bytes"`
98+
ReadCount uint64 `json:"read_count"`
99+
WriteCount uint64 `json:"write_count"`
100+
RateLimiterThrottledEvents uint64 `json:"rate_limiter_throttled_events"`
101+
RateLimiterEventCount uint64 `json:"rate_limiter_event_count"`
102+
IOEngineThrottledEvents uint64 `json:"io_engine_throttled_events"`
103+
NoAvailBuffer uint64 `json:"no_avail_buffer"`
104+
ExecuteFails uint64 `json:"execute_fails"`
105+
EventFails uint64 `json:"event_fails"`
106+
RemainingReqsCount uint64 `json:"remaining_reqs_count"`
107+
}
108+
79109
// firecrackerMetrics is the top-level structure of one Firecracker metrics JSON line.
80110
type firecrackerMetrics struct {
81-
Net firecrackerNetMetrics `json:"net"`
111+
Net firecrackerNetMetrics `json:"net"`
112+
Block firecrackerBlockMetrics `json:"block"`
82113
}
83114

84115
// startMetricsReader opens the metrics FIFO and starts a goroutine that reads
85-
// Firecracker metrics lines and exports net device metrics via OTEL.
116+
// Firecracker metrics lines and exports metrics via OTEL.
86117
// It must be called before setMetrics so that the FIFO is open for reading
87118
// before Firecracker opens the write end in response to PUT /metrics.
88119
func (p *Process) startMetricsReader(ctx context.Context) {
@@ -204,6 +235,31 @@ func (p *Process) startMetricsReader(ctx context.Context) {
204235
if n.TapReadFails > 0 {
205236
fcNetTapIOFails.Add(ctx, int64(n.TapReadFails), attrRX)
206237
}
238+
239+
// Block histograms — values are already per-flush deltas from Firecracker.
240+
b := &m.Block
241+
242+
fcBlockBytes.Record(ctx, int64(b.ReadBytes), attrRead)
243+
fcBlockBytes.Record(ctx, int64(b.WriteBytes), attrWrite)
244+
fcBlockCount.Record(ctx, int64(b.ReadCount), attrRead)
245+
fcBlockCount.Record(ctx, int64(b.WriteCount), attrWrite)
246+
fcBlockRateLimiterEventCount.Record(ctx, int64(b.RateLimiterEventCount))
247+
fcBlockRemainingReqs.Record(ctx, int64(b.RemainingReqsCount))
248+
249+
if b.RateLimiterThrottledEvents > 0 {
250+
fcBlockRateLimiterThrottled.Record(ctx, int64(b.RateLimiterThrottledEvents))
251+
}
252+
if b.IOEngineThrottledEvents > 0 {
253+
fcBlockIOEngineThrottled.Record(ctx, int64(b.IOEngineThrottledEvents))
254+
}
255+
256+
// Block global error/event counters.
257+
if b.ExecuteFails > 0 || b.EventFails > 0 {
258+
fcBlockFails.Add(ctx, int64(b.ExecuteFails)+int64(b.EventFails))
259+
}
260+
if b.NoAvailBuffer > 0 {
261+
fcBlockNoAvailBuffer.Add(ctx, int64(b.NoAvailBuffer))
262+
}
207263
}
208264

209265
if err := scanner.Err(); err != nil {

packages/orchestrator/pkg/sandbox/fc/process.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,9 @@ type TokenBucketConfig struct {
108108
RefillTimeMs int64
109109
}
110110

111-
// TxRateLimiterConfig holds TX rate limit parameters for a VM's network interface.
111+
// RateLimiterConfig holds rate limit parameters for a Firecracker device (network or block).
112112
// Mirrors the Firecracker RateLimiter structure: two independent token buckets.
113-
type TxRateLimiterConfig struct {
113+
type RateLimiterConfig struct {
114114
Ops TokenBucketConfig // packets; effective rate = BucketSize * 1000 / RefillTimeMs ops/s
115115
Bandwidth TokenBucketConfig // bytes; effective rate = BucketSize * 1000 / RefillTimeMs bytes/s
116116
}
@@ -300,7 +300,8 @@ func (p *Process) Create(
300300
memoryMB int64,
301301
hugePages bool,
302302
options ProcessOptions,
303-
txRateLimit TxRateLimiterConfig,
303+
txRateLimit RateLimiterConfig,
304+
driveRateLimit RateLimiterConfig,
304305
cgroupFD int,
305306
) error {
306307
ctx, childSpan := tracer.Start(ctx, "create-fc")
@@ -405,15 +406,15 @@ func (p *Process) Create(
405406
}
406407
telemetry.ReportEvent(ctx, "symlinked rootfs")
407408

408-
err = p.client.setRootfsDrive(ctx, p.rootfsPath, options.IoEngine)
409+
err = p.client.setRootfsDrive(ctx, p.rootfsPath, options.IoEngine, buildRateLimiter(driveRateLimit))
409410
if err != nil {
410411
fcStopErr := p.Stop(ctx)
411412

412413
return errors.Join(fmt.Errorf("error setting fc drivers config: %w", err), fcStopErr)
413414
}
414415
telemetry.ReportEvent(ctx, "set fc drivers config")
415416

416-
err = p.client.setNetworkInterface(ctx, p.slot.VpeerName(), p.slot.TapName(), p.slot.TapMAC(), buildTxRateLimiter(txRateLimit))
417+
err = p.client.setNetworkInterface(ctx, p.slot.VpeerName(), p.slot.TapName(), p.slot.TapMAC(), buildRateLimiter(txRateLimit))
417418
if err != nil {
418419
fcStopErr := p.Stop(ctx)
419420

@@ -457,7 +458,8 @@ func (p *Process) Resume(
457458
uffdReady chan struct{},
458459
accessToken *string,
459460
cgroupFD int,
460-
txRateLimit TxRateLimiterConfig,
461+
txRateLimit RateLimiterConfig,
462+
driveRateLimit RateLimiterConfig,
461463
) error {
462464
ctx, span := tracer.Start(ctx, "resume-fc")
463465
defer span.End()
@@ -551,15 +553,22 @@ func (p *Process) Resume(
551553
return errors.Join(fmt.Errorf("error loading snapshot: %w", err), fcStopErr)
552554
}
553555

554-
// Always apply/reset the TX rate limit before resuming so any rate limit
555-
// persisted in the snapshot is overwritten by the current config.
556+
// Always apply/reset rate limits before resuming so any limits
557+
// persisted in the snapshot are overwritten by the current config.
556558
if setErr := p.client.setTxRateLimit(ctx, p.slot.VpeerName(), txRateLimit); setErr != nil {
557559
fcStopErr := p.Stop(ctx)
558560

559561
return errors.Join(fmt.Errorf("error setting TX rate limit: %w", setErr), fcStopErr)
560562
}
561563
telemetry.ReportEvent(ctx, "configured tx rate limit")
562564

565+
if setErr := p.client.setDriveRateLimit(ctx, rootfsDriveID, driveRateLimit); setErr != nil {
566+
fcStopErr := p.Stop(ctx)
567+
568+
return errors.Join(fmt.Errorf("error setting drive rate limit: %w", setErr), fcStopErr)
569+
}
570+
telemetry.ReportEvent(ctx, "configured drive rate limit")
571+
563572
err = p.client.resumeVM(ctx)
564573
if err != nil {
565574
fcStopErr := p.Stop(ctx)

packages/orchestrator/pkg/sandbox/sandbox.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@ func (f *Factory) CreateSandbox(
430430
}
431431

432432
throttleConfig := featureflags.GetTCPFirewallEgressThrottleConfig(ctx, f.featureFlags)
433+
driveThrottleConfig := featureflags.GetBlockDriveThrottleConfig(ctx, f.featureFlags)
433434

434435
telemetry.ReportEvent(ctx, "created fc client")
435436

@@ -500,10 +501,14 @@ func (f *Factory) CreateSandbox(
500501
config.RamMB,
501502
config.HugePages,
502503
processOptions,
503-
fc.TxRateLimiterConfig{
504+
fc.RateLimiterConfig{
504505
Ops: fc.TokenBucketConfig(throttleConfig.Ops),
505506
Bandwidth: fc.TokenBucketConfig(throttleConfig.Bandwidth),
506507
},
508+
fc.RateLimiterConfig{
509+
Ops: fc.TokenBucketConfig(driveThrottleConfig.Ops),
510+
Bandwidth: fc.TokenBucketConfig(driveThrottleConfig.Bandwidth),
511+
},
507512
cgroupFD,
508513
)
509514
if err != nil {
@@ -756,6 +761,7 @@ func (f *Factory) ResumeSandbox(
756761
}
757762

758763
resumeThrottleConfig := featureflags.GetTCPFirewallEgressThrottleConfig(ctx, f.featureFlags)
764+
resumeDriveThrottleConfig := featureflags.GetBlockDriveThrottleConfig(ctx, f.featureFlags)
759765

760766
telemetry.ReportEvent(ctx, "created FC process")
761767

@@ -861,10 +867,14 @@ func (f *Factory) ResumeSandbox(
861867
fcUffd.Ready(),
862868
config.Envd.AccessToken,
863869
cgroupFD,
864-
fc.TxRateLimiterConfig{
870+
fc.RateLimiterConfig{
865871
Ops: fc.TokenBucketConfig(resumeThrottleConfig.Ops),
866872
Bandwidth: fc.TokenBucketConfig(resumeThrottleConfig.Bandwidth),
867873
},
874+
fc.RateLimiterConfig{
875+
Ops: fc.TokenBucketConfig(resumeDriveThrottleConfig.Ops),
876+
Bandwidth: fc.TokenBucketConfig(resumeDriveThrottleConfig.Bandwidth),
877+
},
868878
)
869879

870880
if fcStartErr != nil {

packages/shared/pkg/featureflags/flags.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,8 @@ type TCPFirewallEgressThrottleConfigValue struct {
352352
Bandwidth TokenBucketConfig
353353
}
354354

355-
// GetTCPFirewallEgressThrottleConfig fetches and parses the TCPFirewallEgressThrottleConfig flag.
356-
func GetTCPFirewallEgressThrottleConfig(ctx context.Context, ff *Client) TCPFirewallEgressThrottleConfigValue {
357-
value := ff.JSONFlag(ctx, TCPFirewallEgressThrottleConfig)
358-
355+
// parseThrottleBuckets parses "ops" and "bandwidth" token bucket configs from a JSON flag value.
356+
func parseThrottleBuckets(value ldvalue.Value) (ops, bandwidth TokenBucketConfig) {
359357
parseBucket := func(key string) TokenBucketConfig {
360358
b := value.GetByKey(key)
361359
if b.IsNull() {
@@ -375,8 +373,45 @@ func GetTCPFirewallEgressThrottleConfig(ctx context.Context, ff *Client) TCPFire
375373
}
376374
}
377375

376+
return parseBucket("ops"), parseBucket("bandwidth")
377+
}
378+
379+
// GetTCPFirewallEgressThrottleConfig fetches and parses the TCPFirewallEgressThrottleConfig flag.
380+
func GetTCPFirewallEgressThrottleConfig(ctx context.Context, ff *Client) TCPFirewallEgressThrottleConfigValue {
381+
value := ff.JSONFlag(ctx, TCPFirewallEgressThrottleConfig)
382+
ops, bw := parseThrottleBuckets(value)
383+
378384
return TCPFirewallEgressThrottleConfigValue{
379-
Ops: parseBucket("ops"),
380-
Bandwidth: parseBucket("bandwidth"),
385+
Ops: ops,
386+
Bandwidth: bw,
387+
}
388+
}
389+
390+
// BlockDriveThrottleConfig controls per-sandbox block device (disk) throttling via Firecracker's
391+
// VMM-level token bucket rate limiters on the rootfs drive.
392+
// Structure mirrors the Firecracker RateLimiter API: two independent token buckets.
393+
// Set bucketSize to -1 to disable a bucket.
394+
//
395+
// Ops bucket (IOPS): effective rate = ops.bucketSize * 1000 / ops.refillTimeMs ops/s.
396+
// Bandwidth bucket (bytes): effective rate = bandwidth.bucketSize * 1000 / bandwidth.refillTimeMs bytes/s.
397+
var BlockDriveThrottleConfig = newJSONFlag("block-drive-throttle-config", ldvalue.FromJSONMarshal(map[string]any{
398+
"ops": map[string]any{"bucketSize": -1, "oneTimeBurst": 0, "refillTimeMs": 1000},
399+
"bandwidth": map[string]any{"bucketSize": -1, "oneTimeBurst": 0, "refillTimeMs": 1000},
400+
}))
401+
402+
// BlockDriveThrottleConfigValue holds the parsed values of BlockDriveThrottleConfig.
403+
type BlockDriveThrottleConfigValue struct {
404+
Ops TokenBucketConfig
405+
Bandwidth TokenBucketConfig
406+
}
407+
408+
// GetBlockDriveThrottleConfig fetches and parses the BlockDriveThrottleConfig flag.
409+
func GetBlockDriveThrottleConfig(ctx context.Context, ff *Client) BlockDriveThrottleConfigValue {
410+
value := ff.JSONFlag(ctx, BlockDriveThrottleConfig)
411+
ops, bw := parseThrottleBuckets(value)
412+
413+
return BlockDriveThrottleConfigValue{
414+
Ops: ops,
415+
Bandwidth: bw,
381416
}
382417
}

0 commit comments

Comments
 (0)