Skip to content
2 changes: 1 addition & 1 deletion packages/orchestrator/benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ func (tc *testContainer) testOneItem(b *testing.B, buildID, kernelVersion, fcVer
KernelVersion: kernelVersion,
FirecrackerVersion: fcVersion,
})
snap, err := sbx.Pause(ctx, templateMetadata)
snap, err := sbx.Pause(ctx, templateMetadata, sandbox.SnapshotUseCasePause)
require.NoError(b, err)
require.NotNil(b, snap)

Expand Down
2 changes: 1 addition & 1 deletion packages/orchestrator/cmd/resume-build/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ func (r *runner) pauseOnce(ctx context.Context, opts pauseOptions, verbose bool)

// Pause and create snapshot
pauseStart := time.Now()
snapshot, err := sbx.Pause(ctx, newMeta)
snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause)
Comment thread
ValentaTomas marked this conversation as resolved.
pauseDur := time.Since(pauseStart)
totalDur := time.Since(t0)

Expand Down
5 changes: 5 additions & 0 deletions packages/orchestrator/pkg/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,7 @@ func (s *Sandbox) Shutdown(ctx context.Context) error {
func (s *Sandbox) Pause(
ctx context.Context,
m metadata.Template,
useCase SnapshotUseCase,
) (st *Snapshot, e error) {
ctx, span := tracer.Start(ctx, "sandbox-snapshot")
defer span.End()
Expand Down Expand Up @@ -1092,6 +1093,7 @@ func (s *Sandbox) Pause(
if err != nil {
return nil, fmt.Errorf("failed to get memfile metadata: %w", err)
}
recordSnapshotDiff(ctx, "memfile", memfileDiffMetadata, originalMemfile.Header(), useCase)
Comment thread
ValentaTomas marked this conversation as resolved.

// Start POSTPROCESSING
memfileDiff, memfileDiffHeader, err := pauseProcessMemory(
Expand All @@ -1116,6 +1118,7 @@ func (s *Sandbox) Pause(
closeHook: s.Close,
},
s.config.DefaultCacheDir,
useCase,
)
if err != nil {
return nil, fmt.Errorf("error while post processing: %w", err)
Expand Down Expand Up @@ -1200,6 +1203,7 @@ func pauseProcessRootfs(
originalHeader *header.Header,
diffCreator DiffCreator,
cacheDir string,
useCase SnapshotUseCase,
) (d build.Diff, h *header.Header, e error) {
ctx, span := tracer.Start(ctx, "process-rootfs")
defer span.End()
Expand All @@ -1216,6 +1220,7 @@ func pauseProcessRootfs(
return nil, nil, fmt.Errorf("error creating diff: %w", err)
}
telemetry.ReportEvent(ctx, "exported rootfs")
recordSnapshotDiff(ctx, "rootfs", rootfsDiffMetadata, originalHeader, useCase)

rootfsDiff, err := rootfsDiffFile.CloseToDiff(int64(originalHeader.Metadata.BlockSize))
if err != nil {
Expand Down
77 changes: 77 additions & 0 deletions packages/orchestrator/pkg/sandbox/snapshot_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package sandbox

import (
"context"
"math/bits"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"

"github.com/e2b-dev/infra/packages/shared/pkg/storage/header"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
)

// Histograms written by recordSnapshotDiff. They are created during package
// initialization from the package-level meter.
// NOTE(review): utils.Must presumably panics when GetHistogram returns an
// error, which would abort startup — confirm.
var (
// snapshotDiffBytes receives the dirty and empty byte counts of a snapshot file.
snapshotDiffBytes = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotDiffBytes))
// snapshotDiffRatioBp receives the same counts as a share of the total size,
// in basis points (see ratioBp).
snapshotDiffRatioBp = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotDiffRatioBp))
// snapshotTotalBytes receives the total size taken from the original header.
snapshotTotalBytes = utils.Must(telemetry.GetHistogram(meter, telemetry.SnapshotTotalBytes))
)

// SnapshotUseCase labels the context in which a snapshot is taken. Its string
// value is attached to the snapshot metrics as the "use_case" attribute.
type SnapshotUseCase string

// Known SnapshotUseCase values.
const (
// SnapshotUseCasePause is the label reported as "pause".
SnapshotUseCasePause SnapshotUseCase = "pause"
// SnapshotUseCaseBuild is the label reported as "build".
SnapshotUseCaseBuild SnapshotUseCase = "build"
)

// recordSnapshotDiff records size metrics for a single snapshot file.
//
// It emits the total size from the original header, and then, for each of the
// "dirty" and "empty" block sets in dm, the byte count (set cardinality times
// the original block size) and that count as a share of the total in basis
// points. Every data point carries the "file_type" and "use_case" attributes;
// the per-kind points additionally carry "kind".
//
// Parameters:
//   - ctx: context passed to the histogram Record calls.
//   - fileType: value of the "file_type" attribute (callers pass e.g. "memfile"
//     or "rootfs").
//   - dm: diff metadata whose Dirty and Empty sets are measured; a nil set
//     counts as zero bytes.
//   - original: header supplying the block size and total size.
//   - useCase: value of the "use_case" attribute.
//
// Nothing is recorded when dm, original, or original.Metadata is nil.
func recordSnapshotDiff(
	ctx context.Context,
	fileType string,
	dm *header.DiffMetadata,
	original *header.Header,
	useCase SnapshotUseCase,
) {
	if dm == nil || original == nil || original.Metadata == nil {
		return
	}

	blockSize := int64(original.Metadata.BlockSize)
	total := int64(original.Metadata.Size)

	ft := attribute.String("file_type", fileType)
	uc := attribute.String("use_case", string(useCase))

	snapshotTotalBytes.Record(ctx, total, metric.WithAttributes(ft, uc))

	var dirtyBytes, emptyBytes int64
	if dm.Dirty != nil {
		dirtyBytes = int64(dm.Dirty.GetCardinality()) * blockSize
	}
	if dm.Empty != nil {
		emptyBytes = int64(dm.Empty.GetCardinality()) * blockSize
	}

	// Two explicit calls instead of ranging over a map literal: no per-call
	// map allocation, and the series are always emitted in the same order.
	record := func(kind string, b int64) {
		attrs := metric.WithAttributes(ft, attribute.String("kind", kind), uc)
		snapshotDiffBytes.Record(ctx, b, attrs)
		snapshotDiffRatioBp.Record(ctx, ratioBp(b, total), attrs)
	}
	record("dirty", dirtyBytes)
	record("empty", emptyBytes)
}

// ratioBp returns num/denom in basis points (10000 = 100.00%) so we keep
// sub-percent resolution while staying an integer. Grafana panels divide by
// 100 to display percent.
//
// The result is truncated toward zero and clamped to [0, 10000]: a
// non-positive num or denom yields 0, and num >= denom yields 10000.
func ratioBp(num, denom int64) int64 {
	const fullScale = 10000

	if denom <= 0 || num <= 0 {
		return 0
	}
	if num >= denom {
		return fullScale
	}

	// num*fullScale does not fit in int64 for very large inputs, so form the
	// product in 128 bits. Div64 cannot panic here: 0 < num < denom keeps the
	// high word of the product below denom.
	hi, lo := bits.Mul64(uint64(num), fullScale)
	bp, _ := bits.Div64(hi, lo, uint64(denom))

	return int64(bp)
}
2 changes: 1 addition & 1 deletion packages/orchestrator/pkg/server/sandboxes.go
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ func (s *Server) snapshotAndCacheSandbox(
FirecrackerVersion: sbx.Config.FirecrackerConfig.FirecrackerVersion,
})

snapshot, err := sbx.Pause(ctx, meta)
snapshot, err := sbx.Pause(ctx, meta, sandbox.SnapshotUseCasePause)
if err != nil {
return nil, fmt.Errorf("error snapshotting sandbox: %w", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ func (lb *LayerExecutor) PauseAndUpload(
snapshot, err := sbx.Pause(
ctx,
meta,
sandbox.SnapshotUseCaseBuild,
)
if err != nil {
return fmt.Errorf("error processing vm: %w", err)
Expand Down
12 changes: 12 additions & 0 deletions packages/shared/pkg/telemetry/meters.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ const (
SandboxFCBlockRateLimiterEventCount HistogramType = "orchestrator.sandbox.fc.block.rate_limiter_event_count"
SandboxFCBlockIOEngineThrottled HistogramType = "orchestrator.sandbox.fc.block.io_engine_throttled"
SandboxFCBlockRemainingReqs HistogramType = "orchestrator.sandbox.fc.block.remaining_reqs"

SnapshotDiffBytes HistogramType = "orchestrator.sandbox.snapshot.diff.bytes"
SnapshotDiffRatioBp HistogramType = "orchestrator.sandbox.snapshot.diff.ratio_bp"
SnapshotTotalBytes HistogramType = "orchestrator.sandbox.snapshot.total.bytes"
)

const (
Expand Down Expand Up @@ -352,6 +356,10 @@ var histogramDesc = map[HistogramType]string{
SandboxFCBlockRateLimiterEventCount: "Distribution of Firecracker VMM block rate limiter events per metrics flush",
SandboxFCBlockIOEngineThrottled: "Distribution of Firecracker VMM block ops throttled by io_uring engine per metrics flush",
SandboxFCBlockRemainingReqs: "Distribution of Firecracker VMM block queue remaining-request events per metrics flush",

SnapshotDiffBytes: "Per-snapshot dirty/empty bytes per file",
SnapshotDiffRatioBp: "Per-snapshot dirty/empty as fraction of total mapped size, in basis points (10000=100%)",
SnapshotTotalBytes: "Per-snapshot total mapped size of the file",
}

var histogramUnits = map[HistogramType]string{
Expand Down Expand Up @@ -382,6 +390,10 @@ var histogramUnits = map[HistogramType]string{
SandboxFCBlockRateLimiterEventCount: "{event}",
SandboxFCBlockIOEngineThrottled: "{op}",
SandboxFCBlockRemainingReqs: "{event}",

SnapshotDiffBytes: "{By}",
SnapshotDiffRatioBp: "{1}",
SnapshotTotalBytes: "{By}",
}

func GetHistogram(meter metric.Meter, name HistogramType) (metric.Int64Histogram, error) {
Expand Down
Loading