|
| 1 | +// run with something like: |
| 2 | +// |
| 3 | +// sudo `which go` test -benchtime=15s -bench=. -v |
| 4 | +// sudo modprobe nbd |
| 5 | +// echo 1024 | sudo tee /proc/sys/vm/nr_hugepages |
| 6 | +package main |
| 7 | + |
| 8 | +import ( |
| 9 | + "net/http" |
| 10 | + "net/url" |
| 11 | + "os" |
| 12 | + "path/filepath" |
| 13 | + "testing" |
| 14 | + "time" |
| 15 | + |
| 16 | + "github.com/google/uuid" |
| 17 | + "github.com/stretchr/testify/assert" |
| 18 | + "github.com/stretchr/testify/require" |
| 19 | + "go.opentelemetry.io/otel" |
| 20 | + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" |
| 21 | + "go.opentelemetry.io/otel/metric/noop" |
| 22 | + "go.uber.org/zap" |
| 23 | + |
| 24 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/proxy" |
| 25 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox" |
| 26 | + blockmetrics "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/block/metrics" |
| 27 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/nbd" |
| 28 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/network" |
| 29 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/template" |
| 30 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/template/build" |
| 31 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/config" |
| 32 | + "github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/metrics" |
| 33 | + artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry" |
| 34 | + "github.com/e2b-dev/infra/packages/shared/pkg/dockerhub" |
| 35 | + featureflags "github.com/e2b-dev/infra/packages/shared/pkg/feature-flags" |
| 36 | + "github.com/e2b-dev/infra/packages/shared/pkg/limit" |
| 37 | + sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox" |
| 38 | + "github.com/e2b-dev/infra/packages/shared/pkg/smap" |
| 39 | + "github.com/e2b-dev/infra/packages/shared/pkg/storage" |
| 40 | + "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" |
| 41 | + "github.com/e2b-dev/infra/packages/shared/pkg/utils" |
| 42 | +) |
| 43 | + |
// tracer is the package-level OpenTelemetry tracer; testOneItem uses it to
// open one span per benchmark iteration.
var tracer = otel.Tracer("github.com/e2b-dev/infra/packages/orchestrator")
| 45 | + |
| 46 | +func BenchmarkBaseImageLaunch(b *testing.B) { |
| 47 | + if os.Geteuid() != 0 { |
| 48 | + b.Skip("skipping benchmark because not running as root") |
| 49 | + } |
| 50 | + |
| 51 | + // test configuration |
| 52 | + const ( |
| 53 | + testType = onlyStart |
| 54 | + baseImage = "e2bdev/base" |
| 55 | + kernelVersion = "vmlinux-6.1.102" |
| 56 | + fcVersion = "v1.10.1_1fcdaec08" |
| 57 | + templateID = "fcb33d09-3141-42c4-8d3b-c2df411681db" |
| 58 | + buildID = "ba6aae36-74f7-487a-b6f7-74fd7c94e479" |
| 59 | + useHugePages = false |
| 60 | + allowInternetAccess = true |
| 61 | + ) |
| 62 | + |
| 63 | + // cache paths, to speed up test runs. these paths aren't wiped between tests |
| 64 | + persistenceDir := filepath.Join(os.TempDir(), "e2b-orchestrator-benchmark") |
| 65 | + kernelsDir := filepath.Join(persistenceDir, "kernels") |
| 66 | + sandboxDir := filepath.Join(persistenceDir, "sandbox") |
| 67 | + err := os.MkdirAll(kernelsDir, 0o755) |
| 68 | + require.NoError(b, err) |
| 69 | + |
| 70 | + // ephemeral data |
| 71 | + tempDir := b.TempDir() |
| 72 | + clientID := uuid.NewString() |
| 73 | + |
| 74 | + abs := func(s string) string { |
| 75 | + return utils.Must(filepath.Abs(s)) |
| 76 | + } |
| 77 | + |
| 78 | + endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") |
| 79 | + if endpoint != "" { |
| 80 | + spanExporter, err := telemetry.NewSpanExporter(b.Context(), |
| 81 | + otlptracegrpc.WithEndpoint(endpoint), |
| 82 | + ) |
| 83 | + defer func() { |
| 84 | + err := spanExporter.Shutdown(b.Context()) |
| 85 | + assert.NoError(b, err) |
| 86 | + }() |
| 87 | + require.NoError(b, err) |
| 88 | + resource, err := telemetry.GetResource(b.Context(), "node-id", "BenchmarkBaseImageLaunch", "service-commit", "service-version", "service-instance-id") |
| 89 | + require.NoError(b, err) |
| 90 | + tracerProvider := telemetry.NewTracerProvider(spanExporter, resource) |
| 91 | + otel.SetTracerProvider(tracerProvider) |
| 92 | + } |
| 93 | + |
| 94 | + linuxKernelURL, err := url.JoinPath("https://storage.googleapis.com/e2b-prod-public-builds/kernels/", kernelVersion, "vmlinux.bin") |
| 95 | + require.NoError(b, err) |
| 96 | + linuxKernelFilename := filepath.Join(kernelsDir, kernelVersion, "vmlinux.bin") |
| 97 | + |
| 98 | + downloadKernel(b, linuxKernelFilename, linuxKernelURL) |
| 99 | + |
| 100 | + // hacks, these should go away |
| 101 | + b.Setenv("ARTIFACTS_REGISTRY_PROVIDER", "Local") |
| 102 | + b.Setenv("USE_LOCAL_NAMESPACE_STORAGE", "true") |
| 103 | + b.Setenv("STORAGE_PROVIDER", "Local") |
| 104 | + b.Setenv("ORCHESTRATOR_BASE_PATH", tempDir) |
| 105 | + b.Setenv("HOST_ENVD_PATH", abs(filepath.Join("..", "envd", "bin", "envd"))) |
| 106 | + b.Setenv("FIRECRACKER_VERSIONS_DIR", abs(filepath.Join("..", "fc-versions", "builds"))) |
| 107 | + b.Setenv("HOST_KERNELS_DIR", abs(kernelsDir)) |
| 108 | + b.Setenv("SANDBOX_DIR", abs(sandboxDir)) |
| 109 | + b.Setenv("SNAPSHOT_CACHE_DIR", abs(filepath.Join(tempDir, "snapshot-cache"))) |
| 110 | + b.Setenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH", abs(filepath.Join(persistenceDir, "templates"))) |
| 111 | + |
| 112 | + // prep directories |
| 113 | + for _, subdir := range []string{"build", "build-templates" /*"fc-vm",*/, "sandbox", "snapshot-cache", "template"} { |
| 114 | + fullDirName := filepath.Join(tempDir, subdir) |
| 115 | + err := os.MkdirAll(fullDirName, 0o755) |
| 116 | + require.NoError(b, err) |
| 117 | + } |
| 118 | + |
| 119 | + logger, err := zap.NewDevelopment() |
| 120 | + require.NoError(b, err) |
| 121 | + |
| 122 | + sbxlogger.SetSandboxLoggerInternal(logger) |
| 123 | + // sbxlogger.SetSandboxLoggerExternal(logger) |
| 124 | + |
| 125 | + networkPool, err := network.NewPool( |
| 126 | + b.Context(), noop.MeterProvider{}, 8, 8, clientID, |
| 127 | + ) |
| 128 | + require.NoError(b, err) |
| 129 | + defer func() { |
| 130 | + err := networkPool.Close(b.Context()) |
| 131 | + assert.NoError(b, err) |
| 132 | + }() |
| 133 | + |
| 134 | + devicePool, err := nbd.NewDevicePool(b.Context(), noop.MeterProvider{}) |
| 135 | + require.NoError(b, err, "do you have the nbd kernel module installed?") |
| 136 | + defer func() { |
| 137 | + err := devicePool.Close(b.Context()) |
| 138 | + assert.NoError(b, err) |
| 139 | + }() |
| 140 | + |
| 141 | + featureFlags, err := featureflags.NewClient() |
| 142 | + require.NoError(b, err) |
| 143 | + defer func() { |
| 144 | + err := featureFlags.Close(b.Context()) |
| 145 | + assert.NoError(b, err) |
| 146 | + }() |
| 147 | + |
| 148 | + limiter, err := limit.New(b.Context(), featureFlags) |
| 149 | + require.NoError(b, err) |
| 150 | + |
| 151 | + persistence, err := storage.GetTemplateStorageProvider(b.Context(), limiter) |
| 152 | + require.NoError(b, err) |
| 153 | + |
| 154 | + blockMetrics, err := blockmetrics.NewMetrics(&noop.MeterProvider{}) |
| 155 | + require.NoError(b, err) |
| 156 | + |
| 157 | + templateCache, err := template.NewCache(b.Context(), featureFlags, persistence, blockMetrics) |
| 158 | + require.NoError(b, err) |
| 159 | + |
| 160 | + sandboxFactory := sandbox.NewFactory(networkPool, devicePool, featureFlags, true) |
| 161 | + |
| 162 | + dockerhubRepository, err := dockerhub.GetRemoteRepository(b.Context()) |
| 163 | + require.NoError(b, err) |
| 164 | + defer func() { |
| 165 | + err := dockerhubRepository.Close() |
| 166 | + assert.NoError(b, err) |
| 167 | + }() |
| 168 | + |
| 169 | + accessToken := "access-token" |
| 170 | + sandboxConfig := sandbox.Config{ |
| 171 | + BaseTemplateID: templateID, |
| 172 | + Vcpu: 2, |
| 173 | + RamMB: 512, |
| 174 | + TotalDiskSizeMB: 2 * 1024, |
| 175 | + HugePages: useHugePages, |
| 176 | + AllowInternetAccess: ptr(allowInternetAccess), |
| 177 | + Envd: sandbox.EnvdMetadata{ |
| 178 | + Vars: map[string]string{"HELLO": "WORLD"}, |
| 179 | + AccessToken: &accessToken, |
| 180 | + Version: "1.2.3", |
| 181 | + }, |
| 182 | + } |
| 183 | + |
| 184 | + runtime := sandbox.RuntimeMetadata{ |
| 185 | + TemplateID: templateID, |
| 186 | + SandboxID: "sandbox-id", |
| 187 | + ExecutionID: "execution-id", |
| 188 | + TeamID: "team-id", |
| 189 | + } |
| 190 | + |
| 191 | + artifactRegistry, err := artifactsregistry.GetArtifactsRegistryProvider(b.Context()) |
| 192 | + require.NoError(b, err) |
| 193 | + |
| 194 | + persistenceTemplate, err := storage.GetTemplateStorageProvider(b.Context(), nil) |
| 195 | + require.NoError(b, err) |
| 196 | + |
| 197 | + persistenceBuild, err := storage.GetBuildCacheStorageProvider(b.Context(), nil) |
| 198 | + require.NoError(b, err) |
| 199 | + |
| 200 | + var proxyPort uint = 5007 |
| 201 | + |
| 202 | + sandboxes := smap.New[*sandbox.Sandbox]() |
| 203 | + |
| 204 | + sandboxProxy, err := proxy.NewSandboxProxy(noop.MeterProvider{}, proxyPort, sandboxes) |
| 205 | + require.NoError(b, err) |
| 206 | + go func() { |
| 207 | + err := sandboxProxy.Start(b.Context()) |
| 208 | + assert.ErrorIs(b, http.ErrServerClosed, err) |
| 209 | + }() |
| 210 | + defer func() { |
| 211 | + err := sandboxProxy.Close(b.Context()) |
| 212 | + assert.NoError(b, err) |
| 213 | + }() |
| 214 | + |
| 215 | + buildMetrics, err := metrics.NewBuildMetrics(noop.MeterProvider{}) |
| 216 | + require.NoError(b, err) |
| 217 | + |
| 218 | + builder := build.NewBuilder( |
| 219 | + logger, |
| 220 | + sandboxFactory, |
| 221 | + persistenceTemplate, |
| 222 | + persistenceBuild, |
| 223 | + artifactRegistry, |
| 224 | + dockerhubRepository, |
| 225 | + sandboxProxy, |
| 226 | + sandboxes, |
| 227 | + templateCache, |
| 228 | + buildMetrics, |
| 229 | + ) |
| 230 | + |
| 231 | + buildPath := filepath.Join(os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"), buildID, "rootfs.ext4") |
| 232 | + if _, err := os.Stat(buildPath); os.IsNotExist(err) { |
| 233 | + // build template |
| 234 | + force := true |
| 235 | + templateConfig := config.TemplateConfig{ |
| 236 | + TemplateID: templateID, |
| 237 | + FromImage: baseImage, |
| 238 | + Force: &force, |
| 239 | + VCpuCount: sandboxConfig.Vcpu, |
| 240 | + MemoryMB: sandboxConfig.RamMB, |
| 241 | + StartCmd: "echo 'start cmd debug' && sleep 10 && echo 'done starting command debug'", |
| 242 | + DiskSizeMB: sandboxConfig.TotalDiskSizeMB, |
| 243 | + HugePages: sandboxConfig.HugePages, |
| 244 | + } |
| 245 | + |
| 246 | + metadata := storage.TemplateFiles{ |
| 247 | + BuildID: buildID, |
| 248 | + KernelVersion: kernelVersion, |
| 249 | + FirecrackerVersion: fcVersion, |
| 250 | + } |
| 251 | + _, err = builder.Build(b.Context(), metadata, templateConfig, logger.Core()) |
| 252 | + require.NoError(b, err) |
| 253 | + } |
| 254 | + |
| 255 | + // retrieve template |
| 256 | + tmpl, err := templateCache.GetTemplate( |
| 257 | + b.Context(), |
| 258 | + buildID, |
| 259 | + kernelVersion, |
| 260 | + fcVersion, |
| 261 | + false, |
| 262 | + false, |
| 263 | + ) |
| 264 | + require.NoError(b, err) |
| 265 | + |
| 266 | + tc := testContainer{ |
| 267 | + sandboxFactory: sandboxFactory, |
| 268 | + testType: testType, |
| 269 | + tmpl: tmpl, |
| 270 | + sandboxConfig: sandboxConfig, |
| 271 | + runtime: runtime, |
| 272 | + } |
| 273 | + |
| 274 | + for b.Loop() { |
| 275 | + tc.testOneItem(b, buildID, kernelVersion, fcVersion) |
| 276 | + } |
| 277 | +} |
| 278 | + |
// ptr allocates a new T, stores a copy of v in it, and returns its address.
// It exists so literals and constants can be used where a *T is required.
func ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
| 282 | + |
// testCycle names the sandbox lifecycle stages that one benchmark iteration
// exercises; testOneItem branches on it.
type testCycle string

// Available benchmark cycles.
const (
	// onlyStart starts a sandbox and closes it.
	onlyStart = testCycle("only-start")
	// startAndPause starts a sandbox and pauses it.
	startAndPause = testCycle("start-and-pause")
	// startPauseResume starts a sandbox, pauses it, and starts a sandbox again.
	startPauseResume = testCycle("start-pause-resume")
)
| 290 | + |
// testContainer bundles everything a single benchmark iteration needs, so the
// benchmark loop can call testOneItem without re-doing any setup.
type testContainer struct {
	// testType selects which lifecycle stages testOneItem runs.
	testType testCycle
	// sandboxFactory is used to start (resume) sandboxes.
	sandboxFactory *sandbox.Factory
	// tmpl is the template every sandbox in the benchmark is started from.
	tmpl template.Template
	// sandboxConfig is the sandbox configuration passed to ResumeSandbox.
	sandboxConfig sandbox.Config
	// runtime is the runtime metadata passed to ResumeSandbox.
	runtime sandbox.RuntimeMetadata
}
| 298 | + |
| 299 | +func (tc *testContainer) testOneItem(b *testing.B, buildID, kernelVersion, fcVersion string) { |
| 300 | + b.Helper() |
| 301 | + |
| 302 | + ctx, span := tracer.Start(b.Context(), "testOneItem") |
| 303 | + defer span.End() |
| 304 | + |
| 305 | + sbx, err := tc.sandboxFactory.ResumeSandbox( |
| 306 | + ctx, |
| 307 | + tc.tmpl, |
| 308 | + tc.sandboxConfig, |
| 309 | + tc.runtime, |
| 310 | + uuid.NewString(), |
| 311 | + time.Now(), |
| 312 | + time.Now().Add(time.Second*15), |
| 313 | + nil, |
| 314 | + ) |
| 315 | + require.NoError(b, err) |
| 316 | + |
| 317 | + if tc.testType == onlyStart { |
| 318 | + b.StopTimer() |
| 319 | + err = sbx.Close(ctx) |
| 320 | + require.NoError(b, err) |
| 321 | + b.StartTimer() |
| 322 | + return |
| 323 | + } |
| 324 | + |
| 325 | + meta, err := sbx.Template.Metadata() |
| 326 | + require.NoError(b, err) |
| 327 | + |
| 328 | + templateMetadata := meta.SameVersionTemplate(storage.TemplateFiles{ |
| 329 | + BuildID: buildID, |
| 330 | + KernelVersion: kernelVersion, |
| 331 | + FirecrackerVersion: fcVersion, |
| 332 | + }) |
| 333 | + snap, err := sbx.Pause(ctx, templateMetadata) |
| 334 | + require.NoError(b, err) |
| 335 | + require.NotNil(b, snap) |
| 336 | + |
| 337 | + if tc.testType == startAndPause { |
| 338 | + b.StopTimer() |
| 339 | + err = sbx.Close(ctx) |
| 340 | + require.NoError(b, err) |
| 341 | + b.StartTimer() |
| 342 | + } |
| 343 | + |
| 344 | + // resume sandbox |
| 345 | + sbx, err = tc.sandboxFactory.ResumeSandbox(ctx, tc.tmpl, tc.sandboxConfig, tc.runtime, uuid.NewString(), time.Now(), time.Now().Add(time.Second*15), nil) |
| 346 | + require.NoError(b, err) |
| 347 | + |
| 348 | + // close sandbox |
| 349 | + err = sbx.Close(ctx) |
| 350 | + require.NoError(b, err) |
| 351 | +} |
| 352 | + |
| 353 | +func downloadKernel(b *testing.B, filename, url string) { |
| 354 | + b.Helper() |
| 355 | + |
| 356 | + dirname := filepath.Dir(filename) |
| 357 | + err := os.MkdirAll(dirname, 0o755) |
| 358 | + require.NoError(b, err) |
| 359 | + |
| 360 | + // kernel already exists |
| 361 | + if _, err := os.Stat(filename); err == nil { |
| 362 | + return |
| 363 | + } |
| 364 | + |
| 365 | + client := &http.Client{} |
| 366 | + req, err := http.NewRequestWithContext(b.Context(), http.MethodGet, url, nil) |
| 367 | + require.NoError(b, err) |
| 368 | + response, err := client.Do(req) |
| 369 | + require.NoError(b, err) |
| 370 | + require.Equal(b, http.StatusOK, response.StatusCode) |
| 371 | + defer response.Body.Close() |
| 372 | + |
| 373 | + file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) |
| 374 | + require.NoError(b, err) |
| 375 | + defer file.Close() |
| 376 | + |
| 377 | + _, err = file.ReadFrom(response.Body) |
| 378 | + require.NoError(b, err) |
| 379 | +} |
0 commit comments