Skip to content

Commit 62c9d16

Browse files
authored
Test harness (#1285)
1 parent db9783d commit 62c9d16

2 files changed

Lines changed: 380 additions & 1 deletion

File tree

Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
// run with something like:
2+
//
3+
// sudo `which go` test -benchtime=15s -bench=. -v
4+
// sudo modprobe nbd
5+
// echo 1024 | sudo tee /proc/sys/vm/nr_hugepages
6+
package main
7+
8+
import (
9+
"net/http"
10+
"net/url"
11+
"os"
12+
"path/filepath"
13+
"testing"
14+
"time"
15+
16+
"github.com/google/uuid"
17+
"github.com/stretchr/testify/assert"
18+
"github.com/stretchr/testify/require"
19+
"go.opentelemetry.io/otel"
20+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
21+
"go.opentelemetry.io/otel/metric/noop"
22+
"go.uber.org/zap"
23+
24+
"github.com/e2b-dev/infra/packages/orchestrator/internal/proxy"
25+
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox"
26+
blockmetrics "github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/block/metrics"
27+
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/nbd"
28+
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/network"
29+
"github.com/e2b-dev/infra/packages/orchestrator/internal/sandbox/template"
30+
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build"
31+
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/config"
32+
"github.com/e2b-dev/infra/packages/orchestrator/internal/template/build/metrics"
33+
artifactsregistry "github.com/e2b-dev/infra/packages/shared/pkg/artifacts-registry"
34+
"github.com/e2b-dev/infra/packages/shared/pkg/dockerhub"
35+
featureflags "github.com/e2b-dev/infra/packages/shared/pkg/feature-flags"
36+
"github.com/e2b-dev/infra/packages/shared/pkg/limit"
37+
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
38+
"github.com/e2b-dev/infra/packages/shared/pkg/smap"
39+
"github.com/e2b-dev/infra/packages/shared/pkg/storage"
40+
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
41+
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
42+
)
43+
44+
// tracer creates the spans emitted by this benchmark (see testOneItem).
var tracer = otel.Tracer("github.com/e2b-dev/infra/packages/orchestrator")
45+
46+
func BenchmarkBaseImageLaunch(b *testing.B) {
47+
if os.Geteuid() != 0 {
48+
b.Skip("skipping benchmark because not running as root")
49+
}
50+
51+
// test configuration
52+
const (
53+
testType = onlyStart
54+
baseImage = "e2bdev/base"
55+
kernelVersion = "vmlinux-6.1.102"
56+
fcVersion = "v1.10.1_1fcdaec08"
57+
templateID = "fcb33d09-3141-42c4-8d3b-c2df411681db"
58+
buildID = "ba6aae36-74f7-487a-b6f7-74fd7c94e479"
59+
useHugePages = false
60+
allowInternetAccess = true
61+
)
62+
63+
// cache paths, to speed up test runs. these paths aren't wiped between tests
64+
persistenceDir := filepath.Join(os.TempDir(), "e2b-orchestrator-benchmark")
65+
kernelsDir := filepath.Join(persistenceDir, "kernels")
66+
sandboxDir := filepath.Join(persistenceDir, "sandbox")
67+
err := os.MkdirAll(kernelsDir, 0o755)
68+
require.NoError(b, err)
69+
70+
// ephemeral data
71+
tempDir := b.TempDir()
72+
clientID := uuid.NewString()
73+
74+
abs := func(s string) string {
75+
return utils.Must(filepath.Abs(s))
76+
}
77+
78+
endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
79+
if endpoint != "" {
80+
spanExporter, err := telemetry.NewSpanExporter(b.Context(),
81+
otlptracegrpc.WithEndpoint(endpoint),
82+
)
83+
defer func() {
84+
err := spanExporter.Shutdown(b.Context())
85+
assert.NoError(b, err)
86+
}()
87+
require.NoError(b, err)
88+
resource, err := telemetry.GetResource(b.Context(), "node-id", "BenchmarkBaseImageLaunch", "service-commit", "service-version", "service-instance-id")
89+
require.NoError(b, err)
90+
tracerProvider := telemetry.NewTracerProvider(spanExporter, resource)
91+
otel.SetTracerProvider(tracerProvider)
92+
}
93+
94+
linuxKernelURL, err := url.JoinPath("https://storage.googleapis.com/e2b-prod-public-builds/kernels/", kernelVersion, "vmlinux.bin")
95+
require.NoError(b, err)
96+
linuxKernelFilename := filepath.Join(kernelsDir, kernelVersion, "vmlinux.bin")
97+
98+
downloadKernel(b, linuxKernelFilename, linuxKernelURL)
99+
100+
// hacks, these should go away
101+
b.Setenv("ARTIFACTS_REGISTRY_PROVIDER", "Local")
102+
b.Setenv("USE_LOCAL_NAMESPACE_STORAGE", "true")
103+
b.Setenv("STORAGE_PROVIDER", "Local")
104+
b.Setenv("ORCHESTRATOR_BASE_PATH", tempDir)
105+
b.Setenv("HOST_ENVD_PATH", abs(filepath.Join("..", "envd", "bin", "envd")))
106+
b.Setenv("FIRECRACKER_VERSIONS_DIR", abs(filepath.Join("..", "fc-versions", "builds")))
107+
b.Setenv("HOST_KERNELS_DIR", abs(kernelsDir))
108+
b.Setenv("SANDBOX_DIR", abs(sandboxDir))
109+
b.Setenv("SNAPSHOT_CACHE_DIR", abs(filepath.Join(tempDir, "snapshot-cache")))
110+
b.Setenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH", abs(filepath.Join(persistenceDir, "templates")))
111+
112+
// prep directories
113+
for _, subdir := range []string{"build", "build-templates" /*"fc-vm",*/, "sandbox", "snapshot-cache", "template"} {
114+
fullDirName := filepath.Join(tempDir, subdir)
115+
err := os.MkdirAll(fullDirName, 0o755)
116+
require.NoError(b, err)
117+
}
118+
119+
logger, err := zap.NewDevelopment()
120+
require.NoError(b, err)
121+
122+
sbxlogger.SetSandboxLoggerInternal(logger)
123+
// sbxlogger.SetSandboxLoggerExternal(logger)
124+
125+
networkPool, err := network.NewPool(
126+
b.Context(), noop.MeterProvider{}, 8, 8, clientID,
127+
)
128+
require.NoError(b, err)
129+
defer func() {
130+
err := networkPool.Close(b.Context())
131+
assert.NoError(b, err)
132+
}()
133+
134+
devicePool, err := nbd.NewDevicePool(b.Context(), noop.MeterProvider{})
135+
require.NoError(b, err, "do you have the nbd kernel module installed?")
136+
defer func() {
137+
err := devicePool.Close(b.Context())
138+
assert.NoError(b, err)
139+
}()
140+
141+
featureFlags, err := featureflags.NewClient()
142+
require.NoError(b, err)
143+
defer func() {
144+
err := featureFlags.Close(b.Context())
145+
assert.NoError(b, err)
146+
}()
147+
148+
limiter, err := limit.New(b.Context(), featureFlags)
149+
require.NoError(b, err)
150+
151+
persistence, err := storage.GetTemplateStorageProvider(b.Context(), limiter)
152+
require.NoError(b, err)
153+
154+
blockMetrics, err := blockmetrics.NewMetrics(&noop.MeterProvider{})
155+
require.NoError(b, err)
156+
157+
templateCache, err := template.NewCache(b.Context(), featureFlags, persistence, blockMetrics)
158+
require.NoError(b, err)
159+
160+
sandboxFactory := sandbox.NewFactory(networkPool, devicePool, featureFlags, true)
161+
162+
dockerhubRepository, err := dockerhub.GetRemoteRepository(b.Context())
163+
require.NoError(b, err)
164+
defer func() {
165+
err := dockerhubRepository.Close()
166+
assert.NoError(b, err)
167+
}()
168+
169+
accessToken := "access-token"
170+
sandboxConfig := sandbox.Config{
171+
BaseTemplateID: templateID,
172+
Vcpu: 2,
173+
RamMB: 512,
174+
TotalDiskSizeMB: 2 * 1024,
175+
HugePages: useHugePages,
176+
AllowInternetAccess: ptr(allowInternetAccess),
177+
Envd: sandbox.EnvdMetadata{
178+
Vars: map[string]string{"HELLO": "WORLD"},
179+
AccessToken: &accessToken,
180+
Version: "1.2.3",
181+
},
182+
}
183+
184+
runtime := sandbox.RuntimeMetadata{
185+
TemplateID: templateID,
186+
SandboxID: "sandbox-id",
187+
ExecutionID: "execution-id",
188+
TeamID: "team-id",
189+
}
190+
191+
artifactRegistry, err := artifactsregistry.GetArtifactsRegistryProvider(b.Context())
192+
require.NoError(b, err)
193+
194+
persistenceTemplate, err := storage.GetTemplateStorageProvider(b.Context(), nil)
195+
require.NoError(b, err)
196+
197+
persistenceBuild, err := storage.GetBuildCacheStorageProvider(b.Context(), nil)
198+
require.NoError(b, err)
199+
200+
var proxyPort uint = 5007
201+
202+
sandboxes := smap.New[*sandbox.Sandbox]()
203+
204+
sandboxProxy, err := proxy.NewSandboxProxy(noop.MeterProvider{}, proxyPort, sandboxes)
205+
require.NoError(b, err)
206+
go func() {
207+
err := sandboxProxy.Start(b.Context())
208+
assert.ErrorIs(b, http.ErrServerClosed, err)
209+
}()
210+
defer func() {
211+
err := sandboxProxy.Close(b.Context())
212+
assert.NoError(b, err)
213+
}()
214+
215+
buildMetrics, err := metrics.NewBuildMetrics(noop.MeterProvider{})
216+
require.NoError(b, err)
217+
218+
builder := build.NewBuilder(
219+
logger,
220+
sandboxFactory,
221+
persistenceTemplate,
222+
persistenceBuild,
223+
artifactRegistry,
224+
dockerhubRepository,
225+
sandboxProxy,
226+
sandboxes,
227+
templateCache,
228+
buildMetrics,
229+
)
230+
231+
buildPath := filepath.Join(os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"), buildID, "rootfs.ext4")
232+
if _, err := os.Stat(buildPath); os.IsNotExist(err) {
233+
// build template
234+
force := true
235+
templateConfig := config.TemplateConfig{
236+
TemplateID: templateID,
237+
FromImage: baseImage,
238+
Force: &force,
239+
VCpuCount: sandboxConfig.Vcpu,
240+
MemoryMB: sandboxConfig.RamMB,
241+
StartCmd: "echo 'start cmd debug' && sleep 10 && echo 'done starting command debug'",
242+
DiskSizeMB: sandboxConfig.TotalDiskSizeMB,
243+
HugePages: sandboxConfig.HugePages,
244+
}
245+
246+
metadata := storage.TemplateFiles{
247+
BuildID: buildID,
248+
KernelVersion: kernelVersion,
249+
FirecrackerVersion: fcVersion,
250+
}
251+
_, err = builder.Build(b.Context(), metadata, templateConfig, logger.Core())
252+
require.NoError(b, err)
253+
}
254+
255+
// retrieve template
256+
tmpl, err := templateCache.GetTemplate(
257+
b.Context(),
258+
buildID,
259+
kernelVersion,
260+
fcVersion,
261+
false,
262+
false,
263+
)
264+
require.NoError(b, err)
265+
266+
tc := testContainer{
267+
sandboxFactory: sandboxFactory,
268+
testType: testType,
269+
tmpl: tmpl,
270+
sandboxConfig: sandboxConfig,
271+
runtime: runtime,
272+
}
273+
274+
for b.Loop() {
275+
tc.testOneItem(b, buildID, kernelVersion, fcVersion)
276+
}
277+
}
278+
279+
// ptr returns a pointer to a fresh copy of v. It is handy for taking the
// address of constants and literals, which Go does not allow directly.
func ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
282+
283+
// testCycle names the part of the sandbox lifecycle that a benchmark
// iteration runs; testOneItem branches on it.
type testCycle string

// Supported benchmark modes.
const (
	onlyStart        = testCycle("only-start")
	startAndPause    = testCycle("start-and-pause")
	startPauseResume = testCycle("start-pause-resume")
)
290+
291+
type testContainer struct {
292+
testType testCycle
293+
sandboxFactory *sandbox.Factory
294+
tmpl template.Template
295+
sandboxConfig sandbox.Config
296+
runtime sandbox.RuntimeMetadata
297+
}
298+
299+
func (tc *testContainer) testOneItem(b *testing.B, buildID, kernelVersion, fcVersion string) {
300+
b.Helper()
301+
302+
ctx, span := tracer.Start(b.Context(), "testOneItem")
303+
defer span.End()
304+
305+
sbx, err := tc.sandboxFactory.ResumeSandbox(
306+
ctx,
307+
tc.tmpl,
308+
tc.sandboxConfig,
309+
tc.runtime,
310+
uuid.NewString(),
311+
time.Now(),
312+
time.Now().Add(time.Second*15),
313+
nil,
314+
)
315+
require.NoError(b, err)
316+
317+
if tc.testType == onlyStart {
318+
b.StopTimer()
319+
err = sbx.Close(ctx)
320+
require.NoError(b, err)
321+
b.StartTimer()
322+
return
323+
}
324+
325+
meta, err := sbx.Template.Metadata()
326+
require.NoError(b, err)
327+
328+
templateMetadata := meta.SameVersionTemplate(storage.TemplateFiles{
329+
BuildID: buildID,
330+
KernelVersion: kernelVersion,
331+
FirecrackerVersion: fcVersion,
332+
})
333+
snap, err := sbx.Pause(ctx, templateMetadata)
334+
require.NoError(b, err)
335+
require.NotNil(b, snap)
336+
337+
if tc.testType == startAndPause {
338+
b.StopTimer()
339+
err = sbx.Close(ctx)
340+
require.NoError(b, err)
341+
b.StartTimer()
342+
}
343+
344+
// resume sandbox
345+
sbx, err = tc.sandboxFactory.ResumeSandbox(ctx, tc.tmpl, tc.sandboxConfig, tc.runtime, uuid.NewString(), time.Now(), time.Now().Add(time.Second*15), nil)
346+
require.NoError(b, err)
347+
348+
// close sandbox
349+
err = sbx.Close(ctx)
350+
require.NoError(b, err)
351+
}
352+
353+
func downloadKernel(b *testing.B, filename, url string) {
354+
b.Helper()
355+
356+
dirname := filepath.Dir(filename)
357+
err := os.MkdirAll(dirname, 0o755)
358+
require.NoError(b, err)
359+
360+
// kernel already exists
361+
if _, err := os.Stat(filename); err == nil {
362+
return
363+
}
364+
365+
client := &http.Client{}
366+
req, err := http.NewRequestWithContext(b.Context(), http.MethodGet, url, nil)
367+
require.NoError(b, err)
368+
response, err := client.Do(req)
369+
require.NoError(b, err)
370+
require.Equal(b, http.StatusOK, response.StatusCode)
371+
defer response.Body.Close()
372+
373+
file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
374+
require.NoError(b, err)
375+
defer file.Close()
376+
377+
_, err = file.ReadFrom(response.Body)
378+
require.NoError(b, err)
379+
}

packages/orchestrator/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ require (
5050
go.opentelemetry.io/otel v1.38.0
5151
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.14.0
5252
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
53+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
5354
go.opentelemetry.io/otel/metric v1.38.0
5455
go.opentelemetry.io/otel/sdk/metric v1.38.0
5556
go.opentelemetry.io/otel/trace v1.38.0
@@ -234,7 +235,6 @@ require (
234235
go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect
235236
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
236237
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
237-
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 // indirect
238238
go.opentelemetry.io/otel/log v0.14.0 // indirect
239239
go.opentelemetry.io/otel/sdk v1.38.0 // indirect
240240
go.opentelemetry.io/otel/sdk/log v0.14.0 // indirect

0 commit comments

Comments
 (0)