diff --git a/test/spike_wasmtime_hermetic/BUILD.bazel b/test/spike_wasmtime_hermetic/BUILD.bazel
new file mode 100644
index 00000000..42760c3f
--- /dev/null
+++ b/test/spike_wasmtime_hermetic/BUILD.bazel
@@ -0,0 +1,45 @@
+"""SPIKE: hermetic tool execution under wasmtime via a single preopened root.
+
+Demonstrates running a tool compiled to WebAssembly as a Bazel action that maps
+declared inputs/outputs through ONE preopened root (`wasmtime run --dir root::/`),
+the model standardized by WebAssembly/wasi-testsuite#264.
+
+See SPIKE.md for the write-up.
+"""
+
+load("@bazel_skylib//rules:diff_test.bzl", "diff_test")
+load("@rules_wasm_component//rust:defs.bzl", "rust_wasm_binary")
+load("//wasm/private:wasm_tool_run.bzl", "wasm_tool_run")
+
+package(default_visibility = ["//visibility:public"])
+
+# The tool: a plain WASI command (wasm32-wasip2 component) that reads an input
+# file, uppercases it, writes an output file, and probes hermeticity.
+rust_wasm_binary(
+    name = "transform_tool",
+    srcs = ["transform.rs"],
+)
+
+# Run it hermetically: only `sample.txt` is a declared input, so the single
+# root the guest sees holds exactly that (the tool is loaded host-side, not
+# staged). The action fails if the hermeticity probe can reach a host path.
+wasm_tool_run(
+    name = "run_transform",
+    srcs = ["sample.txt"],
+    out = "sample.UPPER.txt",
+    tool = ":transform_tool",
+)
+
+# Functional + hermeticity assertion in one: the output must equal the golden
+# (proves read+transform+write through the single root), and the action only
+# succeeds if the in-guest hermeticity probe was denied.
+diff_test(
+    name = "transform_output_test",
+    file1 = ":run_transform",
+    file2 = "expected.txt",
+)
+
+test_suite(
+    name = "all",
+    tests = [":transform_output_test"],
+)
diff --git a/test/spike_wasmtime_hermetic/SPIKE.md b/test/spike_wasmtime_hermetic/SPIKE.md
new file mode 100644
index 00000000..2baaf546
--- /dev/null
+++ b/test/spike_wasmtime_hermetic/SPIKE.md
@@ -0,0 +1,108 @@
+# Spike: hermetic tool execution under wasmtime via a single preopened root
+
+**Question (from the wasi-testsuite#264 discussion):** now that WASI is moving to
+a single-root filesystem model, can we map the Bazel-hermetic file model onto
+wasmtime and call tools-as-wasm hermetically — the thing the older
+multi-preopen / absolute-host-path approach couldn't do cleanly?
+
+**Answer: yes, the single-root model works — with one Bazel-specific caveat that
+this spike pins down exactly.**
+
+## What this spike contains
+
+- `transform.rs` — a tiny WASI command (built to `wasm32-wasip2` via
+  `rust_wasm_binary`) that reads an input file, uppercases it, writes an output
+  file, and **probes hermeticity**: it tries to read `/etc/hostname` and
+  `/etc/passwd` and exits non-zero if either succeeds.
+- `//wasm/private:wasm_tool_run.bzl` — an experimental rule that runs the tool
+  as a hermetic Bazel action, mapping inputs/outputs through **one** preopened
+  root (`wasmtime run --dir root::/`).
+- `transform_output_test` — a `diff_test` that is green only if the tool read +
+  transformed + wrote through the single root **and** the hermeticity probe was
+  denied.
+
+Run it: `bazel test //test/spike_wasmtime_hermetic:all`
+
+## Findings
+
+### 1. The single-root model works, and hermeticity is free
+
+Given one preopened root, the guest sees a normal filesystem rooted at `/` and
+**cannot escape it**. There is no WASI way to name a host-absolute path; the only
+thing visible is what is inside the preopen.
Combined with Bazel's sandbox (which
+already contains only the declared inputs), hermeticity is the intersection of
+two independent deny-by-default systems. The probe reading `/etc/hostname` is
+denied every run — `sandbox confirmed` in the action log.
+
+### 2. The real obstacle: Bazel stages inputs as symlinks that escape the preopen
+
+The first attempt — preopen the action's execroot directly (`--dir .::/`) and
+pass guest-absolute paths — failed with:
+
+```
+cannot read /test/.../sample.txt: Operation not permitted (os error 63)
+```
+
+**errno 63 is WASI `EPERM`** (`ENOTCAPABLE` would be 76) — here a capability refusal,
+not a host file-permission problem. Root cause, confirmed two ways:
+
+- A hand-made symlink pointing outside the preopen reproduces the identical
+  `EPERM`.
+- Bazel stages the declared input in the sandbox as a symlink pointing *out* of
+  the sandbox execroot:
+  `sandbox/.../execroot/_main/.../sample.txt -> /private/var/tmp/.../execroot/_main/.../sample.txt`
+
+WASI refuses to traverse a symlink that escapes a preopen. wasmtime has **no
+flag** to relax this (`--dir` takes only `HOST[::GUEST]`; no follow-symlinks /
+permissions option).
+
+### 3. The fix: materialize inputs as real files inside the root
+
+The preopen **directory** may itself be a symlink — wasmtime canonicalizes it
+when it opens the preopen at startup. Only files traversed *inside the guest*
+must not escape. So copying the declared inputs into one real root directory and
+preopening that makes everything work (this spike does the copy with `cp -L`;
+see "Production path"). The module file the loader reads is fine either way —
+only WASI guest filesystem access is capability-checked.
+
+### 4. Why the old approach couldn't do this
+
+The previous model enumerated individual host **absolute** preopens
+(`--dir /abs/a --dir /abs/b …`) plus a fragile argv convention naming the first
+preopen.
That fights Bazel head-on: Bazel paths are relocatable and staged as a
+symlink farm, so there were no stable host-absolute paths to hand WASI, and any
+that were handed in pointed at escaping symlinks. The single-root direction
+(wasi-testsuite#264) replaces all of that with **one** root + guest-absolute
+paths — which maps cleanly onto "one materialized sandbox directory."
+
+## Efficiency
+
+This spike validates *correctness/hermeticity*, not yet speed. The two known
+startup costs and their existing levers in this repo:
+
+- **JIT compile per invocation** → AOT-compile the tool to `.cwasm`
+  (`wasm_precompile` / `--allow-precompiled`) so wasmtime loads precompiled code.
+- **Guest init per invocation** → Wizer pre-initialization (already supported via
+  the wasmtime toolchain) snapshots post-init state.
+
+A real benchmark (native tool vs JIT wasm vs AOT+wizer wasm, over N invocations)
+is the next step before claiming "efficient" with a number.
+
+## Production path (not done here — spike scope)
+
+1. Replace the `cp` staging with a hermetic copy into a `TreeArtifact`
+   (aspect bazel-lib `copy_to_directory`) or a small Rust launcher — **no shell**
+   per RULE #1. The shell `cp` here is the one spike shortcut.
+2. Support multiple / structured outputs (declare a `TreeArtifact` the tool
+   writes into) instead of a single `/out`.
+3. AOT + Wizer wiring and a benchmark target.
+4. Decide the input path convention (flat `/basename` here vs. mirrored tree).
+
+## Verdict
+
+The single-root filesystem direction makes hermetic tool-as-wasm execution under
+wasmtime genuinely viable for the Bazel model — the capability was always there,
+but the ergonomics now line up. The one thing a production rule MUST handle is
+materializing declared inputs as real files in the root (Bazel's symlink staging
++ WASI's no-escape rule = `EPERM` (os error 63) otherwise). That is the concrete result
+this spike was built to find.
diff --git a/test/spike_wasmtime_hermetic/expected.txt b/test/spike_wasmtime_hermetic/expected.txt new file mode 100644 index 00000000..5eb7dc4a --- /dev/null +++ b/test/spike_wasmtime_hermetic/expected.txt @@ -0,0 +1,2 @@ +HELLO FROM THE BAZEL SANDBOX +SECOND LINE diff --git a/test/spike_wasmtime_hermetic/sample.txt b/test/spike_wasmtime_hermetic/sample.txt new file mode 100644 index 00000000..dead2bc8 --- /dev/null +++ b/test/spike_wasmtime_hermetic/sample.txt @@ -0,0 +1,2 @@ +hello from the bazel sandbox +second line diff --git a/test/spike_wasmtime_hermetic/transform.rs b/test/spike_wasmtime_hermetic/transform.rs new file mode 100644 index 00000000..ec392af5 --- /dev/null +++ b/test/spike_wasmtime_hermetic/transform.rs @@ -0,0 +1,46 @@ +// SPIKE demonstrator: a WASI command run hermetically under wasmtime via a +// single preopened root (`wasmtime run --dir .::/`). +// +// Contract (argv): +// - reads the input file, uppercases it, writes the output file +// - both paths are guest-absolute under the single root +// +// It also probes hermeticity: a host-absolute path outside the sandbox must be +// unreachable. Under a single-root preopen the guest cannot escape the root, so +// the probe must fail; if it ever succeeds we exit non-zero so the Bazel action +// fails loudly rather than silently leaking. +use std::env; +use std::fs; +use std::process::exit; + +fn main() { + let args: Vec = env::args().collect(); + if args.len() < 3 { + eprintln!("usage: transform "); + exit(2); + } + let input = &args[1]; + let output = &args[2]; + + let contents = match fs::read_to_string(input) { + Ok(c) => c, + Err(e) => { + eprintln!("cannot read {}: {}", input, e); + exit(1); + } + }; + + // Hermeticity probe: must NOT be able to read a host path outside the root. 
+ for forbidden in ["/etc/hostname", "/etc/passwd"] { + if fs::read_to_string(forbidden).is_ok() { + eprintln!("HERMETICITY VIOLATION: read host {}", forbidden); + exit(3); + } + } + + if let Err(e) = fs::write(output, contents.to_uppercase()) { + eprintln!("cannot write {}: {}", output, e); + exit(1); + } + eprintln!("ok: {} -> {} ({} bytes, sandbox confirmed)", input, output, contents.len()); +} diff --git a/wasm/private/wasm_tool_run.bzl b/wasm/private/wasm_tool_run.bzl new file mode 100644 index 00000000..2d36410f --- /dev/null +++ b/wasm/private/wasm_tool_run.bzl @@ -0,0 +1,99 @@ +"""SPIKE: hermetic tool execution under wasmtime via a single preopened root. + +Proof-of-concept for running a tool compiled to WebAssembly as a hermetic Bazel +action, mapping files in/out through ONE preopened directory +(`wasmtime run --dir root::/`) rather than a list of per-file/absolute-path +preopens — the model standardized by WebAssembly/wasi-testsuite#264. + +KEY FINDING (see SPIKE.md): + - The single-root model itself works perfectly: a guest given one preopened + root sees a normal filesystem and cannot escape it (hermeticity is the + intersection of Bazel's sandbox and WASI's deny-by-default capabilities — + the guest cannot even name a host-absolute path). + - BUT Bazel stages an action's declared inputs as *symlinks pointing outside* + the sandbox, and WASI refuses to traverse a symlink that escapes a preopen + (errno 63 = ENOTCAPABLE). So the inputs must be materialized as REAL files + inside the preopened root. The preopen directory may itself be a symlink + (wasmtime canonicalizes it when it opens the preopen); only files traversed + *inside* the guest must not escape. + - This is exactly why the older multi-preopen / absolute-host-path approach + fought Bazel: you cannot hand WASI Bazel's relocatable symlink farm. One + materialized root + guest-absolute paths is the clean shape. + +SPIKE-ONLY: the input staging below uses run_shell (cp) for brevity. 
Production
+should replace it with a hermetic copy (aspect bazel-lib `copy_to_directory`
+into a TreeArtifact, or a small Rust launcher) per RULE #1 — no shell. The
+wasmtime invocation and the single-root model are the parts being validated.
+"""
+
+def _shell_quote(word):
+    """Returns `word` single-quoted so the shell treats it as one literal word."""
+    return "'" + word.replace("'", "'\\''") + "'"
+
+def _wasm_tool_run_impl(ctx):
+    """Runs the tool under wasmtime with one staged directory as the guest "/"."""
+    wasmtime = ctx.toolchains["@rules_wasm_component//toolchains:wasmtime_toolchain_type"].wasmtime
+    out = ctx.actions.declare_file(ctx.attr.out)
+
+    # Inputs are flattened to /<basename>; two srcs sharing a basename would
+    # silently overwrite each other in the root, so reject that up front.
+    staged = {}
+    for src in ctx.files.srcs:
+        if src.basename in staged:
+            fail("srcs %s and %s both map to guest path /%s" % (staged[src.basename], src.path, src.basename))
+        staged[src.basename] = src.path
+
+    # Stage inputs as REAL files (cp -L dereferences Bazel's staging symlinks)
+    # in a per-output root that is wiped before and after use, so unsandboxed
+    # or concurrent actions never share guest-visible files. Paths are
+    # shell-quoted; extra_args arrive as "$@" so the shell never re-parses them.
+    script = ["set -e", "root=" + _shell_quote(out.path + ".wasm_root")]
+    script += ['rm -rf "$root"', 'mkdir -p "$root"', "trap 'rm -rf \"$root\"' EXIT"]
+    script += ['cp -L %s "$root"/%s' % (_shell_quote(path), _shell_quote(base)) for base, path in staged.items()]
+    script.append('%s run --dir "${root}::/" %s %s /out "$@"' % (
+        _shell_quote(wasmtime.path),
+        _shell_quote(ctx.file.tool.path),
+        " ".join([_shell_quote("/" + base) for base in staged]),
+    ))
+    script.append('cp "$root/out" ' + _shell_quote(out.path))
+
+    ctx.actions.run_shell(
+        command = "\n".join(script) + "\n",
+        arguments = ctx.attr.extra_args,
+        inputs = ctx.files.srcs + [ctx.file.tool],
+        outputs = [out],
+        tools = [wasmtime],
+        mnemonic = "WasmToolRun",
+        progress_message = "Running %s under wasmtime (single-root hermetic) -> %s" % (
+            ctx.file.tool.short_path,
+            out.short_path,
+        ),
+    )
+    return [DefaultInfo(files = depset([out]))]
+
+wasm_tool_run = rule(
+    implementation = _wasm_tool_run_impl,
+    attrs = {
+        "tool": attr.label(
+            allow_single_file = True,
+            mandatory = True,
+            doc = "WebAssembly tool (wasi:cli/command) to execute with wasmtime. " +
+                  "rust_wasm_binary names its output without a .wasm extension.",
+        ),
+        "srcs": attr.label_list(
+            allow_files = True,
+            doc = "Declared input files, materialized as real files inside the " +
+                  "single preopened root; the guest sees them at /<basename> " +
+                  "and nothing else.",
+        ),
+        "out": attr.string(
+            mandatory = True,
+            doc = "Name of the single output file the tool writes (guest /out).",
+        ),
+        "extra_args": attr.string_list(
+            doc = "Extra literal argv appended after the input/output paths.",
+        ),
+    },
+    toolchains = ["@rules_wasm_component//toolchains:wasmtime_toolchain_type"],
+    doc = "SPIKE: run a wasm tool hermetically via a single preopened root.",
+)