diff --git a/cmd/publish-server/main.go b/cmd/publish-server/main.go index d7fd652..670ee13 100644 --- a/cmd/publish-server/main.go +++ b/cmd/publish-server/main.go @@ -32,7 +32,9 @@ import ( "log" "net/http" "os" + "regexp" "strings" + "time" "github.com/pilot-protocol/app-template/internal/publish" ) @@ -49,6 +51,8 @@ type server struct { adminToken string origins []string registrar publish.BrokerRegistrar // registers managed apps with the broker on approval + r2 *publish.R2 // artifact registry (nil = uploads disabled) + selfBase string // public base URL of THIS server, for proxy artifact URLs } func main() { @@ -79,7 +83,14 @@ func main() { adminToken: os.Getenv("ADMIN_TOKEN"), // CORS: only the production website may call the API. ALLOWED_ORIGINS // overrides (e.g. add a local origin for testing); default is prod. - origins: splitOrigins(allowedOriginsEnv()), + origins: splitOrigins(allowedOriginsEnv()), + r2: publish.R2FromEnv(), + selfBase: strings.TrimRight(os.Getenv("PUBLISH_SELF_URL"), "/"), + } + if s.r2 != nil { + log.Printf("artifact registry: R2 bucket %q (public base %q)", s.r2.Bucket, s.r2.PublicBase) + } else { + log.Printf("artifact registry: disabled (set R2_ENDPOINT/R2_BUCKET + AWS keys to enable uploads)") } // Managed-app approval registers the app with the broker by writing its // registry file (BROKER_REGISTRY). Unset = managed registration is logged @@ -100,6 +111,10 @@ func main() { }) mux.HandleFunc("/api/preview", s.cors(s.apiPreview)) mux.HandleFunc("/api/submit", s.cors(s.apiSubmit)) + mux.HandleFunc("/api/artifact/presign", s.cors(s.apiArtifactPresign)) + // Signing proxy: install-time GET of an artifact when no public domain is set. + // Unauthenticated by design (the daemon fetches it); R2 holds the real bytes. + mux.HandleFunc("GET /artifact/", s.artifactProxy) // Self-contained admin assets (embedded). The dashboard depends on nothing // from the website — its CSS ships in this binary and is served from here. 
mux.Handle("GET /static/", http.FileServer(http.FS(assets))) @@ -210,6 +225,91 @@ func (s *server) apiSubmit(w http.ResponseWriter, r *http.Request) { writeJSON(w, 202, map[string]any{"case_id": c.CaseID, "status": c.Status}) } +// ── artifact registry (R2) ──────────────────────────────────────────────────── + +// presignReq is the website Artifacts step's request for a direct-to-R2 upload +// slot: it identifies the app + target platform + filename, and gets back a +// short-lived PUT URL plus the stable public URL to record in the submission. +type presignReq struct { + ID string `json:"id"` + Version string `json:"version"` + OS string `json:"os"` + Arch string `json:"arch"` + Filename string `json:"filename"` +} + +var ( + reArtifactID = regexp.MustCompile(`^io\.pilot\.[a-z0-9]([a-z0-9-]*[a-z0-9])?$`) + reArtifactVer = regexp.MustCompile(`^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?$`) + reArtifactFile = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`) + okArtifactOS = map[string]bool{"linux": true, "darwin": true} + okArtifactArch = map[string]bool{"amd64": true, "arm64": true} +) + +func (s *server) apiArtifactPresign(w http.ResponseWriter, r *http.Request) { + if s.r2 == nil { + writeJSON(w, 503, map[string]any{"error": "artifact uploads are not configured on this server"}) + return + } + var req presignReq + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSON(w, 400, map[string]any{"error": "bad json: " + err.Error()}) + return + } + var errs []string + if !reArtifactID.MatchString(req.ID) { + errs = append(errs, "id must be io.pilot.") + } + if !reArtifactVer.MatchString(req.Version) { + errs = append(errs, "version must be semver") + } + if !okArtifactOS[req.OS] { + errs = append(errs, "os must be linux or darwin") + } + if !okArtifactArch[req.Arch] { + errs = append(errs, "arch must be amd64 or arm64") + } + if !reArtifactFile.MatchString(req.Filename) { + errs = append(errs, "filename must be a plain name (letters, digits, . 
_ -)") + } + if len(errs) > 0 { + writeJSON(w, 422, map[string]any{"errors": errs}) + return + } + key := publish.ArtifactKey(req.ID, req.Version, req.OS, req.Arch, req.Filename) + putURL, err := s.r2.PresignPut(key, 15*time.Minute, time.Now()) + if err != nil { + writeJSON(w, 500, map[string]any{"error": "presign: " + err.Error()}) + return + } + writeJSON(w, 200, map[string]any{ + "key": key, + "put_url": putURL, + "public_url": s.r2.PublicURL(key, s.selfBase), + "expires_in": 900, + }) +} + +// artifactProxy 302-redirects an install-time GET to a fresh presigned R2 GET, so +// installs work off a stable URL even when the bucket has no public domain. +func (s *server) artifactProxy(w http.ResponseWriter, r *http.Request) { + if s.r2 == nil { + http.Error(w, "artifact registry not configured", http.StatusServiceUnavailable) + return + } + key := strings.TrimPrefix(r.URL.Path, "/artifact/") + if key == "" || strings.Contains(key, "..") { + http.Error(w, "bad key", http.StatusBadRequest) + return + } + getURL, err := s.r2.PresignGet(key, 10*time.Minute, time.Now()) + if err != nil { + http.Error(w, "presign: "+err.Error(), http.StatusInternalServerError) + return + } + http.Redirect(w, r, getURL, http.StatusFound) +} + // adminBuild kicks off the async bundle build for a submitted (or previously // failed) case. Admin-token gated, same as approve/reject. The build runs in a // background goroutine; the case flips submitted/build_failed → building → diff --git a/docs/NATIVE-APPS.md b/docs/NATIVE-APPS.md index e2f3b7c..558fa4e 100644 --- a/docs/NATIVE-APPS.md +++ b/docs/NATIVE-APPS.md @@ -1,6 +1,16 @@ # Native (binary-delivery) apps — design -> Status: DESIGN + TODO. Native/CLI apps are **Coming soon** — blocked at the +> **SUPERSEDED (2026-06-22) for the delivery model.** This doc proposed delivering +> native binaries *by reference* (customer-hosted URL, "we never store the bytes"). 
+> The shipped implementation instead **hosts the bytes in a Pilot-run Cloudflare +> R2 artifact registry**: the publisher uploads per-OS/arch binaries in the +> publish form's Artifacts step, and the generated cli adapter fetches + verifies +> + stages + execs them at install (with install order + optional args). See +> **`docs/R2-ARTIFACT-REGISTRY.md`** for the canonical, implemented design. The +> `assets[]` schema and the daemon-side staging notes below remain useful +> background, but where they disagree with R2-ARTIFACT-REGISTRY.md, that doc wins. + +> Status (original): DESIGN + TODO. Native/CLI apps are **Coming soon** — blocked at the > wizard's type step; only HTTP (translation-only) apps ship today. Decision > (2026-06-17): native apps deliver the real binary via a **customer-hosted URL + > per-OS/arch sha256**, pinned in the signed manifest and **fetched + verified + diff --git a/docs/PUBLISHING-SMOLMACHINES.md b/docs/PUBLISHING-SMOLMACHINES.md new file mode 100644 index 0000000..2b42cec --- /dev/null +++ b/docs/PUBLISHING-SMOLMACHINES.md @@ -0,0 +1,104 @@ +# Publishing Smol Machines (io.pilot.smolmachines) to the Pilot app store + +End-to-end runbook: build every platform artifact, host them in the R2 registry, +produce the catalogue entry, and land it. The app is a passthrough cli adapter +over the `smolvm` binary (no enumerated methods). + +## 0. Identity & descriptions (fixed) + +- **id:** `io.pilot.smolmachines` · **version:** `1.2.0` · **namespace/method prefix:** `smolmachines` +- **command:** `smolvm` · **method:** `smolmachines.exec` (passthrough) + auto `smolmachines.help` +- **short** (catalogue / `appstore list`): the one-liner. +- **long** (`appstore view` → metadata `description_md`): the full bullet description. +- See `submissions/io.pilot.smolmachines/submission.json` for the exact text. + +## 1. Build all platform artifacts (the binaries the publisher uploads) + +smolvm releases per-platform tarballs. 
Re-host them in the registry under the app id: + +```bash +for p in "darwin arm64 arm64" "linux arm64 arm64" "linux amd64 x86_64"; do + set -- $p; OS=$1 PARCH=$2 SARCH=$3 + T=smolvm-1.2.0-$OS-$SARCH.tar.gz + gh release download v1.2.0 --repo smol-machines/smolvm --pattern "$T" --clobber + aws s3 cp "$T" "s3://pilot-artifacts-prod/io.pilot.smolmachines/1.2.0/$OS-$PARCH/$T" \ + --endpoint-url=https://<account-id>.r2.cloudflarestorage.com +done +``` +Record each `sha256` (computed in the browser at upload time, or `shasum -a 256`). +The artifact for each platform is the **tar.gz** with `unpack: tar.gz`, +`exec_path: smolvm-1.2.0-<os>-<arch>/smolvm`, `order: 1`. + +> Note: smolvm needs the whole tarball (wrapper + `smolvm-bin` + `lib/` + sparse VM +> images), not just one file — hence `unpack: tar.gz`. + +## 2. Submit + build the adapter bundle + +POST the submission to the publish-server (or drive the website Artifacts step): + +```bash +curl -X POST $PUBLISH_API/api/submit -H 'Content-Type: application/json' \ + --data @submissions/io.pilot.smolmachines/submission.json # → {case_id, status:submitted} +# admin builds (per platform): scaffold adapter → sign manifest → emit install.json/install.sh → self-verify +``` +`BuildBundle` cross-compiles the adapter for all four targets and **self-verifies each +through the catalogue gate** — a green build IS the §7.1 preflight passing. + +Each bundle contains: signed `manifest.json` (`proc.exec→smolvm`, `net.dial <artifact-host>`, +`fs.write $APP`, `fs.read $APP/install.json`, `protection: guarded`), `bin/smolvm-app`, +`install.json` (prod R2 URLs + shas), `install.sh`. + +## 3. 
Host the bundles + metadata, build the catalogue entry + +```bash +# bundles +for PLAT in darwin-arm64 darwin-amd64 linux-arm64 linux-amd64; do + aws s3 cp io.pilot.smolmachines-1.2.0-$PLAT.tar.gz \ + s3://pilot-artifacts-prod/bundles/io.pilot.smolmachines/1.2.0/ --endpoint-url=$EP +done +# rich metadata.json (carries the LONG description_md) +aws s3 cp metadata.json s3://pilot-artifacts-prod/catalogue/apps/io.pilot.smolmachines/metadata.json --endpoint-url=$EP +``` + +Catalogue v2 entry (the line that lands in the platform catalogue): +```json +{ "id":"io.pilot.smolmachines", "version":"1.2.0", + "description":"", + "display_name":"Smol Machines", "vendor":"smol machines", "license":"Apache-2.0", + "source_url":"https://github.com/smol-machines/smolvm", + "bundle_url":"/bundles/.../io.pilot.smolmachines-1.2.0-linux-amd64.tar.gz", + "bundle_sha256":"", + "bundles": { "darwin/arm64":{...}, "darwin/amd64":{...}, "linux/arm64":{...}, "linux/amd64":{...} }, + "metadata_url":"/catalogue/apps/io.pilot.smolmachines/metadata.json", + "metadata_sha256":"" } +``` +- `description` (short) → `appstore list`. `metadata.description_md` (long) → `appstore view`. + +## 4. Sign + land the catalogue entry + +The catalogue is signature-gated; pilotctl verifies `.sig` against the +**embedded release catalogue key**. In production this is done by the publish +automation (app-template#28 auto-signs with the `CATALOG_SIGN_KEY` CI secret) when +you **Approve** the case — it opens the one-line catalogue PR on the platform repo +(`TeoSlayer/pilotprotocol` → `catalogue/catalogue.json`). Merge that PR and hosts +pick it up on next `pilotctl appstore catalogue`. + +Manual/local signing (testing only) requires a pilotctl built with your key: +`pilotctl appstore sign-catalogue --key catalogue.json`. + +## 5. 
Install + verify on a host + +```bash +pilotctl appstore catalogue | grep smolmachines # short description shows here +pilotctl appstore view io.pilot.smolmachines # long description_md shows here +pilotctl appstore install io.pilot.smolmachines # fetch+verify+stage from R2 +pilotctl appstore call io.pilot.smolmachines smolmachines.exec \ + '{"args":["machine","run","--net","--image","alpine","--","echo","hi"]}' +``` + +## Prerequisites (must be deployed first — see R2-PREDEPLOY-REPORT.md) + +1. Daemon on the **proc.exec** app-store version (pilotprotocol#317 → app-store#24). +2. pilotctl carries `install.json`/`install.sh` on install + daemon wires + `TrustedPublishers` (pilotprotocol#318). Without #2 the trust anchor rejects every app. +3. R2 bucket **CORS** for browser uploads; publish-server R2 env set. diff --git a/docs/R2-ARTIFACT-REGISTRY.md b/docs/R2-ARTIFACT-REGISTRY.md new file mode 100644 index 0000000..c4b33e7 --- /dev/null +++ b/docs/R2-ARTIFACT-REGISTRY.md @@ -0,0 +1,115 @@ +# R2 Artifact Registry — native binary delivery for cli apps + +> Status: IMPLEMENTED (RC). Lets the Pilot app store **host** publisher-supplied, +> platform-specific, versioned, signed binaries in Cloudflare R2 and install them +> — in a declared order, with optional install args — via the generated cli +> adapter. Builds on the cli-app support (proc.exec + CLI adapter) from +> app-store#24 / app-template#31. **Supersedes** the "deliver by reference, never +> store the bytes" stance in `NATIVE-APPS.md`: we now store the bytes in R2. + +## Why + +`NATIVE-APPS.md` / `CLI-ADAPTER.md` shipped the *translation* half (a cli adapter +that execs a local command under `proc.exec`) but assumed the binary was already +on the host. Delivering it is the point of a store. This adds the *delivery* +half: the publisher uploads per-OS/arch binaries to a Pilot-run R2 registry, and +the adapter fetches + verifies + stages them at install. 
+ +## The flow + +``` +PUBLISH FORM (Artifacts step) BUILD (publish-api) INSTALL (host) +upload binaries → R2 ─────▶ generate adapter + install.json ─▶ adapter staging (stage.go) +set install order + args fold into the bundle tarball fetch R2 → verify sha → stage + (sha-pinned in the catalogue) → run install args (order) + → exec the staged command +``` + +1. **Artifacts step** (publish form, website). The publisher uploads each + platform binary (or `.tar.gz`) to the R2 registry and sets, per artifact: + target `os`/`arch`, `exec_path`, install `order`, optional install `args`, and + `unpack` for archives. The form submits a JSON `Submission` carrying + `artifacts[]` (R2 url + sha256 + order + args — never the bytes). +2. **Submit** (`POST /api/submit`). `Submission.Validate()` checks the artifacts + (cli-only, known os/arch, https URL, 64-hex sha, relative `exec_path` under + `$APP`, per-platform-unique order). The sha is the integrity anchor. +3. **Build** (`/admin/build` → `BuildBundle`). In addition to the signed adapter, + the build emits **`install.json`** (the staging spec, from `cfg.Assets`) into + every platform tarball, and the manifest gains the delivery grants + (`proc.exec`, `fs.write $APP`, `net.dial <artifact-host>`). The whole tarball is + sha-pinned in the catalogue, so `install.json` (and the expected asset shas) + can't be altered undetected. +4. **Install + call** (host). The generated cli adapter calls `StageAssets($APP)` + on first spawn (`internal/backend/stage.go`): read `install.json` → select the + asset(s) for `runtime.GOOS/GOARCH` → in ascending `order`, fetch from R2, + verify sha256, stage under `$APP` (single file, or `tar.gz` extracted via the + host `tar`), run any install `args` — then exec the staged `exec_path` per call. + +## R2 layout + +``` +s3://pilot-artifacts-{dev,prod}/<app-id>/<version>/<os>-<arch>/<filename> + io.pilot.smolvm/1.2.0/darwin-arm64/smolvm-1.2.0-darwin-arm64.tar.gz +``` +Write-once (a new app version = a new prefix). 
Buckets `pilot-artifacts-dev` and +`pilot-artifacts-prod` exist on the Pilot R2 account. **Public read** is served by +an r2.dev managed URL (dev: `https://pub-2328865fa11041b8a5efba00b940ec14.r2.dev`); +production should attach a custom domain (e.g. `artifacts.pilotprotocol.network`). +Generated install scripts reference the public base URL. + +## Schema + +`pilot.app.yaml` / `scaffold.Config` gains `assets[]` (see `example.pilot.app.yaml`); +the publish `Submission` gains `artifacts[]`. Both map to: + +| field | meaning | +|---|---| +| `role` | `binary` (chmod +x, default) \| `data` | +| `os` / `arch` | host match: `linux`/`darwin`, `amd64`/`arm64` | +| `url` | https R2 public URL of the artifact | +| `sha256` | 64-hex of the uploaded object; verified after download | +| `unpack` | `""` (single file) \| `tar.gz` (extract under `$APP`) | +| `exec_path` | dest under `$APP`, or the path inside the extracted tree | +| `order` | ascending install sequence (unique per platform) | +| `args` | optional post-stage invocation (e.g. a one-time setup) | + +## Integrity & security + +- **sha256** on every asset, checked after download; mismatch refuses to install. +- The **bundle tarball is sha-pinned** in the catalogue, so `install.json` is + tamper-evident transitively (no app-store manifest-schema change needed). +- **`proc.exec`** (app-store#24) authorizes the exec; **`fs.write $APP`** and + **`net.dial `** authorize staging. cli apps ship `protection: guarded`. +- Archive extraction uses the host `tar` (handles GNU/sparse artifacts Go's + `archive/tar` rejects) **after** a name-scan that rejects absolute paths and + `..` traversal (zip-slip defence). 
+ +## E2E + +`scripts/e2e-smolvm.sh` + `internal/scaffold/r2_e2e_test.go` (`TestR2AssetDeliveryE2E`): +download smolvm (`smol-machines/smolvm`, a real multi-file microVM CLI: wrapper + +binary + libs + sparse disk images) for the host, upload it to `pilot-artifacts-dev`, +then build the generated adapter and let it fetch+verify+extract from R2 and exec +it — asserting `smolvm --version → "smolvm 1.2.0"`. The Go test is env-gated +(`PILOT_E2E_ASSET_URL/_SHA256/_EXECPATH/...`) so CI needs no live bucket; the +script wires it up against the real registry. + +## Build / repo coordination + +| Repo | Role | +|---|---| +| **app-template** (this) | schema, build-time `install.json` gen, staging runtime, manifest grants, e2e — the bulk | +| **app-store** #24 | `proc.exec` capability (reused as the exec permission) | +| **pilotprotocol** #317 | daemon dep bump so it accepts `proc.exec` | +| website #44 | publish wizard cli path; **TODO**: add the Artifacts step (uploads + order/args) as a thin client over a presign endpoint | + +## Follow-ups + +- **Presign upload endpoint** (`POST /api/artifact/presign`) + a signing-proxy + `GET /artifact/...` so the form uploads straight to R2 and installs can run off + a stable proxy URL where a public domain isn't configured. (The e2e uploads via + the S3 API directly.) +- **Server-side re-verify** of each artifact sha against the stored R2 object at + submit time. +- Production **custom domain** for `pilot-artifacts-prod` (needs a Cloudflare API + token with R2 + DNS scope; the S3 keys can't enable public access). 
diff --git a/docs/R2-DEPLOYMENT.md b/docs/R2-DEPLOYMENT.md new file mode 100644 index 0000000..e91363a --- /dev/null +++ b/docs/R2-DEPLOYMENT.md @@ -0,0 +1,78 @@ +# R2 Artifact Registry — Deployment Plan + +Ship native-CLI app delivery: a publisher uploads platform binaries through the +publish form, they land in a Pilot-run Cloudflare R2 registry, and a Pilot user +runs `pilotctl appstore install <app-id>` to fetch + verify + stage + run them. This +plan lists every change, in dependency order, plus the infra and the validated +prerequisites surfaced by the end-to-end run. + +## 1. Cloudflare R2 + +| Item | Value / action | +|---|---| +| Account | `<account-id>` (S3 endpoint `https://<account-id>.r2.cloudflarestorage.com`) | +| Buckets | `pilot-artifacts-dev`, `pilot-artifacts-prod` (created) | +| Object layout | `<app-id>/<version>/<os>-<arch>/<filename>` (binaries); `bundles/<app-id>/<version>/` (signed bundles) | +| Public read (dev) | r2.dev managed URL `https://pub-2328865fa11041b8a5efba00b940ec14.r2.dev` | +| Public read (prod) | r2.dev managed URL `https://pub-f09f9a4ea848491198d48e329ba030e3.r2.dev` | +| **CORS (required)** | `PUT,GET,HEAD` from the website origins — **without this the browser upload fails** (preflight blocked). Applied to both buckets via `put-bucket-cors`. | +| Production hardening | swap the r2.dev URL for a custom domain (`artifacts.pilotprotocol.network`) once a Cloudflare API token with R2+DNS scope is available; update `R2_PUBLIC_BASE`. | + +CORS config applied (keep the website origins current): +```json +{"CORSRules":[{"AllowedOrigins":["https://pilotprotocol.network","https://www.pilotprotocol.network"], + "AllowedMethods":["GET","PUT","HEAD"],"AllowedHeaders":["*"],"ExposeHeaders":["ETag"],"MaxAgeSeconds":3600}]} +``` + +## 2. 
publish-server env (the VM) + +``` +R2_ENDPOINT=https://.r2.cloudflarestorage.com +R2_BUCKET=pilot-artifacts-prod +R2_PUBLIC_BASE=https://pub-f09f9a4ea848491198d48e329ba030e3.r2.dev # or the custom domain +AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY # R2 S3 keys (scoped to the artifacts buckets) +PUBLISH_SELF_URL=https://publish-api.pilotprotocol.network # for the signing-proxy fallback +``` +Adds: `POST /api/artifact/presign` (browser → direct R2 upload) and the signing-proxy +`GET /artifact/...`. Unset = uploads return 503 (graceful). + +## 3. The PRs (dependency order) + +| # | Repo | PR | What | Status | +|---|---|---|---|---| +| 1 | app-store | **#24** | `proc.exec` capability + hardened target | open (existing) | +| 2 | app-template | **#31** | CLI adapter + publish-api + proc.exec scaffolding | open (existing) | +| 3 | app-template | **NEW: R2 artifact registry** | `assets`/`artifacts` schema (os/arch/url/sha/unpack/exec_path/**deps**/order/args), `install.json` + `install.sh` generation, `stage.go` staging runtime, manifest delivery grants, `/api/artifact/presign` + signing proxy + R2 SigV4 client, full test suite + smolvm e2e | branch `feat/r2-artifacts-on-cli` | +| 4 | website | **#44** | CLI publish-form path | open (existing) | +| 5 | website | **NEW: Artifacts step** | wizard step: per-platform upload via presign, install order, dependencies, install args; `artifacts[]` in the submit payload | branch `cli-publish-form` + Artifacts commit | +| 6 | pilotprotocol | **#317** | daemon dep bump to the proc.exec app-store version | open (existing) | +| 7 | pilotprotocol | **NEW: install + trust wiring** | `pilotctl install` carries `install.json`/`install.sh` into `$APP`; daemon populates `manifest.TrustedPublishers` (from the publisher registry) and honors `PILOT_APPSTORE_ROOT` | patch ready (this run) | + +### Why #7 is a hard blocker (surfaced by the e2e) +- `pilotctl appstore install` only staged `manifest.json` + the binary, **dropping `install.json`** — 
so the adapter had nothing to stage from. Fixed: carry the install spec files. +- app-store **#23 enforces the trust anchor** (non-sideloaded installs), but **nothing populated `manifest.TrustedPublishers`** — so the proc.exec daemon skips **every** catalogue app (cosift/sixtyfour included), not just new ones. Fixed: wire `TrustedPublishers` from the reviewed publisher registry. This MUST ship with #6 or the app store breaks on upgrade. + +## 4. Autonomous publish flow (unchanged shape, now with artifacts) + +``` +website form (Artifacts step → presign → R2 upload) + → POST /api/submit {Submission + artifacts[]} (CORS-locked) + → admin Build → BuildBundle: scaffold adapter + sign manifest + emit install.json/install.sh + → self-verify through the catalogue gate (per platform) + → admin Approve → release bundles + open the one-line catalogue.json PR (signed) + → catalogue merge → pilotctl appstore install +``` +- **Correct catalogue entry**: v2 with `bundle_url`+`bundle_sha256` (primary) and a per-platform + `bundles` map; signed with the embedded catalogue key (pilotctl verifies the `.sig` on load). +- **Correct manifest**: `proc.exec → `, `fs.write $APP`, `net.dial `, + `fs.read $APP/install.json`, `protection: guarded`; binary sha pinned + ed25519-signed. +- **Install script**: `install.json` (machine-readable, consumed by `stage.go`) + `install.sh` + (standalone POSIX, dependency-ordered) ship in every bundle, both tested. + +## 5. Pre-deploy validation (this run) + +- ✅ R2 dev+prod buckets, public URLs, CORS — browser upload of the real smolvm tarball succeeded (sha computed client-side, presigned PUT, served back). +- ✅ Full website form (Astro, PR #44 + Artifacts step) drove submit → API → build end-to-end. 
+- ✅ `pilotctl appstore catalogue/install/list/call` against a proc.exec daemon: install fetched the right platform bundle from R2, sha-verified, staged install.json, daemon spawned the adapter (full grants), the adapter fetched + verified + extracted smolvm from R2 and exec'd it. +- ✅ `install.sh` generation + dependency ordering: unit + golden tested. +- See `R2-PREDEPLOY-REPORT.md` for the smol-machines vanilla-vs-pilot task comparison + preflight. diff --git a/docs/R2-PREDEPLOY-REPORT.md b/docs/R2-PREDEPLOY-REPORT.md new file mode 100644 index 0000000..26712b6 --- /dev/null +++ b/docs/R2-PREDEPLOY-REPORT.md @@ -0,0 +1,79 @@ +# R2 Artifact Registry — Pre-Deployment Report + +Validated end-to-end on macOS/darwin-arm64, 2026-06-22, against live Cloudflare R2 +and a locally-built proc.exec-aware pilot daemon. Test app: **smol machines** +(`smol-machines/smolvm` v1.2.0) — a real microVM CLI shipped as a multi-file +tar.gz (wrapper + binary + libs + sparse disk images). + +## Verdict + +**GO, conditional on the three daemon/pilotctl prerequisites below shipping together.** +Every layer of the publish→deliver→install→run path works; the conditions are +deployment wiring, not design gaps. + +## The task, vanilla vs Pilot + +Task: *run a command inside an ephemeral, isolated Alpine Linux microVM and capture +its output* (proves a real separate kernel, not a container on the host). 
+ +| | Vanilla | Pilot app store | +|---|---|---| +| Invocation | `smolvm machine run --net --image alpine -- sh -c "echo …; uname -a; cat /etc/alpine-release"` | `pilotctl appstore call io.pilot.smolvm smolvm.exec '{"args":["machine","run","--net","--image","alpine","--","sh","-c","…"]}'` | +| Exit | 0 | 0 | +| stdout | `hello from microVM` / `Linux 6.12.87 … aarch64` / `3.24.1` | `hello from microVM via pilot` / `Linux 6.12.87 … aarch64` / `3.24.1` | +| Binary | must be pre-installed | delivered from R2, sha-verified, staged, exec'd — host had nothing | +| Isolation | hypervisor microVM | identical microVM, plus `proc.exec`-guarded, scrubbed-env adapter | + +Identical results. The Pilot path adds delivery + integrity + capability sandboxing +at zero behavioral cost. + +## Preflight checks (all ✅) + +| # | Check | Result | +|---|---|---| +| 1 | R2 dev+prod buckets exist | `pilot-artifacts-dev`, `pilot-artifacts-prod` | +| 2 | Public read URLs | dev `pub-2328865f…`, prod `pub-f09f9a4e…` → 200, byte-exact | +| 3 | Bucket CORS for browser upload | `PUT/GET/HEAD` from site origins — applied (required) | +| 4 | Presign endpoint | `POST /api/artifact/presign` → presigned PUT + public URL; live round-trip OK | +| 5 | Browser upload via the real form | 30 MB smolvm tarball uploaded; sha computed client-side; submit carried `artifacts[]` | +| 6 | Build emits install spec | `install.json` + `install.sh` in every platform bundle | +| 7 | Dependency ordering | topological (deps override raw order); unit + golden tested | +| 8 | Standalone `install.sh` | fetch → sha-verify → extract → staged binary runs (`smolvm 1.2.0`) | +| 9 | Manifest correctness | `proc.exec→smolvm`, `fs.write $APP`, `net.dial `, `fs.read $APP/install.json`, `guarded`, sha-pinned + signed | +| 10 | Catalogue entry | v2, per-platform `bundles` map, signed; `pilotctl appstore catalogue` lists it | +| 11 | `pilotctl appstore install` | fetched the correct os/arch bundle from R2, sha256 OK, extracted (with 
install.json) | +| 12 | Daemon spawn | proc.exec accepted, trust anchor satisfied, `sideloaded=false` (full grants) | +| 13 | Adapter staging from R2 | fetched + verified + extracted smolvm tree under `$APP` on first spawn | +| 14 | `pilotctl appstore call` | `smolvm.version` → `smolvm 1.2.0`; `smolvm.exec` booted a real microVM | +| 15 | Integrity negative path | sha mismatch refuses to stage (covered in stage.go + tests) | + +## Prerequisites that MUST ship (surfaced by the run) + +1. **Daemon upgraded to proc.exec** (`pilotprotocol#317` → app-store#24). The host's + live daemon is **v1.12.2 and rejects `proc.exec`** — native CLI apps cannot install + until it ships. *(Validated against a locally-built proc.exec daemon.)* +2. **pilotctl install must carry `install.json`/`install.sh`** into `$APP`. Stock + install staged only `manifest.json` + the binary, so the adapter had nothing to + stage from. *(Patched in the proposed pilotprotocol PR; verified.)* +3. **Daemon must populate `manifest.TrustedPublishers`.** app-store#23 enforces the + trust anchor for catalogue installs, but nothing wired the list — so the proc.exec + daemon **skips every catalogue app** (cosift/sixtyfour included), not just new ones. + Wire it from the reviewed publisher registry. *(Patched + verified; this is the + highest-risk item — shipping #1 without it bricks the existing app store.)* + +## Infra to set (non-code) + +- R2 CORS on both buckets (applied); production custom domain when a CF API token exists. +- publish-server env: `R2_ENDPOINT`, `R2_BUCKET`, `R2_PUBLIC_BASE`, R2 S3 keys, `PUBLISH_SELF_URL`. +- Daemon: `PILOT_TRUSTED_PUBLISHERS` (or registry-backed) = the platform publisher key. + +## Notes / smaller findings + +- smolvm ships **sparse** disk images; Go's `archive/tar` rejects them, so `stage.go` + and `install.sh` extract via the host `tar` (with a path-safety name-scan first). +- The catalogue is signature-gated; pilotctl verifies the `.sig` against an embedded + key. 
Test used a rebuilt pilotctl with an overridden catalogue key (the documented + `-ldflags` path) — production signs with the real release key (already wired via + app-template#28 auto-signing). +- Daemon overlay-reconnect churn degraded re-spawn after repeated reinstalls in the + test; a clean restart recovered immediately. Worth a soak test, not a blocker. diff --git a/docs/USING-SMOLMACHINES-VIA-PILOT.md b/docs/USING-SMOLMACHINES-VIA-PILOT.md new file mode 100644 index 0000000..279b7f9 --- /dev/null +++ b/docs/USING-SMOLMACHINES-VIA-PILOT.md @@ -0,0 +1,68 @@ +# Using Smol Machines via Pilot — utility guide + +How an agent drives `io.pilot.smolmachines` (the smolvm microVM engine) through the +Pilot app store. The app is a **passthrough**: one method, `smolmachines.exec`, +forwards a verbatim `smolvm` argv into a hardware-isolated microVM and returns the +result. No methods are enumerated per-subcommand — this guide + `smolmachines.help` +are how you discover the surface. + +## Install + discover + +```bash +pilotctl appstore install io.pilot.smolmachines # fetch + verify + stage the binary from the registry +pilotctl appstore view io.pilot.smolmachines # the long store page (what it does, how to use) +pilotctl appstore call io.pilot.smolmachines smolmachines.help '{}' # live method surface + params + latency +``` + +## The one convention: `smolmachines.exec` takes a verbatim argv + +```bash +pilotctl appstore call io.pilot.smolmachines smolmachines.exec '{"args":[ ...smolvm argv... ]}' +``` +The `args` array is exactly what you would type after `smolvm`. The reply is JSON +`{"stdout","stderr","exit"}` (a non-zero exit is a normal result, not an error). +There is no allowlist — **every** smolvm subcommand and flag is reachable. 
+ +## Command surface (what to put in `args`) + +| Goal | `args` | +|---|---| +| Run a one-off command in a throwaway VM | `["machine","run","--net","--image","alpine","--","sh","-c","echo hi"]` | +| Run a script in a language image | `["machine","run","--net","--image","python:3.12-alpine","--","python3","-c","print(2**100)"]` | +| List machines | `["machine","ls","--json"]` | +| Create a persistent VM | `["machine","create","--net","--name","dev","--image","ubuntu"]` | +| Start / stop / delete it | `["machine","start","--name","dev"]` · `["machine","stop","--name","dev"]` · `["machine","delete","--name","dev","-f"]` | +| Run in a persistent VM (changes persist) | `["machine","exec","--name","dev","--","apt-get","install","-y","python3"]` | +| Copy a file in, then out | `["machine","cp","./x.py","dev:/workspace/x.py"]` then `["machine","cp","dev:/workspace/out.json","./out.json"]` | +| Pack a portable artifact | `["pack","create","--image","python:3.12-alpine","-o","./py"]` | +| Status of a VM | `["machine","status","--name","dev"]` | + +Full command tree: `machine run|exec|create|start|stop|delete|shell|status|ls|cp|update|monitor|prune`, +`pack create|run`, `serve`, `config`. To read smolvm's own reference at runtime: +`{"args":["--help"]}` or `{"args":["machine","run","--help"]}`. + +## Conventions & suggestions + +- **Networking is OFF by default.** Add `--net` when the workload needs the network. Scope it with `--allow-host H` / `--allow-cidr C`. +- **`run` is ephemeral, `exec` is persistent.** `machine run` discards everything on exit (best for untrusted/one-off). `machine create` + `machine exec` keep filesystem changes across calls — use a stable `--name` and the VM survives between Pilot calls. +- **Use `/workspace`** for data you want to keep or copy out; it persists across `exec` and `stop`/`start`. +- **Pass inputs/outputs with `machine cp`** (host↔VM), or mount a host dir with `-v HOST:GUEST`. 
+- **Secrets:** `--secret-env GUEST=HOSTVAR` references a *host* env var. The Pilot adapter scrubs the child env to a minimal baseline, so a host var is only visible if the publisher listed it in `env_passthrough`. Prefer `machine cp` of a file, or `--ssh-agent` for git/ssh, when a secret must reach the VM. +- **Pin images by digest** (`name@sha256:…`) for reproducibility. +- **Latency:** VM-booting calls are the `slow` class (seconds: image pull + boot). `machine ls`/`status`/`--help` are sub-second. + +## Not supported over Pilot IPC + +The adapter is one-shot request/response (no PTY, bounded output). These don't work and should be avoided: +- **Interactive sessions** — `-it`, `machine shell`, interactive `/bin/sh` (no live TTY/stdin stream). +- **Long-running servers** — `serve start` (blocks until the call timeout). +- **Huge stdin/stdout** — `--image -` (multi-GB docker-save over a JSON field); output is capped at 4 MiB (truncation flagged). Use `--image <ref>` or `machine cp` instead. + +## End-to-end example + +```bash +pilotctl appstore install io.pilot.smolmachines +pilotctl appstore call io.pilot.smolmachines smolmachines.exec \ + '{"args":["machine","run","--net","--image","python:3.12-alpine","--","python3","-c","import platform;print(platform.platform())"]}' +# → {"stdout":"Linux-6.12...-aarch64-with","stderr":"...pull progress...","exit":0} +``` diff --git a/docs/samples/ab-report-smolvm.html b/docs/samples/ab-report-smolvm.html new file mode 100644 index 0000000..88539fd --- /dev/null +++ b/docs/samples/ab-report-smolvm.html @@ -0,0 +1,704 @@ + +A/B report — io.pilot.smolvm + +

Vanilla vs Pilot — A/B report

+

App io.pilot.smolvm · delivered from the Pilot R2 artifact registry · generated by scripts/ab_report.py

+ +

Summary

+
Command | Vanilla (ms) | Pilot (ms) | Δ overhead | Match
Version | 60 | 833 | +773 |
List machines | 42 | 456 | +414 |
Run command in an ephemeral Alpine microVM | 5854 | 5492 | -362 |
Compute in a Python microVM | 9374 | 9501 | +127 |
+ +

Adapter-generated help — smolvm.help (local, no backend), 471 ms

+
+
Pilot · smolvm.help (generated by the adapter)
{
+  "app": "io.pilot.smolvm",
+  "version": "1.2.0",
+  "description": "Run portable, lightweight microVMs (smol machines) from the app store.",
+  "duration_classes": {
+    "fast": "<~1s \u2014 status or cheap call",
+    "med": "~1-5s \u2014 moderate work",
+    "slow": "~5-30s \u2014 heavy / multi-step"
+  },
+  "methods": [
+    {
+      "method": "smolvm.version",
+      "kind": "utility",
+      "summary": "Print the smolvm version.",
+      "duration": "fast"
+    },
+    {
+      "method": "smolvm.exec",
+      "kind": "utility",
+      "summary": "Run any smolvm subcommand; payload {\"args\":[...]}.",
+      "params": {
+        "args": "array \u2014 verbatim argv forwarded to smolvm"
+      },
+      "duration": "slow"
+    },
+    {
+      "method": "smolvm.help",
+      "kind": "meta",
+      "summary": "This document \u2014 every method with params, kind, and duration class.",
+      "duration": "fast",
+      "typical_roundtrip": "instant (local, no backend call)"
+    }
+  ]
+}
+
Vanilla · smolvm --help (37 ms)
# smolvm — Agent Reference
+
+A tool to build and run portable, self-contained virtual machines locally. <200ms boot time. No daemon, no Docker.
+
+## Quick Reference
+
+```bash
+# Ephemeral (cleaned up after exit)
+smolvm machine run --net --image alpine -- echo hello
+smolvm machine run --net -it --image alpine -- /bin/sh   # interactive shell
+smolvm machine run --net --image python:3.12-alpine -- python3 script.py
+
+# Persistent (survives across exec sessions and stop/start)
+smolvm machine create --net --name myvm
+smolvm machine start --name myvm
+smolvm machine exec --name myvm -- apk add python3   # installs persist
+smolvm machine exec --name myvm -- which python3      # still there
+smolvm machine shell --name myvm               # interactive shell (auto-starts if stopped)
+smolvm machine stop --name myvm
+smolvm machine delete --name myvm
+
+# Image-based persistent (filesystem changes persist across exec sessions)
+smolvm machine create --net --image ubuntu --name myvm
+smolvm machine start --name myvm
+smolvm machine exec --name myvm -- apt-get update
+smolvm machine exec --name myvm -- apt-get install -y python3
+smolvm machine exec --name myvm -- which python3      # still there after exit+re-exec
+
+# SSH agent forwarding (git/ssh without exposing keys)
+smolvm machine run --ssh-agent --net --image alpine -- ssh-add -l
+smolvm machine create --name myvm --ssh-agent --net
+
+# Inject secrets into workload env (referenced from host env var / file)
+smolvm machine run --secret-env OPENAI_API_KEY=OPENAI_API_KEY -- ./app
+smolvm machine run -s Smolfile -- ./app   # Smolfile [secrets] resolves at launch
+
+# Pack into portable executable
+smolvm pack create --image python:3.12-alpine -o ./my-python
+./my-python run -- python3 -c "print('hello')"
+
+# Create machine from packed artifact (fast start, no pull)
+smolvm machine create --name my-vm --from ./my-python.smolmachine
+smolvm machine start --name my-vm
+smolvm machine exec --name my-vm -- pip install requests
+
+# Use local container images (CI, air-gapped, fast iteration)
+docker save myapp:latest -o myapp.tar
+smolvm machine run --image ./myapp.tar -- ./app           # from a docker/podman save archive
+docker save myapp:latest | smolvm machine run --image - -- ./app   # from stdin
+smolvm machine run --image ./rootfs/ -- ./app             # from an unpacked rootfs dir
+smolvm machine create --name myvm --image ./myapp.tar     # persistent, from a local archive
+```
+
+## When to Use What
+
+| Goal | Command |
+|------|---------|
+| Run a one-off command in isolation | `smolvm machine run --net --image IMAGE -- CMD` |
+| Interactive shell (ephemeral) | `smolvm machine run --net -it --image IMAGE -- /bin/sh` |
+| Interactive shell (persistent) | `smolvm machine shell --name NAME` |
+| Persistent dev environment | `machine create` → `machine start` → `machine exec` |
+| Ship software as a binary | `smolvm pack create --image IMAGE -o OUTPUT` |
+| Fast persistent machine from packed artifact | `machine create --name NAME --from FILE.smolmachine` |
+| Use local container images (CI / air-gapped / fast iteration) | `--image ./archive.tar`, `--image -` (stdin), or `--image ./rootfs/` |
+| Use git/ssh with private keys safely | Add `--ssh-agent` to run or create |
+| Inject API keys / tokens without putting them on the command line | `--secret-env`/`--secret-file` flags or Smolfile `[secrets]` |
+| Minimal VM without image | `smolvm machine run -s Smolfile` (bare VM) |
+| Change mounts/ports/resources on existing VM | `machine update --name NAME -v ./src:/app -p 8080:8080` |
+| Declarative VM config | Create a Smolfile, use `--smolfile`/`-s` flag |
+
+### Persistence Model
+
+- **`machine run`** — ephemeral. All changes are discarded when the command exits.
+- **`machine exec`** — persistent. Filesystem changes (package installs, config edits) persist across exec sessions for the same machine, whether bare or image-based. Changes are stored in an overlay on the machine's storage disk.
+- **`machine stop` + `start`** — changes persist across restarts. The persistent overlay is remounted preserving previous changes.
+- **`pack run`** — ephemeral. Each run starts fresh from the packed image.
+- **`pack start` + `exec`** — daemon mode. `/workspace` persists across exec sessions and stop/start. Container overlay resets per exec (package installs don't persist — use `/workspace` for durable data).
+- **`machine create --from .smolmachine`** — creates a persistent named machine from a packed artifact. Boots from pre-extracted layers (~250ms, no image pull). Full `machine exec` persistence — package installs, file writes all survive across exec and stop/start.
+
+## CLI Structure
+
+All commands use named flags (no positional args except `machine create --name NAME` and `machine delete --name NAME`).
+
+```
+smolvm machine run --image IMAGE [-- COMMAND]     # ephemeral
+smolvm machine exec --name NAME [-- COMMAND]      # run in existing VM
+smolvm machine shell [--name NAME]                # interactive shell (auto-starts)
+smolvm machine create --name NAME [OPTIONS]              # create persistent
+smolvm machine create --name NAME --from FILE.smolmachine  # from packed artifact
+smolvm machine start [--name NAME]                # start (default: "default")
+smolvm machine stop [--name NAME]                 # stop
+smolvm machine delete --name NAME [-f]                   # delete
+smolvm machine status [--name NAME]               # check state
+smolvm machine ls [--json]                        # list all
+smolvm machine update --name NAME [OPTIONS]              # modify stopped machine settings
+smolvm machine cp SRC DST                         # copy files (host↔VM)
+smolvm machine exec --stream --name NAME -- CMD   # streaming output
+smolvm machine monitor [--name NAME]              # foreground health + restart
+
+smolvm pack create --image IMAGE -o PATH          # package
+smolvm pack create --from-vm NAME -o PATH         # pack from VM snapshot
+smolvm pack run [--sidecar PATH] [-- CMD]         # run .smolmachine
+
+smolvm serve start [--listen ADDR:PORT|PATH]      # HTTP API
+smolvm config registries edit                     # registry auth
+
+# Secrets are references to host env vars / files, resolved at launch — no
+# built-in store. Attach them on the command line or in a Smolfile [secrets].
+smolvm machine run    --secret-env GUEST_VAR=HOST_VAR     # from host env var
+smolvm machine run    --secret-file GUEST_VAR=/abs/path   # from host file
+smolvm machine create --name NAME --secret-env GUEST_VAR=HOST_VAR  # persists the ref
+smolvm machine exec --name NAME --secret-env GUEST_VAR=HOST_VAR -- cmd
+```
+
+## Artifact References
+
+Artifact references follow OCI conventions and support both tags and digests:
+
+```
+python-dev                                        # bare name (default registry + latest)
+python-dev:v1.0                                   # name + tag
+binsquare/custom:v1                               # namespace + name + tag
+smolmachines.com/python-dev:latest                # registry + name + tag
+smolmachines.com/binsquare/custom:v1              # registry + namespace + name + tag
+python-dev@sha256:abcdef0123...                   # digest reference (immutable)
+```
+
+Default registry: `registry.smolmachines.com`. Digest references require `sha256:` followed by exactly 64 hex characters.
+
+### Local container images
+
+`--image` also accepts a local source — useful for CI, air-gapped hosts, and fast
+local iteration. smolvm stays a microVM runtime and delegates all image work
+(flatten, whiteouts, config) to container tooling (`crane`/`docker`/`podman`); the
+archive is flattened with `crane export`.
+
+```
+./image.tar  ./image.tar.gz  ./image.tgz   # a `docker save` / `podman save` archive (gzip ok)
+-                                           # the same archive streamed on stdin
+./rootfs/                                   # an already-unpacked root filesystem directory
+```
+
+A source is treated as local when it starts with `/`, `./`, `../`, is `-`, or ends in
+`.tar`/`.tar.gz`/`.tgz`; everything else is a registry reference (so bare `alpine`
+still pulls). Archives are cached content-addressed by hash and re-resolved on
+`machine start`. `--image -` cannot be combined with `-i`/`-t` (both read stdin).
+
+smolvm boots images, it does not build them: a Dockerfile passed to `--image` is
+rejected with a hint to build first (`docker build … && docker save … | … --image -`).
+
+## Key Flags
+
+| Flag | Short | Used on | Description |
+|------|-------|---------|-------------|
+| `--image` | `-I` | run, create, pack create | OCI image, or a local source: a `docker save` archive (`./img.tar`, or `-` for stdin) or unpacked rootfs dir (`./rootfs/`) |
+| `--name` | `-n` | run, start, stop, status, exec, update | Machine name (default: "default") |
+| `--net` | | run, create | Enable outbound networking (off by default) |
+| `--gpu` | | run, create | Enable GPU acceleration (Vulkan via virtio-gpu) |
+| `--gpu-vram` | | run, create | GPU shared-memory region size in MiB (default: 4096). Ignored without `--gpu`. |
+| `--volume` | `-v` | run, create, update | Mount host dir: `HOST:GUEST[:ro]` |
+| `--port` | `-p` | run, create, update | Port mapping: `HOST:GUEST` |
+| `--smolfile` | `-s` | run, create, pack create | Load config from Smolfile |
+| `--interactive` | `-i` | run, exec | Keep stdin open |
+| `--tty` | `-t` | run, exec | Allocate pseudo-TTY |
+| `--allow-cidr` | | run, create | CIDR egress filter (implies --net) |
+| `--allow-host` | | run, create | Hostname egress filter, resolved at VM start (implies --net) |
+| `--ssh-agent` | | run, create | Forward host SSH agent (git/ssh without exposing keys) |
+
+## Smolfile Reference
+
+A Smolfile is a TOML file declaring a VM workload. Use with `--smolfile`/`-s`.
+
+```toml
+# Top-level: workload definition
+image = "python:3.12-alpine"          # OCI image (omit for bare Alpine)
+entrypoint = ["/app/run"]             # overrides image ENTRYPOINT
+cmd = ["serve"]                       # overrides image CMD
+env = ["PORT=8080", "DEBUG=1"]        # environment variables
+workdir = "/app"                      # working directory
+
+# Resources
+cpus = 2                              # vCPUs (default: 4)
+memory = 1024                         # MiB (default: 8192, elastic via balloon)
+net = true                            # outbound networking (default: false)
+gpu = true                            # GPU acceleration (default: false)
+gpu_vram = 4096                       # GPU VRAM MiB (default: 4096, ignored unless gpu=true)
+storage = 40                          # storage disk GiB (default: 20)
+overlay = 4                           # overlay disk GiB (default: 2)
+
+# Network policy — egress filtering by hostname and/or CIDR
+[network]
+allow_hosts = ["api.stripe.com"]      # resolved at VM start (implies net)
+allow_cidrs = ["10.0.0.0/8"]         # IP/CIDR ranges (implies net)
+
+# Dev profile (used by `machine run` and `machine create`)
+[dev]
+volumes = ["./src:/app"]              # host bind mounts
+ports = ["8080:8080"]                 # port forwarding
+init = ["pip install -r requirements.txt"]  # run on every VM start
+env = ["APP_MODE=dev"]                # dev-only env (extends top-level)
+workdir = "/app"                      # dev-only workdir
+
+# Artifact profile (used by `pack create`)
+[artifact]
+cpus = 4                              # override resources for distribution
+memory = 2048
+entrypoint = ["/app/run"]             # override entrypoint for packed binary
+oci_platform = "linux/amd64"          # target OCI platform
+
+# Health check (used by `machine monitor`)
+[health]
+exec = ["curl", "-f", "http://127.0.0.1:8080/health"]
+interval = "10s"
+timeout = "2s"
+retries = 3
+startup_grace = "20s"
+
+# Credential forwarding
+[auth]
+ssh_agent = true                      # forward host SSH agent into the VM
+
+# Secrets — references to host sources, resolved at workload launch
+[secrets]
+DATABASE_URL   = { from_env   = "PROD_DB_URL" }      # host env var (at launch)
+GCP_CREDS      = { from_file  = "/abs/creds.json" }  # host file (at launch)
+```
+
+### Merge Precedence
+
+CLI flags override Smolfile values:
+
+```
+image:      --image flag > Smolfile image > None (bare Alpine)
+entrypoint: Smolfile entrypoint > image metadata
+cmd:        trailing args (after --) > Smolfile cmd > image metadata
+env:        top-level env + [dev].env + CLI -e (all merged)
+volumes:    [dev].volumes + CLI -v (all merged)
+ports:      [dev].ports + CLI -p (all merged)
+init:       [dev].init + CLI --init (all merged)
+cpus/mem:   CLI flag > Smolfile > defaults (4 CPU, 8192 MiB)
+```
+
+## Networking
+
+- **Off by default** — VMs have no outbound access unless `--net` is specified
+- `--net` enables full outbound (TCP/UDP, DNS)
+- `--allow-host api.stripe.com` enables egress only to resolved IPs of that hostname (implies `--net`). Also enables DNS filtering — only allowed hostnames can be resolved.
+- `--allow-cidr 10.0.0.0/8` enables egress only to specified IP ranges (implies `--net`)
+- `--allow-host` and `--allow-cidr` can be combined and used multiple times
+- `--outbound-localhost-only` restricts to 127.0.0.0/8 and ::1 (implies `--net`)
+- `-p HOST:GUEST` forwards a host port to the VM (TCP)
+- Smolfile: use `[network] allow_hosts` and `[network] allow_cidrs`
+
+### Proxy Support
+
+Pass proxy settings into VMs with `-e` when behind a corporate proxy or VPN:
+
+```bash
+smolvm machine run --net \
+  -e https_proxy=http://proxy.corp:3128 \
+  -e http_proxy=http://proxy.corp:3128 \
+  -e no_proxy=localhost,127.0.0.1 \
+  --image alpine -- wget -q -O /dev/null https://example.com
+```
+
+Or declare them in a Smolfile:
+
+```toml
+net = true
+env = [
+  "https_proxy=http://proxy.corp:3128",
+  "http_proxy=http://proxy.corp:3128",
+  "no_proxy=localhost,127.0.0.1"
+]
+```
+
+Proxy vars are NOT forwarded automatically — each VM gets exactly the env you specify. The VM uses the host's DNS server (from `/etc/resolv.conf`) for name resolution.
+
+## SSH Agent Forwarding
+
+Forward the host's SSH agent into the VM so git, ssh, and scp work with your keys — without the private keys ever entering the VM.
+
+```bash
+# CLI flag
+smolvm machine run --ssh-agent --net --image alpine -- ssh-add -l
+smolvm machine create --name myvm --ssh-agent --net
+
+# Smolfile
+# [auth]
+# ssh_agent = true
+```
+
+Inside the VM, `SSH_AUTH_SOCK` is set automatically. Any tool that uses the SSH agent protocol (git, ssh, scp) works transparently:
+
+```bash
+smolvm machine exec --name myvm -- git clone git@github.com:org/private-repo.git
+smolvm machine exec --name myvm -- ssh deploy@server "systemctl restart app"
+```
+
+The host SSH agent signs challenges but never sends private keys across the boundary. Even with root inside the VM, keys cannot be extracted — this is enforced by the SSH agent protocol and the hypervisor isolation.
+
+Requires `SSH_AUTH_SOCK` to be set on the host. If missing, smolvm exits with an error and remediation instructions.
+
+## GPU Acceleration
+
+Enable the host GPU inside a VM with `--gpu`. Guest Vulkan talks to the host GPU via virtio-gpu/Venus; ANGLE uses it as the WebGL/OpenGL ES backend.
+
+**Host setup:**
+- macOS — bundled, no extra installs needed.
+- Linux — install virglrenderer from the system package manager before use:
+  - Alpine: `apk add virglrenderer mesa-vulkan-intel` (or `mesa-vulkan-ati` for AMD)
+  - Debian/Ubuntu: `apt install virglrenderer0 mesa-vulkan-drivers`
+
+```bash
+# One-shot GPU workload
+smolvm machine run --gpu --image alpine -- sh -c '
+  apk add --no-cache mesa-vulkan-virtio vulkan-tools
+  VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/virtio_icd.x86_64.json \
+  vulkaninfo --summary 2>/dev/null | grep deviceName
+'
+# → deviceName = Virtio-GPU Venus (Intel(R) UHD Graphics ...)
+
+# Persistent GPU machine
+smolvm machine create --name browser --gpu --gpu-vram 2048
+smolvm machine start --name browser
+smolvm machine exec --name browser -- \
+  chromium --headless=new --no-sandbox --use-gl=angle --use-angle=vulkan \
+    --screenshot=/tmp/out.png --window-size=1280,800 https://example.com
+```
+
+The guest must set `VK_ICD_FILENAMES` so the Vulkan loader finds the virtio ICD. Put it in `env` in a Smolfile to avoid repeating it on every exec:
+
+```toml
+gpu = true
+gpu_vram = 2048
+env = ["VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/virtio_icd.x86_64.json"]
+```
+
+For a complete working example see [`examples/headless-browser/browser.smolfile`](examples/headless-browser/browser.smolfile).
+## Secrets
+
+smolvm stores no secret material. A secret is a *reference* to a value that
+already lives on the host — a host environment variable or a host file — and is
+resolved into the workload's process environment at launch time. Bring your own
+secrets manager (Vault, 1Password, AWS, sops, your shell): render the value into
+an env var or file, then point a ref at it. Only the reference is ever
+persisted; the resolved value never lands in the VM record, the database, or a
+`.smolmachine` pack.
+
+Attach refs on the command line:
+
+```bash
+# From a host environment variable (GUEST_VAR=HOST_VAR)
+smolvm machine run    --secret-env OPENAI_API_KEY=OPENAI_API_KEY -- ./app
+smolvm machine create --name web --secret-env DATABASE_URL=PROD_DB_URL   # persists the ref
+smolvm machine exec --name web --secret-env TOKEN=CI_TOKEN -- ./deploy
+
+# From a host file (GUEST_VAR=/absolute/path)
+smolvm machine run --secret-file GCP_CREDS=/abs/creds.json -- ./app
+
+# Bridge any external manager through the env/file seam, e.g. 1Password:
+op run --env-file=secrets.env -- smolvm machine run -- ./app
+```
+
+Or reference them from a Smolfile. The left-hand key becomes the env var name in
+the guest workload:
+
+```toml
+[secrets]
+DATABASE_URL = { from_env  = "PROD_DB_URL" }    # host env var (at launch)
+GCP_CREDS    = { from_file = "/abs/creds.json" } # absolute host file (at launch)
+```
+
+Exactly one of `from_env`, `from_file` must be set per entry; `from_file` paths
+must be absolute. Resolved values are appended *after* top-level `env` and CLI
+`-e` flags. Resolution is late-bound, so rotating the underlying env var or file
+takes effect at the next launch with nothing to re-sync.
+
+**Threat model:** this is defense-in-depth, not zero-knowledge. The target
+process sees plaintext in its own environment, and root inside the guest can
+read any `/proc/*/environ`. Use SSH agent forwarding instead when a secret must
+never leave the host.
+
+**Where they're resolved:** `machine run`, `machine create` + `machine start`,
+and `machine exec` resolve refs against *this host* under a trusted-local scope.
+Untrusted surfaces — HTTP API request bodies and portable `.smolmachine` packs —
+are treated as untrusted callers and may carry **no** resolvable secret ref:
+`from_env` would expose the server's env and `from_file` would be an arbitrary
+host-file read, so both are rejected. Configure secrets locally instead.
+
+## File Copy
+
+Copy files between the host and a running machine using `machine:path` syntax:
+
+```bash
+# Upload a file to the VM
+smolvm machine cp ./script.py myvm:/workspace/script.py
+
+# Download a file from the VM
+smolvm machine cp myvm:/workspace/output.json ./output.json
+```
+
+**Image-based VMs (--image):** Files copied with `cp` are visible to
+`exec` at the same path, and vice versa. This works for any path —
+`/tmp`, `/home`, `/workspace`, etc. Under the hood, `cp` routes
+through the container's overlay filesystem so both commands see the
+same files.
+
+**`/workspace` shared directory:** Every machine has a `/workspace`
+directory — bare VMs, image-based VMs, and machines created from
+`.smolmachine` artifacts. It persists across `exec` sessions and
+across `stop`/`start` cycles. It's a good default location for
+scripts, data, and results. Passing `-v /host/dir:/workspace` replaces
+the default storage-disk workspace with your host directory for that
+run — the host mount takes priority and the storage workspace is skipped:
+
+```bash
+# Typical agent workflow: copy code in, execute, extract results
+smolvm machine create --name r-sandbox --image r-base:latest --net
+smolvm machine start --name r-sandbox
+
+smolvm machine cp analysis.R r-sandbox:/workspace/analysis.R
+smolvm machine exec --name r-sandbox -- Rscript /workspace/analysis.R
+smolvm machine cp r-sandbox:/workspace/results.csv ./results.csv
+
+smolvm machine stop --name r-sandbox
+```
+
+**Behavior and limits:**
+
+- Files up to 1 MiB transfer as a single message — no perceptible
+  overhead beyond the agent round-trip.
+- Larger files stream automatically: 1 MiB chunks for upload, 16 MiB
+  chunks for download. The split is asymmetric because the
+  host→guest direction has tighter socket-buffer headroom.
+- Per-transfer cap is **4 GiB** in either direction. Files at or
+  above this size are rejected up front (`total_size exceeds maximum`
+  on upload; `exceeding the byte cap` on download). For larger
+  blobs, mount a host directory with `--volume` instead of copying.
+- A throttled progress line prints to stderr while large transfers
+  run, including bytes-so-far, percentage (uploads), and rate.
+  Pipe captures (`> file`) only see the upload/download summary,
+  not the progress noise.
+- Atomic on the guest side: a partially-written file never appears
+  at the target path. If the transfer fails or the connection drops
+  mid-stream, the staging file is cleaned up and the original
+  destination (if any) is unaffected.
+
+Typical throughput on macOS (Apple Silicon): ~35-42 MB/s upload,
+~170 MB/s download.
+
+## Streaming Exec
+
+Stream command output in real-time instead of buffering:
+
+```bash
+# CLI — prints output as it arrives
+smolvm machine exec --stream --name myvm -- python3 train.py
+
+# API — Server-Sent Events
+POST /api/v1/machines/:name/exec/stream
+Content-Type: application/json
+{"command": ["python3", "train.py"]}
+
+# Response: text/event-stream
+# event: stdout
+# data: Epoch 1/10...
+# event: exit
+# data: {"exitCode":0}
+```
+## Bare VM Mode
+
+`machine run` works without `--image` when a Smolfile provides the workload config, or for direct Alpine shell access:
+
+```bash
+# Bare Alpine shell
+smolvm machine run -it
+
+# Smolfile with entrypoint/cmd (no container overhead)
+smolvm machine run -s Smolfile
+
+# Bare VM with init setup, detached
+smolvm machine run -d -s Smolfile
+```
+
+Bare VMs run commands directly in the Alpine rootfs — no OCI image pull needed. Use this when you need a minimal Linux environment.
+
+## Packed Binaries (.smolmachine)
+
+`smolvm pack create` produces two files:
+- `my-app` — stub binary with embedded VM runtime (platform-specific)
+- `my-app.smolmachine` — VM payload: rootfs, OCI layers, storage (cross-platform)
+
+The packed binary runs as a normal executable:
+```bash
+./my-app run -- python3 -c "print('hello')"  # ephemeral, cleaned up after exit
+./my-app start                               # persistent daemon mode
+./my-app exec -- pip install x               # exec into daemon
+./my-app stop                                # stop daemon
+```
+
+Alternatively, create a named machine from the `.smolmachine` for full lifecycle management:
+```bash
+smolvm machine create --name my-vm --from my-app.smolmachine
+smolvm machine start --name my-vm            # ~250ms boot, no image pull
+smolvm machine exec --name my-vm -- pip install x   # fully persistent
+smolvm machine stop --name my-vm
+smolvm machine ls                            # shows my-vm
+```
+
+The `.smolmachine` manifest includes registry-oriented metadata:
+- `host_platform` — host OS+arch this machine runs on (e.g., `darwin/arm64`), distinct from `platform` which is the guest
+- `created` — RFC 3339 timestamp of when the machine was packed
+- `smolvm_version` — version of smolvm that built it
+
+## HTTP API
+
+Start with `smolvm serve start --listen 127.0.0.1:8080` or `smolvm serve start --listen $XDG_RUNTIME_DIR/smolvm.sock`. Key endpoints:
+
+```
+POST   /api/v1/machines                    Create machine
+GET    /api/v1/machines                    List machines
+GET    /api/v1/machines/:name              Get machine
+POST   /api/v1/machines/:name/start        Start machine
+POST   /api/v1/machines/:name/stop         Stop machine
+DELETE /api/v1/machines/:name              Delete machine
+POST   /api/v1/machines/:name/exec         Execute command
+POST   /api/v1/machines/:name/exec/stream  Streaming exec (SSE)
+PUT    /api/v1/machines/:name/files/*path  Upload file
+GET    /api/v1/machines/:name/files/*path  Download file
+GET    /api/v1/machines/:name/logs         Stream logs (SSE)
+POST   /api/v1/machines/:name/images/pull  Pull OCI image
+```
+
+OpenAPI spec: `smolvm serve openapi`
+
+## Important Defaults
+
+- Machine name defaults to `"default"` when `--name` is omitted
+- Network is **off** by default (security-first)
+- CPUs: 4, Memory: 8192 MiB, Storage: 20 GiB, Overlay: 2 GiB
+- Packed binaries use the same defaults (CPUs: 4, Memory: 8192 MiB)
+- Memory and CPU are elastic via virtio balloon — the host only commits what the guest actually uses and reclaims the rest
+
+## Important Behaviors
+
+- **Observational commands don't stop running VMs.** `machine images`, `machine status`, `machine ls` and similar read-only commands leave a running VM in its current state. If the VM was already running before the command, it stays running after.
+- **`machine prune` works on a running VM.** Regular prune only removes unreferenced layers and is safe while containers are active. `prune --all` requires the VM to be stopped first since it deletes manifests for layers that may be in use.
+- **`machine exec` persists filesystem changes.** Package installs, config edits, and file writes inside `exec` survive across sessions. This works for both bare VMs and image-based VMs (created with `--image`).
+- **`machine update` modifies a stopped machine.** Add/remove mounts, ports, env vars, or change CPU/memory without recreating the VM. Changes take effect on next `machine start`. Requires the machine to be stopped.
+- **`machine run` is always ephemeral.** The VM is created, the command runs, and everything is cleaned up. No state carries over.
+- **`-v host:/workspace` replaces the default workspace.** Every image-based VM exposes `/workspace` backed by the VM's storage disk. Mounting a host directory at `/workspace` takes priority — the host share is used instead and the storage-disk workspace is not mounted. Any other target path (e.g. `/data`, `/app`) does not affect `/workspace`.
+
+
+Usage: smolvm-bin <COMMAND>
+
+Commands:
+  machine  Manage machines (create, start, stop, exec) [aliases: vm]
+  serve    Start the HTTP API server for programmatic control
+  pack     Package and run self-contained VM executables
+  config   Manage smolvm configuration (registries, defaults)
+  help     Print this message or the help of the given subcommand(s)
+
+Options:
+  -h, --help
+          Print help (see a summary with '-h')
+
+  -V, --version
+          Print version
+
+Agents: run `smolvm --help` for full documentation including CLI reference and Smolfile schema
+
+ +

Per-command detail

+ +
+

Version

+

enumerated method → `smolvm --version`

+
+
Vanilla CLI
smolvm --version
exit 0 · 60 ms
smolvm 1.2.0
+
Pilot app store
pilotctl appstore call io.pilot.smolvm smolvm.version '{}'
exit 0 · 833 ms
smolvm 1.2.0
+
+
adapter overhead: +773 ms + (vanilla 60 ms · pilot 833 ms)
+
+
+

List machines

+

passthrough → `smolvm machine ls`

+
+
Vanilla CLI
smolvm machine ls
exit 0 · 42 ms
No machines found
+
Pilot app store
pilotctl appstore call io.pilot.smolvm smolvm.exec '{"args": ["machine", "ls"]}'
exit 0 · 456 ms
No machines found
+
+
adapter overhead: +414 ms + (vanilla 42 ms · pilot 456 ms)
+
+
+

Run command in an ephemeral Alpine microVM

+

boots a real isolated VM (separate kernel)

+
+
Vanilla CLI
smolvm machine run --net --image alpine -- sh -c 'echo hello from microVM; uname -a; cat /etc/alpine-release'
exit 0 · 5854 ms
hello from microVM
+Linux container 6.12.87 #1 SMP Fri May  8 14:25:15 CEST 2026 aarch64 Linux
+3.24.1
+── stderr ──
+Starting ephemeral machine (vm-c39e250d)...
+Pulling image alpine...
+Pulling image alpine... [====================] 100%
+Pulling image alpine... [====================] 100% — syncing...
+Pulling image alpine... done.
+
Pilot app store
pilotctl appstore call io.pilot.smolvm smolvm.exec '{"args": ["machine", "run", "--net", "--image", "alpine", "--", "sh", "-c", "echo hello from microVM; uname -a; cat /etc/alpine-release"]}'
exit 0 · 5492 ms
hello from microVM
+Linux container 6.12.87 #1 SMP Fri May  8 14:25:15 CEST 2026 aarch64 Linux
+3.24.1
+── stderr ──
+Starting ephemeral machine (vm-3b055ee1)...
+Pulling image alpine...
Pulling image alpine... [====================] 100%
Pulling image alpine... [====================] 100% — syncing...
Pulling image alpine... done.
+
+
adapter overhead: -362 ms (vanilla 5854 ms · pilot 5492 ms)
+
+
+

Compute in a Python microVM

+

pulls python:3.12-alpine, runs Python in the VM

+
+
Vanilla CLI
smolvm machine run --net --image python:3.12-alpine -- python3 -c "print('2**100 =', 2**100)"
exit 0 · 9374 ms
2**100 = 1267650600228229401496703205376
+── stderr ──
+Starting ephemeral machine (vm-25a8264d)...
+Pulling image python:3.12-alpine...
+Pulling image python:3.12-alpine... [=====>              ] 25%
+Pulling image python:3.12-alpine... [==========>         ] 50%
+Pulling image python:3.12-alpine... [===============>    ] 75%
+Pulling image python:3.12-alpine... [====================] 100%
+Pulling image python:3.12-alpine... [====================] 100% — syncing...
+Pulling image python:3.12-alpine... done.
+
Pilot app store
pilotctl appstore call io.pilot.smolvm smolvm.exec '{"args": ["machine", "run", "--net", "--image", "python:3.12-alpine", "--", "python3", "-c", "print('\''2**100 ='\'', 2**100)"]}'
exit 0 · 9501 ms
2**100 = 1267650600228229401496703205376
+── stderr ──
+Starting ephemeral machine (vm-215457f3)...
+Pulling image python:3.12-alpine...
Pulling image python:3.12-alpine... [=====>              ] 25%
Pulling image python:3.12-alpine... [==========>         ] 50%
Pulling image python:3.12-alpine... [===============>    ] 75%
Pulling image python:3.12-alpine... [====================] 100%
Pulling image python:3.12-alpine... [====================] 100% — syncing...
Pulling image python:3.12-alpine... done.
+
+
adapter overhead: +127 ms (vanilla 9374 ms · pilot 9501 ms)
+
diff --git a/docs/samples/smolmachines-submission.json b/docs/samples/smolmachines-submission.json new file mode 100644 index 0000000..c00cafa --- /dev/null +++ b/docs/samples/smolmachines-submission.json @@ -0,0 +1,89 @@ +{ + "id": "io.pilot.smolmachines", + "version": "1.2.0", + "description": "Smol Machines \u2014 spin up fast, hardware-isolated Linux microVMs on demand (sub-second boot, real hypervisor isolation) to safely run untrusted code, GPU tasks, or headless browser automation in a disposable sandbox.", + "email": "alex@vulturelabs.io", + "backend": { + "type": "cli", + "command": [ + "smolvm" + ] + }, + "methods": [ + { + "name": "smolmachines.exec", + "description": "Run any smolvm subcommand in a fast, hardware-isolated Linux microVM. Payload is {\"args\":[...]} \u2014 the verbatim smolvm argv. Command surface: `machine run` (ephemeral VM, one-off command), `machine create|start|exec|stop|delete|shell|status|ls|cp|update|monitor|prune` (persistent VMs; `exec` persists filesystem changes), `pack create|run` (portable .smolmachine artifacts), `serve` (HTTP API), `config`. Key flags: `--net` (networking is OFF by default), `--image `, `-v HOST:GUEST`, `-p HOST:GUEST`, `--gpu`, `--ssh-agent`, `--secret-env GUEST=HOST`. Example args: [\"machine\",\"run\",\"--net\",\"--image\",\"alpine\",\"--\",\"sh\",\"-c\",\"echo hi\"]. Not supported over IPC: interactive sessions (-it / `machine shell`) and long-running `serve`.", + "latency": "slow", + "params": [ + { + "name": "args", + "type": "array", + "required": true, + "description": "verbatim smolvm argv, e.g. [\"machine\",\"run\",\"--net\",\"--image\",\"alpine\",\"--\",\"echo\",\"hi\"]" + } + ], + "cli": { + "passthrough": true + } + } + ], + "listing": { + "display_name": "Smol Machines", + "tagline": "Fast, hardware-isolated microVMs on demand", + "app_description": "Smol Machines \u2014 the app-store front door for the smolmachines VM engine. 
It lets an agent spin up fast, hardware-isolated Linux microVMs on demand (sub-second boot, real hypervisor isolation \u2014 not shared-kernel containers), then run workloads in a disposable sandbox. Free to use. Portable .smolmachine artifacts run identically on macOS and Linux, locally or in the cloud.\n\nUse it to:\n- Run untrusted or AI-generated code safely, with networking off by default\n- Give an agent a real Linux shell \u2014 a stateful, isolated execution backend\n- Automate headless browsers (GPU-accelerated) for scraping, screenshots, and web tasks\n- Run GPU/compute jobs via Vulkan with container-like speed\n- Spin up disposable dev sandboxes \u2014 a clean VM per task, torn down after\n- Keep persistent dev VMs \u2014 installed packages survive restarts\n- Run CI-style jobs \u2014 build, test, lint in clean environments\n- Fan out parallel ephemeral workers thanks to sub-second boot\n- Analyze malware / suspicious files in a throwaway environment\n- Build once, run anywhere \u2014 same artifact local, cloud, or self-hosted\n\nDiscover the live method surface at runtime with smolmachines.help, which lists each method's parameters and latency class.", + "license": "Apache-2.0", + "homepage": "https://smolmachines.com", + "source_url": "https://github.com/smol-machines/smolvm", + "categories": [ + "dev", + "virtualization", + "security" + ], + "keywords": [ + "microvm", + "sandbox", + "vm", + "isolation", + "gpu", + "ci" + ] + }, + "vendor": { + "name": "smol machines", + "url": "https://smolmachines.com", + "agent_usage": "Agents call smolmachines.exec with a smolvm argv to boot a disposable Linux microVM and run a command in isolation; output returns as {stdout,stderr,exit}.", + "capabilities": "Boot ephemeral/persistent Linux microVMs; run commands in isolation; GPU compute; headless browser; pack/run portable VM artifacts." 
+ }, + "artifacts": [ + { + "os": "darwin", + "arch": "arm64", + "url": "https://pub-f09f9a4ea848491198d48e329ba030e3.r2.dev/io.pilot.smolmachines/1.2.0/darwin-arm64/smolvm-1.2.0-darwin-arm64.tar.gz", + "sha256": "5f53abad672c57042b68a327b9038a759aefd070143e22658a35d400825b3319", + "unpack": "tar.gz", + "exec_path": "smolvm-1.2.0-darwin-arm64/smolvm", + "order": 1, + "role": "binary" + }, + { + "os": "linux", + "arch": "arm64", + "url": "https://pub-f09f9a4ea848491198d48e329ba030e3.r2.dev/io.pilot.smolmachines/1.2.0/linux-arm64/smolvm-1.2.0-linux-arm64.tar.gz", + "sha256": "d5473cc9d5d1dbaafc0ee5bcfc98323d2246c476650e26b038761840b2fdc5a8", + "unpack": "tar.gz", + "exec_path": "smolvm-1.2.0-linux-arm64/smolvm", + "order": 1, + "role": "binary" + }, + { + "os": "linux", + "arch": "amd64", + "url": "https://pub-f09f9a4ea848491198d48e329ba030e3.r2.dev/io.pilot.smolmachines/1.2.0/linux-amd64/smolvm-1.2.0-linux-x86_64.tar.gz", + "sha256": "e0e3980d95b26362385767849f5652972378ec6e1e81ce2ec37104405698efe6", + "unpack": "tar.gz", + "exec_path": "smolvm-1.2.0-linux-x86_64/smolvm", + "order": 1, + "role": "binary" + } + ] +} \ No newline at end of file diff --git a/internal/publish/build.go b/internal/publish/build.go index 2835d03..e9b2227 100644 --- a/internal/publish/build.go +++ b/internal/publish/build.go @@ -82,6 +82,24 @@ func BuildBundle(cfg *scaffold.Config, priv ed25519.PrivateKey) (*Bundle, error) if err != nil { return nil, err } + // install.json (the registry staging spec, generated for asset-delivering + // apps) ships in every platform tarball alongside the manifest. It is the + // same across platforms — it carries each platform's asset — so it is staged + // into the shared bundle dir once, before the per-platform loop tars it. 
+ if spec, err := os.ReadFile(filepath.Join(tmp, "install.json")); err == nil { + if err := os.MkdirAll(filepath.Join(tmp, "bundle"), 0o755); err != nil { + return nil, err + } + if err := os.WriteFile(filepath.Join(tmp, "bundle", "install.json"), spec, 0o644); err != nil { + return nil, fmt.Errorf("stage install.json into bundle: %w", err) + } + // install.sh ships beside it (transparency / direct-install path). + if script, err := os.ReadFile(filepath.Join(tmp, "install.sh")); err == nil { + if err := os.WriteFile(filepath.Join(tmp, "bundle", "install.sh"), script, 0o755); err != nil { + return nil, fmt.Errorf("stage install.sh into bundle: %w", err) + } + } + } var ( platforms []PlatformBundle diff --git a/internal/publish/cli_e2e_test.go b/internal/publish/cli_e2e_test.go index 68b30ef..db03cbf 100644 --- a/internal/publish/cli_e2e_test.go +++ b/internal/publish/cli_e2e_test.go @@ -150,6 +150,87 @@ func TestCLISubmissionBuildsAndVerifies(t *testing.T) { } } +// sampleCLIAssetsSubmission fronts a CLI that is DELIVERED from the R2 artifact +// registry rather than assumed-installed: it carries per-platform artifacts with +// install order + (here) a post-stage install arg. +func sampleCLIAssetsSubmission() Submission { + s := sampleCLISubmission() + s.ID = "io.pilot.toolx" + s.Description = "Delivers and fronts the toolx CLI." 
+ s.Backend.Command = []string{"toolx"} + s.Methods = []SubMethod{ + {Name: "toolx.version", Description: "Print toolx version.", Latency: "fast", CLI: SubCLIRoute{Args: []string{"version"}}}, + {Name: "toolx.exec", Description: "Run any toolx subcommand.", Latency: "med", + Params: []SubParam{{Name: "args", Type: "array"}}, CLI: SubCLIRoute{Passthrough: true}}, + } + s.Artifacts = []SubArtifact{ + {OS: "darwin", Arch: "arm64", URL: "https://pub-x.r2.dev/io.pilot.toolx/0.1.0/darwin-arm64/toolx", + SHA256: "1111111111111111111111111111111111111111111111111111111111111111", ExecPath: "bin/toolx", Order: 1}, + {OS: "linux", Arch: "amd64", URL: "https://pub-x.r2.dev/io.pilot.toolx/0.1.0/linux-amd64/toolx", + SHA256: "2222222222222222222222222222222222222222222222222222222222222222", ExecPath: "bin/toolx", Order: 1, + Args: []string{"--version"}}, + } + return s +} + +// TestCLIAssetsSubmissionBuildsAndVerifies proves an artifact-delivering cli app +// builds through the real pipeline (scaffold → cross-compile → sign → catalogue +// self-verify) for every platform, ships install.json in the bundle, and the +// shipped manifest carries the delivery grants (proc.exec + fs.write $APP + +// net.dial to the registry host). +func TestCLIAssetsSubmissionBuildsAndVerifies(t *testing.T) { + if testing.Short() { + t.Skip("cross-compiles the cli adapter for all platforms; skipped under -short") + } + if errs := sampleCLIAssetsSubmission().Validate(); len(errs) != 0 { + t.Fatalf("a well-formed cli+assets submission must validate, got: %v", errs) + } + priv, err := LoadOrCreateKey(t.TempDir() + "/k.key") + if err != nil { + t.Fatal(err) + } + b, err := BuildBundle(sampleCLIAssetsSubmission().ToConfig(), priv) + if err != nil { + t.Fatalf("BuildBundle for an asset-delivering app failed: %v", err) + } + + // install.json must ship in the bundle and list both platforms. 
+ spec := fileFromTarball(t, b.Primary().Tarball, "./install.json") + var got struct { + Command string `json:"command"` + Assets []struct { + OS, Arch, URL, SHA256, ExecPath string + Order int + Args []string + } `json:"assets"` + } + if err := json.Unmarshal(spec, &got); err != nil { + t.Fatalf("parse shipped install.json: %v", err) + } + if got.Command != "toolx" || len(got.Assets) != 2 { + t.Fatalf("install.json: want command=toolx + 2 assets, got %+v", got) + } + + // Manifest must carry the delivery grants on top of proc.exec. + mfRaw := fileFromTarball(t, b.Primary().Tarball, "./manifest.json") + var mf struct { + Grants []struct{ Cap, Target string } `json:"grants"` + } + if err := json.Unmarshal(mfRaw, &mf); err != nil { + t.Fatalf("parse shipped manifest: %v", err) + } + want := map[string]string{"proc.exec": "toolx", "fs.write": "$APP", "net.dial": "pub-x.r2.dev"} + have := map[string]string{} + for _, g := range mf.Grants { + have[g.Cap] = g.Target + } + for cap, target := range want { + if have[cap] != target { + t.Errorf("manifest missing grant %s=%q (got %q)", cap, target, have[cap]) + } + } +} + func hasSubErr(errs []string, substr string) bool { for _, e := range errs { if strings.Contains(e, substr) { diff --git a/internal/publish/r2.go b/internal/publish/r2.go new file mode 100644 index 0000000..47a3df3 --- /dev/null +++ b/internal/publish/r2.go @@ -0,0 +1,180 @@ +package publish + +import ( + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "sort" + "strconv" + "strings" + "time" +) + +// R2 is the Cloudflare R2 artifact registry the publish-server uploads to. It +// holds S3 credentials and presigns PUT/GET URLs with SigV4 (path-style, +// UNSIGNED-PAYLOAD) using only the standard library — no AWS SDK dependency. +// +// Endpoint is the account S3 API endpoint, e.g. +// https://.r2.cloudflarestorage.com. 
PublicBase is the public read base +// (r2.dev managed URL or a custom domain) used to build install URLs; empty +// means reads go through the signing proxy instead. +type R2 struct { + Endpoint string // https://.r2.cloudflarestorage.com + Bucket string // pilot-artifacts-dev | pilot-artifacts-prod + Region string // "auto" for R2 + AccessKey string + SecretKey string + PublicBase string // https://pub-….r2.dev (optional) +} + +// R2FromEnv builds an R2 from the standard env vars, or returns (nil) when no +// credentials are configured (the artifact endpoints then report 503). +// +// R2_ENDPOINT, R2_BUCKET, R2_PUBLIC_BASE, +// AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION (default "auto") +func R2FromEnv() *R2 { + ak := os.Getenv("AWS_ACCESS_KEY_ID") + sk := os.Getenv("AWS_SECRET_ACCESS_KEY") + ep := os.Getenv("R2_ENDPOINT") + bucket := os.Getenv("R2_BUCKET") + if ak == "" || sk == "" || ep == "" || bucket == "" { + return nil + } + region := os.Getenv("AWS_DEFAULT_REGION") + if region == "" { + region = "auto" + } + return &R2{ + Endpoint: strings.TrimRight(ep, "/"), Bucket: bucket, Region: region, + AccessKey: ak, SecretKey: sk, PublicBase: strings.TrimRight(os.Getenv("R2_PUBLIC_BASE"), "/"), + } +} + +// ArtifactKey is the canonical object key for one platform binary. +func ArtifactKey(id, version, os, arch, filename string) string { + return fmt.Sprintf("%s/%s/%s-%s/%s", id, version, os, arch, filename) +} + +// PublicURL is the install-time download URL for a key: the public base when set, +// else the signing proxy path served by GET /artifact/. +func (r *R2) PublicURL(key, proxyBase string) string { + if r.PublicBase != "" { + return r.PublicBase + "/" + pathEscapeKeepSlash(key) + } + return strings.TrimRight(proxyBase, "/") + "/artifact/" + pathEscapeKeepSlash(key) +} + +// PresignPut returns a presigned URL the browser can PUT the object to directly. 
+func (r *R2) PresignPut(key string, expires time.Duration, now time.Time) (string, error) { + return r.presign("PUT", key, expires, now) +} + +// PresignGet returns a presigned URL for reading the object (signing proxy). +func (r *R2) PresignGet(key string, expires time.Duration, now time.Time) (string, error) { + return r.presign("GET", key, expires, now) +} + +func (r *R2) host() string { + h := r.Endpoint + h = strings.TrimPrefix(h, "https://") + h = strings.TrimPrefix(h, "http://") + return strings.TrimRight(h, "/") +} + +// presign builds a SigV4 query-signed URL (path-style, UNSIGNED-PAYLOAD). +func (r *R2) presign(method, key string, expires time.Duration, now time.Time) (string, error) { + if r == nil { + return "", fmt.Errorf("r2: not configured") + } + host := r.host() + now = now.UTC() + amzDate := now.Format("20060102T150405Z") + dateStamp := now.Format("20060102") + scope := dateStamp + "/" + r.Region + "/s3/aws4_request" + + q := map[string]string{ + "X-Amz-Algorithm": "AWS4-HMAC-SHA256", + "X-Amz-Credential": r.AccessKey + "/" + scope, + "X-Amz-Date": amzDate, + "X-Amz-Expires": strconv.Itoa(int(expires.Seconds())), + "X-Amz-SignedHeaders": "host", + } + canonicalURI := "/" + r.Bucket + "/" + pathEscapeKeepSlash(key) + canonicalQuery := canonicalQueryString(q) + canonicalHeaders := "host:" + host + "\n" + canonicalRequest := strings.Join([]string{ + method, canonicalURI, canonicalQuery, canonicalHeaders, "host", "UNSIGNED-PAYLOAD", + }, "\n") + + stringToSign := strings.Join([]string{ + "AWS4-HMAC-SHA256", amzDate, scope, hexSHA256([]byte(canonicalRequest)), + }, "\n") + + signingKey := hmacSHA256( + hmacSHA256( + hmacSHA256( + hmacSHA256([]byte("AWS4"+r.SecretKey), []byte(dateStamp)), + []byte(r.Region)), + []byte("s3")), + []byte("aws4_request")) + sig := hex.EncodeToString(hmacSHA256(signingKey, []byte(stringToSign))) + + return "https://" + host + canonicalURI + "?" 
+ canonicalQuery + "&X-Amz-Signature=" + sig, nil +} + +func hmacSHA256(key, data []byte) []byte { + h := hmac.New(sha256.New, key) + h.Write(data) + return h.Sum(nil) +} + +func hexSHA256(b []byte) string { + s := sha256.Sum256(b) + return hex.EncodeToString(s[:]) +} + +// canonicalQueryString sorts params by key and RFC3986-encodes both sides +// (every reserved char escaped), as SigV4 requires. +func canonicalQueryString(q map[string]string) string { + keys := make([]string, 0, len(q)) + for k := range q { + keys = append(keys, k) + } + sort.Strings(keys) + parts := make([]string, 0, len(q)) + for _, k := range keys { + parts = append(parts, rfc3986Escape(k, true)+"="+rfc3986Escape(q[k], true)) + } + return strings.Join(parts, "&") +} + +// pathEscapeKeepSlash escapes a key path RFC3986-style but keeps "/" literal +// (S3 canonical URI encodes each segment, not the separators). +func pathEscapeKeepSlash(key string) string { + segs := strings.Split(key, "/") + for i, s := range segs { + segs[i] = rfc3986Escape(s, true) + } + return strings.Join(segs, "/") +} + +// rfc3986Escape encodes per AWS rules: unreserved (A-Za-z0-9-_.~) pass through, +// everything else becomes %XX (uppercase). encodeSlash controls "/". 
+func rfc3986Escape(s string, encodeSlash bool) string { + var b strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + switch { + case c >= 'A' && c <= 'Z', c >= 'a' && c <= 'z', c >= '0' && c <= '9', + c == '-', c == '_', c == '.', c == '~': + b.WriteByte(c) + case c == '/' && !encodeSlash: + b.WriteByte(c) + default: + fmt.Fprintf(&b, "%%%02X", c) + } + } + return b.String() +} diff --git a/internal/publish/r2_test.go b/internal/publish/r2_test.go new file mode 100644 index 0000000..e748673 --- /dev/null +++ b/internal/publish/r2_test.go @@ -0,0 +1,83 @@ +package publish + +import ( + "bytes" + "io" + "net/http" + "os" + "strings" + "testing" + "time" +) + +// TestPresignStructure checks the presigned URL is well-formed (host, path-style +// key, all required SigV4 query params) without needing live credentials. +func TestPresignStructure(t *testing.T) { + r := &R2{ + Endpoint: "https://acct.r2.cloudflarestorage.com", Bucket: "pilot-artifacts-dev", + Region: "auto", AccessKey: "AKID", SecretKey: "secret", + } + key := ArtifactKey("io.pilot.smolvm", "1.2.0", "darwin", "arm64", "smolvm.tar.gz") + if key != "io.pilot.smolvm/1.2.0/darwin-arm64/smolvm.tar.gz" { + t.Fatalf("key = %q", key) + } + u, err := r.PresignPut(key, 15*time.Minute, time.Unix(1700000000, 0)) + if err != nil { + t.Fatal(err) + } + for _, want := range []string{ + "https://acct.r2.cloudflarestorage.com/pilot-artifacts-dev/io.pilot.smolvm/1.2.0/darwin-arm64/smolvm.tar.gz?", + "X-Amz-Algorithm=AWS4-HMAC-SHA256", + "X-Amz-Credential=AKID%2F", + "X-Amz-Expires=900", + "X-Amz-SignedHeaders=host", + "X-Amz-Signature=", + } { + if !strings.Contains(u, want) { + t.Errorf("presigned URL missing %q\n%s", want, u) + } + } +} + +// TestPresignRoundTripLive PUTs an object via a presigned URL and reads it back +// from the public base — the real upload path the website's Artifacts step uses. +// Gated on live R2 creds so CI without secrets skips it. 
+// +// R2_ENDPOINT, R2_BUCKET, R2_PUBLIC_BASE, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY +func TestPresignRoundTripLive(t *testing.T) { + r := R2FromEnv() + if r == nil { + t.Skip("set R2_ENDPOINT/R2_BUCKET + AWS keys to run the live presign round-trip") + } + key := ArtifactKey("io.pilot._presigntest", "0.0.0", "linux", "amd64", "probe.txt") + body := []byte("pilot presign round-trip " + time.Now().UTC().Format(time.RFC3339Nano)) + + putURL, err := r.PresignPut(key, 10*time.Minute, time.Now()) + if err != nil { + t.Fatal(err) + } + req, _ := http.NewRequest(http.MethodPut, putURL, bytes.NewReader(body)) + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("PUT: %v", err) + } + rb, _ := io.ReadAll(resp.Body) + resp.Body.Close() + if resp.StatusCode != 200 { + t.Fatalf("presigned PUT failed: HTTP %d: %s", resp.StatusCode, rb) + } + + if r.PublicBase != "" { + get, err := http.Get(r.PublicURL(key, "")) + if err != nil { + t.Fatalf("public GET: %v", err) + } + gb, _ := io.ReadAll(get.Body) + get.Body.Close() + if get.StatusCode != 200 || !bytes.Equal(gb, body) { + t.Fatalf("public read mismatch: HTTP %d, body=%q", get.StatusCode, gb) + } + } + t.Logf("presigned PUT + public read OK for %s", key) + _ = os.Stdout +} diff --git a/internal/publish/submission.go b/internal/publish/submission.go index 871d4ec..079192f 100644 --- a/internal/publish/submission.go +++ b/internal/publish/submission.go @@ -2,6 +2,7 @@ package publish import ( "fmt" + "net/url" "regexp" "sort" "strings" @@ -26,6 +27,31 @@ type Submission struct { Methods []SubMethod `json:"methods"` Listing SubListing `json:"listing"` Vendor SubVendor `json:"vendor"` + + // Artifacts is the native-binary delivery set for a cli app: the + // platform-specific binaries the publisher uploaded to the Pilot R2 artifact + // registry in the form's Artifacts step, with the install order and any + // optional install args. 
Empty for http apps and for cli apps whose command + // is already present on the host. ToConfig maps these to scaffold.Asset. + Artifacts []SubArtifact `json:"artifacts"` +} + +// SubArtifact is one uploaded, platform-specific, signed binary in the publish +// form's Artifacts step. URL is the R2 location returned by the presign upload; +// SHA256 is verified server-side against the stored object before the case is +// accepted, and again on the host at install. Mirrors scaffold.Asset. +type SubArtifact struct { + Role string `json:"role"` // "binary" (default) | "data" + Name string `json:"name"` // per-platform id (default: exec_path basename); referenced by deps + OS string `json:"os"` // linux | darwin + Arch string `json:"arch"` // amd64 | arm64 + URL string `json:"url"` // R2 public URL + SHA256 string `json:"sha256"` // 64-hex of the uploaded object + Unpack string `json:"unpack"` // "" (single file) | "tar.gz" (extract under $APP) + ExecPath string `json:"exec_path"` // dest under $APP, or path inside the extracted tree + Deps []string `json:"deps"` // names of same-platform artifacts installed first + Order int `json:"order"` // tiebreaker among independent artifacts (per platform) + Args []string `json:"args"` // optional post-stage install args } // SubBackend selects and configures the data plane the adapter forwards to: @@ -175,6 +201,7 @@ func (s Submission) Validate() []string { } else if !reURL.MatchString(strings.TrimSpace(s.Backend.BaseURL)) { e = append(e, "Backend base URL must be an absolute http(s) URL") } + e = append(e, s.validateArtifacts()...) 
if len(s.Methods) == 0 { e = append(e, "Add at least one method") } @@ -215,6 +242,62 @@ func (s Submission) Validate() []string { return e } +var ( + subSHA256 = regexp.MustCompile(`^[0-9a-f]{64}$`) + subOSOK = map[string]bool{"linux": true, "darwin": true} + subArchOK = map[string]bool{"amd64": true, "arm64": true} +) + +// validateArtifacts mirrors the scaffold asset rules at the submission boundary +// so a publisher gets clear, server-authoritative errors before any build: +// artifacts are cli-only, each names a known os/arch, an https R2 URL, a 64-hex +// sha256, and a relative exec_path under $APP; install order is unique per +// platform. (The sha is additionally re-verified against the stored R2 object on +// submit, and on the host at install.) +func (s Submission) validateArtifacts() []string { + if len(s.Artifacts) == 0 { + return nil + } + var e []string + if !s.Backend.IsCLI() { + e = append(e, "Artifacts (binary delivery) are only valid for a cli backend") + } + orders := map[string]bool{} + for i, a := range s.Artifacts { + role := a.Role + if role == "" { + role = "binary" + } + if role != "binary" && role != "data" { + e = append(e, fmt.Sprintf("Artifact %d: role %q must be binary or data", i+1, a.Role)) + } + if a.Unpack != "" && a.Unpack != "tar.gz" { + e = append(e, fmt.Sprintf("Artifact %d: unpack %q must be empty or \"tar.gz\"", i+1, a.Unpack)) + } + if !subOSOK[a.OS] { + e = append(e, fmt.Sprintf("Artifact %d: os %q must be linux or darwin", i+1, a.OS)) + } + if !subArchOK[a.Arch] { + e = append(e, fmt.Sprintf("Artifact %d: arch %q must be amd64 or arm64", i+1, a.Arch)) + } + if u, err := url.Parse(strings.TrimSpace(a.URL)); err != nil || u.Scheme != "https" || u.Host == "" { + e = append(e, fmt.Sprintf("Artifact %d: url must be an absolute https URL (the R2 upload location)", i+1)) + } + if !subSHA256.MatchString(a.SHA256) { + e = append(e, fmt.Sprintf("Artifact %d: sha256 must be 64 lowercase hex chars", i+1)) + } + if a.ExecPath == "" || 
strings.HasPrefix(a.ExecPath, "/") || strings.Contains(a.ExecPath, "..") { + e = append(e, fmt.Sprintf("Artifact %d: exec_path must be a relative path under $APP (no leading / or \"..\")", i+1)) + } + key := fmt.Sprintf("%s/%s#%d", a.OS, a.Arch, a.Order) + if orders[key] { + e = append(e, fmt.Sprintf("Artifact %d: duplicate install order %d for %s/%s", i+1, a.Order, a.OS, a.Arch)) + } + orders[key] = true + } + return e +} + // ToConfig derives the buildable adapter spec from the submission (the fields // the generator needs). Review-only fields (vendor free-text, agent-usage, // capabilities, binary URL) are intentionally not part of it. @@ -229,14 +312,15 @@ func (s Submission) ToConfig() *scaffold.Config { Description: s.Description, Backend: backend, Listing: scaffold.Listing{ - DisplayName: s.Listing.DisplayName, - Tagline: s.Listing.Tagline, - Homepage: s.Listing.Homepage, - SourceURL: s.Listing.SourceURL, - License: s.Listing.License, - Categories: s.Listing.Categories, - Keywords: s.Listing.Keywords, - Vendor: scaffold.Vendor{Name: s.Vendor.Name, URL: s.Vendor.URL, Contact: s.Vendor.Contact}, + DisplayName: s.Listing.DisplayName, + Tagline: s.Listing.Tagline, + AppDescription: s.Listing.AppDescription, + Homepage: s.Listing.Homepage, + SourceURL: s.Listing.SourceURL, + License: s.Listing.License, + Categories: s.Listing.Categories, + Keywords: s.Listing.Keywords, + Vendor: scaffold.Vendor{Name: s.Vendor.Name, URL: s.Vendor.URL, Contact: s.Vendor.Contact}, }, } // HTTP byo apps carry auth headers; managed apps are keyless (the broker @@ -285,6 +369,12 @@ func (s Submission) ToConfig() *scaffold.Config { } cfg.Methods = append(cfg.Methods, method) } + for _, a := range s.Artifacts { + cfg.Assets = append(cfg.Assets, scaffold.Asset{ + Role: a.Role, Name: a.Name, OS: a.OS, Arch: a.Arch, URL: a.URL, SHA256: a.SHA256, + Unpack: a.Unpack, ExecPath: a.ExecPath, Deps: a.Deps, Order: a.Order, Args: a.Args, + }) + } cfg.Resolve() return cfg } diff --git 
a/internal/scaffold/compile_test.go b/internal/scaffold/compile_test.go index 199a071..1e36ea9 100644 --- a/internal/scaffold/compile_test.go +++ b/internal/scaffold/compile_test.go @@ -63,6 +63,69 @@ func TestGeneratedCLIProjectCompiles(t *testing.T) { } } +// cliAssetsSpec is a cli app that delivers its binary from the R2 artifact +// registry: an asset per host plus an enumerated + passthrough method. It +// exercises the generated staging runtime (backend/stage.go) and the asset-aware +// main, both of which only render when assets are present. +const cliAssetsSpec = ` +id: io.pilot.toolx +app_version: 0.2.0 +description: "Delivers and wraps the toolx CLI." +backend: + type: cli + command: ["toolx"] +assets: + - {os: darwin, arch: arm64, url: "https://pub-x.r2.dev/io.pilot.toolx/0.2.0/darwin-arm64/toolx", sha256: "1111111111111111111111111111111111111111111111111111111111111111", exec_path: bin/toolx, order: 1} + - {os: linux, arch: amd64, url: "https://pub-x.r2.dev/io.pilot.toolx/0.2.0/linux-amd64/toolx", sha256: "2222222222222222222222222222222222222222222222222222222222222222", exec_path: bin/toolx, order: 1} +methods: + - name: toolx.version + summary: "Print version." + duration: fast + cli: {args: ["version"]} + - name: toolx.exec + summary: "Passthrough." + duration: med + cli: {passthrough: true} +` + +// TestGeneratedCLIWithAssetsCompiles type-checks the asset-delivery code paths: +// the staging runtime and the asset-aware main are generated only when an app +// ships assets, so an unused import or a bad template there is invisible to the +// no-asset cli compile test. It also asserts install.json is emitted. 
+func TestGeneratedCLIWithAssetsCompiles(t *testing.T) { + if testing.Short() { + t.Skip("skipping compile test in -short mode") + } + goBin, err := exec.LookPath("go") + if err != nil { + t.Skip("go toolchain not available") + } + + cfg := parseSpec(t, cliAssetsSpec) + dir := t.TempDir() + if _, err := Generate(cfg, dir); err != nil { + t.Fatalf("generate: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "install.json")); err != nil { + t.Fatalf("install.json must be emitted for an asset-delivering app: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "internal", "backend", "stage.go")); err != nil { + t.Fatalf("stage.go must be generated for an asset-delivering app: %v", err) + } + if sum, err := os.ReadFile(filepath.Join("..", "..", "go.sum")); err == nil { + if err := os.WriteFile(filepath.Join(dir, "go.sum"), sum, 0o644); err != nil { + t.Fatalf("seed go.sum: %v", err) + } + } + + cmd := exec.Command(goBin, "build", "./...") + cmd.Dir = dir + cmd.Env = append(os.Environ(), "GOFLAGS=-mod=mod") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("generated cli+assets project failed to compile: %v\n%s", err, out) + } +} + // TestCLIRouteValidation pins the cli route rules: passthrough is mutually // exclusive with baked args/flags, and an empty route is rejected. func TestCLIRouteValidation(t *testing.T) { diff --git a/internal/scaffold/config.go b/internal/scaffold/config.go index d55a783..33308c1 100644 --- a/internal/scaffold/config.go +++ b/internal/scaffold/config.go @@ -44,25 +44,95 @@ type Config struct { Grants Grants `yaml:"grants"` Listing Listing `yaml:"listing"` // store-page metadata (catalogue v2) - // TODO(native-apps): add `Assets []Asset` (per-OS/arch download url + sha256 + - // exec_path) for native/CLI apps that deliver a real binary. The generator - // emits manifest `assets`; the daemon fetches/verifies/stages at install. - // See docs/NATIVE-APPS.md. Coming soon — http (translation-only) ships first. 
+ // Assets is the native-binary delivery set for a cli backend: the + // platform-specific binaries the publisher uploaded to the Pilot R2 artifact + // registry. At install the generated adapter fetches the asset matching the + // host os/arch, verifies its sha256, stages it under $APP/, and (in + // `order`) runs any with install `args`. The fronted command then execs the + // staged path instead of an assumed-installed binary. Empty for http apps and + // for cli apps whose command is already present on the host. See + // docs/R2-ARTIFACT-REGISTRY.md. + Assets []Asset `yaml:"assets"` +} + +// Asset is one platform-specific file delivered from the R2 artifact registry. +// Integrity is the sha256 (verified at install); the whole bundle tarball is +// itself sha-pinned in the catalogue, so install.json (which carries these +// shas) cannot be tampered with undetected. +type Asset struct { + Role string `yaml:"role" json:"role"` // "binary" (default, chmod +x) | "data" + Name string `yaml:"name" json:"name"` // stable id within a platform (default: exec_path basename); referenced by other assets' deps + OS string `yaml:"os" json:"os"` // linux | darwin + Arch string `yaml:"arch" json:"arch"` // amd64 | arm64 + URL string `yaml:"url" json:"url"` // https download (R2 public URL) + SHA256 string `yaml:"sha256" json:"sha256"` // 64-hex of the downloaded object; verified after download + Unpack string `yaml:"unpack" json:"unpack"` // "" (single file) | "tar.gz" (extract archive under $APP) + ExecPath string `yaml:"exec_path" json:"exec_path"` // dest under $APP for a single file, or the path INSIDE the extracted tree for an archive (e.g. 
smolvm-1.2.0-darwin-arm64/smolvm) + Deps []string `yaml:"deps" json:"deps"` // names of assets on the same platform that must install first + Order int `yaml:"order" json:"order"` // tiebreaker among assets with no dependency relation (ascending) + Args []string `yaml:"args" json:"args"` // optional post-stage invocation, run as "$APP/ args..." +} + +// AssetName is the stable per-platform id used in dependency edges: the explicit +// name, else the exec_path basename. +func (a Asset) AssetName() string { + if a.Name != "" { + return a.Name + } + return a.ExecPath[strings.LastIndexByte(a.ExecPath, '/')+1:] +} + +// HasAssets reports whether this app delivers native binaries from the registry. +func (c *Config) HasAssets() bool { return len(c.Assets) > 0 } + +// PrimaryExecPath is the staged path the fronted command resolves to: the asset +// whose exec_path basename matches command[0] (the binary the adapter execs). +// Empty when there are no assets or no match (the command stays as-is). +func (c *Config) PrimaryExecPath() string { + if len(c.Backend.Command) == 0 { + return "" + } + cmd := c.Backend.Command[0] + for _, a := range c.Assets { + if a.Role == "data" { + continue + } + if base := a.ExecPath[strings.LastIndexByte(a.ExecPath, '/')+1:]; base == cmd || a.ExecPath == cmd { + return a.ExecPath + } + } + return "" +} + +// AssetHosts returns the unique hostnames the adapter must dial to fetch assets, +// for the manifest net.dial grants. Sorted for deterministic generation. +func (c *Config) AssetHosts() []string { + seen := map[string]bool{} + var hosts []string + for _, a := range c.Assets { + if u, err := url.Parse(a.URL); err == nil && u.Hostname() != "" && !seen[u.Hostname()] { + seen[u.Hostname()] = true + hosts = append(hosts, u.Hostname()) + } + } + sort.Strings(hosts) + return hosts } // Listing is the store-page metadata that drives the catalogue v2 rich view // (display_name, vendor, categories, …) and the per-app metadata.json. 
Optional // but strongly recommended — without it a published app renders a bare listing. type Listing struct { - DisplayName string `yaml:"display_name"` // default: Title-cased namespace - Tagline string `yaml:"tagline"` - Homepage string `yaml:"homepage"` - SourceURL string `yaml:"source_url"` - License string `yaml:"license"` // SPDX id, e.g. "MIT", "AGPL-3.0-or-later" - Categories []string `yaml:"categories"` - Keywords []string `yaml:"keywords"` - Vendor Vendor `yaml:"vendor"` - Changelog []ChangelogRel `yaml:"changelog"` + DisplayName string `yaml:"display_name"` // default: Title-cased namespace + Tagline string `yaml:"tagline"` + AppDescription string `yaml:"app_description"` // long-form markdown for the store page (metadata description_md); falls back to the one-line description + Homepage string `yaml:"homepage"` + SourceURL string `yaml:"source_url"` + License string `yaml:"license"` // SPDX id, e.g. "MIT", "AGPL-3.0-or-later" + Categories []string `yaml:"categories"` + Keywords []string `yaml:"keywords"` + Vendor Vendor `yaml:"vendor"` + Changelog []ChangelogRel `yaml:"changelog"` } // Vendor identifies the publisher on the store page. @@ -247,6 +317,15 @@ type RawGrant struct { var ( idPattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9_-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9_-]*[a-z0-9])?)+$`) semverPattern = regexp.MustCompile(`^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?$`) + sha256Pattern = regexp.MustCompile(`^[0-9a-f]{64}$`) +) + +// knownOS / knownArch are the host targets the registry + staging understand. +// These match scaffold/build platform tuples (DefaultPlatforms) and the daemon's +// runtime.GOOS/GOARCH values. +var ( + knownOS = map[string]bool{"linux": true, "darwin": true} + knownArch = map[string]bool{"amd64": true, "arm64": true} ) // Parse decodes a pilot.app.yaml document (strict: unknown keys are errors, so @@ -370,6 +449,7 @@ func (c *Config) Validate() []error { } } } + errs = append(errs, c.validateAssets()...) 
if len(c.Methods) == 0 { errs = append(errs, fmt.Errorf("at least one method must be declared")) } @@ -434,6 +514,146 @@ func (c *Config) Validate() []error { return errs }
+// validateAssets enforces the registry-delivery rules: assets are cli-only,
+// each names a known os/arch, an https URL, a 64-hex sha256, and an exec_path
+// that stays under $APP (no absolute path, no "..", no leading slash). Within a
+// platform, install orders must be unique so the sequence is deterministic, and
+// (os,arch,exec_path) must be unique so no two assets stage to the same path.
+// Finally each platform's dependency graph is resolved, so unknown dependency
+// names, duplicate asset names and cycles surface here as validation errors.
+//
+// It returns every problem found, or nil when the app declares no assets.
+func (c *Config) validateAssets() []error {
+	if len(c.Assets) == 0 {
+		return nil
+	}
+	var errs []error
+	if c.Backend.Type != "cli" {
+		errs = append(errs, fmt.Errorf("assets are only valid for a cli backend (an http app delivers no binary)"))
+	}
+	// Orders and binary roles are scoped per host platform: each host installs
+	// only its own (os,arch) assets, so two platforms may both use order 1, but
+	// within one platform the order must be unique (deterministic sequence) and a
+	// platform must not ship two binaries for the same exec_path.
+	orders := map[string]bool{}
+	platforms := map[string]bool{}
+	for i, a := range c.Assets {
+		// An omitted role means "binary"; the error below still quotes what the
+		// publisher actually wrote.
+		role := a.Role
+		if role == "" {
+			role = "binary"
+		}
+		if role != "binary" && role != "data" {
+			errs = append(errs, fmt.Errorf("assets[%d].role %q must be \"binary\" or \"data\"", i, a.Role))
+		}
+		if a.Unpack != "" && a.Unpack != "tar.gz" {
+			errs = append(errs, fmt.Errorf("assets[%d].unpack %q must be \"\" or \"tar.gz\"", i, a.Unpack))
+		}
+		if !knownOS[a.OS] {
+			errs = append(errs, fmt.Errorf("assets[%d].os %q must be linux or darwin", i, a.OS))
+		}
+		if !knownArch[a.Arch] {
+			errs = append(errs, fmt.Errorf("assets[%d].arch %q must be amd64 or arm64", i, a.Arch))
+		}
+		if u, err := url.Parse(a.URL); err != nil || u.Scheme != "https" || u.Host == "" {
+			errs = append(errs, fmt.Errorf("assets[%d].url %q must be an absolute https URL", i, a.URL))
+		}
+		if !sha256Pattern.MatchString(a.SHA256) {
+			errs = append(errs, fmt.Errorf("assets[%d].sha256 %q must be 64 lowercase hex chars", i, a.SHA256))
+		}
+		// Deliberately conservative: any ".." substring is refused, which also
+		// rejects harmless names such as "a..b" but can never let a path escape $APP.
+		if a.ExecPath == "" || strings.HasPrefix(a.ExecPath, "/") || strings.Contains(a.ExecPath, "..") {
+			errs = append(errs, fmt.Errorf("assets[%d].exec_path %q must be a relative path under $APP (no leading / and no \"..\")", i, a.ExecPath))
+		}
+		plat := a.OS + "/" + a.Arch
+		orderKey := fmt.Sprintf("%s#%d", plat, a.Order)
+		if orders[orderKey] {
+			errs = append(errs, fmt.Errorf("assets[%d]: duplicate install order %d for %s — orders must be unique within a platform", i, a.Order, plat))
+		}
+		orders[orderKey] = true
+		key := plat + "/" + a.ExecPath
+		if platforms[key] {
+			errs = append(errs, fmt.Errorf("assets[%d]: duplicate asset for %s at %s", i, plat, a.ExecPath))
+		}
+		platforms[key] = true
+	}
+	// Per-platform: dependency names must resolve to a sibling and form a DAG.
+	for _, plat := range c.assetPlatforms() {
+		if _, err := c.ResolveAssets(plat[0], plat[1]); err != nil {
+			errs = append(errs, fmt.Errorf("assets for %s/%s: %w", plat[0], plat[1], err))
+		}
+	}
+	return errs
+}
+
+// assetPlatforms lists the distinct (os,arch) tuples present in Assets, in the
+// order each tuple first appears.
+func (c *Config) assetPlatforms() [][2]string {
+	seen := map[string]bool{}
+	var out [][2]string
+	for _, a := range c.Assets {
+		k := a.OS + "/" + a.Arch
+		if !seen[k] {
+			seen[k] = true
+			out = append(out, [2]string{a.OS, a.Arch})
+		}
+	}
+	return out
+}
+
+// ResolveAssets returns the assets for one host platform in install order: a
+// topological sort over `deps` (an asset installs after everything it depends
+// on), with `order` then name as the deterministic tiebreaker among assets that
+// have no dependency relation.
+//
+// It errors on a duplicate asset name, an unknown dep name, or a cycle. A
+// platform with no assets yields a nil slice and no error.
+func (c *Config) ResolveAssets(os, arch string) ([]Asset, error) {
+	var plat []Asset
+	for _, a := range c.Assets {
+		if a.OS == os && a.Arch == arch {
+			plat = append(plat, a)
+		}
+	}
+	// indeg holds each asset's count of not-yet-installed dependencies and also
+	// serves as the set of known names. Names key the graph, so a repeated name
+	// would fold two assets into one node: the second could never become ready
+	// and the failure would be misreported as a cycle. Reject it explicitly.
+	indeg := make(map[string]int, len(plat))
+	for _, a := range plat {
+		name := a.AssetName()
+		if _, dup := indeg[name]; dup {
+			return nil, fmt.Errorf("duplicate asset name %q: names must be unique within a platform (set an explicit name)", name)
+		}
+		indeg[name] = 0
+	}
+	for _, a := range plat {
+		for _, d := range a.Deps {
+			if _, ok := indeg[d]; !ok {
+				return nil, fmt.Errorf("asset %q depends on unknown asset %q", a.AssetName(), d)
+			}
+			indeg[a.AssetName()]++
+		}
+	}
+	less := func(x, y Asset) bool {
+		if x.Order != y.Order {
+			return x.Order < y.Order
+		}
+		return x.AssetName() < y.AssetName()
+	}
+	// Kahn's algorithm, always taking the smallest ready asset so the result is
+	// deterministic.
+	var out []Asset
+	for len(out) < len(plat) {
+		pick := -1
+		for i, a := range plat {
+			if indeg[a.AssetName()] != 0 {
+				continue
+			}
+			if pick < 0 || less(a, plat[pick]) {
+				pick = i
+			}
+		}
+		if pick < 0 {
+			// Assets remain but none is ready: the rest depend on each other.
+			return nil, fmt.Errorf("dependency cycle among assets")
+		}
+		next := plat[pick]
+		indeg[next.AssetName()] = -1 // mark consumed so it is never ready again
+		out = append(out, next)
+		// Release everything that was waiting on the asset just emitted.
+		for _, a := range plat {
+			for _, d := range a.Deps {
+				if d == next.AssetName() {
+					indeg[a.AssetName()]--
+				}
+			}
+		}
+	}
+	return out, nil
+}
+
// BackendHost returns the net.dial target for the grant block (http only). func (c *Config) BackendHost() string { u, err := url.Parse(c.Backend.BaseURL) diff --git a/internal/scaffold/install.go b/internal/scaffold/install.go new file mode 100644 index 0000000..55a6121 --- /dev/null +++ b/internal/scaffold/install.go @@ -0,0 +1,183 @@ +package scaffold + +import ( + "encoding/json" + "fmt" + "path" + "sort" + "strings" +) + +// InstallSpec is the staging contract shipped as install.json in the bundle. The +// generated adapter reads it at startup and, for the asset(s) matching the host +// os/arch, fetches → sha256-verifies → stages under $APP/ → runs any +// install args, in the resolved install order. It is the machine-readable form +// of the publisher's Artifacts step (R2 location + dependencies + order + args).
+// +// Integrity: each asset carries its own sha256, and the whole bundle tarball is +// itself sha-pinned in the catalogue (bundle_sha256), so install.json cannot be +// altered without failing the install-time tarball check. +type InstallSpec struct { + Schema int `json:"schema"` // 1 + App string `json:"app"` // io.pilot. + Version string `json:"version"` // app_version + Command string `json:"command"` // base command the adapter execs (proc.exec target) + Assets []InstallAsset `json:"assets"` +} + +// InstallAsset mirrors scaffold.Asset in the on-disk install spec. Order is the +// RESOLVED per-platform install sequence (0-based) after dependency sorting, so +// a consumer can simply install ascending-by-order within its platform. +type InstallAsset struct { + Name string `json:"name"` + Role string `json:"role"` // binary | data + OS string `json:"os"` // linux | darwin + Arch string `json:"arch"` // amd64 | arm64 + URL string `json:"url"` // https (R2 public URL) + SHA256 string `json:"sha256"` // 64-hex of the downloaded object + Unpack string `json:"unpack"` // "" | "tar.gz" + ExecPath string `json:"exec_path"` // dest under $APP, or path inside the extracted tree + Deps []string `json:"deps"` // names of same-platform assets installed first + Order int `json:"order"` // resolved install sequence within the platform + Args []string `json:"args"` // optional post-stage invocation +} + +// resolvedAssets returns every asset across all platforms, each platform's set +// dependency-ordered (Order set to the resolved 0-based sequence). The slice is +// grouped by platform and sorted (os, arch, order) so install.json/install.sh +// are deterministic. 
+func (c *Config) resolvedAssets() ([]InstallAsset, error) {
+	var out []InstallAsset
+	plats := c.assetPlatforms()
+	sort.Slice(plats, func(i, j int) bool {
+		if plats[i][0] != plats[j][0] {
+			return plats[i][0] < plats[j][0]
+		}
+		return plats[i][1] < plats[j][1]
+	})
+	for _, p := range plats {
+		seq, err := c.ResolveAssets(p[0], p[1])
+		if err != nil {
+			// Name the platform so a cycle or unknown dep is traceable when an
+			// app ships several; this matches the wrapping validateAssets uses.
+			return nil, fmt.Errorf("assets for %s/%s: %w", p[0], p[1], err)
+		}
+		for i, a := range seq {
+			role := a.Role
+			if role == "" {
+				role = "binary"
+			}
+			// Order becomes the resolved 0-based position, not the raw yaml value.
+			out = append(out, InstallAsset{
+				Name: a.AssetName(), Role: role, OS: a.OS, Arch: a.Arch, URL: a.URL,
+				SHA256: a.SHA256, Unpack: a.Unpack, ExecPath: path.Clean(a.ExecPath),
+				Deps: a.Deps, Order: i, Args: a.Args,
+			})
+		}
+	}
+	return out, nil
+}
+
+// marshalInstallSpec builds install.json from cfg.Assets in resolved order. The
+// command recorded is Backend.Command[0], or "" when no command is configured.
+// The output ends with a trailing newline.
+func marshalInstallSpec(c *Config) ([]byte, error) {
+	var cmd string
+	if len(c.Backend.Command) > 0 {
+		cmd = c.Backend.Command[0]
+	}
+	assets, err := c.resolvedAssets()
+	if err != nil {
+		return nil, err
+	}
+	spec := InstallSpec{Schema: 1, App: c.ID, Version: c.AppVersion, Command: cmd, Assets: assets}
+	b, err := json.MarshalIndent(spec, "", " ")
+	if err != nil {
+		return nil, err
+	}
+	return append(b, '\n'), nil
+}
+
+// renderInstallScript produces a standalone, dependency-free POSIX install.sh
+// that performs the SAME install the adapter's stage.go does — fetch from R2,
+// sha256-verify, stage under $APP (single file or tar.gz), run install args — in
+// the resolved order, for the host's os/arch. It ships in the bundle for
+// transparency and for hosts/operators that want to run the install directly
+// (e.g. debugging, or a non-Go runtime). The adapter remains the default path.
+//
+// Every publisher-supplied value (url, exec_path, args, ...) is escaped for the
+// shell context it lands in, so a value containing `$`, a backtick, `"` or `\`
+// is passed through as literal data instead of being expanded or executed when
+// an operator runs the script. For values without those characters the emitted
+// text is unchanged.
+func renderInstallScript(c *Config) ([]byte, error) {
+	assets, err := c.resolvedAssets()
+	if err != nil {
+		return nil, err
+	}
+	var b strings.Builder
+	w := func(format string, a ...any) { fmt.Fprintf(&b, format, a...); b.WriteByte('\n') }
+
+	w("#!/usr/bin/env sh")
+	w("# install.sh — GENERATED by pilot-app for %s %s.", c.ID, c.AppVersion)
+	w("# Fetches this app's artifacts from the Pilot R2 registry, verifies each")
+	w("# sha256, stages them under $APP, and runs any install steps — in dependency")
+	w("# order. This mirrors the adapter's built-in staging (internal/backend/stage.go);")
+	w("# either is sufficient. Usage: APP=/path/to/app sh install.sh")
+	w("set -eu")
+	w("")
+	w(`APP="${APP:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}"`)
+	w(`OS="$(uname -s | tr '[:upper:]' '[:lower:]')"`)
+	w(`ARCH="$(uname -m)"`)
+	w(`case "$ARCH" in aarch64) ARCH=arm64;; x86_64) ARCH=amd64;; esac`)
+	w(`echo "pilot-install: staging %s artifacts for ${OS}/${ARCH} into ${APP}"`, shDQ(c.ID))
+	w("")
+	w("sha256_of() {")
+	w(` if command -v sha256sum >/dev/null 2>&1; then sha256sum "$1" | awk '{print $1}';`)
+	w(` else shasum -a 256 "$1" | awk '{print $1}'; fi`)
+	w("}")
+	w("")
+	w("stage_one() { # url sha unpack exec_path role")
+	w(` url="$1"; want="$2"; unpack="$3"; exec_path="$4"; role="$5"`)
+	w(` tmp="$(mktemp)"`)
+	w(` echo " fetch $url"`)
+	w(` curl -fSL "$url" -o "$tmp"`)
+	w(` got="$(sha256_of "$tmp")"`)
+	w(` if [ "$got" != "$want" ]; then echo " sha256 MISMATCH for $exec_path: want $want got $got" >&2; rm -f "$tmp"; exit 1; fi`)
+	w(` echo " verify ok ($want)"`)
+	w(` if [ "$unpack" = "tar.gz" ]; then`)
+	w(` tar -xzf "$tmp" -C "$APP"; rm -f "$tmp"`)
+	w(` else`)
+	w(` dest="$APP/$exec_path"; mkdir -p "$(dirname -- "$dest")"; mv "$tmp" "$dest"`)
+	w(` if [ "$role" != "data" ]; then chmod 0755 "$dest"; fi`)
+	w(` fi`)
+	w(` echo " staged $exec_path"`)
+	w("}")
+	w("")
+	// Emit a guarded block per platform so one script serves every host.
+	plats := map[string]bool{}
+	var order []string
+	for _, a := range assets {
+		k := a.OS + "/" + a.Arch
+		if !plats[k] {
+			plats[k] = true
+			order = append(order, k)
+		}
+	}
+	for _, k := range order {
+		os, arch, _ := strings.Cut(k, "/")
+		w(`if [ "$OS" = "%s" ] && [ "$ARCH" = "%s" ]; then`, shDQ(os), shDQ(arch))
+		for _, a := range assets {
+			if a.OS != os || a.Arch != arch {
+				continue
+			}
+			// Go's %q is not shell quoting: it leaves `$` and backticks live inside
+			// the double quotes and rewrites non-ASCII as \u escapes the shell will
+			// not decode. Escape for the shell's double-quote rules instead.
+			w(` stage_one "%s" "%s" "%s" "%s" "%s"`,
+				shDQ(a.URL), shDQ(a.SHA256), shDQ(a.Unpack), shDQ(a.ExecPath), shDQ(a.Role))
+			if len(a.Args) > 0 {
+				// In the echo the single-quoted argv sits INSIDE double quotes, so
+				// it needs double-quote escaping too; in the invocation it stands
+				// on its own and shJoin's quoting is already sufficient.
+				w(` echo " run %s %s"`, shDQ(a.ExecPath), shDQ(shJoin(a.Args)))
+				w(` "$APP/%s" %s`, shDQ(a.ExecPath), shJoin(a.Args))
+			}
+		}
+		w(` echo "pilot-install: done"; exit 0`)
+		w(`fi`)
+	}
+	w(`echo "pilot-install: no artifact for ${OS}/${ARCH}" >&2; exit 1`)
+	return []byte(b.String()), nil
+}
+
+// shDQEscaper backslash-escapes the four characters a POSIX shell still
+// interprets inside double quotes: backslash, double quote, dollar, backtick.
+var shDQEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "$", `\$`, "`", "\\`")
+
+// shDQ escapes s for interpolation inside a double-quoted POSIX shell string.
+// Strings without any of the four special characters are returned unchanged.
+func shDQ(s string) string {
+	return shDQEscaper.Replace(s)
+}
+
+// shJoin renders args as a single-quoted, space-joined POSIX argv fragment. An
+// embedded single quote is emitted as '\'' (close quote, escaped quote, reopen).
+func shJoin(args []string) string {
+	parts := make([]string, len(args))
+	for i, a := range args {
+		parts[i] = "'" + strings.ReplaceAll(a, "'", `'\''`) + "'"
+	}
+	return strings.Join(parts, " ")
+}
diff --git a/internal/scaffold/install_test.go b/internal/scaffold/install_test.go new file mode 100644 index 0000000..44914fc --- /dev/null +++ b/internal/scaffold/install_test.go @@ -0,0 +1,125 @@ +package scaffold + +import ( + "encoding/json" + "strings" + "testing" +) + +// depSpec: three assets on one platform where deps force an order that differs +// from the raw `order` field, proving the topological resolver (not just the +// integer order) drives the install sequence. +// +// runtime (order 9, no deps) +// plugin (order 1, deps: [runtime]) -> must come AFTER runtime despite lower order +// tool (order 5, deps: [plugin]) -> must come last +const depSpec = ` +id: io.pilot.toolx +app_version: 0.3.0 +description: "Multi-artifact app with dependencies."
+backend: + type: cli + command: ["tool"] +assets: + - {name: plugin, os: darwin, arch: arm64, url: "https://r.example/plugin", sha256: "1111111111111111111111111111111111111111111111111111111111111111", exec_path: bin/plugin, order: 1, deps: [runtime]} + - {name: tool, os: darwin, arch: arm64, url: "https://r.example/tool", sha256: "2222222222222222222222222222222222222222222222222222222222222222", exec_path: bin/tool, order: 5, deps: [plugin], args: ["--init"]} + - {name: runtime, os: darwin, arch: arm64, url: "https://r.example/runtime", sha256: "3333333333333333333333333333333333333333333333333333333333333333", exec_path: bin/runtime, order: 9} +methods: + - {name: toolx.run, summary: "run", cli: {passthrough: true}} +` + +func TestDependencyInstallOrder(t *testing.T) { + cfg := parseSpec(t, depSpec) + if errs := cfg.Validate(); len(errs) != 0 { + t.Fatalf("valid dep spec must pass: %v", errs) + } + seq, err := cfg.ResolveAssets("darwin", "arm64") + if err != nil { + t.Fatalf("resolve: %v", err) + } + got := []string{seq[0].AssetName(), seq[1].AssetName(), seq[2].AssetName()} + want := []string{"runtime", "plugin", "tool"} + for i := range want { + if got[i] != want[i] { + t.Fatalf("install order = %v, want %v (deps must override raw order)", got, want) + } + } +} + +func TestInstallSpecAndScriptHonorDeps(t *testing.T) { + cfg := parseSpec(t, depSpec) + + // install.json: resolved Order is the topo index, not the raw order field. + raw, err := marshalInstallSpec(cfg) + if err != nil { + t.Fatal(err) + } + var spec InstallSpec + if err := json.Unmarshal(raw, &spec); err != nil { + t.Fatal(err) + } + pos := map[string]int{} + for _, a := range spec.Assets { + pos[a.Name] = a.Order + } + if !(pos["runtime"] < pos["plugin"] && pos["plugin"] < pos["tool"]) { + t.Fatalf("install.json order wrong: %+v", pos) + } + + // install.sh: the staged lines must appear in dependency order, and the + // tool's install arg must be emitted after it stages. 
+ sh, err := renderInstallScript(cfg) + if err != nil { + t.Fatal(err) + } + s := string(sh) + ir := strings.Index(s, "https://r.example/runtime") + ip := strings.Index(s, "https://r.example/plugin") + it := strings.Index(s, "https://r.example/tool") + if !(ir >= 0 && ir < ip && ip < it) { + t.Fatalf("install.sh stage order wrong (runtime=%d plugin=%d tool=%d)", ir, ip, it) + } + if !strings.Contains(s, `"$APP/bin/tool" '--init'`) { + t.Errorf("install.sh missing the tool's install-arg invocation:\n%s", s) + } + if !strings.HasPrefix(s, "#!/usr/bin/env sh") { + t.Errorf("install.sh missing shebang") + } +} + +// parseNoValidate parses + resolves but does NOT fail on validation errors, so +// negative cases can assert the error themselves. +func parseNoValidate(t *testing.T, spec string) *Config { + t.Helper() + cfg, err := Parse([]byte(spec)) + if err != nil { + t.Fatalf("parse: %v", err) + } + cfg.Resolve() + return cfg +} + +func TestDependencyCycleRejected(t *testing.T) { + cyc := strings.Replace(depSpec, "exec_path: bin/runtime, order: 9}", "exec_path: bin/runtime, order: 9, deps: [tool]}", 1) + errs := parseNoValidate(t, cyc).Validate() + if !anyContains(errs, "cycle") { + t.Fatalf("a dependency cycle must be rejected, got: %v", errs) + } +} + +func TestUnknownDepRejected(t *testing.T) { + bad := strings.Replace(depSpec, "deps: [plugin]", "deps: [nope]", 1) + errs := parseNoValidate(t, bad).Validate() + if !anyContains(errs, "unknown asset") { + t.Fatalf("an unknown dep must be rejected, got: %v", errs) + } +} + +func anyContains(errs []error, sub string) bool { + for _, e := range errs { + if strings.Contains(e.Error(), sub) { + return true + } + } + return false +} diff --git a/internal/scaffold/metadata.go b/internal/scaffold/metadata.go index 2725132..e4cd2c4 100644 --- a/internal/scaffold/metadata.go +++ b/internal/scaffold/metadata.go @@ -2,6 +2,7 @@ package scaffold import ( "encoding/json" + "strings" ) // Metadata is the per-app catalogue v2 record 
(catalogue/apps//metadata.json) @@ -70,7 +71,9 @@ func BuildMetadata(c *Config) Metadata { changelog := c.Listing.Changelog if len(changelog) == 0 { - changelog = []ChangelogRel{{Version: c.AppVersion, Notes: []string{c.Description}}} + // A neutral default note — do NOT echo the one-line description here, so the + // store-page "Description" (description_md) is the only prose a viewer reads. + changelog = []ChangelogRel{{Version: c.AppVersion, Notes: []string{"Released v" + c.AppVersion}}} } // Managed apps require a daemon that provisions a per-app identity (--identity) @@ -94,7 +97,7 @@ func BuildMetadata(c *Config) Metadata { ID: c.ID, DisplayName: c.Listing.DisplayName, Tagline: c.Listing.Tagline, - DescriptionMD: c.Description, + DescriptionMD: descOr(c.Listing.AppDescription, c.Description), Vendor: MetaVendor{ Name: c.Listing.Vendor.Name, URL: c.Listing.Vendor.URL, @@ -120,3 +123,11 @@ func marshalMetadata(m Metadata) ([]byte, error) { } return append(b, '\n'), nil } + +// descOr returns the long app description when set, else the one-line description. +func descOr(long, short string) string { + if strings.TrimSpace(long) != "" { + return long + } + return short +} diff --git a/internal/scaffold/r2_e2e_test.go b/internal/scaffold/r2_e2e_test.go new file mode 100644 index 0000000..783d70b --- /dev/null +++ b/internal/scaffold/r2_e2e_test.go @@ -0,0 +1,178 @@ +//go:build !windows + +package scaffold + +import ( + "encoding/json" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "runtime" + "testing" + "time" + + "github.com/pilot-protocol/app-store/pkg/ipc" +) + +// TestR2AssetDeliveryE2E is the full native-app proof: it scaffolds a cli +// adapter for a real, complex CLI whose binary is DELIVERED from the Pilot R2 +// artifact registry (not assumed-installed), builds it, and runs it exactly as +// the daemon would. 
On first spawn the adapter reads install.json, fetches the +// tar.gz asset from its R2 URL, verifies the sha256, extracts it under $APP, and +// execs the staged command — proving discover→install→call works for a binary +// the host never had. +// +// Env-driven so the committed test needs no live bucket in CI; scripts/e2e-smolvm.sh +// uploads the artifact and sets these: +// +// PILOT_E2E_ASSET_URL https R2 public URL of the artifact (a .tar.gz) +// PILOT_E2E_ASSET_SHA256 its sha256 +// PILOT_E2E_ASSET_EXECPATH path to the command INSIDE the extracted tree +// PILOT_E2E_ASSET_CALLARG arg that makes the CLI print its version (e.g. --version) +// PILOT_E2E_ASSET_EXPECT substring the version output must contain (e.g. 1.2.0) +func TestR2AssetDeliveryE2E(t *testing.T) { + url := os.Getenv("PILOT_E2E_ASSET_URL") + sha := os.Getenv("PILOT_E2E_ASSET_SHA256") + execPath := os.Getenv("PILOT_E2E_ASSET_EXECPATH") + if url == "" || sha == "" || execPath == "" { + t.Skip("set PILOT_E2E_ASSET_URL/_SHA256/_EXECPATH to run the live R2 delivery e2e (see scripts/e2e-smolvm.sh)") + } + callArg := envOr("PILOT_E2E_ASSET_CALLARG", "--version") + expect := os.Getenv("PILOT_E2E_ASSET_EXPECT") + if _, err := exec.LookPath("go"); err != nil { + t.Skip("go toolchain not available") + } + + root := t.TempDir() + // command basename must match the staged exec_path basename so the adapter + // resolves the fronted command to the staged binary. + cmd := filepath.Base(execPath) + spec := fmt.Sprintf(` +id: io.pilot.smolvm +app_version: 1.2.0 +description: "Delivers and fronts the smolvm microVM CLI from the R2 registry." +namespace: smolvm +backend: + type: cli + command: ["%s"] +assets: + - os: %s + arch: %s + url: "%s" + sha256: "%s" + unpack: tar.gz + exec_path: "%s" + order: 1 +methods: + - name: smolvm.version + summary: "Print the smolvm version." + cli: {args: ["%s"]} + - name: smolvm.exec + summary: "Run any smolvm subcommand." 
+ cli: {passthrough: true} +`, cmd, runtime.GOOS, runtime.GOARCH, url, sha, execPath, callArg) + + cfg := parseSpec(t, spec) + proj := filepath.Join(root, "proj") + if _, err := Generate(cfg, proj); err != nil { + t.Fatalf("generate: %v", err) + } + if sum, err := os.ReadFile(filepath.Join("..", "..", "go.sum")); err == nil { + _ = os.WriteFile(filepath.Join(proj, "go.sum"), sum, 0o644) + } + + bin := filepath.Join(root, "adapter") + build := exec.Command("go", "build", "-o", bin, "./cmd/"+cfg.BinaryName) + build.Dir = proj + build.Env = append(os.Environ(), "GOFLAGS=-mod=mod") + if out, err := build.CombinedOutput(); err != nil { + t.Fatalf("build adapter: %v\n%s", err, out) + } + + // Run the adapter as the daemon would. $APP is the manifest dir (proj), where + // install.json was generated — the adapter stages the asset there on startup. + sock := filepath.Join(root, "app.sock") + adapter := exec.Command(bin, "--socket", sock, "--manifest", filepath.Join(proj, "manifest.json")) + adapter.Stderr = os.Stderr + if err := adapter.Start(); err != nil { + t.Fatalf("start adapter: %v", err) + } + defer func() { _ = adapter.Process.Kill(); _, _ = adapter.Process.Wait() }() + + // Staging downloads + extracts the artifact BEFORE the socket appears, so + // allow generous time for the fetch from R2. + deadline := time.Now().Add(150 * time.Second) + for time.Now().Before(deadline) { + if _, err := os.Stat(sock); err == nil { + break + } + time.Sleep(100 * time.Millisecond) + } + if _, err := os.Stat(sock); err != nil { + t.Fatalf("adapter socket never appeared — staging from R2 likely failed (see adapter stderr above)") + } + + // The asset must actually be on disk under $APP, delivered from R2. 
+ staged := filepath.Join(proj, filepath.FromSlash(execPath)) + if _, err := os.Stat(staged); err != nil { + t.Fatalf("staged command not found at %s: %v", staged, err) + } + + call := func(method, args string) json.RawMessage { + t.Helper() + conn, err := net.DialTimeout("unix", sock, 5*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + var out json.RawMessage + if err := ipc.Call(conn, method, json.RawMessage(args), &out); err != nil { + t.Fatalf("call %s: %v", method, err) + } + return out + } + + // smolvm.version → the adapter execs the R2-delivered binary and returns its + // output. Version text isn't JSON, so it comes back wrapped as {stdout,...}. + var res struct { + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + Exit int `json:"exit"` + } + raw := call("smolvm.version", "{}") + if err := json.Unmarshal(raw, &res); err != nil { + t.Fatalf("smolvm.version reply not the wrapped shape: %v (raw=%s)", err, raw) + } + got := res.Stdout + res.Stderr + t.Logf("smolvm.version via R2-delivered binary → exit=%d out=%q", res.Exit, got) + if expect != "" && !contains(got, expect) { + t.Fatalf("version output %q did not contain %q", got, expect) + } + + // Discovery still works locally. 
+ if h := string(call("smolvm.help", "{}")); !json.Valid([]byte(h)) { + t.Fatalf("smolvm.help invalid: %s", h) + } +} + +func envOr(k, def string) string { + if v := os.Getenv(k); v != "" { + return v + } + return def +} + +func contains(s, sub string) bool { + return len(sub) == 0 || (len(s) >= len(sub) && indexOf(s, sub) >= 0) +} + +func indexOf(s, sub string) int { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return i + } + } + return -1 +} diff --git a/internal/scaffold/scaffold.go b/internal/scaffold/scaffold.go index 94a721a..55826f4 100644 --- a/internal/scaffold/scaffold.go +++ b/internal/scaffold/scaffold.go @@ -43,6 +43,11 @@ func Generate(cfg *Config, outDir string) ([]string, error) { } case "cli": files = append(files, file{filepath.Join("internal", "backend", "exec.go"), "client_cli.go.tmpl"}) + // Native-binary delivery: emit the staging runtime only when the app + // actually ships assets (an already-installed cli needs no stager). + if cfg.HasAssets() { + files = append(files, file{filepath.Join("internal", "backend", "stage.go"), "stage.go.tmpl"}) + } } var written []string @@ -62,6 +67,29 @@ func Generate(cfg *Config, outDir string) ([]string, error) { } written = append(written, "metadata.json") + // install.json (the registry staging spec) ships in the bundle alongside the + // manifest. The adapter reads it at startup to fetch/verify/stage each asset. + // Built from a Go model (not a text template) so the JSON is assembled safely. 
+ if cfg.HasAssets() { + spec, err := marshalInstallSpec(cfg) + if err != nil { + return written, fmt.Errorf("build install.json: %w", err) + } + if err := os.WriteFile(filepath.Join(outDir, "install.json"), spec, 0o644); err != nil { + return written, fmt.Errorf("write install.json: %w", err) + } + written = append(written, "install.json") + + script, err := renderInstallScript(cfg) + if err != nil { + return written, fmt.Errorf("build install.sh: %w", err) + } + if err := os.WriteFile(filepath.Join(outDir, "install.sh"), script, 0o755); err != nil { + return written, fmt.Errorf("write install.sh: %w", err) + } + written = append(written, "install.sh") + } + for _, f := range files { rendered, err := render(f.tmpl, cfg) if err != nil { diff --git a/internal/scaffold/templates/client_cli.go.tmpl b/internal/scaffold/templates/client_cli.go.tmpl index 49eca28..9edcb5f 100644 --- a/internal/scaffold/templates/client_cli.go.tmpl +++ b/internal/scaffold/templates/client_cli.go.tmpl @@ -20,12 +20,14 @@ // - A non-zero exit is a normal result returned structurally ({stdout, stderr, // exit}); only spawn/timeout failures surface as IPC errors. // -// TODO(native-apps): COMING SOON. The real model delivers the binary via the -// app store (manifest `assets`: per-OS/arch url+sha256, staged at $APP/), -// and this runner execs that staged path — not an assumed-installed command. -// See docs/NATIVE-APPS.md. The command must currently already be installed on the -// operator's host. Exec is a declared `proc.exec` capability (user-consented at -// install); it is not yet brokered per-call. See docs/CLI-ADAPTER.md. +// Native delivery: when the app ships `assets`, the binary is delivered from the +// Pilot R2 artifact registry (per-OS/arch url+sha256 in install.json, fetched + +// verified + staged at $APP/ by backend/stage.go), and main rewrites +// this runner's base command to that staged path — so the host need not have the +// CLI pre-installed. 
With no assets, the base command is resolved from PATH as +// before. Exec is the declared `proc.exec` capability (user-consented at +// install); it is not brokered per-call. See docs/R2-ARTIFACT-REGISTRY.md and +// docs/CLI-ADAPTER.md. package backend import ( diff --git a/internal/scaffold/templates/example.pilot.app.yaml b/internal/scaffold/templates/example.pilot.app.yaml index 51d1f17..00a124b 100644 --- a/internal/scaffold/templates/example.pilot.app.yaml +++ b/internal/scaffold/templates/example.pilot.app.yaml @@ -88,7 +88,7 @@ listing: # date: "2026-06-16" # notes: ["Initial release"] -# --- a cli backend instead (COMING SOON; see docs/CLI-ADAPTER.md) --- +# --- a cli backend instead (see docs/CLI-ADAPTER.md) --- # backend: # type: cli # command: ["weathercli"] # base argv; method args appended @@ -108,3 +108,17 @@ listing: # params: {args: "verbatim argv forwarded to weathercli"} # cli: # passthrough: true + +# --- native delivery: ship the binary from the Pilot R2 artifact registry --- +# For a cli app whose binary is NOT already on the host, list the per-OS/arch +# artifacts you uploaded in the publish form's Artifacts step. At install the +# adapter fetches the asset matching the host, verifies its sha256, stages it +# under $APP, runs any install `args` (in `order`), and execs the staged path. +# See docs/R2-ARTIFACT-REGISTRY.md. 
+# assets: +# # a single self-contained binary: +# - {os: linux, arch: amd64, url: "https://artifacts.pilotprotocol.network/io.pilot.weather/0.1.0/linux-amd64/weathercli", +# sha256: "<64-hex>", exec_path: bin/weathercli, order: 1} +# # an archive that unpacks to a wrapper + libs, with a one-time setup step: +# - {os: darwin, arch: arm64, url: "https://artifacts.pilotprotocol.network/io.pilot.weather/0.1.0/darwin-arm64/weathercli.tar.gz", +# sha256: "<64-hex>", unpack: tar.gz, exec_path: weathercli-0.1.0-darwin-arm64/weathercli, order: 1, args: ["--accept-license"]} diff --git a/internal/scaffold/templates/main.go.tmpl b/internal/scaffold/templates/main.go.tmpl index 2ac0834..ec8306c 100644 --- a/internal/scaffold/templates/main.go.tmpl +++ b/internal/scaffold/templates/main.go.tmpl @@ -69,8 +69,27 @@ func main() { if err != nil { log.Fatalf("{{.BinaryName}}: backend config: %v", err) } +{{- else}} +{{- if .HasAssets}} + // Native delivery: fetch this host's binaries from the Pilot R2 artifact + // registry (verify sha → stage under $APP → run ordered install args) and + // exec the staged path, not an assumed-installed command. See backend/stage.go. 
+ appDir := os.Getenv("APP") + if *manifestPath != "" { + appDir = filepath.Dir(*manifestPath) + } + stagedCmd, err := backend.StageAssets(appDir) + if err != nil { + log.Fatalf("{{.BinaryName}}: install assets: %v", err) + } + base := {{printf "%#v" .Backend.Command}} + if stagedCmd != "" { + base[0] = stagedCmd + } + runner := backend.NewRunner(base{{range .Backend.EnvPassthrough}}, {{printf "%q" .}}{{end}}) {{- else}} runner := backend.NewRunner({{printf "%#v" .Backend.Command}}{{range .Backend.EnvPassthrough}}, {{printf "%q" .}}{{end}}) +{{- end}} {{- end}} d := ipc.NewDispatcher() diff --git a/internal/scaffold/templates/manifest.json.tmpl b/internal/scaffold/templates/manifest.json.tmpl index 860a14c..0a1539d 100644 --- a/internal/scaffold/templates/manifest.json.tmpl +++ b/internal/scaffold/templates/manifest.json.tmpl @@ -13,9 +13,6 @@ {{- end}} "{{.Namespace}}.help" ], -{{/* TODO(native-apps): emit an "assets" array (per-OS/arch download url + sha256 - + exec_path) for native/CLI apps; fold asset shas into the signed payload. - See docs/NATIVE-APPS.md. http (translation-only) apps need no assets. 
*/}} "grants": [ {"cap": "fs.read", "target": "$APP/config.json"}, {{- if .Backend.NeedsSecrets}} @@ -30,6 +27,14 @@ {{- end}} {{- if eq .Backend.Type "cli"}} {"cap": "proc.exec", "target": "{{index .Backend.Command 0}}"}, +{{- end}} +{{- if .HasAssets}} + {"cap": "fs.read", "target": "$APP/install.json"}, + {"cap": "fs.write", "target": "$APP"}, +{{- range .AssetHosts}} + {"cap": "net.dial", "target": "{{.}}", + "if": {"kind": "rate", "params": {"per": "min", "limit": {{$.Grants.RatePerMin}}}}}, +{{- end}} {{- end}} {"cap": "audit.log", "target": "*"} {{- if .Backend.X402}}, diff --git a/internal/scaffold/templates/stage.go.tmpl b/internal/scaffold/templates/stage.go.tmpl new file mode 100644 index 0000000..4f0451b --- /dev/null +++ b/internal/scaffold/templates/stage.go.tmpl @@ -0,0 +1,314 @@ +// Asset staging for {{.ID}} — native binary delivery from the Pilot R2 artifact +// registry. GENERATED by pilot-app (only for cli apps that ship assets); edit +// pilot.app.yaml and re-generate. +// +// At startup the adapter calls StageAssets($APP). It reads $APP/install.json +// (shipped in the bundle), selects the asset(s) matching this host's os/arch, +// and for each — in ascending install order — fetches it from its R2 URL, +// verifies its sha256 against the (tamper-pinned) install spec, stages it under +// $APP (a single file at exec_path, or a tar.gz extracted in place), and runs +// any install args. The fronted command then execs the staged exec_path instead +// of an assumed-installed binary. +// +// Integrity: each asset's sha256 is checked after download; the whole bundle +// tarball is itself sha-pinned in the catalogue, so install.json (and thus the +// expected shas) cannot be altered without failing the install-time check. 
+package backend + +import ( + "bufio" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "sort" + "strings" + "time" +) + +// fetchTimeout bounds a single asset download so a stuck registry can't hang the +// adapter's startup indefinitely. +const fetchTimeout = 10 * time.Minute + +// installStepTimeout bounds one post-stage install command (e.g. a one-time +// " init" step). +const installStepTimeout = 2 * time.Minute + +// maxAssetBytes caps a single download / extracted file so a malicious or +// corrupt artifact can't fill the disk. +const maxAssetBytes = 2 << 30 // 2 GiB + +type installSpec struct { + Schema int `json:"schema"` + App string `json:"app"` + Version string `json:"version"` + Command string `json:"command"` + Assets []installAsset `json:"assets"` +} + +type installAsset struct { + Role string `json:"role"` + OS string `json:"os"` + Arch string `json:"arch"` + URL string `json:"url"` + SHA256 string `json:"sha256"` + Unpack string `json:"unpack"` + ExecPath string `json:"exec_path"` + Order int `json:"order"` + Args []string `json:"args"` +} + +// StageAssets materializes the registry assets for this host and returns the +// absolute path of the staged command binary (the asset whose exec_path matches +// install.json's "command"). When there is no install.json the app ships no +// assets, so it returns ("", nil) and the caller keeps the command as-is. 
+func StageAssets(appDir string) (string, error) { + raw, err := os.ReadFile(filepath.Join(appDir, "install.json")) + if errors.Is(err, os.ErrNotExist) { + return "", nil + } + if err != nil { + return "", fmt.Errorf("stage: read install.json: %w", err) + } + var spec installSpec + if err := json.Unmarshal(raw, &spec); err != nil { + return "", fmt.Errorf("stage: parse install.json: %w", err) + } + + var host []installAsset + for _, a := range spec.Assets { + if a.OS == runtime.GOOS && a.Arch == runtime.GOARCH { + host = append(host, a) + } + } + if len(host) == 0 { + return "", fmt.Errorf("stage: no asset for %s/%s; available: %s", runtime.GOOS, runtime.GOARCH, availablePlatforms(spec.Assets)) + } + sort.SliceStable(host, func(i, j int) bool { return host[i].Order < host[j].Order }) + + var cmdPath string + for _, a := range host { + execPath, err := stageOne(appDir, a) + if err != nil { + return "", err + } + if base := path.Base(a.ExecPath); base == spec.Command || a.ExecPath == spec.Command { + cmdPath = execPath + } + if len(a.Args) > 0 { + if err := runInstallStep(execPath, a.Args); err != nil { + return "", fmt.Errorf("stage: install step for %q failed: %w", a.ExecPath, err) + } + } + } + return cmdPath, nil +} + +// stageOne ensures the asset is materialized under appDir and returns the +// absolute exec path. A single-file asset is written to $APP/exec_path; a +// tar.gz asset is extracted in place and exec_path names a file inside the +// extracted tree. Staging is idempotent: a sha-stamped marker skips re-work on +// re-spawn. 
+func stageOne(appDir string, a installAsset) (string, error) { + execAbs := filepath.Join(appDir, filepath.FromSlash(a.ExecPath)) + marker := filepath.Join(appDir, ".staged", a.SHA256) + if _, err := os.Stat(marker); err == nil { + if _, err := os.Stat(execAbs); err == nil { + return execAbs, nil // already staged + verified + } + } + + body, err := download(a.URL, a.SHA256) + if err != nil { + return "", err + } + defer os.Remove(body) + + switch a.Unpack { + case "tar.gz": + if err := extractTarGz(body, appDir); err != nil { + return "", fmt.Errorf("stage: extract %q: %w", a.ExecPath, err) + } + default: + if err := installFile(body, execAbs, a.Role); err != nil { + return "", err + } + } + if _, err := os.Stat(execAbs); err != nil { + return "", fmt.Errorf("stage: exec_path %q missing after staging %s: %w", a.ExecPath, a.URL, err) + } + if err := os.MkdirAll(filepath.Dir(marker), 0o755); err == nil { + _ = os.WriteFile(marker, nil, 0o644) + } + return execAbs, nil +} + +// download fetches url to a temp file, verifying its sha256 streams-as-it-goes. +// Returns the temp file path (caller removes it). A mismatch is fatal so a +// tampered or wrong artifact is never installed. 
+func download(url, wantSHA string) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), fetchTimeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", fmt.Errorf("stage: request %s: %w", url, err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", fmt.Errorf("stage: fetch %s: %w", url, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("stage: fetch %s: HTTP %d", url, resp.StatusCode) + } + + tmp, err := os.CreateTemp("", "pilot-asset-*") + if err != nil { + return "", fmt.Errorf("stage: temp file: %w", err) + } + h := sha256.New() + if _, err := io.Copy(io.MultiWriter(tmp, h), io.LimitReader(resp.Body, maxAssetBytes)); err != nil { + tmp.Close() + os.Remove(tmp.Name()) + return "", fmt.Errorf("stage: download %s: %w", url, err) + } + tmp.Close() + if got := hex.EncodeToString(h.Sum(nil)); got != wantSHA { + os.Remove(tmp.Name()) + return "", fmt.Errorf("stage: sha256 mismatch for %s: want %s, got %s (refusing to install tampered or wrong artifact)", url, wantSHA, got) + } + return tmp.Name(), nil +} + +// installFile atomically moves a downloaded single-file asset into place under +// $APP, chmod 0755 for a binary (default) or 0644 for data. +func installFile(src, dest, role string) error { + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return fmt.Errorf("stage: mkdir for %q: %w", dest, err) + } + mode := os.FileMode(0o755) + if role == "data" { + mode = 0o644 + } + if err := os.Chmod(src, mode); err != nil { + return fmt.Errorf("stage: chmod %q: %w", dest, err) + } + if err := os.Rename(src, dest); err != nil { + // Rename can fail across filesystems (temp dir vs $APP); fall back to copy. 
+ if err := copyFile(src, dest, mode); err != nil { + return fmt.Errorf("stage: install %q: %w", dest, err) + } + } + return nil +} + +// extractTarGz unpacks a gzipped tar under dir using the host's tar(1). The +// system tar is used deliberately: real-world CLI bundles use GNU/sparse tar +// features (e.g. sparse disk images) that Go's archive/tar rejects, while tar(1) +// is present on every linux/darwin host and handles them. Before extracting, +// every entry name is scanned and any absolute path or "../" traversal is +// rejected (zip-slip defence), since tar's own stripping is not relied upon. +func extractTarGz(archive, dir string) error { + if err := assertSafeArchive(archive); err != nil { + return err + } + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout) + defer cancel() + cmd := exec.CommandContext(ctx, "tar", "-xzf", archive, "-C", dir) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("tar -xzf: %w: %s", err, strings.TrimSpace(string(out))) + } + return nil +} + +// assertSafeArchive lists the archive (tar -tzf) and rejects any member that is +// an absolute path or escapes the extraction root via "..". +func assertSafeArchive(archive string) error { + ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout) + defer cancel() + cmd := exec.CommandContext(ctx, "tar", "-tzf", archive) + out, err := cmd.Output() + if err != nil { + return fmt.Errorf("tar -tzf (list): %w", err) + } + sc := bufio.NewScanner(strings.NewReader(string(out))) + sc.Buffer(make([]byte, 0, 64*1024), 1<<20) + for sc.Scan() { + name := strings.TrimSpace(sc.Text()) + if name == "" { + continue + } + if strings.HasPrefix(name, "/") { + return fmt.Errorf("unsafe absolute path %q in archive", name) + } + for _, seg := range strings.Split(name, "/") { + if seg == ".." 
{ + return fmt.Errorf("unsafe traversal path %q in archive", name) + } + } + } + return sc.Err() +} + +func copyFile(src, dest string, mode os.FileMode) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + out, err := os.OpenFile(dest, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode) + if err != nil { + return err + } + if _, err := io.Copy(out, in); err != nil { + out.Close() + return err + } + return out.Close() +} + +// runInstallStep runs a one-time post-stage command (the publisher's optional +// install args), e.g. " init". A non-zero exit fails the install so a +// broken setup never silently serves. +func runInstallStep(execPath string, args []string) error { + ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout) + defer cancel() + cmd := exec.CommandContext(ctx, execPath, args...) + cmd.Env = os.Environ() + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("%s %s: %w: %s", execPath, strings.Join(args, " "), err, strings.TrimSpace(string(out))) + } + return nil +} + +// availablePlatforms renders the os/arch tuples an app DOES ship, for a clear +// "no binary for your platform" error an agent can act on. +func availablePlatforms(assets []installAsset) string { + seen := map[string]bool{} + var out []string + for _, a := range assets { + k := a.OS + "/" + a.Arch + if !seen[k] { + seen[k] = true + out = append(out, k) + } + } + sort.Strings(out) + if len(out) == 0 { + return "none" + } + return strings.Join(out, ", ") +} diff --git a/scripts/ab_report.py b/scripts/ab_report.py new file mode 100755 index 0000000..44c4b5d --- /dev/null +++ b/scripts/ab_report.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +"""ab_report.py — vanilla-vs-Pilot A/B report for a native cli app. 
+ +Runs a set of EQUIVALENT commands two ways — the vanilla CLI binary directly, +and through the Pilot app store (`pilotctl appstore call ...`) — capturing +each command's output, exit code, and wall-clock time, plus the adapter's +generated `.help` document. Emits a self-contained HTML report. + +Reused by CI (.github/workflows): on a PR tied to an app, build + install that +app, then run this to produce the report artifact. + +Env / args: + PILOT_APPSTORE_ROOT, PILOT_APPSTORE_CATALOG_URL passed through to pilotctl +Usage: + ab_report.py --app io.pilot.smolvm --ns smolvm \ + --pilot /path/to/pilotctl --vanilla /path/to/smolvm --out report.html +""" +import argparse, html, json, os, subprocess, sys, time + + +def run(cmd, stdin=None, env=None): + """Run argv, return (stdout, stderr, exit, ms).""" + t0 = time.time() + try: + p = subprocess.run(cmd, capture_output=True, text=True, env=env, + input=stdin, timeout=600) + ms = int((time.time() - t0) * 1000) + return p.stdout, p.stderr, p.returncode, ms + except subprocess.TimeoutExpired: + return "", "TIMEOUT after 600s", 124, int((time.time() - t0) * 1000) + + +def pilot_call(pilot, app, method, payload, env): + """pilotctl appstore call → (raw, reply_obj, ms).""" + out, err, code, ms = run([pilot, "appstore", "call", app, method, + json.dumps(payload), "--timeout", "8m"], env=env) + reply = None + blob = out + i = blob.find("{") + if i >= 0: + j = blob.rfind("}") + try: + reply = json.loads(blob[i:j + 1]) + except Exception: + reply = None + return (out + ("\n" + err if err.strip() else ""), reply, ms) + + +def reply_view(reply, raw): + """Normalize a pilot reply to (stdout, stderr, exit).""" + if isinstance(reply, dict) and "exit" in reply: + return reply.get("stdout", ""), reply.get("stderr", ""), reply.get("exit", "") + # enumerated/help replies are raw JSON objects + return (json.dumps(reply, indent=2) if reply is not None else raw), "", 0 + + +def main(): + ap = argparse.ArgumentParser() + 
ap.add_argument("--app", required=True) + ap.add_argument("--ns", required=True) + ap.add_argument("--pilot", required=True) + ap.add_argument("--vanilla", required=True) + ap.add_argument("--out", default="ab-report.html") + a = ap.parse_args() + env = dict(os.environ) + + # Equivalent command pairs. Each: label, the smolvm argv, the pilot method + + # payload, and whether it boots a VM (for the note). + argv_run = ["machine", "run", "--net", "--image", "alpine", "--", "sh", "-c", + "echo hello from microVM; uname -a; cat /etc/alpine-release"] + argv_py = ["machine", "run", "--net", "--image", "python:3.12-alpine", "--", + "python3", "-c", "print('2**100 =', 2**100)"] + pairs = [ + dict(label="Version", note="enumerated method → `smolvm --version`", + vanilla=["--version"], method=f"{a.ns}.version", payload={}), + dict(label="List machines", note="passthrough → `smolvm machine ls`", + vanilla=["machine", "ls"], method=f"{a.ns}.exec", + payload={"args": ["machine", "ls"]}), + dict(label="Run command in an ephemeral Alpine microVM", + note="boots a real isolated VM (separate kernel)", + vanilla=argv_run, method=f"{a.ns}.exec", payload={"args": argv_run}), + dict(label="Compute in a Python microVM", + note="pulls python:3.12-alpine, runs Python in the VM", + vanilla=argv_py, method=f"{a.ns}.exec", payload={"args": argv_py}), + ] + + rows = [] + for p in pairs: + vout, verr, vcode, vms = run([a.vanilla] + p["vanilla"], env=env) + praw, preply, pms = pilot_call(a.pilot, a.app, p["method"], p["payload"], env) + pout, perr, pcode = reply_view(preply, praw) + rows.append(dict(p=p, vanilla=dict(cmd=" ".join([os.path.basename(a.vanilla)] + p["vanilla"]), + out=vout, err=verr, code=vcode, ms=vms), + pilot=dict(cmd=f"pilotctl appstore call {a.app} {p['method']} '{json.dumps(p['payload'])}'", + out=pout, err=perr, code=pcode, ms=pms))) + + # Adapter-generated help document. 
+ hraw, hreply, hms = pilot_call(a.pilot, a.app, f"{a.ns}.help", {}, env) + help_doc = json.dumps(hreply, indent=2) if hreply else hraw + vhelp_out, _, _, vhelp_ms = run([a.vanilla, "--help"], env=env) + + render(a, rows, help_doc, hms, vhelp_out, vhelp_ms) + print(f"wrote {a.out}") + + +def esc(s): + return html.escape(s if isinstance(s, str) else str(s)) + + +def render(a, rows, help_doc, hms, vhelp_out, vhelp_ms): + def block(d): + cls = "ok" if d["code"] == 0 else "bad" + body = esc(d["out"].rstrip()) + if d["err"].strip(): + body += f'\n── stderr ──\n' + esc(d["err"].rstrip()) + return (f'
{esc(d["cmd"])}
' + f'
exit {d["code"]}' + f'{d["ms"]} ms
' + f'
{body}
') + + cards = [] + for r in rows: + v, pl = r["vanilla"], r["pilot"] + delta = pl["ms"] - v["ms"] + cards.append(f""" +
+

{esc(r['p']['label'])}

+

{esc(r['p']['note'])}

+
+
Vanilla CLI
{block(v)}
+
Pilot app store
{block(pl)}
+
+
adapter overhead: {'+' if delta>=0 else ''}{delta} ms + (vanilla {v['ms']} ms · pilot {pl['ms']} ms)
+
""") + + summary = "".join( + f"{esc(r['p']['label'])}{r['vanilla']['ms']}" + f"{r['pilot']['ms']}" + f"{'+' if r['pilot']['ms']-r['vanilla']['ms']>=0 else ''}{r['pilot']['ms']-r['vanilla']['ms']}" + f"{'✓' if r['vanilla']['code']==r['pilot']['code']==0 else '⚠'}" + for r in rows) + + out = f""" +A/B report — {esc(a.app)} + +

Vanilla vs Pilot — A/B report

+

App {esc(a.app)} · delivered from the Pilot R2 artifact registry · generated by scripts/ab_report.py

+ +

Summary

+{summary}
CommandVanilla (ms)Pilot (ms)Δ overheadMatch
+ +

Adapter-generated help — {esc(a.ns)}.help (local, no backend), {hms} ms

+
+
Pilot · {esc(a.ns)}.help (generated by the adapter)
{esc(help_doc.rstrip())}
+
Vanilla · smolvm --help ({vhelp_ms} ms)
{esc(vhelp_out.rstrip())}
+
+ +

Per-command detail

+{''.join(cards)} +""" + with open(a.out, "w") as f: + f.write(out) + + +if __name__ == "__main__": + main() diff --git a/scripts/e2e-smolvm.sh b/scripts/e2e-smolvm.sh new file mode 100755 index 0000000..39449f5 --- /dev/null +++ b/scripts/e2e-smolvm.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# e2e-smolvm.sh — end-to-end proof of native-app delivery from the Pilot R2 +# artifact registry, using a real, complex CLI: smolvm (smol-machines/smolvm), a +# microVM runtime shipped as a tar.gz (wrapper script + binary + libs + images). +# +# Flow (mirrors what a publisher + a host actually do): +# 1. download smolvm's release tarball for THIS host (publisher has the artifact) +# 2. sha256 it and upload it to the R2 artifact registry (dev bucket) (the publish form's Artifacts step) +# 3. run the scaffold runtime e2e: build the generated cli adapter, (pilotctl appstore install + call) +# let it fetch+verify+extract the artifact from R2 and exec it +# +# Requirements: bash, curl/tar, aws CLI, go, and R2 S3 credentials in the env: +# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, R2_ENDPOINT, R2_BUCKET, R2_PUBLIC_BASE +# Sensible defaults target the pilot-artifacts-dev bucket. +set -euo pipefail + +SMOLVM_VERSION="${SMOLVM_VERSION:-1.2.0}" +R2_ENDPOINT="${R2_ENDPOINT:?set R2_ENDPOINT to your account S3 endpoint}" +R2_BUCKET="${R2_BUCKET:-pilot-artifacts-dev}" +R2_PUBLIC_BASE="${R2_PUBLIC_BASE:-https://pub-2328865fa11041b8a5efba00b940ec14.r2.dev}" +export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-auto}" + +if [[ -z "${AWS_ACCESS_KEY_ID:-}" || -z "${AWS_SECRET_ACCESS_KEY:-}" ]]; then + echo "error: set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY (R2 S3 keys) in the env" >&2 + exit 2 +fi + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +work="$(mktemp -d)" +trap 'rm -rf "$work"' EXIT + +# --- 1. 
host platform → smolvm asset name + pilot os/arch tuple --------------- +os="$(uname -s | tr '[:upper:]' '[:lower:]')" # darwin | linux +machine="$(uname -m)" +case "$machine" in + arm64|aarch64) smol_arch=arm64; pilot_arch=arm64 ;; + x86_64|amd64) smol_arch=x86_64; pilot_arch=amd64 ;; + *) echo "unsupported arch: $machine" >&2; exit 2 ;; +esac +dirname="smolvm-${SMOLVM_VERSION}-${os}-${smol_arch}" +tarball="${dirname}.tar.gz" +echo "==> host ${os}/${pilot_arch}; smolvm asset ${tarball}" + +# --- 2. fetch the release tarball --------------------------------------------- +echo "==> downloading smolvm ${SMOLVM_VERSION}" +if command -v gh >/dev/null 2>&1; then + gh release download "v${SMOLVM_VERSION}" --repo smol-machines/smolvm --pattern "$tarball" --dir "$work" --clobber +else + curl -fsSL "https://github.com/smol-machines/smolvm/releases/download/v${SMOLVM_VERSION}/${tarball}" -o "$work/$tarball" +fi + +sha="$(shasum -a 256 "$work/$tarball" | awk '{print $1}')" +echo "==> sha256=${sha}" + +# --- 3. upload to the R2 artifact registry (the Artifacts step) --------------- +key="io.pilot.smolvm/${SMOLVM_VERSION}/${os}-${pilot_arch}/${tarball}" +echo "==> uploading to s3://${R2_BUCKET}/${key}" +aws s3 cp "$work/$tarball" "s3://${R2_BUCKET}/${key}" --endpoint-url="$R2_ENDPOINT" >/dev/null +public_url="${R2_PUBLIC_BASE}/${key}" + +# verify the public URL serves the exact bytes we uploaded +echo "==> verifying public URL integrity" +got="$(curl -fsSL "$public_url" | shasum -a 256 | awk '{print $1}')" +[[ "$got" == "$sha" ]] || { echo "public URL sha mismatch: $got != $sha" >&2; exit 1; } +echo " ok: ${public_url}" + +# --- 4. 
run the install+call e2e against the live R2 object ------------------- +echo "==> running adapter delivery e2e (build → fetch from R2 → verify → extract → exec)" +cd "$repo_root" +PILOT_E2E_ASSET_URL="$public_url" \ +PILOT_E2E_ASSET_SHA256="$sha" \ +PILOT_E2E_ASSET_EXECPATH="${dirname}/smolvm" \ +PILOT_E2E_ASSET_CALLARG="--version" \ +PILOT_E2E_ASSET_EXPECT="$SMOLVM_VERSION" \ + go test ./internal/scaffold/ -run TestR2AssetDeliveryE2E -v -count=1 + +echo "==> e2e OK: smolvm ${SMOLVM_VERSION} delivered from R2 and executed via the pilot cli adapter"