diff --git a/docs/deploy/migrating-to-hyperframes-lambda.mdx b/docs/deploy/migrating-to-hyperframes-lambda.mdx index ea4802dd8..bb66b36a9 100644 --- a/docs/deploy/migrating-to-hyperframes-lambda.mdx +++ b/docs/deploy/migrating-to-hyperframes-lambda.mdx @@ -42,7 +42,7 @@ Most adopters' render config maps directly: | `fps` | `--fps=30` (CLI) or `config.fps` (SDK) | 24, 30, 60 only — non-integer NTSC rationals are an in-process-only feature. | | `width` / `height` | `--width` / `--height` flags, or `config.width` / `config.height` | Even integers ≤ 7680 (yuv420p parity). | | `codec: 'h264' / 'h265'` | `--codec=h264` or `--codec=h265` (mp4 only) | h265 uses libx265 with closed-GOP keyint params so chunked concat-copy round-trips losslessly. | -| Output format | `--format=mp4 / mov / png-sequence` | Distributed mode refuses webm + HDR at plan time. | +| Output format | `--format=mp4 / mov / webm / png-sequence` | webm uses libvpx-vp9 + closed-GOP concat-copy. Distributed mode still refuses HDR mp4 at plan time. | | Quality preset | `--quality=draft / standard / high` | Maps onto ffmpeg encoder presets. | | Chunk size in frames | `--chunk-size=240` (default 240) | ~8s at 30 fps; sized to fit Lambda's 15-min cap with headroom. | | Max parallel chunks | `--max-parallel-chunks=16` (default 16) | Caps the Map state's fan-out. | @@ -64,9 +64,11 @@ HyperFrames refuses `data-gpu-mode="hardware"` in distributed mode — hardware `hdrMode: 'force-hdr'` is rejected at plan time. The v1.5 backlog covers HDR mp4 via `-bsf:v hevc_metadata` re-application; for now, HDR renders use the in-process renderer outside Lambda. -### No webm distributed +### webm uses closed-GOP VP9 -VP9 in matroska doesn't round-trip cleanly through concat-copy (the moov-atom keyframe assumptions don't hold). webm renders use the in-process renderer or accept a controlled re-encode at the assemble stage — coming in v1.5. 
The Lambda handler refuses webm with `FORMAT_NOT_SUPPORTED_IN_DISTRIBUTED` so the failure is loud. +webm distributed renders go through libvpx-vp9 with `-g <chunkSize>`, `-keyint_min <chunkSize>`, `-auto-alt-ref 0`, and `-cpu-used 2`. The alt-ref disable is the load-bearing bit: libvpx-vp9's default non-displayable alt-ref frames can land anywhere in a GOP, which breaks concat-copy at chunk seams. Closed-GOP forces a keyframe at every chunk boundary so `ffmpeg -f concat -c copy` round-trips losslessly. Output is `yuva420p` to preserve alpha. Audio is muxed as Opus. + +Distributed webm files are typically ~10-25% larger than the same composition rendered in-process at the same CRF, because closed-GOP forces more keyframes than the in-process single-pass would emit. Per-chunk encode is also slower than libvpx-vp9's default speed/quality tradeoff (`-cpu-used 2` is more conservative than the default for `-deadline good`). The single-machine in-process renderer remains the right choice for short webm renders; distributed pays for itself once a render's wall-clock exceeds what one machine delivers. ### State files are local by default @@ -78,7 +80,7 @@ The default policy doc emitted by `hyperframes lambda policies user/role` uses ` ## Migration checklist -1. **Inventory** the compositions you want to migrate. Filter out anything that needs HDR or webm — those stay on your current framework for now. +1. **Inventory** the compositions you want to migrate. Filter out anything that needs HDR — that stays on your current framework for now. webm now renders in distributed mode via closed-GOP VP9 + concat-copy (see the webm section above). 2. **Translate** each composition to plain HTML. The `[Concepts](/concepts)` page covers the data-attribute conventions; the `/hyperframes` skill (`npx skills add heygen-com/hyperframes`) makes Claude / Cursor / Codex aware of them too. 3. **Wire** the new composition into your build pipeline alongside the old one.
HyperFrames doesn't need an external bundler — you can `npx hyperframes preview` against the HTML directly. 4. **Deploy** in a separate AWS account or with a `--stack-name=hyperframes-staging` first. Run a real render with `--wait`; verify the output bytes. diff --git a/packages/aws-lambda/src/events.ts b/packages/aws-lambda/src/events.ts index b8b50a9ad..d4dfad20a 100644 --- a/packages/aws-lambda/src/events.ts +++ b/packages/aws-lambda/src/events.ts @@ -16,7 +16,7 @@ * results per §2.4). */ -import type { DistributedRenderConfig } from "@hyperframes/producer/distributed"; +import type { DistributedFormat, DistributedRenderConfig } from "@hyperframes/producer/distributed"; /** Discriminator for the three roles the one Lambda image fulfills. */ export type LambdaAction = "plan" | "renderChunk" | "assemble"; @@ -65,7 +65,7 @@ export interface RenderChunkEvent { /** S3 URI prefix where the chunk output should be uploaded (`s3://bucket/{prefix}/`). */ ChunkOutputS3Prefix: string; /** Output container format from the plan's encoder.json; drives file vs frame-dir handling. */ - Format: "mp4" | "mov" | "png-sequence" | "webm"; + Format: DistributedFormat; } /** Activity C: fetch planDir + all chunks + audio, assemble, upload final. */ @@ -80,7 +80,7 @@ export interface AssembleEvent { /** Final output S3 URI (`s3://bucket/key.mp4`). */ OutputS3Uri: string; /** Output container format; drives file vs frame-dir handling. 
*/ - Format: "mp4" | "mov" | "png-sequence" | "webm"; + Format: DistributedFormat; } /** @@ -106,7 +106,7 @@ export interface PlanLambdaResult { Fps: 24 | 30 | 60; Width: number; Height: number; - Format: "mp4" | "mov" | "png-sequence" | "webm"; + Format: DistributedFormat; HasAudio: boolean; AudioS3Uri: string | null; FfmpegVersion: string; diff --git a/packages/aws-lambda/src/formatExtension.ts b/packages/aws-lambda/src/formatExtension.ts index b33ef4352..cd6833212 100644 --- a/packages/aws-lambda/src/formatExtension.ts +++ b/packages/aws-lambda/src/formatExtension.ts @@ -6,7 +6,9 @@ * looks like vs a png-sequence. */ -export type DistributedFormat = "mp4" | "mov" | "png-sequence" | "webm"; +import type { DistributedFormat } from "@hyperframes/producer/distributed"; + +export type { DistributedFormat } from "@hyperframes/producer/distributed"; // Closed-enum lookup table. TS enforces exhaustiveness via the // `Record` annotation — adding a format to diff --git a/packages/aws-lambda/src/handler.ts b/packages/aws-lambda/src/handler.ts index 78a2a3b77..283473cd3 100644 --- a/packages/aws-lambda/src/handler.ts +++ b/packages/aws-lambda/src/handler.ts @@ -25,7 +25,7 @@ import { renderChunk, } from "@hyperframes/producer/distributed"; import { resolveChromeExecutablePath } from "./chromium.js"; -import { formatExtension } from "./formatExtension.js"; +import { type DistributedFormat, formatExtension } from "./formatExtension.js"; import type { AssembleEvent, AssembleLambdaResult, @@ -433,7 +433,7 @@ async function downloadChunkObjects( s3: S3Client, uris: string[], workDir: string, - format: "mp4" | "mov" | "png-sequence" | "webm", + format: DistributedFormat, ): Promise { const chunksDir = join(workDir, "chunks"); mkdirSync(chunksDir, { recursive: true }); diff --git a/packages/aws-lambda/src/sdk/index.ts b/packages/aws-lambda/src/sdk/index.ts index a45928d53..a388bf587 100644 --- a/packages/aws-lambda/src/sdk/index.ts +++ b/packages/aws-lambda/src/sdk/index.ts @@ 
-24,3 +24,4 @@ export { } from "./costAccounting.js"; export { InvalidConfigError, validateDistributedRenderConfig } from "./validateConfig.js"; export type { SerializableDistributedRenderConfig } from "../events.js"; +export type { DistributedFormat } from "../formatExtension.js"; diff --git a/packages/aws-lambda/src/sdk/validateConfig.ts b/packages/aws-lambda/src/sdk/validateConfig.ts index dcda6c002..87d15ef6d 100644 --- a/packages/aws-lambda/src/sdk/validateConfig.ts +++ b/packages/aws-lambda/src/sdk/validateConfig.ts @@ -18,6 +18,7 @@ * size cap, GPU mode at runtime) needs the actual planner. */ +import type { DistributedFormat } from "../formatExtension.js"; import type { SerializableDistributedRenderConfig } from "../events.js"; /** Thrown for any client-side `SerializableDistributedRenderConfig` violation. */ @@ -32,7 +33,12 @@ export class InvalidConfigError extends Error { } const ALLOWED_FPS = [24, 30, 60] as const; -const ALLOWED_FORMATS = ["mp4", "mov", "png-sequence", "webm"] as const; +const ALLOWED_FORMATS = [ + "mp4", + "mov", + "png-sequence", + "webm", +] as const satisfies readonly DistributedFormat[]; const ALLOWED_CODECS = ["h264", "h265"] as const; const ALLOWED_QUALITIES = ["draft", "standard", "high"] as const; const ALLOWED_RUNTIME_CAPS = ["lambda", "temporal", "cloud-run-job", "k8s-job", "none"] as const; diff --git a/packages/cli/src/commands/lambda.ts b/packages/cli/src/commands/lambda.ts index c8c9fbff9..be1a7d192 100644 --- a/packages/cli/src/commands/lambda.ts +++ b/packages/cli/src/commands/lambda.ts @@ -9,6 +9,7 @@ */ import { defineCommand } from "citty"; +import type { DistributedFormat } from "@hyperframes/aws-lambda/sdk"; import type { Example } from "./_examples.js"; import { c } from "../ui/colors.js"; @@ -100,7 +101,7 @@ export default defineCommand({ width: { type: "string", description: "Render width in pixels" }, height: { type: "string", description: "Render height in pixels" }, fps: { type: "string", description: 
"Render fps (24 | 30 | 60)" }, - format: { type: "string", description: "mp4 | mov | png-sequence (default: mp4)" }, + format: { type: "string", description: "mp4 | mov | png-sequence | webm (default: mp4)" }, codec: { type: "string", description: "h264 | h265 (mp4 only)" }, quality: { type: "string", description: "draft | standard | high" }, "chunk-size": { type: "string", description: "Frames per chunk (default: 240)" }, @@ -325,7 +326,12 @@ function parseEnum( throw new Error(`${errorPrefix} must be ${allowed.join("|")}; got ${s}`); } -const FORMATS = ["mp4", "mov", "png-sequence"] as const; +const FORMATS = [ + "mp4", + "mov", + "png-sequence", + "webm", +] as const satisfies readonly DistributedFormat[]; const CODECS = ["h264", "h265"] as const; const QUALITIES = ["draft", "standard", "high"] as const; const CHROME_SOURCES = ["sparticuz", "chrome-headless-shell"] as const; diff --git a/packages/cli/src/commands/lambda/render.ts b/packages/cli/src/commands/lambda/render.ts index 34edc505f..9531a5e9a 100644 --- a/packages/cli/src/commands/lambda/render.ts +++ b/packages/cli/src/commands/lambda/render.ts @@ -5,7 +5,10 @@ */ import { resolve as resolvePath } from "node:path"; -import type { SerializableDistributedRenderConfig } from "@hyperframes/aws-lambda/sdk"; +import type { + DistributedFormat, + SerializableDistributedRenderConfig, +} from "@hyperframes/aws-lambda/sdk"; import { c } from "../../ui/colors.js"; import { requireStack, stateFilePath } from "./state.js"; @@ -23,7 +26,7 @@ export interface RenderArgs { fps: 24 | 30 | 60; width: number; height: number; - format: "mp4" | "mov" | "png-sequence"; + format: DistributedFormat; codec?: "h264" | "h265"; quality?: "draft" | "standard" | "high"; chunkSize?: number; diff --git a/packages/engine/src/services/chunkEncoder.test.ts b/packages/engine/src/services/chunkEncoder.test.ts index 54dcd6609..868a38ad0 100644 --- a/packages/engine/src/services/chunkEncoder.test.ts +++ 
b/packages/engine/src/services/chunkEncoder.test.ts @@ -551,10 +551,6 @@ describe("buildEncoderArgs lockGopForChunkConcat", () => { expect(args.indexOf("-x264-params")).toBe(-1); }); - // Closed-GOP for libvpx-vp9 is required to make `ffmpeg -f concat -c copy` - // stitch VP9 chunks losslessly: every chunk's first frame must be an - // independently-decodable keyframe with no alt-ref references reaching - // back across the seam. it("true appends closed-GOP args for libvpx-vp9", () => { const args = buildEncoderArgs( { @@ -570,16 +566,9 @@ describe("buildEncoderArgs lockGopForChunkConcat", () => { ); expect(args[args.indexOf("-g") + 1]).toBe("240"); expect(args[args.indexOf("-keyint_min") + 1]).toBe("240"); - // Alt-ref frames are non-displayable references that break concat-copy - // at chunk seams; closed-GOP must disable them. expect(args[args.indexOf("-auto-alt-ref") + 1]).toBe("0"); - // cpu-used is locked so workers with different libvpx-vp9 defaults - // produce visually consistent output across chunk boundaries. expect(args[args.indexOf("-cpu-used") + 1]).toBe("2"); - // libvpx-vp9 uses `-deadline good` for non-ultrafast presets — the - // closed-GOP path doesn't change that. expect(args[args.indexOf("-deadline") + 1]).toBe("good"); - // x264/x265-only params must not leak into the VP9 branch. expect(args.indexOf("-x264-params")).toBe(-1); expect(args.indexOf("-x265-params")).toBe(-1); expect(args.indexOf("-sc_threshold")).toBe(-1); diff --git a/packages/engine/src/services/chunkEncoder.ts b/packages/engine/src/services/chunkEncoder.ts index ca26bc867..bed6cd6b3 100644 --- a/packages/engine/src/services/chunkEncoder.ts +++ b/packages/engine/src/services/chunkEncoder.ts @@ -255,25 +255,13 @@ export function buildEncoderArgs( args.push("-deadline", preset === "ultrafast" ? "realtime" : "good"); args.push("-row-mt", "1"); - // Closed-GOP args for distributed chunk concat-copy. 
Mirrors the - // libx264/libx265 branch above: `lockGopForChunkConcat=true` lays a - // keyframe at every chunk boundary so `ffmpeg -f concat -c copy` can - // stitch sibling chunks losslessly. - // - // VP9-specific: `-auto-alt-ref 0` is mandatory. Alt-ref (a.k.a. - // "ARNR") frames are non-displayable references libvpx-vp9 inserts - // anywhere in the GOP for compression; they break concat-copy at - // chunk seams because the boundary frame is no longer the first - // displayable reference. The alpha branch below already disables - // alt-ref for an unrelated reason (alpha + alt-ref is unsupported); - // closed-GOP extends that to every pixel format. - // - // `-cpu-used 2` pins the libvpx-vp9 speed/quality tradeoff so chunks - // encoded on workers with different default cpu-used values still - // produce visually consistent output across seams. libvpx-vp9's - // default with `-deadline good` has drifted across versions - // historically — locking it makes the planHash round-trip - // deterministic. + // `-auto-alt-ref 0` is mandatory for chunk concat-copy: libvpx-vp9's + // alt-ref frames can reference frames in either direction inside a + // GOP, so a chunk-boundary frame is not guaranteed to be the first + // displayable reference when alt-ref is on. `-cpu-used 2` pins the + // speed/quality tradeoff against libvpx-vp9 default drift across + // versions, so the planHash round-trips deterministically across + // worker images. const lockGopVp9 = options.lockGopForChunkConcat === true; if (lockGopVp9) { if ( @@ -299,10 +287,8 @@ export function buildEncoderArgs( } if (pixelFormat === "yuva420p") { // Alpha + alt-ref is unsupported by libvpx-vp9. The closed-GOP - // branch above already disables alt-ref; only push the flag for - // the non-locked alpha case to keep the args list clean (a second - // `-auto-alt-ref 0` is harmless but noisier in `ffmpeg -loglevel` - // diagnostics). 
+ // branch above already emits `-auto-alt-ref 0`, so skip the + // duplicate push. if (!lockGopVp9) { args.push("-auto-alt-ref", "0"); } diff --git a/packages/producer/src/distributed.ts b/packages/producer/src/distributed.ts index 64bca0c1c..f2db8f660 100644 --- a/packages/producer/src/distributed.ts +++ b/packages/producer/src/distributed.ts @@ -70,6 +70,11 @@ export { // ── Assemble (Activity C) ─────────────────────────────────────────────────── export { assemble, type AssembleResult } from "./services/distributed/assemble.js"; +// ── Format union ──────────────────────────────────────────────────────────── +// Canonical output-format type. The aws-lambda package re-exports it so +// CLI / adopter SDKs can derive runtime allowlists from one source. +export type { DistributedFormat } from "./services/distributed/shared.js"; + // ── Plan-time shared types from `freezePlan` ─────────────────────────────── // Re-exported so adopters that deserialize a planDir's `meta/encoder.json` // or `meta/chunks.json` see the same shapes the producer wrote them as. 
diff --git a/packages/producer/src/regression-harness-distributed.ts b/packages/producer/src/regression-harness-distributed.ts index 5283b31f1..545980191 100644 --- a/packages/producer/src/regression-harness-distributed.ts +++ b/packages/producer/src/regression-harness-distributed.ts @@ -35,6 +35,7 @@ import { existsSync, mkdirSync } from "node:fs"; import { join } from "node:path"; import type { Fps } from "@hyperframes/core"; import { assemble, plan, renderChunk } from "./distributed.js"; +import type { DistributedFormat } from "./services/distributed/shared.js"; /** * Three-mode contract that backs `--mode=` on the regression @@ -81,7 +82,7 @@ export type DistributedSupportResult = { supported: true } | { supported: false; */ export function checkDistributedSupport(renderConfig: { fps: Fps; - format?: "mp4" | "webm" | "mov" | "png-sequence"; + format?: DistributedFormat; hdr?: boolean; }): DistributedSupportResult { if (renderConfig.fps.den !== 1) { @@ -120,7 +121,7 @@ export interface RunDistributedSimulatedInput { renderedOutputPath: string; /** From the fixture's renderConfig — must pass `checkDistributedSupport`. */ fps: 24 | 30 | 60; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; /** * Codec for `format: "mp4"`. Defaults to `"h264"`; pass `"h265"` to * exercise the libx265 closed-GOP path. Ignored for non-mp4 formats — diff --git a/packages/producer/src/regression-harness-lambda-local-types.ts b/packages/producer/src/regression-harness-lambda-local-types.ts index 54d11c25f..528bfb9d4 100644 --- a/packages/producer/src/regression-harness-lambda-local-types.ts +++ b/packages/producer/src/regression-harness-lambda-local-types.ts @@ -9,6 +9,8 @@ * type-check pass. */ +import type { DistributedFormat } from "./services/distributed/shared.js"; + /** Inputs for {@link runLambdaLocalRender}. Same contract as `runDistributedSimulatedRender`. 
*/ export interface RunLambdaLocalInput { projectDir: string; @@ -26,7 +28,7 @@ export interface RunLambdaLocalInput { */ width: number; height: number; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; codec?: "h264" | "h265"; chunkSize?: number; maxParallelChunks?: number; diff --git a/packages/producer/src/regression-harness.ts b/packages/producer/src/regression-harness.ts index 19d5b8f3e..3a59df870 100644 --- a/packages/producer/src/regression-harness.ts +++ b/packages/producer/src/regression-harness.ts @@ -51,6 +51,7 @@ import { // imports) into the program even though the tsconfig `exclude` list // nominally hides it. `tsx` resolves the path normally at runtime. import type { RunLambdaLocalRender } from "./regression-harness-lambda-local-types.js"; +import type { DistributedFormat } from "./services/distributed/shared.js"; const LAMBDA_LOCAL_MODULE = "./regression-harness-lambda-local.js"; @@ -97,7 +98,7 @@ type TestMetadata = { * `"mp4"`. Distributed mode supports all four — webm goes through * libvpx-vp9 with closed-GOP concat-copy. */ - format?: "mp4" | "webm" | "mov" | "png-sequence"; + format?: DistributedFormat; /** * Codec selection for `format: "mp4"`, forwarded to * `DistributedRenderConfig.codec`. 
The in-process renderer doesn't take diff --git a/packages/producer/src/services/deterministicFonts.ts b/packages/producer/src/services/deterministicFonts.ts index d96c7060e..0dd0ebbb1 100644 --- a/packages/producer/src/services/deterministicFonts.ts +++ b/packages/producer/src/services/deterministicFonts.ts @@ -1,5 +1,5 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; -import { homedir } from "node:os"; +import { homedir, tmpdir } from "node:os"; import { join } from "node:path"; import { parseHTML } from "linkedom"; @@ -330,7 +330,19 @@ function warnUnresolvedFonts(unresolved: string[]): void { // Google Fonts on-demand fetch + local cache // --------------------------------------------------------------------------- -const GOOGLE_FONTS_CACHE_DIR = join(homedir(), ".cache", "hyperframes", "fonts"); +// On AWS Lambda `$HOME` resolves to a `/home/sbx_*` tree that's +// read-only; only `/tmp` is writable. Route the cache there when +// running inside Lambda, and honor `HYPERFRAMES_FONT_CACHE_DIR` as +// an explicit override for any environment. +function resolveFontCacheRoot(): string { + return ( + process.env.HYPERFRAMES_FONT_CACHE_DIR ?? + (process.env.AWS_LAMBDA_FUNCTION_NAME + ? 
join(tmpdir(), "hyperframes", "fonts") + : join(homedir(), ".cache", "hyperframes", "fonts")) + ); +} +const GOOGLE_FONTS_CACHE_DIR = resolveFontCacheRoot(); // Chrome UA triggers woff2 responses from Google Fonts CSS API const WOFF2_USER_AGENT = diff --git a/packages/producer/src/services/distributed/assemble.ts b/packages/producer/src/services/distributed/assemble.ts index ec47f1178..79b59d19e 100644 --- a/packages/producer/src/services/distributed/assemble.ts +++ b/packages/producer/src/services/distributed/assemble.ts @@ -38,6 +38,7 @@ import { applyFaststart, muxVideoWithAudio, runFfmpeg } from "@hyperframes/engin import { defaultLogger, type ProducerLogger } from "../../logger.js"; import { padOrTrimAudioToVideoFrameCount } from "../render/audioPadTrim.js"; import type { ChunkSliceJson } from "../render/stages/freezePlan.js"; +import type { DistributedFormat } from "./shared.js"; /** * Result of {@link assemble}. `fileSize` reflects the final file on disk @@ -61,7 +62,7 @@ interface PlanJsonForAssemble { fpsDen: number; width: number; height: number; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; }; } diff --git a/packages/producer/src/services/distributed/plan.test.ts b/packages/producer/src/services/distributed/plan.test.ts index 462cf77d8..1229dde3b 100644 --- a/packages/producer/src/services/distributed/plan.test.ts +++ b/packages/producer/src/services/distributed/plan.test.ts @@ -467,10 +467,9 @@ describe("plan() — webm format (distributed VP9)", () => { it( 'maps `format: "webm"` to libvpx-vp9-software + yuva420p', async () => { - // Webm is distributed-supported via closed-GOP concat-copy (PR 8.1 - // proved the contract; this test pins the plan-time encoder choice). - // yuva420p preserves the format's reason for existing — alpha video - // for web playback over colored backgrounds. 
+ // Pins the plan-time encoder choice for webm: libvpx-vp9-software + // with yuva420p so the format's alpha-channel contract round-trips + // through chunked rendering. const planDir = join(runRoot, "plan-webm-vp9"); mkdirSync(planDir, { recursive: true }); const result = await plan( diff --git a/packages/producer/src/services/distributed/plan.ts b/packages/producer/src/services/distributed/plan.ts index fff15a707..94c0f49f4 100644 --- a/packages/producer/src/services/distributed/plan.ts +++ b/packages/producer/src/services/distributed/plan.ts @@ -36,7 +36,7 @@ import { } from "node:fs"; import { join, relative, sep } from "node:path"; import { type CanvasResolution } from "@hyperframes/core"; -import { type EngineConfig, resolveConfig } from "@hyperframes/engine"; +import { type EngineConfig, getEncoderPreset, resolveConfig } from "@hyperframes/engine"; import { defaultLogger, type ProducerLogger } from "../../logger.js"; import { runAudioStage } from "../render/stages/audioStage.js"; import { runCompileStage } from "../render/stages/compileStage.js"; @@ -57,6 +57,7 @@ import { validateNoGpuEncode, validateNoSystemFonts } from "../render/planValida import { snapshotRuntimeEnv } from "../render/runtimeEnvSnapshot.js"; import { buildSyntheticRenderJob, + type DistributedFormat, PLAN_VIDEOS_META_RELATIVE_PATH, type PlanVideosJson, readFfmpegVersion, @@ -86,7 +87,7 @@ export interface DistributedRenderConfig { * `tests/distributed/_smoke/webm-concat-copy.test.ts` for the gating * experiment that proved the contract. */ - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; /** * Codec selection for `format: "mp4"`. 
`"h264"` (the default) → libx264 + * yuv420p; `"h265"` → libx265 + yuv420p with closed-GOP keyint params @@ -176,7 +177,7 @@ export interface PlanResult { fps: 24 | 30 | 60; width: number; height: number; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; ffmpegVersion: string; producerVersion: string; } @@ -522,28 +523,15 @@ function buildLockedRenderConfig(input: { * caller error immediately rather than producing a silently-wrong planDir * whose chunk worker would override the codec choice. */ -function resolveEncoderTriple(config: DistributedRenderConfig): { +type EncoderTriple = { encoder: LockedRenderConfig["encoder"]; pixelFormat: string; preset: string; -} { +}; + +function resolveEncoderTriple(config: DistributedRenderConfig): EncoderTriple { if (config.format === "mp4") { - const codec = config.codec ?? "h264"; - // Explicit unknown-codec throw rather than silent fall-through to h264. - // A JS caller building config from JSON who passes `codec: "h266"` or - // `codec: "H265"` (typo / wrong case) would otherwise produce h264 - // output with no signal. The non-mp4-format branch below already throws - // for the symmetric "wrong combination" case — match that shape. - if (codec !== "h264" && codec !== "h265") { - throw new Error( - `[plan] DistributedRenderConfig.codec must be "h264" or "h265" for format="mp4"; ` + - `received ${JSON.stringify(codec)}. Omit codec to default to h264.`, - ); - } - if (codec === "h265") { - return { encoder: "libx265-software", pixelFormat: "yuv420p", preset: "medium" }; - } - return { encoder: "libx264-software", pixelFormat: "yuv420p", preset: "medium" }; + return resolveMp4EncoderTriple(config.codec); } if (config.codec !== undefined) { throw new Error( @@ -553,16 +541,46 @@ function resolveEncoderTriple(config: DistributedRenderConfig): { `libvpx-vp9, and png-sequence has no encoder.`, ); } - if (config.format === "mov") { + return resolveNonMp4EncoderTriple(config.format, config.quality ?? 
"standard"); +} + +function resolveMp4EncoderTriple(codec: DistributedRenderConfig["codec"]): EncoderTriple { + const c = codec ?? "h264"; + // Explicit unknown-codec throw rather than silent fall-through to h264. + // A JS caller building config from JSON who passes `codec: "h266"` or + // `codec: "H265"` (typo / wrong case) would otherwise produce h264 + // output with no signal. The non-mp4-format branch already throws for + // the symmetric "wrong combination" case — match that shape. + if (c !== "h264" && c !== "h265") { + throw new Error( + `[plan] DistributedRenderConfig.codec must be "h264" or "h265" for format="mp4"; ` + + `received ${JSON.stringify(c)}. Omit codec to default to h264.`, + ); + } + if (c === "h265") { + return { encoder: "libx265-software", pixelFormat: "yuv420p", preset: "medium" }; + } + return { encoder: "libx264-software", pixelFormat: "yuv420p", preset: "medium" }; +} + +function resolveNonMp4EncoderTriple( + format: Exclude, + quality: "draft" | "standard" | "high", +): EncoderTriple { + if (format === "mov") { return { encoder: "prores-software", pixelFormat: "yuva444p10le", preset: "4444" }; } - if (config.format === "webm") { - // webm distributes via closed-GOP libvpx-vp9 + concat-copy. yuva420p - // matches the in-process renderer's webm pixel format (alpha-capable - // — the format's main reason for existing). `getEncoderPreset` in - // the engine returns "good" for non-draft quality tiers; that becomes - // libvpx-vp9's `-deadline good` at encode time. - return { encoder: "libvpx-vp9-software", pixelFormat: "yuva420p", preset: "good" }; + if (format === "webm") { + // Defer to `getEncoderPreset` for the libvpx-vp9 preset string so the + // draft tier maps to `-deadline realtime` instead of `-deadline good`; + // hardcoding "good" here would silently override that mapping for + // `quality: "draft"`. 
+ const enginePreset = getEncoderPreset(quality, "webm"); + return { + encoder: "libvpx-vp9-software", + pixelFormat: enginePreset.pixelFormat, + preset: enginePreset.preset, + }; } return { encoder: "png-sequence", pixelFormat: "rgba", preset: "lossless" }; } diff --git a/packages/producer/src/services/distributed/renderChunk.ts b/packages/producer/src/services/distributed/renderChunk.ts index 6c8567ce8..c434f287c 100644 --- a/packages/producer/src/services/distributed/renderChunk.ts +++ b/packages/producer/src/services/distributed/renderChunk.ts @@ -68,6 +68,7 @@ import { applyRuntimeEnvSnapshot } from "../render/runtimeEnvSnapshot.js"; import { buildVirtualTimeShim, createFileServer, type FileServerHandle } from "../fileServer.js"; import { buildSyntheticRenderJob, + type DistributedFormat, PLAN_VIDEOS_META_RELATIVE_PATH, type PlanVideosJson, readFfmpegVersion, @@ -199,7 +200,7 @@ interface PlanJson { fpsDen: number; width: number; height: number; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; }; chunkCount: number; totalFrames: number; diff --git a/packages/producer/src/services/distributed/shared.ts b/packages/producer/src/services/distributed/shared.ts index 309fea1f5..86f678d7d 100644 --- a/packages/producer/src/services/distributed/shared.ts +++ b/packages/producer/src/services/distributed/shared.ts @@ -14,6 +14,14 @@ import { type VideoElement, type VideoMetadata } from "@hyperframes/engine"; import { type RenderConfig, type RenderJob, createRenderJob } from "../renderOrchestrator.js"; import { defaultLogger, type ProducerLogger } from "../../logger.js"; +/** + * Output container formats the distributed pipeline supports end-to-end. + * Single source of truth for the format union — `plan()`, `renderChunk()`, + * `assemble()`, the aws-lambda handler, and the harness all derive from + * this type. Adding a new format starts here. 
+ */ +export type DistributedFormat = "mp4" | "mov" | "png-sequence" | "webm"; + /** * Filename of the per-video extraction manifest written by `plan()` into * `/meta/` and consumed by `renderChunk()` to rebuild the diff --git a/packages/producer/src/services/render/stages/planHash.ts b/packages/producer/src/services/render/stages/planHash.ts index ae03e4157..127db7c24 100644 --- a/packages/producer/src/services/render/stages/planHash.ts +++ b/packages/producer/src/services/render/stages/planHash.ts @@ -32,6 +32,7 @@ */ import { createHash } from "node:crypto"; +import type { DistributedFormat } from "../../distributed/shared.js"; /** * Schema-version prefix mixed into every digest. Bump the trailing version @@ -71,7 +72,7 @@ export interface PlanDimensions { fpsDen: number; width: number; height: number; - format: "mp4" | "mov" | "png-sequence" | "webm"; + format: DistributedFormat; } export interface PlanHashInput { diff --git a/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts b/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts index be295aa30..e5f9e3d69 100644 --- a/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts +++ b/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts @@ -1,36 +1,20 @@ /** * Smoke test for the WebM (VP9) distributed concat-copy path. * - * PR 8.1 gating experiment — answers the question: - * "Does `buildEncoderArgs(..., { codec: 'vp9', lockGopForChunkConcat: true, gopSize: N })` - * produce VP9 chunk files that `ffmpeg -f concat -c copy` can stitch - * into a single playable WebM?" + * Asserts that `buildEncoderArgs(..., { codec: "vp9", + * lockGopForChunkConcat: true, gopSize: N })` produces VP9 chunk files + * that `ffmpeg -f concat -c copy` can stitch into a single playable + * WebM. * - * YES → PR 8.2 ships Path A: drop webm from FORMAT_NOT_SUPPORTED_IN_DISTRIBUTED - * and wire lockGopForChunkConcat=true through the distributed plan(). 
+ * Uses direct ffmpeg invocation instead of `plan() / renderChunk() / + * assemble()` so the contract this test pins is exactly the encoder-arg + * surface — independent of plan-time validation, file servers, browser + * capture, and the rest of the distributed-pipeline stack. * - * NO → PR 8.2 ships Path B: re-encode the concat'd chunks in `assemble()` - * (slower; loses encode parallelism but is reliably correct). - * - * Why direct ffmpeg invocation (instead of plan/renderChunk/assemble): the - * full distributed pipeline currently REFUSES webm at plan time, so we can't - * exercise it end-to-end yet. This smoke test bypasses the producer pipeline - * and only validates the ffmpeg-level contract — the encoder args we'll wire - * into the pipeline in 8.2. - * - * The test generates 60 frames (2s @ 30fps) of an animated test pattern - * (`testsrc2` from ffmpeg's lavfi), splits them into 4 chunks of 15 frames - * each via direct `ffmpeg` invocations using the args from - * `buildEncoderArgs(..., { lockGopForChunkConcat: true, gopSize: 15 })`, - * concat-copies them, and runs three independent verifications: - * - * 1. `ffprobe -show_streams` — output is a valid WebM with one VP9 stream - * 2. `ffmpeg -i ... -f null -` — output decodes cleanly (no seam errors) - * 3. `ffprobe -count_frames` — frame count equals sum of chunk frames - * - * If concat-copy fails in any way the test reports the precise failure - * fingerprint in the error message so PR 8.2 has the data it needs to pick - * Path A vs Path B. + * Each chunk + concat-copy + ffprobe verification surfaces its failure + * fingerprint in the error message so a regression-driven concat-copy + * failure (alt-ref reaching across a seam, libvpx bumping its default + * cpu-used, etc.) can be diagnosed without re-running locally. 
*/ import { afterAll, beforeAll, describe, expect, it } from "bun:test"; @@ -201,14 +185,13 @@ describe("webm VP9 concat-copy smoke", () => { outputPath, ]); - // Surface ffmpeg's full stderr in the assertion message so 8.2 has the - // failure fingerprint when concat-copy is broken (e.g. - // "Non-monotonous DTS in output stream", "missing keyframe at chunk 2", - // matroska/webm cluster errors). + // Surface ffmpeg's full stderr in the assertion message — a broken + // concat-copy fails with something specific ("Non-monotonous DTS", + // "missing keyframe at chunk 2", matroska/webm cluster errors) that + // the message above wouldn't disambiguate. if (result.exitCode !== 0) { throw new Error( `[smoke concat-copy] failed (exit ${result.exitCode}). ` + - `This means PR 8.2 must take Path B (re-encode in assemble). ` + `Failure fingerprint: ${result.stderr.slice(-1000)}`, ); } @@ -264,7 +247,7 @@ describe("webm VP9 concat-copy smoke", () => { throw new Error( `[smoke decode-test] ffmpeg -f null - reported decode errors ` + `(exit ${result.exitCode}). This means concat-copy seams produce ` + - `invalid VP9 references — PR 8.2 must take Path B (re-encode in assemble). ` + + `invalid VP9 references ` + `Failure fingerprint: ${result.stderr.slice(-1000) || "(no stderr; check exit code)"}`, ); } @@ -296,9 +279,8 @@ describe("webm VP9 concat-copy smoke", () => { const nbFrames = Number.parseInt(result.stdout.trim(), 10); if (!Number.isFinite(nbFrames) || nbFrames !== TOTAL_FRAMES) { throw new Error( - `[smoke ffprobe count_frames] expected ${TOTAL_FRAMES} frames, got ${result.stdout.trim()}. 
` + - `This means concat-copy dropped frames at one or more chunk seams — ` + - `PR 8.2 must take Path B (re-encode in assemble).`, + `[smoke ffprobe count_frames] expected ${TOTAL_FRAMES} frames, got ${result.stdout.trim()} ` + + `— concat-copy dropped frames at one or more chunk seams.`, ); } expect(nbFrames).toBe(TOTAL_FRAMES); @@ -306,231 +288,195 @@ describe("webm VP9 concat-copy smoke", () => { }); describe("webm VP9 concat-copy smoke (yuva420p alpha)", () => { - // The wired-up distributed webm path uses yuva420p, not yuv420p — that - // matches the in-process renderer's webm pixel format (alpha video, the - // format's main reason for existing). yuva420p VP9 streams have a few - // extra concat-copy hazards that yuv420p doesn't (the alpha sub-stream - // is muxed via `-metadata:s:v:0 alpha_mode=1` and concat-copy must - // preserve that metadata across chunks). - // - // This block re-runs the same three verifications on yuva420p output to - // pin the contract for what the distributed pipeline actually emits. - let alphaRoot: string; - let alphaFramesDir: string; - let alphaChunkDir: string; - let alphaConcatListPath: string; - let alphaOutputPath: string; - - beforeAll(() => { - alphaRoot = mkdtempSync(join(tmpdir(), "hf-webm-concat-smoke-alpha-")); - alphaFramesDir = join(alphaRoot, "frames"); - alphaChunkDir = join(alphaRoot, "chunks"); - mkdirSync(alphaFramesDir, { recursive: true }); - mkdirSync(alphaChunkDir, { recursive: true }); - alphaConcatListPath = join(alphaRoot, "concat-list.txt"); - alphaOutputPath = join(alphaRoot, "output.webm"); - - // For alpha frames, generate RGBA PNGs with spatially-varying alpha - // so the encoder can't drop the alpha plane as uniform/redundant. - // `testsrc2 + format=rgba` (the prior shape) produced uniformly- - // opaque alpha and the libvpx-vp9 encoder silently downgraded the - // output to yuv420p — masking any bug in the alpha pipeline. 
Here - // `geq=a='X*255/W'` writes a horizontal alpha gradient on top of - // the testsrc2 RGB so the alpha track has real per-pixel content. - const frameGen = runFfmpegSync([ - "-hide_banner", - "-y", - "-f", - "lavfi", - "-i", - `testsrc2=s=${WIDTH}x${HEIGHT}:r=${FPS}:d=${TOTAL_FRAMES / FPS}`, - "-vf", - "format=rgba,geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='X*255/W'", - "-frames:v", - String(TOTAL_FRAMES), - join(alphaFramesDir, "frame_%04d.png"), - ]); - if (frameGen.exitCode !== 0) { - throw new Error( - `[alpha smoke setup] frame generation failed (exit ${frameGen.exitCode}): ` + - frameGen.stderr.slice(-400), - ); - } - }); + // The wired-up distributed webm path uses yuva420p. This block proves + // (a) the closed-GOP args + alpha pixel format don't break concat-copy + // at the bitstream level, and (b) the alpha plane round-trips with + // real spatial content — catching the failure mode where the encoder + // accepted yuva420p input but dropped the alpha sub-stream silently. + // The source frames carry a per-pixel alpha gradient so the encoder + // cannot treat the alpha plane as uniform/redundant and drop it. 
+ it("encode + concat-copy + decode round-trip works for yuva420p", () => { + const alphaRoot = mkdtempSync(join(tmpdir(), "hf-webm-concat-smoke-alpha-")); + try { + const alphaFramesDir = join(alphaRoot, "frames"); + const alphaChunkDir = join(alphaRoot, "chunks"); + mkdirSync(alphaFramesDir, { recursive: true }); + mkdirSync(alphaChunkDir, { recursive: true }); + const alphaConcatListPath = join(alphaRoot, "concat-list.txt"); + const alphaOutputPath = join(alphaRoot, "output.webm"); - afterAll(() => { - rmSync(alphaRoot, { recursive: true, force: true }); - }); - - it("encodes 4 yuva420p VP9 chunks with closed-GOP args", () => { - for (let chunkIdx = 0; chunkIdx < CHUNK_COUNT; chunkIdx++) { - const startNumber = chunkIdx * CHUNK_SIZE + 1; - const chunkPath = join(alphaChunkDir, `chunk_${String(chunkIdx).padStart(4, "0")}.webm`); - const inputArgs = [ - "-framerate", - String(FPS), - "-start_number", - String(startNumber), + // `geq=a='X*255/W'` writes a horizontal alpha gradient on top of + // the testsrc2 RGB. `testsrc2 + format=rgba` alone produced + // uniformly-opaque alpha and libvpx-vp9 silently downgraded the + // output to yuv420p, masking any alpha-pipeline bug — the + // gradient ensures the encoder has spatially-varying alpha to + // preserve. + const frameGen = runFfmpegSync([ + "-hide_banner", + "-y", + "-f", + "lavfi", "-i", - join(alphaFramesDir, "frame_%04d.png"), + `testsrc2=s=${WIDTH}x${HEIGHT}:r=${FPS}:d=${TOTAL_FRAMES / FPS}`, + "-vf", + "format=rgba,geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='X*255/W'", "-frames:v", - String(CHUNK_SIZE), - ]; - const args = buildEncoderArgs( - { - fps: { num: FPS, den: 1 }, - width: WIDTH, - height: HEIGHT, - codec: "vp9", - preset: "good", - quality: 32, - // yuva420p is what the distributed pipeline actually emits for - // webm; the alpha branch in chunkEncoder.ts adds the - // `-metadata:s:v:0 alpha_mode=1` tag we want to verify - // round-trips through concat-copy. 
- pixelFormat: "yuva420p", - lockGopForChunkConcat: true, - gopSize: CHUNK_SIZE, - }, - inputArgs, - chunkPath, - ); - const result = runFfmpegSync(["-hide_banner", "-loglevel", "error", ...args]); - if (result.exitCode !== 0) { + String(TOTAL_FRAMES), + join(alphaFramesDir, "frame_%04d.png"), + ]); + if (frameGen.exitCode !== 0) { throw new Error( - `[alpha smoke chunk ${chunkIdx}] yuva420p VP9 encode failed (exit ${result.exitCode}):\n` + - `args: ${JSON.stringify(args)}\n` + - `stderr: ${result.stderr.slice(-1000)}`, + `[alpha smoke setup] frame generation failed: ${frameGen.stderr.slice(-400)}`, ); } - expect(existsSync(chunkPath)).toBe(true); - } - }); - - it("concat-copies the 4 yuva420p chunks into a single alpha WebM", () => { - const lines: string[] = []; - for (let chunkIdx = 0; chunkIdx < CHUNK_COUNT; chunkIdx++) { - const chunkPath = join(alphaChunkDir, `chunk_${String(chunkIdx).padStart(4, "0")}.webm`); - lines.push(`file '${chunkPath.replace(/'/g, "'\\''")}'`); - } - writeFileSync(alphaConcatListPath, `${lines.join("\n")}\n`, "utf-8"); - const result = runFfmpegSync([ - "-hide_banner", - "-loglevel", - "error", - "-f", - "concat", - "-safe", - "0", - "-i", - alphaConcatListPath, - "-c", - "copy", - "-y", - alphaOutputPath, - ]); + const chunkPaths: string[] = []; + for (let chunkIdx = 0; chunkIdx < CHUNK_COUNT; chunkIdx++) { + const startNumber = chunkIdx * CHUNK_SIZE + 1; + const chunkPath = join(alphaChunkDir, `chunk_${String(chunkIdx).padStart(4, "0")}.webm`); + chunkPaths.push(chunkPath); + const args = buildEncoderArgs( + { + fps: { num: FPS, den: 1 }, + width: WIDTH, + height: HEIGHT, + codec: "vp9", + preset: "good", + quality: 32, + pixelFormat: "yuva420p", + lockGopForChunkConcat: true, + gopSize: CHUNK_SIZE, + }, + [ + "-framerate", + String(FPS), + "-start_number", + String(startNumber), + "-i", + join(alphaFramesDir, "frame_%04d.png"), + "-frames:v", + String(CHUNK_SIZE), + ], + chunkPath, + ); + const result = 
runFfmpegSync(["-hide_banner", "-loglevel", "error", ...args]); + if (result.exitCode !== 0) { + throw new Error( + `[alpha smoke chunk ${chunkIdx}] yuva420p VP9 encode failed: ${result.stderr.slice(-1000)}`, + ); + } + } - if (result.exitCode !== 0) { - throw new Error( - `[alpha smoke concat-copy] failed (exit ${result.exitCode}). ` + - `yuva420p webm concat-copy is broken — PR 8.2 must take Path B. ` + - `Failure fingerprint: ${result.stderr.slice(-1000)}`, + writeFileSync( + alphaConcatListPath, + `${chunkPaths.map((p) => `file '${p.replace(/'/g, "'\\''")}'`).join("\n")}\n`, + "utf-8", ); - } - expect(existsSync(alphaOutputPath)).toBe(true); - expect(statSync(alphaOutputPath).size).toBeGreaterThan(0); - }); + const concatResult = runFfmpegSync([ + "-hide_banner", + "-loglevel", + "error", + "-f", + "concat", + "-safe", + "0", + "-i", + alphaConcatListPath, + "-c", + "copy", + "-y", + alphaOutputPath, + ]); + if (concatResult.exitCode !== 0) { + throw new Error(`[alpha smoke concat-copy] failed: ${concatResult.stderr.slice(-1000)}`); + } - it("decodes alpha-track WebM cleanly without seam errors", () => { - const decodeResult = runFfmpegSync([ - "-hide_banner", - "-v", - "error", - "-i", - alphaOutputPath, - "-f", - "null", - "-", - ]); - // Gate only on exit code — `-v error` ffmpeg builds can emit - // non-fatal stderr (DTS warnings, container-quirk notes) and we - // don't want the test to flake on chatty stderr in a future - // libavformat upgrade. Surface stderr in the failure message for - // forensic context. - if (decodeResult.exitCode !== 0) { - throw new Error( - `[alpha smoke decode-test] failed (exit ${decodeResult.exitCode}). ` + - `Failure fingerprint: ${decodeResult.stderr.slice(-1000) || "(no stderr)"}`, - ); - } + // Decode-test gates only on exit code — `-v error` ffmpeg builds + // can emit non-fatal stderr (DTS warnings, container-quirk notes) + // and we don't want the test to flake on chatty stderr in a + // future libavformat upgrade. 
+ const decodeResult = runFfmpegSync([ + "-hide_banner", + "-v", + "error", + "-i", + alphaOutputPath, + "-f", + "null", + "-", + ]); + if (decodeResult.exitCode !== 0) { + throw new Error( + `[alpha smoke decode-test] failed (exit ${decodeResult.exitCode}): ` + + `${decodeResult.stderr.slice(-1000) || "(no stderr)"}`, + ); + } - const probeResult = runFfprobeSync([ - "-v", - "error", - "-select_streams", - "v:0", - "-show_streams", - alphaOutputPath, - ]); - expect(probeResult.exitCode).toBe(0); - expect(probeResult.stdout).toMatch(/codec_name=vp9/); - // libvpx-vp9 stores the alpha plane as a Matroska `BlockAdditional` - // sidecar, NOT in the main stream's `pix_fmt` — so `ffprobe` always - // reports `pix_fmt=yuv420p` for VP9-with-alpha. The right signal that - // alpha encoding was enabled is the stream-level `TAG:ALPHA_MODE=1` - // tag the encoder writes when `-metadata:s:v:0 alpha_mode=1` is set - // on a yuva420p input. - expect(probeResult.stdout).toMatch(/ALPHA_MODE=1/); - }); + // libvpx-vp9 stores the alpha plane as a Matroska `BlockAdditional` + // sidecar, NOT in the main stream's `pix_fmt` — `ffprobe` always + // reports `pix_fmt=yuv420p` for VP9-with-alpha. The right signal + // is the stream-level `TAG:ALPHA_MODE=1` tag the encoder writes + // when `-metadata:s:v:0 alpha_mode=1` is set on yuva420p input. 
+ const probeResult = runFfprobeSync([ + "-v", + "error", + "-select_streams", + "v:0", + "-show_streams", + alphaOutputPath, + ]); + expect(probeResult.exitCode).toBe(0); + expect(probeResult.stdout).toMatch(/codec_name=vp9/); + expect(probeResult.stdout).toMatch(/ALPHA_MODE=1/); - it("alpha plane round-trips through concat-copy with spatially-varying content", () => { - // Decode the concat-copied WebM via the libvpx-vp9 decoder forced to - // RGBA, then extract the alpha plane and check it has real spatial - // variance — catches the failure mode where the encoder accepted - // yuva420p input but dropped the alpha sub-stream silently - // (uniform alpha would mask any plan-time bug like the `needsAlpha` - // hole that hid this PR's bug before review caught it). The - // gradient source produces YMIN ≈ 0 / YMAX ≈ 255 on the alpha - // plane; uniform alpha would give YMIN == YMAX. Spread > 100 is a - // generous floor that catches the bad case cleanly. - // - // `-c:v libvpx-vp9` before `-i` is the load-bearing piece: ffmpeg's - // default VP9 decoder path strips the BlockAdditional alpha track - // when decoding to non-rgba pixel formats; forcing the libvpx-vp9 - // decoder + `-pix_fmt rgba` is how we get the alpha plane back. 
- const statsResult = runFfmpegSync([ - "-hide_banner", - "-v", - "error", - "-c:v", - "libvpx-vp9", - "-i", - alphaOutputPath, - "-pix_fmt", - "rgba", - "-vf", - "extractplanes=a,signalstats,metadata=mode=print:file=-", - "-f", - "null", - "-", - ]); - if (statsResult.exitCode !== 0) { - throw new Error( - `[alpha smoke signalstats] failed (exit ${statsResult.exitCode}): ` + - `${statsResult.stderr.slice(-500)}`, - ); - } - const yminMatch = statsResult.stdout.match(/lavfi\.signalstats\.YMIN=(\d+)/); - const ymaxMatch = statsResult.stdout.match(/lavfi\.signalstats\.YMAX=(\d+)/); - if (!yminMatch || !ymaxMatch) { - throw new Error( - `[alpha smoke signalstats] could not parse YMIN/YMAX from output: ` + - `${statsResult.stdout.slice(0, 500)}`, - ); + // Decode the alpha plane and check it has spatially-varying + // content — catches the case where the encoder accepted yuva420p + // input but dropped the alpha sub-stream silently (a uniform + // alpha plane would mask any plan-time bug like a misconfigured + // `needsAlpha` gate). The horizontal gradient source produces + // YMIN ≈ 0 / YMAX ≈ 255 on the alpha plane; uniform alpha would + // give YMIN == YMAX. Spread > 100 cleanly rejects the bad case. + // + // `-c:v libvpx-vp9` before `-i` is load-bearing: ffmpeg's default + // VP9 decoder strips the BlockAdditional alpha track when + // decoding to non-rgba pixel formats; forcing the libvpx-vp9 + // decoder + `-pix_fmt rgba` is how the alpha plane comes back. 
+ const statsResult = runFfmpegSync([ + "-hide_banner", + "-v", + "error", + "-c:v", + "libvpx-vp9", + "-i", + alphaOutputPath, + "-pix_fmt", + "rgba", + "-vf", + "extractplanes=a,signalstats,metadata=mode=print:file=-", + "-f", + "null", + "-", + ]); + if (statsResult.exitCode !== 0) { + throw new Error( + `[alpha smoke signalstats] failed (exit ${statsResult.exitCode}): ` + + `${statsResult.stderr.slice(-500)}`, + ); + } + const yminMatch = statsResult.stdout.match(/lavfi\.signalstats\.YMIN=(\d+)/); + const ymaxMatch = statsResult.stdout.match(/lavfi\.signalstats\.YMAX=(\d+)/); + if (!yminMatch || !ymaxMatch) { + throw new Error( + `[alpha smoke signalstats] could not parse YMIN/YMAX from output: ` + + `${statsResult.stdout.slice(0, 500)}`, + ); + } + const ymin = Number.parseInt(yminMatch[1], 10); + const ymax = Number.parseInt(ymaxMatch[1], 10); + expect(ymax - ymin).toBeGreaterThan(100); + expect(statSync(alphaOutputPath).size).toBeGreaterThan(0); + } finally { + rmSync(alphaRoot, { recursive: true, force: true }); } - const ymin = Number.parseInt(yminMatch[1], 10); - const ymax = Number.parseInt(ymaxMatch[1], 10); - expect(ymax - ymin).toBeGreaterThan(100); }); }); diff --git a/packages/producer/tests/distributed/webm-vp9/output/compiled.html b/packages/producer/tests/distributed/webm-vp9/output/compiled.html index c2f95979e..ba2edc000 100644 --- a/packages/producer/tests/distributed/webm-vp9/output/compiled.html +++ b/packages/producer/tests/distributed/webm-vp9/output/compiled.html @@ -129,10 +129,8 @@ No audio element on purpose. Opus frame quantization at 20ms grain pads a 2-second silent track past 2.0s of container time, which extends the muxed webm's duration past nb_frames/fps and trips the - harness PSNR sampler at the very last checkpoint. The chunk- - boundary contracts this fixture pins are video-only; omitting - audio keeps container duration == 2.0s exactly. Other webm-with- - audio fixtures cover the mux path separately when added. 
+ harness PSNR sampler at the very last checkpoint. Omitting audio + keeps container duration == 2.0s exactly. --> diff --git a/packages/producer/tests/distributed/webm-vp9/output/output.webm b/packages/producer/tests/distributed/webm-vp9/output/output.webm index d541643af..15af2a018 100644 --- a/packages/producer/tests/distributed/webm-vp9/output/output.webm +++ b/packages/producer/tests/distributed/webm-vp9/output/output.webm @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b98efde6993524eaec419b2a7d3c37f33dcc84602bbd89cfcb1eea41cc125506 +oid sha256:44d40fa4cd520eb59966ada4ef3eff70f5125fa580408dfd488e10ec883cd0b3 size 78674 diff --git a/packages/producer/tests/distributed/webm-vp9/src/index.html b/packages/producer/tests/distributed/webm-vp9/src/index.html index e73c0db59..03b9b25eb 100644 --- a/packages/producer/tests/distributed/webm-vp9/src/index.html +++ b/packages/producer/tests/distributed/webm-vp9/src/index.html @@ -93,10 +93,8 @@ No audio element on purpose. Opus frame quantization at 20ms grain pads a 2-second silent track past 2.0s of container time, which extends the muxed webm's duration past nb_frames/fps and trips the - harness PSNR sampler at the very last checkpoint. The chunk- - boundary contracts this fixture pins are video-only; omitting - audio keeps container duration == 2.0s exactly. Other webm-with- - audio fixtures cover the mux path separately when added. + harness PSNR sampler at the very last checkpoint. Omitting audio + keeps container duration == 2.0s exactly. -->