diff --git a/packages/engine/src/index.ts b/packages/engine/src/index.ts index 97c54e06e..e8f067c4f 100644 --- a/packages/engine/src/index.ts +++ b/packages/engine/src/index.ts @@ -93,6 +93,7 @@ export { // ── Encoding ─────────────────────────────────────────────────────────────────── export { + buildEncoderArgs, encodeFramesFromDir, encodeFramesChunkedConcat, muxVideoWithAudio, diff --git a/packages/engine/src/services/chunkEncoder.test.ts b/packages/engine/src/services/chunkEncoder.test.ts index ccebf2b3d..54dcd6609 100644 --- a/packages/engine/src/services/chunkEncoder.test.ts +++ b/packages/engine/src/services/chunkEncoder.test.ts @@ -551,7 +551,11 @@ describe("buildEncoderArgs lockGopForChunkConcat", () => { expect(args.indexOf("-x264-params")).toBe(-1); }); - it("true is a no-op on VP9", () => { + // Closed-GOP for libvpx-vp9 is required to make `ffmpeg -f concat -c copy` + // stitch VP9 chunks losslessly: every chunk's first frame must be an + // independently-decodable keyframe with no alt-ref references reaching + // back across the seam. + it("true appends closed-GOP args for libvpx-vp9", () => { const args = buildEncoderArgs( { ...baseOptions, @@ -564,9 +568,82 @@ describe("buildEncoderArgs lockGopForChunkConcat", () => { inputArgs, "out.webm", ); + expect(args[args.indexOf("-g") + 1]).toBe("240"); + expect(args[args.indexOf("-keyint_min") + 1]).toBe("240"); + // Alt-ref frames are non-displayable references that break concat-copy + // at chunk seams; closed-GOP must disable them. + expect(args[args.indexOf("-auto-alt-ref") + 1]).toBe("0"); + // cpu-used is locked so workers with different libvpx-vp9 defaults + // produce visually consistent output across chunk boundaries. + expect(args[args.indexOf("-cpu-used") + 1]).toBe("2"); + // libvpx-vp9 uses `-deadline good` for non-ultrafast presets — the + // closed-GOP path doesn't change that. 
+ expect(args[args.indexOf("-deadline") + 1]).toBe("good"); + // x264/x265-only params must not leak into the VP9 branch. + expect(args.indexOf("-x264-params")).toBe(-1); + expect(args.indexOf("-x265-params")).toBe(-1); + expect(args.indexOf("-sc_threshold")).toBe(-1); + expect(args.indexOf("-force_key_frames")).toBe(-1); + }); + + it("default (false) omits closed-GOP args for libvpx-vp9", () => { + const args = buildEncoderArgs( + { ...baseOptions, codec: "vp9", preset: "good", quality: 23 }, + inputArgs, + "out.webm", + ); expect(args).not.toContain("-g"); expect(args).not.toContain("-keyint_min"); - expect(args).not.toContain("-force_key_frames"); + expect(args).not.toContain("-cpu-used"); + // The non-locked, non-alpha VP9 path leaves `-auto-alt-ref` at the + // libvpx default. Alpha branches still emit `-auto-alt-ref 0` for an + // unrelated reason (alpha + alt-ref is unsupported), but that's a + // separate test below. + expect(args).not.toContain("-auto-alt-ref"); + }); + + it("true with alpha pixel format keeps alpha metadata and emits -auto-alt-ref once", () => { + // Regression: alpha + closed-GOP must NOT double-push `-auto-alt-ref 0`. + // Both paths want it disabled; the encoder branch emits it exactly once. 
+    const args = buildEncoderArgs(
+      {
+        ...baseOptions,
+        codec: "vp9",
+        preset: "good",
+        quality: 23,
+        pixelFormat: "yuva420p",
+        lockGopForChunkConcat: true,
+        gopSize: 240,
+      },
+      inputArgs,
+      "out.webm",
+    );
+    const autoAltRefIndices = args.reduce<number[]>((acc, a, i) => {
+      if (a === "-auto-alt-ref") acc.push(i);
+      return acc;
+    }, []);
+    expect(autoAltRefIndices.length).toBe(1);
+    expect(args[autoAltRefIndices[0] + 1]).toBe("0");
+    expect(args[args.indexOf("-metadata:s:v:0") + 1]).toBe("alpha_mode=1");
+    expect(args[args.indexOf("-g") + 1]).toBe("240");
+  });
+
+  it("vp9 + lockGopForChunkConcat=true throws on missing gopSize", () => {
+    // Mirrors the libx264/libx265 branch: closed-GOP without a GOP size
+    // makes no sense — surface the caller error eagerly.
+    expect(() =>
+      buildEncoderArgs(
+        {
+          ...baseOptions,
+          codec: "vp9",
+          preset: "good",
+          quality: 23,
+          lockGopForChunkConcat: true,
+        },
+        inputArgs,
+        "out.webm",
+      ),
+    ).toThrow(/lockGopForChunkConcat=true requires a positive integer gopSize/);
   });
 
   it("true is a no-op on ProRes (intra-only — no GOP forcing needed)", () => {
diff --git a/packages/engine/src/services/chunkEncoder.ts b/packages/engine/src/services/chunkEncoder.ts
index ea8a718bf..ca26bc867 100644
--- a/packages/engine/src/services/chunkEncoder.ts
+++ b/packages/engine/src/services/chunkEncoder.ts
@@ -254,8 +254,58 @@ export function buildEncoderArgs(
     args.push("-c:v", "libvpx-vp9", "-b:v", bitrate || "0", "-crf", String(quality));
     args.push("-deadline", preset === "ultrafast" ? "realtime" : "good");
     args.push("-row-mt", "1");
+
+    // Closed-GOP args for distributed chunk concat-copy. Mirrors the
+    // libx264/libx265 branch above: `lockGopForChunkConcat=true` lays a
+    // keyframe at every chunk boundary so `ffmpeg -f concat -c copy` can
+    // stitch sibling chunks losslessly.
+    //
+    // VP9-specific: `-auto-alt-ref 0` is mandatory. Alt-ref (a.k.a.
+    // "ARF") frames are non-displayable references libvpx-vp9 inserts
+    // anywhere in the GOP for compression; they break concat-copy at
+    // chunk seams because the boundary frame is no longer the first
+    // displayable reference. The alpha branch below already disables
+    // alt-ref for an unrelated reason (alpha + alt-ref is unsupported);
+    // closed-GOP extends that to every pixel format.
+    //
+    // `-cpu-used 2` pins the libvpx-vp9 speed/quality tradeoff so chunks
+    // encoded on workers with different default cpu-used values still
+    // produce visually consistent output across seams. libvpx-vp9's
+    // default with `-deadline good` has drifted across versions
+    // historically — locking it makes the planHash round-trip
+    // deterministic.
+    const lockGopVp9 = options.lockGopForChunkConcat === true;
+    if (lockGopVp9) {
+      if (
+        typeof options.gopSize !== "number" ||
+        !Number.isFinite(options.gopSize) ||
+        options.gopSize < 1 // fractions in (0, 1) would otherwise floor to `-g 0`
+      ) {
+        throw new Error(
+          `[chunkEncoder] lockGopForChunkConcat=true requires a positive integer gopSize (received ${String(options.gopSize)})`,
+        );
+      }
+      const gop = Math.floor(options.gopSize);
+      args.push(
+        "-g",
+        String(gop),
+        "-keyint_min",
+        String(gop),
+        "-auto-alt-ref",
+        "0",
+        "-cpu-used",
+        "2",
+      );
+    }
     if (pixelFormat === "yuva420p") {
-      args.push("-auto-alt-ref", "0");
+      // Alpha + alt-ref is unsupported by libvpx-vp9. The closed-GOP
+      // branch above already disables alt-ref; only push the flag for
+      // the non-locked alpha case to keep the args list clean (a second
+      // `-auto-alt-ref 0` is harmless but noisier in `ffmpeg -loglevel`
+      // diagnostics).
+ if (!lockGopVp9) { + args.push("-auto-alt-ref", "0"); + } args.push("-metadata:s:v:0", "alpha_mode=1"); } } else if (codec === "prores") { diff --git a/packages/engine/src/services/chunkEncoder.types.ts b/packages/engine/src/services/chunkEncoder.types.ts index aaf8c4bd1..b31b8fcfa 100644 --- a/packages/engine/src/services/chunkEncoder.types.ts +++ b/packages/engine/src/services/chunkEncoder.types.ts @@ -22,8 +22,15 @@ export interface EncoderOptions { * (open-GOP, scenecut-driven keyframes), preserving the in-process * renderer's byte-identical output. * - * Only honored by the SW libx264 / libx265 paths. GPU encoders, vp9, and - * prores ignore the flag (their concat-copy story is separate). + * Honored by the SW libx264 / libx265 / libvpx-vp9 paths. GPU encoders + * and ProRes ignore the flag — GPU concat-copy is a separate story and + * ProRes is intra-only (every frame is already a keyframe, so no + * closed-GOP forcing is needed). + * + * For libvpx-vp9, closed-GOP also forces `-auto-alt-ref 0` so the + * boundary frame between chunks remains independently decodable — + * libvpx-vp9's default alt-ref frames can land anywhere in the GOP + * for compression and break concat-copy seams. */ lockGopForChunkConcat?: boolean; /** diff --git a/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts b/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts new file mode 100644 index 000000000..2e1098bc8 --- /dev/null +++ b/packages/producer/tests/distributed/_smoke/webm-concat-copy.test.ts @@ -0,0 +1,306 @@ +/** + * Smoke test for the WebM (VP9) distributed concat-copy path. + * + * PR 8.1 gating experiment — answers the question: + * "Does `buildEncoderArgs(..., { codec: 'vp9', lockGopForChunkConcat: true, gopSize: N })` + * produce VP9 chunk files that `ffmpeg -f concat -c copy` can stitch + * into a single playable WebM?" 
+ * + * YES → PR 8.2 ships Path A: drop webm from FORMAT_NOT_SUPPORTED_IN_DISTRIBUTED + * and wire lockGopForChunkConcat=true through the distributed plan(). + * + * NO → PR 8.2 ships Path B: re-encode the concat'd chunks in `assemble()` + * (slower; loses encode parallelism but is reliably correct). + * + * Why direct ffmpeg invocation (instead of plan/renderChunk/assemble): the + * full distributed pipeline currently REFUSES webm at plan time, so we can't + * exercise it end-to-end yet. This smoke test bypasses the producer pipeline + * and only validates the ffmpeg-level contract — the encoder args we'll wire + * into the pipeline in 8.2. + * + * The test generates 60 frames (2s @ 30fps) of an animated test pattern + * (`testsrc2` from ffmpeg's lavfi), splits them into 4 chunks of 15 frames + * each via direct `ffmpeg` invocations using the args from + * `buildEncoderArgs(..., { lockGopForChunkConcat: true, gopSize: 15 })`, + * concat-copies them, and runs three independent verifications: + * + * 1. `ffprobe -show_streams` — output is a valid WebM with one VP9 stream + * 2. `ffmpeg -i ... -f null -` — output decodes cleanly (no seam errors) + * 3. `ffprobe -count_frames` — frame count equals sum of chunk frames + * + * If concat-copy fails in any way the test reports the precise failure + * fingerprint in the error message so PR 8.2 has the data it needs to pick + * Path A vs Path B. 
+ */
+
+import { afterAll, beforeAll, describe, expect, it } from "bun:test";
+import { spawnSync } from "node:child_process";
+import { existsSync, mkdirSync, mkdtempSync, rmSync, statSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { buildEncoderArgs } from "@hyperframes/engine";
+
+const FPS = 30;
+const TOTAL_FRAMES = 60;
+const CHUNK_SIZE = 15;
+const CHUNK_COUNT = TOTAL_FRAMES / CHUNK_SIZE; // 4
+const WIDTH = 320;
+const HEIGHT = 240;
+
+let runRoot: string;
+let framesDir: string;
+let chunkDir: string;
+let concatListPath: string;
+let outputPath: string;
+let frameGenStderr = "";
+
+interface FfmpegResult {
+  exitCode: number | null;
+  stderr: string;
+  stdout: string;
+}
+
+function runFfmpegSync(args: string[]): FfmpegResult {
+  const result = spawnSync("ffmpeg", args, { encoding: "utf8" });
+  return {
+    exitCode: result.status,
+    stderr: [result.error?.message, result.stderr].filter(Boolean).join("\n"), // surface spawn failures (e.g. ENOENT)
+    stdout: result.stdout ?? "",
+  };
+}
+
+function runFfprobeSync(args: string[]): FfmpegResult {
+  const result = spawnSync("ffprobe", args, { encoding: "utf8" });
+  return {
+    exitCode: result.status,
+    stderr: [result.error?.message, result.stderr].filter(Boolean).join("\n"), // surface spawn failures (e.g. ENOENT)
+    stdout: result.stdout ?? "",
+  };
+}
+
+beforeAll(() => {
+  runRoot = mkdtempSync(join(tmpdir(), "hf-webm-concat-smoke-"));
+  framesDir = join(runRoot, "frames");
+  chunkDir = join(runRoot, "chunks");
+  mkdirSync(framesDir, { recursive: true });
+  mkdirSync(chunkDir, { recursive: true });
+  concatListPath = join(runRoot, "concat-list.txt");
+  outputPath = join(runRoot, "output.webm");
+
+  // Generate 60 PNG frames using lavfi testsrc2 (animated counter / color
+  // bars — easy to eyeball for seam errors if a human inspects the output).
+  // Each frame is a real image; we use a frame sequence rather than a single
+  // mp4 source so the per-chunk encode is a pure image2 → VP9 pass with no
+  // intermediate decode.
+ const frameGen = runFfmpegSync([ + "-hide_banner", + "-y", + "-f", + "lavfi", + "-i", + `testsrc2=s=${WIDTH}x${HEIGHT}:r=${FPS}:d=${TOTAL_FRAMES / FPS}`, + "-frames:v", + String(TOTAL_FRAMES), + join(framesDir, "frame_%04d.png"), + ]); + frameGenStderr = frameGen.stderr; + if (frameGen.exitCode !== 0) { + throw new Error( + `[smoke setup] frame generation failed (exit ${frameGen.exitCode}): ${frameGen.stderr.slice(-400)}`, + ); + } +}); + +afterAll(() => { + rmSync(runRoot, { recursive: true, force: true }); +}); + +describe("webm VP9 concat-copy smoke", () => { + it("generates 60 source PNG frames", () => { + // Sanity check — if testsrc2 frame generation broke, downstream + // failures would be miscategorized as concat-copy errors. + const firstFrame = join(framesDir, "frame_0001.png"); + const lastFrame = join(framesDir, `frame_${String(TOTAL_FRAMES).padStart(4, "0")}.png`); + expect(existsSync(firstFrame)).toBe(true); + expect(existsSync(lastFrame)).toBe(true); + expect(frameGenStderr).toBeDefined(); + }); + + it("encodes 4 VP9 chunks with closed-GOP args from buildEncoderArgs", () => { + // The contract this test asserts: buildEncoderArgs with + // lockGopForChunkConcat=true + codec=vp9 + gopSize=chunkSize produces + // VP9 chunks whose first frame is an independently-decodable keyframe + // and whose alt-ref behavior doesn't reach back across chunk seams. + // + // Use the exact args buildEncoderArgs returns. We only swap the input + // args (image2 input range per chunk) — the encoder args (everything + // after `-r `) are byte-identical to what a real renderChunk() + // call would invoke. 
+ for (let chunkIdx = 0; chunkIdx < CHUNK_COUNT; chunkIdx++) { + const startNumber = chunkIdx * CHUNK_SIZE + 1; // image2 frame numbers are 1-based + const chunkPath = join(chunkDir, `chunk_${String(chunkIdx).padStart(4, "0")}.webm`); + const inputArgs = [ + "-framerate", + String(FPS), + "-start_number", + String(startNumber), + "-i", + join(framesDir, "frame_%04d.png"), + "-frames:v", + String(CHUNK_SIZE), + ]; + const args = buildEncoderArgs( + { + fps: { num: FPS, den: 1 }, + width: WIDTH, + height: HEIGHT, + codec: "vp9", + preset: "good", + quality: 32, + pixelFormat: "yuv420p", + lockGopForChunkConcat: true, + gopSize: CHUNK_SIZE, + }, + inputArgs, + chunkPath, + ); + const result = runFfmpegSync(["-hide_banner", "-loglevel", "error", ...args]); + if (result.exitCode !== 0) { + throw new Error( + `[smoke chunk ${chunkIdx}] VP9 encode failed (exit ${result.exitCode}):\n` + + `args: ${JSON.stringify(args)}\n` + + `stderr: ${result.stderr.slice(-1000)}`, + ); + } + expect(existsSync(chunkPath)).toBe(true); + expect(statSync(chunkPath).size).toBeGreaterThan(0); + } + }); + + it("concat-copies the 4 chunks into a single WebM", () => { + const lines: string[] = []; + for (let chunkIdx = 0; chunkIdx < CHUNK_COUNT; chunkIdx++) { + const chunkPath = join(chunkDir, `chunk_${String(chunkIdx).padStart(4, "0")}.webm`); + lines.push(`file '${chunkPath.replace(/'/g, "'\\''")}'`); + } + writeFileSync(concatListPath, `${lines.join("\n")}\n`, "utf-8"); + + const result = runFfmpegSync([ + "-hide_banner", + "-loglevel", + "error", + "-f", + "concat", + "-safe", + "0", + "-i", + concatListPath, + "-c", + "copy", + "-y", + outputPath, + ]); + + // Surface ffmpeg's full stderr in the assertion message so 8.2 has the + // failure fingerprint when concat-copy is broken (e.g. + // "Non-monotonous DTS in output stream", "missing keyframe at chunk 2", + // matroska/webm cluster errors). 
+ if (result.exitCode !== 0) { + throw new Error( + `[smoke concat-copy] failed (exit ${result.exitCode}). ` + + `This means PR 8.2 must take Path B (re-encode in assemble). ` + + `Failure fingerprint: ${result.stderr.slice(-1000)}`, + ); + } + expect(existsSync(outputPath)).toBe(true); + expect(statSync(outputPath).size).toBeGreaterThan(0); + }); + + it("ffprobe -show_streams reports a single playable VP9 stream", () => { + // First verification — the output file is structurally a valid WebM + // with one video stream encoded as VP9. A broken concat-copy can + // produce a file whose container parses but whose stream metadata is + // corrupted (no codec ID, zero duration, broken pixel format). + const result = runFfprobeSync([ + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name,width,height,pix_fmt,r_frame_rate", + "-of", + "default=noprint_wrappers=1", + outputPath, + ]); + if (result.exitCode !== 0) { + throw new Error( + `[smoke ffprobe] -show_streams failed (exit ${result.exitCode}). ` + + `This means concat-copy produced a structurally broken WebM. ` + + `Failure fingerprint: ${result.stderr.slice(-1000)}`, + ); + } + expect(result.stdout).toMatch(/codec_name=vp9/); + expect(result.stdout).toMatch(new RegExp(`width=${WIDTH}`)); + expect(result.stdout).toMatch(new RegExp(`height=${HEIGHT}`)); + }); + + it("ffmpeg -i ... -f null - decodes the concat'd WebM without errors", () => { + // Second verification — the bitstream actually decodes end-to-end. + // A WebM whose containers parse but whose VP9 frames reference + // non-existent alt-ref frames (because alt-ref crossed a chunk + // seam) will fail here with "Reference frame not found" or + // "Invalid frame" errors. 
+ const result = runFfmpegSync([ + "-hide_banner", + "-v", + "error", + "-i", + outputPath, + "-f", + "null", + "-", + ]); + if (result.exitCode !== 0 || result.stderr.length > 0) { + throw new Error( + `[smoke decode-test] ffmpeg -f null - reported decode errors ` + + `(exit ${result.exitCode}). This means concat-copy seams produce ` + + `invalid VP9 references — PR 8.2 must take Path B (re-encode in assemble). ` + + `Failure fingerprint: ${result.stderr.slice(-1000) || "(no stderr; check exit code)"}`, + ); + } + }); + + it("ffprobe -count_frames matches the sum of chunk frames", () => { + // Third verification — playable frame count equals what we encoded. + // A broken concat-copy can produce a file that decodes "without + // errors" up to the first bad seam and then silently truncates, + // leaving fewer frames than expected. + const result = runFfprobeSync([ + "-v", + "error", + "-select_streams", + "v:0", + "-count_frames", + "-show_entries", + "stream=nb_read_frames", + "-of", + "default=noprint_wrappers=1:nokey=1", + outputPath, + ]); + if (result.exitCode !== 0) { + throw new Error( + `[smoke ffprobe count_frames] failed (exit ${result.exitCode}): ` + + `${result.stderr.slice(-1000)}`, + ); + } + const nbFrames = Number.parseInt(result.stdout.trim(), 10); + if (!Number.isFinite(nbFrames) || nbFrames !== TOTAL_FRAMES) { + throw new Error( + `[smoke ffprobe count_frames] expected ${TOTAL_FRAMES} frames, got ${result.stdout.trim()}. ` + + `This means concat-copy dropped frames at one or more chunk seams — ` + + `PR 8.2 must take Path B (re-encode in assemble).`, + ); + } + expect(nbFrames).toBe(TOTAL_FRAMES); + }); +});