Skip to content

Commit e7247d8

Browse files
Merge pull request #434 from Enriquefft/fix/export-audio-duration-validation
fix: validate export duration and fix audio trim in speed-aware path
2 parents 56d3d59 + dd8c001 commit e7247d8

4 files changed

Lines changed: 211 additions & 31 deletions

File tree

src/lib/exporter/audioEncoder.ts

Lines changed: 103 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type { VideoMuxer } from "./muxer";
55
const AUDIO_BITRATE = 128_000;
66
const DECODE_BACKPRESSURE_LIMIT = 20;
77
const MIN_SPEED_REGION_DELTA_MS = 0.0001;
8+
const SEEK_TIMEOUT_MS = 5_000;
89

910
export class AudioProcessor {
1011
private cancelled = false;
@@ -18,9 +19,9 @@ export class AudioProcessor {
1819
demuxer: WebDemuxer,
1920
muxer: VideoMuxer,
2021
videoUrl: string,
21-
trimRegions?: TrimRegion[],
22-
speedRegions?: SpeedRegion[],
23-
readEndSec?: number,
22+
trimRegions: TrimRegion[] | undefined,
23+
speedRegions: SpeedRegion[] | undefined,
24+
validatedDurationSec: number,
2425
): Promise<void> {
2526
const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
2627
const sortedSpeedRegions = speedRegions
@@ -35,14 +36,19 @@ export class AudioProcessor {
3536
videoUrl,
3637
sortedTrims,
3738
sortedSpeedRegions,
39+
validatedDurationSec,
3840
);
39-
if (!this.cancelled) {
41+
if (!this.cancelled && renderedAudioBlob.size > 0) {
4042
await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
4143
return;
4244
}
45+
return;
4346
}
4447

4548
// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
49+
// The +0.5s buffer mirrors streamingDecoder.decodeAll's read window so the trim-only
50+
// and speed-aware paths agree on how far to read past the validated duration boundary.
51+
const readEndSec = validatedDurationSec + 0.5;
4652
await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
4753
}
4854

@@ -55,7 +61,7 @@ export class AudioProcessor {
5561
): Promise<void> {
5662
let audioConfig: AudioDecoderConfig;
5763
try {
58-
audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
64+
audioConfig = await demuxer.getDecoderConfig("audio");
5965
} catch {
6066
console.warn("[AudioProcessor] No audio track found, skipping");
6167
return;
@@ -80,11 +86,10 @@ export class AudioProcessor {
8086
typeof readEndSec === "number" && Number.isFinite(readEndSec)
8187
? Math.max(0, readEndSec)
8288
: undefined;
83-
const audioStream = (
89+
const audioStream =
8490
safeReadEndSec !== undefined
8591
? demuxer.read("audio", 0, safeReadEndSec)
86-
: demuxer.read("audio")
87-
) as ReadableStream<EncodedAudioChunk>;
92+
: demuxer.read("audio");
8893
const reader = audioStream.getReader();
8994

9095
try {
@@ -187,6 +192,7 @@ export class AudioProcessor {
187192
videoUrl: string,
188193
trimRegions: TrimRegion[],
189194
speedRegions: SpeedRegion[],
195+
validatedDurationSec: number,
190196
): Promise<Blob> {
191197
const media = document.createElement("audio");
192198
media.src = videoUrl;
@@ -211,15 +217,44 @@ export class AudioProcessor {
211217
const destinationNode = audioContext.createMediaStreamDestination();
212218
sourceNode.connect(destinationNode);
213219

214-
const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
215220
let rafId: number | null = null;
221+
let recorder: MediaRecorder | null = null;
222+
let recordedBlobPromise: Promise<Blob> | null = null;
216223

217224
try {
218225
if (audioContext.state === "suspended") {
219226
await audioContext.resume();
220227
}
221228

222-
await this.seekTo(media, 0);
229+
// Skip past any initial trim region(s) before recording starts to avoid
230+
// capturing trimmed audio during the first rAF frames of playback.
231+
// Loops to handle back-to-back or overlapping trims at t=0.
232+
const effectiveEnd = validatedDurationSec;
233+
let startPosition = 0;
234+
for (let i = 0; i <= trimRegions.length; i++) {
235+
const activeTrim = this.findActiveTrimRegion(startPosition * 1000, trimRegions);
236+
if (!activeTrim) break;
237+
startPosition = activeTrim.endMs / 1000;
238+
if (startPosition >= effectiveEnd) break;
239+
}
240+
241+
if (startPosition >= effectiveEnd) {
242+
// All content is trimmed — return silent blob
243+
return new Blob([], { type: "audio/webm" });
244+
}
245+
246+
await this.seekTo(media, startPosition);
247+
248+
// Set initial playback rate for the starting position
249+
const initialSpeedRegion = this.findActiveSpeedRegion(startPosition * 1000, speedRegions);
250+
if (initialSpeedRegion) {
251+
media.playbackRate = initialSpeedRegion.speed;
252+
}
253+
254+
// Start recording only AFTER seeking past trims
255+
const recording = this.startAudioRecording(destinationNode.stream);
256+
recorder = recording.recorder;
257+
recordedBlobPromise = recording.recordedBlobPromise;
223258
await media.play();
224259

225260
await new Promise<void>((resolve, reject) => {
@@ -249,24 +284,66 @@ export class AudioProcessor {
249284
return;
250285
}
251286

287+
// Stop playback at validated duration — browser's media.duration
288+
// may be inflated from bad container metadata.
289+
if (media.currentTime >= validatedDurationSec) {
290+
media.pause();
291+
cleanup();
292+
resolve();
293+
return;
294+
}
295+
252296
const currentTimeMs = media.currentTime * 1000;
253297
const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);
254298

255299
if (activeTrimRegion && !media.paused && !media.ended) {
256300
const skipToTime = activeTrimRegion.endMs / 1000;
257-
if (skipToTime >= media.duration) {
301+
if (skipToTime >= media.duration || skipToTime >= validatedDurationSec) {
258302
media.pause();
259303
cleanup();
260304
resolve();
261305
return;
262306
}
307+
// Pause recording during trim seek to prevent capturing
308+
// silence/noise as the audio element seeks.
309+
media.pause();
310+
if (recorder?.state === "recording") recorder.pause();
311+
const onSeeked = () => {
312+
clearTimeout(seekTimer);
313+
if (this.cancelled) {
314+
cleanup();
315+
resolve();
316+
return;
317+
}
318+
if (recorder?.state === "paused") recorder.resume();
319+
media
320+
.play()
321+
.then(() => {
322+
if (!this.cancelled) rafId = requestAnimationFrame(tick);
323+
})
324+
.catch((err) => {
325+
cleanup();
326+
reject(
327+
new Error(
328+
`Failed to resume playback after trim seek: ${err instanceof Error ? err.message : String(err)}`,
329+
),
330+
);
331+
});
332+
};
333+
const seekTimer = window.setTimeout(() => {
334+
media.removeEventListener("seeked", onSeeked);
335+
cleanup();
336+
reject(new Error("Audio seek timed out while skipping trim region"));
337+
}, SEEK_TIMEOUT_MS);
338+
media.addEventListener("seeked", onSeeked, { once: true });
263339
media.currentTime = skipToTime;
264-
} else {
265-
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
266-
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
267-
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
268-
media.playbackRate = playbackRate;
269-
}
340+
return;
341+
}
342+
343+
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
344+
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
345+
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
346+
media.playbackRate = playbackRate;
270347
}
271348

272349
if (!media.paused && !media.ended) {
@@ -286,7 +363,7 @@ export class AudioProcessor {
286363
cancelAnimationFrame(rafId);
287364
}
288365
media.pause();
289-
if (recorder.state !== "inactive") {
366+
if (recorder && recorder.state !== "inactive") {
290367
recorder.stop();
291368
}
292369
destinationNode.stream.getTracks().forEach((track) => track.stop());
@@ -297,6 +374,12 @@ export class AudioProcessor {
297374
media.load();
298375
}
299376

377+
if (!recordedBlobPromise) {
378+
// Invariant: either an early return above fires, or startAudioRecording ran and
379+
// populated recordedBlobPromise before the playback Promise resolved. Reaching
380+
// here means that contract was broken — fail loud instead of returning silence.
381+
throw new Error("Audio recorder finished without assigning recordedBlobPromise");
382+
}
300383
const recordedBlob = await recordedBlobPromise;
301384
if (this.cancelled) {
302385
throw new Error("Export cancelled");
@@ -314,8 +397,8 @@ export class AudioProcessor {
314397

315398
try {
316399
await demuxer.load(file);
317-
const audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
318-
const reader = (demuxer.read("audio") as ReadableStream<EncodedAudioChunk>).getReader();
400+
const audioConfig = await demuxer.getDecoderConfig("audio");
401+
const reader = demuxer.read("audio").getReader();
319402
let isFirstChunk = true;
320403

321404
try {

src/lib/exporter/streamingDecoder.test.ts

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,44 @@
11
import { describe, expect, it } from "vitest";
2-
import { shouldFailDecodeEndedEarly } from "./streamingDecoder";
2+
import { shouldFailDecodeEndedEarly, validateDuration } from "./streamingDecoder";
3+
4+
describe("validateDuration", () => {
	// Table of scenarios. Each row is:
	// [test title, container-reported duration (s), packet-scanned duration (s), expected result (s)].
	const scenarios: ReadonlyArray<readonly [string, number, number, number]> = [
		["returns scanned duration when container reports Infinity", Infinity, 15.3, 15.3],
		["returns scanned duration when container reports 0", 0, 15.3, 15.3],
		["returns scanned duration when container reports NaN", NaN, 15.3, 15.3],
		["returns scanned duration when container is inflated beyond threshold", 42, 15.3, 15.3],
		["returns container duration when values are close", 15.5, 15.3, 15.5],
		// Container is below scanned here: the scan overshoots by the last frame's duration.
		["returns container duration when scanned is slightly higher", 15.0, 15.3, 15.0],
		["returns scanned duration when container under-reports beyond threshold", 10, 15.3, 15.3],
		["returns container duration when scanned is zero (corrupted/empty file)", 10, 0, 10],
		["returns 0 when both container is NaN and scanned is zero", NaN, 0, 0],
	];

	// Register one test per row, in table order, so the reported suite is unchanged.
	for (const [title, containerSec, scannedSec, expectedSec] of scenarios) {
		it(title, () => {
			expect(validateDuration(containerSec, scannedSec)).toBe(expectedSec);
		});
	}
});
342

443
describe("shouldFailDecodeEndedEarly", () => {
544
it("does not fail once every segment has been satisfied", () => {

src/lib/exporter/streamingDecoder.ts

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,37 @@ type EarlyDecodeEndCheck = {
7070
const EARLY_DECODE_END_THRESHOLD_SEC = 1;
7171
const METADATA_TAIL_TOLERANCE_SEC = 1.5;
7272
const STREAM_DURATION_MATCH_TOLERANCE_SEC = 0.25;
73+
// Maximum allowed gap (seconds) between the container-reported duration and the
// packet-scanned duration before the container value is considered untrustworthy.
const DURATION_DIVERGENCE_THRESHOLD_SEC = 1.5;
// Fallback upper bound (24 hours, in seconds) for the packet scan when no usable
// duration hint is available. An explicit end is always passed to the demuxer
// because some containers are truncated without one; deriving that end from a
// missing or corrupt container/stream duration would cut the scan short.
const SCAN_UNBOUNDED_FALLBACK_SEC = 24 * 60 * 60;

/**
 * Validate container duration against actual packet timestamps.
 *
 * Chrome/Electron's MediaRecorder writes WebM containers with unreliable
 * Duration fields (often Infinity, 0, or inflated) — especially on Linux.
 * This function picks the most trustworthy duration value.
 *
 * A duration is considered usable only when it is finite and greater than zero.
 * The result is therefore always a finite, non-negative number, even when one
 * or both inputs are NaN or Infinity.
 *
 * @param containerDuration Duration from the container-level metadata
 * @param scannedDuration Duration derived from actual packet timestamps (ground truth)
 * @returns The container duration when it is usable and within
 *   DURATION_DIVERGENCE_THRESHOLD_SEC of the scanned duration (or when the scan
 *   is unusable); otherwise the scanned duration; 0 when neither is usable.
 */
export function validateDuration(containerDuration: number, scannedDuration: number): number {
	const containerUsable = Number.isFinite(containerDuration) && containerDuration > 0;
	const scannedUsable = Number.isFinite(scannedDuration) && scannedDuration > 0;

	if (!scannedUsable) {
		// Zero or non-finite scanned duration means a corrupted/empty file or bad
		// packet timestamps — fall back to the container value
		// (downstream shouldFailDecodeEndedEarly will catch truly empty files).
		return containerUsable ? containerDuration : 0;
	}
	if (!containerUsable) {
		return scannedDuration;
	}
	// Both values are usable: trust the container unless it diverges too far
	// (inflated or under-reported) from what the packets actually cover.
	const divergenceSec = Math.abs(containerDuration - scannedDuration);
	return divergenceSec > DURATION_DIVERGENCE_THRESHOLD_SEC ? scannedDuration : containerDuration;
}
73104

74105
export function shouldFailDecodeEndedEarly({
75106
cancelled,
@@ -201,10 +232,43 @@ export class StreamingVideoDecoder {
201232

202233
const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio");
203234

235+
// Scan video packets to find the true content boundary.
236+
// MediaRecorder (especially on Linux) writes unreliable container durations.
237+
// Packet timestamps are ground truth — no decode needed, just timestamp reads.
238+
// Pass explicit range because some containers are truncated without one.
239+
// Sanitize because mediaInfo.duration can be NaN/Infinity (Chromium Linux bug),
240+
// which would propagate into demuxer.read() as an invalid endpoint.
241+
const containerDurationSec = Number.isFinite(mediaInfo.duration) ? mediaInfo.duration : 0;
242+
const streamDurationSec =
243+
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
244+
? videoStream.duration
245+
: 0;
246+
const hintedDurationSec = Math.max(containerDurationSec, streamDurationSec, 0);
247+
const scanEndSec =
248+
hintedDurationSec > 0 ? hintedDurationSec + 0.5 : SCAN_UNBOUNDED_FALLBACK_SEC;
249+
let maxPacketEndUs = 0;
250+
const scanReader = this.demuxer.read("video", 0, scanEndSec).getReader();
251+
try {
252+
while (true) {
253+
const { done, value } = await scanReader.read();
254+
if (done || !value) break;
255+
const endUs = value.timestamp + (value.duration ?? 0);
256+
if (endUs > maxPacketEndUs) maxPacketEndUs = endUs;
257+
}
258+
} finally {
259+
try {
260+
await scanReader.cancel();
261+
} catch {
262+
/* already closed */
263+
}
264+
}
265+
const scannedDuration = maxPacketEndUs / 1_000_000;
266+
const validatedDuration = validateDuration(mediaInfo.duration, scannedDuration);
267+
204268
this.metadata = {
205269
width: videoStream?.width || 1920,
206270
height: videoStream?.height || 1080,
207-
duration: mediaInfo.duration,
271+
duration: validatedDuration,
208272
streamDuration:
209273
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
210274
? videoStream.duration
@@ -305,7 +369,7 @@ export class StreamingVideoDecoder {
305369

306370
// One forward stream through the whole file.
307371
// Pass explicit range because some containers are truncated when no end is provided.
308-
const readEndSec = Math.max(this.metadata.duration, this.metadata.streamDuration ?? 0) + 0.5;
372+
const readEndSec = this.metadata.duration + 0.5;
309373
const reader = this.demuxer.read("video", 0, readEndSec).getReader();
310374

311375
// Feed chunks to decoder in background with backpressure

src/lib/exporter/videoExporter.ts

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,11 @@ export class VideoExporter {
157157
this.muxer = muxer;
158158
await muxer.initialize();
159159

160-
const { effectiveDuration, totalFrames } = streamingDecoder.getExportMetrics(
160+
const { totalFrames } = streamingDecoder.getExportMetrics(
161161
this.config.frameRate,
162162
this.config.trimRegions,
163163
this.config.speedRegions,
164164
);
165-
const readEndSec = Math.max(videoInfo.duration, videoInfo.streamDuration ?? 0) + 0.5;
166-
167-
console.log("[VideoExporter] Original duration:", videoInfo.duration, "s");
168-
console.log("[VideoExporter] Effective duration:", effectiveDuration, "s");
169-
console.log("[VideoExporter] Total frames to export:", totalFrames);
170-
console.log("[VideoExporter] Using streaming decode (web-demuxer + VideoDecoder)");
171165

172166
const frameDuration = 1_000_000 / this.config.frameRate;
173167
let frameIndex = 0;
@@ -346,7 +340,7 @@ export class VideoExporter {
346340
this.config.videoUrl,
347341
this.config.trimRegions,
348342
this.config.speedRegions,
349-
readEndSec,
343+
videoInfo.duration,
350344
);
351345
}
352346
}

0 commit comments

Comments
 (0)