@@ -14,6 +14,7 @@ import { runFfmpeg } from "../utils/runFfmpeg.js";
1414import { unwrapTemplate } from "../utils/htmlTemplate.js" ;
1515import { resolveProjectRelativeSrc } from "./videoFrameExtractor.js" ;
1616import type { AudioElement , AudioTrack , MixResult } from "./audioMixer.types.js" ;
17+ import { applyVolumeEnvelopeToWav } from "./audioVolumeEnvelope.js" ;
1718
1819export type { AudioElement , MixResult } from "./audioMixer.types.js" ;
1920
@@ -30,10 +31,89 @@ function escapeExpressionCommas(expression: string): string {
3031 return expression . replace ( / \\ / g, "\\\\" ) . replace ( / , / g, "\\," ) ;
3132}
3233
33- function buildVolumeExpression ( track : AudioTrack ) : string {
/**
 * Hard cap on how many volume-automation keyframes may be folded into the
 * FFmpeg `volume` expression.
 *
 * Each keyframe adds one nested `if(lt(...))` level to that expression, and
 * FFmpeg's expression evaluator only tolerates a finite nesting depth: beyond
 * roughly 95 levels (build-dependent, and lower on some Linux ffmpeg builds)
 * `volume=...:eval=frame` fails filter-graph init, the whole mix fails, and the
 * audio track is lost. The 60 Hz timeline probe commonly produces 100–300
 * keyframes for a fade lasting a few seconds (GH #1066 follow-up: a
 * 171-keyframe GSAP fade rendered silent). A cap of 32 leaves a wide margin
 * and is still far more resolution than a piecewise-linear envelope needs.
 */
const MAX_VOLUME_SEGMENTS = 32;

/**
 * Largest volume deviation (linear gain) a keyframe may have from the straight
 * line between its retained neighbours and still be discarded.
 *
 * Deliberately tight (0.5% linear) so the rendered piecewise-linear envelope
 * follows the GSAP curve heard in preview to within about 0.2 dB over the
 * audible range, well below the ~1 dB loudness JND, keeping render WYSIWYG
 * with preview. A full ease-in/ease-out fade still comes out at around 25
 * segments, which fits inside MAX_VOLUME_SEGMENTS.
 */
const VOLUME_SIMPLIFY_EPSILON = 0.005;

/**
 * Shrinks a time-sorted keyframe list to a piecewise-linear envelope with a
 * bounded number of control points.
 *
 * Pass 1 is Ramer–Douglas–Peucker using vertical (volume) distance: an interior
 * keyframe survives only if it deviates by more than `VOLUME_SIMPLIFY_EPSILON`
 * from the line joining the retained keyframes around it. A linear fade
 * therefore collapses to its two endpoints and an eased fade to a handful.
 *
 * Pass 2 is a safety net for pathological input (e.g. audio-rate oscillation):
 * if more than `MAX_VOLUME_SEGMENTS` keyframes survive pass 1, they are
 * resampled at evenly spaced indices down to at most that many.
 *
 * The first and last keyframes are always retained, so the envelope still
 * covers the full clip.
 *
 * @param keyframes Keyframes ordered by ascending `time`.
 * @returns The input array itself when it has fewer than three entries,
 *   otherwise a new array that reuses the original keyframe objects.
 */
function simplifyVolumeKeyframes(
  keyframes: { time: number; volume: number }[],
): { time: number; volume: number }[] {
  const total = keyframes.length;
  // Zero, one or two keyframes are already minimal.
  if (total <= 2) return keyframes;

  const lastIndex = total - 1;
  const retained = new Set<number>([0, lastIndex]);

  // Explicit work list instead of recursion: every entry is an index range
  // whose interior keyframes still have to be tested against the chord
  // between the range's two ends.
  const pending: Array<[number, number]> = [[0, lastIndex]];
  for (let range = pending.pop(); range !== undefined; range = pending.pop()) {
    const [lo, hi] = range;
    const first = keyframes[lo]!;
    const last = keyframes[hi]!;
    const duration = last.time - first.time;

    // Track the interior keyframe lying farthest from the chord; only
    // deviations strictly greater than the epsilon qualify.
    let worstDeviation = VOLUME_SIMPLIFY_EPSILON;
    let worstIndex = -1;
    for (let cursor = lo + 1; cursor < hi; cursor += 1) {
      const candidate = keyframes[cursor]!;
      let expected: number;
      if (duration === 0) {
        // Degenerate range with no time extent: compare against the start volume.
        expected = first.volume;
      } else {
        expected =
          first.volume + ((last.volume - first.volume) * (candidate.time - first.time)) / duration;
      }
      const deviation = Math.abs(candidate.volume - expected);
      if (deviation > worstDeviation) {
        worstDeviation = deviation;
        worstIndex = cursor;
      }
    }

    if (worstIndex === -1) continue;

    // Retain the outlier and re-examine the two halves on either side of it.
    retained.add(worstIndex);
    pending.push([lo, worstIndex], [worstIndex, hi]);
  }

  const reduced = keyframes.filter((_, index) => retained.has(index));
  if (reduced.length <= MAX_VOLUME_SEGMENTS) return reduced;

  // Backstop: pick MAX_VOLUME_SEGMENTS evenly spaced indices (first and last
  // included) and skip any pick that does not advance in time.
  const stride = (reduced.length - 1) / (MAX_VOLUME_SEGMENTS - 1);
  const resampled: { time: number; volume: number }[] = [];
  for (let slot = 0; slot < MAX_VOLUME_SEGMENTS; slot += 1) {
    const pick = reduced[Math.round(slot * stride)]!;
    const previous = resampled[resampled.length - 1];
    if (previous === undefined || pick.time > previous.time) resampled.push(pick);
  }
  return resampled;
}
112+
113+ function buildVolumeExpression ( track : AudioTrack , ignoreKeyframes = false ) : string {
34114 const trimDuration = track . end - track . start ;
35115 const staticVolume = clampVolume ( track . volume ) ;
36- const keyframes = ( track . volumeKeyframes ?? [ ] )
116+ const keyframes = ( ignoreKeyframes ? [ ] : ( track . volumeKeyframes ?? [ ] ) )
37117 . filter ( ( keyframe ) => Number . isFinite ( keyframe . time ) && Number . isFinite ( keyframe . volume ) )
38118 . map ( ( keyframe ) => ( {
39119 time : Math . max ( 0 , Math . min ( trimDuration , keyframe . time - track . start ) ) ,
@@ -57,14 +137,19 @@ function buildVolumeExpression(track: AudioTrack): string {
57137 }
58138 }
59139
60- if ( deduped . length === 1 ) {
61- return `volume=${ formatFilterNumber ( deduped [ 0 ] ! . volume ) } ` ;
140+ // Collapse the densely-sampled probe output to a bounded piecewise-linear
141+ // envelope. Without this, the nested-if expression below grows one level per
142+ // keyframe and overflows FFmpeg's expression evaluator (see MAX_VOLUME_SEGMENTS).
143+ const simplified = simplifyVolumeKeyframes ( deduped ) ;
144+
145+ if ( simplified . length === 1 ) {
146+ return `volume=${ formatFilterNumber ( simplified [ 0 ] ! . volume ) } ` ;
62147 }
63148
64- let expression = formatFilterNumber ( deduped . at ( - 1 ) ! . volume ) ;
65- for ( let i = deduped . length - 2 ; i >= 0 ; i -= 1 ) {
66- const current = deduped [ i ] ! ;
67- const next = deduped [ i + 1 ] ! ;
149+ let expression = formatFilterNumber ( simplified . at ( - 1 ) ! . volume ) ;
150+ for ( let i = simplified . length - 2 ; i >= 0 ; i -= 1 ) {
151+ const current = simplified [ i ] ! ;
152+ const next = simplified [ i + 1 ] ! ;
68153 const currentTime = formatFilterNumber ( current . time ) ;
69154 const nextTime = formatFilterNumber ( next . time ) ;
70155 const currentVolume = formatFilterNumber ( current . volume ) ;
@@ -299,42 +384,58 @@ async function mixAudioTracks(
299384 const outputDir = dirname ( outputPath ) ;
300385 if ( ! existsSync ( outputDir ) ) mkdirSync ( outputDir , { recursive : true } ) ;
301386
302- const inputs : string [ ] = [ ] ;
303- const filterParts : string [ ] = [ ] ;
304-
305- tracks . forEach ( ( track , i ) => {
306- inputs . push ( "-i" , track . srcPath ) ;
307- const delayMs = Math . round ( track . start * 1000 ) ;
308- const trimDuration = track . end - track . start ;
309- const volumeFilter = buildVolumeExpression ( track ) ;
310- filterParts . push (
311- `[${ i } :a]atrim=0:${ trimDuration } ,${ volumeFilter } ,adelay=${ delayMs } |${ delayMs } ,apad=whole_dur=${ totalDuration } [a${ i } ]` ,
312- ) ;
313- } ) ;
314-
315- const mixInputs = tracks . map ( ( _ , i ) => `[a${ i } ]` ) . join ( "" ) ;
316- const weights = tracks . map ( ( ) => "1" ) . join ( " " ) ;
317- const mixFilter = `${ mixInputs } amix=inputs=${ tracks . length } :duration=longest:dropout_transition=0:normalize=0:weights='${ weights } '[mixed]` ;
318- const postMixGainFilter = `[mixed]volume=${ masterOutputGain } [out]` ;
319- const fullFilter = [ ...filterParts , mixFilter , postMixGainFilter ] . join ( ";" ) ;
387+ const buildArgs = ( ignoreAutomation : boolean ) : string [ ] => {
388+ const inputs : string [ ] = [ ] ;
389+ const filterParts : string [ ] = [ ] ;
390+ tracks . forEach ( ( track , i ) => {
391+ inputs . push ( "-i" , track . srcPath ) ;
392+ const delayMs = Math . round ( track . start * 1000 ) ;
393+ const trimDuration = track . end - track . start ;
394+ const volumeFilter = buildVolumeExpression ( track , ignoreAutomation ) ;
395+ filterParts . push (
396+ `[${ i } :a]atrim=0:${ trimDuration } ,${ volumeFilter } ,adelay=${ delayMs } |${ delayMs } ,apad=whole_dur=${ totalDuration } [a${ i } ]` ,
397+ ) ;
398+ } ) ;
320399
321- const args = [
322- ...inputs ,
323- "-filter_complex" ,
324- fullFilter ,
325- "-map" ,
326- "[out]" ,
327- "-acodec" ,
328- "aac" ,
329- "-b:a" ,
330- "192k" ,
331- "-t" ,
332- String ( totalDuration ) ,
333- "-y" ,
334- outputPath ,
335- ] ;
400+ const mixInputs = tracks . map ( ( _ , i ) => `[a${ i } ]` ) . join ( "" ) ;
401+ const weights = tracks . map ( ( ) => "1" ) . join ( " " ) ;
402+ const mixFilter = `${ mixInputs } amix=inputs=${ tracks . length } :duration=longest:dropout_transition=0:normalize=0:weights='${ weights } '[mixed]` ;
403+ const postMixGainFilter = `[mixed]volume=${ masterOutputGain } [out]` ;
404+ const fullFilter = [ ...filterParts , mixFilter , postMixGainFilter ] . join ( ";" ) ;
405+
406+ return [
407+ ...inputs ,
408+ "-filter_complex" ,
409+ fullFilter ,
410+ "-map" ,
411+ "[out]" ,
412+ "-acodec" ,
413+ "aac" ,
414+ "-b:a" ,
415+ "192k" ,
416+ "-t" ,
417+ String ( totalDuration ) ,
418+ "-y" ,
419+ outputPath ,
420+ ] ;
421+ } ;
336422
337- const result = await runFfmpeg ( args , { signal, timeout : ffmpegProcessTimeout } ) ;
423+ let result = await runFfmpeg ( buildArgs ( false ) , { signal, timeout : ffmpegProcessTimeout } ) ;
424+
425+ // Defense in depth: volume automation is folded into an FFmpeg `volume`
426+ // expression whose evaluator limits are build-dependent (see
427+ // MAX_VOLUME_SEGMENTS). If that ever fails the mix, retry once without the
428+ // automation so the track renders at its base volume rather than being
429+ // dropped from the output entirely — a missing fade beats missing audio.
430+ let degradedAutomation = false ;
431+ const hasAutomation = tracks . some ( ( track ) => ( track . volumeKeyframes ?. length ?? 0 ) > 0 ) ;
432+ if ( ! result . success && ! signal ?. aborted && hasAutomation ) {
433+ const retry = await runFfmpeg ( buildArgs ( true ) , { signal, timeout : ffmpegProcessTimeout } ) ;
434+ if ( retry . success ) {
435+ result = retry ;
436+ degradedAutomation = true ;
437+ }
438+ }
338439
339440 if ( signal ?. aborted ) {
340441 return {
@@ -360,6 +461,9 @@ async function mixAudioTracks(
360461 outputPath,
361462 durationMs : result . durationMs ,
362463 tracksProcessed : tracks . length ,
464+ error : degradedAutomation
465+ ? "Volume automation exceeded this ffmpeg build's expression limits; rendered at base volume"
466+ : undefined ,
363467 } ;
364468}
365469
@@ -452,15 +556,29 @@ export async function processCompositionAudio(
452556 audioSrcPath = trimmedPath ;
453557 }
454558
559+ // Primary volume-automation path: bake the envelope into the PCM samples
560+ // (sample-accurate, no keyframe ceiling). If the WAV isn't the expected
561+ // 16-bit PCM, fall back to the ffmpeg expression path by leaving the
562+ // keyframes on the track for buildVolumeExpression to handle.
563+ let bakedEnvelope = false ;
564+ if ( element . volumeKeyframes && element . volumeKeyframes . length > 0 ) {
565+ bakedEnvelope = applyVolumeEnvelopeToWav (
566+ audioSrcPath ,
567+ element . volumeKeyframes ,
568+ element . start ,
569+ element . volume ?? 1.0 ,
570+ ) ;
571+ }
455572 tracks . push ( {
456573 id : element . id ,
457574 srcPath : audioSrcPath ,
458575 start : element . start ,
459576 end : element . end ,
460577 mediaStart : element . mediaStart ,
461578 duration : element . end - element . start ,
462- volume : element . volume ?? 1.0 ,
463- volumeKeyframes : element . volumeKeyframes ,
579+ // Gain is already in the samples when baked, so mix at unity.
580+ volume : bakedEnvelope ? 1.0 : ( element . volume ?? 1.0 ) ,
581+ volumeKeyframes : bakedEnvelope ? undefined : element . volumeKeyframes ,
464582 } ) ;
465583 } catch ( err : unknown ) {
466584 errors . push ( `Error: ${ element . id } — ${ err instanceof Error ? err . message : String ( err ) } ` ) ;
0 commit comments