Skip to content

Commit 167a222

Browse files
committed
fix: support animated audio volume
1 parent 7461f1d commit 167a222

10 files changed

Lines changed: 324 additions & 8 deletions

File tree

packages/core/src/runtime/init.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,10 @@ export function initSandboxRuntimeModular(): void {
12411241
for (const mediaEl of mediaEls) {
12421242
if (metadataBoundMedia.has(mediaEl)) continue;
12431243
metadataBoundMedia.add(mediaEl);
1244+
const parsedVolume = Number.parseFloat(mediaEl.dataset.volume ?? "");
1245+
if (Number.isFinite(parsedVolume)) {
1246+
mediaEl.volume = Math.max(0, Math.min(1, parsedVolume));
1247+
}
12441248
mediaEl.addEventListener("loadedmetadata", scheduleMetadataDurationHydration);
12451249
mediaEl.addEventListener("durationchange", scheduleMetadataDurationHydration);
12461250

@@ -1312,6 +1316,7 @@ export function initSandboxRuntimeModular(): void {
13121316
userMuted: state.bridgeMuted,
13131317
userVolume: state.bridgeVolume,
13141318
forceSync,
1319+
onElementVolume: (el, volume) => webAudio.setElementVolume(el, volume),
13151320
onAutoplayBlocked: () => {
13161321
if (state.mediaAutoplayBlockedPosted) return;
13171322
state.mediaAutoplayBlockedPosted = true;
@@ -1461,8 +1466,8 @@ export function initSandboxRuntimeModular(): void {
14611466
externalCompositionsReady = true;
14621467
bindRootTimelineIfAvailable();
14631468
window.__renderReady = true;
1464-
runAdapters("discover", state.currentTime);
14651469
bindMediaMetadataListeners();
1470+
runAdapters("discover", state.currentTime);
14661471
installAssetFailureDiagnostics();
14671472
applyCaptionOverrides();
14681473
postTimeline();
@@ -1667,8 +1672,8 @@ export function initSandboxRuntimeModular(): void {
16671672
] as RuntimeDeterministicAdapter[];
16681673
patchVideoTextureCompat();
16691674
installRuntimeErrorDiagnostics();
1670-
runAdapters("discover");
16711675
bindMediaMetadataListeners();
1676+
runAdapters("discover");
16721677
// ── Single-clock transport ──
16731678
//
16741679
// TransportClock is the sole time authority. GSAP is always paused —

packages/core/src/runtime/media.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,41 @@ describe("syncRuntimeMedia", () => {
343343
expect(clip.el.volume).toBeCloseTo(0.3);
344344
});
345345

346+
it("preserves authored volume changes made between sync ticks", () => {
347+
const clip = createMockClip({ start: 0, end: 10, volume: 0 });
348+
syncRuntimeMedia({ clips: [clip], timeSeconds: 0, playing: false, playbackRate: 1 });
349+
expect(clip.el.volume).toBe(0);
350+
351+
clip.el.volume = 0.5;
352+
syncRuntimeMedia({ clips: [clip], timeSeconds: 0.5, playing: false, playbackRate: 1 });
353+
354+
expect(clip.el.volume).toBe(0.5);
355+
});
356+
357+
it("reports the effective element volume to external audio transports", () => {
358+
const clip = createMockClip({ start: 0, end: 10, volume: 0 });
359+
const onElementVolume = vi.fn();
360+
syncRuntimeMedia({
361+
clips: [clip],
362+
timeSeconds: 0,
363+
playing: false,
364+
playbackRate: 1,
365+
onElementVolume,
366+
});
367+
clip.el.volume = 0.75;
368+
syncRuntimeMedia({
369+
clips: [clip],
370+
timeSeconds: 1,
371+
playing: false,
372+
playbackRate: 1,
373+
userVolume: 0.5,
374+
onElementVolume,
375+
});
376+
377+
expect(clip.el.volume).toBeCloseTo(0.375);
378+
expect(onElementVolume).toHaveBeenLastCalledWith(clip.el, 0.375);
379+
});
380+
346381
it("hard-syncs on the first active tick (sub-composition activation, mediaStart offsets)", () => {
347382
const clip = createMockClip({ start: 0, end: 10, mediaStart: 0 });
348383
Object.defineProperty(clip.el, "currentTime", { value: 0, writable: true });

packages/core/src/runtime/media.ts

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ function markPlayRequested(el: HTMLMediaElement): void {
103103
el.addEventListener("error", clear, { once: true });
104104
}
105105

106+
const lastRuntimeAppliedVolume = new WeakMap<HTMLMediaElement, number>();
107+
108+
/**
 * Normalises a volume value into the `[0, 1]` range accepted by
 * `HTMLMediaElement.volume`.
 *
 * @param volume - Candidate volume; may be out of range or non-finite.
 * @returns The value limited to `[0, 1]`, or `1` when the input is `NaN` or
 *   either infinity.
 */
function clampVolume(volume: number): number {
  return Number.isFinite(volume) ? Math.min(Math.max(volume, 0), 1) : 1;
}
112+
106113
export function syncRuntimeMedia(params: {
107114
clips: RuntimeMediaClip[];
108115
timeSeconds: number;
@@ -132,6 +139,7 @@ export function syncRuntimeMedia(params: {
132139
* outbound message; further invocations are suppressed by the caller.
133140
*/
134141
onAutoplayBlocked?: () => void;
142+
onElementVolume?: (el: HTMLMediaElement, volume: number) => void;
135143
forceSync?: boolean;
136144
}): void {
137145
// Either flag silences output. Combined up front so the per-clip loop is
@@ -151,8 +159,19 @@ export function syncRuntimeMedia(params: {
151159
relTime = clip.mediaStart + ((relTime - clip.mediaStart) % loopLength);
152160
}
153161
}
154-
const userVol = params.userVolume ?? 1;
155-
el.volume = (clip.volume ?? 1) * userVol;
162+
const userVol = clampVolume(params.userVolume ?? 1);
163+
const fallbackAuthorVolume = clampVolume(clip.volume ?? 1);
164+
const previousRuntimeVolume = lastRuntimeAppliedVolume.get(el);
165+
const currentElementVolume = clampVolume(el.volume);
166+
const authorVolume =
167+
previousRuntimeVolume !== undefined &&
168+
Math.abs(currentElementVolume - previousRuntimeVolume) > 0.0001
169+
? currentElementVolume
170+
: fallbackAuthorVolume;
171+
const effectiveVolume = clampVolume(authorVolume * userVol);
172+
el.volume = effectiveVolume;
173+
lastRuntimeAppliedVolume.set(el, effectiveVolume);
174+
params.onElementVolume?.(el, effectiveVolume);
156175
if (shouldMute) el.muted = true;
157176
// Ensure full preload for every active media element. Streaming
158177
// formats (MP3) may arrive with preload="metadata", which only
@@ -283,6 +302,7 @@ export function syncRuntimeMedia(params: {
283302
lastOffset.delete(el);
284303
strictDriftSamples.delete(el);
285304
seekLoadRetried.delete(el);
305+
lastRuntimeAppliedVolume.delete(el);
286306
if (!el.paused) el.pause();
287307
}
288308
}

packages/core/src/runtime/webAudioTransport.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,18 @@ export class WebAudioTransport {
191191
}
192192
}
193193

194+
/**
 * Sets the gain of every active source that plays the given media element.
 *
 * @param el - Media element whose active sources should be adjusted.
 * @param volume - Requested gain; limited to `[0, 1]` before being applied.
 */
setElementVolume(el: HTMLMediaElement, volume: number): void {
  const gain = Math.min(1, Math.max(0, volume));
  for (const candidate of this._activeSources) {
    if (candidate.el === el) {
      try {
        candidate.gainNode.gain.value = gain;
      } catch (error) {
        // Best-effort: a failing source must not stop the remaining sources
        // from being updated.
        swallow("webAudioTransport.setElementVolume", error);
      }
    }
  }
}
205+
194206
setMuted(muted: boolean): void {
195207
if (this._masterGain) {
196208
this._masterGain.gain.value = muted ? 0 : 1;

packages/engine/src/index.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,12 @@ export {
135135
export { createVideoFrameInjector } from "./services/videoFrameInjector.js";
136136

137137
export { parseAudioElements, processCompositionAudio } from "./services/audioMixer.js";
138-
export type { AudioElement, AudioTrack, MixResult } from "./services/audioMixer.types.js";
138+
export type {
139+
AudioElement,
140+
AudioTrack,
141+
AudioVolumeKeyframe,
142+
MixResult,
143+
} from "./services/audioMixer.types.js";
139144

140145
// ── Parallel rendering ─────────────────────────────────────────────────────────
141146
export {

packages/engine/src/services/audioMixer.test.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,47 @@ describe("processCompositionAudio", () => {
6464
expect(filter).toContain("volume=0");
6565
expect(filter).toContain("[mixed]volume=1[out]");
6666
});
67+
68+
it("uses frame-evaluated volume automation when keyframes are present", async () => {
69+
const baseDir = mkdtempSync(join(tmpdir(), "hf-audio-base-"));
70+
const workDir = mkdtempSync(join(tmpdir(), "hf-audio-work-"));
71+
tempDirs.push(baseDir, workDir);
72+
73+
writeFileSync(join(baseDir, "voice.wav"), "stub");
74+
75+
const result = await processCompositionAudio(
76+
[
77+
{
78+
id: "voice",
79+
src: "voice.wav",
80+
start: 2,
81+
end: 5,
82+
mediaStart: 0,
83+
layer: 0,
84+
volume: 0,
85+
volumeKeyframes: [
86+
{ time: 2, volume: 0 },
87+
{ time: 3, volume: 1 },
88+
{ time: 5, volume: 0.5 },
89+
],
90+
type: "audio",
91+
},
92+
],
93+
baseDir,
94+
workDir,
95+
join(baseDir, "out.m4a"),
96+
5,
97+
);
98+
99+
expect(result.success).toBe(true);
100+
101+
const mixArgs = runFfmpegMock.mock.calls[1]?.[0];
102+
const filterIndex = mixArgs.indexOf("-filter_complex");
103+
const filter = mixArgs[filterIndex + 1];
104+
105+
expect(filter).toContain("volume=");
106+
expect(filter).toContain(":eval=frame");
107+
expect(filter).toContain("lt(t\\,1)");
108+
expect(filter).toContain("adelay=2000|2000");
109+
});
67110
});

packages/engine/src/services/audioMixer.ts

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,67 @@ import { unwrapTemplate } from "../utils/htmlTemplate.js";
1515
import { resolveProjectRelativeSrc } from "./videoFrameExtractor.js";
1616
import type { AudioElement, AudioTrack, MixResult } from "./audioMixer.types.js";
1717

18-
export type { AudioElement, AudioTrack, MixResult } from "./audioMixer.types.js";
18+
// Public type surface of the mixer module. `AudioTrack` stays re-exported so
// existing `import type { AudioTrack } from "./audioMixer.js"` consumers keep
// compiling; `AudioVolumeKeyframe` is exposed alongside the fields that use it.
export type {
  AudioElement,
  AudioTrack,
  AudioVolumeKeyframe,
  MixResult,
} from "./audioMixer.types.js";
19+
20+
/** A volume keyframe expressed in clip-relative seconds with a clamped volume. */
interface VolumePoint {
  time: number;
  volume: number;
}

/** Keyframes closer together than this many seconds are merged into one. */
const KEYFRAME_TIME_EPSILON = 0.000001;

/**
 * Limits a volume to `[0, 1]`.
 *
 * @param volume - Candidate volume; may be out of range or non-finite.
 * @returns The limited value, or `1` when the input is not a finite number.
 */
function clampVolume(volume: number): number {
  if (!Number.isFinite(volume)) return 1;
  return Math.max(0, Math.min(1, volume));
}

/**
 * Renders a number for use inside an ffmpeg filter string: at most six
 * decimals, no trailing zeros, and never a negative zero.
 */
function formatFilterNumber(value: number): string {
  return Number(value.toFixed(6)).toString();
}

/**
 * Backslash-escapes every comma of an expression so it is not read as a
 * filter separator when embedded in a filter chain.
 */
function escapeExpressionCommas(expression: string): string {
  return expression.replace(/,/g, "\\,");
}

/**
 * Evaluates a piecewise-linear volume curve at `time`.
 *
 * @param points - Non-empty list sorted by strictly increasing time.
 * @param time - Clip-relative time in seconds.
 * @returns The interpolated volume; the first/last volume is held outside the
 *   range covered by `points`.
 */
function sampleVolumeCurve(points: readonly VolumePoint[], time: number): number {
  const first = points[0]!;
  if (time <= first.time) return first.volume;
  for (let i = 1; i < points.length; i += 1) {
    const next = points[i]!;
    if (time < next.time) {
      const previous = points[i - 1]!;
      const ratio = (time - previous.time) / (next.time - previous.time);
      return previous.volume + (next.volume - previous.volume) * ratio;
    }
  }
  return points[points.length - 1]!.volume;
}

/**
 * Builds the ffmpeg `volume` filter for one track.
 *
 * Without usable keyframes the result is a constant `volume=<n>` filter using
 * the track's static volume. With keyframes the result is a nested
 * `if(lt(t,…),…)` expression evaluated per frame (`:eval=frame`) that linearly
 * interpolates between keyframes and holds the last value afterwards.
 *
 * Keyframe times are given on the same timeline as `track.start` and are
 * converted to clip-relative time. If the first keyframe lies after the clip
 * start, the curve ramps from the static volume at the clip start to that
 * keyframe. Keyframes outside the clip window are not snapped onto the window
 * edge; instead the curve is sampled at the edge so the slope of a fade that
 * crosses the clip boundary is preserved.
 *
 * @param track - Track whose `start`, `end`, `volume` and optional
 *   `volumeKeyframes` define the automation.
 * @returns A `volume=…` filter fragment ready to be placed in a filter chain.
 */
function buildVolumeExpression(track: AudioTrack): string {
  const trimDuration = track.end - track.start;
  // A non-finite or negative length collapses the window to a single instant
  // instead of leaking NaN into the generated expression.
  const clipLength = Number.isFinite(trimDuration) ? Math.max(0, trimDuration) : 0;
  const staticVolume = clampVolume(track.volume);

  const sorted: VolumePoint[] = (track.volumeKeyframes ?? [])
    .filter((keyframe) => Number.isFinite(keyframe.time) && Number.isFinite(keyframe.volume))
    .map((keyframe) => ({
      time: keyframe.time - track.start,
      volume: clampVolume(keyframe.volume),
    }))
    .filter((point) => Number.isFinite(point.time))
    .sort((a, b) => a.time - b.time);

  const firstKeyframe = sorted[0];
  if (firstKeyframe === undefined) return `volume=${formatFilterNumber(staticVolume)}`;

  // The static volume anchors the clip start when automation begins later.
  if (firstKeyframe.time > 0) {
    sorted.unshift({ time: 0, volume: staticVolume });
  }

  // Merge keyframes that share a timestamp; the later one wins.
  const curve: VolumePoint[] = [];
  for (const point of sorted) {
    const previous = curve[curve.length - 1];
    if (previous !== undefined && Math.abs(previous.time - point.time) < KEYFRAME_TIME_EPSILON) {
      previous.volume = point.volume;
    } else {
      curve.push(point);
    }
  }

  // Restrict the curve to [0, clipLength], sampling it at each edge that a
  // segment crosses rather than moving the outside keyframe onto the edge.
  const windowed: VolumePoint[] = curve.filter(
    (point) => point.time >= 0 && point.time <= clipLength,
  );
  const firstInside = windowed[0];
  if (
    curve[0]!.time < 0 &&
    (firstInside === undefined || firstInside.time >= KEYFRAME_TIME_EPSILON)
  ) {
    windowed.unshift({ time: 0, volume: sampleVolumeCurve(curve, 0) });
  }
  const lastInside = windowed[windowed.length - 1];
  if (
    curve[curve.length - 1]!.time > clipLength &&
    (lastInside === undefined || clipLength - lastInside.time >= KEYFRAME_TIME_EPSILON)
  ) {
    windowed.push({ time: clipLength, volume: sampleVolumeCurve(curve, clipLength) });
  }

  if (windowed.length === 1) {
    return `volume=${formatFilterNumber(windowed[0]!.volume)}`;
  }

  // Build the expression from the last segment backwards so each earlier
  // segment wraps the remainder in its else-branch.
  let expression = formatFilterNumber(windowed[windowed.length - 1]!.volume);
  for (let i = windowed.length - 2; i >= 0; i -= 1) {
    const current = windowed[i]!;
    const next = windowed[i + 1]!;
    const currentTime = formatFilterNumber(current.time);
    const nextTime = formatFilterNumber(next.time);
    const currentVolume = formatFilterNumber(current.volume);
    const span = Math.max(KEYFRAME_TIME_EPSILON, next.time - current.time);
    const slope = formatFilterNumber((next.volume - current.volume) / span);
    const segment = `${currentVolume}+(${slope})*(t-${currentTime})`;
    expression = `if(lt(t,${nextTime}),${segment},${expression})`;
  }

  return `volume=${escapeExpressionCommas(expression)}:eval=frame`;
}
1979

2080
interface ExtractResult {
2181
success: boolean;
@@ -246,8 +306,9 @@ async function mixAudioTracks(
246306
inputs.push("-i", track.srcPath);
247307
const delayMs = Math.round(track.start * 1000);
248308
const trimDuration = track.end - track.start;
309+
const volumeFilter = buildVolumeExpression(track);
249310
filterParts.push(
250-
`[${i}:a]atrim=0:${trimDuration},volume=${track.volume},adelay=${delayMs}|${delayMs},apad=whole_dur=${totalDuration}[a${i}]`,
311+
`[${i}:a]atrim=0:${trimDuration},${volumeFilter},adelay=${delayMs}|${delayMs},apad=whole_dur=${totalDuration}[a${i}]`,
251312
);
252313
});
253314

@@ -399,6 +460,7 @@ export async function processCompositionAudio(
399460
mediaStart: element.mediaStart,
400461
duration: element.end - element.start,
401462
volume: element.volume ?? 1.0,
463+
volumeKeyframes: element.volumeKeyframes,
402464
});
403465
} catch (err: unknown) {
404466
errors.push(`Error: ${element.id}${err instanceof Error ? err.message : String(err)}`);

packages/engine/src/services/audioMixer.types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
/**
 * One point of an audio element's volume automation curve. The mixer
 * interpolates linearly between consecutive keyframes.
 */
export interface AudioVolumeKeyframe {
2+
/**
 * Position of the keyframe in seconds, on the same timeline as the owning
 * element's `start`/`end` (the mixer subtracts the track start from it).
 */
time: number;
3+
/** Volume at `time`; the mixer limits it to `[0, 1]` and ignores non-finite values. */
volume: number;
4+
}
5+
16
export interface AudioElement {
27
id: string;
38
src: string;
@@ -6,6 +11,7 @@ export interface AudioElement {
611
mediaStart: number;
712
layer: number;
813
volume?: number;
14+
volumeKeyframes?: AudioVolumeKeyframe[];
915
type: "audio" | "video";
1016
}
1117

@@ -17,6 +23,7 @@ export interface AudioTrack {
1723
mediaStart: number;
1824
duration: number;
1925
volume: number;
26+
volumeKeyframes?: AudioVolumeKeyframe[];
2027
}
2128

2229
export interface MixResult {

0 commit comments

Comments
 (0)