Skip to content

Commit 2b09b51

Browse files
committed
feat: support iOS 26 Speaker Selection API via shared WebAudio relay element
1 parent 3a623b2 commit 2b09b51

4 files changed

Lines changed: 221 additions & 26 deletions

File tree

src/room/Room.ts

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,20 @@ import {
103103
import {
104104
Future,
105105
createDummyVideoStreamTrack,
106+
disposeSharedRelay,
106107
extractChatMessage,
107108
extractTranscriptionSegments,
108109
getDisconnectReasonFromConnectionError,
109110
getEmptyAudioStreamTrack,
111+
getOrCreateSharedRelay,
110112
isBrowserSupported,
111113
isCloud,
112114
isLocalAudioTrack,
113115
isLocalParticipant,
114116
isReactNative,
115117
isRemotePub,
116118
isSafariBased,
119+
isSafariSpeakerSelectionSupported,
117120
isWeb,
118121
numberToBigInt,
119122
sleep,
@@ -1446,14 +1449,28 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
14461449
if (success && isMuted) shouldTriggerImmediateDeviceChange = true;
14471450
} else if (kind === 'audiooutput') {
14481451
shouldTriggerImmediateDeviceChange = true;
1449-
if (
1450-
(!supportsSetSinkId() && !this.options.webAudioMix) ||
1451-
(this.options.webAudioMix && this.audioContext && !('setSinkId' in this.audioContext))
1452-
) {
1452+
// True when we can route output via AudioContext.setSinkId directly, e.g.,
1453+
// Chrome / Edge / Firefox + webAudioMix : true (use AudioContext.setSinkId)
1454+
// Safari macOS (any version) : false (AudioContext.setSinkId not implemented)
1455+
// Safari iOS (any version) : false (AudioContext.setSinkId not implemented)
1456+
// any browser without webAudioMix : false
1457+
const audioContextHasSinkId =
1458+
this.options.webAudioMix && !!this.audioContext && 'setSinkId' in this.audioContext;
1459+
// True when we route output via the shared relay <audio> element (iOS 26 path).
1460+
// iOS 26+ Safari + webAudioMix : true
1461+
// macOS Safari 26+ + webAudioMix : true (also benefits from the relay approach)
1462+
// anything else : false
1463+
const useSharedRelay = isSafariSpeakerSelectionSupported() && !!this.audioContext;
1464+
// Throw only when neither HTMLMediaElement.setSinkId nor AudioContext.setSinkId is usable.
1465+
// When webAudioMix=true but AudioContext.setSinkId is unavailable (e.g. iOS 26 Safari),
1466+
// we fall through and rely on the shared relay-element setSinkId path.
1467+
if (!supportsSetSinkId() && !audioContextHasSinkId) {
14531468
throw new Error('cannot switch audio output, the current browser does not support it');
14541469
}
1455-
if (this.options.webAudioMix) {
1456-
// setting `default` for web audio output doesn't work, so we need to normalize the id before
1470+
if (audioContextHasSinkId) {
1471+
// AudioContext.setSinkId (Chrome) doesn't accept 'default', so resolve to a real device id.
1472+
// On iOS 26 we use HTMLMediaElement.setSinkId on a relay element instead, which does
1473+
// accept 'default', so skip normalization there.
14571474
deviceId =
14581475
(await DeviceManager.getInstance().normalizeDeviceId('audiooutput', deviceId)) ?? '';
14591476
}
@@ -1462,16 +1479,28 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
14621479
this.options.audioOutput.deviceId = deviceId;
14631480

14641481
try {
1465-
if (this.options.webAudioMix) {
1482+
if (audioContextHasSinkId) {
14661483
// @ts-expect-error setSinkId is not yet in the typescript type of AudioContext
14671484
this.audioContext?.setSinkId(deviceId);
14681485
}
14691486

1470-
// also set audio output on all audio elements, even if webAudioMix is enabled in order to workaround echo cancellation not working on chrome with non-default output devices
1471-
// see https://issues.chromium.org/issues/40252911#comment7
1472-
await Promise.all(
1473-
Array.from(this.remoteParticipants.values()).map((p) => p.setAudioOutput({ deviceId })),
1474-
);
1487+
if (useSharedRelay) {
1488+
// iOS 26 path: route via a single shared relay <audio> element on the AudioContext.
1489+
// Calling setSinkId here (within the user gesture) grants iOS permission for this
1490+
// element, which persists for the call — so new remote tracks joining later route
1491+
// through the already-permitted element without needing their own user gesture.
1492+
await (getOrCreateSharedRelay(this.audioContext!).relayElement.setSinkId(
1493+
deviceId,
1494+
) as Promise<void>);
1495+
} else {
1496+
// Standard path for browsers with HTMLMediaElement.setSinkId (Chrome, Firefox, etc.):
1497+
// apply setSinkId on each participant's attached <audio> element directly.
1498+
// Note: Chrome with webAudioMix=true also needs this for echo cancellation with
1499+
// non-default output devices — see https://issues.chromium.org/issues/40252911#comment7
1500+
await Promise.all(
1501+
Array.from(this.remoteParticipants.values()).map((p) => p.setAudioOutput({ deviceId })),
1502+
);
1503+
}
14751504
} catch (e) {
14761505
this.options.audioOutput.deviceId = prevDeviceId;
14771506
throw e;
@@ -1806,9 +1835,12 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
18061835
this.remoteParticipants.clear();
18071836
this.sidToIdentity.clear();
18081837
this.activeSpeakers = [];
1809-
if (this.audioContext && typeof this.options.webAudioMix === 'boolean') {
1810-
this.audioContext.close();
1811-
this.audioContext = undefined;
1838+
if (this.audioContext) {
1839+
disposeSharedRelay(this.audioContext);
1840+
if (typeof this.options.webAudioMix === 'boolean') {
1841+
this.audioContext.close();
1842+
this.audioContext = undefined;
1843+
}
18121844
}
18131845
if (isWeb()) {
18141846
window.removeEventListener('beforeunload', this.onPageLeave);
@@ -2229,7 +2261,15 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
22292261
continue;
22302262
}
22312263
const devicesOfKind = availableDevices.filter((d) => d.kind === kind);
2232-
const activeDevice = this.getActiveDevice(kind);
2264+
// For audiooutput, also check options.audioOutput.deviceId as a fallback: switchActiveDevice
2265+
// sets that field before awaiting setSinkId, but only updates activeDeviceMap after the
2266+
// await resolves. If a devicechange handler fires inside that window before any audiooutput
2267+
// switch has been recorded, activeDeviceMap is empty and the fallback yields the in-flight
2268+
// selection rather than nothing.
2269+
const activeDevice =
2270+
kind === 'audiooutput'
2271+
? (this.getActiveDevice(kind) ?? this.options.audioOutput?.deviceId)
2272+
: this.getActiveDevice(kind);
22332273

22342274
if (activeDevice === previousDevices.filter((info) => info.kind === kind)[0]?.deviceId) {
22352275
// in Safari the first device is always the default, so we assume a user on the default device would like to switch to the default once it changes
@@ -2247,18 +2287,19 @@ class Room extends (EventEmitter as new () => TypedEmitter<RoomEventCallbacks>)
22472287
// switch to first available device if previously active device is not available any more
22482288
if (
22492289
devicesOfKind.length > 0 &&
2250-
!devicesOfKind.find((deviceInfo) => deviceInfo.deviceId === this.getActiveDevice(kind)) &&
2290+
!devicesOfKind.find((deviceInfo) => deviceInfo.deviceId === activeDevice) &&
22512291
// avoid switching audio output on safari without explicit user action as it leads to slowed down audio playback
2252-
(kind !== 'audiooutput' || !isSafariBased())
2292+
// exception: iOS/Safari 26+ supports the Speaker Selection API
2293+
(kind !== 'audiooutput' || !isSafariBased() || isSafariSpeakerSelectionSupported())
22532294
) {
22542295
await this.switchActiveDevice(kind, devicesOfKind[0].deviceId);
22552296
}
22562297
}
22572298
}
22582299

22592300
private handleDeviceChange = async () => {
2260-
if (getBrowser()?.os !== 'iOS') {
2261-
// default devices are non deterministic on iOS, so we don't attempt to select them here
2301+
if (getBrowser()?.os !== 'iOS' || isSafariSpeakerSelectionSupported()) {
2302+
// default devices are non deterministic on iOS (pre-26), so we don't attempt to select them here
22622303
await this.selectDefaultDevices();
22632304
}
22642305
this.emit(RoomEvent.MediaDevicesChanged);

src/room/track/RemoteAudioTrack.ts

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ import { TrackEvent } from '../events';
22
import type { AudioReceiverStats } from '../stats';
33
import { computeBitrate } from '../stats';
44
import type { LoggerOptions } from '../types';
5-
import { isReactNative, supportsSetSinkId } from '../utils';
5+
import {
6+
getOrCreateSharedRelay,
7+
isReactNative,
8+
isSafariSpeakerSelectionSupported,
9+
supportsSetSinkId,
10+
} from '../utils';
611
import RemoteTrack from './RemoteTrack';
712
import { Track } from './Track';
813
import type { AudioOutputOptions } from './options';
@@ -82,6 +87,13 @@ export default class RemoteAudioTrack extends RemoteTrack<Track.Kind.Audio> {
8287
*/
8388
async setSinkId(deviceId: string) {
8489
this.sinkId = deviceId;
90+
// On iOS 26, audio routing is handled by the shared relay element at the AudioContext
91+
// level — see Room.switchActiveDevice. The attached elements here are muted/vol=0, and
92+
// calling setSinkId on them would throw NotAllowedError without a concurrent user gesture
93+
// (e.g. when a participant joins after a device switch).
94+
if (isSafariSpeakerSelectionSupported()) {
95+
return;
96+
}
8597
await Promise.all(
8698
this.attachedElements.map((elm) => {
8799
if (!supportsSetSinkId(elm)) {
@@ -103,8 +115,11 @@ export default class RemoteAudioTrack extends RemoteTrack<Track.Kind.Audio> {
103115
super.attach(element);
104116
}
105117

106-
if (this.sinkId && supportsSetSinkId(element)) {
107-
element.setSinkId(this.sinkId).catch((e) => {
118+
// Skip setSinkId on the primary element on iOS 26: the element is muted/vol=0 below and
119+
// audio routing happens via the shared relay, so calling setSinkId here would only throw
120+
// NotAllowedError when no user gesture is active.
121+
if (this.sinkId && supportsSetSinkId(element) && !isSafariSpeakerSelectionSupported()) {
122+
(element.setSinkId(this.sinkId) as Promise<void>).catch((e) => {
108123
this.log.error('Failed to set sink id on remote audio track', e, this.logContext);
109124
});
110125
}
@@ -189,7 +204,20 @@ export default class RemoteAudioTrack extends RemoteTrack<Track.Kind.Audio> {
189204
});
190205
this.gainNode = context.createGain();
191206
lastNode.connect(this.gainNode);
192-
this.gainNode.connect(context.destination);
207+
208+
// When AudioContext.setSinkId is unavailable (notably iOS 26 Safari), audio is routed via
209+
// a shared MediaStreamDestinationNode + relay <audio> element. setSinkId() is called once
210+
// on that relay element during the user gesture in Room.switchActiveDevice and applies to
211+
// all remote audio. This sidesteps the iOS quirk where HTMLMediaElement.setSinkId() has no
212+
// effect on elements backed by WebRTC remote tracks (those go through the WebRTC engine's
213+
// internal pipeline → AVAudioSession, bypassing AVPlayer).
214+
const useSharedRelay = !('setSinkId' in context);
215+
if (useSharedRelay) {
216+
this.gainNode.connect(getOrCreateSharedRelay(context).destinationNode);
217+
} else {
218+
// AudioContext.setSinkId() is available (Chrome, Firefox etc.) — use it directly.
219+
this.gainNode.connect(context.destination);
220+
}
193221

194222
if (this.elementVolume) {
195223
this.gainNode.gain.setTargetAtTime(this.elementVolume, 0, 0.1);

src/room/utils.test.ts

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
import { ClientInfo_Capability } from '@livekit/protocol';
2-
import { describe, expect, it } from 'vitest';
3-
import { extractMaxAgeFromRequestHeaders, getClientInfo, splitUtf8, toWebsocketUrl } from './utils';
2+
import { describe, expect, it, vi } from 'vitest';
3+
import {
4+
disposeSharedRelay,
5+
extractMaxAgeFromRequestHeaders,
6+
getClientInfo,
7+
getOrCreateSharedRelay,
8+
isSafariSpeakerSelectionSupported,
9+
splitUtf8,
10+
supportsSetSinkId,
11+
toWebsocketUrl,
12+
} from './utils';
413

514
describe('toWebsocketUrl', () => {
615
it('leaves wss urls alone', () => {
@@ -326,3 +335,76 @@ describe('supportsSetSinkId', () => {
326335
expect(supportsSetSinkId(fakeAudio)).toBe(false);
327336
});
328337
});
338+
339+
describe('getOrCreateSharedRelay / disposeSharedRelay', () => {
  /**
   * Builds a minimal stand-in for an AudioContext: `createMediaStreamDestination`
   * is a spy that always hands back the same stub destination node, whose
   * `disconnect` is a spy as well so teardown can be observed.
   */
  const createMockContext = () => {
    const stream = new MediaStream();
    const destNode = { stream, disconnect: vi.fn() } as unknown as MediaStreamAudioDestinationNode;
    const createMediaStreamDestination = vi.fn(() => destNode);
    const ctx = { createMediaStreamDestination } as unknown as AudioContext;
    return { ctx, destNode, stream };
  };

  it('creates a hidden DOM-attached <audio> element on first call', () => {
    const mock = createMockContext();
    const { destinationNode, relayElement } = getOrCreateSharedRelay(mock.ctx);

    expect(destinationNode).toBe(mock.destNode);
    expect(relayElement.srcObject).toBe(mock.stream);
    expect(relayElement.parentElement).toBe(document.body);
    expect(relayElement.hidden).toBe(true);
    expect(relayElement.autoplay).toBe(true);

    disposeSharedRelay(mock.ctx);
  });

  it('returns the same relay on subsequent calls for the same context (singleton)', () => {
    const { ctx } = createMockContext();
    const firstLookup = getOrCreateSharedRelay(ctx);
    const secondLookup = getOrCreateSharedRelay(ctx);

    // Same object identity, and the destination node was only built once.
    expect(firstLookup).toBe(secondLookup);
    expect(ctx.createMediaStreamDestination).toHaveBeenCalledTimes(1);

    disposeSharedRelay(ctx);
  });

  it('creates independent relays for different contexts', () => {
    const { ctx: ctxA, destNode: destNodeA } = createMockContext();
    const { ctx: ctxB, destNode: destNodeB } = createMockContext();
    const relayA = getOrCreateSharedRelay(ctxA);
    const relayB = getOrCreateSharedRelay(ctxB);

    expect(relayA).not.toBe(relayB);
    expect(relayA.destinationNode).toBe(destNodeA);
    expect(relayB.destinationNode).toBe(destNodeB);

    disposeSharedRelay(ctxA);
    disposeSharedRelay(ctxB);
  });

  it('removes the element from the DOM and disconnects the destination on dispose', () => {
    const { ctx, destNode } = createMockContext();
    const { relayElement } = getOrCreateSharedRelay(ctx);
    expect(relayElement.parentElement).toBe(document.body);

    disposeSharedRelay(ctx);

    expect(relayElement.parentElement).toBeNull();
    expect(destNode.disconnect).toHaveBeenCalled();
  });

  it('creates a fresh relay after dispose', () => {
    const { ctx } = createMockContext();
    const beforeDispose = getOrCreateSharedRelay(ctx);
    disposeSharedRelay(ctx);
    const afterDispose = getOrCreateSharedRelay(ctx);

    // A disposed relay must never be handed out again.
    expect(afterDispose).not.toBe(beforeDispose);
    expect(ctx.createMediaStreamDestination).toHaveBeenCalledTimes(2);

    disposeSharedRelay(ctx);
  });

  it('is a no-op when no relay exists for the context', () => {
    const { ctx, destNode } = createMockContext();

    expect(() => disposeSharedRelay(ctx)).not.toThrow();
    expect(destNode.disconnect).not.toHaveBeenCalled();
  });
});

src/room/utils.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,50 @@ export function isSVCCodec(codec?: string): boolean {
145145
return codec === 'av1' || codec === 'vp9';
146146
}
147147

148+
/**
 * Property name under which the shared relay is cached on an AudioContext instance.
 *
 * iOS 26 grants setSinkId permission per-element (not per-origin), so a new <audio> element
 * created when a remote participant joins would fail setSinkId without a user gesture.
 * Routing all remote tracks through a single shared relay element (created once, setSinkId'd
 * once during the user gesture in Room.switchActiveDevice) sidesteps this.
 */
const SHARED_RELAY_KEY = '_livekitSharedRelay';

/** The destination node remote audio is mixed into, plus the <audio> element that plays it. */
type SharedRelay = {
  destinationNode: MediaStreamAudioDestinationNode;
  relayElement: HTMLAudioElement;
};

/** An AudioContext that may carry a cached relay under {@link SHARED_RELAY_KEY}. */
type AudioContextWithSharedRelay = AudioContext & {
  [SHARED_RELAY_KEY]?: SharedRelay;
};

/**
 * Returns the relay cached on `context`, creating it on first use.
 *
 * Creation builds a MediaStreamAudioDestinationNode on the context and a hidden, autoplaying
 * <audio> element whose `srcObject` is that node's stream; the element is appended to
 * `document.body` when a body exists.
 *
 * @param context - the AudioContext the relay belongs to (one relay per context)
 * @returns the relay's destination node and <audio> element; the same object on every call
 *   until `disposeSharedRelay` is invoked for this context
 */
export function getOrCreateSharedRelay(context: AudioContext): SharedRelay {
  const ctx = context as AudioContextWithSharedRelay;
  const cached = ctx[SHARED_RELAY_KEY];
  if (cached) {
    return cached;
  }

  const destinationNode = context.createMediaStreamDestination();
  const relayElement = document.createElement('audio');
  relayElement.hidden = true;
  relayElement.autoplay = true;
  relayElement.srcObject = destinationNode.stream;
  document.body?.appendChild(relayElement);

  // Cache before attempting playback: the element is already in the DOM at this point, so if
  // anything below failed without the relay being cached, every later call would append yet
  // another orphaned <audio> element.
  const relay: SharedRelay = { destinationNode, relayElement };
  ctx[SHARED_RELAY_KEY] = relay;

  // play() is best-effort (autoplay is set as well). Legacy engines and DOM shims may return
  // undefined instead of a promise, so guard before attaching the rejection handler.
  const playback: Promise<void> | undefined = relayElement.play();
  playback?.catch(() => {});

  return relay;
}

/**
 * Tears down the relay cached on `context`, if any: pauses the element, clears its
 * `srcObject`, removes it from its parent and disconnects the destination node.
 * Does nothing when no relay exists for the context.
 *
 * @param context - the AudioContext whose relay should be released
 */
export function disposeSharedRelay(context: AudioContext) {
  const ctx = context as AudioContextWithSharedRelay;
  const relay = ctx[SHARED_RELAY_KEY];
  if (!relay) {
    return;
  }
  const { relayElement, destinationNode } = relay;
  try {
    relayElement.pause();
    relayElement.srcObject = null;
    relayElement.parentElement?.removeChild(relayElement);
    destinationNode.disconnect();
  } finally {
    // Always forget the relay, even if a teardown step threw, so a half-disposed
    // (detached) relay is never handed out again by getOrCreateSharedRelay.
    delete ctx[SHARED_RELAY_KEY];
  }
}
191+
148192
export function supportsSetSinkId(elm?: HTMLMediaElement): boolean {
149193
if (!document) return false;
150194
// iOS/Safari 26+ has Speaker Selection API — trust the version check rather than

0 commit comments

Comments
 (0)