From 08c3692acea6a0345811ce98167e886b4eb99a6e Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 12 May 2026 21:02:34 +0800 Subject: [PATCH 1/3] impl 1 --- Docs/audio.md | 21 ++- .../AudioManager+AudioProcessingMode.swift | 152 ++++++++++++++++++ .../LiveKit/Audio/Manager/AudioManager.swift | 10 +- .../Types/Options/AudioCaptureOptions.swift | 34 ++-- .../AudioProcessingTests.swift | 4 +- 5 files changed, 195 insertions(+), 26 deletions(-) create mode 100644 Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift diff --git a/Docs/audio.md b/Docs/audio.md index 385ae7af4..bc6722b1a 100644 --- a/Docs/audio.md +++ b/Docs/audio.md @@ -50,21 +50,34 @@ When set to `false`, the audio session remains active after the LiveKit call end Apple's voice processing is enabled by default, such as echo cancellation and auto-gain control. -If your app doesn't require voice processing at all, you can disable it entirely: +Use `AudioProcessingMode` to choose the processing backend before publishing or starting local recording: + +```swift +try AudioManager.shared.setAudioProcessingMode(.automatic) // default +try AudioManager.shared.setAudioProcessingMode(.system) // require Apple VPIO +try AudioManager.shared.setAudioProcessingMode(.software) // use WebRTC APM +try AudioManager.shared.setAudioProcessingMode(.disabled) // no voice processing +``` + +Mode changes are only supported while the audio engine is idle. To switch during a call, +unpublish or stop local recording first, set the mode, then publish or start recording again. + +If your app doesn't require voice processing at all, you can also use the compatibility API: ```swift try AudioManager.shared.setVoiceProcessingEnabled(false) ``` -This restarts the internal `AVAudioEngine` to apply the change. It can cause a short audio glitch, so it is recommended to set it once before connecting to a Room. Disabling voice processing also disables muted speaker detection. 
+This is equivalent to `try AudioManager.shared.setAudioProcessingMode(.disabled)`. +Disabling system voice processing also disables muted speaker detection. -If your app requires toggling voice processing at run-time, it is recommended to use: +If your app only needs to bypass Apple's system processing at run-time, use: ```swift AudioManager.shared.isVoiceProcessingBypassed = true ``` -Set it back to `false` to re-enable processing. This uses `AVAudioEngine`'s [isVoiceProcessingBypassed](https://developer.apple.com/documentation/avfaudio/avaudioinputnode/isvoiceprocessingbypassed) and works seamlessly at run-time. +Set it back to `false` to re-enable Apple's processing. This uses `AVAudioEngine`'s [isVoiceProcessingBypassed](https://developer.apple.com/documentation/avfaudio/avaudioinputnode/isvoiceprocessingbypassed) and works seamlessly at run-time, but it does not switch to WebRTC software processing. ## Other audio ducking diff --git a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift new file mode 100644 index 000000000..9c80890cd --- /dev/null +++ b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift @@ -0,0 +1,152 @@ +/* + * Copyright 2026 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +internal import LiveKitWebRTC + +public enum AudioProcessingMode: Sendable { + /// Prefer Apple's system voice processing when available, otherwise use WebRTC software processing. + case automatic + /// Require Apple's system voice processing. + case system + /// Use WebRTC software processing and disable Apple's system voice processing. + case software + /// Disable both Apple's system voice processing and WebRTC software processing. + case disabled + case unknown +} + +public enum AudioProcessingLifecycle: Sendable { + case idle + case running + case transitioning + case failed + case unknown +} + +public enum AudioProcessingBackend: Sendable { + case disabled + case system + case software + case unavailable + case unknown +} + +public struct AudioProcessingState: Sendable { + public let requestedMode: AudioProcessingMode + public let lifecycle: AudioProcessingLifecycle + public let backend: AudioProcessingBackend + public let transitionFrom: AudioProcessingMode + public let transitionTo: AudioProcessingMode + public let lastError: Int + public let isSystemBypassed: Bool + public let isSystemAGCEnabled: Bool + public let isSoftwareEchoCancellationEnabled: Bool + public let isSoftwareNoiseSuppressionEnabled: Bool + public let isSoftwareAutoGainControlEnabled: Bool + public let isSoftwareHighpassFilterEnabled: Bool +} + +public extension AudioManager { + var audioProcessingMode: AudioProcessingMode { + RTC.audioDeviceModule.audioProcessingMode.toLKType() + } + + var audioProcessingState: AudioProcessingState { + RTC.audioDeviceModule.audioProcessingState.toLKType() + } + + func setAudioProcessingMode(_ mode: AudioProcessingMode) throws { + guard mode != .unknown else { + throw LiveKitError(.invalidState, message: "Unsupported audio processing mode specified") + } + + guard RTC.pcFactoryState.admType == .audioEngine else { + throw LiveKitError(.invalidState, message: "Audio processing mode is only supported by the audioEngine audio device module") + } + + 
let result = RTC.audioDeviceModule.setAudioProcessingMode(mode.toRTCType()) + try checkAdmResult(code: result) + } +} + +// MARK: - Internal + +extension LKRTCAudioProcessingMode { + func toLKType() -> AudioProcessingMode { + switch self { + case .automatic: return .automatic + case .system: return .system + case .software: return .software + case .disabled: return .disabled + @unknown default: return .unknown + } + } +} + +extension AudioProcessingMode { + func toRTCType() -> LKRTCAudioProcessingMode { + switch self { + case .automatic: .automatic + case .system: .system + case .software: .software + case .disabled: .disabled + case .unknown: .automatic + } + } +} + +extension LKRTCAudioProcessingLifecycle { + func toLKType() -> AudioProcessingLifecycle { + switch self { + case .idle: return .idle + case .running: return .running + case .transitioning: return .transitioning + case .failed: return .failed + @unknown default: return .unknown + } + } +} + +extension LKRTCAudioProcessingBackend { + func toLKType() -> AudioProcessingBackend { + switch self { + case .disabled: return .disabled + case .system: return .system + case .software: return .software + case .unavailable: return .unavailable + @unknown default: return .unknown + } + } +} + +extension LKRTCAudioProcessingState { + func toLKType() -> AudioProcessingState { + AudioProcessingState( + requestedMode: requestedMode.toLKType(), + lifecycle: lifecycle.toLKType(), + backend: backend.toLKType(), + transitionFrom: transitionFrom.toLKType(), + transitionTo: transitionTo.toLKType(), + lastError: lastError, + isSystemBypassed: systemBypassed, + isSystemAGCEnabled: systemAGCEnabled, + isSoftwareEchoCancellationEnabled: softwareEchoCancellation, + isSoftwareNoiseSuppressionEnabled: softwareNoiseSuppression, + isSoftwareAutoGainControlEnabled: softwareAutoGainControl, + isSoftwareHighpassFilterEnabled: softwareHighpassFilter + ) + } +} diff --git a/Sources/LiveKit/Audio/Manager/AudioManager.swift 
b/Sources/LiveKit/Audio/Manager/AudioManager.swift index 672a64c51..e8c97d60e 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager.swift @@ -320,12 +320,13 @@ public class AudioManager: Loggable { set { RTC.audioDeviceModule.duckingLevel = newValue.toRTCType() } } - /// The main flag that determines whether to enable Voice-Processing I/O of the internal AVAudioEngine. Toggling this requires restarting the AudioEngine. - /// Setting this to `false` prevents any voice-processing-related initialization, and muted talker detection will not work. - /// Typically, it is recommended to keep this set to `true` and toggle ``isVoiceProcessingBypassed`` when possible. + /// Whether the current audio processing mode uses Voice-Processing I/O of the internal AVAudioEngine. + /// Use ``setAudioProcessingMode(_:)`` for explicit system/software/disabled selection. /// Defaults to `true`. public var isVoiceProcessingEnabled: Bool { RTC.audioDeviceModule.isVoiceProcessingEnabled } + /// Compatibility API for enabling automatic processing or disabling processing entirely. + /// Prefer ``setAudioProcessingMode(_:)`` for new code. 
public func setVoiceProcessingEnabled(_ enabled: Bool) throws { let result = RTC.audioDeviceModule.setVoiceProcessingEnabled(enabled) try checkAdmResult(code: result) @@ -544,6 +545,7 @@ let kAudioEngineErrorFailedToConfigureAudioSession = -4100 let kAudioEngineErrorAudioSessionCategoryRecordingRequired = -4102 let kAudioEngineErrorInsufficientDevicePermission = -4101 +let kAudioEngineInvalidStateError = -5000 extension AudioManager { func checkAdmResult(code: Int) throws { @@ -553,6 +555,8 @@ extension AudioManager { throw LiveKitError(.deviceAccessDenied, message: "Device permissions are not granted") } else if code == kAudioEngineErrorAudioSessionCategoryRecordingRequired { throw LiveKitError(.audioSession, message: "Recording category required for audio session") + } else if code == kAudioEngineInvalidStateError { + throw LiveKitError(.invalidState, message: "Audio engine returned invalid state") } else if code != 0 { throw LiveKitError(.audioEngine, message: "Audio engine returned error code: \(code)") } diff --git a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift index 99c8ecc21..c558df64a 100644 --- a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift +++ b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift @@ -20,20 +20,23 @@ internal import LiveKitWebRTC @objcMembers public final class AudioCaptureOptions: NSObject, CaptureOptions, Sendable { - // Defaults are `true` on all platforms. In practice these options only affect - // software (WebRTC) APM on iOS Simulator. On iOS device or macOS, Apple's VPIO - // handles AEC/AGC/NS and software APM is always off regardless of these flags. + // Defaults are `true` on all platforms. These options affect WebRTC's + // software APM. In the default audio processing mode, Apple's VPIO handles + // AEC/AGC/NS on iOS device and macOS, so software APM is off. 
Use + // `AudioManager.shared.setAudioProcessingMode(.software)` to explicitly + // select WebRTC software processing on supported AudioEngineDevice builds. // // Platform behavior: - // - iOS device or macOS: VPIO is active. Software APM is always off. These - // flags are effectively ignored for runtime processing, but still reported - // to the server as audio track features for telemetry. + // - iOS device or macOS with `.automatic`: VPIO is active. Software APM is + // off. These flags are still reported to the server as audio track + // features for telemetry. + // - iOS device or macOS with `.software`: Software APM is active and these + // flags are respected. // - iOS Simulator: VPIO is not reliably available. Software APM is used and // these flags are respected. // - // To control VPIO on device, see ``AudioManager/isVoiceProcessingEnabled``, - // ``AudioManager/isVoiceProcessingBypassed``, and - // ``AudioManager/isVoiceProcessingAGCEnabled``. + // To control the processing backend, see + // ``AudioManager/setAudioProcessingMode(_:)``. public static let defaultEchoCancellation = true public static let defaultAutoGainControl = true public static let defaultNoiseSuppression = true @@ -47,20 +50,17 @@ public final class AudioCaptureOptions: NSObject, CaptureOptions, Sendable { ) /// Whether to enable software (WebRTC's) echo cancellation. - /// Only takes effect on iOS Simulator. On iOS device or macOS, Apple's VPIO - /// handles AEC and this flag is ignored for runtime processing. - /// See ``AudioManager/isVoiceProcessingBypassed`` for device-side VPIO controls. + /// Takes effect when WebRTC software processing is active. + /// See ``AudioManager/setAudioProcessingMode(_:)`` for backend selection. public let echoCancellation: Bool /// Whether to enable software (WebRTC's) gain control. - /// Only takes effect on iOS Simulator. On iOS device or macOS, Apple's VPIO - /// handles AGC and this flag is ignored for runtime processing. 
- /// See ``AudioManager/isVoiceProcessingAGCEnabled`` for device-side VPIO controls. + /// Takes effect when WebRTC software processing is active. + /// See ``AudioManager/setAudioProcessingMode(_:)`` for backend selection. public let autoGainControl: Bool /// Whether to enable software (WebRTC's) noise suppression. - /// Only takes effect on iOS Simulator. On iOS device or macOS, Apple's VPIO - /// handles NS and this flag is ignored for runtime processing. + /// Takes effect when WebRTC software processing is active. public let noiseSuppression: Bool public let highpassFilter: Bool diff --git a/Tests/LiveKitAudioTests/AudioProcessingTests.swift b/Tests/LiveKitAudioTests/AudioProcessingTests.swift index 4552ba193..c6f781e9f 100644 --- a/Tests/LiveKitAudioTests/AudioProcessingTests.swift +++ b/Tests/LiveKitAudioTests/AudioProcessingTests.swift @@ -65,8 +65,8 @@ import LiveKitTestSupport } @Test func optionsAppliedToAudioProcessingModule() async throws { - // Disable Apple VPIO. - AudioManager.shared.isVoiceProcessingBypassed = true + try AudioManager.shared.setAudioProcessingMode(.software) + defer { try? 
AudioManager.shared.setAudioProcessingMode(.automatic) } try await TestEnvironment.withRoom(RoomTestingOptions(canPublish: true)) { room in let allOnOptions = AudioCaptureOptions( From 202e88000733c89db55399bce9ecc7dc431f55b0 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 12 May 2026 21:41:23 +0800 Subject: [PATCH 2/3] simplify 1 --- .../AudioManager+AudioProcessingMode.swift | 78 ------------------- 1 file changed, 78 deletions(-) diff --git a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift index 9c80890cd..906be94b8 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift @@ -28,46 +28,11 @@ public enum AudioProcessingMode: Sendable { case unknown } -public enum AudioProcessingLifecycle: Sendable { - case idle - case running - case transitioning - case failed - case unknown -} - -public enum AudioProcessingBackend: Sendable { - case disabled - case system - case software - case unavailable - case unknown -} - -public struct AudioProcessingState: Sendable { - public let requestedMode: AudioProcessingMode - public let lifecycle: AudioProcessingLifecycle - public let backend: AudioProcessingBackend - public let transitionFrom: AudioProcessingMode - public let transitionTo: AudioProcessingMode - public let lastError: Int - public let isSystemBypassed: Bool - public let isSystemAGCEnabled: Bool - public let isSoftwareEchoCancellationEnabled: Bool - public let isSoftwareNoiseSuppressionEnabled: Bool - public let isSoftwareAutoGainControlEnabled: Bool - public let isSoftwareHighpassFilterEnabled: Bool -} - public extension AudioManager { var audioProcessingMode: AudioProcessingMode { RTC.audioDeviceModule.audioProcessingMode.toLKType() } - var audioProcessingState: AudioProcessingState { - 
RTC.audioDeviceModule.audioProcessingState.toLKType() - } - func setAudioProcessingMode(_ mode: AudioProcessingMode) throws { guard mode != .unknown else { throw LiveKitError(.invalidState, message: "Unsupported audio processing mode specified") @@ -107,46 +72,3 @@ extension AudioProcessingMode { } } } - -extension LKRTCAudioProcessingLifecycle { - func toLKType() -> AudioProcessingLifecycle { - switch self { - case .idle: return .idle - case .running: return .running - case .transitioning: return .transitioning - case .failed: return .failed - @unknown default: return .unknown - } - } -} - -extension LKRTCAudioProcessingBackend { - func toLKType() -> AudioProcessingBackend { - switch self { - case .disabled: return .disabled - case .system: return .system - case .software: return .software - case .unavailable: return .unavailable - @unknown default: return .unknown - } - } -} - -extension LKRTCAudioProcessingState { - func toLKType() -> AudioProcessingState { - AudioProcessingState( - requestedMode: requestedMode.toLKType(), - lifecycle: lifecycle.toLKType(), - backend: backend.toLKType(), - transitionFrom: transitionFrom.toLKType(), - transitionTo: transitionTo.toLKType(), - lastError: lastError, - isSystemBypassed: systemBypassed, - isSystemAGCEnabled: systemAGCEnabled, - isSoftwareEchoCancellationEnabled: softwareEchoCancellation, - isSoftwareNoiseSuppressionEnabled: softwareNoiseSuppression, - isSoftwareAutoGainControlEnabled: softwareAutoGainControl, - isSoftwareHighpassFilterEnabled: softwareHighpassFilter - ) - } -} From 17caa8d4522838b5c72b64f5c4a98cb988eb2ecf Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 13 May 2026 19:18:40 +0800 Subject: [PATCH 3/3] update --- Docs/audio.md | 14 ++++++++------ .../AudioManager+AudioProcessingMode.swift | 18 +++++++----------- .../LiveKit/Audio/Manager/AudioManager.swift | 2 +- .../Types/Options/AudioCaptureOptions.swift | 12 ++++++------ 4 files changed, 22 
insertions(+), 24 deletions(-) diff --git a/Docs/audio.md b/Docs/audio.md index bc6722b1a..0f0b50b79 100644 --- a/Docs/audio.md +++ b/Docs/audio.md @@ -48,19 +48,21 @@ When set to `false`, the audio session remains active after the LiveKit call end ## Disabling Voice Processing -Apple's voice processing is enabled by default, such as echo cancellation and auto-gain control. +Platform voice processing, such as echo cancellation and auto-gain control, is enabled by default. +On Apple platforms this is Apple's Voice-Processing I/O. Use `AudioProcessingMode` to choose the processing backend before publishing or starting local recording: ```swift try AudioManager.shared.setAudioProcessingMode(.automatic) // default -try AudioManager.shared.setAudioProcessingMode(.system) // require Apple VPIO +try AudioManager.shared.setAudioProcessingMode(.platform) // require platform processing try AudioManager.shared.setAudioProcessingMode(.software) // use WebRTC APM try AudioManager.shared.setAudioProcessingMode(.disabled) // no voice processing ``` -Mode changes are only supported while the audio engine is idle. To switch during a call, -unpublish or stop local recording first, set the mode, then publish or start recording again. +Mode changes are only supported by audio device modules that implement this API, and only +while audio is idle. To switch during a call, unpublish or stop local recording first, set +the mode, then publish or start recording again. If your app doesn't require voice processing at all, you can also use the compatibility API: @@ -69,9 +71,9 @@ try AudioManager.shared.setVoiceProcessingEnabled(false) ``` This is equivalent to `try AudioManager.shared.setAudioProcessingMode(.disabled)`. -Disabling system voice processing also disables muted speaker detection. +Disabling platform voice processing also disables muted speaker detection. 
-If your app only needs to bypass Apple's system processing at run-time, use: +If your app only needs to bypass Apple's platform processing at run-time, use: ```swift AudioManager.shared.isVoiceProcessingBypassed = true diff --git a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift index 906be94b8..77ab58c74 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager+AudioProcessingMode.swift @@ -17,13 +17,13 @@ internal import LiveKitWebRTC public enum AudioProcessingMode: Sendable { - /// Prefer Apple's system voice processing when available, otherwise use WebRTC software processing. + /// Prefer platform voice processing when available, otherwise use WebRTC software processing. case automatic - /// Require Apple's system voice processing. - case system - /// Use WebRTC software processing and disable Apple's system voice processing. + /// Require platform voice processing. + case platform + /// Use WebRTC software processing and disable platform voice processing. case software - /// Disable both Apple's system voice processing and WebRTC software processing. + /// Disable both platform voice processing and WebRTC software processing. 
case disabled case unknown } @@ -38,10 +38,6 @@ public extension AudioManager { throw LiveKitError(.invalidState, message: "Unsupported audio processing mode specified") } - guard RTC.pcFactoryState.admType == .audioEngine else { - throw LiveKitError(.invalidState, message: "Audio processing mode is only supported by the audioEngine audio device module") - } - let result = RTC.audioDeviceModule.setAudioProcessingMode(mode.toRTCType()) try checkAdmResult(code: result) } @@ -53,7 +49,7 @@ extension LKRTCAudioProcessingMode { func toLKType() -> AudioProcessingMode { switch self { case .automatic: return .automatic - case .system: return .system + case .platform: return .platform case .software: return .software case .disabled: return .disabled @unknown default: return .unknown @@ -65,7 +61,7 @@ extension AudioProcessingMode { func toRTCType() -> LKRTCAudioProcessingMode { switch self { case .automatic: .automatic - case .system: .system + case .platform: .platform case .software: .software case .disabled: .disabled case .unknown: .automatic diff --git a/Sources/LiveKit/Audio/Manager/AudioManager.swift b/Sources/LiveKit/Audio/Manager/AudioManager.swift index e8c97d60e..a70bd2a05 100644 --- a/Sources/LiveKit/Audio/Manager/AudioManager.swift +++ b/Sources/LiveKit/Audio/Manager/AudioManager.swift @@ -321,7 +321,7 @@ public class AudioManager: Loggable { } /// Whether the current audio processing mode uses Voice-Processing I/O of the internal AVAudioEngine. - /// Use ``setAudioProcessingMode(_:)`` for explicit system/software/disabled selection. + /// Use ``setAudioProcessingMode(_:)`` for explicit platform/software/disabled selection. /// Defaults to `true`. 
public var isVoiceProcessingEnabled: Bool { RTC.audioDeviceModule.isVoiceProcessingEnabled } diff --git a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift index c558df64a..38516e1d7 100644 --- a/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift +++ b/Sources/LiveKit/Types/Options/AudioCaptureOptions.swift @@ -21,15 +21,15 @@ internal import LiveKitWebRTC @objcMembers public final class AudioCaptureOptions: NSObject, CaptureOptions, Sendable { // Defaults are `true` on all platforms. These options affect WebRTC's - // software APM. In the default audio processing mode, Apple's VPIO handles - // AEC/AGC/NS on iOS device and macOS, so software APM is off. Use + // software APM. In the default audio processing mode, platform processing + // handles AEC/AGC/NS on iOS device and macOS, so software APM is off. Use // `AudioManager.shared.setAudioProcessingMode(.software)` to explicitly - // select WebRTC software processing on supported AudioEngineDevice builds. + // select WebRTC software processing on supported WebRTC builds. // // Platform behavior: - // - iOS device or macOS with `.automatic`: VPIO is active. Software APM is - // off. These flags are still reported to the server as audio track - // features for telemetry. + // - iOS device or macOS with `.automatic`: platform processing is active. + // Software APM is off. These flags are still reported to the server as + // audio track features for telemetry. // - iOS device or macOS with `.software`: Software APM is active and these // flags are respected. // - iOS Simulator: VPIO is not reliably available. Software APM is used and