Skip to content

Commit b840a51

Browse files
committed
feat: adding the audio ducking
1 parent 7b311f0 commit b840a51

4 files changed

Lines changed: 139 additions & 11 deletions

File tree

android/src/main/java/com/speech/SpeechModule.kt

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ import java.util.UUID
44
import java.util.Locale
55
import android.os.Build
66
import android.os.Bundle
7+
import android.content.Context
78
import android.speech.tts.Voice
9+
import android.media.AudioManager
10+
import android.media.AudioAttributes
11+
import android.media.AudioFocusRequest
812
import android.annotation.SuppressLint
913
import android.speech.tts.TextToSpeech
1014
import com.facebook.react.bridge.Promise
@@ -27,11 +31,11 @@ class SpeechModule(reactContext: ReactApplicationContext) :
2731
// Shared constants for the module: NAME (presumably the registered module name — confirm against getName())
// and defaultOptions, the baseline values for "rate", "pitch", "volume", "ducking" and "language".
// "language" is taken from the JVM default locale's language tag; "ducking" defaults to false.
companion object {
2832
const val NAME = "Speech"
2933

30-
@SuppressLint("ConstantLocale")
3134
private val defaultOptions: Map<String, Any> = mapOf(
3235
"rate" to 0.5f,
3336
"pitch" to 1.0f,
3437
"volume" to 1.0f,
38+
"ducking" to false,
3539
"language" to Locale.getDefault().toLanguageTag()
3640
)
3741
}
@@ -55,10 +59,59 @@ class SpeechModule(reactContext: ReactApplicationContext) :
5559
private var currentQueueIndex = -1
5660
private val speechQueue = mutableListOf<SpeechQueueItem>()
5761

62+
// System AudioManager, looked up from the React application context the first time it is accessed.
private val audioManager: AudioManager by lazy {
63+
reactApplicationContext.getSystemService(Context.AUDIO_SERVICE) as AudioManager
64+
}
65+
// Listener passed to the audio-focus APIs; assigned in activateDuckingSession() and reset to null in
// deactivateDuckingSession().
private var audioFocusChangeListener: AudioManager.OnAudioFocusChangeListener? = null
66+
// Focus request built only on API 26+ (Build.VERSION_CODES.O); kept so the same object can be handed to
// abandonAudioFocusRequest() later.
private var audioFocusRequest: AudioFocusRequest? = null
67+
// Whether ducking was requested; assigned from the "ducking" option in the speak paths right before
// activateDuckingSession() is called.
private var isDucking = false
68+
5869
// Calls initializeTTS() as soon as the module instance is constructed.
init {
5970
initializeTTS()
6071
}
6172

73+
/**
 * Requests transient "may duck" audio focus so other apps can lower their volume while speech plays.
 *
 * Does nothing when [isDucking] is false, or when focus from an earlier call is still held
 * (tracked through [audioFocusChangeListener]). Re-requesting with a fresh listener would register
 * an additional focus client whose handle is then overwritten and can never be abandoned.
 *
 * On API 26+ the request is made through [AudioFocusRequest] with accessibility/speech attributes;
 * on older versions the deprecated stream-based overload is used. The held-focus state is recorded
 * only when the system grants the request, so a denied request can be retried by a later call.
 */
@Suppress("DEPRECATION")
private fun activateDuckingSession() {
    if (!isDucking) return
    // Focus already held: keep the existing request so deactivateDuckingSession() can release it.
    if (audioFocusChangeListener != null) return

    // Focus changes are not acted upon; the listener only identifies this focus client.
    val listener = AudioManager.OnAudioFocusChangeListener { }

    val result = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        val audioAttributes = AudioAttributes.Builder()
            .setUsage(AudioAttributes.USAGE_ASSISTANCE_ACCESSIBILITY)
            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .build()
        val focusRequest = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
            .setAudioAttributes(audioAttributes)
            .setOnAudioFocusChangeListener(listener)
            .build()
        audioFocusRequest = focusRequest
        audioManager.requestAudioFocus(focusRequest)
    } else {
        audioManager.requestAudioFocus(
            listener,
            AudioManager.STREAM_MUSIC,
            AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK
        )
    }

    if (result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED) {
        audioFocusChangeListener = listener
    } else {
        // Nothing was acquired, so leave no stale request behind.
        audioFocusRequest = null
    }
}
98+
99+
/**
 * Abandons the audio focus taken by [activateDuckingSession], if any is currently held.
 *
 * The guard is the held-focus state ([audioFocusChangeListener]), not [isDucking]: that flag is
 * overwritten by every speak call, so a later non-ducking utterance would otherwise prevent focus
 * acquired for an earlier ducking utterance from ever being released.
 *
 * On API 26+ the stored [audioFocusRequest] is abandoned; on older versions the deprecated
 * listener-based overload is used. Both fields are cleared afterwards.
 */
@Suppress("DEPRECATION")
private fun deactivateDuckingSession() {
    val listener = audioFocusChangeListener ?: return

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        audioFocusRequest?.let { request ->
            audioManager.abandonAudioFocusRequest(request)
        }
    } else {
        audioManager.abandonAudioFocus(listener)
    }
    audioFocusChangeListener = null
    audioFocusRequest = null
}
114+
62115
private fun processPendingOperations() {
63116
val operations = ArrayList(pendingOperations)
64117
pendingOperations.clear()
@@ -143,6 +196,7 @@ class SpeechModule(reactContext: ReactApplicationContext) :
143196
synchronized(queueLock) {
144197
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
145198
item.status = SpeechStatus.COMPLETED
199+
deactivateDuckingSession()
146200
emitOnFinish(getEventData(utteranceId))
147201
if (!isPaused) {
148202
currentQueueIndex++
@@ -155,6 +209,7 @@ class SpeechModule(reactContext: ReactApplicationContext) :
155209
synchronized(queueLock) {
156210
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
157211
item.status = SpeechStatus.ERROR
212+
deactivateDuckingSession()
158213
emitOnError(getEventData(utteranceId))
159214
if (!isPaused) {
160215
currentQueueIndex++
@@ -265,6 +320,9 @@ class SpeechModule(reactContext: ReactApplicationContext) :
265320
private fun getValidatedOptions(options: ReadableMap): Map<String, Any> {
266321
val validated = globalOptions.toMutableMap()
267322

323+
if (options.hasKey("ducking")) {
324+
validated["ducking"] = options.getBoolean("ducking")
325+
}
268326
if (options.hasKey("voice")) {
269327
options.getString("voice")?.let { validated["voice"] = it }
270328
}
@@ -368,6 +426,7 @@ class SpeechModule(reactContext: ReactApplicationContext) :
368426
ensureInitialized(promise) {
369427
if (synthesizer.isSpeaking || isPaused) {
370428
synthesizer.stop()
429+
deactivateDuckingSession()
371430
synchronized(queueLock) {
372431
if (currentQueueIndex in speechQueue.indices) {
373432
val item = speechQueue[currentQueueIndex]
@@ -387,6 +446,7 @@ class SpeechModule(reactContext: ReactApplicationContext) :
387446
} else {
388447
isPaused = true
389448
synthesizer.stop()
449+
deactivateDuckingSession()
390450
promise.resolve(true)
391451
}
392452
}
@@ -403,6 +463,7 @@ class SpeechModule(reactContext: ReactApplicationContext) :
403463
if (pausedItemIndex >= 0) {
404464
currentQueueIndex = pausedItemIndex
405465
isPaused = false
466+
activateDuckingSession()
406467
processNextQueueItem()
407468
promise.resolve(true)
408469
} else {
@@ -419,6 +480,8 @@ class SpeechModule(reactContext: ReactApplicationContext) :
419480
return
420481
}
421482
ensureInitialized(promise) {
483+
isDucking = globalOptions["ducking"] as? Boolean ?: false
484+
activateDuckingSession()
422485
val utteranceId = getUniqueID()
423486
val queueItem = SpeechQueueItem(text = text, options = emptyMap(), utteranceId = utteranceId)
424487
synchronized(queueLock) {
@@ -438,8 +501,10 @@ class SpeechModule(reactContext: ReactApplicationContext) :
438501
return
439502
}
440503
ensureInitialized(promise) {
504+
val validatedOptions = getValidatedOptions(options)
505+
isDucking = validatedOptions["ducking"] as? Boolean ?: false
506+
activateDuckingSession()
441507
val utteranceId = getUniqueID()
442-
val validatedOptions = getValidatedOptions(options)
443508
val queueItem = SpeechQueueItem(text = text, options = validatedOptions, utteranceId = utteranceId)
444509
synchronized(queueLock) {
445510
speechQueue.add(queueItem)

example/src/views/RootView.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const RootView: React.FC = () => {
2222
>([]);
2323

2424
React.useEffect(() => {
25-
// Speech.initialize({silentMode: 'ignore'});
25+
// Speech.initialize({silentMode: 'obey', ducking: true});
2626

2727
const onSpeechEnd = () => {
2828
setIsStarted(false);

ios/Speech.mm

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
@implementation Speech
66
{
7+
BOOL isDucking;
78
NSDictionary *defaultOptions;
89
}
910

@@ -19,25 +20,65 @@ - (instancetype)init {
1920
if (self) {
2021
_synthesizer = [[AVSpeechSynthesizer alloc] init];
2122
_synthesizer.delegate = self;
22-
23+
2324
defaultOptions = @{
2425
@"pitch": @(1.0),
2526
@"volume": @(1.0),
27+
@"ducking": @(NO),
2628
@"silentMode": @"obey",
2729
@"rate": @(AVSpeechUtteranceDefaultSpeechRate),
2830
@"language": [AVSpeechSynthesisVoice currentLanguageCode] ?: @"en-US"
2931
};
30-
3132
self.globalOptions = [defaultOptions copy];
3233
}
3334
return self;
3435
}
3536

36-
- (void)configureSilentModeSession:(NSString *)silentMode {
37+
// Configures the shared AVAudioSession for spoken audio that ducks other apps, then activates it.
// Does nothing when isDucking is NO. Failures are logged and otherwise ignored; if the category
// cannot be set, the session is not activated.
- (void)activateDuckingSession {
  if (!isDucking) {
    return;
  }
  AVAudioSession *session = [AVAudioSession sharedInstance];

  // Success is signalled by the BOOL return value; the NSError out-parameter is only meaningful
  // when the call reports failure.
  NSError *categoryError = nil;
  BOOL categorySet = [session setCategory:AVAudioSessionCategoryPlayback
                                     mode:AVAudioSessionModeSpokenAudio
                                  options:AVAudioSessionCategoryOptionDuckOthers
                                    error:&categoryError];
  if (!categorySet) {
    NSLog(@"[Speech] Failed to set audio session configuration for ducking: %@", categoryError.localizedDescription);
    return;
  }

  NSError *activationError = nil;
  if (![session setActive:YES error:&activationError]) {
    NSLog(@"[Speech] Failed to activate audio session for ducking: %@", activationError.localizedDescription);
  }
}
57+
58+
// Deactivates the shared AVAudioSession and notifies other apps so they can restore their audio.
// Does nothing when isDucking is NO. A failure is logged and otherwise ignored.
- (void)deactivateDuckingSession {
  if (!isDucking) {
    return;
  }
  NSError *error = nil;
  // Success is signalled by the BOOL return value; the NSError out-parameter is only meaningful
  // when the call reports failure.
  BOOL deactivated = [[AVAudioSession sharedInstance] setActive:NO
                                                    withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
                                                          error:&error];
  if (!deactivated) {
    NSLog(@"[Speech] AVAudioSession setActive (deactivate) error: %@", error.localizedDescription);
  }
}
3871

72+
- (void)configureSilentModeSession:(NSString *)silentMode {
73+
if (isDucking || [silentMode isEqualToString:@"obey"]) {
74+
return;
75+
}
76+
NSError *error = nil;
3977
if ([silentMode isEqualToString:@"ignore"]) {
40-
[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayback error:&error];
78+
[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayback
79+
mode:AVAudioSessionModeSpokenAudio
80+
options:AVAudioSessionCategoryOptionInterruptSpokenAudioAndMixWithOthers
81+
error:&error];
4182
} else if ([silentMode isEqualToString:@"respect"]) {
4283
[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryAmbient error:&error];
4384
}
@@ -64,6 +105,9 @@ - (NSDictionary *)getVoiceItem:(AVSpeechSynthesisVoice *)voice {
64105
- (NSDictionary *)getValidatedOptions:(VoiceOptions &)options {
65106
NSMutableDictionary *validatedOptions = [self.globalOptions mutableCopy];
66107

108+
if (options.ducking()) {
109+
validatedOptions[@"ducking"] = @(options.ducking().value());
110+
}
67111
if (options.voice()) {
68112
validatedOptions[@"voice"] = options.voice();
69113
}
@@ -165,6 +209,7 @@ - (void)stop:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)rejec
165209
- (void)pause:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
166210
if (self.synthesizer.isSpeaking && !self.synthesizer.isPaused) {
167211
BOOL paused = [self.synthesizer pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
212+
[self deactivateDuckingSession];
168213
resolve(@(paused));
169214
} else {
170215
resolve(@(false));
@@ -173,6 +218,7 @@ - (void)pause:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reje
173218

174219
- (void)resume:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
175220
if (self.synthesizer.isPaused) {
221+
[self activateDuckingSession];
176222
BOOL resumed = [self.synthesizer continueSpeaking];
177223
resolve(@(resumed));
178224
} else {
@@ -192,13 +238,17 @@ - (void)speak:(NSString *)text
192238
AVSpeechUtterance *utterance;
193239

194240
@try {
241+
isDucking = [self.globalOptions[@"ducking"] boolValue];
242+
243+
[self activateDuckingSession];
195244
[self configureSilentModeSession:self.globalOptions[@"silentMode"]];
196245

197246
utterance = [self getUtterance:text withOptions:self.globalOptions];
198247
[self.synthesizer speakUtterance:utterance];
199248
resolve(nil);
200249
}
201250
@catch (NSException *exception) {
251+
[self deactivateDuckingSession];
202252
[self emitOnError:[self getEventData:utterance]];
203253
reject(@"speech_error", exception.reason, nil);
204254
}
@@ -218,13 +268,17 @@ - (void)speakWithOptions:(NSString *)text
218268

219269
@try {
220270
NSDictionary *validatedOptions = [self getValidatedOptions:options];
271+
isDucking = [validatedOptions[@"ducking"] boolValue];
272+
273+
[self activateDuckingSession];
221274
[self configureSilentModeSession:validatedOptions[@"silentMode"]];
222275

223276
utterance = [self getUtterance:text withOptions:validatedOptions];
224277
[self.synthesizer speakUtterance:utterance];
225278
resolve(nil);
226279
}
227280
@catch (NSException *exception) {
281+
[self deactivateDuckingSession];
228282
[self emitOnError:[self getEventData:utterance]];
229283
reject(@"speech_error", exception.reason, nil);
230284
}
@@ -246,6 +300,7 @@ - (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer
246300

247301
// Synthesizer delegate callback for an utterance that finished speaking: calls
// deactivateDuckingSession (a no-op unless isDucking is set) and then emits the finish event
// built from the utterance.
- (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer
248302
didFinishSpeechUtterance:(AVSpeechUtterance *)utterance {
303+
[self deactivateDuckingSession];
249304
[self emitOnFinish:[self getEventData:utterance]];
250305
}
251306

@@ -261,6 +316,7 @@ - (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer
261316

262317
// Synthesizer delegate callback for a cancelled utterance: calls deactivateDuckingSession
// (a no-op unless isDucking is set) and then emits the stopped event built from the utterance.
- (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer
263318
didCancelSpeechUtterance:(AVSpeechUtterance *)utterance {
319+
[self deactivateDuckingSession];
264320
[self emitOnStopped:[self getEventData:utterance]];
265321
}
266322

src/NativeSpeech.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,19 @@ export interface VoiceProps {
3131
}
3232
export interface VoiceOptions {
3333
/**
34-
* Determines how speech audio should behave with the iOS silent switch (ringer)
34+
* If `true`, audio from other apps will be temporarily lowered (ducked) while speech is active.
35+
* This is for critical announcements (e.g., navigation) and takes priority over `silentMode` on iOS.
36+
* @default false
37+
*/
38+
ducking?: boolean;
39+
/**
40+
* Determines how speech audio interacts with the device's silent (ringer) switch.
41+
* This option is ignored if `ducking` is `true`.
3542
* @platform ios
3643
*
37-
* - `obey`: (Default) The library does not change the app's audio session. Speech audio will follow the app's current audio configuration or the system default
38-
* - `respect`: Speech audio will be silenced by the ringer switch. This is for non-critical speech
39-
* - `ignore`: Speech audio will play even if the ringer switch is on silent. Use this for critical speech like navigation
44+
* - `obey`: (Default) Does not change the app's audio session. Speech follows the system default.
45+
* - `respect`: Speech will be silenced by the ringer switch. Use for non-critical audio.
46+
* - `ignore`: Speech will play even if the ringer is off. Use for critical audio when ducking is not desired.
4047
*/
4148
silentMode?: 'obey' | 'respect' | 'ignore';
4249
/** The language code to use (e.g., 'en', 'fr', 'en-US', 'fr-FR') */

0 commit comments

Comments
 (0)