Skip to content

Commit 0a7e105

Browse files
committed
fix: Improve manual interrupt handling for Google Live API
- Add ignoreGoogleAudioUntilNextTurn flag to prevent audio from continuing after manual interrupt
- Set flag when user taps status capsule during Google response
- Ignore incoming audio chunks while flag is active
- Clear flag when new model turn starts (user triggered new response)
- Clear flag on natural barge-in detection and session setup
- This allows immediate transition to LISTENING state without hearing residual audio
1 parent 982cdfa commit 0a7e105

3 files changed

Lines changed: 37 additions & 1 deletion

File tree

app/src/main/java/ch/fhnw/pepper_realtime/controller/ChatInterruptController.kt

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,13 @@ class ChatInterruptController @Inject constructor(
3232

3333
Log.d(TAG, "🚨 Interrupt: isResponseGenerating=$isGenerating, isAudioPlaying=$isPlaying, isGoogle=$isGoogle")
3434

35-
// Google Live API handles interruption automatically via barge-in detection
35+
// Google Live API: Set flag to ignore incoming audio until next user turn
36+
// The server will stop generating when it detects user audio (barge-in)
37+
if (isGoogle) {
38+
viewModel.setIgnoreGoogleAudio(true)
39+
Log.d(TAG, "Google: Manual interrupt - ignoring audio until next turn")
40+
}
41+
3642
// Only send OpenAI-specific cancel/truncate commands for non-Google providers
3743
if (!isGoogle) {
3844
if (isGenerating) {

app/src/main/java/ch/fhnw/pepper_realtime/controller/ChatRealtimeHandler.kt

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,13 @@ class ChatRealtimeHandler(
142142
if (responseId != null && responseId == viewModel.cancelledResponseId) {
143143
return
144144
}
145+
146+
// Google Live API: Ignore audio after manual interrupt until next turn
147+
// This prevents continued playback after user taps interrupt button
148+
if (responseId == null && viewModel.ignoreGoogleAudioUntilNextTurn.value) {
149+
Log.d(TAG, "Ignoring Google audio chunk (manual interrupt active)")
150+
return
151+
}
145152

146153
// Note: State transition to SPEAKING is handled by AudioPlayer.Listener.onPlaybackStarted()
147154
// when playback actually begins, not here when audio chunks arrive
@@ -372,6 +379,8 @@ class ChatRealtimeHandler(
372379

373380
override fun onGoogleSetupComplete() {
374381
Log.i(TAG, "Google Live API setup complete")
382+
// Reset interrupt flag for fresh session
383+
viewModel.setIgnoreGoogleAudio(false)
375384
// Notify session manager that Google setup is confirmed
376385
sessionManager.onGoogleSetupComplete()
377386
}
@@ -381,6 +390,9 @@ class ChatRealtimeHandler(
381390
// Increment turn counter for new response ID
382391
googleTurnCounter++
383392

393+
// Clear the manual interrupt flag - new turn means user triggered new response
394+
viewModel.setIgnoreGoogleAudio(false)
395+
384396
// Reset thinking bubble tracking for new turn
385397
currentThinkingBubbleId = null
386398

@@ -400,6 +412,9 @@ class ChatRealtimeHandler(
400412

401413
// Clear pending tool calls on interrupt
402414
pendingGoogleToolCalls.clear()
415+
416+
// Clear the manual interrupt flag - server confirmed interruption
417+
viewModel.setIgnoreGoogleAudio(false)
403418
}
404419

405420
override fun onGoogleToolCall(functionCalls: List<GoogleLiveEvents.FunctionCall>) {

app/src/main/java/ch/fhnw/pepper_realtime/ui/ChatViewModel.kt

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,21 @@ class ChatViewModel @Inject constructor(
7272
// Video streaming state
7373
private val _isVideoStreamActive = MutableStateFlow(false)
7474
private val _videoPreviewFrame = MutableStateFlow<android.graphics.Bitmap?>(null)
75+
76+
// Google Live API: Flag to ignore incoming audio after manual interrupt
77+
// This prevents audio from continuing to play after user taps the interrupt button
78+
// The flag is cleared when a new model turn starts (triggered by user's next speech)
79+
private val _ignoreGoogleAudioUntilNextTurn = MutableStateFlow(false)
80+
val ignoreGoogleAudioUntilNextTurn: StateFlow<Boolean> = _ignoreGoogleAudioUntilNextTurn.asStateFlow()
81+
82+
fun setIgnoreGoogleAudio(ignore: Boolean) {
83+
_ignoreGoogleAudioUntilNextTurn.value = ignore
84+
if (ignore) {
85+
Log.d(TAG, "🔇 Google audio ignored until next turn (manual interrupt)")
86+
} else {
87+
Log.d(TAG, "🔊 Google audio acceptance resumed (new turn started)")
88+
}
89+
}
7590

7691
// Connection State
7792
private val _isConnected = MutableStateFlow(false)

0 commit comments

Comments
 (0)