Skip to content

Commit 3155f88

Browse files
committed
Fix animation tool: stop GestureController before execution, skip response if announced
- Add skipResponseIfAnnounced property to Tool interface
- PlayAnimationTool: stop GestureController before starting animation to prevent resource conflicts
- PlayAnimationTool: set skipResponseIfAnnounced=true to avoid duplicate responses
- ChatRealtimeHandler: skip tool response for Google if tool was announced (NON_BLOCKING with no response)
- ChatSessionController: add requestResponse parameter to sendToolResult
- ChatTurnListener: remove status update in onExitSpeaking to prevent state overwrites
- RealtimeSessionManager: mark skipResponseIfAnnounced tools as NON_BLOCKING for Google
1 parent 20319b9 commit 3155f88

7 files changed

Lines changed: 102 additions & 37 deletions

File tree

app/src/main/java/ch/fhnw/pepper_realtime/controller/ChatRealtimeHandler.kt

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,16 @@ class ChatRealtimeHandler(
218218
continue
219219
}
220220

221-
viewModel.isExpectingFinalAnswerAfterToolCall = true
221+
// Check if tool should skip response when already announced
222+
val tool = toolRegistry.getOrCreateTool(toolName.orEmpty())
223+
val skipResponse = messageItems.isNotEmpty() && (tool?.skipResponseIfAnnounced == true)
224+
val shouldRequestResponse = !skipResponse
225+
226+
if (skipResponse) {
227+
Log.d(TAG, "Tool '$toolName' was announced in response, skipping follow-up response request")
228+
}
229+
230+
viewModel.isExpectingFinalAnswerAfterToolCall = shouldRequestResponse
222231

223232
applicationScope.launch(ioDispatcher) {
224233
val toolResult: String = try {
@@ -240,7 +249,7 @@ class ChatRealtimeHandler(
240249
Handler(Looper.getMainLooper()).post {
241250
viewModel.updateLatestFunctionCallResult(fResult)
242251
}
243-
sessionController?.sendToolResult(callId ?: "", fResult)
252+
sessionController?.sendToolResult(callId ?: "", fResult, requestResponse = shouldRequestResponse)
244253
?: Log.e(TAG, "SessionController is null, cannot send tool result")
245254
}
246255
}
@@ -441,7 +450,17 @@ class ChatRealtimeHandler(
441450
val functionCall = ChatMessage.createFunctionCall(toolName, args.toString(), ChatMessage.Sender.ROBOT)
442451
viewModel.addMessage(functionCall)
443452

444-
viewModel.isExpectingFinalAnswerAfterToolCall = true
453+
// Check if tool should skip response when already announced
454+
// For Google: if audio is playing, the model announced the action
455+
val tool = toolRegistry.getOrCreateTool(toolName)
456+
val wasAnnounced = audioPlayer.isPlaying()
457+
val skipResponse = wasAnnounced && (tool?.skipResponseIfAnnounced == true)
458+
459+
if (skipResponse) {
460+
Log.d(TAG, "Google: Tool '$toolName' was announced (audio playing), using SILENT scheduling")
461+
}
462+
463+
viewModel.isExpectingFinalAnswerAfterToolCall = !skipResponse
445464

446465
applicationScope.launch(ioDispatcher) {
447466
val toolResult: String = try {
@@ -471,12 +490,18 @@ class ChatRealtimeHandler(
471490
}
472491

473492
// Send result back to Google
474-
// Note: Google Live API continues generation automatically after receiving toolResponse
475-
// For analyze_vision, use SILENT scheduling to avoid double response (image triggers response via turnComplete=true)
476-
val scheduling = if (toolName == "analyze_vision") "SILENT" else null
477-
val sent = sessionManager.sendGoogleToolResult(callId, toolName, toolResult, scheduling)
478-
if (!sent) {
479-
Log.e(TAG, "Failed to send Google tool result for $toolName")
493+
// Use SILENT scheduling to prevent follow-up response when:
494+
// - analyze_vision: image triggers response via turnComplete=true
495+
// - skipResponse: tool was announced, no need for another response
496+
// TEST: If skipResponse, don't send tool response at all to see if that prevents duplicate audio
497+
if (skipResponse) {
498+
Log.d(TAG, "Google: Skipping tool response for '$toolName' (skipResponseIfAnnounced)")
499+
} else {
500+
val scheduling = if (toolName == "analyze_vision") "SILENT" else null
501+
val sent = sessionManager.sendGoogleToolResult(callId, toolName, toolResult, scheduling)
502+
if (!sent) {
503+
Log.e(TAG, "Failed to send Google tool result for $toolName")
504+
}
480505
}
481506
}
482507
}

app/src/main/java/ch/fhnw/pepper_realtime/controller/ChatSessionController.kt

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,10 @@ class ChatSessionController @Inject constructor(
151151
}
152152
}
153153

154-
fun sendToolResult(callId: String, result: String, toolName: String? = null) {
154+
fun sendToolResult(callId: String, result: String, toolName: String? = null, requestResponse: Boolean = true) {
155155
if (!sessionManager.isConnected) return
156156
try {
157-
viewModel.isExpectingFinalAnswerAfterToolCall = true
157+
viewModel.isExpectingFinalAnswerAfterToolCall = requestResponse
158158

159159
val isGoogle = settingsRepository.apiProviderEnum.isGoogleProvider()
160160

@@ -172,21 +172,30 @@ class ChatSessionController @Inject constructor(
172172
return
173173
}
174174

175-
viewModel.setResponseGenerating(true)
176-
177175
// Google Live API continues generation automatically after toolResponse
178176
// OpenAI needs explicit response.create
179-
if (!isGoogle) {
177+
// Skip if requestResponse=false (tool already announced, no follow-up needed)
178+
if (!isGoogle && requestResponse) {
179+
viewModel.setResponseGenerating(true)
180180
val sentToolResponse = sessionManager.requestResponse()
181181
if (!sentToolResponse) {
182182
viewModel.setResponseGenerating(false)
183183
Log.e(TAG, "Failed to send tool response request")
184184
return
185185
}
186-
}
187-
188-
if (turnManager != null && turnManager.state != TurnManager.State.SPEAKING) {
189-
turnManager.setState(TurnManager.State.THINKING)
186+
if (turnManager != null && turnManager.state != TurnManager.State.SPEAKING) {
187+
turnManager.setState(TurnManager.State.THINKING)
188+
}
189+
} else if (!requestResponse) {
190+
// No response requested - transition to LISTENING after audio finishes
191+
// Don't set responseGenerating=true since we're not expecting a response
192+
Log.d(TAG, "Tool completed without requesting response, will return to LISTENING after audio")
193+
} else {
194+
// Google: generating continues automatically
195+
viewModel.setResponseGenerating(true)
196+
if (turnManager != null && turnManager.state != TurnManager.State.SPEAKING) {
197+
turnManager.setState(TurnManager.State.THINKING)
198+
}
190199
}
191200
} catch (e: Exception) {
192201
Log.e(TAG, "Error sending tool result", e)

app/src/main/java/ch/fhnw/pepper_realtime/controller/ChatTurnListener.kt

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,9 @@ class ChatTurnListener @Inject constructor(
101101
Log.i(TAG, "State: Exiting SPEAKING - stopping gestures")
102102
gestureController.stopNow()
103103

104-
// Check user's mute intent to determine status text
105-
// Note: Actual mic state change happens in onEnterListening
106-
val userWantsMicOn = viewModel.userWantsMicOn.value
107-
if (!userWantsMicOn) {
108-
viewModel.setStatusText(getString(R.string.status_muted_tap_to_unmute))
109-
} else {
110-
viewModel.setStatusText(getString(R.string.status_listening))
111-
}
104+
// Don't set status text here - let onEnterListening or onEnterThinking handle it
105+
// Setting "listening" here would override the correct "thinking" status when
106+
// transitioning SPEAKING -> THINKING
112107
viewModel.setInterruptFabVisible(false)
113108
}
114109

app/src/main/java/ch/fhnw/pepper_realtime/network/RealtimeSessionManager.kt

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,7 @@ class RealtimeSessionManager @Inject constructor() {
439439
*/
440440
fun sendGoogleToolResult(callId: String, toolName: String, resultJson: String, scheduling: String? = null): Boolean {
441441
return try {
442-
// Normalize tool response to the format used in the reference Live API app:
443-
// response: { success: boolean, result: <any json>, error?: string, scheduling?: string }
442+
// Normalize tool response
444443
val parsedResult = try {
445444
JSONObject(resultJson)
446445
} catch (_: Exception) {
@@ -454,7 +453,8 @@ class RealtimeSessionManager @Inject constructor() {
454453
val err = parsedResult?.optString("error", "") ?: ""
455454
if (err.isNotEmpty()) put("error", err)
456455
}
457-
// Add scheduling for NON_BLOCKING tools (e.g., SILENT for analyze_vision)
456+
// scheduling is inside response for NON_BLOCKING functions
457+
// SILENT = don't generate a follow-up response for this tool result
458458
if (scheduling != null) {
459459
put("scheduling", scheduling)
460460
}
@@ -616,10 +616,13 @@ class RealtimeSessionManager @Inject constructor() {
616616
put("name", openAiDef.optString("name", toolName))
617617
put("description", openAiDef.optString("description", ""))
618618

619-
// analyze_vision should be non-blocking so image can be sent with turnComplete=true
620-
// and the tool response won't trigger a second response (uses scheduling=SILENT)
621-
if (toolName == "analyze_vision") {
619+
// Non-blocking tools:
620+
// - analyze_vision: image sent with turnComplete=true, response uses scheduling=SILENT
621+
// - skipResponseIfAnnounced tools: action may be announced, response uses scheduling=SILENT
622+
// NON_BLOCKING is required for scheduling to work
623+
if (toolName == "analyze_vision" || tool.skipResponseIfAnnounced) {
622624
put("behavior", "NON_BLOCKING")
625+
Log.i(TAG, "Tool '$toolName' marked as NON_BLOCKING (skipResponseIfAnnounced=${tool.skipResponseIfAnnounced})")
623626
}
624627

625628
// Parameters

app/src/main/java/ch/fhnw/pepper_realtime/tools/Tool.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,18 @@ interface Tool {
3737
val apiKeyRequirement: ApiKeyRequirement
3838
get() = ApiKeyRequirement.None
3939

40+
/**
41+
* If true, no response will be requested after tool execution
42+
* when the model already announced the action in the same response.
43+
*
44+
* Use this for "fire and forget" tools like animations where the
45+
* announcement IS the response and no follow-up is needed.
46+
*
47+
* Default is false - most tools need a response to present results.
48+
*/
49+
val skipResponseIfAnnounced: Boolean
50+
get() = false
51+
4052
/**
4153
* Check if this tool is currently available based on context.
4254
* Default implementation checks API key availability based on apiKeyRequirement.

app/src/pepper/java/ch/fhnw/pepper_realtime/tools/entertainment/PlayAnimationTool.kt

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ import org.json.JSONObject
1313
/**
1414
* Tool for playing preinstalled Pepper animations.
1515
* Supports various emotional and gestural animations.
16+
*
17+
* IMPORTANT: This tool stops the GestureController before playing the animation
18+
* to avoid resource conflicts. The GestureController will automatically restart
19+
* when the TurnManager enters SPEAKING state again.
1620
*/
1721
class PlayAnimationTool : Tool {
1822

@@ -22,11 +26,14 @@ class PlayAnimationTool : Tool {
2226

2327
override fun getName(): String = "play_animation"
2428

29+
// Don't request another response if the model already announced the animation
30+
override val skipResponseIfAnnounced: Boolean = true
31+
2532
override fun getDefinition(): JSONObject {
2633
return JSONObject().apply {
2734
put("type", "function")
2835
put("name", getName())
29-
put("description", "Play a preinstalled Pepper animation. Use the hello_01 animation when the user wants you to wave or say hello.")
36+
put("description", "Play a preinstalled Pepper animation. Use hello_01 when the user wants you to wave or say hello.")
3037
put("parameters", JSONObject().apply {
3138
put("type", "object")
3239
put("properties", JSONObject().apply {
@@ -67,10 +74,14 @@ class PlayAnimationTool : Tool {
6774
return JSONObject().put("error", "QiContext not ready").toString()
6875
}
6976

77+
// Stop gesture controller to free animation resources before starting
78+
Log.d(TAG, "Stopping GestureController before playing animation: $name")
79+
context.gestureController.stopNow()
80+
7081
try {
7182
val qiContext = context.qiContext as QiContext
7283

73-
// Start animation asynchronously
84+
// Start animation asynchronously (non-blocking)
7485
AnimationBuilder.with(qiContext)
7586
.withResources(resId)
7687
.buildAsync()
@@ -82,12 +93,19 @@ class PlayAnimationTool : Tool {
8293
.run()
8394
}
8495
.thenConsume { future ->
85-
if (future.hasError()) {
86-
Log.e(TAG, "Animation failed", future.error)
96+
if (future.isSuccess) {
97+
Log.i(TAG, "Animation '$name' completed successfully")
98+
} else if (future.hasError()) {
99+
Log.e(TAG, "Animation '$name' failed", future.error)
87100
}
88101
}
89102
} catch (e: Exception) {
90-
Log.e(TAG, "Error starting animation", e)
103+
Log.e(TAG, "Error starting animation '$name'", e)
104+
return JSONObject()
105+
.put("status", "failed")
106+
.put("name", name)
107+
.put("error", e.message ?: "Unknown error")
108+
.toString()
91109
}
92110

93111
return JSONObject()

app/src/standalone/java/ch/fhnw/pepper_realtime/tools/entertainment/PlayAnimationTool.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ class PlayAnimationTool : Tool {
1717

1818
override fun getName(): String = "play_animation"
1919

20+
// Don't request another response if the model already announced the animation
21+
override val skipResponseIfAnnounced: Boolean = true
22+
2023
override fun getDefinition(): JSONObject {
2124
return JSONObject().apply {
2225
put("type", "function")

0 commit comments

Comments
 (0)