1919- The widget uses `ontoolinputpartial` to receive text as it streams
2020- Widget calls private tools to create TTS queue, add text, and poll audio
2121- Audio plays in the widget using Web Audio API
22+ - Model context updates show playback progress to the LLM
23+ - Native theming adapts to dark/light mode automatically
24+ - Fullscreen mode with Escape key to exit
25+ - Multi-widget speak lock coordinates playback across instances
2226
2327Usage:
2428 # Start the MCP server
@@ -101,13 +105,17 @@ class TTSQueueState:
101105
102106 # Tracking
103107 created_at : float = field (default_factory = time .time )
108+ last_activity : float = field (default_factory = time .time ) # Last text, end signal, or audio poll
104109 lock : asyncio .Lock = field (default_factory = asyncio .Lock )
105110 task : asyncio .Task | None = None
106111
107112
108113# Active TTS queues
109114tts_queues : dict [str , TTSQueueState ] = {}
110115
116+ # Queue timeout: if no activity for this long, mark as error
117+ QUEUE_TIMEOUT_SECONDS = 30
118+
111119
112120# ------------------------------------------------------
113121# Public Tool: say
@@ -254,16 +262,10 @@ def add_tts_text(queue_id: str, text: str) -> list[types.TextContent]:
254262 # Queue the text (non-blocking)
255263 try :
256264 state .text_queue .put_nowait (text )
265+ state .last_activity = time .time () # Update activity timestamp
257266 except asyncio .QueueFull :
258267 return [types .TextContent (type = "text" , text = '{"error": "Queue full"}' )]
259268
260- # BACKPRESSURE: Return queue depth so widget can throttle:
261- # import json
262- # return [types.TextContent(type="text", text=json.dumps({
263- # "queued": True,
264- # "queue_depth": state.text_queue.qsize()
265- # }))]
266-
267269 return [types .TextContent (type = "text" , text = '{"queued": true}' )]
268270
269271
@@ -276,16 +278,20 @@ def end_tts_queue(queue_id: str) -> list[types.TextContent]:
276278 """
277279 state = tts_queues .get (queue_id )
278280 if not state :
281+ logger .warning (f"end_tts_queue called for unknown queue: { queue_id } " )
279282 return [types .TextContent (type = "text" , text = '{"error": "Queue not found"}' )]
280283 if state .end_signaled :
284+ logger .info (f"end_tts_queue called for already-ended queue: { queue_id } " )
281285 return [types .TextContent (type = "text" , text = '{"already_ended": true}' )]
282286
283287 state .end_signaled = True
288+ state .last_activity = time .time () # Update activity timestamp
284289 try :
285290 state .text_queue .put_nowait (None ) # EOF marker
286291 except asyncio .QueueFull :
287292 pass
288293
294+ logger .info (f"end_tts_queue called for queue: { queue_id } " )
289295 return [types .TextContent (type = "text" , text = '{"ended": true}' )]
290296
291297
@@ -328,18 +334,23 @@ def poll_tts_audio(queue_id: str) -> list[types.TextContent]:
328334 queue_id: The queue ID from create_tts_queue
329335 """
330336 import json
337+ import time
331338
332339 state = tts_queues .get (queue_id )
333340 if not state :
334341 return [types .TextContent (type = "text" , text = '{"error": "Queue not found"}' )]
335342
343+ # Update last activity to prevent timeout during active polling
344+ state .last_activity = time .time ()
345+
336346 # Get new chunks (use sync approach since we can't await in tool)
337347 # The lock is async, so we need to be careful here
338348 # For simplicity, just grab what's available without locking
339349 new_chunks = state .audio_chunks [state .chunks_delivered :]
340350 state .chunks_delivered = len (state .audio_chunks )
341351
342- done = state .status == "complete" and state .chunks_delivered >= len (state .audio_chunks )
352+ # Consider queues with errors as "done" so widget stops polling
353+ done = (state .status == "complete" or state .status == "error" ) and state .chunks_delivered >= len (state .audio_chunks )
343354
344355 response = {
345356 "chunks" : [
@@ -356,7 +367,11 @@ def poll_tts_audio(queue_id: str) -> list[types.TextContent]:
356367 "status" : state .status ,
357368 }
358369
359- # Clean up completed queues
370+ # Include error message if present
371+ if state .error_message :
372+ response ["error" ] = state .error_message
373+
374+ # Clean up completed or errored queues
360375 if done :
361376 # Schedule cleanup after a delay
362377 async def cleanup ():
@@ -555,7 +570,21 @@ async def _run_tts_queue(state: TTSQueueState):
555570
556571 try :
557572 while True :
558- text_item = await state .text_queue .get ()
573+ # Wait for text with timeout to detect stale queues
574+ try :
575+ text_item = await asyncio .wait_for (
576+ state .text_queue .get (),
577+ timeout = 5.0 # Check every 5 seconds
578+ )
579+ except asyncio .TimeoutError :
580+ # Check if queue is stale (no activity for too long)
581+ if time .time () - state .last_activity > QUEUE_TIMEOUT_SECONDS :
582+ logger .warning (f"TTS queue { state .id } timeout after { QUEUE_TIMEOUT_SECONDS } s of inactivity" )
583+ state .status = "error"
584+ state .error_message = f"Queue timeout: no activity for { QUEUE_TIMEOUT_SECONDS } s"
585+ break
586+ # Continue waiting - queue might still be active
587+ continue
559588
560589 if text_item is None :
561590 # EOF - flush remaining text
@@ -904,15 +933,33 @@ def generate_sync():
904933 const app = appRef.current;
905934 if (isPollingRef.current || !app) return;
906935 isPollingRef.current = true;
936+
937+ let emptyPollCount = 0;
907938 while (queueIdRef.current) {
908939 try {
909940 const result = await app.callServerTool({ name: "poll_tts_audio", arguments: { queue_id: queueIdRef.current } });
910941 const data = JSON.parse(result.content[0].text);
911- if (data.error) break;
942+ if (data.error) {
943+ console.log('[TTS] Queue error:', data.error);
944+ break;
945+ }
912946 for (const chunk of data.chunks) await scheduleAudioChunk(chunk);
913947 if (data.done) { allAudioReceivedRef.current = true; break; }
914- await new Promise(r => setTimeout(r, data.chunks.length > 0 ? 30 : 80));
915- } catch (err) { break; }
948+
949+ // Adaptive backoff: faster when streaming, slower when waiting
950+ if (data.chunks.length > 0) {
951+ emptyPollCount = 0; // Reset - we're getting chunks
952+ await new Promise(r => setTimeout(r, 20)); // Fast poll during streaming
953+ } else {
954+ emptyPollCount++;
955+ // Linear backoff for empty polls: 100ms on the first empty poll, then capped at 150ms
956+ const delay = Math.min(50 + (emptyPollCount * 50), 150);
957+ await new Promise(r => setTimeout(r, delay));
958+ }
959+ } catch (err) {
960+ console.log('[TTS] Polling error:', err);
961+ break;
962+ }
916963 }
917964 isPollingRef.current = false;
918965 }, [scheduleAudioChunk]);
@@ -1083,23 +1130,26 @@ def generate_sync():
10831130 onAppCreated: (app) => {
10841131 appRef.current = app;
10851132 app.ontoolinputpartial = async (params) => {
1086- console.log('[TTS] ontoolinputpartial called' );
1133+ console.log('[TTS] ontoolinputpartial called, queueId:', queueIdRef.current );
10871134 const newText = params.arguments?.text;
10881135 if (!newText) return;
10891136 // Detect new session: text doesn't continue from where we left off
10901137 const isNewSession = lastTextRef.current.length > 0 && !newText.startsWith(lastTextRef.current);
1091- if (isNewSession) console.log('[TTS] new session detected in partial');
10921138 if (isNewSession) {
1139+ console.log('[TTS] new session detected in partial - resetting queue');
10931140 // Reset for new session
10941141 queueIdRef.current = null;
10951142 lastTextRef.current = "";
10961143 }
10971144 setDisplayText(newText);
1098- if (!queueIdRef.current && !(await initTTSQueue())) return;
1145+ if (!queueIdRef.current && !(await initTTSQueue())) {
1146+ console.log('[TTS] initTTSQueue failed in partial');
1147+ return;
1148+ }
10991149 await sendTextToTTS(newText);
11001150 };
11011151 app.ontoolinput = async (params) => {
1102- console.log('[TTS] ontoolinput called' );
1152+ console.log('[TTS] ontoolinput called, queueId:', queueIdRef.current );
11031153 const text = params.arguments?.text;
11041154 if (!text) return;
11051155 // Read voice setting (defaults to cosette)
@@ -1110,16 +1160,20 @@ def generate_sync():
11101160 setAutoPlay(shouldAutoPlay);
11111161 // Detect new session: text doesn't continue from where we left off
11121162 const isNewSession = lastTextRef.current.length > 0 && !text.startsWith(lastTextRef.current);
1113- if (isNewSession) console.log('[TTS] new session detected in input');
11141163 if (isNewSession) {
1164+ console.log('[TTS] new session detected in input - resetting queue');
11151165 queueIdRef.current = null;
11161166 lastTextRef.current = "";
11171167 }
11181168 setDisplayText(text);
1119- if (!queueIdRef.current && !(await initTTSQueue())) return;
1169+ if (!queueIdRef.current && !(await initTTSQueue())) {
1170+ console.log('[TTS] initTTSQueue failed in input');
1171+ return;
1172+ }
11201173 await sendTextToTTS(text);
11211174 };
11221175 app.ontoolresult = async (params) => {
1176+ console.log('[TTS] ontoolresult called, queueId:', queueIdRef.current);
11231177 fullTextRef.current = lastTextRef.current;
11241178 // Read widget UUID from tool result _meta for speak lock coordination
11251179 const resultUuid = params.content?.[0]?._meta?.widgetUUID;
@@ -1128,8 +1182,13 @@ def generate_sync():
11281182 console.log('[TTS] Widget UUID:', resultUuid);
11291183 }
11301184 if (queueIdRef.current) {
1185+ console.log('[TTS] Calling end_tts_queue for:', queueIdRef.current);
11311186 try { await app.callServerTool({ name: "end_tts_queue", arguments: { queue_id: queueIdRef.current } }); }
1132- catch (err) {}
1187+ catch (err) {
1188+ console.log('[TTS] end_tts_queue error:', err);
1189+ }
1190+ } else {
1191+ console.log('[TTS] No queueId to end in ontoolresult');
11331192 }
11341193 // DON'T reset here - let audio continue playing
11351194 // New session detection happens in ontoolinputpartial via text comparison
@@ -1220,12 +1279,21 @@ def generate_sync():
12201279 const pendingText = displayText.slice(charPosition);
12211280
12221281 return (
1223- <main className={`container` + (displayMode === "fullscreen" ? ` fullscreen` : ``)} style={{
1224- paddingTop: hostContext?.safeAreaInsets?.top,
1225- paddingRight: hostContext?.safeAreaInsets?.right,
1226- paddingBottom: hostContext?.safeAreaInsets?.bottom,
1227- paddingLeft: hostContext?.safeAreaInsets?.left,
1228- }}>
1282+ <main
1283+ className={`container` + (displayMode === "fullscreen" ? ` fullscreen` : ``)}
1284+ style={{
1285+ paddingTop: hostContext?.safeAreaInsets?.top,
1286+ paddingRight: hostContext?.safeAreaInsets?.right,
1287+ paddingBottom: hostContext?.safeAreaInsets?.bottom,
1288+ paddingLeft: hostContext?.safeAreaInsets?.left,
1289+ }}
1290+ tabIndex={0}
1291+ onKeyDown={(e) => {
1292+ if (e.key === "Escape" && displayMode === "fullscreen") {
1293+ toggleFullscreen();
1294+ }
1295+ }}
1296+ >
12291297 <div className="textWrapper">
12301298 <div className="textDisplay" onClick={togglePlayPause} style={{cursor: "pointer"}}>
12311299 <span className="spoken">{spokenText}</span>
0 commit comments