modelcontextprotocol
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/qr-server/grid-cell.png‎
-12.1 KB b/‎examples/qr-server/grid-cell.png‎
-12.1 KB
diff --git a/‎examples/say-server/README.md‎
Lines changed: 7 additions & 0 deletions b/‎examples/say-server/README.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/say-server/grid-cell.png‎
37.9 KB b/‎examples/say-server/grid-cell.png‎
37.9 KB
diff --git a/‎examples/say-server/package.json‎
Lines changed: 4 additions & 22 deletions b/‎examples/say-server/package.json‎
Lines changed: 4 additions & 22 deletions
diff --git a/‎examples/say-server/screenshot.png‎
22.4 KB b/‎examples/say-server/screenshot.png‎
22.4 KB
diff --git a/‎examples/say-server/server.py‎
Lines changed: 94 additions & 26 deletions b/‎examples/say-server/server.py‎
Lines changed: 94 additions & 26 deletions
diff --git a/‎tests/e2e/servers.spec.ts-snapshots/say-server.png‎
38.7 KB b/‎tests/e2e/servers.spec.ts-snapshots/say-server.png‎
38.7 KB
@@ -67,8 +67,8 @@ Or edit your `package.json` manually:
 | [**Scenario Modeler**](examples/scenario-modeler-server) | [**Budget Allocator**](examples/budget-allocator-server) | [**Customer Segmentation**](examples/customer-segmentation-server) |
 | [![System Monitor](examples/system-monitor-server/grid-cell.png "Real-time OS metrics")](examples/system-monitor-server) | [![Transcript](examples/transcript-server/grid-cell.png "Live speech transcription")](examples/transcript-server) | [![Video Resource](examples/video-resource-server/grid-cell.png "Binary video via MCP resources")](examples/video-resource-server) |
 | [**System Monitor**](examples/system-monitor-server) | [**Transcript**](examples/transcript-server) | [**Video Resource**](examples/video-resource-server) |
-| [![PDF Server](examples/pdf-server/grid-cell.png "Interactive PDF viewer with chunked loading")](examples/pdf-server) | [![QR Code](examples/qr-server/grid-cell.png "QR code generator")](examples/qr-server) | |
-| [**PDF Server**](examples/pdf-server) | [**QR Code (Python)**](examples/qr-server) | |
+| [![PDF Server](examples/pdf-server/grid-cell.png "Interactive PDF viewer with chunked loading")](examples/pdf-server) | [![QR Code](examples/qr-server/grid-cell.png "QR code generator")](examples/qr-server) | [![Say Demo](examples/say-server/grid-cell.png "Text-to-speech demo")](examples/say-server) |
+| [**PDF Server**](examples/pdf-server) | [**QR Code (Python)**](examples/qr-server) | [**Say Demo**](examples/say-server) |
 
 ### Starter Templates
 
 
@@ -8,6 +8,10 @@ This example showcases several MCP App capabilities:
 
 - **Single-file executable**: Python server with embedded React UI - no build step required
 - **Partial tool inputs** (`ontoolinputpartial`): Widget receives streaming text as it's being generated
+- **Model context updates**: Widget updates the LLM with playback progress ("Playing: ...snippet...")
+- **Native theming**: Uses CSS variables for automatic dark/light mode adaptation
+- **Fullscreen mode**: Toggle fullscreen via the `requestDisplayMode()` API; press Escape to exit
+- **Multi-widget speak lock**: Coordinates multiple TTS widgets via localStorage so only one plays at a time
 - **Hidden tools** (`visibility: ["app"]`): Private tools only accessible to the widget, not the model
 - **CSP metadata**: Resource declares required domains (`esm.sh`) for in-browser transpilation
 
@@ -85,11 +89,14 @@ Connect to `http://localhost:3109/mcp` and call the `say` tool:
 The default voice is `cosette`. Use the `list_voices` tool or pass a `voice` parameter to `say`:
 
 ### Predefined Voices
+
 - `alba`, `marius`, `javert`, `jean` - from [alba-mackenna](https://huggingface.co/kyutai/tts-voices/tree/main/alba-mackenna) (CC BY 4.0)
 - `cosette`, `eponine`, `azelma`, `fantine` - from [VCTK dataset](https://huggingface.co/kyutai/tts-voices/tree/main/vctk) (CC BY 4.0)
 
 ### Custom Voices
+
 You can also use HuggingFace URLs or local file paths:
+
 ```json
 {"text": "Hello!", "voice": "hf://kyutai/tts-voices/voice-donations/alice.wav"}
 {"text": "Hello!", "voice": "/path/to/my-voice.wav"}
 
@@ -1,7 +1,7 @@
 {
   "name": "@modelcontextprotocol/server-say",
   "version": "0.4.1",
-  "type": "module",
+  "private": true,
   "description": "Streaming TTS MCP App Server with karaoke-style text highlighting",
   "repository": {
     "type": "git",
@@ -10,26 +10,8 @@
   },
   "license": "MIT",
   "scripts": {
-    "build": "tsc --noEmit && cross-env INPUT=mcp-app.html vite build",
-    "watch": "cross-env INPUT=mcp-app.html vite build --watch",
-    "serve": "uv run server.py",
-    "start": "cross-env NODE_ENV=development npm run build && npm run serve",
-    "dev": "cross-env NODE_ENV=development concurrently 'npm run watch' 'npm run serve'"
-  },
-  "dependencies": {
-    "@modelcontextprotocol/ext-apps": "^0.4.1",
-    "react": "^19.2.0",
-    "react-dom": "^19.2.0"
-  },
-  "devDependencies": {
-    "@types/node": "^22.0.0",
-    "@types/react": "^19.2.2",
-    "@types/react-dom": "^19.2.2",
-    "@vitejs/plugin-react": "^4.3.4",
-    "concurrently": "^9.2.1",
-    "cross-env": "^10.1.0",
-    "typescript": "^5.9.3",
-    "vite": "^6.0.0",
-    "vite-plugin-singlefile": "^2.3.0"
+    "start": "uv run server.py",
+    "dev": "uv run server.py",
+    "build": "echo 'No build step needed for Python server'"
   }
 }
@@ -19,6 +19,10 @@
 - The widget uses `ontoolinputpartial` to receive text as it streams
 - Widget calls private tools to create TTS queue, add text, and poll audio
 - Audio plays in the widget using Web Audio API
+- Model context updates show playback progress to the LLM
+- Native theming adapts to dark/light mode automatically
+- Fullscreen mode with Escape key to exit
+- Multi-widget speak lock coordinates playback across instances
 
 Usage:
   # Start the MCP server
@@ -101,13 +105,17 @@ class TTSQueueState:
 
     # Tracking
     created_at: float = field(default_factory=time.time)
+    last_activity: float = field(default_factory=time.time)  # Last text or end signal
     lock: asyncio.Lock = field(default_factory=asyncio.Lock)
     task: asyncio.Task | None = None
 
 
 # Active TTS queues
 tts_queues: dict[str, TTSQueueState] = {}
 
+# Queue timeout: if no activity for this long, mark as error
+QUEUE_TIMEOUT_SECONDS = 30
+
 
 # ------------------------------------------------------
 # Public Tool: say
@@ -254,16 +262,10 @@ def add_tts_text(queue_id: str, text: str) -> list[types.TextContent]:
     # Queue the text (non-blocking)
     try:
         state.text_queue.put_nowait(text)
+        state.last_activity = time.time()  # Update activity timestamp
     except asyncio.QueueFull:
         return [types.TextContent(type="text", text='{"error": "Queue full"}')]
 
-    # BACKPRESSURE: Return queue depth so widget can throttle:
-    # import json
-    # return [types.TextContent(type="text", text=json.dumps({
-    #     "queued": True,
-    #     "queue_depth": state.text_queue.qsize()
-    # }))]
-
     return [types.TextContent(type="text", text='{"queued": true}')]
 
 
@@ -276,16 +278,20 @@ def end_tts_queue(queue_id: str) -> list[types.TextContent]:
     """
     state = tts_queues.get(queue_id)
     if not state:
+        logger.warning(f"end_tts_queue called for unknown queue: {queue_id}")
         return [types.TextContent(type="text", text='{"error": "Queue not found"}')]
     if state.end_signaled:
+        logger.info(f"end_tts_queue called for already-ended queue: {queue_id}")
         return [types.TextContent(type="text", text='{"already_ended": true}')]
 
     state.end_signaled = True
+    state.last_activity = time.time()  # Update activity timestamp
     try:
         state.text_queue.put_nowait(None)  # EOF marker
     except asyncio.QueueFull:
         pass
 
+    logger.info(f"end_tts_queue called for queue: {queue_id}")
     return [types.TextContent(type="text", text='{"ended": true}')]
 
 
@@ -328,18 +334,23 @@ def poll_tts_audio(queue_id: str) -> list[types.TextContent]:
         queue_id: The queue ID from create_tts_queue
     """
     import json
+    import time
 
     state = tts_queues.get(queue_id)
     if not state:
         return [types.TextContent(type="text", text='{"error": "Queue not found"}')]
 
+    # Update last activity to prevent timeout during active polling
+    state.last_activity = time.time()
+
     # Get new chunks (use sync approach since we can't await in tool)
     # The lock is async, so we need to be careful here
     # For simplicity, just grab what's available without locking
     new_chunks = state.audio_chunks[state.chunks_delivered:]
     state.chunks_delivered = len(state.audio_chunks)
 
-    done = state.status == "complete" and state.chunks_delivered >= len(state.audio_chunks)
+    # Consider queues with errors as "done" so widget stops polling
+    done = (state.status == "complete" or state.status == "error") and state.chunks_delivered >= len(state.audio_chunks)
 
     response = {
         "chunks": [
@@ -356,7 +367,11 @@ def poll_tts_audio(queue_id: str) -> list[types.TextContent]:
         "status": state.status,
     }
 
-    # Clean up completed queues
+    # Include error message if present
+    if state.error_message:
+        response["error"] = state.error_message
+
+    # Clean up completed or errored queues
     if done:
         # Schedule cleanup after a delay
         async def cleanup():
@@ -555,7 +570,21 @@ async def _run_tts_queue(state: TTSQueueState):
 
     try:
         while True:
-            text_item = await state.text_queue.get()
+            # Wait for text with timeout to detect stale queues
+            try:
+                text_item = await asyncio.wait_for(
+                    state.text_queue.get(),
+                    timeout=5.0  # Check every 5 seconds
+                )
+            except asyncio.TimeoutError:
+                # Check if queue is stale (no activity for too long)
+                if time.time() - state.last_activity > QUEUE_TIMEOUT_SECONDS:
+                    logger.warning(f"TTS queue {state.id} timeout after {QUEUE_TIMEOUT_SECONDS}s of inactivity")
+                    state.status = "error"
+                    state.error_message = f"Queue timeout: no activity for {QUEUE_TIMEOUT_SECONDS}s"
+                    break
+                # Continue waiting - queue might still be active
+                continue
 
             if text_item is None:
                 # EOF - flush remaining text
@@ -904,15 +933,33 @@ def generate_sync():
         const app = appRef.current;
         if (isPollingRef.current || !app) return;
         isPollingRef.current = true;
+
+        let emptyPollCount = 0;
         while (queueIdRef.current) {
           try {
             const result = await app.callServerTool({ name: "poll_tts_audio", arguments: { queue_id: queueIdRef.current } });
             const data = JSON.parse(result.content[0].text);
-            if (data.error) break;
+            if (data.error) {
+              console.log('[TTS] Queue error:', data.error);
+              break;
+            }
             for (const chunk of data.chunks) await scheduleAudioChunk(chunk);
             if (data.done) { allAudioReceivedRef.current = true; break; }
-            await new Promise(r => setTimeout(r, data.chunks.length > 0 ? 30 : 80));
-          } catch (err) { break; }
+
+            // Adaptive backoff: faster when streaming, slower when waiting
+            if (data.chunks.length > 0) {
+              emptyPollCount = 0;  // Reset - we're getting chunks
+              await new Promise(r => setTimeout(r, 20));  // Fast poll during streaming
+            } else {
+              emptyPollCount++;
+              // Linear backoff for empty polls: 100ms on the first empty poll, then capped at 150ms
+              const delay = Math.min(50 + (emptyPollCount * 50), 150);
+              await new Promise(r => setTimeout(r, delay));
+            }
+          } catch (err) {
+            console.log('[TTS] Polling error:', err);
+            break;
+          }
         }
         isPollingRef.current = false;
       }, [scheduleAudioChunk]);
@@ -1083,23 +1130,26 @@ def generate_sync():
         onAppCreated: (app) => {
           appRef.current = app;
           app.ontoolinputpartial = async (params) => {
-            console.log('[TTS] ontoolinputpartial called');
+            console.log('[TTS] ontoolinputpartial called, queueId:', queueIdRef.current);
             const newText = params.arguments?.text;
             if (!newText) return;
             // Detect new session: text doesn't continue from where we left off
             const isNewSession = lastTextRef.current.length > 0 && !newText.startsWith(lastTextRef.current);
-            if (isNewSession) console.log('[TTS] new session detected in partial');
             if (isNewSession) {
+              console.log('[TTS] new session detected in partial - resetting queue');
               // Reset for new session
               queueIdRef.current = null;
               lastTextRef.current = "";
             }
             setDisplayText(newText);
-            if (!queueIdRef.current && !(await initTTSQueue())) return;
+            if (!queueIdRef.current && !(await initTTSQueue())) {
+              console.log('[TTS] initTTSQueue failed in partial');
+              return;
+            }
             await sendTextToTTS(newText);
           };
           app.ontoolinput = async (params) => {
-            console.log('[TTS] ontoolinput called');
+            console.log('[TTS] ontoolinput called, queueId:', queueIdRef.current);
             const text = params.arguments?.text;
             if (!text) return;
             // Read voice setting (defaults to cosette)
@@ -1110,16 +1160,20 @@ def generate_sync():
             setAutoPlay(shouldAutoPlay);
             // Detect new session: text doesn't continue from where we left off
             const isNewSession = lastTextRef.current.length > 0 && !text.startsWith(lastTextRef.current);
-            if (isNewSession) console.log('[TTS] new session detected in input');
             if (isNewSession) {
+              console.log('[TTS] new session detected in input - resetting queue');
               queueIdRef.current = null;
               lastTextRef.current = "";
             }
             setDisplayText(text);
-            if (!queueIdRef.current && !(await initTTSQueue())) return;
+            if (!queueIdRef.current && !(await initTTSQueue())) {
+              console.log('[TTS] initTTSQueue failed in input');
+              return;
+            }
             await sendTextToTTS(text);
           };
           app.ontoolresult = async (params) => {
+            console.log('[TTS] ontoolresult called, queueId:', queueIdRef.current);
             fullTextRef.current = lastTextRef.current;
             // Read widget UUID from tool result _meta for speak lock coordination
             const resultUuid = params.content?.[0]?._meta?.widgetUUID;
@@ -1128,8 +1182,13 @@ def generate_sync():
               console.log('[TTS] Widget UUID:', resultUuid);
             }
             if (queueIdRef.current) {
+              console.log('[TTS] Calling end_tts_queue for:', queueIdRef.current);
               try { await app.callServerTool({ name: "end_tts_queue", arguments: { queue_id: queueIdRef.current } }); }
-              catch (err) {}
+              catch (err) {
+                console.log('[TTS] end_tts_queue error:', err);
+              }
+            } else {
+              console.log('[TTS] No queueId to end in ontoolresult');
             }
             // DON'T reset here - let audio continue playing
             // New session detection happens in ontoolinputpartial via text comparison
@@ -1220,12 +1279,21 @@ def generate_sync():
       const pendingText = displayText.slice(charPosition);
 
       return (
-        <main className={`container` + (displayMode === "fullscreen" ? ` fullscreen` : ``)} style={{
-          paddingTop: hostContext?.safeAreaInsets?.top,
-          paddingRight: hostContext?.safeAreaInsets?.right,
-          paddingBottom: hostContext?.safeAreaInsets?.bottom,
-          paddingLeft: hostContext?.safeAreaInsets?.left,
-        }}>
+        <main
+          className={`container` + (displayMode === "fullscreen" ? ` fullscreen` : ``)}
+          style={{
+            paddingTop: hostContext?.safeAreaInsets?.top,
+            paddingRight: hostContext?.safeAreaInsets?.right,
+            paddingBottom: hostContext?.safeAreaInsets?.bottom,
+            paddingLeft: hostContext?.safeAreaInsets?.left,
+          }}
+          tabIndex={0}
+          onKeyDown={(e) => {
+            if (e.key === "Escape" && displayMode === "fullscreen") {
+              toggleFullscreen();
+            }
+          }}
+        >
           <div className="textWrapper">
             <div className="textDisplay" onClick={togglePlayPause} style={{cursor: "pointer"}}>
               <span className="spoken">{spokenText}</span>