coder · ammario · May 2, 2026 · May 2, 2026 · May 2, 2026 · May 2, 2026
diff --git a/src/browser/components/PinnedTodoList/PinnedTodoList.test.tsx b/src/browser/components/PinnedTodoList/PinnedTodoList.test.tsx
@@ -52,8 +52,6 @@ function buildWorkspaceState(workspaceId: string, state: MockWorkspaceState): Wo
     pendingStreamModel: null,
     runtimeStatus: null,
     autoRetryStatus: null,
-    streamingTokenCount: undefined,
-    streamingTPS: undefined,
   };
 }
 

diff --git a/src/browser/features/Messages/AssistantMessage.tsx b/src/browser/features/Messages/AssistantMessage.tsx
@@ -184,10 +184,11 @@ export const AssistantMessage: React.FC<AssistantMessageProps> = ({
     // renders parseIncompleteMarkdown=false, matching the prior static render exactly.
     const contentElement = (
       <TypewriterMarkdown
-        deltas={[content]}
+        content={content}
         isComplete={!isStreaming}
         streamKey={message.historyId}
         streamSource={message.streamPresentation?.source}
+        workspaceId={workspaceId}
       />
     );
 

diff --git a/src/browser/features/Messages/ChatBarrier/StreamingBarrier.test.tsx b/src/browser/features/Messages/ChatBarrier/StreamingBarrier.test.tsx
@@ -13,8 +13,6 @@ interface MockWorkspaceState {
   pendingStreamStartTime: number | null;
   pendingStreamModel: string | null;
   runtimeStatus: { phase: string; detail?: string } | null;
-  streamingTokenCount: number | undefined;
-  streamingTPS: number | undefined;
 }
 
 function createWorkspaceState(overrides: Partial<MockWorkspaceState> = {}): MockWorkspaceState {
@@ -27,8 +25,6 @@ function createWorkspaceState(overrides: Partial<MockWorkspaceState> = {}): Mock
     pendingStreamStartTime: null,
     pendingStreamModel: null,
     runtimeStatus: null,
-    streamingTokenCount: undefined,
-    streamingTPS: undefined,
     ...overrides,
   };
 
@@ -39,10 +35,17 @@ function createWorkspaceState(overrides: Partial<MockWorkspaceState> = {}): Mock
   return state;
 }
 
+interface MockStreamingStats {
+  tokenCount: number;
+  tps: number;
+  charsPerSec: number;
+}
+
 const STATUS_DISPLAY_DELAY_MS = 1000;
 const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
 
 let currentWorkspaceState = createWorkspaceState();
+let currentStreamingStats: MockStreamingStats | null = null;
 let hasInterruptingStream = false;
 const setInterrupting = mock((_workspaceId: string) => undefined);
 const interruptStream = mock((_input: unknown) =>
@@ -70,6 +73,7 @@ void mock.module("@/browser/stores/WorkspaceStore", () => ({
   useWorkspaceStoreRaw: () => ({
     setInterrupting,
   }),
+  useWorkspaceStreamingStats: () => currentStreamingStats,
 }));
 
 void mock.module("@/browser/contexts/API", () => ({
@@ -119,6 +123,7 @@ describe("StreamingBarrier", () => {
     globalThis.document = globalThis.window.document;
 
     currentWorkspaceState = createWorkspaceState();
+    currentStreamingStats = null;
     hasInterruptingStream = false;
     setInterrupting.mockClear();
     interruptStream.mockClear();
@@ -289,9 +294,8 @@ describe("StreamingBarrier", () => {
     currentWorkspaceState = createWorkspaceState({
       canInterrupt: true,
       currentModel: "anthropic:claude-opus-4-6",
-      streamingTokenCount: 42,
-      streamingTPS: 18,
     });
+    currentStreamingStats = { tokenCount: 42, tps: 18, charsPerSec: 72 };
     view.rerender(<StreamingBarrier workspaceId="ws-1" />);
 
     expect(view.getByText("claude-opus-4-6 streaming...")).toBeTruthy();

diff --git a/src/browser/features/Messages/ChatBarrier/StreamingBarrier.tsx b/src/browser/features/Messages/ChatBarrier/StreamingBarrier.tsx
@@ -9,6 +9,7 @@ import {
   useWorkspaceState,
   useWorkspaceAggregator,
   useWorkspaceStoreRaw,
+  useWorkspaceStreamingStats,
 } from "@/browser/stores/WorkspaceStore";
 import { getDefaultModel } from "@/browser/hooks/useModelsFromSettings";
 import { useSettings } from "@/browser/contexts/SettingsContext";
@@ -146,6 +147,9 @@ export const StreamingBarrier: React.FC<StreamingBarrierProps> = ({
   const workspaceState = useWorkspaceState(workspaceId);
   const aggregator = useWorkspaceAggregator(workspaceId);
   const storeRaw = useWorkspaceStoreRaw();
+  // Subscribe directly to live streaming stats (token count + TPS) so per-delta
+  // updates re-render this leaf only, not the entire ChatPane subtree.
+  const streamingStats = useWorkspaceStreamingStats(workspaceId);
   const { api } = useAPI();
   const { open: openSettings } = useSettings();
 
@@ -172,9 +176,10 @@ export const StreamingBarrier: React.FC<StreamingBarrierProps> = ({
   // Only show token count during active streaming/compacting
   const showTokenCount = phase === "streaming" || phase === "compacting";
 
-  // Get live streaming stats from workspace state (updated on each stream-delta)
-  const tokenCount = showTokenCount ? workspaceState.streamingTokenCount : undefined;
-  const tps = showTokenCount ? workspaceState.streamingTPS : undefined;
+  // Live streaming stats come from a leaf subscription so per-delta updates
+  // don't cascade through the full chat subtree.
+  const tokenCount = showTokenCount ? streamingStats?.tokenCount : undefined;
+  const tps = showTokenCount ? streamingStats?.tps : undefined;
 
   // Model to display:
   // - "starting" phase: prefer pendingStreamModel (from muxMetadata), then localStorage

diff --git a/src/browser/features/Messages/MessageRenderer.tsx b/src/browser/features/Messages/MessageRenderer.tsx
@@ -121,7 +121,9 @@ export const MessageRenderer = React.memo<MessageRendererProps>(
         );
         break;
       case "reasoning":
-        renderedMessage = <ReasoningMessage message={message} className={className} />;
+        renderedMessage = (
+          <ReasoningMessage message={message} className={className} workspaceId={workspaceId} />
+        );
         break;
       case "stream-error":
         renderedMessage = <StreamErrorMessage message={message} className={className} />;

diff --git a/src/browser/features/Messages/ReasoningMessage.tsx b/src/browser/features/Messages/ReasoningMessage.tsx
@@ -9,6 +9,12 @@ import { Lightbulb } from "lucide-react";
 interface ReasoningMessageProps {
   message: DisplayedMessage & { type: "reasoning" };
   className?: string;
+  /**
+   * Workspace this reasoning belongs to. Forwarded to TypewriterMarkdown so the
+   * smoothing engine can target the model's live emission rate. Optional —
+   * tests render this component without a workspace context.
+   */
+  workspaceId?: string;
 }
 
 const REASONING_FONT_CLASSES = "font-primary text-[12px] leading-[18px]";
@@ -40,7 +46,11 @@ function parseLeadingBoldSummary(
   };
 }
 
-export const ReasoningMessage: React.FC<ReasoningMessageProps> = ({ message, className }) => {
+export const ReasoningMessage: React.FC<ReasoningMessageProps> = ({
+  message,
+  className,
+  workspaceId,
+}) => {
   const [isExpanded, setIsExpanded] = useState(message.isStreaming);
   // Track the height when expanded to reserve space during collapse transitions
   const [expandedHeight, setExpandedHeight] = useState<number | null>(null);
@@ -119,13 +129,18 @@ export const ReasoningMessage: React.FC<ReasoningMessageProps> = ({ message, cla
     // stream end would unmount/remount the markdown subtree and visibly flash the
     // content. isComplete={!isStreaming} cleanly bypasses the smoothing engine once
     // the stream ends, matching the prior static-render behavior.
+    // React Compiler auto-memoizes this normalize call between renders that
+    // share the same `content` value; no manual useMemo needed.
+    const normalizedContent = normalizeReasoningMarkdown(content);
+
     return (
       <TypewriterMarkdown
-        deltas={[normalizeReasoningMarkdown(content)]}
+        content={normalizedContent}
         isComplete={!isStreaming}
         preserveLineBreaks
         streamKey={message.historyId}
         streamSource={message.streamPresentation?.source}
+        workspaceId={workspaceId}
       />
     );
   };

diff --git a/src/browser/features/Messages/TypewriterMarkdown.test.tsx b/src/browser/features/Messages/TypewriterMarkdown.test.tsx
@@ -77,7 +77,7 @@ describe("TypewriterMarkdown", () => {
 
     const view = render(
       <TypewriterMarkdown
-        deltas={["Hello world"]}
+        content="Hello world"
         isComplete={false}
         streamKey="msg-1"
         streamSource="live"
@@ -90,13 +90,14 @@ describe("TypewriterMarkdown", () => {
       isStreaming: true,
       bypassSmoothing: false,
       streamKey: "msg-1",
+      liveCharsPerSec: 0,
     });
   });
 
   test("bypasses smoothing for replay streams", () => {
     render(
       <TypewriterMarkdown
-        deltas={["Replayed content"]}
+        content="Replayed content"
         isComplete={false}
         streamKey="msg-2"
         streamSource="replay"

diff --git a/src/browser/features/Messages/TypewriterMarkdown.tsx b/src/browser/features/Messages/TypewriterMarkdown.tsx
@@ -1,11 +1,13 @@
-import React, { useMemo } from "react";
+import React from "react";
 import { useSmoothStreamingText } from "@/browser/hooks/useSmoothStreamingText";
+import { useWorkspaceStreamingStats } from "@/browser/stores/WorkspaceStore";
 import { cn } from "@/common/lib/utils";
 import { MarkdownCore } from "./MarkdownCore";
 import { StreamingContext } from "./StreamingContext";
 
 interface TypewriterMarkdownProps {
-  deltas: string[];
+  /** Full text to render. During streaming this grows monotonically. */
+  content: string;
   isComplete: boolean;
   className?: string;
   /**
@@ -18,31 +20,50 @@ interface TypewriterMarkdownProps {
   streamKey?: string;
   /** Whether this stream originated from live tokens or replay. Defaults to "live". */
   streamSource?: "live" | "replay";
+  /**
+   * Workspace this content belongs to. When provided, the smoothing engine is
+   * fed the live model emission rate so the visible cursor tracks the model's
+   * actual output rather than the constant BASE rate. Optional because some
+   * surfaces (storybook, preview popovers) render markdown without a workspace.
+   */
+  workspaceId?: string;
 }
 
-// Use React.memo to prevent unnecessary re-renders from parent
-export const TypewriterMarkdown = React.memo<TypewriterMarkdownProps>(function TypewriterMarkdown({
-  deltas,
+// React Compiler memoizes this component automatically based on prop changes;
+// no manual React.memo wrapper. The previous deltas: string[] shape forced a new
+// array literal on every parent render and defeated the memo anyway.
+export const TypewriterMarkdown: React.FC<TypewriterMarkdownProps> = ({
+  content,
   isComplete,
   className,
   preserveLineBreaks,
   streamKey,
   streamSource = "live",
-}) {
-  const fullContent = deltas.join("");
-  const isStreaming = !isComplete && fullContent.length > 0;
+  workspaceId,
+}) => {
+  const isStreaming = !isComplete && content.length > 0;
+
+  // Read the live model emission rate (chars/sec) for the active stream of this
+  // workspace. The hook subscribes to its own MapStore so per-delta updates
+  // re-render this component WITHOUT cascading through the parent — see
+  // useWorkspaceStreamingStats. Hooks must run unconditionally; pass an empty
+  // string when no workspace is provided so the subscription is a stable no-op.
+  const streamingStats = useWorkspaceStreamingStats(workspaceId ?? "");
+  const liveCharsPerSec = isStreaming && workspaceId ? (streamingStats?.charsPerSec ?? 0) : 0;
 
-  // Two-clock streaming: ingestion (fullContent) vs presentation (visibleText).
+  // Two-clock streaming: ingestion (content) vs presentation (visibleText).
   // The jitter buffer reveals text at a steady cadence instead of bursty token clumps.
   // Replay and completed streams bypass smoothing entirely.
   const { visibleText } = useSmoothStreamingText({
-    fullText: fullContent,
+    fullText: content,
     isStreaming,
     bypassSmoothing: streamSource === "replay",
     streamKey: streamKey ?? "",
+    liveCharsPerSec,
   });
 
-  const streamingContextValue = useMemo(() => ({ isStreaming }), [isStreaming]);
+  // React Compiler memoizes this object; no manual useMemo needed.
+  const streamingContextValue = { isStreaming };
 
   return (
     <StreamingContext.Provider value={streamingContextValue}>
@@ -55,4 +76,4 @@ export const TypewriterMarkdown = React.memo<TypewriterMarkdownProps>(function T
       </div>
     </StreamingContext.Provider>
   );
-});
+};
diff --git a/src/browser/hooks/useSmoothStreamingText.ts b/src/browser/hooks/useSmoothStreamingText.ts
@@ -7,6 +7,13 @@ export interface UseSmoothStreamingTextOptions {
   bypassSmoothing: boolean;
   /** Changing this resets the engine (new stream). */
   streamKey: string;
+  /**
+   * Optional hint at the source's current emission rate (chars/sec). When
+   * supplied, the smoothing engine targets this rate instead of the BASE
+   * fallback so the visible cursor tracks the model's actual output. Pass 0
+   * (or omit) when unknown.
+   */
+  liveCharsPerSec?: number;
 }
 
 export interface UseSmoothStreamingTextResult {
@@ -66,7 +73,16 @@ export function useSmoothStreamingText(
   }
 
   const engine = engineRef.current;
-  engine.update(options.fullText, options.isStreaming, options.bypassSmoothing);
+  // engine.update is idempotent (pure projection of inputs onto engine state),
+  // so calling it during render is safe — including under StrictMode double-render.
+  // Keeping it inline lets the returned visibleText reflect the very latest fullText
+  // on the same render the stream ends, avoiding a one-frame visual lag at the seam.
+  engine.update(
+    options.fullText,
+    options.isStreaming,
+    options.bypassSmoothing,
+    options.liveCharsPerSec ?? 0
+  );
 
   const [visibleLength, setVisibleLength] = useState(() => engine.visibleLength);
   const visibleLengthRef = useRef(visibleLength);
@@ -113,7 +129,14 @@ export function useSmoothStreamingText(
     ) {
       rafIdRef.current = requestAnimationFrame(frameRef.current);
     }
-  }, [engine, options.fullText, options.isStreaming, options.bypassSmoothing, options.streamKey]);
+  }, [
+    engine,
+    options.fullText,
+    options.isStreaming,
+    options.bypassSmoothing,
+    options.streamKey,
+    options.liveCharsPerSec,
+  ]);
 
   // Lifecycle: stop RAF when streaming ends or stream key changes, and on unmount.
   useEffect(() => {