Revert "🤖 perf: word-pace text reveal; remove DOM-level streaming animations (#3221)"

ammario · ammario · commit 2792deb725fb · 2026-05-05T13:14:40.000-05:00
This reverts commit bbc1504.
diff --git a/bun.lock b/bun.lock
diff --git a/src/browser/features/Messages/MarkdownCore.tsx b/src/browser/features/Messages/MarkdownCore.tsx
@@ -144,10 +144,7 @@ export const MarkdownCore = React.memo<MarkdownCoreProps>(
           // Use "static" mode for completed content to bypass useTransition() deferral.
           // After ORPC migration, async event boundaries let React deprioritize transitions indefinitely.
           mode={parseIncompleteMarkdown ? "streaming" : "static"}
-          // space-y-2: reduce from default space-y-4 (16px) to space-y-2 (8px).
-          // Streaming smoothness comes from word-paced reveal in
-          // SmoothTextEngine; no DOM-level animation here.
-          className="space-y-2"
+          className="space-y-2" // Reduce from default space-y-4 (16px) to space-y-2 (8px)
           controls={{ table: false, code: true, mermaid: true }} // Disable table copy/download, keep code/mermaid controls
         >
           {normalizedContent}
diff --git a/src/browser/features/Messages/TypewriterMarkdown.tsx b/src/browser/features/Messages/TypewriterMarkdown.tsx
@@ -71,15 +71,6 @@ export const TypewriterMarkdown: React.FC<TypewriterMarkdownProps> = ({
   // React Compiler memoizes this object; no manual useMemo needed.
   const streamingContextValue = { isStreaming };
 
-  // Smoothness comes entirely from the engine's word-paced reveal cadence.
-  // No DOM-level animation, mask, or shimmer here — earlier iterations
-  // (per-block fade, per-word fade, per-line wrap, animated mask, shimmer
-  // overlay) all introduced perceptual artifacts (left-to-right shimmer,
-  // bottom-edge obfuscation, abrupt fades) that the eye registered as
-  // jitter regardless of curve/duration tuning. Word-paced reveal at the
-  // engine level (see SmoothTextEngine) matches how production chat UIs
-  // (ChatGPT, Claude.ai) feel smooth: humans parse text in word units, so
-  // the right granularity for reveal is the word, not the character.
   return (
     <StreamingContext.Provider value={streamingContextValue}>
       <div className={cn("markdown-content", className)}>
diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css
@@ -1542,13 +1542,6 @@ code {
   white-space: normal;
 }
 
-/* Streaming smoothness is achieved entirely via word-paced reveal in the
- * SmoothTextEngine (see src/browser/utils/streaming/SmoothTextEngine.ts).
- * No DOM-level fade, mask, or shimmer — earlier iterations introduced
- * perceptual artifacts (bottom-edge obfuscation, abrupt fade transitions,
- * left-to-right per-word shimmer) that the eye registered as jitter
- * regardless of curve / duration tuning. */
-
 .markdown-content h1,
 .markdown-content h2,
 .markdown-content h3,
diff --git a/src/browser/utils/streaming/SmoothTextEngine.test.ts b/src/browser/utils/streaming/SmoothTextEngine.test.ts
@@ -6,22 +6,6 @@ function makeText(length: number): string {
   return "x".repeat(length);
 }
 
-/**
- * Realistic whitespace-bearing text for tests that exercise word-paced reveal
- * cadence. Uses fixed 5-char "words" + 1 space = 6 chars per atom — short
- * enough to fit comfortably under WORD_PACE_MAX_CHARS=12 so the cap doesn't
- * dominate behavior.
- */
-function makeWords(length: number): string {
-  const words: string[] = [];
-  let total = 0;
-  while (total < length) {
-    words.push("abcde");
-    total += 6; // 5 chars + 1 space
-  }
-  return words.join(" ").slice(0, length);
-}
-
 describe("SmoothTextEngine", () => {
   it("reveals text steadily and reaches full length", () => {
     const engine = new SmoothTextEngine();
@@ -129,17 +113,17 @@ describe("SmoothTextEngine", () => {
 
   it("does not force reveal when budget is below one char", () => {
     const engine = new SmoothTextEngine();
-    // For a 1-char string with no whitespace, the next reveal atom is the
-    // entire string (cost=1). With ~74 cps adaptive rate at 4ms per tick:
-    // ~0.30 budget per tick. The engine waits until floor(charBudget) >= 1
-    // before revealing — frame-rate invariance means partial budget rolls over.
+    // With a 1-char backlog, adaptive rate is at floor (~24 cps).
+    // At 4ms per tick: 24 * 0.004 = 0.096 budget per tick.
+    // The required-char gate is min(MIN_FRAME_CHARS, backlog) = min(2, 1) = 1
+    // for this 1-char stream, so it reveals once budget reaches 1.0.
     engine.update("x", true, false);
 
-    // First tick at 4ms should not reveal (budget ~0.30 < 1).
+    // First tick at 4ms should not reveal (budget ~0.10).
     const afterFirstTick = engine.tick(4);
     expect(afterFirstTick).toBe(0);
 
-    // Several more small ticks should still not reveal (budget < 1).
+    // Several more small ticks should still not reveal.
     engine.tick(4);
     engine.tick(4);
     expect(engine.visibleLength).toBe(0);
@@ -153,14 +137,12 @@ describe("SmoothTextEngine", () => {
 
   it("targets the live model rate when provided", () => {
     // With a model rate of 200 cps the engine should reveal materially faster
-    // than at the BASE rate of 72 cps for the same backlog. Uses realistic
-    // word-bearing text so the rate differential maps onto distinct word
-    // counts revealed in the same wall-time window.
+    // than at the BASE rate of 72 cps for the same backlog.
     const baseEngine = new SmoothTextEngine();
     const modelAwareEngine = new SmoothTextEngine();
 
-    baseEngine.update(makeWords(50), true, false);
-    modelAwareEngine.update(makeWords(50), true, false, 200);
+    baseEngine.update(makeText(50), true, false);
+    modelAwareEngine.update(makeText(50), true, false, 200);
 
     for (let i = 0; i < 10; i++) {
       baseEngine.tick(16);
@@ -170,93 +152,6 @@ describe("SmoothTextEngine", () => {
     expect(modelAwareEngine.visibleLength).toBeGreaterThan(baseEngine.visibleLength);
   });
 
-  it("reveals at most one atom per tick even with huge budget", () => {
-    // Time-smoothing: even when budget covers many atoms (catch-up burst,
-    // very high adaptive rate), reveals must be spread across ticks so the
-    // user sees one word per animation frame. Multi-atom reveals would
-    // bypass the temporal cadence and read as bursty.
-    const engine = new SmoothTextEngine();
-    // 5-char words + space = 6-char atoms. 100 chars = ~17 atoms.
-    engine.update(makeWords(100), true, false, 1000); // very high model rate
-
-    // Even one tick at the dt clamp ceiling shouldn't reveal more than the
-    // largest possible atom (WORD_PACE_MAX_CHARS=12).
-    const before = engine.visibleLength;
-    engine.tick(33);
-    const revealed = engine.visibleLength - before;
-
-    // ≤ 12 chars (one atom max). With 6-char atoms it's exactly 6.
-    expect(revealed).toBeLessThanOrEqual(STREAM_SMOOTHING.WORD_PACE_MAX_CHARS);
-  });
-
-  it("clamps dt so a long pause doesn't burst on resume", () => {
-    // RAF gaps (tab visibility, debugger pauses) can produce multi-second
-    // dt values. Without clamping, budget = adaptiveRate * dt would balloon
-    // and feed downstream into multi-atom reveals (or in earlier engine
-    // designs, a 10s pause would dump the entire backlog in one frame).
-    const engine = new SmoothTextEngine();
-    engine.update(makeWords(200), true, false, 200);
-
-    const before = engine.visibleLength;
-    engine.tick(10_000); // 10-second "pause"
-    const revealed = engine.visibleLength - before;
-
-    // Same single-atom cap as a normal tick — the clamp ensures budget
-    // accumulated from a 10s gap is no larger than from a 33ms gap.
-    expect(revealed).toBeLessThanOrEqual(STREAM_SMOOTHING.WORD_PACE_MAX_CHARS);
-  });
-
-  it("treats Unicode whitespace as word boundaries", () => {
-    // Non-English content uses NBSP \u00A0, ideographic space \u3000, etc.
-    // The boundary scanner must recognize them or the entire stream is treated
-    // as one no-whitespace run capped at WORD_PACE_MAX_CHARS chunks. Each of
-    // these strings has a single Unicode whitespace separator at index 5.
-    const cases = [
-      "Hello\u00a0world", // NBSP
-      "Hello\u2003world", // em space
-      "Hello\u2009world", // thin space
-      "Hello\u3000world", // ideographic space
-      "Hello\u2028world", // line separator
-    ];
-
-    for (const text of cases) {
-      const engine = new SmoothTextEngine();
-      engine.update(text, true, false);
-      // Tick until "Hello<sep>" is revealed (cost = 6) — boundary scan must
-      // land at index 6, not at the WORD_PACE_MAX_CHARS cap of 12.
-      let observed = engine.visibleLength;
-      for (let i = 0; i < 50 && engine.visibleLength < 6; i++) {
-        engine.tick(16);
-        observed = engine.visibleLength;
-        if (observed >= 6 && observed < text.length) break;
-      }
-      expect(observed).toBe(6);
-    }
-  });
-
-  it("reveals only at whitespace boundaries", () => {
-    // Word-paced reveal: visibleLength must always land just after a
-    // whitespace character (or at 0 / fullLength). Prevents mid-word reveals
-    // that the eye registers as character-by-character chop.
-    const engine = new SmoothTextEngine();
-    const text = "Hello world. How are you doing today?";
-    engine.update(text, true, false);
-
-    const seenLengths = new Set<number>([engine.visibleLength]);
-    for (let i = 0; i < 200 && !engine.isCaughtUp; i++) {
-      engine.tick(16);
-      seenLengths.add(engine.visibleLength);
-    }
-
-    expect(engine.isCaughtUp).toBe(true);
-    for (const len of seenLengths) {
-      if (len === 0 || len === text.length) continue;
-      // The character immediately before the reveal cursor must be whitespace.
-      const charBefore = text[len - 1];
-      expect(/\s/.test(charBefore)).toBe(true);
-    }
-  });
-
   it("soft-catches-up large lag without a hard snap", () => {
     const engine = new SmoothTextEngine();
 
diff --git a/src/browser/utils/streaming/SmoothTextEngine.ts b/src/browser/utils/streaming/SmoothTextEngine.ts
@@ -1,15 +1,6 @@
 import { STREAM_SMOOTHING } from "@/constants/streaming";
 import { clamp } from "@/common/utils/clamp";
 
-/**
- * Module-level regex (compiled once, reused across ticks) for whitespace-
- * boundary detection in word-paced reveal. `\s` covers all Unicode whitespace
- * (per ECMA-262): ASCII space/tab/LF/CR/FF/VT, NBSP, line/paragraph
- * separators, thin/em/ideographic spaces, etc. — so non-English text paces
- * at proper word boundaries.
- */
-const WHITESPACE_REGEX = /\s/;
-
 /**
  * Compute target reveal rate (chars/sec) given current backlog and a hint of how
  * fast the source is producing characters.
@@ -52,32 +43,13 @@ function getAdaptiveRate(backlog: number, liveCharsPerSec: number): number {
  * The ingestion clock (incoming full text) is external; this class manages only
  * the presentation clock (visible prefix length) using a character budget model.
  *
- * **Reveal granularity is word-sized AND temporally paced.** Each tick reveals
- * AT MOST ONE atom (a word + trailing whitespace, capped at
- * {@link STREAM_SMOOTHING.WORD_PACE_MAX_CHARS}). Multi-atom bursts are
- * impossible by construction — even when budget is large (catch-up after a
- * long RAF gap, high adaptive rate during burst), reveals are spread across
- * frames so the user sees one word per animation frame at the maximum tempo.
- * Combined with the dt clamp ({@link STREAM_SMOOTHING.MAX_TICK_MS}), this
- * caps cadence at ~60 words/sec on a 60Hz display.
- *
- * Why word-sized AND time-paced:
- *  - Word-sized: humans parse text in word units. Character-paced reveal
- *    triggers an extra decoding step the eye registers as choppy.
- *  - Time-paced: even at word granularity, dumping 3 atoms in one frame
- *    reads as bursty. One atom per frame is the smoothest possible cadence
- *    the display can express.
- *  - Production chat UIs (ChatGPT, Claude.ai) feel smooth precisely because
- *    they emit at word boundaries at a steady tempo.
- *
  * The engine is model-aware: callers should pass {@link update}'s
  * `liveCharsPerSec` if they know the source's emission rate. Without it the
  * engine targets {@link STREAM_SMOOTHING.BASE_CHARS_PER_SEC}, which can lag
  * behind fast models and make the user wait through a backlog drain after the
  * stream ends.
  */
 export class SmoothTextEngine {
-  private fullText = "";
   private fullLength = 0;
   private visibleLengthValue = 0;
   private charBudget = 0;
@@ -114,10 +86,6 @@ export class SmoothTextEngine {
     bypassSmoothing: boolean,
     liveCharsPerSec = 0
   ): void {
-    // Retain the full text so tick() can locate whitespace boundaries for
-    // word-paced reveal. The hook (useSmoothStreamingText) already holds it,
-    // so the extra reference is "free" — JS strings are immutable and shared.
-    this.fullText = fullText;
     this.fullLength = fullText.length;
     this.isStreaming = isStreaming;
     this.bypassSmoothing = bypassSmoothing;
@@ -137,30 +105,6 @@ export class SmoothTextEngine {
     this.enforceMaxVisualLag();
   }
 
-  /**
-   * Find the position to advance visibleLength to from `from`. Returns the
-   * index AFTER the next whitespace character so the whitespace is included
-   * in the reveal (the next word stays hidden until its own boundary is
-   * reached). Returns `min(from + WORD_PACE_MAX_CHARS, fullLength)` if no
-   * whitespace is found within that span — guarantees long URLs / identifiers
-   * still progress in bounded chunks.
-   *
-   * Uses `\s` (matches all Unicode whitespace: ASCII space/tab/newline/CR/FF,
-   * NBSP \u00A0, line/paragraph separators \u2028/\u2029, thin space \u2009,
-   * em space \u2003, ideographic space \u3000, etc.) so non-English content
-   * paces at proper word boundaries. CJK text without internal whitespace
-   * still falls back to the WORD_PACE_MAX_CHARS chunk cap.
-   */
-  private findNextRevealBoundary(from: number): number {
-    const cap = Math.min(this.fullLength, from + STREAM_SMOOTHING.WORD_PACE_MAX_CHARS);
-    for (let i = from; i < cap; i++) {
-      if (WHITESPACE_REGEX.test(this.fullText[i] ?? "")) {
-        return i + 1;
-      }
-    }
-    return cap;
-  }
-
   /**
    * Advance the presentation clock by a timestep.
    */
@@ -185,28 +129,23 @@ export class SmoothTextEngine {
     const backlog = this.fullLength - this.visibleLengthValue;
     const adaptiveRate = getAdaptiveRate(backlog, this.liveCharsPerSec);
 
-    // Clamp dt to MAX_TICK_MS. A long RAF gap (tab visibility, slow frames,
-    // debugger pauses) would otherwise dump huge budget that bursts on resume,
-    // bypassing the per-tick atom cap. Backlog drains via subsequent ticks,
-    // which arrive at frame rate once RAF resumes; the hard-snap safety net
-    // (enforceMaxVisualLag) handles pathological cases beyond MAX_VISUAL_LAG_CHARS.
-    const clampedDt = Math.min(dtMs, STREAM_SMOOTHING.MAX_TICK_MS);
-    this.charBudget += adaptiveRate * (clampedDt / 1000);
-
-    // Single-atom reveal per tick. Even when budget covers multiple atoms
-    // (catch-up burst, high adaptive rate), defer to subsequent ticks so the
-    // user sees one word per animation frame. This is the smoothest possible
-    // temporal cadence the display can express; multi-atom-per-tick reveals
-    // would read as bursty even at word granularity.
-    const nextBoundary = this.findNextRevealBoundary(this.visibleLengthValue);
-    const cost = nextBoundary - this.visibleLengthValue;
-    // Math.floor guarantees monotone progress across tick rates — partial
-    // budget rolls over so a 240Hz display accumulates across several frames.
-    if (cost > 0 && Math.floor(this.charBudget) >= cost) {
-      this.visibleLengthValue = nextBoundary;
-      this.charBudget -= cost;
+    this.charBudget += adaptiveRate * (dtMs / 1000);
+
+    // Budget-gated reveal: wait until at least MIN_FRAME_CHARS whole chars have
+    // accrued. This keeps cadence frame-rate invariant — a 240Hz display banks
+    // budget across several frames before revealing, instead of forcing
+    // 1 char/frame at any refresh rate. At the tail of a stream the requirement
+    // is capped by the backlog, so a final lone character is still revealed.
+    const wholeCharsReady = Math.floor(this.charBudget);
+    const requiredChars = Math.min(STREAM_SMOOTHING.MIN_FRAME_CHARS, backlog);
+    if (wholeCharsReady < requiredChars) {
+      return this.visibleLengthValue;
     }
 
+    const reveal = Math.min(wholeCharsReady, STREAM_SMOOTHING.MAX_FRAME_CHARS);
+    this.visibleLengthValue = Math.min(this.fullLength, this.visibleLengthValue + reveal);
+    this.charBudget -= reveal;
+
     return this.visibleLengthValue;
   }
 
@@ -222,7 +161,6 @@ export class SmoothTextEngine {
    * Reset all engine state, typically when a new stream starts.
    */
   reset(): void {
-    this.fullText = "";
     this.fullLength = 0;
     this.visibleLengthValue = 0;
     this.charBudget = 0;
diff --git a/src/constants/streaming.ts b/src/constants/streaming.ts
@@ -37,25 +37,12 @@ export const STREAM_SMOOTHING = {
    * below this.
    */
   MAX_VISUAL_LAG_CHARS: 1024,
+  /** Max characters revealed in a single animation frame. */
+  MAX_FRAME_CHARS: 48,
   /**
-   * Maximum characters in a single reveal "atom" when no whitespace boundary
-   * is found. The engine paces text in word-sized atoms (a run of non-whitespace
-   * plus its trailing whitespace); for a long no-whitespace run (e.g., a URL or
-   * minified identifier) we cap the atom at this length so the engine doesn't
-   * stall waiting for budget to cover an unbounded chunk. ~12 covers nearly all
-   * English words ("consideration" = 13, "JavaScript" = 10) without dumping
-   * long URLs in a single 200-char shot.
+   * Min whole chars of budget a tick needs before revealing (capped by the
+   * backlog). Set to 2 so reveals coalesce to ~30 Hz at the base rate instead
+   * of ~60 Hz — visually just as smooth, at half the markdown-reparse cost.
    */
-  WORD_PACE_MAX_CHARS: 12,
-  /**
-   * Upper bound on the dt fed to the engine in a single tick. Long RAF gaps
-   * (tab visibility, slow frames, debugger pauses) would otherwise accumulate
-   * huge budget that bursts on resume — bypassing the per-tick atom cap below.
-   * Clamping dt to ~2 frames at 60Hz means at the worst case (max model rate
-   * 420 cps × 33ms = 13.86 chars per tick), budget can cover at most one
-   * WORD_PACE_MAX_CHARS-sized atom per tick. Combined with the 1-atom-per-tick
-   * reveal cap, this enforces a strict temporal cadence: each visual reveal
-   * is its own animation frame.
-   */
-  MAX_TICK_MS: 33,
+  MIN_FRAME_CHARS: 2,
 } as const;