Skip to content

Commit a476613

Browse files
committed
fix: floor TPS time-span to 1s + clear token state on stream-error
Streaming TPS pill could briefly show inflated values at the start of a new stream then 'drop abruptly' to a real value, looking like cached stale data. Two root causes: 1. calculateTPS divided by (now - firstDelta.timestamp). For a brand new stream's first delta that span is just a few ms — e.g. '50 tokens / 0.005s = 10000 t/s'. As more deltas accumulate the window broadens and TPS settles, hence the visible drop. Phase 1's microtask cadence exposed this where the prior idle-callback batching used to mask it. Floor the divisor to a 1s minimum window so the rate smoothly ramps up over the first second of a stream instead of spiking. Underestimation during the settling window is acceptable; order-of-magnitude overestimation isn't. 2. The stream-error event handler in applyWorkspaceChatEventToAggregator didn't call clearTokenState, leaving the errored message's deltaHistory entry to leak. Match stream-end / stream-abort and clear it so a follow-up stream starts with a clean slate. Adds tests for both behaviors.
1 parent 0a945ed commit a476613

4 files changed

Lines changed: 78 additions & 4 deletions

File tree

src/browser/utils/messages/StreamingTPSCalculator.test.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,47 @@ describe("StreamingTPSCalculator", () => {
8181
expect(calculateTPS(deltas, now)).toBe(15);
8282
});
8383

84-
test("returns 0 when time span is zero", () => {
84+
test("uses minimum 1s window when time span is zero", () => {
8585
const now = 1000;
8686
const deltas: DeltaRecord[] = [
8787
{ tokens: 100, timestamp: 1000, type: "text" }, // Same timestamp as now
8888
];
89+
// 100 tokens divided by the 1s minimum window = 100 t/s.
90+
// (Floor avoids the divide-by-tiny-window artifact that otherwise reports
91+
// tens of thousands of t/s when only one chunk has arrived.)
92+
expect(calculateTPS(deltas, now)).toBe(100);
93+
});
94+
95+
test("returns 0 when time span is negative (clock skew)", () => {
96+
const now = 500;
97+
const deltas: DeltaRecord[] = [
98+
{ tokens: 100, timestamp: 1000, type: "text" }, // future-dated, treat as invalid
99+
];
89100
expect(calculateTPS(deltas, now)).toBe(0);
90101
});
91102

103+
test("floors tiny time spans to the 1s minimum to avoid inflated rates", () => {
104+
// Realistic scenario: first delta of a fresh stream — only a few ms have
105+
// elapsed between the chunk's timestamp and the read.
106+
const now = 1005;
107+
const deltas: DeltaRecord[] = [{ tokens: 50, timestamp: 1000, type: "text" }];
108+
// Without the floor: 50 tokens / 0.005s = 10000 t/s (bogus).
109+
// With the 1s floor: 50 tokens / 1s = 50 t/s.
110+
expect(calculateTPS(deltas, now)).toBe(50);
111+
});
112+
113+
test("uses raw time span once it exceeds the 1s minimum", () => {
114+
// 1.5s after first delta — the floor stops applying and we return the
115+
// honest rate.
116+
const now = 2500;
117+
const deltas: DeltaRecord[] = [
118+
{ tokens: 50, timestamp: 1000, type: "text" },
119+
{ tokens: 100, timestamp: 2000, type: "text" },
120+
];
121+
// 150 tokens over 1.5s = 100 t/s.
122+
expect(calculateTPS(deltas, now)).toBe(100);
123+
});
124+
92125
test("uses current time by default", () => {
93126
const now = Date.now();
94127
const deltas: DeltaRecord[] = [

src/browser/utils/messages/applyWorkspaceChatEventToAggregator.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,24 @@ describe("applyWorkspaceChatEventToAggregator", () => {
208208
expect(aggregator.calls).toEqual(["clearTokenState:msg-1", "handleStreamAbort:msg-1"]);
209209
});
210210

211+
test("stream-error clears token state after calling handleStreamError", () => {
212+
const aggregator = new StubAggregator();
213+
214+
const event: WorkspaceChatMessage = {
215+
type: "stream-error",
216+
messageId: "msg-1",
217+
error: "boom",
218+
errorType: "network",
219+
};
220+
221+
const hint = applyWorkspaceChatEventToAggregator(aggregator, event);
222+
223+
expect(hint).toBe("immediate");
224+
// Without the clearTokenState call, the next stream's TPS/token-count
225+
// calculation could draw from the errored message's leaked deltaHistory.
226+
expect(aggregator.calls).toEqual(["handleStreamError:msg-1", "clearTokenState:msg-1"]);
227+
});
228+
211229
test("tool-call-delta routes to handleToolCallDelta and is throttled", () => {
212230
const aggregator = new StubAggregator();
213231

src/browser/utils/messages/applyWorkspaceChatEventToAggregator.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ export function applyWorkspaceChatEventToAggregator(
171171
}
172172

173173
aggregator.handleStreamError(event);
174+
// Match stream-end / stream-abort: drop the per-message deltaHistory entry
175+
// so a subsequent stream isn't influenced by leaked TPS/token state from
176+
// the errored one.
177+
aggregator.clearTokenState(event.messageId);
174178
return "immediate";
175179
}
176180

src/common/utils/tokens/tps.ts

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,25 @@ export interface DeltaRecord {
1212

1313
const DEFAULT_TPS_WINDOW_MS = 60000; // 60 second trailing window
1414

15+
/**
16+
* Minimum time span used in the TPS divisor.
17+
*
18+
* Without this floor, the first delta of a stream produces a wildly inflated
19+
* TPS: with one delta `timeSpanMs = now - delta.timestamp` is typically a
20+
* single-digit milliseconds value (the gap between when the chunk arrived and
21+
* when we read it), so e.g. `50 tokens / 0.005s = 10000 t/s`. The user-visible
22+
* effect is "TPS starts very high then drops abruptly" as more deltas arrive
23+
* and broaden the window. Flooring to a 1s minimum window makes the rate
24+
* smoothly ramp up from ~0 toward the steady-state value over the first
25+
* second of a stream — a much more honest measurement.
26+
*
27+
* Trade-off: in the very first second the reported TPS slightly *under*-counts
28+
* (we divide by 1s even when only 200ms have elapsed). That's acceptable —
29+
* understatement during a settling window is far better than a misleading
30+
* order-of-magnitude overstatement.
31+
*/
32+
const MIN_TPS_TIME_SPAN_MS = 1000;
33+
1534
/**
1635
* Calculate tokens-per-second from a history of delta records.
1736
*/
@@ -23,9 +42,9 @@ export function calculateTPS(deltas: DeltaRecord[], now: number = Date.now()): n
2342
if (recentDeltas.length === 0) return 0;
2443

2544
const totalTokens = recentDeltas.reduce((sum, d) => sum + (d.tokens || 0), 0);
26-
const timeSpanMs = now - recentDeltas[0].timestamp;
27-
const timeSpanSec = timeSpanMs / 1000;
28-
if (timeSpanSec <= 0) return 0;
45+
const rawTimeSpanMs = now - recentDeltas[0].timestamp;
46+
if (rawTimeSpanMs < 0) return 0;
47+
const timeSpanSec = Math.max(rawTimeSpanMs, MIN_TPS_TIME_SPAN_MS) / 1000;
2948

3049
return Math.round(totalTokens / timeSpanSec);
3150
}

0 commit comments

Comments
 (0)