Skip to content

Commit a88f688

Browse files
webjunkie and claude authored
feat: Warn PostHog staff before continuing stale, costly conversations (#3122)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 6a9d332 commit a88f688

7 files changed

Lines changed: 448 additions & 3 deletions

File tree

packages/core/src/sessions/contextUsage.test.ts

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import type { AcpMessage } from "@posthog/shared";
22
import { describe, expect, it } from "vitest";
3-
import { createContextUsageTracker, extractContextUsage } from "./contextUsage";
3+
import {
4+
createContextUsageTracker,
5+
DEFAULT_STALE_COSTLY_THRESHOLD,
6+
extractContextUsage,
7+
extractLastActivityAt,
8+
shouldWarnStaleCostlyConversation,
9+
} from "./contextUsage";
410

511
function usageUpdateEvent(used: number, size: number): AcpMessage {
612
return {
@@ -132,3 +138,116 @@ describe("createContextUsageTracker", () => {
132138
expect(tracker.update(events)).toEqual(extractContextUsage(events));
133139
});
134140
});
141+
142+
// Covers the size × staleness decision table, the inclusive boundaries, and
// the null / future-timestamp / default-threshold paths.
describe("shouldWarnStaleCostlyConversation", () => {
  const MINUTE_MS = 60 * 1000;
  const now = 1_000_000_000;
  const threshold = { tokens: 40_000, staleMs: 5 * MINUTE_MS };

  // Evaluate the predicate for a conversation that has been idle for `idleMs`.
  const warnsAfterIdle = (usedTokens: number, idleMs: number): boolean =>
    shouldWarnStaleCostlyConversation({
      usedTokens,
      lastActivityAt: now - idleMs,
      now,
      threshold,
    });

  const cases = [
    {
      name: "large + stale → warn",
      usedTokens: 50_000,
      idleMs: 10 * MINUTE_MS,
      expected: true,
    },
    {
      name: "large + fresh → no warn",
      usedTokens: 50_000,
      idleMs: MINUTE_MS,
      expected: false,
    },
    {
      name: "small + stale → no warn",
      usedTokens: 10_000,
      idleMs: 10 * MINUTE_MS,
      expected: false,
    },
    {
      name: "small + fresh → no warn",
      usedTokens: 10_000,
      idleMs: MINUTE_MS,
      expected: false,
    },
    {
      name: "exactly at both thresholds → warn",
      usedTokens: 40_000,
      idleMs: 5 * MINUTE_MS,
      expected: true,
    },
    {
      name: "one token below the size threshold → no warn",
      usedTokens: 39_999,
      idleMs: 10 * MINUTE_MS,
      expected: false,
    },
    {
      name: "one ms below the stale threshold → no warn",
      usedTokens: 50_000,
      idleMs: 5 * MINUTE_MS - 1,
      expected: false,
    },
  ];

  it.each(cases)("$name", ({ usedTokens, idleMs, expected }) => {
    expect(warnsAfterIdle(usedTokens, idleMs)).toBe(expected);
  });

  it("never warns without a last-activity timestamp", () => {
    const result = shouldWarnStaleCostlyConversation({
      usedTokens: 1_000_000,
      lastActivityAt: null,
      now,
      threshold,
    });
    expect(result).toBe(false);
  });

  it("treats a future timestamp (clock skew) as fresh", () => {
    const result = shouldWarnStaleCostlyConversation({
      usedTokens: 50_000,
      lastActivityAt: now + 60_000,
      now,
      threshold,
    });
    expect(result).toBe(false);
  });

  it("falls back to DEFAULT_STALE_COSTLY_THRESHOLD when none is given", () => {
    // Sit exactly on both default bounds; no explicit threshold is passed.
    const { tokens, staleMs } = DEFAULT_STALE_COSTLY_THRESHOLD;
    const result = shouldWarnStaleCostlyConversation({
      usedTokens: tokens,
      lastActivityAt: now - staleMs,
      now,
    });
    expect(result).toBe(true);
  });
});
232+
233+
// Checks the empty case and that the maximum `ts` wins regardless of position.
describe("extractLastActivityAt", () => {
  // Fixture events from the file's existing builders, stamped with a given ts.
  const chunkAt = (ts: number): AcpMessage => ({ ...agentChunkEvent(), ts });
  const usageAt = (ts: number): AcpMessage => ({
    ...usageUpdateEvent(50_000, 200_000),
    ts,
  });

  it("returns null for an empty event list", () => {
    const result = extractLastActivityAt([]);
    expect(result).toBeNull();
  });

  it("returns the ts of the most recent event", () => {
    const inOrder = [chunkAt(10), usageAt(20)];
    expect(extractLastActivityAt(inOrder)).toBe(20);
  });

  it("returns the maximum ts even when events are out of order", () => {
    const outOfOrder = [usageAt(30), chunkAt(10)];
    expect(extractLastActivityAt(outOfOrder)).toBe(30);
  });
});

packages/core/src/sessions/contextUsage.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,76 @@ function extractBreakdown(msg: AcpMessage["message"]): ContextBreakdown | null {
111111
const params = msg.params as { breakdown?: ContextBreakdown } | undefined;
112112
return params?.breakdown ?? null;
113113
}
114+
115+
/**
 * Threshold controlling when {@link shouldWarnStaleCostlyConversation} fires.
 *
 * Fields are `readonly`: a threshold is configuration, and the shared default
 * ({@link DEFAULT_STALE_COSTLY_THRESHOLD}) must not be editable through a
 * reference handed to a caller.
 */
export interface StaleCostlyThreshold {
  /** Minimum context tokens for a conversation to count as "large". */
  readonly tokens: number;
  /**
   * Minimum idle time (ms) before a conversation counts as "stale". See
   * {@link DEFAULT_STALE_COSTLY_THRESHOLD} for how this bound is chosen.
   */
  readonly staleMs: number;
}

/**
 * Defaults for the stale-costly conversation warning.
 *
 * `tokens` (100k): only conversations big enough that a cold prefix rebuild is
 * a real cost — roughly 10% of the 1M context window — trip the warning.
 *
 * `staleMs` (60 min): Anthropic ephemeral caches default to a 5-minute TTL and
 * can opt into a 1-hour one; the effective value depends on what the Agent SDK
 * requests, which we don't control or observe. We deliberately use the longer
 * 1-hour bound so the cache is almost certainly cold before we warn — warning
 * while it is still warm would nag about a continuation that is in fact cheap,
 * the worse failure. Erring long only means we occasionally skip a warning,
 * never that we nag needlessly.
 *
 * Frozen at runtime: this object is shared by every importer, so an accidental
 * write would otherwise change the thresholds for the whole process.
 */
export const DEFAULT_STALE_COSTLY_THRESHOLD: StaleCostlyThreshold =
  Object.freeze({
    tokens: 100_000,
    staleMs: 60 * 60 * 1000,
  });
146+
147+
/**
 * Decide whether to warn that continuing a conversation will be costly: true
 * only when it is both large (>= `threshold.tokens`) and stale (idle >=
 * `threshold.staleMs`). See {@link DEFAULT_STALE_COSTLY_THRESHOLD} for the
 * pricing rationale behind the defaults.
 *
 * Pure and time-injected (no `Date.now()`). The check fails closed: a `null`
 * `lastActivityAt` never warns, a future timestamp (clock skew) reads as
 * fresh, and a non-numeric `usedTokens` (e.g. `NaN` from a missing usage
 * report) never warns, because every `>=` comparison against `NaN` is false.
 *
 * @param args.usedTokens Context tokens currently held by the conversation.
 * @param args.lastActivityAt Time of last activity, on the same clock and in
 *   the same unit (ms) as `now`; `null` when unknown.
 * @param args.now Current time, injected by the caller.
 * @param args.threshold Optional override; defaults to
 *   {@link DEFAULT_STALE_COSTLY_THRESHOLD}.
 * @returns `true` when the conversation is both large and stale.
 */
export function shouldWarnStaleCostlyConversation(args: {
  usedTokens: number;
  lastActivityAt: number | null;
  now: number;
  threshold?: StaleCostlyThreshold;
}): boolean {
  const { usedTokens, lastActivityAt, now } = args;
  const threshold = args.threshold ?? DEFAULT_STALE_COSTLY_THRESHOLD;
  if (lastActivityAt === null) return false;
  // Both conditions are stated positively so that NaN on either side yields
  // false; a negated `usedTokens < threshold.tokens` guard would let NaN through.
  const isLarge = usedTokens >= threshold.tokens;
  const isStale = now - lastActivityAt >= threshold.staleMs;
  return isLarge && isStale;
}
168+
169+
/**
 * Best-effort "time of last activity" for a session: the most recent (maximum)
 * `ts` among events, or null when the list is empty or carries no usable
 * timestamp. Scans for the max rather than trusting positional order, so an
 * out-of-order append can't report a staler time than reality. (Uses a loop,
 * not the spread form of `Math.max`, which can overflow the stack on long
 * event lists.)
 *
 * Events whose `ts` is missing or non-finite are skipped instead of seeding
 * the scan: comparing against `undefined`/`NaN` is always false, so one bad
 * leading event would otherwise hide every valid timestamp after it.
 * NOTE(review): whether real event logs can contain such entries isn't visible
 * from this file — the guard is defensive.
 *
 * Heuristic proxy for prompt-cache freshness — `ts` is stamped on *any*
 * AcpMessage (agent chunks, tool calls, client-side events), not only turns
 * sent to the model. Good enough for a soft cost warning; not a billing signal.
 *
 * @param events Session events to scan; not mutated.
 * @returns The largest finite `ts`, or `null` if there is none.
 */
export function extractLastActivityAt(
  events: readonly AcpMessage[],
): number | null {
  let latest: number | null = null;
  for (const event of events) {
    // Read as unknown so the runtime check stands regardless of declared type.
    const ts: unknown = event.ts;
    if (typeof ts !== "number" || !Number.isFinite(ts)) continue;
    if (latest === null || ts > latest) latest = ts;
  }
  return latest;
}

packages/ui/src/features/sessions/components/SessionView.tsx

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import { QueuedMessagesDock } from "@posthog/ui/features/sessions/components/Que
2929
import { ReasoningLevelSelector } from "@posthog/ui/features/sessions/components/ReasoningLevelSelector";
3030
import { RawLogsView } from "@posthog/ui/features/sessions/components/raw-logs/RawLogsView";
3131
import { SessionResourcesBar } from "@posthog/ui/features/sessions/components/SessionResourcesBar";
32+
import { StaleConversationCostDialog } from "@posthog/ui/features/sessions/components/StaleConversationCostDialog";
3233
import { SteerQueueToggle } from "@posthog/ui/features/sessions/components/SteerQueueToggle";
3334
import { ThreadView } from "@posthog/ui/features/sessions/components/ThreadView";
3435
import { CHAT_CONTENT_MAX_WIDTH } from "@posthog/ui/features/sessions/constants";
@@ -46,6 +47,7 @@ import {
4647
} from "@posthog/ui/features/sessions/sessionViewStore";
4748
import type { Plan } from "@posthog/ui/features/sessions/types";
4849
import { useSessionHandoffInProgress } from "@posthog/ui/features/sessions/useSession";
50+
import { useStaleConversationGate } from "@posthog/ui/features/sessions/useStaleConversationGate";
4951
import { useSettingsStore } from "@posthog/ui/features/settings/settingsStore";
5052
import { useIsWorkspaceCloudRun } from "@posthog/ui/features/workspace/useWorkspace";
5153
import { useConnectivity } from "@posthog/ui/hooks/useConnectivity";
@@ -265,6 +267,10 @@ export function SessionView({
265267
[isOnline, onBeforeSubmit],
266268
);
267269

270+
// Warn PostHog staff before continuing a large, idle conversation whose
271+
// prompt cache has likely expired (see useStaleConversationGate).
272+
const staleGate = useStaleConversationGate(sessionId, events);
273+
268274
const [isDraggingFile, setIsDraggingFile] = useState(false);
269275
const editorRef = useRef<PromptInputHandle>(null);
270276
const dragCounterRef = useRef(0);
@@ -617,16 +623,45 @@ export function SessionView({
617623
}
618624
>
619625
{taskId && <QueuedMessagesDock taskId={taskId} />}
626+
{staleGate.dismissed && (
627+
<Flex justify="center" mb="2">
628+
<Button
629+
variant="soft"
630+
color="amber"
631+
size="1"
632+
onClick={staleGate.onReopen}
633+
>
634+
<Warning size={14} weight="fill" />
635+
Conversation paused to avoid a costly reload —
636+
review
637+
</Button>
638+
</Flex>
639+
)}
640+
<StaleConversationCostDialog
641+
open={staleGate.dialogOpen}
642+
usedTokens={staleGate.usedTokens}
643+
lastActivityAt={staleGate.lastActivityAt}
644+
costUsd={staleGate.costUsd}
645+
onContinue={staleGate.onContinue}
646+
onOpenChange={staleGate.onDialogOpenChange}
647+
/>
620648
<PromptInput
621649
ref={editorRef}
622650
sessionId={sessionId}
623651
placeholder="Type a message... @ to mention files, ! for bash mode, / for skills"
624-
disabled={!isRunning && !handoffInProgress}
652+
disabled={
653+
(!isRunning && !handoffInProgress) ||
654+
staleGate.active
655+
}
625656
submitDisabledExternal={
626657
handoffInProgress || !isOnline
627658
}
628659
submitTooltipOverride={
629-
!isOnline ? "No internet connection" : undefined
660+
staleGate.active
661+
? "Large idle conversation — review the cost notice to continue"
662+
: !isOnline
663+
? "No internet connection"
664+
: undefined
630665
}
631666
isLoading={!!isPromptPending}
632667
isActiveSession={isActiveSession}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import { Warning } from "@phosphor-icons/react";
2+
import { formatRelativeTimeLong } from "@posthog/shared";
3+
import { formatTokensCompact } from "@posthog/ui/features/sessions/contextColors";
4+
import { AlertDialog, Button, Flex } from "@radix-ui/themes";
5+
6+
/** Props accepted by the stale-conversation cost dialog. */
interface StaleConversationCostDialogProps {
  /** Controlled open state of the dialog. */
  open: boolean;
  /** Context token count shown (compact-formatted) in the dialog body. */
  usedTokens: number;
  /**
   * Time of last activity, rendered as a relative time; `null` makes the
   * dialog say the conversation "has been idle" instead.
   */
  lastActivityAt: number | null;
  /** Cumulative session cost so far, when the gateway reports it. */
  costUsd: number | null;
  /** Invoked when the user clicks "Continue anyway". */
  onContinue: () => void;
  /** Receives open-state changes from the underlying alert dialog. */
  onOpenChange: (open: boolean) => void;
}
15+
16+
/**
 * Alert dialog asking the user to confirm before continuing a large, idle
 * conversation. The body states the approximate token count, how long ago the
 * conversation was last active (or that it "has been idle" when unknown), and
 * — when `costUsd` is provided — the amount spent so far.
 *
 * "Not now" is wrapped in `AlertDialog.Cancel`; "Continue anyway" is wrapped
 * in `AlertDialog.Action` and calls `onContinue` on click. Open state is fully
 * controlled through `open` / `onOpenChange`.
 */
export function StaleConversationCostDialog(
  props: StaleConversationCostDialogProps,
) {
  const { open, usedTokens, lastActivityAt, costUsd, onContinue, onOpenChange } =
    props;

  // Phrase describing idleness, slotted into the sentence below.
  let activity = "has been idle";
  if (lastActivityAt !== null) {
    activity = `was last active ${formatRelativeTimeLong(lastActivityAt)}`;
  }

  const tokenCount = formatTokensCompact(usedTokens);

  // Optional parenthetical; an empty string renders nothing.
  const spentNote =
    costUsd === null ? "" : ` (≈$${costUsd.toFixed(2)} spent so far)`;

  return (
    <AlertDialog.Root open={open} onOpenChange={onOpenChange}>
      <AlertDialog.Content maxWidth="460px" size="2">
        <AlertDialog.Title className="text-base">
          <Flex align="center" gap="2">
            <Warning size={18} weight="fill" color="var(--orange-9)" />
            Continue this large, idle conversation?
          </Flex>
        </AlertDialog.Title>
        <AlertDialog.Description className="text-sm">
          This conversation holds about {tokenCount} tokens and {activity}. Its
          prompt cache has likely expired, so your next message re-processes
          the whole conversation at full input price instead of the ~10% cached
          rate
          {spentNote}. Starting a new conversation avoids the cost — continue
          only if you need this thread's context.
        </AlertDialog.Description>

        <Flex justify="end" gap="2" mt="4">
          <AlertDialog.Cancel>
            <Button variant="soft" color="gray" size="1">
              Not now
            </Button>
          </AlertDialog.Cancel>
          <AlertDialog.Action>
            <Button variant="solid" size="1" onClick={onContinue}>
              Continue anyway
            </Button>
          </AlertDialog.Action>
        </Flex>
      </AlertDialog.Content>
    </AlertDialog.Root>
  );
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { beforeEach, describe, expect, it } from "vitest";
2+
import { useStaleConversationGateStore } from "./staleConversationGateStore";
3+
4+
/** Whether `id` is in the store's current `acknowledgedSessions` set. */
function acknowledged(id: string): boolean {
  const { acknowledgedSessions } = useStaleConversationGateStore.getState();
  return acknowledgedSessions.has(id);
}
6+
7+
// Exercises the acknowledge action: isolation between sessions, immutable
// replacement of the Set, and reference stability on a repeated acknowledge.
describe("useStaleConversationGateStore", () => {
  const store = useStaleConversationGateStore;
  const currentSet = () => store.getState().acknowledgedSessions;
  const acknowledge = (id: string) => store.getState().acknowledge(id);

  beforeEach(() => {
    // Reset to an empty set so cases don't leak acknowledgements.
    store.setState({ acknowledgedSessions: new Set() });
  });

  it("starts with nothing acknowledged", () => {
    expect(acknowledged("s1")).toBe(false);
  });

  it("acknowledges a single session without affecting others", () => {
    acknowledge("s1");

    expect(acknowledged("s1")).toBe(true);
    expect(acknowledged("s2")).toBe(false);
  });

  it("replaces the Set immutably on acknowledge", () => {
    const before = currentSet();
    acknowledge("s1");
    const after = currentSet();

    expect(after).not.toBe(before);
    // The previous Set instance must be left untouched.
    expect(before.has("s1")).toBe(false);
  });

  it("is idempotent — acknowledging twice keeps the same reference", () => {
    acknowledge("s1");
    const first = currentSet();
    acknowledge("s1");
    const second = currentSet();

    expect(second).toBe(first);
  });
});

0 commit comments

Comments
 (0)