Skip to content

Commit 3855e04

Browse files
committed
refactor(providers): always-on prompt caching via a directly-tested helper
- Remove the PROMPT_CACHE_DISABLED kill switch — prompt caching is always on.
- Extract the Anthropic tagging into applyAnthropicPromptCache(payload, tools, systemPrompt) in anthropic/utils.ts: one place that gates and mutates the system block + last tool, replacing the two inline blocks in core.ts.
- Add direct unit tests for the helper (system→cached block, last-tool tagged, relocated/blanked-system still tags tools, below-threshold and tool-less cases untouched) so the actual payload mutation is verified, not just the gate.

No behavior change to outputs; verified on vitest 4.1.8 (CI's version).
1 parent 3a44936 commit 3855e04

5 files changed

Lines changed: 124 additions & 48 deletions

File tree

apps/sim/providers/anthropic/core.ts

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
66
import type { BlockTokens, IterationToolCall, StreamingExecution } from '@/executor/types'
77
import { MAX_TOOL_ITERATIONS } from '@/providers'
88
import {
9+
applyAnthropicPromptCache,
910
checkForForcedToolUsage,
1011
createReadableStreamFromAnthropicStream,
1112
} from '@/providers/anthropic/utils'
@@ -16,7 +17,6 @@ import {
1617
supportsNativeStructuredOutputs,
1718
supportsTemperature,
1819
} from '@/providers/models'
19-
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
2020
import { createStreamingExecution } from '@/providers/streaming-execution'
2121
import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter'
2222
import { enrichLastModelSegment } from '@/providers/trace-enrichment'
@@ -327,21 +327,9 @@ export async function executeAnthropicProviderRequest(
327327

328328
// Prompt caching: mark the static prefix (system + tools) with an ephemeral
329329
// cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
330-
// Must run after the structured-output block above, which assumes `system` is
331-
// still a string. Tools are tagged at their assignment below.
332-
// Gate on the original request system prompt, not payload.system: when there
333-
// are no context/chat messages the system text is relocated into a user
334-
// message and payload.system is blanked (see above), but the prefix is still
335-
// worth caching (the tools, at least).
336-
const cacheStaticPrefix = shouldCacheStaticPrefix({
337-
systemPrompt: request.systemPrompt,
338-
hasTools: !!anthropicTools?.length,
339-
toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0,
340-
})
341-
342-
if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) {
343-
payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
344-
}
330+
// Runs after the structured-output block above, which assumes `system` is still
331+
// a string. Mutates payload.system and the last tool in place.
332+
applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt)
345333

346334
// Add extended thinking configuration if supported and requested
347335
// The 'none' sentinel means "disable thinking" — skip configuration entirely.
@@ -385,13 +373,6 @@ export async function executeAnthropicProviderRequest(
385373
}
386374

387375
if (anthropicTools?.length) {
388-
if (cacheStaticPrefix) {
389-
const lastIndex = anthropicTools.length - 1
390-
anthropicTools[lastIndex] = {
391-
...anthropicTools[lastIndex],
392-
cache_control: { type: 'ephemeral' },
393-
}
394-
}
395376
payload.tools = anthropicTools
396377
// Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with
397378
// thinking. Only auto and none are supported when thinking is enabled.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
import type { TextBlockParam, Tool } from '@anthropic-ai/sdk/resources'
5+
import { describe, expect, it } from 'vitest'
6+
import { applyAnthropicPromptCache } from '@/providers/anthropic/utils'
7+
8+
const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
9+
const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
10+
11+
const tool = (name: string): Tool => ({
12+
name,
13+
description: 'does a thing',
14+
input_schema: { type: 'object', properties: {} },
15+
})
16+
17+
describe('applyAnthropicPromptCache', () => {
18+
it('converts a large system prompt to a cached text block and tags the last tool', () => {
19+
const payload: { system?: string | TextBlockParam[] } = { system: LARGE }
20+
const tools = [tool('a'), tool('b')]
21+
22+
applyAnthropicPromptCache(payload, tools, LARGE)
23+
24+
expect(Array.isArray(payload.system)).toBe(true)
25+
const blocks = payload.system as TextBlockParam[]
26+
expect(blocks).toHaveLength(1)
27+
expect(blocks[0]).toMatchObject({
28+
type: 'text',
29+
text: LARGE,
30+
cache_control: { type: 'ephemeral' },
31+
})
32+
// Only the LAST tool carries the breakpoint; earlier tools are untouched.
33+
expect(tools[0].cache_control).toBeUndefined()
34+
expect(tools[1].cache_control).toEqual({ type: 'ephemeral' })
35+
})
36+
37+
it('tags the system block when the system alone is large and there are no tools', () => {
38+
const payload: { system?: string | TextBlockParam[] } = { system: LARGE }
39+
40+
applyAnthropicPromptCache(payload, undefined, LARGE)
41+
42+
const blocks = payload.system as TextBlockParam[]
43+
expect(blocks[0].cache_control).toEqual({ type: 'ephemeral' })
44+
})
45+
46+
it('tags the tools even when payload.system was relocated/blanked (gate uses the request prompt)', () => {
47+
// No-messages path: the provider moves the system text into a user message
48+
// and blanks payload.system, but the original prompt is large, so the tools
49+
// prefix is still worth caching.
50+
const payload: { system?: string | TextBlockParam[] } = { system: '' }
51+
const tools = [tool('a')]
52+
53+
applyAnthropicPromptCache(payload, tools, LARGE)
54+
55+
expect(payload.system).toBe('') // empty system is never converted
56+
expect(tools[0].cache_control).toEqual({ type: 'ephemeral' })
57+
})
58+
59+
it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => {
60+
const payload: { system?: string | TextBlockParam[] } = { system: SMALL }
61+
62+
applyAnthropicPromptCache(payload, undefined, SMALL)
63+
64+
expect(payload.system).toBe(SMALL)
65+
})
66+
67+
it('does nothing when the combined prefix is below the threshold', () => {
68+
const payload: { system?: string | TextBlockParam[] } = { system: SMALL }
69+
const tools = [tool('a')]
70+
71+
applyAnthropicPromptCache(payload, tools, SMALL)
72+
73+
expect(payload.system).toBe(SMALL)
74+
expect(tools[0].cache_control).toBeUndefined()
75+
})
76+
})

apps/sim/providers/anthropic/utils.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,57 @@ import type {
22
RawMessageDeltaEvent,
33
RawMessageStartEvent,
44
RawMessageStreamEvent,
5+
TextBlockParam,
6+
Tool,
57
Usage,
68
} from '@anthropic-ai/sdk/resources'
79
import { createLogger } from '@sim/logger'
810
import { randomFloat } from '@sim/utils/random'
11+
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
912
import { trackForcedToolUsage } from '@/providers/utils'
1013

1114
const logger = createLogger('AnthropicUtils')
1215

16+
/**
* Mutable view of the Anthropic payload field that applyAnthropicPromptCache
* rewrites. Only `system` is modelled here; the tool list is handed to that
* helper as a separate argument rather than read from the payload.
*/
17+
interface AnthropicCacheablePayload {
18+
// String form is what the helper converts; the text-block array is what it writes back.
system?: string | Array<TextBlockParam>
19+
}
20+
21+
/**
22+
* Marks the static request prefix (system prompt + tools) with an ephemeral
23+
* cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so
24+
* repeated calls reuse the cached prefix. When the gate declines, nothing is
25+
* touched. Otherwise a non-empty string `payload.system` is replaced by a single
* cached text block, and the last slot of the `tools` array is reassigned to a
* shallow copy of that tool with `cache_control` added (the array is mutated;
* the original tool object is not).
26+
*
27+
* `systemPrompt` is the ORIGINAL request system prompt, used only for the
28+
* worthiness gate: on the no-messages path the provider relocates the system
29+
* text into a user message and blanks `payload.system`, but the tools prefix is
30+
* still worth caching there.
*
* @param payload - Request payload whose `system` field may be rewritten.
* @param tools - Tool list; its serialized length feeds the gate and its last
*   entry receives the breakpoint.
* @param systemPrompt - Original request system prompt, passed to the gate only.
31+
*/
32+
export function applyAnthropicPromptCache(
33+
payload: AnthropicCacheablePayload,
34+
tools: Tool[] | undefined,
35+
systemPrompt: string | null | undefined
36+
): void {
37+
const shouldCache = shouldCacheStaticPrefix({
38+
systemPrompt,
39+
hasTools: !!tools?.length,
40+
// Serialized length of the tool definitions serves as a rough size input for the gate.
toolsApproxChars: tools ? JSON.stringify(tools).length : 0,
41+
})
42+
if (!shouldCache) {
43+
return
44+
}
45+
46+
// An empty (blanked) or already-array system is left as-is; only the tools are tagged then.
if (typeof payload.system === 'string' && payload.system.length > 0) {
47+
payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
48+
}
49+
50+
if (tools?.length) {
51+
const lastIndex = tools.length - 1
52+
// Reassign the slot with a copy instead of mutating the tool object itself.
tools[lastIndex] = { ...tools[lastIndex], cache_control: { type: 'ephemeral' } }
53+
}
54+
}
55+
1356
export interface AnthropicStreamUsage {
1457
input_tokens: number
1558
output_tokens: number

apps/sim/providers/prompt-cache.test.ts

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,13 @@
11
/**
22
* @vitest-environment node
33
*/
4-
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
4+
import { describe, expect, it } from 'vitest'
55
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
66

77
const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
88
const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
99

1010
describe('shouldCacheStaticPrefix', () => {
11-
// vi.stubEnv cleanly sets/restores the kill switch without `delete` (which
12-
// biome rewrites) or assigning `undefined` (which coerces to the string
13-
// "undefined" and leaks to other tests in the worker).
14-
beforeEach(() => {
15-
vi.stubEnv('PROMPT_CACHE_DISABLED', '')
16-
})
17-
18-
afterEach(() => {
19-
vi.unstubAllEnvs()
20-
})
21-
2211
it('caches a large system prompt that has tools (agent loop)', () => {
2312
expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(true)
2413
})
@@ -51,9 +40,4 @@ describe('shouldCacheStaticPrefix', () => {
5140
).toBe(false)
5241
expect(shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })).toBe(false)
5342
})
54-
55-
it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => {
56-
vi.stubEnv('PROMPT_CACHE_DISABLED', 'true')
57-
expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false)
58-
})
5943
})

apps/sim/providers/prompt-cache.ts

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import { getEnv, isTruthy } from '@/lib/core/config/env'
2-
31
/**
42
* Minimum estimated static-prefix size (system + tool definitions) before it is
53
* worth marking a prompt-cache breakpoint. This is a rough lower bound across
@@ -23,18 +21,12 @@ function estimateTokens(text: string): number {
2321
* a large system prompt is typically reused across runs within the cache TTL.
2422
* A small, tool-less prompt is intentionally skipped so a one-shot call never
2523
* pays the cache-write surcharge for a prefix that is never read back.
26-
*
27-
* Set `PROMPT_CACHE_DISABLED=true` to turn this off globally (kill switch).
2824
*/
2925
export function shouldCacheStaticPrefix(params: {
3026
systemPrompt: string | null | undefined
3127
hasTools: boolean
3228
toolsApproxChars?: number
3329
}): boolean {
34-
if (isTruthy(getEnv('PROMPT_CACHE_DISABLED'))) {
35-
return false
36-
}
37-
3830
const system = params.systemPrompt ?? ''
3931
if (!system) {
4032
return false

0 commit comments

Comments
 (0)