Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions apps/sim/providers/anthropic/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
supportsNativeStructuredOutputs,
supportsTemperature,
} from '@/providers/models'
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
import { createStreamingExecution } from '@/providers/streaming-execution'
import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter'
import { enrichLastModelSegment } from '@/providers/trace-enrichment'
Expand Down Expand Up @@ -324,6 +325,24 @@ export async function executeAnthropicProviderRequest(
}
}

// Prompt caching: mark the static prefix (system + tools) with an ephemeral
// cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
// Must run after the structured-output block above, which assumes `system` is
// still a string. Tools are tagged at their assignment below.
// Gate on the original request system prompt, not payload.system: when there
// are no context/chat messages the system text is relocated into a user
// message and payload.system is blanked (see above), but the prefix is still
// worth caching (the tools, at least).
const cacheStaticPrefix = shouldCacheStaticPrefix({
systemPrompt: request.systemPrompt,
hasTools: !!anthropicTools?.length,
toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0,
})
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated

if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) {
payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
}

// Add extended thinking configuration if supported and requested
// The 'none' sentinel means "disable thinking" — skip configuration entirely.
if (request.thinkingLevel && request.thinkingLevel !== 'none') {
Comment thread
waleedlatif1 marked this conversation as resolved.
Expand Down Expand Up @@ -366,6 +385,13 @@ export async function executeAnthropicProviderRequest(
}

if (anthropicTools?.length) {
if (cacheStaticPrefix) {
const lastIndex = anthropicTools.length - 1
anthropicTools[lastIndex] = {
...anthropicTools[lastIndex],
cache_control: { type: 'ephemeral' },
}
}
payload.tools = anthropicTools
// Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with
// thinking. Only auto and none are supported when thinking is enabled.
Expand Down
59 changes: 59 additions & 0 deletions apps/sim/providers/prompt-cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/**
* @vitest-environment node
*/
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'

// Fixture system prompts sized around the 1,024-estimated-token gate
// (the estimate is ~4 characters per token).
const SMALL = 'x'.repeat(400) // ~100 est. tokens: under the gate
const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens: over the gate

describe('shouldCacheStaticPrefix', () => {
  // Every test starts with the kill switch stubbed to an empty string and ends
  // with all env stubs removed. vi.stubEnv is used rather than `delete` (which
  // biome rewrites) or assigning `undefined` (which coerces to the string
  // "undefined" and leaks to other tests in the worker).
  beforeEach(() => {
    vi.stubEnv('PROMPT_CACHE_DISABLED', '')
  })

  afterEach(() => {
    vi.unstubAllEnvs()
  })

  it('caches a large system prompt that has tools (agent loop)', () => {
    const decision = shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })
    expect(decision).toBe(true)
  })

  it('caches a large system prompt even without tools', () => {
    const decision = shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: false })
    expect(decision).toBe(true)
  })

  it('reaches the threshold via tools when the system prompt alone is below it', () => {
    // The system prompt is under the gate on its own; the serialized tools push
    // the combined prefix over it, and having tools implies the prefix is reused.
    const decision = shouldCacheStaticPrefix({
      systemPrompt: SMALL,
      hasTools: true,
      toolsApproxChars: 8_000,
    })
    expect(decision).toBe(true)
  })

  it('does NOT cache a small, tool-less prompt (one-shot write surcharge avoided)', () => {
    const decision = shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: false })
    expect(decision).toBe(false)
  })

  it('does NOT cache a small system even with tools when the combined prefix is below threshold', () => {
    const decision = shouldCacheStaticPrefix({
      systemPrompt: SMALL,
      hasTools: true,
      toolsApproxChars: 400,
    })
    expect(decision).toBe(false)
  })

  it('does NOT cache when there is no system prompt', () => {
    // An empty string and null are both treated as "no system prompt".
    const withEmptyPrompt = shouldCacheStaticPrefix({
      systemPrompt: '',
      hasTools: true,
      toolsApproxChars: 8_000,
    })
    expect(withEmptyPrompt).toBe(false)

    const withNullPrompt = shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })
    expect(withNullPrompt).toBe(false)
  })

  it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => {
    vi.stubEnv('PROMPT_CACHE_DISABLED', 'true')
    const decision = shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })
    expect(decision).toBe(false)
  })
})
55 changes: 55 additions & 0 deletions apps/sim/providers/prompt-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { getEnv, isTruthy } from '@/lib/core/config/env'

/**
 * Smallest estimated static prefix (system prompt plus tool definitions), in
 * tokens, for which marking a prompt-cache breakpoint is considered worthwhile.
 *
 * Treated as a rough floor across Claude models; some require a larger prefix.
 * Providers silently skip caching for prefixes under their minimum anyway, so
 * the only job of this gate is to avoid spending a breakpoint on a trivially
 * small prefix.
 */
const MIN_CACHEABLE_PREFIX_TOKENS = 1_024

/**
 * Approximates the token count of `text` by assuming about four characters per
 * token and rounding up. Deliberately cheap: the result only feeds a size gate.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4
  return Math.ceil(text.length / charsPerToken)
}

/**
 * Gate for injecting prompt-cache breakpoints on the static prefix (system
 * prompt + tool definitions). Intended for providers that need explicit cache
 * control: Anthropic, Bedrock, and Anthropic models via OpenRouter.
 *
 * A breakpoint only pays off when the prefix is big enough to be cacheable and
 * is actually read back. Agent tool-loops re-send the prefix on every
 * iteration, and a large system prompt is typically reused across runs within
 * the cache TTL. A small, tool-less prompt is skipped on purpose so a one-shot
 * call never pays the cache-write surcharge for a prefix nobody re-reads.
 *
 * Setting `PROMPT_CACHE_DISABLED=true` turns caching off globally (kill switch).
 *
 * @param params.systemPrompt - System prompt text; `null`, `undefined`, and `''`
 *   all mean "no system prompt" and never cache.
 * @param params.hasTools - Whether the request carries tool definitions.
 * @param params.toolsApproxChars - Approximate character length of the
 *   serialized tool definitions; omitted or `0` contributes nothing.
 * @returns `true` when a cache breakpoint should be added to the static prefix.
 */
export function shouldCacheStaticPrefix(params: {
  systemPrompt: string | null | undefined
  hasTools: boolean
  toolsApproxChars?: number
}): boolean {
  // The global kill switch overrides every other consideration.
  const cachingDisabled = isTruthy(getEnv('PROMPT_CACHE_DISABLED'))
  if (cachingDisabled) return false

  const { systemPrompt, hasTools, toolsApproxChars } = params

  // Without a system prompt there is nothing to gate on.
  if (!systemPrompt) return false

  // Same ~4 chars/token approximation for both halves of the prefix.
  const systemTokens = estimateTokens(systemPrompt)
  const toolTokens = toolsApproxChars ? Math.ceil(toolsApproxChars / 4) : 0

  // The combined prefix has to clear the minimum cacheable size.
  const combinedTokens = systemTokens + toolTokens
  if (combinedTokens < MIN_CACHEABLE_PREFIX_TOKENS) return false

  // Tools signal an agent loop, where the prefix is re-read each iteration.
  // With no tools, cache only if the system prompt by itself is large enough
  // to justify the write.
  return hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS
}
Comment thread
waleedlatif1 marked this conversation as resolved.
Loading