Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions apps/sim/providers/anthropic/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
import type { BlockTokens, IterationToolCall, StreamingExecution } from '@/executor/types'
import { MAX_TOOL_ITERATIONS } from '@/providers'
import {
applyAnthropicPromptCache,
checkForForcedToolUsage,
createReadableStreamFromAnthropicStream,
} from '@/providers/anthropic/utils'
Expand Down Expand Up @@ -324,6 +325,12 @@ export async function executeAnthropicProviderRequest(
}
}

// Prompt caching: mark the static prefix (system + tools) with an ephemeral
// cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
// Runs after the structured-output block above, which assumes `system` is still
// a string. Rewrites payload.system and replaces the last entry of anthropicTools with a tagged copy.
applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt)

// Add extended thinking configuration if supported and requested
// The 'none' sentinel means "disable thinking" — skip configuration entirely.
if (request.thinkingLevel && request.thinkingLevel !== 'none') {
Comment thread
waleedlatif1 marked this conversation as resolved.
Expand Down
76 changes: 76 additions & 0 deletions apps/sim/providers/anthropic/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/**
* @vitest-environment node
*/
import type { TextBlockParam, Tool } from '@anthropic-ai/sdk/resources'
import { describe, expect, it } from 'vitest'
import { applyAnthropicPromptCache } from '@/providers/anthropic/utils'

// Fixture prompts built from 4-character chunks, mirroring the gate's ~4 chars/token estimate.
const LARGE = 'xxxx'.repeat(2_000) // 8,000 chars, ~2,000 est. tokens: above the 1,024 gate
const SMALL = 'xxxx'.repeat(100) // 400 chars, ~100 est. tokens: below the gate

/**
 * Builds a minimal tool definition for the tests: the given name, a fixed
 * description, and an empty object schema. Every call returns a fresh object.
 */
function tool(name: string): Tool {
  return {
    name,
    description: 'does a thing',
    input_schema: { type: 'object', properties: {} },
  }
}

describe('applyAnthropicPromptCache', () => {
  /** Shape of the payload argument used throughout this suite. */
  type CachePayload = { system?: string | TextBlockParam[] }

  /** The breakpoint marker expected on every tagged block or tool. */
  const EPHEMERAL = { type: 'ephemeral' }

  it('converts a large system prompt to a cached text block and tags the last tool', () => {
    const request: CachePayload = { system: LARGE }
    const toolList = [tool('a'), tool('b')]

    applyAnthropicPromptCache(request, toolList, LARGE)

    // The string prompt becomes exactly one text block carrying the breakpoint.
    expect(Array.isArray(request.system)).toBe(true)
    const systemBlocks = request.system as TextBlockParam[]
    expect(systemBlocks).toHaveLength(1)
    expect(systemBlocks[0]).toMatchObject({
      type: 'text',
      text: LARGE,
      cache_control: EPHEMERAL,
    })

    // Earlier tools stay untouched; only the final one is tagged.
    expect(toolList[0].cache_control).toBeUndefined()
    expect(toolList[1].cache_control).toEqual(EPHEMERAL)
  })

  it('tags the system block when the system alone is large and there are no tools', () => {
    const request: CachePayload = { system: LARGE }

    applyAnthropicPromptCache(request, undefined, LARGE)

    const systemBlocks = request.system as TextBlockParam[]
    expect(systemBlocks[0].cache_control).toEqual(EPHEMERAL)
  })

  it('tags the tools even when payload.system was relocated/blanked (gate uses the request prompt)', () => {
    // Simulates the no-messages path: the system text now lives in a user
    // message and payload.system is blank, while the original prompt passed to
    // the gate is still large enough to make the tools prefix worth caching.
    const request: CachePayload = { system: '' }
    const toolList = [tool('a')]

    applyAnthropicPromptCache(request, toolList, LARGE)

    // A blank system string is never turned into a block.
    expect(request.system).toBe('')
    expect(toolList[0].cache_control).toEqual(EPHEMERAL)
  })

  it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => {
    const request: CachePayload = { system: SMALL }

    applyAnthropicPromptCache(request, undefined, SMALL)

    expect(request.system).toBe(SMALL)
  })

  it('does nothing when the combined prefix is below the threshold', () => {
    const request: CachePayload = { system: SMALL }
    const toolList = [tool('a')]

    applyAnthropicPromptCache(request, toolList, SMALL)

    expect(request.system).toBe(SMALL)
    expect(toolList[0].cache_control).toBeUndefined()
  })
})
43 changes: 43 additions & 0 deletions apps/sim/providers/anthropic/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,57 @@ import type {
RawMessageDeltaEvent,
RawMessageStartEvent,
RawMessageStreamEvent,
TextBlockParam,
Tool,
Usage,
} from '@anthropic-ai/sdk/resources'
import { createLogger } from '@sim/logger'
import { randomFloat } from '@sim/utils/random'
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
import { trackForcedToolUsage } from '@/providers/utils'

const logger = createLogger('AnthropicUtils')

/**
 * The slice of an Anthropic request payload that prompt caching rewrites: the
 * `system` field, either as a plain string or as a list of text blocks.
 */
interface AnthropicCacheablePayload {
  system?: string | TextBlockParam[]
}

/**
 * Adds ephemeral prompt-cache breakpoints to the static request prefix (system
 * prompt + tool definitions) when {@link shouldCacheStaticPrefix} approves it,
 * so repeated calls can reuse the cached prefix.
 *
 * When the gate passes:
 * - a non-empty string `payload.system` is replaced by a one-element array
 *   holding a text block with `cache_control: { type: 'ephemeral' }`;
 * - the last slot of `tools` is overwritten with a shallow copy of that tool
 *   carrying the same `cache_control`. The array is changed in place, but the
 *   original tool object itself is not mutated.
 *
 * When the gate rejects the prefix nothing is touched. A `payload.system` that
 * is empty, undefined, or already an array is always left as is.
 *
 * @param payload - Payload whose `system` field may be rewritten.
 * @param tools - Tool definitions sent with the request, if any.
 * @param systemPrompt - The ORIGINAL request system prompt. It feeds the
 *   worthiness gate only, so the tools are still tagged on the no-messages path,
 *   where the provider relocates the system text into a user message and blanks
 *   `payload.system`.
 */
export function applyAnthropicPromptCache(
  payload: AnthropicCacheablePayload,
  tools: Tool[] | undefined,
  systemPrompt: string | null | undefined
): void {
  const hasTools = !!tools?.length
  const shouldCache = shouldCacheStaticPrefix({
    systemPrompt,
    hasTools,
    // Serialized length is a cheap size proxy; skip the work for an empty list.
    toolsApproxChars: hasTools ? JSON.stringify(tools).length : 0,
  })
  if (!shouldCache) {
    return
  }

  // Only a non-empty string is converted; blank or already-structured systems are kept.
  if (typeof payload.system === 'string' && payload.system.length > 0) {
    payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
  }

  // Tag only the final tool by swapping a tagged copy into its slot.
  if (tools?.length) {
    const lastIndex = tools.length - 1
    tools[lastIndex] = { ...tools[lastIndex], cache_control: { type: 'ephemeral' } }
  }
}

export interface AnthropicStreamUsage {
input_tokens: number
output_tokens: number
Expand Down
43 changes: 43 additions & 0 deletions apps/sim/providers/prompt-cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* @vitest-environment node
*/
import { describe, expect, it } from 'vitest'
import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'

// Fixture prompts built from 4-character chunks, mirroring the gate's ~4 chars/token estimate.
const LARGE = 'xxxx'.repeat(2_000) // 8,000 chars, ~2,000 est. tokens: above the 1,024 gate
const SMALL = 'xxxx'.repeat(100) // 400 chars, ~100 est. tokens: below the gate

describe('shouldCacheStaticPrefix', () => {
  it('caches a large system prompt that has tools (agent loop)', () => {
    const verdict = shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })

    expect(verdict).toBe(true)
  })

  it('caches a large system prompt even without tools', () => {
    const verdict = shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: false })

    expect(verdict).toBe(true)
  })

  it('reaches the threshold via tools when the system prompt alone is below it', () => {
    // The system prompt is under the gate by itself; the serialized tools push
    // the combined prefix over it, and having tools implies the prefix is reused.
    const verdict = shouldCacheStaticPrefix({
      systemPrompt: SMALL,
      hasTools: true,
      toolsApproxChars: 8_000,
    })

    expect(verdict).toBe(true)
  })

  it('does NOT cache a small, tool-less prompt (one-shot write surcharge avoided)', () => {
    const verdict = shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: false })

    expect(verdict).toBe(false)
  })

  it('does NOT cache a small system even with tools when the combined prefix is below threshold', () => {
    const verdict = shouldCacheStaticPrefix({
      systemPrompt: SMALL,
      hasTools: true,
      toolsApproxChars: 400,
    })

    expect(verdict).toBe(false)
  })

  it('does NOT cache when there is no system prompt', () => {
    // An empty prompt is rejected even when the tools alone would clear the gate.
    const emptyPromptVerdict = shouldCacheStaticPrefix({
      systemPrompt: '',
      hasTools: true,
      toolsApproxChars: 8_000,
    })
    expect(emptyPromptVerdict).toBe(false)

    const nullPromptVerdict = shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })
    expect(nullPromptVerdict).toBe(false)
  })
})
47 changes: 47 additions & 0 deletions apps/sim/providers/prompt-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
 * Estimated-token floor for the static prefix (system prompt plus tool
 * definitions). A prefix estimated below this gets no prompt-cache breakpoint.
 *
 * The figure is a rough lower bound across Claude models, and some require
 * more. Providers silently skip caching under their minimum anyway, so the
 * floor only keeps a breakpoint from being spent on a trivially small prefix.
 */
const MIN_CACHEABLE_PREFIX_TOKENS = 1_024

/**
 * Approximates the token count of `text` at four characters per token,
 * rounding up. Deliberately crude: it is fast and only feeds a size gate.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4
  const charCount = text.length
  return Math.ceil(charCount / charsPerToken)
}

/**
 * Gate for injecting prompt-cache breakpoints on the static prefix (system
 * prompt + tool definitions). Used by providers that need explicit cache
 * control: Anthropic, Bedrock, and Anthropic models via OpenRouter.
 *
 * A breakpoint only pays off when the prefix is big enough to be cacheable and
 * is actually read back. Agent tool-loops re-send the prefix on every
 * iteration, and a large system prompt tends to be reused across runs within
 * the cache TTL. A small prompt without tools is skipped on purpose, so a
 * one-shot call never pays the cache-write surcharge for a prefix nobody reads.
 *
 * @param params.systemPrompt - Request system prompt; null, undefined, or an
 *   empty string always yields `false`.
 * @param params.hasTools - Whether the request carries tool definitions.
 * @param params.toolsApproxChars - Approximate character count of the tool
 *   definitions, converted at ~4 chars/token; counts as 0 when omitted.
 * @returns `true` when the prefix should receive cache breakpoints.
 */
export function shouldCacheStaticPrefix(params: {
  systemPrompt: string | null | undefined
  hasTools: boolean
  toolsApproxChars?: number
}): boolean {
  const { systemPrompt, hasTools, toolsApproxChars } = params

  // Without a system prompt there is nothing to gate on.
  if (!systemPrompt) {
    return false
  }

  const systemTokens = estimateTokens(systemPrompt)
  const toolTokens = toolsApproxChars ? Math.ceil(toolsApproxChars / 4) : 0

  // The combined prefix has to clear the floor before anything else matters.
  if (systemTokens + toolTokens < MIN_CACHEABLE_PREFIX_TOKENS) {
    return false
  }

  // Tools signal an agent loop that re-reads the prefix on each iteration.
  // Tool-less requests qualify only when the system prompt clears the floor
  // by itself.
  return hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS
}
Comment thread
waleedlatif1 marked this conversation as resolved.
Loading