Skip to content

Commit 7754a05

Browse files
ericdallo and eca-agent
committed
Improve Anthropic prompt caching with static/dynamic split
Split system prompt into static (agent prompt, rules, skills, stable contexts) and dynamic (cursor, MCP resources, MCP instructions) blocks with separate cache_control markers. Static instructions are memoized per chat in db, so only the small dynamic block is recomputed each turn.

- prompt.clj: split build-chat-instructions into build-static/dynamic, return {:static :dynamic} map, volatile contexts (cursor, mcpResource) go to dynamic block
- anthropic.clj: 3-block system prompt (identity, static cached, dynamic cached), add cache_control to last tool via add-cache-to-last-tool
- llm_api.clj: flatten instructions map for non-Anthropic providers
- chat.clj: cache static instructions in [:chats chat-id :prompt-cache]

🤖 Generated with [eca](https://eca.dev)

Co-Authored-By: eca <git@eca.dev>
1 parent 64599e7 commit 7754a05

File tree

7 files changed

+182
-62
lines changed

7 files changed

+182
-62
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
- Fix exceptions on openai responses models when creating tasks.
66
- Fix potential infinite auto-compact loop when context overflow persists after compaction. #391
7+
- Improve Anthropic prompt caching: split system prompt into static/dynamic blocks, add cache markers to the tools array, and memoize static instructions per chat.
78

89
## 0.123.2
910

src/eca/features/chat.clj

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -923,15 +923,15 @@
923923
(f.context/agents-file-contexts db)
924924
(f.context/raw-contexts->refined contexts db))
925925
repo-map* (delay (f.index/repo-map db config {:as-string? true}))
926-
instructions (f.prompt/build-chat-instructions refined-contexts
927-
rules
928-
skills
929-
repo-map*
930-
agent
931-
config
932-
chat-id
933-
all-tools
934-
db)
926+
cached-static (get-in db [:chats chat-id :prompt-cache :static])
927+
instructions (if cached-static
928+
{:static cached-static
929+
:dynamic (f.prompt/build-dynamic-instructions refined-contexts db)}
930+
(let [result (f.prompt/build-chat-instructions
931+
refined-contexts rules skills repo-map*
932+
agent config chat-id all-tools db)]
933+
(swap! db* assoc-in [:chats chat-id :prompt-cache :static] (:static result))
934+
result))
935935
image-contents (->> refined-contexts
936936
(filter #(= :image (:type %))))
937937
expanded-prompt-contexts (when-let [contexts-str (some-> (f.context/contexts-str-from-prompt message db)

src/eca/features/prompt.clj

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,17 @@
126126
"</mcp-server-instructions>"
127127
""))))
128128

129-
(defn build-chat-instructions [refined-contexts rules skills repo-map* agent-name config chat-id all-tools db]
130-
(let [selmer-ctx (->base-selmer-ctx all-tools chat-id db)]
129+
(def ^:private volatile-context-types
130+
"Context types that change between turns and belong in the dynamic prompt block."
131+
#{:cursor :mcpResource})
132+
133+
(defn build-static-instructions
134+
"Builds the stable portion of the system prompt: agent prompt, rules, skills,
135+
stable contexts, and additional system info. Stable within a session — callers
136+
should cache the result in [:chats chat-id :prompt-cache :static]."
137+
[refined-contexts rules skills repo-map* agent-name config chat-id all-tools db]
138+
(let [selmer-ctx (->base-selmer-ctx all-tools chat-id db)
139+
stable-contexts (remove #(volatile-context-types (:type %)) refined-contexts)]
131140
(multi-str
132141
(selmer/render (eca-chat-prompt agent-name config) selmer-ctx)
133142
(when (seq rules)
@@ -152,14 +161,43 @@
152161
skills)
153162
"</skills>"
154163
""])
155-
(when (seq refined-contexts)
164+
(when (seq stable-contexts)
156165
["## Contexts"
157166
""
158-
(contexts-str refined-contexts repo-map* (get-in db [:chats chat-id :startup-context]))])
159-
(mcp-instructions-section db)
167+
(contexts-str stable-contexts repo-map* (get-in db [:chats chat-id :startup-context]))])
160168
""
161169
(selmer/render (load-builtin-prompt "additional_system_info.md") selmer-ctx))))
162170

171+
(defn build-dynamic-instructions
172+
"Builds the volatile portion of the system prompt: cursor/MCP resource contexts
173+
and MCP server instructions. Recomputed every turn. Returns nil when empty."
174+
[refined-contexts db]
175+
(let [volatile-contexts (filter #(volatile-context-types (:type %)) refined-contexts)
176+
result (multi-str
177+
(when (seq volatile-contexts)
178+
(contexts-str volatile-contexts nil nil))
179+
(mcp-instructions-section db))]
180+
(when-not (string/blank? result) result)))
181+
182+
(defn build-chat-instructions
183+
"Returns {:static \"...\" :dynamic \"...\"}.
184+
Static content (agent prompt, rules, skills, stable contexts) is stable within a session.
185+
Dynamic content (cursor, MCP resources, MCP instructions) is recomputed every turn.
186+
Callers should cache :static in [:chats chat-id :prompt-cache :static] for
187+
Anthropic API cache prefix stability across turns."
188+
[refined-contexts rules skills repo-map* agent-name config chat-id all-tools db]
189+
{:static (build-static-instructions refined-contexts rules skills repo-map*
190+
agent-name config chat-id all-tools db)
191+
:dynamic (build-dynamic-instructions refined-contexts db)})
192+
193+
(defn instructions->str
194+
"Flattens a {:static :dynamic} instructions map into a single string.
195+
Used by non-Anthropic providers that don't support split system prompt blocks."
196+
[{:keys [static dynamic]}]
197+
(if dynamic
198+
(multi-str static dynamic)
199+
static))
200+
163201
(defn build-rewrite-instructions [text path full-text range all-tools config db]
164202
(let [legacy-prompt-file (-> config :rewrite :systemPromptFile)
165203
legacy-config-prompt (-> config :rewrite :systemPrompt)

src/eca/llm_api.clj

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
[babashka.fs :as fs]
44
[clojure.string :as string]
55
[eca.config :as config]
6+
[eca.features.prompt :as f.prompt]
67
[eca.llm-providers.anthropic :as llm-providers.anthropic]
78
[eca.llm-providers.azure]
89
[eca.llm-providers.copilot]
@@ -185,6 +186,8 @@
185186
reasoning-history (or (:reasoningHistory model-config) :all)
186187
[auth-type api-key] (llm-util/provider-api-key provider provider-auth config)
187188
api-url (llm-util/provider-api-url provider config)
189+
;; Flatten {:static :dynamic} instructions map into a single string for non-Anthropic providers
190+
flat-instructions (if (map? instructions) (f.prompt/instructions->str instructions) instructions)
188191
callbacks (when-not sync?
189192
{:on-message-received on-message-received
190193
:on-error on-error
@@ -199,7 +202,7 @@
199202
(= "openai" provider)
200203
(handler
201204
{:model real-model
202-
:instructions instructions
205+
:instructions flat-instructions
203206
:user-messages user-messages
204207
:max-output-tokens max-output-tokens
205208
:reason? reason?
@@ -248,7 +251,7 @@
248251
"copilot-integration-id" "vscode-chat"}
249252
extra-headers))
250253
base-opts {:model real-model
251-
:instructions instructions
254+
:instructions flat-instructions
252255
:user-messages user-messages
253256
:max-output-tokens max-output-tokens
254257
:reason? reason?
@@ -278,7 +281,7 @@
278281
(= "google" provider)
279282
(handler
280283
{:model real-model
281-
:instructions instructions
284+
:instructions flat-instructions
282285
:user-messages user-messages
283286
:max-output-tokens max-output-tokens
284287
:reason? reason?
@@ -303,7 +306,7 @@
303306
:reason? (:reason? model-capabilities)
304307
:supports-image? supports-image?
305308
:model real-model
306-
:instructions instructions
309+
:instructions flat-instructions
307310
:user-messages user-messages
308311
:past-messages past-messages
309312
:tools tools
@@ -320,7 +323,7 @@
320323
http-client (:httpClient provider-config)]
321324
(handler
322325
{:model real-model
323-
:instructions instructions
326+
:instructions flat-instructions
324327
:user-messages user-messages
325328
:max-output-tokens max-output-tokens
326329
:web-search web-search

src/eca/llm_providers/anthropic.clj

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,17 @@
6666
:name (:full-name tool)}) tools)
6767
web-search (conj {:type "web_search_20250305"
6868
:name "web_search"
69-
:max_uses 10
70-
:cache_control {:type "ephemeral"}})))
69+
:max_uses 10})))
70+
71+
(defn ^:private add-cache-to-last-tool
72+
"Adds cache_control to the last tool in the tools array, ensuring
73+
the full tools list is part of the cached prefix."
74+
[tools]
75+
(if (seq tools)
76+
(shared/update-last
77+
(vec tools)
78+
(fn [tool] (assoc tool :cache_control {:type "ephemeral"})))
79+
tools))
7180

7281
(defn ^:private base-request! [{:keys [rid body api-url api-key auth-type url-relative-path content-block* on-error on-stream http-client extra-headers cancelled? stream-idle-timeout-seconds]}]
7382
(let [url (join-api-url api-url (or url-relative-path messages-path))
@@ -303,15 +312,21 @@
303312
merge-adjacent-assistants
304313
merge-adjacent-tool-results)
305314
stream? (boolean callbacks)
315+
{:keys [static dynamic]} (if (map? instructions)
316+
instructions
317+
{:static instructions :dynamic nil})
318+
system-blocks (cond-> [{:type "text" :text "You are Claude Code, Anthropic's official CLI for Claude."}
319+
{:type "text" :text static :cache_control {:type "ephemeral"}}]
320+
(not (string/blank? dynamic))
321+
(conj {:type "text" :text dynamic :cache_control {:type "ephemeral"}}))
306322
body (merge
307323
(assoc-some
308324
{:model model
309325
:messages (add-cache-to-last-message messages)
310326
:max_tokens (or max-output-tokens 32000)
311327
:stream stream?
312-
:tools (->tools tools web-search)
313-
:system [{:type "text" :text "You are Claude Code, Anthropic's official CLI for Claude."}
314-
{:type "text" :text instructions :cache_control {:type "ephemeral"}}]}
328+
:tools (add-cache-to-last-tool (->tools tools web-search))
329+
:system system-blocks}
315330
:thinking (when reason?
316331
{:type "enabled" :budget_tokens 2048}))
317332
extra-payload)

0 commit comments

Comments
 (0)