Skip to content

Commit f3163da

Browse files
committed
feat: add stable prompt cache boundary
1 parent 0c55f54 commit f3163da

7 files changed

Lines changed: 339 additions & 9 deletions

File tree

docs/content/docs/(core)/prompts.mdx

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,22 @@ The channel system prompt is the most complex, assembled from multiple dynamic c
124124
125125
{{ worker_capabilities }}
126126
127+
{{ system_prompt_cache_boundary }}
128+
129+
{%- if available_channels %}
130+
{{ available_channels }}
131+
{%- endif %}
132+
133+
{%- if working_memory %}
134+
{{ working_memory }}
135+
{%- endif %}
136+
137+
{%- if knowledge_synthesis %}
138+
## Knowledge Context
139+
140+
{{ knowledge_synthesis }}
141+
{%- endif %}
142+
127143
{%- if conversation_context %}
128144
## Conversation Context
129145
@@ -137,6 +153,35 @@ The channel system prompt is the most complex, assembled from multiple dynamic c
137153
{%- endif %}
138154
```
139155

156+
## Prompt Cache Boundary
157+
158+
The channel prompt includes a cache boundary after the stable instruction prefix and before volatile runtime context. For Anthropic requests, the system prompt is split at that marker: the stable prefix receives `cache_control`, while the volatile sections (status, working memory, knowledge context, channel activity, and conversation context) do not.
159+
160+
For all other providers, the marker is stripped from the prompt before the instructions are sent.
161+
162+
Keep stable sections above the boundary:
163+
164+
- identity context
165+
- base channel rules
166+
- adapter guidance
167+
- skills
168+
- worker capabilities
169+
170+
Keep volatile sections below it:
171+
172+
- available channels
173+
- org and project context
174+
- working memory
175+
- channel activity
176+
- participant context
177+
- knowledge context
178+
- conversation context
179+
- current status
180+
- message coalescing hints
181+
- backfilled transcript data
182+
183+
The `token_usage` table records `cache_read_tokens` and `cache_write_tokens`. Use those fields to check whether prompt-cache changes are paying off.
184+
140185
## Adding a New Language
141186

142187
1. Create language directory:

prompts/en/channel.md.j2

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ When in doubt, skip. Being a lurker who speaks when it matters is better than be
164164

165165
{{ worker_capabilities }}
166166

167+
{{ system_prompt_cache_boundary }}
168+
167169
{%- if available_channels %}
168170
{{ available_channels }}
169171
{%- endif %}

src/llm/anthropic/params.rs

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,21 +132,40 @@ fn build_system_prompt(
132132
}
133133

134134
if let Some(preamble) = &request.preamble {
135-
let mut preamble_block = serde_json::json!({
136-
"type": "text",
137-
"text": preamble,
138-
});
139-
if let Some(cc) = cache_control {
140-
preamble_block["cache_control"] = cc.clone();
135+
if let Some((stable_prefix, volatile_suffix)) =
136+
crate::prompts::engine::split_system_prompt_cache_boundary(preamble)
137+
{
138+
push_system_text_block(&mut system_blocks, stable_prefix, cache_control);
139+
push_system_text_block(&mut system_blocks, volatile_suffix, &None);
140+
} else {
141+
push_system_text_block(&mut system_blocks, preamble, cache_control);
141142
}
142-
system_blocks.push(preamble_block);
143143
}
144144

145145
if !system_blocks.is_empty() {
146146
body["system"] = serde_json::json!(system_blocks);
147147
}
148148
}
149149

150+
fn push_system_text_block(
151+
system_blocks: &mut Vec<serde_json::Value>,
152+
text: &str,
153+
cache_control: &Option<serde_json::Value>,
154+
) {
155+
if text.trim().is_empty() {
156+
return;
157+
}
158+
159+
let mut block = serde_json::json!({
160+
"type": "text",
161+
"text": text,
162+
});
163+
if let Some(cache_control) = cache_control {
164+
block["cache_control"] = cache_control.clone();
165+
}
166+
system_blocks.push(block);
167+
}
168+
150169
/// Build tool definitions, optionally normalizing names. Returns the original
151170
/// tool (name, description) pairs for reverse-mapping on response.
152171
fn build_tools(
@@ -201,6 +220,23 @@ fn build_tools(
201220
#[cfg(test)]
202221
mod tests {
203222
use super::*;
223+
use rig::completion::{Message, ToolDefinition};
224+
use rig::one_or_many::OneOrMany;
225+
226+
fn completion_request_with_preamble(preamble: &str) -> CompletionRequest {
227+
CompletionRequest {
228+
model: None,
229+
preamble: Some(preamble.to_string()),
230+
chat_history: OneOrMany::one(Message::user("hello")),
231+
documents: Vec::new(),
232+
tools: Vec::new(),
233+
temperature: None,
234+
max_tokens: None,
235+
tool_choice: None,
236+
additional_params: None,
237+
output_schema: None,
238+
}
239+
}
204240

205241
#[test]
206242
fn adaptive_thinking_detected_for_4_6_models() {
@@ -218,4 +254,97 @@ mod tests {
218254
assert!(!supports_adaptive_thinking("claude-opus-4-0"));
219255
assert!(!supports_adaptive_thinking("gpt-4o"));
220256
}
257+
258+
#[test]
259+
fn system_prompt_cache_boundary_splits_preamble_cache_control() {
260+
let request = completion_request_with_preamble(&format!(
261+
"stable prefix\n{}\nvolatile suffix",
262+
crate::prompts::engine::SYSTEM_PROMPT_CACHE_BOUNDARY
263+
));
264+
let expected_cache_control = serde_json::json!({"type": "ephemeral"});
265+
let cache_control = Some(expected_cache_control.clone());
266+
let mut body = serde_json::json!({});
267+
268+
build_system_prompt(&mut body, &request, false, &cache_control);
269+
270+
let system_blocks = body["system"]
271+
.as_array()
272+
.expect("system prompt should be an array");
273+
assert_eq!(system_blocks.len(), 2);
274+
assert_eq!(system_blocks[0]["text"], "stable prefix\n");
275+
assert_eq!(system_blocks[0]["cache_control"], expected_cache_control);
276+
assert_eq!(system_blocks[1]["text"], "\nvolatile suffix");
277+
assert!(system_blocks[1].get("cache_control").is_none());
278+
}
279+
280+
#[test]
281+
fn system_prompt_without_cache_boundary_preserves_existing_cache_behavior() {
282+
let request = completion_request_with_preamble("stable prompt");
283+
let expected_cache_control = serde_json::json!({"type": "ephemeral"});
284+
let cache_control = Some(expected_cache_control.clone());
285+
let mut body = serde_json::json!({});
286+
287+
build_system_prompt(&mut body, &request, false, &cache_control);
288+
289+
let system_blocks = body["system"]
290+
.as_array()
291+
.expect("system prompt should be an array");
292+
assert_eq!(system_blocks.len(), 1);
293+
assert_eq!(system_blocks[0]["text"], "stable prompt");
294+
assert_eq!(system_blocks[0]["cache_control"], expected_cache_control);
295+
}
296+
297+
#[test]
298+
fn build_anthropic_request_keeps_cache_boundary_out_of_volatile_system_block() {
299+
let client = reqwest::Client::new();
300+
let mut request = completion_request_with_preamble(&format!(
301+
"stable prefix\n{}\nvolatile suffix",
302+
crate::prompts::engine::SYSTEM_PROMPT_CACHE_BOUNDARY
303+
));
304+
request.tools = vec![ToolDefinition {
305+
name: "reply".to_string(),
306+
description: "Send a reply".to_string(),
307+
parameters: serde_json::json!({
308+
"type": "object",
309+
"properties": {
310+
"text": {"type": "string"}
311+
}
312+
}),
313+
}];
314+
315+
let anthropic_request = build_anthropic_request(
316+
&client,
317+
"sk-ant-test",
318+
"https://api.anthropic.com",
319+
"claude-sonnet-4-5",
320+
&request,
321+
"auto",
322+
false,
323+
);
324+
let http_request = anthropic_request
325+
.builder
326+
.build()
327+
.expect("request should build");
328+
let body = http_request
329+
.body()
330+
.and_then(reqwest::Body::as_bytes)
331+
.expect("request body should be buffered JSON");
332+
let body: serde_json::Value =
333+
serde_json::from_slice(body).expect("request body should be JSON");
334+
335+
let system_blocks = body["system"]
336+
.as_array()
337+
.expect("system prompt should be an array");
338+
assert_eq!(system_blocks.len(), 2);
339+
assert!(system_blocks[0]["cache_control"].is_object());
340+
assert!(system_blocks[1].get("cache_control").is_none());
341+
assert_eq!(system_blocks[0]["text"], "stable prefix\n");
342+
assert_eq!(system_blocks[1]["text"], "\nvolatile suffix");
343+
344+
let tools = body["tools"]
345+
.as_array()
346+
.expect("tool definitions should be an array");
347+
assert_eq!(tools.len(), 1);
348+
assert!(tools[0]["cache_control"].is_object());
349+
}
221350
}

src/llm/model.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,7 @@ impl SpacebotModel {
833833
let mut messages = Vec::new();
834834

835835
if let Some(preamble) = &request.preamble {
836+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
836837
messages.push(serde_json::json!({
837838
"role": "system",
838839
"content": preamble,
@@ -945,6 +946,7 @@ impl SpacebotModel {
945946
});
946947

947948
if let Some(preamble) = &request.preamble {
949+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
948950
body["instructions"] = serde_json::json!(preamble);
949951
} else if is_chatgpt_codex {
950952
body["instructions"] = serde_json::json!(
@@ -1071,6 +1073,7 @@ impl SpacebotModel {
10711073
});
10721074

10731075
if let Some(preamble) = &request.preamble {
1076+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
10741077
body["instructions"] = serde_json::json!(preamble);
10751078
} else if is_chatgpt_codex {
10761079
body["instructions"] = serde_json::json!(
@@ -1380,6 +1383,7 @@ impl SpacebotModel {
13801383
let mut messages = Vec::new();
13811384

13821385
if let Some(preamble) = &request.preamble {
1386+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
13831387
messages.push(serde_json::json!({
13841388
"role": "system",
13851389
"content": preamble,
@@ -1472,6 +1476,7 @@ impl SpacebotModel {
14721476
let mut messages = Vec::new();
14731477

14741478
if let Some(preamble) = &request.preamble {
1479+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
14751480
messages.push(serde_json::json!({
14761481
"role": "system",
14771482
"content": preamble,

src/mcp.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,7 @@ impl McpManager {
605605
}
606606
}
607607

608+
names.sort();
608609
names
609610
}
610611

@@ -853,6 +854,68 @@ fn interpolate_env_placeholders(value: &str) -> String {
853854
mod tests {
854855
use super::*;
855856

857+
fn test_mcp_config(name: &str) -> McpServerConfig {
858+
McpServerConfig {
859+
name: name.to_string(),
860+
enabled: true,
861+
transport: McpTransport::Stdio {
862+
command: "test".to_string(),
863+
args: Vec::new(),
864+
env: HashMap::new(),
865+
},
866+
}
867+
}
868+
869+
fn test_tool(name: &str, description: Option<&str>) -> rmcp::model::Tool {
870+
let mut tool = rmcp::model::Tool::default();
871+
tool.name = Cow::Owned(name.to_string());
872+
tool.description = description.map(|description| Cow::Owned(description.to_string()));
873+
tool
874+
}
875+
876+
#[tokio::test]
877+
async fn get_tool_names_returns_deterministic_sorted_names() {
878+
let manager = McpManager::new(Vec::new());
879+
880+
let later_connection = Arc::new(McpConnection::new(test_mcp_config("z_server")));
881+
{
882+
let mut tools = later_connection.tools.write().await;
883+
*tools = vec![test_tool("z_tool", Some("z desc"))];
884+
}
885+
{
886+
let mut state = later_connection.state.write().await;
887+
*state = McpConnectionState::Connected;
888+
}
889+
890+
let earlier_connection = Arc::new(McpConnection::new(test_mcp_config("a_server")));
891+
{
892+
let mut tools = earlier_connection.tools.write().await;
893+
*tools = vec![
894+
test_tool("b_tool", None),
895+
test_tool("a_tool", Some("a desc")),
896+
];
897+
}
898+
{
899+
let mut state = earlier_connection.state.write().await;
900+
*state = McpConnectionState::Connected;
901+
}
902+
903+
{
904+
let mut connections = manager.connections.write().await;
905+
connections.insert("z_server".to_string(), later_connection);
906+
connections.insert("a_server".to_string(), earlier_connection);
907+
}
908+
909+
assert_eq!(
910+
manager.get_tool_names().await,
911+
vec![
912+
"a_tool — a desc",
913+
"b_tool — from a_server",
914+
"z_tool — z desc"
915+
]
916+
);
917+
}
918+
856919
#[test]
857920
fn parse_bearer_token_strips_bearer_prefix() {
858921
let token = parse_bearer_token("Bearer abc123", "test").unwrap();

src/prompts.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pub mod engine;
22
pub mod text;
33

4-
pub use engine::{PromptEngine, SkillInfo};
4+
pub use engine::{PromptEngine, SkillInfo, strip_system_prompt_cache_boundary};
55
pub use text::{get as get_text, init as init_language};

0 commit comments

Comments
 (0)