Skip to content

Commit f3163da

Browse files
committed
feat: add stable prompt cache boundary
1 parent 0c55f54 commit f3163da

7 files changed

Lines changed: 339 additions & 9 deletions

File tree

docs/content/docs/(core)/prompts.mdx

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,22 @@ The channel system prompt is the most complex, assembled from multiple dynamic c
124124
125125
{{ worker_capabilities }}
126126
127+
{{ system_prompt_cache_boundary }}
128+
129+
{%- if available_channels %}
130+
{{ available_channels }}
131+
{%- endif %}
132+
133+
{%- if working_memory %}
134+
{{ working_memory }}
135+
{%- endif %}
136+
137+
{%- if knowledge_synthesis %}
138+
## Knowledge Context
139+
140+
{{ knowledge_synthesis }}
141+
{%- endif %}
142+
127143
{%- if conversation_context %}
128144
## Conversation Context
129145
@@ -137,6 +153,35 @@ The channel system prompt is the most complex, assembled from multiple dynamic c
137153
{%- endif %}
138154
```
139155

156+
## Prompt Cache Boundary
157+
158+
The channel prompt includes a cache boundary after the stable instruction prefix and before volatile runtime context. For Anthropic requests, the system prompt is split at that marker: the stable prefix receives `cache_control`, while the volatile sections (status, working memory, knowledge context, channel activity, and conversation context) do not.
159+
160+
For all other providers, the marker is stripped from the prompt before the instructions are sent.
161+
162+
Keep stable sections above the boundary:
163+
164+
- identity context
165+
- base channel rules
166+
- adapter guidance
167+
- skills
168+
- worker capabilities
169+
170+
Keep volatile sections below it:
171+
172+
- available channels
173+
- org and project context
174+
- working memory
175+
- channel activity
176+
- participant context
177+
- knowledge context
178+
- conversation context
179+
- current status
180+
- message coalescing hints
181+
- backfilled transcript data
182+
183+
The `token_usage` table records `cache_read_tokens` and `cache_write_tokens`. Use those fields to check whether prompt-cache changes are paying off.
184+
140185
## Adding a New Language
141186

142187
1. Create language directory:

prompts/en/channel.md.j2

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ When in doubt, skip. Being a lurker who speaks when it matters is better than be
164164

165165
{{ worker_capabilities }}
166166

167+
{{ system_prompt_cache_boundary }}
168+
167169
{%- if available_channels %}
168170
{{ available_channels }}
169171
{%- endif %}

src/llm/anthropic/params.rs

Lines changed: 136 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -132,21 +132,40 @@ fn build_system_prompt(
132132
}
133133

134134
if let Some(preamble) = &request.preamble {
135-
let mut preamble_block = serde_json::json!({
136-
"type": "text",
137-
"text": preamble,
138-
});
139-
if let Some(cc) = cache_control {
140-
preamble_block["cache_control"] = cc.clone();
135+
if let Some((stable_prefix, volatile_suffix)) =
136+
crate::prompts::engine::split_system_prompt_cache_boundary(preamble)
137+
{
138+
push_system_text_block(&mut system_blocks, stable_prefix, cache_control);
139+
push_system_text_block(&mut system_blocks, volatile_suffix, &None);
140+
} else {
141+
push_system_text_block(&mut system_blocks, preamble, cache_control);
141142
}
142-
system_blocks.push(preamble_block);
143143
}
144144

145145
if !system_blocks.is_empty() {
146146
body["system"] = serde_json::json!(system_blocks);
147147
}
148148
}
149149

150+
fn push_system_text_block(
151+
system_blocks: &mut Vec<serde_json::Value>,
152+
text: &str,
153+
cache_control: &Option<serde_json::Value>,
154+
) {
155+
if text.trim().is_empty() {
156+
return;
157+
}
158+
159+
let mut block = serde_json::json!({
160+
"type": "text",
161+
"text": text,
162+
});
163+
if let Some(cache_control) = cache_control {
164+
block["cache_control"] = cache_control.clone();
165+
}
166+
system_blocks.push(block);
167+
}
168+
150169
/// Build tool definitions, optionally normalizing names. Returns the original
151170
/// tool (name, description) pairs for reverse-mapping on response.
152171
fn build_tools(
@@ -201,6 +220,23 @@ fn build_tools(
201220
#[cfg(test)]
202221
mod tests {
203222
use super::*;
223+
use rig::completion::{Message, ToolDefinition};
224+
use rig::one_or_many::OneOrMany;
225+
226+
fn completion_request_with_preamble(preamble: &str) -> CompletionRequest {
227+
CompletionRequest {
228+
model: None,
229+
preamble: Some(preamble.to_string()),
230+
chat_history: OneOrMany::one(Message::user("hello")),
231+
documents: Vec::new(),
232+
tools: Vec::new(),
233+
temperature: None,
234+
max_tokens: None,
235+
tool_choice: None,
236+
additional_params: None,
237+
output_schema: None,
238+
}
239+
}
204240

205241
#[test]
206242
fn adaptive_thinking_detected_for_4_6_models() {
@@ -218,4 +254,97 @@ mod tests {
218254
assert!(!supports_adaptive_thinking("claude-opus-4-0"));
219255
assert!(!supports_adaptive_thinking("gpt-4o"));
220256
}
257+
258+
#[test]
259+
fn system_prompt_cache_boundary_splits_preamble_cache_control() {
260+
let request = completion_request_with_preamble(&format!(
261+
"stable prefix\n{}\nvolatile suffix",
262+
crate::prompts::engine::SYSTEM_PROMPT_CACHE_BOUNDARY
263+
));
264+
let expected_cache_control = serde_json::json!({"type": "ephemeral"});
265+
let cache_control = Some(expected_cache_control.clone());
266+
let mut body = serde_json::json!({});
267+
268+
build_system_prompt(&mut body, &request, false, &cache_control);
269+
270+
let system_blocks = body["system"]
271+
.as_array()
272+
.expect("system prompt should be an array");
273+
assert_eq!(system_blocks.len(), 2);
274+
assert_eq!(system_blocks[0]["text"], "stable prefix\n");
275+
assert_eq!(system_blocks[0]["cache_control"], expected_cache_control);
276+
assert_eq!(system_blocks[1]["text"], "\nvolatile suffix");
277+
assert!(system_blocks[1].get("cache_control").is_none());
278+
}
279+
280+
#[test]
281+
fn system_prompt_without_cache_boundary_preserves_existing_cache_behavior() {
282+
let request = completion_request_with_preamble("stable prompt");
283+
let expected_cache_control = serde_json::json!({"type": "ephemeral"});
284+
let cache_control = Some(expected_cache_control.clone());
285+
let mut body = serde_json::json!({});
286+
287+
build_system_prompt(&mut body, &request, false, &cache_control);
288+
289+
let system_blocks = body["system"]
290+
.as_array()
291+
.expect("system prompt should be an array");
292+
assert_eq!(system_blocks.len(), 1);
293+
assert_eq!(system_blocks[0]["text"], "stable prompt");
294+
assert_eq!(system_blocks[0]["cache_control"], expected_cache_control);
295+
}
296+
297+
#[test]
298+
fn build_anthropic_request_keeps_cache_boundary_out_of_volatile_system_block() {
299+
let client = reqwest::Client::new();
300+
let mut request = completion_request_with_preamble(&format!(
301+
"stable prefix\n{}\nvolatile suffix",
302+
crate::prompts::engine::SYSTEM_PROMPT_CACHE_BOUNDARY
303+
));
304+
request.tools = vec![ToolDefinition {
305+
name: "reply".to_string(),
306+
description: "Send a reply".to_string(),
307+
parameters: serde_json::json!({
308+
"type": "object",
309+
"properties": {
310+
"text": {"type": "string"}
311+
}
312+
}),
313+
}];
314+
315+
let anthropic_request = build_anthropic_request(
316+
&client,
317+
"sk-ant-test",
318+
"https://api.anthropic.com",
319+
"claude-sonnet-4-5",
320+
&request,
321+
"auto",
322+
false,
323+
);
324+
let http_request = anthropic_request
325+
.builder
326+
.build()
327+
.expect("request should build");
328+
let body = http_request
329+
.body()
330+
.and_then(reqwest::Body::as_bytes)
331+
.expect("request body should be buffered JSON");
332+
let body: serde_json::Value =
333+
serde_json::from_slice(body).expect("request body should be JSON");
334+
335+
let system_blocks = body["system"]
336+
.as_array()
337+
.expect("system prompt should be an array");
338+
assert_eq!(system_blocks.len(), 2);
339+
assert!(system_blocks[0]["cache_control"].is_object());
340+
assert!(system_blocks[1].get("cache_control").is_none());
341+
assert_eq!(system_blocks[0]["text"], "stable prefix\n");
342+
assert_eq!(system_blocks[1]["text"], "\nvolatile suffix");
343+
344+
let tools = body["tools"]
345+
.as_array()
346+
.expect("tool definitions should be an array");
347+
assert_eq!(tools.len(), 1);
348+
assert!(tools[0]["cache_control"].is_object());
349+
}
221350
}

src/llm/model.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,7 @@ impl SpacebotModel {
833833
let mut messages = Vec::new();
834834

835835
if let Some(preamble) = &request.preamble {
836+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
836837
messages.push(serde_json::json!({
837838
"role": "system",
838839
"content": preamble,
@@ -945,6 +946,7 @@ impl SpacebotModel {
945946
});
946947

947948
if let Some(preamble) = &request.preamble {
949+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
948950
body["instructions"] = serde_json::json!(preamble);
949951
} else if is_chatgpt_codex {
950952
body["instructions"] = serde_json::json!(
@@ -1071,6 +1073,7 @@ impl SpacebotModel {
10711073
});
10721074

10731075
if let Some(preamble) = &request.preamble {
1076+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
10741077
body["instructions"] = serde_json::json!(preamble);
10751078
} else if is_chatgpt_codex {
10761079
body["instructions"] = serde_json::json!(
@@ -1380,6 +1383,7 @@ impl SpacebotModel {
13801383
let mut messages = Vec::new();
13811384

13821385
if let Some(preamble) = &request.preamble {
1386+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
13831387
messages.push(serde_json::json!({
13841388
"role": "system",
13851389
"content": preamble,
@@ -1472,6 +1476,7 @@ impl SpacebotModel {
14721476
let mut messages = Vec::new();
14731477

14741478
if let Some(preamble) = &request.preamble {
1479+
let preamble = crate::prompts::strip_system_prompt_cache_boundary(preamble);
14751480
messages.push(serde_json::json!({
14761481
"role": "system",
14771482
"content": preamble,

src/mcp.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,7 @@ impl McpManager {
605605
}
606606
}
607607

608+
names.sort();
608609
names
609610
}
610611

@@ -853,6 +854,68 @@ fn interpolate_env_placeholders(value: &str) -> String {
853854
mod tests {
854855
use super::*;
855856

857+
fn test_mcp_config(name: &str) -> McpServerConfig {
858+
McpServerConfig {
859+
name: name.to_string(),
860+
enabled: true,
861+
transport: McpTransport::Stdio {
862+
command: "test".to_string(),
863+
args: Vec::new(),
864+
env: HashMap::new(),
865+
},
866+
}
867+
}
868+
869+
fn test_tool(name: &str, description: Option<&str>) -> rmcp::model::Tool {
870+
let mut tool = rmcp::model::Tool::default();
871+
tool.name = Cow::Owned(name.to_string());
872+
tool.description = description.map(|description| Cow::Owned(description.to_string()));
873+
tool
874+
}
875+
876+
#[tokio::test]
877+
async fn get_tool_names_returns_deterministic_sorted_names() {
878+
let manager = McpManager::new(Vec::new());
879+
880+
let later_connection = Arc::new(McpConnection::new(test_mcp_config("z_server")));
881+
{
882+
let mut tools = later_connection.tools.write().await;
883+
*tools = vec![test_tool("z_tool", Some("z desc"))];
884+
}
885+
{
886+
let mut state = later_connection.state.write().await;
887+
*state = McpConnectionState::Connected;
888+
}
889+
890+
let earlier_connection = Arc::new(McpConnection::new(test_mcp_config("a_server")));
891+
{
892+
let mut tools = earlier_connection.tools.write().await;
893+
*tools = vec![
894+
test_tool("b_tool", None),
895+
test_tool("a_tool", Some("a desc")),
896+
];
897+
}
898+
{
899+
let mut state = earlier_connection.state.write().await;
900+
*state = McpConnectionState::Connected;
901+
}
902+
903+
{
904+
let mut connections = manager.connections.write().await;
905+
connections.insert("z_server".to_string(), later_connection);
906+
connections.insert("a_server".to_string(), earlier_connection);
907+
}
908+
909+
assert_eq!(
910+
manager.get_tool_names().await,
911+
vec![
912+
"a_tool — a desc",
913+
"b_tool — from a_server",
914+
"z_tool — z desc"
915+
]
916+
);
917+
}
918+
856919
#[test]
857920
fn parse_bearer_token_strips_bearer_prefix() {
858921
let token = parse_bearer_token("Bearer abc123", "test").unwrap();

src/prompts.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pub mod engine;
22
pub mod text;
33

4-
pub use engine::{PromptEngine, SkillInfo};
4+
pub use engine::{PromptEngine, SkillInfo, strip_system_prompt_cache_boundary};
55
pub use text::{get as get_text, init as init_language};

0 commit comments

Comments
 (0)