Skip to content

Commit bd83123

Browse files
committed
Implement cost optimizations: caching, token-efficient tools, and output truncation
1 parent 5183c96 commit bd83123

5 files changed

Lines changed: 60 additions & 13 deletions

File tree

src/api/anthropic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ impl AnthropicClient {
2323
.map_err(|e| SofosError::Config(format!("Invalid API key format: {}", e)))?,
2424
);
2525
headers.insert("anthropic-version", HeaderValue::from_static(API_VERSION));
26+
headers.insert(
27+
"anthropic-beta",
28+
HeaderValue::from_static("token-efficient-tools-2025-02-19"),
29+
);
2630
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
2731

2832
let client = reqwest::Client::builder()

src/repl/request_builder.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,16 @@ impl<'a> RequestBuilder<'a> {
6262
reasoning: reasoning_config,
6363
};
6464

65-
// For Anthropic, drop tool cache metadata to avoid cache block limits
65+
// For Anthropic, enable cache on last tool to mark cache breakpoint
6666
if matches!(self.client, Anthropic(_)) {
6767
if let Some(tools) = request.tools.as_mut() {
68-
for tool in tools.iter_mut() {
69-
match tool {
70-
crate::api::Tool::Regular { cache_control, .. }
71-
| crate::api::Tool::AnthropicWebSearch { cache_control, .. } => {
72-
*cache_control = None;
68+
if let Some(last_tool) = tools.last_mut() {
69+
match last_tool {
70+
Tool::Regular { cache_control, .. }
71+
| Tool::AnthropicWebSearch { cache_control, .. } => {
72+
*cache_control = Some(crate::api::CacheControl::ephemeral(None));
7373
}
74-
crate::api::Tool::OpenAIWebSearch { .. } => {}
74+
Tool::OpenAIWebSearch { .. } => {}
7575
}
7676
}
7777
}

src/tools/bashexec.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@ use std::process::Command;
66
use std::sync::{Arc, Mutex};
77

88
const MAX_OUTPUT_SIZE: usize = 10 * 1024 * 1024; // 10MB limit
9+
const MAX_TOOL_OUTPUT_TOKENS: usize = 8_000; // ~32KB of text at the 4-bytes-per-token estimate; prevents excessive context usage
10+
/// Truncate bash output if it exceeds the token limit, for context efficiency.
///
/// The token count is estimated as `content.len() / 4` (bytes, not characters).
/// When the estimate exceeds `max_tokens`, the first `max_tokens * 4` bytes are
/// kept (rounded down to the nearest UTF-8 character boundary) and a
/// `[TRUNCATED: ...]` notice reporting the estimated and shown token counts is
/// appended. Otherwise the content is returned unchanged.
fn truncate_for_context(content: &str, max_tokens: usize) -> String {
    let estimated_tokens = content.len() / 4;
    if estimated_tokens <= max_tokens {
        return content.to_string();
    }

    // `estimated_tokens > max_tokens` implies `max_tokens * 4 < content.len()`,
    // so this can neither overflow nor run past the end of the string.
    let mut truncate_at = max_tokens * 4;

    // Slicing a `str` at a byte offset inside a multi-byte character panics,
    // so step back to the closest boundary. Offset 0 is always a boundary,
    // which guarantees termination.
    while !content.is_char_boundary(truncate_at) {
        truncate_at -= 1;
    }

    format!(
        "{}...\n\n[TRUNCATED: Output has ~{} tokens, showing first ~{} tokens. Re-run with output redirection if you need the full output.]",
        &content[..truncate_at],
        estimated_tokens,
        max_tokens
    )
}
927

1028
/// Convert Unix signal number to human-readable name
1129
#[cfg(unix)]
@@ -157,10 +175,11 @@ impl BashExecutor {
157175
}
158176
}
159177
};
160-
return Ok(format!(
178+
let error_output = format!(
161179
"Command failed with {}\nSTDOUT:\n{}\nSTDERR:\n{}",
162180
exit_info, stdout, stderr
163-
));
181+
);
182+
return Ok(truncate_for_context(&error_output, MAX_TOOL_OUTPUT_TOKENS));
164183
}
165184

166185
let mut result = String::new();
@@ -180,7 +199,7 @@ impl BashExecutor {
180199
result = "Command executed successfully (no output)".to_string();
181200
}
182201

183-
Ok(result)
202+
Ok(truncate_for_context(&result, MAX_TOOL_OUTPUT_TOKENS))
184203
}
185204

186205
fn enforce_read_permissions(

src/tools/filesystem.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,24 @@ use std::fs;
44
use std::path::{Path, PathBuf};
55

66
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; // 50MB limit
7+
const MAX_TOOL_OUTPUT_TOKENS: usize = 8_000; // ~32KB of text at the 4-bytes-per-token estimate; prevents excessive context usage
8+
/// Truncate file content if it exceeds the token limit, for context efficiency.
///
/// The token count is estimated as `content.len() / 4` (bytes, not characters).
/// When the estimate exceeds `max_tokens`, the first `max_tokens * 4` bytes are
/// kept (rounded down to the nearest UTF-8 character boundary) and a
/// `[TRUNCATED: ...]` notice reporting the estimated and shown token counts is
/// appended. Otherwise the content is returned unchanged.
fn truncate_for_context(content: &str, max_tokens: usize) -> String {
    let estimated_tokens = content.len() / 4;
    if estimated_tokens <= max_tokens {
        return content.to_string();
    }

    // `estimated_tokens > max_tokens` implies `max_tokens * 4 < content.len()`,
    // so this can neither overflow nor run past the end of the string.
    let mut truncate_at = max_tokens * 4;

    // Slicing a `str` at a byte offset inside a multi-byte character panics,
    // so step back to the closest boundary. Offset 0 is always a boundary,
    // which guarantees termination.
    while !content.is_char_boundary(truncate_at) {
        truncate_at -= 1;
    }

    format!(
        "{}...\n\n[TRUNCATED: File has ~{} tokens, showing first ~{} tokens. Use search_code or request specific line ranges if you need more.]",
        &content[..truncate_at],
        estimated_tokens,
        max_tokens
    )
}
725

826
/// FileSystemTool provides secure file operations sandboxed to a workspace directory
927
#[derive(Clone)]
@@ -87,8 +105,10 @@ impl FileSystemTool {
87105
)));
88106
}
89107

90-
fs::read_to_string(&validated_path)
91-
.with_context(|| format!("Failed to read file: {}", path))
108+
let content = fs::read_to_string(&validated_path)
109+
.with_context(|| format!("Failed to read file: {}", path))?;
110+
111+
Ok(truncate_for_context(&content, MAX_TOOL_OUTPUT_TOKENS))
92112
}
93113

94114
/// Read a file that may be outside the workspace
@@ -123,7 +143,10 @@ impl FileSystemTool {
123143
)));
124144
}
125145

126-
fs::read_to_string(&canonical).with_context(|| format!("Failed to read file: {}", original))
146+
let content = fs::read_to_string(&canonical)
147+
.with_context(|| format!("Failed to read file: {}", original))?;
148+
149+
Ok(truncate_for_context(&content, MAX_TOOL_OUTPUT_TOKENS))
127150
}
128151

129152
pub fn write_file(&self, path: &str, content: &str) -> Result<()> {

src/ui/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ impl UI {
527527
"claude-opus-4-5" => (5.0, 25.0),
528528
"claude-haiku-4-5" => (1.0, 5.0),
529529
"gpt-5.1-codex-max" | "gpt-5.1-codex" | "gpt-5-codex" => (1.25, 10.0),
530+
"gpt-5.2" => (1.75, 14.0),
530531
// Default fallback (use Sonnet 4.5 pricing)
531532
_ => (3.0, 15.0),
532533
};

0 commit comments

Comments
 (0)