Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::build;
use crate::bus::{Bus, BusEvent, SubagentStatus, ToolEvent, ToolStatus};
use crate::cache_tracker::CacheTracker;
use crate::compaction::CompactionEvent;
use crate::prefix_cache_stable;
use crate::id;
use crate::logging;
use crate::message::{
Expand Down Expand Up @@ -522,6 +523,19 @@ impl Agent {
self.note_compaction_applied();
self.persist_session_best_effort("compaction completion");
}
let messages = if prefix_cache_stable::is_prefix_cache_stable_mode() {
let (truncated, truncate_count) =
prefix_cache_stable::truncate_tool_results_for_api(&messages);
if truncate_count > 0 {
logging::info(&format!(
"Prefix-cache mode: truncated {} tool results for API",
truncate_count
));
}
truncated
Comment on lines +527 to +535
} else {
messages
};
let user_count = messages
.iter()
.filter(|message| matches!(message.role, Role::User))
Expand All @@ -543,6 +557,19 @@ impl Agent {

let all_messages = self.session.provider_messages();
let messages = all_messages.to_vec();
let messages = if prefix_cache_stable::is_prefix_cache_stable_mode() {
let (truncated, truncate_count) =
prefix_cache_stable::truncate_tool_results_for_api(&messages);
if truncate_count > 0 {
logging::info(&format!(
"Prefix-cache mode: truncated {} tool results for API (session path)",
truncate_count
));
}
Comment on lines +560 to +568
truncated
} else {
messages
};
let user_count = messages
.iter()
.filter(|message| matches!(message.role, Role::User))
Expand Down
30 changes: 30 additions & 0 deletions src/agent/turn_loops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,29 @@ impl Agent {
// false-positive violations every turn (prior turn's memory ≠ current history prefix).
self.record_client_cache_request(&messages);

// Preflight check for DeepSeek prefix-cache stability mode.
// If the payload is near the context limit, warn early so compaction
// can fold history before the API returns a 400.
if prefix_cache_stable::is_prefix_cache_stable_mode() {
let preflight =
prefix_cache_stable::preflight_check(&messages, &tools, &self.provider.model());
if preflight.needs_action {
logging::warn(&format!(
"Prefix-cache preflight: context at {:.1}% ({} / {} tokens) — emergency fold recommended",
preflight.ratio * 100.0,
preflight.estimate_tokens,
preflight.ctx_max,
));
} else if preflight.ratio > 0.5 {
logging::info(&format!(
"Prefix-cache preflight: context at {:.1}% ({} / {} tokens)",
preflight.ratio * 100.0,
preflight.estimate_tokens,
preflight.ctx_max,
));
}
}

// Inject memory as a user message at the end (preserves cache prefix)
let mut messages_with_memory: Vec<Message> = messages.iter().cloned().collect();
if let Some(memory) = memory_pending.as_ref() {
Expand Down Expand Up @@ -503,6 +526,13 @@ impl Agent {
usage_cache_read,
usage_cache_creation,
);
// Record cache usage for prefix-cache hit-rate tracking
self.cache_tracker.record_usage(usage_cache_read, usage_input.unwrap_or(0));
if prefix_cache_stable::is_prefix_cache_stable_mode()
&& self.cache_tracker.usage_turn_count() % 5 == 0
{
logging::info(&format!("Prefix-cache stats: {}", self.cache_tracker.cache_hit_summary()));
Comment on lines +529 to +534
}
}

if print_output
Expand Down
59 changes: 59 additions & 0 deletions src/cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ pub struct CacheTracker {
hash_history: VecDeque<u64>,
/// Whether append-only was violated on the last request
last_violation: Option<CacheViolation>,
/// Cumulative cache hit tokens (from provider-reported usage)
cache_hit_tokens: u64,
/// Cumulative cache miss tokens (from provider-reported usage)
cache_miss_tokens: u64,
/// Number of turns with usage data recorded
usage_turns: u32,
}

/// Information about a cache violation
Expand Down Expand Up @@ -207,6 +213,59 @@ impl CacheTracker {
pub fn had_violation(&self) -> bool {
// True exactly when `last_violation` currently holds a `Some(..)` value.
self.last_violation.is_some()
}

/// Accumulate one response's provider-reported token usage into the running totals.
///
/// * `cache_read_input_tokens` — tokens the provider reported as served from cache,
///   or `None` when the provider reported no cache-read figure.
/// * `input_tokens` — total input tokens reported for the request.
///
/// When a cache-read figure is present it is added to the hit total, and the
/// remainder of `input_tokens` (saturating at zero) is added to the miss total.
/// When it is absent, the whole of `input_tokens` is counted as a miss.
/// The recorded-turn counter is incremented on every call.
pub fn record_usage(&mut self, cache_read_input_tokens: Option<u64>, input_tokens: u64) {
    // Split this turn's input into (hit, miss) before touching any counters.
    let (turn_hit, turn_miss) = match cache_read_input_tokens {
        Some(cached) => (cached, input_tokens.saturating_sub(cached)),
        // No cache figure from the provider: treat everything as a miss.
        None => (0, input_tokens),
    };

    self.cache_hit_tokens += turn_hit;
    self.cache_miss_tokens += turn_miss;
    self.usage_turns += 1;
}

/// Running total of cache-hit tokens accumulated so far.
pub fn cache_hit_tokens(&self) -> u64 {
    self.cache_hit_tokens
}

/// Running total of cache-miss tokens accumulated so far.
pub fn cache_miss_tokens(&self) -> u64 {
    self.cache_miss_tokens
}

/// How many turns have been recorded in the usage counter so far.
pub fn usage_turn_count(&self) -> u32 {
    self.usage_turns
}

/// Fraction of all recorded tokens (hits plus misses) that were cache hits.
///
/// Returns `None` when the combined hit and miss total is zero, otherwise
/// `Some(hits / total)` computed in `f64`.
pub fn cache_hit_rate(&self) -> Option<f64> {
    match self.cache_hit_tokens + self.cache_miss_tokens {
        // Nothing recorded yet, so a ratio would be meaningless.
        0 => None,
        recorded => Some(self.cache_hit_tokens as f64 / recorded as f64),
    }
}

/// One-line, human-readable description of the cumulative cache statistics.
///
/// Yields a fixed placeholder string when `cache_hit_rate` returns `None`;
/// otherwise reports the hit percentage (one decimal place) together with the
/// raw hit, miss, and turn counters.
pub fn cache_hit_summary(&self) -> String {
    self.cache_hit_rate().map_or_else(
        || "no cache usage data yet".to_string(),
        |ratio| {
            let percent = ratio * 100.0;
            format!(
                "cache hit: {:.1}% ({} hit / {} miss tokens over {} turns)",
                percent, self.cache_hit_tokens, self.cache_miss_tokens, self.usage_turns
            )
        },
    )
}
}

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub mod build;
pub mod bus;
pub mod cache_tracker;
pub mod catchup;
pub mod prefix_cache_stable;
pub mod channel;
pub mod cli;
pub mod compaction;
Expand Down
Loading
Loading