|
1 | 1 | use crate::api::{Message, SystemPrompt, utils::truncate_at_char_boundary}; |
2 | 2 | use crate::config::SofosConfig; |
3 | 3 |
|
| 4 | +/// Hard floor on the number of messages `trim_if_needed` will keep, |
| 5 | +/// even when the per-message budget would normally drop more. Below |
| 6 | +/// this, conversations lose enough context that the model starts |
| 7 | +/// hallucinating prior tool results. |
| 8 | +const TRIM_MIN_MESSAGES: usize = 10; |
| 9 | + |
| 10 | +/// Per-end cap on retained characters when `truncate_tool_results` |
| 11 | +/// shortens a long tool output during compaction. The middle is |
| 12 | +/// replaced with an elision marker. |
| 13 | +const COMPACTION_TOOL_RESULT_KEEP_CHARS: usize = 500; |
| 14 | + |
4 | 15 | #[derive(Clone)] |
5 | 16 | pub struct ConversationHistory { |
6 | 17 | messages: Vec<Message>, |
7 | 18 | system_prompt: Vec<SystemPrompt>, |
8 | 19 | config: SofosConfig, |
9 | | - /// Set when `trim_if_needed` printed the floor-hit warning; cleared |
10 | | - /// the next time we end a trim under budget. Stops the warning from |
11 | | - /// firing on every message append once we're stuck at the 10-message |
12 | | - /// floor. |
| 20 | + /// Latches the floor-hit warning so it fires once per stuck-at-floor |
| 21 | + /// episode, not on every append. |
13 | 22 | warned_at_floor: bool, |
14 | 23 | /// Index of the message whose last block carries the secondary |
15 | 24 | /// Anthropic `cache_control` marker (the "anchor"). Stays put across |
@@ -300,17 +309,21 @@ Show imperial units only when the user explicitly asks for them."#, |
300 | 309 | // The warning describes our internal trim heuristic, not the |
301 | 310 | // model's API context window — those are different numbers. |
302 | 311 | // The condition below means: we tried to trim down to budget |
303 | | - // but hit the 10-message floor. The model API will still accept |
304 | | - // the request; this just warns the user that auto-trim can't |
305 | | - // help further. Dedup with `warned_at_floor` so a long agent |
306 | | - // loop doesn't print the warning on every tool round-trip. |
307 | | - let at_floor = total_tokens > self.config.max_context_tokens && self.messages.len() <= 10; |
| 312 | + // but hit the `TRIM_MIN_MESSAGES` floor. The model API will |
| 313 | + // still accept the request; this just warns the user that |
| 314 | + // auto-trim can't help further. Dedup with `warned_at_floor` |
| 315 | + // so a long agent loop doesn't print the warning on every |
| 316 | + // tool round-trip. |
| 317 | + let at_floor = total_tokens > self.config.max_context_tokens |
| 318 | + && self.messages.len() <= TRIM_MIN_MESSAGES; |
308 | 319 | if at_floor { |
309 | 320 | if !self.warned_at_floor { |
310 | 321 | eprintln!( |
311 | | - "⚠️ Auto-trim hit the 10-message floor at ~{} tokens (budget {}). \ |
| 322 | + "⚠️ Auto-trim hit the {floor}-message floor at ~{tokens} tokens (budget {budget}). \ |
312 | 323 | Run /compact or /clear if responses start degrading.", |
313 | | - total_tokens, self.config.max_context_tokens |
| 324 | + floor = TRIM_MIN_MESSAGES, |
| 325 | + tokens = total_tokens, |
| 326 | + budget = self.config.max_context_tokens, |
314 | 327 | ); |
315 | 328 | self.warned_at_floor = true; |
316 | 329 | } |
@@ -573,7 +586,7 @@ Show imperial units only when the user explicitly asks for them."#, |
573 | 586 | // stamp a marker on a now-mismatched position. |
574 | 587 | self.cache_anchor_message_idx = None; |
575 | 588 | let threshold = self.config.tool_result_truncate_threshold; |
576 | | - let keep_chars = 500; |
| 589 | + let keep_chars = COMPACTION_TOOL_RESULT_KEEP_CHARS; |
577 | 590 |
|
578 | 591 | for msg in self.messages[..up_to].iter_mut() { |
579 | 592 | if let crate::api::MessageContent::Blocks { content } = &mut msg.content { |
|
0 commit comments