Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion crates/jcode-provider-metadata/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,10 @@ mod tests {
assert_eq!(OLLAMA_PROFILE.default_model, None);
assert!(!OLLAMA_PROFILE.requires_api_key);

assert_eq!(OLLAMA_LOGIN_PROVIDER.auth_kind, LoginProviderAuthKind::Local);
assert_eq!(
OLLAMA_LOGIN_PROVIDER.auth_kind,
LoginProviderAuthKind::Local
);
assert_eq!(OLLAMA_LOGIN_PROVIDER.auth_status_method, "local endpoint");
assert!(matches!(
OLLAMA_LOGIN_PROVIDER.target,
Expand Down
61 changes: 48 additions & 13 deletions src/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ impl CompactionManager {
self.semantic_embed_cache.clear();
self.semantic_embed_cache_counter = 0;
self.total_turns = total_messages;
if state.compacted_count > total_messages {
crate::logging::warn(&format!(
"[compaction] restore: stale compacted_count={} exceeds messages.len()={}; clamping",
state.compacted_count, total_messages
));
}
self.compacted_count = state.compacted_count.min(total_messages);
self.active_message_chars = 0;
self.active_message_chars_dirty = total_messages > self.compacted_count;
Expand Down Expand Up @@ -287,6 +293,29 @@ impl CompactionManager {
self.active_message_chars_dirty = false;
}

/// Detect and repair a `compacted_count` that points past the end of the
/// caller-provided message list (e.g. after the transcript was cleared or
/// replaced with fewer messages than were previously compacted).
///
/// Returns `true` when a clamp was applied, `false` when state was already
/// consistent. `caller` is only used to tag the warning log line.
fn clamp_stale_compacted_count(&mut self, all_messages: &[Message], caller: &str) -> bool {
    // An empty list is treated as "no transcript yet", not as stale state;
    // leave the counters alone so a later restore can still reconcile them.
    if all_messages.is_empty() {
        return false;
    }

    // Consistent: the compacted prefix fits inside the current message list.
    if self.compacted_count <= all_messages.len() {
        return false;
    }

    crate::logging::warn(&format!(
        "[compaction] {}: stale compacted_count={} exceeds messages.len()={}; clamping",
        caller,
        self.compacted_count,
        all_messages.len()
    ));
    // Clamp the prefix to the full list: everything visible is now considered
    // compacted, so the active tail is empty and its cached char count is 0
    // (marked clean, since 0 is exact for an empty tail).
    self.compacted_count = all_messages.len();
    self.total_turns = all_messages.len();
    self.active_message_chars = 0;
    self.active_message_chars_dirty = false;
    // Any previously observed input-token count described the stale
    // transcript; drop it so estimates are recomputed from the clamped state.
    self.observed_input_tokens = None;
    true
}

pub fn restore_persisted_stored_state_with(
&mut self,
state: &crate::session::StoredCompactionState,
Expand Down Expand Up @@ -603,22 +632,17 @@ impl CompactionManager {
/// Get the active (uncompacted) messages from a full message list.
/// Skips the first `compacted_count` messages.
fn active_messages<'a>(&self, all_messages: &'a [Message]) -> &'a [Message] {
if self.compacted_count <= all_messages.len() {
&all_messages[self.compacted_count..]
} else {
// Edge case: messages were cleared/replaced with fewer items
all_messages
}
let start = self.compacted_count.min(all_messages.len());
&all_messages[start..]
}

fn active_message_chars_with(&self, all_messages: &[Message]) -> usize {
let active = self.active_messages(all_messages);
if self.active_message_chars_dirty
|| self.active_messages_count() != self.active_messages(all_messages).len()
|| self.active_messages_count() != active.len()
|| self.compacted_count > all_messages.len()
{
self.active_messages(all_messages)
.iter()
.map(message_char_count)
.sum()
active.iter().map(message_char_count).sum()
} else {
self.active_message_chars
}
Expand Down Expand Up @@ -901,6 +925,8 @@ impl CompactionManager {
/// Check if background compaction is done and apply it, updating rolling
/// token-estimate state from the provided full message list.
pub fn check_and_apply_compaction_with(&mut self, all_messages: &[Message]) {
self.clamp_stale_compacted_count(all_messages, "check_and_apply_compaction_with");

let task = match self.pending_task.take() {
Some(task) => task,
None => return,
Expand Down Expand Up @@ -931,7 +957,10 @@ impl CompactionManager {
};

// Advance the compacted count — these messages are now summarized
self.compacted_count += self.pending_cutoff;
self.compacted_count = self.compacted_count.saturating_add(self.pending_cutoff);
if !all_messages.is_empty() {
self.compacted_count = self.compacted_count.min(all_messages.len());
}
self.active_message_chars = self
.active_message_chars_with(all_messages)
.saturating_sub(compacted_chars);
Expand Down Expand Up @@ -1010,7 +1039,9 @@ impl CompactionManager {
/// Get messages for API call (with summary if compacted).
/// Takes the full message list from the caller.
pub fn messages_for_api_with(&mut self, all_messages: &[Message]) -> Vec<Message> {
self.clamp_stale_compacted_count(all_messages, "messages_for_api_with");
self.check_and_apply_compaction_with(all_messages);
self.clamp_stale_compacted_count(all_messages, "messages_for_api_with");
self.discard_oversized_openai_native_compaction();

let active = self.active_messages(all_messages);
Expand Down Expand Up @@ -1196,6 +1227,7 @@ impl CompactionManager {
/// exceed the token budget, progressively keeps fewer turns down to
/// `MIN_TURNS_TO_KEEP`.
pub fn hard_compact_with(&mut self, all_messages: &[Message]) -> Result<usize, String> {
self.clamp_stale_compacted_count(all_messages, "hard_compact_with");
let active = self.active_messages(all_messages);

if active.len() <= MIN_TURNS_TO_KEEP {
Expand Down Expand Up @@ -1257,7 +1289,10 @@ impl CompactionManager {
original_turn_count: cutoff,
};

self.compacted_count += cutoff;
self.compacted_count = self
.compacted_count
.saturating_add(cutoff)
.min(all_messages.len());
self.active_message_chars = remaining_suffix_chars[cutoff];
self.active_message_chars_dirty = false;
self.active_summary = Some(summary);
Expand Down
124 changes: 124 additions & 0 deletions src/compaction_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,130 @@ fn test_hard_compact_preserves_recent_turns() {
);
}

#[test]
fn test_bug_175_messages_for_api_self_heals_stale_compacted_count() {
    let mut mgr = CompactionManager::new().with_budget(1_000);

    // 30 real messages, each registered with the manager as it is added.
    let transcript: Vec<Message> = (0..30)
        .map(|i| make_text_message(Role::User, &format!("turn {} {}", i, "x".repeat(120))))
        .collect();
    for _ in &transcript {
        mgr.notify_message_added();
    }

    // Poison the manager with persisted state claiming far more history
    // than the caller's message list actually contains.
    mgr.compacted_count = 100;
    mgr.total_turns = 100;
    mgr.active_message_chars = transcript.iter().map(message_char_count).sum();
    mgr.active_message_chars_dirty = false;
    mgr.active_summary = Some(jcode_compaction_core::Summary {
        text: "# Existing summary".to_string(),
        openai_encrypted_content: None,
        covers_up_to_turn: 100,
        original_turn_count: 100,
    });

    let api_messages = mgr.messages_for_api_with(&transcript);

    assert_eq!(
        mgr.compacted_count(),
        transcript.len(),
        "stale compacted_count should be clamped to the caller's full message list"
    );
    assert_eq!(
        api_messages.len(),
        1,
        "stale compacted_count must not replay the full transcript after the summary"
    );
    assert_eq!(mgr.stats_with(&transcript).active_messages, 0);
}

#[test]
fn test_bug_175_active_messages_clamps_stale_compacted_count() {
    // Three messages, but a compacted_count pointing well past the end.
    let transcript: Vec<Message> = [
        (Role::User, "first"),
        (Role::Assistant, "second"),
        (Role::User, "third"),
    ]
    .into_iter()
    .map(|(role, text)| make_text_message(role, text))
    .collect();

    let mut mgr = CompactionManager::new();
    mgr.compacted_count = 10;
    mgr.total_turns = 10;

    let tail = mgr.active_messages(&transcript);
    assert!(
        tail.is_empty(),
        "a stale compacted_count must produce an empty active tail, not replay all messages"
    );
}

#[test]
fn test_bug_175_token_estimate_ignores_stale_cached_active_chars() {
    let mut mgr = CompactionManager::new().with_budget(1_000);

    // Three large messages, each registered with the manager.
    let transcript: Vec<Message> = (0..3)
        .map(|i| make_text_message(Role::User, &format!("turn {} {}", i, "x".repeat(10_000))))
        .collect();
    for _ in &transcript {
        mgr.notify_message_added();
    }

    // Stale state: compacted prefix past the end plus a bogus cached
    // character total that would dwarf the real transcript.
    mgr.compacted_count = 10;
    mgr.total_turns = 10;
    mgr.active_message_chars = 1_000_000;
    mgr.active_message_chars_dirty = false;

    assert_eq!(
        mgr.token_estimate_with(&transcript),
        0,
        "stale cached active_message_chars should not keep emergency compaction above threshold"
    );
}

#[test]
fn test_bug_175_hard_compact_does_not_inflate_stale_compacted_count() {
    let mut mgr = CompactionManager::new().with_budget(1_000);

    // 30 real messages, each registered with the manager.
    let transcript: Vec<Message> = (0..30)
        .map(|i| make_text_message(Role::User, &format!("turn {} {}", i, "z".repeat(200))))
        .collect();
    for _ in &transcript {
        mgr.notify_message_added();
    }

    // Stale persisted state: prefix past the end, dirty cache, and an
    // existing summary that must not be stacked on again.
    mgr.compacted_count = 100;
    mgr.total_turns = 100;
    mgr.active_message_chars_dirty = true;
    mgr.active_summary = Some(jcode_compaction_core::Summary {
        text: "# Existing summary".to_string(),
        openai_encrypted_content: None,
        covers_up_to_turn: 100,
        original_turn_count: 100,
    });

    let outcome = mgr.hard_compact_with(&transcript);

    assert!(
        outcome.is_err(),
        "there are no active messages left after clamping stale compacted_count"
    );
    assert_eq!(
        mgr.compacted_count(),
        transcript.len(),
        "hard compact should never push compacted_count past messages.len()"
    );
    let marker_count = mgr
        .active_summary
        .as_ref()
        .map_or(0, |summary| summary.text.matches("[Emergency compaction]").count());
    assert_eq!(
        marker_count, 0,
        "stale compacted state should not append another emergency summary block"
    );
}

// ── safe_compaction_cutoff: tool call/result pair integrity ─────────

#[test]
Expand Down
12 changes: 6 additions & 6 deletions src/provider/openrouter_sse_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,12 +363,12 @@ impl OpenRouterStream {
.and_then(|c| c.as_str())
&& !reasoning_content.is_empty()
{
let reasoning_delta = if reasoning_content.starts_with(&self.reasoning_buffer)
{
&reasoning_content[self.reasoning_buffer.len()..]
} else {
reasoning_content
};
let reasoning_delta =
if reasoning_content.starts_with(&self.reasoning_buffer) {
&reasoning_content[self.reasoning_buffer.len()..]
} else {
reasoning_content
};
self.reasoning_buffer = reasoning_content.to_string();
if !reasoning_delta.is_empty() {
self.pending
Expand Down
3 changes: 2 additions & 1 deletion src/provider/openrouter_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,8 @@ fn test_parse_next_event_emits_only_incremental_reasoning_content() {
}

stream.buffer =
"data:{\"choices\":[{\"delta\":{\"reasoning_content\":\"Thinking more\"}}]}\n\n".to_string();
"data:{\"choices\":[{\"delta\":{\"reasoning_content\":\"Thinking more\"}}]}\n\n"
.to_string();
match stream.parse_next_event() {
Some(StreamEvent::ThinkingDelta(text)) => assert_eq!(text, " more"),
other => panic!("expected incremental ThinkingDelta, got {:?}", other),
Expand Down