@@ -150,18 +150,33 @@ impl ResponseHandler {
150150 . map ( crate :: api:: MessageContentBlock :: from_content_block_for_api)
151151 . collect ( ) ;
152152 if truncated_by_max_tokens {
153- // Drop tool-use blocks from a truncated response.
154- // Their arguments may be half-formed JSON, and storing
155- // a `tool_use` without the matching `tool_result`
156- // leaves the next request in a shape the provider
157- // will reject.
153+ // Drop every tool-related block from a truncated
154+ // response. Their arguments may be half-formed
155+ // JSON, and leaving a `tool_use` without the
156+ // matching `tool_result` (or a server tool result
157+ // without its `server_tool_use`) puts the next
158+ // request in a shape the provider will reject.
158159 message_blocks. retain ( |block| {
159160 !matches ! (
160161 block,
161162 crate :: api:: MessageContentBlock :: ToolUse { .. }
162163 | crate :: api:: MessageContentBlock :: ServerToolUse { .. }
164+ | crate :: api:: MessageContentBlock :: WebSearchToolResult { .. }
163165 )
164166 } ) ;
167+ if message_blocks. is_empty ( ) {
168+ // The truncated response was tool-use only. Record
169+ // a short placeholder so the conversation keeps
170+ // alternating user / assistant — without it the
171+ // next user turn would land directly after the
172+ // previous one and the provider would reject the
173+ // request.
174+ message_blocks. push ( crate :: api:: MessageContentBlock :: Text {
175+ text : "[Response cut off by token limit before any visible content.]"
176+ . to_string ( ) ,
177+ cache_control : None ,
178+ } ) ;
179+ }
165180 }
166181 if !message_blocks. is_empty ( ) {
167182 self . conversation . add_assistant_with_blocks ( message_blocks) ;
@@ -670,3 +685,232 @@ impl ResponseHandler {
670685 & self . conversation
671686 }
672687}
688+
689+ #[ cfg( test) ]
690+ mod truncation_tests {
691+ use super :: * ;
692+ use crate :: api:: {
693+ AnthropicClient , ContentBlock , MessageContent , MessageContentBlock , ReasoningEffort ,
694+ } ;
695+ use crate :: tools:: ToolExecutor ;
696+ use serde_json:: json;
697+ use tempfile:: TempDir ;
698+
699+ fn build_handler ( ) -> ( TempDir , ResponseHandler ) {
700+ let workspace = TempDir :: new ( ) . expect ( "temp workspace" ) ;
701+ let client = LlmClient :: Anthropic (
702+ AnthropicClient :: new ( "test-key" . to_string ( ) ) . expect ( "anthropic client" ) ,
703+ ) ;
704+ let tool_executor =
705+ ToolExecutor :: new ( workspace. path ( ) . to_path_buf ( ) , None , None , false , false )
706+ . expect ( "tool executor" ) ;
707+ let conversation = ConversationHistory :: new ( ) ;
708+ let interrupt = Arc :: new ( AtomicBool :: new ( false ) ) ;
709+ let steer = Arc :: new ( std:: sync:: Mutex :: new ( Vec :: new ( ) ) ) ;
710+ let handler = ResponseHandler :: new (
711+ client,
712+ tool_executor,
713+ conversation,
714+ "claude-sonnet-4-6" . to_string ( ) ,
715+ 8_192 ,
716+ ReasoningEffort :: Off ,
717+ Vec :: new ( ) ,
718+ interrupt,
719+ steer,
720+ "test-session" . to_string ( ) ,
721+ ) ;
722+ ( workspace, handler)
723+ }
724+
725+ fn assistant_blocks ( handler : & ResponseHandler ) -> Vec < MessageContentBlock > {
726+ let last = handler
727+ . conversation ( )
728+ . messages ( )
729+ . last ( )
730+ . cloned ( )
731+ . expect ( "conversation has at least one message" ) ;
732+ assert_eq ! (
733+ last. role, "assistant" ,
734+ "expected the last turn to be assistant"
735+ ) ;
736+ match last. content {
737+ MessageContent :: Blocks { content } => content,
738+ MessageContent :: Text { content } => vec ! [ MessageContentBlock :: Text {
739+ text: content,
740+ cache_control: None ,
741+ } ] ,
742+ }
743+ }
744+
745+ fn block_kinds ( blocks : & [ MessageContentBlock ] ) -> Vec < & ' static str > {
746+ blocks
747+ . iter ( )
748+ . map ( |b| match b {
749+ MessageContentBlock :: Text { .. } => "text" ,
750+ MessageContentBlock :: Image { .. } => "image" ,
751+ MessageContentBlock :: Thinking { .. } => "thinking" ,
752+ MessageContentBlock :: Summary { .. } => "summary" ,
753+ MessageContentBlock :: Compaction { .. } => "compaction" ,
754+ MessageContentBlock :: Reasoning { .. } => "reasoning" ,
755+ MessageContentBlock :: ToolUse { .. } => "tool_use" ,
756+ MessageContentBlock :: ServerToolUse { .. } => "server_tool_use" ,
757+ MessageContentBlock :: ToolResult { .. } => "tool_result" ,
758+ MessageContentBlock :: WebSearchToolResult { .. } => "web_search_tool_result" ,
759+ } )
760+ . collect ( )
761+ }
762+
763+ fn call_handler ( handler : & mut ResponseHandler , blocks : Vec < ContentBlock > , stop : Option < & str > ) {
764+ let mut display = Vec :: new ( ) ;
765+ let ( mut a, mut b, mut c, mut d, mut e) = ( 0 , 0 , 0 , 0 , 0 ) ;
766+ let rt = tokio:: runtime:: Builder :: new_current_thread ( )
767+ . enable_all ( )
768+ . build ( )
769+ . expect ( "test runtime" ) ;
770+ rt. block_on ( handler. handle_response (
771+ blocks,
772+ stop. map ( str:: to_string) ,
773+ & mut display,
774+ & mut a,
775+ & mut b,
776+ & mut c,
777+ & mut d,
778+ & mut e,
779+ ) )
780+ . expect ( "handle_response should not error on the truncation early-return paths" ) ;
781+ }
782+
783+ /// A truncated response that contains text plus a partial `tool_use`
784+ /// must keep the text in the conversation and drop the `tool_use`,
785+ /// because storing a tool call without the matching tool result
786+ /// puts the next request into a shape the provider will reject.
787+ #[ test]
788+ fn truncated_with_text_and_tool_use_drops_only_the_tool_use ( ) {
789+ let ( _ws, mut handler) = build_handler ( ) ;
790+ let blocks = vec ! [
791+ ContentBlock :: Text {
792+ text: "Looking at the file..." . to_string( ) ,
793+ } ,
794+ ContentBlock :: ToolUse {
795+ id: "tool_001" . to_string( ) ,
796+ name: "read_file" . to_string( ) ,
797+ input: json!( { "path" : "src/main.rs" } ) ,
798+ } ,
799+ ] ;
800+
801+ call_handler ( & mut handler, blocks, Some ( "max_tokens" ) ) ;
802+
803+ let kinds = block_kinds ( & assistant_blocks ( & handler) ) ;
804+ assert_eq ! ( kinds, vec![ "text" ] , "tool_use must not survive truncation" ) ;
805+ }
806+
807+ /// When the only block in a truncated response is a `tool_use`,
808+ /// stripping it would leave the assistant turn empty and the next
809+ /// user message would land directly after the prior user message,
810+ /// which the provider rejects. A placeholder text block keeps the
811+ /// conversation alternating.
812+ #[ test]
813+ fn truncated_with_only_tool_use_inserts_placeholder_text ( ) {
814+ let ( _ws, mut handler) = build_handler ( ) ;
815+ let blocks = vec ! [ ContentBlock :: ToolUse {
816+ id: "tool_002" . to_string( ) ,
817+ name: "read_file" . to_string( ) ,
818+ input: json!( { "path" : "src/lib.rs" } ) ,
819+ } ] ;
820+
821+ call_handler ( & mut handler, blocks, Some ( "max_tokens" ) ) ;
822+
823+ let assistant = assistant_blocks ( & handler) ;
824+ assert_eq ! ( block_kinds( & assistant) , vec![ "text" ] ) ;
825+ match & assistant[ 0 ] {
826+ MessageContentBlock :: Text { text, .. } => {
827+ assert ! (
828+ text. contains( "cut off" ) ,
829+ "placeholder should mention truncation, got: {text}"
830+ ) ;
831+ }
832+ other => panic ! ( "expected text placeholder, got {other:?}" ) ,
833+ }
834+ }
835+
836+ /// A truncated response that also carries a `WebSearchToolResult`
837+ /// must drop the result alongside the matching `server_tool_use`.
838+ /// Keeping the result without its use orphans the pair and the
839+ /// next request fails.
840+ #[ test]
841+ fn truncated_drops_server_tool_use_and_web_search_result_together ( ) {
842+ let ( _ws, mut handler) = build_handler ( ) ;
843+ let blocks = vec ! [
844+ ContentBlock :: Text {
845+ text: "Here is what I found:" . to_string( ) ,
846+ } ,
847+ ContentBlock :: ServerToolUse {
848+ id: "srv_001" . to_string( ) ,
849+ name: "web_search" . to_string( ) ,
850+ input: json!( { "query" : "rust async" } ) ,
851+ } ,
852+ ContentBlock :: WebSearchToolResult {
853+ tool_use_id: "srv_001" . to_string( ) ,
854+ content: Vec :: new( ) ,
855+ } ,
856+ ] ;
857+
858+ call_handler ( & mut handler, blocks, Some ( "max_tokens" ) ) ;
859+
860+ let kinds = block_kinds ( & assistant_blocks ( & handler) ) ;
861+ assert_eq ! (
862+ kinds,
863+ vec![ "text" ] ,
864+ "server_tool_use and its web_search_tool_result must be dropped together"
865+ ) ;
866+ }
867+
868+ /// A non-truncated response that contains only text and ends the
869+ /// turn (no tool calls) should land unchanged in the conversation,
870+ /// without the truncation filter or placeholder being applied.
871+ #[ test]
872+ fn non_truncated_text_only_response_is_passed_through ( ) {
873+ let ( _ws, mut handler) = build_handler ( ) ;
874+ let blocks = vec ! [ ContentBlock :: Text {
875+ text: "All done." . to_string( ) ,
876+ } ] ;
877+
878+ call_handler ( & mut handler, blocks, Some ( "end_turn" ) ) ;
879+
880+ let assistant = assistant_blocks ( & handler) ;
881+ assert_eq ! ( block_kinds( & assistant) , vec![ "text" ] ) ;
882+ match & assistant[ 0 ] {
883+ MessageContentBlock :: Text { text, .. } => {
884+ assert_eq ! ( text, "All done." ) ;
885+ }
886+ other => panic ! ( "expected the original text block, got {other:?}" ) ,
887+ }
888+ }
889+
890+ /// Reasoning and thinking blocks must survive truncation — they
891+ /// have no pairing requirement with a later message, so dropping
892+ /// them would lose useful context for the next turn.
893+ #[ test]
894+ fn truncated_keeps_thinking_and_reasoning_blocks ( ) {
895+ let ( _ws, mut handler) = build_handler ( ) ;
896+ let blocks = vec ! [
897+ ContentBlock :: Thinking {
898+ thinking: "Need to read the file first." . to_string( ) ,
899+ signature: "sig" . to_string( ) ,
900+ } ,
901+ ContentBlock :: Text {
902+ text: "Let me look." . to_string( ) ,
903+ } ,
904+ ContentBlock :: ToolUse {
905+ id: "tool_003" . to_string( ) ,
906+ name: "read_file" . to_string( ) ,
907+ input: json!( { "path" : "x" } ) ,
908+ } ,
909+ ] ;
910+
911+ call_handler ( & mut handler, blocks, Some ( "max_tokens" ) ) ;
912+
913+ let kinds = block_kinds ( & assistant_blocks ( & handler) ) ;
914+ assert_eq ! ( kinds, vec![ "thinking" , "text" ] ) ;
915+ }
916+ }
0 commit comments