Skip to content

Commit 18daa9d

Browse files
committed
Fix UTF-8 panics in string truncation and improve OpenAI argument parsing
1 parent 5dcdfcd commit 18daa9d

4 files changed

Lines changed: 100 additions & 15 deletions

File tree

src/api/openai.rs

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,7 @@ impl OpenAIClient {
194194

195195
if let Some(tool_calls) = item.tool_calls {
196196
for call in tool_calls {
197-
let input = serde_json::from_str::<serde_json::Value>(&call.arguments)
198-
.unwrap_or_else(|_| json!({"raw_arguments": call.arguments}));
197+
let input = parse_tool_arguments(&call.arguments);
199198
content_blocks.push(ContentBlock::ToolUse {
200199
id: call.id,
201200
name: call.name,
@@ -208,8 +207,7 @@ impl OpenAIClient {
208207
if let (Some(name), Some(arguments), Some(call_id)) =
209208
(item.name, item.arguments, item.call_id)
210209
{
211-
let input = serde_json::from_str::<serde_json::Value>(&arguments)
212-
.unwrap_or_else(|_| json!({"raw_arguments": arguments}));
210+
let input = parse_tool_arguments(&arguments);
213211
content_blocks.push(ContentBlock::ToolUse {
214212
id: call_id,
215213
name,
@@ -259,6 +257,44 @@ impl OpenAIClient {
259257
}
260258
}
261259

260+
/// Try to parse tool call arguments as JSON, with recovery for common issues.
261+
fn parse_tool_arguments(args: &str) -> serde_json::Value {
262+
// Try normal parsing first
263+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(args) {
264+
return v;
265+
}
266+
267+
// Try trimming whitespace
268+
let trimmed = args.trim();
269+
if trimmed != args {
270+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(trimmed) {
271+
return v;
272+
}
273+
}
274+
275+
// Try removing trailing commas before } or ]
276+
let fixed = trimmed.replace(",}", "}").replace(",]", "]");
277+
if fixed != trimmed {
278+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&fixed) {
279+
return v;
280+
}
281+
}
282+
283+
// Try adding missing closing brace
284+
if trimmed.starts_with('{') && !trimmed.ends_with('}') {
285+
let braced = format!("{}}}", trimmed.trim_end_matches(','));
286+
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&braced) {
287+
return v;
288+
}
289+
}
290+
291+
eprintln!(
292+
" \x1b[33m⚠\x1b[0m Failed to parse tool arguments as JSON: {}",
293+
&args[..args.len().min(200)]
294+
);
295+
json!({"raw_arguments": args})
296+
}
297+
262298
fn build_response_input(request: &CreateMessageRequest) -> Vec<serde_json::Value> {
263299
let mut input = Vec::new();
264300

src/repl/conversation.rs

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,19 @@ Show imperial units only when the user explicitly asks for them."#,
296296
if result_text.len() > threshold {
297297
let original_len = result_text.len();
298298
let actual_keep = keep_chars.min(original_len / 3);
299-
let start = &result_text[..actual_keep];
300-
let end = &result_text[result_text.len() - actual_keep..];
299+
// Snap to char boundaries to avoid panicking on multi-byte chars
300+
let mut start_end = actual_keep;
301+
while start_end < original_len
302+
&& !result_text.is_char_boundary(start_end)
303+
{
304+
start_end += 1;
305+
}
306+
let mut end_start = original_len.saturating_sub(actual_keep);
307+
while end_start > 0 && !result_text.is_char_boundary(end_start) {
308+
end_start -= 1;
309+
}
310+
let start = &result_text[..start_end];
311+
let end = &result_text[end_start..];
301312
*result_text = format!(
302313
"{}\n...[truncated {} chars]...\n{}",
303314
start, original_len, end
@@ -333,15 +344,21 @@ Show imperial units only when the user explicitly asks for them."#,
333344
crate::api::MessageContentBlock::ToolUse { name, input, .. } => {
334345
let input_str = serde_json::to_string(input).unwrap_or_default();
335346
let input_preview = if input_str.len() > 200 {
336-
format!("{}...", &input_str[..200])
347+
format!(
348+
"{}...",
349+
&input_str[..truncate_at_char_boundary(&input_str, 200)]
350+
)
337351
} else {
338352
input_str
339353
};
340354
parts.push(format!("[Tool call: {}({})]", name, input_preview));
341355
}
342356
crate::api::MessageContentBlock::ToolResult { content, .. } => {
343357
let preview = if content.len() > 300 {
344-
format!("{}...", &content[..300])
358+
format!(
359+
"{}...",
360+
&content[..truncate_at_char_boundary(content, 300)]
361+
)
345362
} else {
346363
content.clone()
347364
};
@@ -431,6 +448,18 @@ Show imperial units only when the user explicitly asks for them."#,
431448
}
432449
}
433450

451+
/// Find the largest byte index <= `max_bytes` that is a char boundary.
452+
fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> usize {
453+
if max_bytes >= s.len() {
454+
return s.len();
455+
}
456+
let mut i = max_bytes;
457+
while i > 0 && !s.is_char_boundary(i) {
458+
i -= 1;
459+
}
460+
i
461+
}
462+
434463
impl Default for ConversationHistory {
435464
fn default() -> Self {
436465
Self::new()

src/tools/bashexec.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ const MAX_TOOL_OUTPUT_TOKENS: usize = 16_000; // ~56KB, prevents excessive conte
1212
fn truncate_for_context(content: &str, max_tokens: usize) -> String {
1313
let estimated_tokens = content.len() / 4;
1414
if estimated_tokens > max_tokens {
15-
let truncate_at = max_tokens * 4;
16-
let truncated_content = &content[..truncate_at.min(content.len())];
15+
let mut truncate_at = (max_tokens * 4).min(content.len());
16+
while truncate_at > 0 && !content.is_char_boundary(truncate_at) {
17+
truncate_at -= 1;
18+
}
19+
let truncated_content = &content[..truncate_at];
1720
format!(
1821
"{}...\n\n[TRUNCATED: Output has ~{} tokens, showing first ~{} tokens. Re-run with output redirection if you need the full output.]",
1922
truncated_content, estimated_tokens, max_tokens
@@ -426,7 +429,7 @@ impl BashExecutor {
426429
{
427430
return format!(
428431
"Command '{}' contains output redirection ('>' or '>>')\n\
429-
Hint: Use write_file tool to create or modify files. Note: '2>&1' is allowed.",
432+
Hint: Use write_file tool to create or edit_file/morph_edit_file to modify files. Note: '2>&1' is allowed.",
430433
command
431434
);
432435
}

src/tools/mod.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,19 @@ impl ToolExecutor {
137137
}
138138

139139
pub async fn execute(&self, tool_name: &str, input: &Value) -> Result<ToolExecutionResult> {
140+
// Recover from raw_arguments fallback (malformed JSON from model)
141+
let recovered;
142+
let input = if let Some(raw) = input.get("raw_arguments").and_then(|v| v.as_str()) {
143+
if input.as_object().is_none_or(|o| o.len() == 1) {
144+
recovered = serde_json::from_str::<Value>(raw.trim()).unwrap_or(input.clone());
145+
&recovered
146+
} else {
147+
input
148+
}
149+
} else {
150+
input
151+
};
152+
140153
// Check if this is an MCP tool first
141154
if let Some(mcp_manager) = &self.mcp_manager {
142155
if mcp_manager.is_mcp_tool(tool_name).await {
@@ -704,12 +717,16 @@ impl ToolExecutor {
704717

705718
let text = utils::html_to_text(&body);
706719

707-
let max_chars = 64_000;
708-
let truncated = if text.len() > max_chars {
720+
let max_bytes = 64_000;
721+
let truncated = if text.len() > max_bytes {
722+
let mut end = max_bytes;
723+
while end > 0 && !text.is_char_boundary(end) {
724+
end -= 1;
725+
}
709726
format!(
710727
"{}\n\n[TRUNCATED: showing first ~{} chars of {}]",
711-
&text[..max_chars],
712-
max_chars,
728+
&text[..end],
729+
max_bytes,
713730
text.len()
714731
)
715732
} else {

0 commit comments

Comments
 (0)