Skip to content

Commit 822ab6e

Browse files
committed
feat: improve tool-call robustness and harness normalization
1 parent f4071b6 commit 822ab6e

8 files changed

Lines changed: 1108 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/rexos-harness/src/lib.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ pub async fn bootstrap_with_prompt(
129129
)
130130
.await?;
131131

132+
normalize_features_json(workspace_dir)?;
133+
ensure_features_populated(workspace_dir)?;
134+
132135
run_init_script(workspace_dir)?;
133136
commit_checkpoint_if_dirty(workspace_dir, "chore: rexos harness bootstrap")?;
134137
Ok(())
@@ -344,6 +347,7 @@ Your job:
344347
Rules:
345348
- Work only inside the workspace directory.
346349
- Prefer tools (`fs_read`, `fs_write`, `shell`) to inspect and change files.
350+
- Do NOT just describe tool calls; actually call tools when you need to edit files.
347351
- After edits, run the workspace init script (`./init.sh`, or `./init.ps1` on Windows) and ensure it succeeds.
348352
- Commit your changes to git with a descriptive message.
349353
"#
@@ -356,13 +360,101 @@ Rules:
356360
- Work only inside the workspace directory.
357361
- Make small, incremental progress (one feature at a time).
358362
- Prefer using tools (`fs_read`, `fs_write`, `shell`) to inspect and change files.
363+
- Do NOT just describe tool calls; actually call tools when you need to edit files.
359364
- If you change code, run the workspace init script (smoke checks) and fix any failures.
360365
- If both `init.sh` and `init.ps1` exist, keep them functionally equivalent.
361366
- Append a short summary to `rexos-progress.md`.
362367
- Commit meaningful progress to git with a descriptive message.
363368
"#
364369
}
365370

371+
fn ensure_features_populated(workspace_dir: &Path) -> anyhow::Result<()> {
372+
let path = workspace_dir.join(FEATURES_JSON);
373+
let raw = std::fs::read_to_string(&path).with_context(|| format!("read {}", path.display()))?;
374+
let v: serde_json::Value =
375+
serde_json::from_str(&raw).with_context(|| format!("parse {}", path.display()))?;
376+
let n = v
377+
.get("features")
378+
.and_then(|v| v.as_array())
379+
.map(|a| a.len())
380+
.unwrap_or(0);
381+
if n == 0 {
382+
bail!(
383+
"initializer did not populate features.json (features=[]). Ensure your model supports tool calling and actually uses fs_write/shell to update the workspace."
384+
);
385+
}
386+
Ok(())
387+
}
388+
389+
fn normalize_features_json(workspace_dir: &Path) -> anyhow::Result<()> {
390+
let path = workspace_dir.join(FEATURES_JSON);
391+
let raw = std::fs::read_to_string(&path).with_context(|| format!("read {}", path.display()))?;
392+
let mut v: serde_json::Value =
393+
serde_json::from_str(&raw).with_context(|| format!("parse {}", path.display()))?;
394+
395+
let mut changed = false;
396+
397+
if v.get("version").and_then(|x| x.as_i64()).is_none() {
398+
v["version"] = serde_json::Value::Number(1.into());
399+
changed = true;
400+
}
401+
402+
if v.get("updated_at").and_then(|x| x.as_str()).is_none() {
403+
v["updated_at"] = serde_json::Value::String(String::new());
404+
changed = true;
405+
}
406+
407+
let default_editing = "Only change `passes` (false -> true) and optionally `notes`. Do not delete or reorder items.";
408+
let default_completion = "A feature can only be marked passing after required tests/smoke checks are run.";
409+
410+
if v.get("rules").and_then(|x| x.as_object()).is_none() {
411+
v["rules"] = serde_json::json!({
412+
"editing": default_editing,
413+
"completion": default_completion
414+
});
415+
changed = true;
416+
} else if let Some(obj) = v.get_mut("rules").and_then(|x| x.as_object_mut()) {
417+
if obj.get("editing").and_then(|x| x.as_str()).is_none() {
418+
obj.insert(
419+
"editing".to_string(),
420+
serde_json::Value::String(default_editing.to_string()),
421+
);
422+
changed = true;
423+
}
424+
if obj.get("completion").and_then(|x| x.as_str()).is_none() {
425+
obj.insert(
426+
"completion".to_string(),
427+
serde_json::Value::String(default_completion.to_string()),
428+
);
429+
changed = true;
430+
}
431+
}
432+
433+
if v.get("features").and_then(|x| x.as_array()).is_none() {
434+
v["features"] = serde_json::Value::Array(Vec::new());
435+
changed = true;
436+
}
437+
438+
if let Some(arr) = v.get_mut("features").and_then(|x| x.as_array_mut()) {
439+
for f in arr {
440+
let Some(obj) = f.as_object_mut() else {
441+
continue;
442+
};
443+
if obj.get("passes").and_then(|x| x.as_bool()).is_none() {
444+
obj.insert("passes".to_string(), serde_json::Value::Bool(false));
445+
changed = true;
446+
}
447+
}
448+
}
449+
450+
if changed {
451+
let s = serde_json::to_string_pretty(&v).context("serialize features.json")?;
452+
std::fs::write(&path, s).with_context(|| format!("write {}", path.display()))?;
453+
}
454+
455+
Ok(())
456+
}
457+
366458
#[derive(Debug, Clone, Copy)]
367459
enum InitScript {
368460
Bash,

crates/rexos-llm/src/openai_compat.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,9 @@ pub struct OpenAiCompatibleClient {
121121
impl OpenAiCompatibleClient {
122122
pub fn new(base_url: String, api_key: Option<String>) -> anyhow::Result<Self> {
123123
let base_url = base_url.trim_end_matches('/').to_string();
124+
let timeout = openai_compat_timeout();
124125
let http = reqwest::Client::builder()
125-
.timeout(Duration::from_secs(60))
126+
.timeout(timeout)
126127
.build()
127128
.context("build http client")?;
128129

@@ -181,3 +182,14 @@ impl OpenAiCompatibleClient {
181182
})
182183
}
183184
}
185+
186+
/// Resolves the HTTP timeout for the OpenAI-compatible client.
///
/// The value of the `REXOS_OPENAI_COMPAT_TIMEOUT_SECS` environment variable is
/// used when it parses (after trimming whitespace) as a positive whole number
/// of seconds. An unset variable, an unparsable value or `0` all fall back to
/// the default of 600 seconds.
fn openai_compat_timeout() -> Duration {
    const DEFAULT_SECS: u64 = 600;

    let seconds = std::env::var("REXOS_OPENAI_COMPAT_TIMEOUT_SECS")
        .ok()
        .and_then(|value| value.trim().parse::<u64>().ok())
        .filter(|&parsed| parsed > 0)
        .unwrap_or(DEFAULT_SECS);

    Duration::from_secs(seconds)
}

crates/rexos-runtime/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ rust-version.workspace = true
77

88
[dependencies]
99
anyhow.workspace = true
10+
serde.workspace = true
11+
serde_json.workspace = true
1012
rexos-kernel = { path = "../rexos-kernel" }
1113
rexos-llm = { path = "../rexos-llm" }
1214
rexos-memory = { path = "../rexos-memory" }

crates/rexos-runtime/src/lib.rs

Lines changed: 170 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use anyhow::{bail, Context};
55

66
use rexos_kernel::router::{ModelRouter, TaskKind};
77
use rexos_llm::driver::LlmDriver;
8-
use rexos_llm::openai_compat::{ChatCompletionRequest, ChatMessage, Role};
8+
use rexos_llm::openai_compat::{ChatCompletionRequest, ChatMessage, Role, ToolCall, ToolFunction};
99
use rexos_llm::registry::LlmRegistry;
1010
use rexos_memory::MemoryStore;
1111
use rexos_tools::Toolset;
@@ -76,7 +76,7 @@ impl AgentRuntime {
7676
model: model.clone(),
7777
messages: messages.clone(),
7878
tools: tool_defs.clone(),
79-
temperature: None,
79+
temperature: Some(0.0),
8080
};
8181

8282
let assistant = self
@@ -89,9 +89,14 @@ impl AgentRuntime {
8989

9090
let tool_calls = match assistant.tool_calls.clone() {
9191
Some(calls) if !calls.is_empty() => calls,
92-
_ => {
93-
return Ok(assistant.content.unwrap_or_default());
94-
}
92+
_ => match assistant
93+
.content
94+
.as_deref()
95+
.and_then(parse_tool_calls_from_json_content)
96+
{
97+
Some(calls) => calls,
98+
None => return Ok(assistant.content.unwrap_or_default()),
99+
},
95100
};
96101

97102
for call in tool_calls {
@@ -102,8 +107,9 @@ impl AgentRuntime {
102107
bail!("tool loop detected: {sig}");
103108
}
104109

110+
let args_json = normalize_tool_arguments(&call.function.name, &call.function.arguments);
105111
let output = tools
106-
.call(&call.function.name, &call.function.arguments)
112+
.call(&call.function.name, &args_json)
107113
.await
108114
.with_context(|| format!("tool {}", call.function.name))?;
109115

@@ -143,3 +149,161 @@ impl AgentRuntime {
143149
driver.chat(req).await
144150
}
145151
}
152+
153+
#[derive(Debug, serde::Deserialize)]
154+
struct JsonToolCall {
155+
name: String,
156+
#[serde(alias = "args")]
157+
#[serde(default)]
158+
arguments: Option<serde_json::Value>,
159+
#[serde(flatten)]
160+
extra: serde_json::Map<String, serde_json::Value>,
161+
}
162+
163+
fn normalize_tool_arguments(tool_name: &str, raw_arguments_json: &str) -> String {
164+
let Ok(v) = serde_json::from_str::<serde_json::Value>(raw_arguments_json) else {
165+
return raw_arguments_json.to_string();
166+
};
167+
168+
let Some(obj) = v.as_object() else {
169+
return raw_arguments_json.to_string();
170+
};
171+
172+
let matches_name = obj
173+
.get("function")
174+
.and_then(|v| v.as_str())
175+
.or_else(|| obj.get("name").and_then(|v| v.as_str()))
176+
.map(|name| name == tool_name)
177+
.unwrap_or(true);
178+
if !matches_name {
179+
return raw_arguments_json.to_string();
180+
}
181+
182+
let Some(inner) = obj.get("arguments") else {
183+
return raw_arguments_json.to_string();
184+
};
185+
186+
if let Some(s) = inner.as_str() {
187+
return s.to_string();
188+
}
189+
190+
serde_json::to_string(inner).unwrap_or_else(|_| raw_arguments_json.to_string())
191+
}
192+
193+
fn parse_tool_calls_from_json_content(content: &str) -> Option<Vec<ToolCall>> {
194+
let trimmed = content.trim();
195+
if trimmed.is_empty() {
196+
return None;
197+
}
198+
199+
if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
200+
if let Some(calls) = parse_json_tool_calls_from_value(value) {
201+
return Some(into_tool_calls(calls));
202+
}
203+
}
204+
205+
let calls = extract_json_tool_calls_from_text(trimmed);
206+
if calls.is_empty() {
207+
return None;
208+
}
209+
Some(into_tool_calls(calls))
210+
}
211+
212+
fn into_tool_calls(calls: Vec<JsonToolCall>) -> Vec<ToolCall> {
213+
let mut out = Vec::new();
214+
for (idx, call) in calls.into_iter().enumerate() {
215+
let args_value = call
216+
.arguments
217+
.unwrap_or_else(|| serde_json::Value::Object(call.extra));
218+
let args = if let Some(s) = args_value.as_str() {
219+
s.to_string()
220+
} else {
221+
serde_json::to_string(&args_value).unwrap_or_else(|_| "{}".to_string())
222+
};
223+
out.push(ToolCall {
224+
id: format!("call_json_{}", idx + 1),
225+
kind: "function".to_string(),
226+
function: ToolFunction {
227+
name: call.name,
228+
arguments: args,
229+
},
230+
});
231+
}
232+
out
233+
}
234+
235+
/// Interprets a JSON value as one tool call or a list of tool calls.
///
/// * An array is accepted only if it is non-empty and *every* element
///   deserializes as a [`JsonToolCall`]; otherwise `None` is returned.
/// * Any other value is accepted if it deserializes as a single call.
///
/// An empty array yields `None` rather than an empty list, so callers never
/// mistake content such as `[]` for "the model requested tools".
fn parse_json_tool_calls_from_value(value: serde_json::Value) -> Option<Vec<JsonToolCall>> {
    match value {
        serde_json::Value::Array(items) => {
            if items.is_empty() {
                return None;
            }
            // Collecting into `Option<Vec<_>>` stops at the first element that
            // is not a tool call and rejects the whole array.
            items
                .into_iter()
                .map(|item| serde_json::from_value::<JsonToolCall>(item).ok())
                .collect()
        }
        single => serde_json::from_value::<JsonToolCall>(single)
            .ok()
            .map(|call| vec![call]),
    }
}
246+
247+
fn extract_json_tool_calls_from_text(content: &str) -> Vec<JsonToolCall> {
248+
let mut calls = Vec::new();
249+
for (start, _) in content.match_indices('{') {
250+
if calls.len() >= 16 {
251+
break;
252+
}
253+
let Some(end) = find_balanced_json_object_end(content, start) else {
254+
continue;
255+
};
256+
let slice = &content[start..end];
257+
let Ok(value) = serde_json::from_str::<serde_json::Value>(slice) else {
258+
continue;
259+
};
260+
let Some(mut parsed) = parse_json_tool_calls_from_value(value) else {
261+
continue;
262+
};
263+
calls.append(&mut parsed);
264+
}
265+
calls
266+
}
267+
268+
/// Finds the end of the brace-balanced object that begins at byte `start`.
///
/// `s[start]` must be `{`; otherwise (or when `start` is out of range) `None`
/// is returned. Braces inside double-quoted strings are ignored, and a
/// backslash inside a string escapes the byte that follows it.
///
/// Returns the exclusive end offset, i.e. `&s[start..end]` is the object text,
/// or `None` if the braces never balance before the end of `s`. The result is
/// only a structural match; the slice is not guaranteed to be valid JSON.
fn find_balanced_json_object_end(s: &str, start: usize) -> Option<usize> {
    let tail = s.as_bytes().get(start..)?;
    if tail.first() != Some(&b'{') {
        return None;
    }

    let mut open_braces: i32 = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    for (offset, &byte) in tail.iter().enumerate() {
        if inside_string {
            if skip_next {
                // The escaped byte is consumed without interpretation.
                skip_next = false;
            } else if byte == b'\\' {
                skip_next = true;
            } else if byte == b'"' {
                inside_string = false;
            }
            continue;
        }

        if byte == b'"' {
            inside_string = true;
        } else if byte == b'{' {
            open_braces += 1;
        } else if byte == b'}' {
            open_braces -= 1;
            if open_braces == 0 {
                return Some(start + offset + 1);
            }
        }
    }

    None
}

0 commit comments

Comments
 (0)