Skip to content

Commit fa67e4d

Browse files
committed
Sanitize dirty JSON tool responses
1 parent f9d8167 commit fa67e4d

2 files changed

Lines changed: 53 additions & 1 deletion

File tree

helpers/extract_tools.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,40 @@
44
from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility
55
from typing import Any
66

7+
8+
def _sanitize_for_json_parsing(content: str) -> str:
    """Strip common LLM output artifacts so the JSON object can be extracted.

    The result starts at the first ``{`` (when there is one). Markdown fences,
    stray ``</invoke>`` closers and ``functions.tool_name:123`` markers are
    removed only *outside* the JSON payload, i.e. after its last ``}``; text
    before the first ``{`` is discarded wholesale. The payload itself is left
    untouched so that string values containing code fences or the text
    ``functions.x:1`` are not corrupted.

    Args:
        content: Raw model output that is expected to contain a JSON object.

    Returns:
        The sanitized text, or ``""`` when ``content`` is not a string.
        Surrounding whitespace is not stripped here; callers strip it.
    """
    # Function-scope import: keeps the helper self-contained, since a
    # module-level `import re` is not visible in this part of the file.
    import re

    if not isinstance(content, str):
        return ""

    # 1. Drop whole <invoke>...</invoke> blocks, contents included. This has
    #    to happen before the payload is located because such a block may
    #    itself contain braces.
    # NOTE(review): a payload wrapped *inside* <invoke>...</invoke> is removed
    # too - confirm that this is intended.
    text = re.sub(
        r'<\s*invoke\b[^>]*>.*?</\s*invoke\s*>', '', content, flags=re.DOTALL
    )

    # 2. Discard everything in front of the first '{'.
    first_brace = text.find('{')
    if first_brace > 0:
        text = text[first_brace:]

    # 3. The payload runs up to the last '}'. Without an opening or a closing
    #    brace there is nothing to protect, so every artifact is removed.
    payload_end = text.rfind('}') + 1 if first_brace != -1 else 0

    def _drop_outside_payload(match: "re.Match[str]") -> str:
        # None of the patterns below can match a '}', so a match lies either
        # entirely inside the payload or entirely after it.
        return '' if match.start() >= payload_end else match.group(0)

    artifact_patterns = (
        (r'^```[a-zA-Z]*\s*\n?', re.MULTILINE),   # opening fence: ```json
        (r'\n?```\s*$', re.MULTILINE),            # closing fence
        (r'<\s*/\s*invoke\s*>', 0),               # stray </invoke>
        (r'functions\.\w+\s*:\s*\w+\s*', 0),      # functions.tool_name:123
    )
    for pattern, flags in artifact_patterns:
        text = re.sub(pattern, _drop_outside_payload, text, flags=flags)

    return text
33+
34+
735
def json_parse_dirty(json: str) -> dict[str, Any] | None:
836
if not json or not isinstance(json, str):
937
return None
1038

11-
ext_json = extract_json_object_string(json.strip())
39+
sanitized = _sanitize_for_json_parsing(json)
40+
ext_json = extract_json_object_string(sanitized.strip())
1241
if ext_json:
1342
try:
1443
data = DirtyJson.parse_string(ext_json)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from __future__ import annotations
2+
3+
import sys
4+
from pathlib import Path
5+
6+
# Repository root: the directory one level above the one holding this file.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Put the root at the front of the import path, unless it is already listed.
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path[:0] = [str(PROJECT_ROOT)]
9+
10+
from helpers.extract_tools import json_parse_dirty
11+
12+
13+
def test_json_parse_dirty_sanitizes_common_llm_wrappers() -> None:
    """A tool call behind a fence, an <invoke> block and a functions.* line still parses."""
    raw_lines = [
        "```json",
        "<invoke>discard this wrapper artifact</invoke>",
        "functions.search:123",
        '{"tool_name":"search","tool_args":{"query":"agent zero"}}',
        "```",
    ]
    expected = {
        "tool_name": "search",
        "tool_args": {"query": "agent zero"},
    }

    parsed = json_parse_dirty("\n".join(raw_lines))

    assert parsed == expected

0 commit comments

Comments
 (0)