Skip to content

Commit 1eca869

Browse files
fix(json_parser): tolerate dict/list input in validate_and_repair_json
Some providers return chat.completions message.content as an already-parsed object rather than a JSON-encoded string — observed with Cloudflare Workers AI on the /compat route for @cf/meta/llama-4-scout-17b-16e-instruct under response_format=json_schema. The OpenAI Python SDK raises a Pydantic ValidationError ("JSON input should be string, bytes or bytearray"), and honcho's repair fallback _parse_or_repair_structured_content then routes to validate_and_repair_json — which crashed at json_str.strip() with "'dict' object has no attribute 'strip'". Encode non-string input via json.dumps() at the function boundary so the rest of the repair pipeline keeps assuming string input. Loosen the parameter type to Any to reflect the new contract. Adds tests/utils/test_json_parser.py with regression coverage for dict, list, and nested-dict inputs (plus existing string + whitespace cases).
1 parent 9b74886 commit 1eca869

2 files changed

Lines changed: 61 additions & 2 deletions

File tree

src/utils/json_parser.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,18 @@ def simple_bracket_repair(json_str: str) -> str:
352352
return repaired
353353

354354

355-
def validate_and_repair_json(json_str: str) -> str:
356-
"""Main function with comprehensive repair strategies"""
355+
def validate_and_repair_json(json_str: Any) -> str:
356+
"""Main function with comprehensive repair strategies.
357+
358+
Accepts a string (the normal case) or a dict/list, since some providers
359+
(e.g. Cloudflare Workers AI on the /compat route for llama-4-scout)
360+
return chat.completions content as an already-parsed object instead of
361+
a JSON-encoded string. JSON-encode non-string input first so the rest
362+
of the repair pipeline (which assumes string operations like .strip())
363+
keeps working.
364+
"""
365+
if not isinstance(json_str, str):
366+
json_str = json.dumps(json_str)
357367
json_str = json_str.strip()
358368

359369
# Try parsing with repair library

tests/utils/test_json_parser.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""Tests for src/utils/json_parser.py."""
2+
3+
import json
4+
5+
import pytest
6+
7+
from src.utils.json_parser import validate_and_repair_json
8+
9+
10+
class TestValidateAndRepairJsonInputTypes:
    """Check that validate_and_repair_json copes with non-string input.

    Regression coverage for providers (Cloudflare Workers AI /compat for
    llama-4-scout) that hand back chat.completions content as an
    already-parsed object rather than a JSON-encoded string. The repair
    pipeline has to JSON-encode such input up front instead of failing
    on .strip().
    """

    def test_dict_input_produces_same_result_as_equivalent_string(self):
        """A dict and its JSON encoding must repair to the same document."""
        data = {"explicit": [{"content": "user_alice exists"}]}
        repaired_from_dict = validate_and_repair_json(data)
        repaired_from_str = validate_and_repair_json(json.dumps(data))
        decoded_from_dict = json.loads(repaired_from_dict)
        decoded_from_str = json.loads(repaired_from_str)
        assert decoded_from_dict == decoded_from_str

    def test_list_input_does_not_crash(self):
        """A top-level list is accepted and round-trips unchanged."""
        data = [{"fact": "a"}, {"fact": "b"}]
        decoded = json.loads(validate_and_repair_json(data))
        assert decoded == data

    def test_nested_dict_input(self):
        """A dict holding nested lists of dicts round-trips unchanged."""
        data = {
            "explicit": [
                {"content": "fact one"},
                {"content": "fact two"},
            ],
            "implicit": [],
        }
        decoded = json.loads(validate_and_repair_json(data))
        assert decoded == data

    def test_string_input_still_works(self):
        """Plain JSON text (the normal case) is still handled."""
        raw = '{"key": "value"}'
        decoded = json.loads(validate_and_repair_json(raw))
        assert decoded == {"key": "value"}

    def test_string_input_with_whitespace_still_stripped(self):
        """Surrounding whitespace on string input does not affect the result."""
        raw = '  {"key": "value"}  \n'
        decoded = json.loads(validate_and_repair_json(raw))
        assert decoded == {"key": "value"}

0 commit comments

Comments
 (0)