Skip to content

Commit 4062040

Browse files
authored
fix(parsing): schema-aware tool-arg coercion for XML-style parsers (#28)
1 parent f99428a · commit 4062040

18 files changed

Lines changed: 360 additions & 40 deletions

renderers/base.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -297,8 +297,23 @@ def render_ids(
297297
"""Render messages to token IDs (without attribution metadata)."""
298298
...
299299

300-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
301-
"""Parse completion tokens back into a structured message."""
300+
def parse_response(
301+
self,
302+
token_ids: list[int],
303+
*,
304+
tools: list[ToolSpec] | None = None,
305+
) -> ParsedResponse:
306+
"""Parse completion tokens back into a structured message.
307+
308+
``tools`` is the same list passed to ``render`` for this turn.
309+
XML-style formats (Qwen3.5, GLM, MiniMax, Laguna) render argument
310+
values verbatim inside ``<arg_value>`` tags with no quoting, so
311+
a value like ``true`` is ambiguous between bool and the string
312+
``"true"``. When ``tools`` is supplied, the parser consults each
313+
parameter's declared JSON-schema type to preserve string args
314+
verbatim. Without ``tools``, parsers fall back to the historical
315+
``json.loads``-with-text-fallback behavior.
316+
"""
302317
...
303318

304319
def get_stop_token_ids(self) -> list[int]:

renderers/client.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ def _prepare():
150150
completion_ids = choice.get("token_ids") or []
151151

152152
parsed = await _maybe_offload(
153-
renderer, lambda: renderer.parse_response(completion_ids)
153+
renderer, lambda: renderer.parse_response(completion_ids, tools=tools)
154154
)
155155

156156
# ChatCompletionLogProbs flatten: {"content": [{"logprob": ...}, ...]}

renderers/deepseek_v3.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,12 @@ def render_ids(
221221
add_generation_prompt=add_generation_prompt,
222222
).token_ids
223223

224-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
224+
def parse_response(
225+
self,
226+
token_ids: list[int],
227+
*,
228+
tools: list[ToolSpec] | None = None, # noqa: ARG002 — args land in a ```json fence, schema not needed
229+
) -> ParsedResponse:
225230
return parse_deepseek_v3(
226231
self._tokenizer,
227232
token_ids,

renderers/default.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,12 @@ def render_ids(
167167
messages, tools=tools, add_generation_prompt=add_generation_prompt
168168
)
169169

170-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
170+
def parse_response(
171+
self,
172+
token_ids: list[int],
173+
*,
174+
tools: list[ToolSpec] | None = None, # noqa: ARG002 — DefaultRenderer relies on configured tool_parser, schema not consulted here
175+
) -> ParsedResponse:
171176
# 1. Extract tool calls while we still have token ids (most formats
172177
# use special-token delimiters, so id-level matching is reliable).
173178
if self._tool_parser is not None:

renderers/glm45.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,12 @@ def render_ids(
214214
add_generation_prompt=add_generation_prompt,
215215
).token_ids
216216

217-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
217+
def parse_response(
218+
self,
219+
token_ids: list[int],
220+
*,
221+
tools: list[ToolSpec] | None = None,
222+
) -> ParsedResponse:
218223
return parse_glm(
219224
self._tokenizer,
220225
token_ids,
@@ -227,6 +232,7 @@ def parse_response(self, token_ids: list[int]) -> ParsedResponse:
227232
arg_key_end_id=self._arg_key_end,
228233
arg_value_id=self._arg_value,
229234
arg_value_end_id=self._arg_value_end,
235+
tools=tools,
230236
)
231237

232238
def get_stop_token_ids(self) -> list[int]:

renderers/glm5.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,12 @@ def render_ids(
226226
add_generation_prompt=add_generation_prompt,
227227
).token_ids
228228

229-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
229+
def parse_response(
230+
self,
231+
token_ids: list[int],
232+
*,
233+
tools: list[ToolSpec] | None = None,
234+
) -> ParsedResponse:
230235
return parse_glm(
231236
self._tokenizer,
232237
token_ids,
@@ -239,6 +244,7 @@ def parse_response(self, token_ids: list[int]) -> ParsedResponse:
239244
arg_key_end_id=self._arg_key_end,
240245
arg_value_id=self._arg_value,
241246
arg_value_end_id=self._arg_value_end,
247+
tools=tools,
242248
)
243249

244250
def get_stop_token_ids(self) -> list[int]:

renderers/gpt_oss.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,12 @@ def render_ids(
307307
add_generation_prompt=add_generation_prompt,
308308
).token_ids
309309

310-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
310+
def parse_response(
311+
self,
312+
token_ids: list[int],
313+
*,
314+
tools: list[ToolSpec] | None = None, # noqa: ARG002 — harmony args land in a JSON object, schema not needed
315+
) -> ParsedResponse:
311316
return parse_gpt_oss(
312317
self._tokenizer,
313318
token_ids,

renderers/kimi_k2.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -279,7 +279,12 @@ def render_ids(
279279
add_generation_prompt=add_generation_prompt,
280280
).token_ids
281281

282-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
282+
def parse_response(
283+
self,
284+
token_ids: list[int],
285+
*,
286+
tools: list[ToolSpec] | None = None, # noqa: ARG002 — section-JSON wire format quotes strings, schema not needed
287+
) -> ParsedResponse:
283288
return parse_kimi_k2(
284289
self._tokenizer,
285290
token_ids,

renderers/kimi_k25.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -904,7 +904,12 @@ def render_ids(
904904
add_generation_prompt=add_generation_prompt,
905905
).token_ids
906906

907-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
907+
def parse_response(
908+
self,
909+
token_ids: list[int],
910+
*,
911+
tools: list[ToolSpec] | None = None, # noqa: ARG002 — section-JSON wire format quotes strings, schema not needed
912+
) -> ParsedResponse:
908913
stop_ids: set[int] = {self._im_end}
909914
if self._endoftext is not None:
910915
stop_ids.add(self._endoftext)

renderers/laguna_xs2.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,12 @@ def render_ids(
238238
add_generation_prompt=add_generation_prompt,
239239
).token_ids
240240

241-
def parse_response(self, token_ids: list[int]) -> ParsedResponse:
241+
def parse_response(
242+
self,
243+
token_ids: list[int],
244+
*,
245+
tools: list[ToolSpec] | None = None,
246+
) -> ParsedResponse:
242247
return parse_laguna_xs2(
243248
self._tokenizer,
244249
token_ids,
@@ -247,6 +252,7 @@ def parse_response(self, token_ids: list[int]) -> ParsedResponse:
247252
think_end_id=self._think_end,
248253
tool_call_id=self._tool_call,
249254
tool_call_end_id=self._tool_call_end,
255+
tools=tools,
250256
)
251257

252258
def get_stop_token_ids(self) -> list[int]:

0 commit comments

Comments
 (0)