Skip to content

Commit 2caced5

Browse files
Sanitize message author_name for OpenAI Chat Completions
OpenAI's Chat Completions endpoint validates the message `name` field against the pattern `^[^\s<|\\/>]+$`. Our agents have display names with whitespace (e.g. `Chief Architect`, `AKS Expert`), which caused a 400 BadRequest after switching the default client to `AzureOpenAIChatClientWithRetry`. Add `_sanitize_author_name` / `_sanitize_author_names` helpers that replace runs of disallowed characters (whitespace, `<`, `|`, `\`, `/`, `>`) with a single underscore and strip leading/trailing underscores. Names that sanitize down to an empty string are dropped entirely so the field can be omitted from the request. The sanitizer is applied inside `AzureOpenAIChatClientWithRetry._inner_get_response` after context trimming (and again after the trim-fallback retry inside `_non_streaming_with_retry`) so the wire format passes validation while in-memory `Message` objects keep their original display names for orchestration logic. Originals are not mutated in the normal path — modified messages are shallow-copied before the name is rewritten; only if copying the message or assigning to the copy fails does the helper fall back to rewriting `author_name` in place. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 3ab3076 commit 2caced5

2 files changed

Lines changed: 183 additions & 0 deletions

File tree

src/processor/src/libs/agent_framework/azure_openai_response_retry.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
from __future__ import annotations
77

88
import asyncio
9+
import copy
910
import logging
1011
import os
1112
import random
13+
import re
1214
from dataclasses import dataclass
1315
from typing import Any, MutableSequence
1416

@@ -265,6 +267,85 @@ def _set_message_text(message: Any, new_text: str) -> Any:
265267
return message
266268

267269

270+
# The Chat Completions API only accepts a message `name` that matches
#     ^[^\s<|\\/>]+$
# i.e. no whitespace and none of `<`, `|`, `\`, `/`, `>`. Display names such
# as "Chief Architect" break that rule, so every run of forbidden characters
# is swapped for one underscore: valid on the wire, still readable.
_OPENAI_NAME_INVALID_CHARS = re.compile(r"[\s<|\\/>]+")


def _sanitize_author_name(name: Any) -> Any:
    """Make one author_name acceptable to OpenAI Chat Completions.

    Non-string values, the empty string, and names that contain no disallowed
    characters come back untouched. Any other name has each run of disallowed
    characters replaced by a single underscore, after which leading and
    trailing underscores are removed. When that leaves nothing (for example an
    all-whitespace name) ``None`` is returned so callers can omit the field.
    """
    needs_rewrite = (
        isinstance(name, str)
        and bool(name)
        and _OPENAI_NAME_INVALID_CHARS.search(name) is not None
    )
    if not needs_rewrite:
        return name

    collapsed = _OPENAI_NAME_INVALID_CHARS.sub("_", name)
    trimmed = collapsed.strip("_")
    return trimmed if trimmed else None
293+
294+
295+
def _sanitize_author_names(
    messages: MutableSequence[Any],
) -> MutableSequence[Any] | list[Any]:
    """Return ``messages`` with each entry's author name made wire-valid.

    Two message shapes are handled:

    - Dict-shaped messages: the ``name`` key is rewritten on a shallow copy
      (``dict(m)``), or removed from the copy when nothing valid remains.
    - Any other object exposing a string ``author_name`` attribute: the
      attribute is rewritten on a ``copy.copy`` of the object so the original
      is left alone.

    A name that is the empty string is treated like a name that sanitizes to
    nothing: the Chat Completions pattern requires at least one character, so
    the field is dropped (dict) or set to ``None`` (object).

    If copying the object or assigning to the copy raises, the function falls
    back to assigning ``author_name`` on the original object in place. If that
    also raises, the message is passed through unchanged and a warning is
    logged, because the request is then likely to be rejected by the API.

    Args:
        messages: Sequence of dict messages and/or message objects.

    Returns:
        The original ``messages`` object when no entry was replaced, otherwise
        a new list in which only the affected entries are copies.
    """
    out: list[Any] = []
    any_changed = False
    for m in messages:
        is_dict = isinstance(m, dict)
        name = m.get("name") if is_dict else getattr(m, "author_name", None)
        if not isinstance(name, str):
            out.append(m)
            continue

        # _sanitize_author_name passes "" through unchanged, but an empty name
        # cannot match a one-or-more-characters pattern, so normalise it to
        # None ("drop the field") here.
        sanitized = _sanitize_author_name(name) or None
        if sanitized == name:
            out.append(m)
            continue

        if is_dict:
            new_m = dict(m)
            if sanitized is None:
                new_m.pop("name", None)
            else:
                new_m["name"] = sanitized
            out.append(new_m)
            any_changed = True
            continue

        # Only the copy and the attribute write can legitimately fail (e.g. a
        # type that blocks copying or attribute assignment), so keep the try
        # body to exactly those two statements.
        try:
            new_m = copy.copy(m)
            new_m.author_name = sanitized
        except Exception:
            # Deliberate best-effort boundary: never let sanitization itself
            # abort the request, but do leave a trace of what happened.
            log = logging.getLogger(__name__)
            try:
                m.author_name = sanitized
            except Exception:
                log.warning(
                    "Could not sanitize author_name %r on %s; sending it unchanged",
                    name,
                    type(m).__name__,
                    exc_info=True,
                )
            else:
                log.debug(
                    "Copy of %s failed; author_name %r was rewritten in place",
                    type(m).__name__,
                    name,
                )
            out.append(m)
            continue

        out.append(new_m)
        any_changed = True

    return out if any_changed else messages
347+
348+
268349
@dataclass(frozen=True)
269350
class ContextTrimConfig:
270351
"""Character-budget based context trimming.
@@ -709,6 +790,11 @@ def _inner_get_response(
709790
)
710791
effective_messages = messages
711792

793+
# OpenAI Chat Completions validates message `name` against ^[^\s<|\\/>]+$.
794+
# Sanitize before sending so agent display names like "Chief Architect"
795+
# don't trip a 400 BadRequest. Originals are shallow-copied, not mutated.
796+
effective_messages = _sanitize_author_names(effective_messages)
797+
712798
if stream:
713799
# For streaming, delegate to the parent which returns a proper
714800
# ResponseStream. The framework checks isinstance(result, ResponseStream)
@@ -813,6 +899,8 @@ async def _non_streaming_with_retry(
813899
len(original_messages),
814900
len(trimmed),
815901
)
902+
# Re-sanitize names on the freshly-trimmed messages before retry.
903+
trimmed = _sanitize_author_names(trimmed)
816904
trim_delay = min(
817905
self._retry_config.base_delay_seconds,
818906
self._retry_config.max_delay_seconds,

src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
RateLimitRetryConfig,
88
_looks_like_context_length,
99
_looks_like_rate_limit,
10+
_sanitize_author_name,
11+
_sanitize_author_names,
1012
_trim_messages,
1113
_truncate_text,
1214
)
@@ -85,3 +87,96 @@ def test_trim_messages_keeps_system_and_tails_and_truncates_long_messages() -> N
8587
# The last message is intentionally never truncated (agent needs full context).
8688
assert len(trimmed[1]["content"]) <= 50
8789
assert len(trimmed[2]["content"]) == 100
90+
91+
92+
# ---------------------------------------------------------------------------
93+
# author_name sanitization (Chat Completions name pattern: ^[^\s<|\\/>]+$)
94+
# ---------------------------------------------------------------------------
95+
96+
97+
def test_sanitize_author_name_passthrough_for_valid_names() -> None:
    """Names that already satisfy the pattern are returned as given."""
    for valid in ("Coordinator", "ResultGenerator", "agent-1_2.x"):
        assert _sanitize_author_name(valid) == valid
101+
102+
103+
def test_sanitize_author_name_replaces_whitespace_and_specials() -> None:
    """Runs of disallowed characters become a single underscore."""
    expectations = {
        "Chief Architect": "Chief_Architect",
        "AKS Expert": "AKS_Expert",
        # Tabs and newlines are whitespace too and collapse the same way.
        "a\tb\nc": "a_b_c",
        # Every special character named in the pattern gets replaced.
        "foo/bar\\baz|qux<x>y": "foo_bar_baz_qux_x_y",
    }
    for raw, cleaned in expectations.items():
        assert _sanitize_author_name(raw) == cleaned
110+
111+
112+
def test_sanitize_author_name_handles_edge_cases() -> None:
    """Non-strings, empty input, and all-invalid input are handled."""
    # Values that are not non-empty strings pass straight through.
    none_result = _sanitize_author_name(None)
    assert none_result is None
    empty_result = _sanitize_author_name("")
    assert empty_result == ""
    number_result = _sanitize_author_name(123)
    assert number_result == 123

    # Input made only of invalid characters leaves nothing -> None, which
    # tells callers to drop the field.
    blank_result = _sanitize_author_name(" ")
    assert blank_result is None

    # Underscores produced at either end by sanitization are trimmed away.
    padded_result = _sanitize_author_name(" Chief Architect ")
    assert padded_result == "Chief_Architect"
120+
121+
122+
def test_sanitize_author_names_dict_messages_shallow_copy() -> None:
    """Only the dict whose name changes is copied; the rest are shared."""
    system_msg = {"role": "system", "content": "sys"}
    architect_msg = {"role": "assistant", "name": "Chief Architect", "content": "hi"}
    coordinator_msg = {"role": "user", "name": "Coordinator", "content": "ok"}
    original = [system_msg, architect_msg, coordinator_msg]

    out = _sanitize_author_names(original)

    # A change occurred, so a fresh list is handed back.
    assert out is not original
    # The input dict still carries its display name.
    assert original[1]["name"] == "Chief Architect"
    # Entries that needed no rewrite are the very same objects.
    assert out[0] is original[0]
    assert out[2] is original[2]
    # The rewritten entry is a distinct dict with the sanitized name.
    rewritten = out[1]
    assert rewritten is not original[1]
    assert rewritten["name"] == "Chief_Architect"
    assert rewritten["content"] == "hi"
141+
142+
143+
def test_sanitize_author_names_dict_messages_drops_empty_name() -> None:
    """A name that sanitizes to nothing is removed from the dict."""
    blank_named = {"role": "assistant", "name": " ", "content": "hello"}
    result = _sanitize_author_names([blank_named])[0]
    assert "name" not in result
    assert result["content"] == "hello"
150+
151+
152+
def test_sanitize_author_names_returns_input_when_nothing_changes() -> None:
    """With nothing to rewrite, the input sequence itself is returned."""
    already_valid = [
        {"role": "system", "content": "sys"},
        {"role": "assistant", "name": "Coordinator", "content": "hi"},
    ]
    # Identity, not just equality: no pointless copy is made.
    assert _sanitize_author_names(already_valid) is already_valid
160+
161+
162+
def test_sanitize_author_names_object_messages_shallow_copy() -> None:
    """Object messages are shallow-copied only when their name changes."""

    class _Msg:
        def __init__(self, role: str, author_name: str | None, content: str) -> None:
            self.role = role
            self.author_name = author_name
            self.content = content

    needs_fix = _Msg("assistant", "Chief Architect", "hi")
    already_ok = _Msg("assistant", "Coordinator", "ok")

    out = _sanitize_author_names([needs_fix, already_ok])
    fixed, untouched = out[0], out[1]

    # The source object keeps its display name.
    assert needs_fix.author_name == "Chief Architect"
    # The rewritten entry is a separate object carrying the sanitized name.
    assert fixed is not needs_fix
    assert fixed.author_name == "Chief_Architect"
    assert fixed.content == "hi"
    # The entry that needed no change is the identical instance.
    assert untouched is already_ok

0 commit comments

Comments
 (0)