Sanitize message author_name for OpenAI Chat Completions

Prachig-Microsoft · Copilot · Prachig-Microsoft · commit 2caced57a2e7 · 2026-06-12T22:18:28.000+05:30
OpenAI's Chat Completions endpoint validates the message `name` field against the pattern `^[^\s<|\\/>]+$`. Our agents have display names that contain whitespace (e.g. `Chief Architect`, `AKS Expert`), which caused a 400 BadRequest after the default client was switched to `AzureOpenAIChatClientWithRetry`.

Add `_sanitize_author_name` / `_sanitize_author_names` helpers that replace runs of disallowed characters (whitespace, `<`, `|`, `\`, `/`, `>`) with a single underscore and strip leading/trailing underscores. Names that sanitize down to an empty string are dropped entirely so the field can be omitted from the request.

The sanitizer is applied inside `AzureOpenAIChatClientWithRetry._inner_get_response` after context trimming (and again after the trim-fallback retry inside `_non_streaming_with_retry`) so the wire format passes validation while in-memory `Message` objects keep their original display names for orchestration logic. Originals are not mutated on the normal path — modified messages are shallow-copied before the name is rewritten. The one exception is object-form messages whose copy or attribute assignment raises: for those, a last-resort fallback tries to set the sanitized name on the original object in place.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/processor/src/libs/agent_framework/azure_openai_response_retry.py b/src/processor/src/libs/agent_framework/azure_openai_response_retry.py
@@ -6,9 +6,11 @@
 from __future__ import annotations
 
 import asyncio
+import copy
 import logging
 import os
 import random
+import re
 from dataclasses import dataclass
 from typing import Any, MutableSequence
 
@@ -265,6 +267,85 @@ def _set_message_text(message: Any, new_text: str) -> Any:
     return message
 
 
+# OpenAI Chat Completions requires message `name` to match this pattern:
+#   ^[^\s<|\\/>]+$
+# Agent display names like "Chief Architect" contain spaces and are rejected.
+# We replace any run of disallowed characters with a single underscore so the
+# wire-format passes validation while preserving readability.
+_OPENAI_NAME_INVALID_CHARS = re.compile(r"[\s<|\\/>]+")
+
+
+def _sanitize_author_name(name: Any) -> Any:
+    """Sanitize a single author_name for OpenAI Chat Completions.
+
+    Returns the original value when it is not a string, is empty, or is already
+    valid. Otherwise returns a string with disallowed characters collapsed to
+    underscores and surrounding underscores stripped. If the result would be
+    empty (e.g. name was all whitespace), returns ``None`` so the field can be
+    dropped entirely.
+    """
+    if not isinstance(name, str) or not name:
+        return name
+    if not _OPENAI_NAME_INVALID_CHARS.search(name):
+        return name
+    sanitized = _OPENAI_NAME_INVALID_CHARS.sub("_", name).strip("_")
+    return sanitized or None
+
+
+def _sanitize_author_names(
+    messages: MutableSequence[Any],
+) -> MutableSequence[Any] | list[Any]:
+    """Return ``messages`` with each entry's author_name sanitized.
+
+    - For dict-shaped messages, the ``name`` key is rewritten on a shallow copy
+      (and removed if the sanitized value would be empty).
+    - For ``agent_framework.Message``-like objects, ``author_name`` is rewritten
+      on a shallow copy so the originals (which may live in long-lived agent
+      state) are not mutated.
+    - Messages that don't need sanitization are returned unchanged. If nothing
+      needed sanitization the original sequence is returned as-is.
+    """
+    out: list[Any] = []
+    any_changed = False
+    for m in messages:
+        # Dict form: {"role": ..., "name": ..., "content": ...}
+        if isinstance(m, dict):
+            name = m.get("name")
+            if isinstance(name, str):
+                sanitized = _sanitize_author_name(name)
+                if sanitized != name:
+                    new_m = dict(m)
+                    if sanitized:
+                        new_m["name"] = sanitized
+                    else:
+                        new_m.pop("name", None)
+                    out.append(new_m)
+                    any_changed = True
+                    continue
+            out.append(m)
+            continue
+
+        # Object form (agent_framework Message): has .author_name attribute.
+        name = getattr(m, "author_name", None)
+        if isinstance(name, str):
+            sanitized = _sanitize_author_name(name)
+            if sanitized != name:
+                try:
+                    new_m = copy.copy(m)
+                    new_m.author_name = sanitized
+                    out.append(new_m)
+                    any_changed = True
+                    continue
+                except Exception:
+                    # Last-resort in-place fallback if copy/setattr is blocked.
+                    try:
+                        m.author_name = sanitized
+                    except Exception:
+                        pass
+        out.append(m)
+    return out if any_changed else messages
+
+
 @dataclass(frozen=True)
 class ContextTrimConfig:
     """Character-budget based context trimming.
@@ -709,6 +790,11 @@ def _inner_get_response(
             )
             effective_messages = messages
 
+        # OpenAI Chat Completions validates message `name` against ^[^\s<|\\/>]+$.
+        # Sanitize before sending so agent display names like "Chief Architect"
+        # don't trip a 400 BadRequest. Originals are shallow-copied, not mutated.
+        effective_messages = _sanitize_author_names(effective_messages)
+
         if stream:
             # For streaming, delegate to the parent which returns a proper
             # ResponseStream. The framework checks isinstance(result, ResponseStream)
@@ -813,6 +899,8 @@ async def _non_streaming_with_retry(
                 len(original_messages),
                 len(trimmed),
             )
+            # Re-sanitize names on the freshly-trimmed messages before retry.
+            trimmed = _sanitize_author_names(trimmed)
             trim_delay = min(
                 self._retry_config.base_delay_seconds,
                 self._retry_config.max_delay_seconds,
diff --git a/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py b/src/processor/src/tests/unit/libs/agent_framework/test_azure_openai_response_retry_utils.py
@@ -7,6 +7,8 @@
     RateLimitRetryConfig,
     _looks_like_context_length,
     _looks_like_rate_limit,
+    _sanitize_author_name,
+    _sanitize_author_names,
     _trim_messages,
     _truncate_text,
 )
@@ -85,3 +87,96 @@ def test_trim_messages_keeps_system_and_tails_and_truncates_long_messages() -> N
     # The last message is intentionally never truncated (agent needs full context).
     assert len(trimmed[1]["content"]) <= 50
     assert len(trimmed[2]["content"]) == 100
+
+
+# ---------------------------------------------------------------------------
+# author_name sanitization (Chat Completions name pattern: ^[^\s<|\\/>]+$)
+# ---------------------------------------------------------------------------
+
+
+def test_sanitize_author_name_passthrough_for_valid_names() -> None:
+    assert _sanitize_author_name("Coordinator") == "Coordinator"
+    assert _sanitize_author_name("ResultGenerator") == "ResultGenerator"
+    assert _sanitize_author_name("agent-1_2.x") == "agent-1_2.x"
+
+
+def test_sanitize_author_name_replaces_whitespace_and_specials() -> None:
+    assert _sanitize_author_name("Chief Architect") == "Chief_Architect"
+    assert _sanitize_author_name("AKS Expert") == "AKS_Expert"
+    # Tabs/newlines collapse to a single underscore.
+    assert _sanitize_author_name("a\tb\nc") == "a_b_c"
+    # Each disallowed char in the pattern is replaced.
+    assert _sanitize_author_name("foo/bar\\baz|qux<x>y") == "foo_bar_baz_qux_x_y"
+
+
+def test_sanitize_author_name_handles_edge_cases() -> None:
+    assert _sanitize_author_name(None) is None
+    assert _sanitize_author_name("") == ""
+    assert _sanitize_author_name(123) == 123
+    # All-invalid input collapses to empty -> None (so callers drop the field).
+    assert _sanitize_author_name("   ") is None
+    # Leading/trailing underscores from sanitization are stripped.
+    assert _sanitize_author_name("  Chief  Architect  ") == "Chief_Architect"
+
+
+def test_sanitize_author_names_dict_messages_shallow_copy() -> None:
+    original = [
+        {"role": "system", "content": "sys"},
+        {"role": "assistant", "name": "Chief Architect", "content": "hi"},
+        {"role": "user", "name": "Coordinator", "content": "ok"},
+    ]
+    out = _sanitize_author_names(original)
+
+    # New list when changes happened.
+    assert out is not original
+    # Originals untouched.
+    assert original[1]["name"] == "Chief Architect"
+    # Unchanged messages share identity with originals (shallow copy only when needed).
+    assert out[0] is original[0]
+    assert out[2] is original[2]
+    # Changed message is a new dict with sanitized name.
+    assert out[1] is not original[1]
+    assert out[1]["name"] == "Chief_Architect"
+    assert out[1]["content"] == "hi"
+
+
+def test_sanitize_author_names_dict_messages_drops_empty_name() -> None:
+    original = [
+        {"role": "assistant", "name": "   ", "content": "hello"},
+    ]
+    out = _sanitize_author_names(original)
+    assert "name" not in out[0]
+    assert out[0]["content"] == "hello"
+
+
+def test_sanitize_author_names_returns_input_when_nothing_changes() -> None:
+    original = [
+        {"role": "system", "content": "sys"},
+        {"role": "assistant", "name": "Coordinator", "content": "hi"},
+    ]
+    out = _sanitize_author_names(original)
+    # Same sequence object returned to avoid pointless copies.
+    assert out is original
+
+
+def test_sanitize_author_names_object_messages_shallow_copy() -> None:
+    class _Msg:
+        def __init__(self, role: str, author_name: str | None, content: str) -> None:
+            self.role = role
+            self.author_name = author_name
+            self.content = content
+
+    m1 = _Msg("assistant", "Chief Architect", "hi")
+    m2 = _Msg("assistant", "Coordinator", "ok")
+    original = [m1, m2]
+
+    out = _sanitize_author_names(original)
+
+    # Original object untouched.
+    assert m1.author_name == "Chief Architect"
+    # Changed message replaced with a shallow copy carrying sanitized name.
+    assert out[0] is not m1
+    assert out[0].author_name == "Chief_Architect"
+    assert out[0].content == "hi"
+    # Unchanged message is the same instance.
+    assert out[1] is m2