Skip to content

Commit 24067fd

Browse files
committed
fix(vllm-router): fix routing oddities early
Fix issues when routing to older versions of vLLM or to Envoy AI Gateway.
1 parent ba1c6b3 commit 24067fd

1 file changed

Lines changed: 93 additions & 4 deletions

File tree

src/vllm_router/services/request_service/rewriter.py

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"""
2020

2121
import abc
22+
import json
2223

2324
from vllm_router.log import init_logger
2425
from vllm_router.utils import SingletonABCMeta
@@ -70,6 +71,92 @@ def rewrite_request(self, request_body: str, model: str, endpoint: str) -> str:
7071
return request_body
7172

7273

74+
class MessagesRewriter(RequestRewriter):
    """
    A request rewriter for Anthropic Messages API and OpenAI Chat Completions API
    requests that normalizes messages before forwarding to the backend.

    Normalizations:
    - Filters out messages with empty/null content (some backends reject them).
    - For ``/v1/messages``, promotes ``role: "system"`` entries in the messages
      array to the top-level ``system`` parameter (handles the ``mid-conversation-system``
      beta format sent by e.g. Claude Code).

    Anything the rewriter cannot safely normalize (other endpoints, invalid JSON,
    a JSON value that is not an object, a missing/empty/malformed ``messages``
    array) is returned unchanged so the backend can validate it itself.
    """

    _MESSAGES_ENDPOINT = "/v1/messages"
    _CHAT_COMPLETIONS_ENDPOINTS = ("/v1/chat/completions", "/chat/completions")

    def rewrite_request(self, request_body: str, model: str, endpoint: str) -> str:
        """
        Normalize the ``messages`` array of a request body.

        Args:
            request_body: The raw JSON request body.
            model: The requested model name (not used by this rewriter).
            endpoint: The request path, e.g. ``/v1/messages``.

        Returns:
            The re-serialized, normalized body for supported endpoints, or the
            original ``request_body`` when no rewrite applies.
        """
        is_messages_api = endpoint == self._MESSAGES_ENDPOINT
        # Unrelated endpoints are never rewritten, so skip the JSON parse entirely.
        if not is_messages_api and endpoint not in self._CHAT_COMPLETIONS_ENDPOINTS:
            return request_body

        try:
            body = json.loads(request_body)
        except json.JSONDecodeError:
            return request_body

        # Valid JSON is not necessarily an object (it may be a list, string, ...).
        if not isinstance(body, dict):
            return request_body

        messages = body.get("messages")
        if not messages or not isinstance(messages, list):
            return request_body

        # Entries that are not JSON objects cannot be inspected; forward the
        # request untouched rather than failing inside the router.
        if not all(isinstance(m, dict) for m in messages):
            return request_body

        # Guard: skip messages with empty content (some backends reject them).
        messages = [m for m in messages if _message_has_content(m)]
        if not messages:
            return request_body

        # For Anthropic Messages API, also promote role: "system" to the
        # top-level system param.
        if is_messages_api:
            system_messages = [m for m in messages if m.get("role") == "system"]
            if system_messages:
                system_content = _join_system_content(system_messages)
                messages = [m for m in messages if m.get("role") != "system"]
                # Nothing textual to promote: leave any existing system prompt
                # alone instead of adding an empty block or a trailing newline.
                if system_content:
                    body["system"] = self._merge_system(
                        body.get("system"), system_content
                    )
                logger.info(
                    "Promoted %d system message(s) from messages array to top-level system param",
                    len(system_messages),
                )

        body["messages"] = messages
        return json.dumps(body)

    @staticmethod
    def _merge_system(existing, promoted: str):
        """
        Combine an existing top-level ``system`` value with promoted system text.

        A non-empty string is extended with a newline separator, a list of
        content blocks gets an extra text block, and any other value (missing,
        null, empty string, unexpected type) is replaced by the promoted text.
        """
        if isinstance(existing, str) and existing:
            return existing + "\n" + promoted
        if isinstance(existing, list):
            return existing + [{"type": "text", "text": promoted}]
        return promoted
134+
135+
136+
def _message_has_content(message: dict) -> bool:
137+
content = message.get("content")
138+
if content is None:
139+
return False
140+
if isinstance(content, str):
141+
return content.strip() != ""
142+
if isinstance(content, list):
143+
return len(content) > 0
144+
return bool(content)
145+
146+
147+
def _join_system_content(system_messages: list[dict]) -> str:
148+
parts = []
149+
for msg in system_messages:
150+
content = msg.get("content")
151+
if isinstance(content, str):
152+
parts.append(content)
153+
elif isinstance(content, list):
154+
for block in content:
155+
if isinstance(block, dict) and block.get("type") == "text":
156+
parts.append(block.get("text", ""))
157+
return "\n".join(parts)
158+
159+
73160
# Singleton instance
74161
_request_rewriter_instance = None
75162

@@ -87,10 +174,12 @@ def initialize_request_rewriter(rewriter_type: str, **kwargs) -> RequestRewriter
87174
"""
88175
global _request_rewriter_instance
89176

90-
# TODO: Implement different rewriter types
91-
# For now, just use the NoopRequestRewriter
92-
_request_rewriter_instance = NoopRequestRewriter()
93-
logger.info(f"Initialized placeholder request rewriter (type: {rewriter_type})")
177+
if rewriter_type == "messages":
178+
_request_rewriter_instance = MessagesRewriter()
179+
logger.info("Initialized MessagesRewriter")
180+
else:
181+
_request_rewriter_instance = NoopRequestRewriter()
182+
logger.info(f"Initialized placeholder request rewriter (type: {rewriter_type})")
94183

95184
return _request_rewriter_instance
96185

0 commit comments

Comments
 (0)