Skip to content

Commit 05baeee

Browse files
eicherseiji authored and kouroshHakha committed
[Serve][LLM] Tighten body-forwarding gate per /simplify
- router.py: drop the redundant else-branch (defaults already match).
- haproxy.py: `str(bool).lower()` instead of "true"/"false" ternary.
- haproxy_templates.py: flatten nested {% if %}{% if %} for tune.bufsize.
- lua.tmpl, test: drop WHAT-comment narration.

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
1 parent 8a14086 commit 05baeee

5 files changed

Lines changed: 8 additions & 20 deletions

File tree

python/ray/llm/_internal/serve/core/ingress/router.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -94,15 +94,11 @@ async def check_health(self):
9494

9595
@router_app.post("/internal/route")
9696
async def route(self, request: Request):
97-
# HAProxy only forwards a body when
98-
# RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY=1, so skip the read
99-
# entirely on the default path. See haproxy.py / lua template.
97+
body = None
98+
body_truncated = False
10099
if RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY:
101100
body = await request.body()
102101
body_truncated = _BODY_TRUNCATED_HEADER in request.headers
103-
else:
104-
body = None
105-
body_truncated = False
106102
try:
107103
host, port, replica_id = self._pick_replica(
108104
request_body=body, body_truncated=body_truncated

python/ray/serve/_private/haproxy.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -825,9 +825,7 @@ def _write_ingress_request_router_lua(
825825

826826
content = _load_lua_template().substitute(
827827
TIMEOUT_S=RAY_SERVE_HAPROXY_INGRESS_REQUEST_ROUTER_TIMEOUT_S,
828-
FORWARD_BODY="true"
829-
if RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY
830-
else "false",
828+
FORWARD_BODY=str(RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY).lower(),
831829
ROUTERS=_format_routers_lua(routers),
832830
REPLICA_TARGETS=_format_replica_targets_lua(targets),
833831
)

python/ray/serve/_private/haproxy_templates.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@
2828
nbthread {{ config.nbthread }}
2929
{%- if has_ingress_request_router %}
3030
lua-load-per-thread {{ ingress_request_router_lua_path }}
31-
{%- if ingress_request_router_forward_body %}
32-
tune.bufsize {{ ingress_request_router_bufsize }}
3331
{%- endif %}
32+
{%- if has_ingress_request_router and ingress_request_router_forward_body %}
33+
tune.bufsize {{ ingress_request_router_bufsize }}
3434
{%- endif %}
3535
{%- if config.enable_hap_optimization %}
3636
server-state-base {{ config.server_state_base }}

python/ray/serve/_private/ingress_request_router.lua.tmpl

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,9 @@
22
-- router. Templated at config-reload time; placeholders are filled in by
33
-- _write_ingress_request_router_lua in haproxy.py.
44
--
5-
-- Body forwarding is gated by FORWARD_BODY (rendered from
6-
-- RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY). When false (default), we
7-
-- POST an empty body to /internal/route and skip the
8-
-- `wait-for-body`/`tune.bufsize` cost on every routed request. When true,
9-
-- we forward the (possibly truncated) body so body-aware routing policies
10-
-- can read it; bodies exceeding tune.bufsize are sent with X-Body-Truncated
11-
-- since prefix-routing only needs the head.
5+
-- Bodies exceeding tune.bufsize are truncated; we forward what we have with
6+
-- X-Body-Truncated since prefix-routing only needs the head of the body.
7+
-- See RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY in constants.py.
128

139
local ROUTER_REQUEST_TIMEOUT_S = ${TIMEOUT_S}
1410
local FORWARD_BODY = ${FORWARD_BODY}

python/ray/serve/tests/test_haproxy_api.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -645,8 +645,6 @@ async def test_ingress_request_router_end_to_end(haproxy_api_cleanup, monkeypatc
645645
"""Run actual HAProxy against a fake router + two replicas; verify a POST
646646
is pinned to the replica the router selects, while a GET (which doesn't
647647
trigger the router-routed path) is not."""
648-
# Body forwarding to /internal/route is opt-in. This test asserts the body
649-
# made it through the router, so flip it on.
650648
monkeypatch.setattr(
651649
"ray.serve._private.haproxy.RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY",
652650
True,

0 commit comments

Comments
 (0)