Skip to content

Commit 0390ad4

Browse files
eicherseiji authored and kouroshHakha committed
[Serve][LLM] Drop LLM-side body-forwarding gate
The expensive part of body forwarding lives in HAProxy + Lua (the `wait-for-body` round-trip, the `tune.bufsize` connection cost, and the body re-emit on the socket). On the LLMRouter side, `await request.body()` on a `Content-Length: 0` request is essentially free (one async tick, returns `b""`), so gating it earns nothing.

This commit reverts the LLM-side branch and keeps the body read unconditional, making this PR Serve-only.

Signed-off-by: Seiji Eicher <seiji@anyscale.com>
1 parent 05baeee commit 0390ad4

3 files changed

Lines changed: 3 additions & 123 deletions

File tree

python/ray/llm/_internal/serve/core/ingress/router.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,6 @@
55

66
from ray import serve
77
from ray.serve._private.common import ReplicaID
8-
from ray.serve._private.constants import (
9-
RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY,
10-
)
118
from ray.serve.handle import DeploymentHandle
129

1310
_BODY_TRUNCATED_HEADER = "x-body-truncated"
@@ -29,6 +26,7 @@
2926

3027
router_app = FastAPI()
3128

29+
3230
@serve.ingress(router_app)
3331
class LLMRouter:
3432
"""Ingress request router for direct streaming.
@@ -94,11 +92,8 @@ async def check_health(self):
9492

9593
@router_app.post("/internal/route")
9694
async def route(self, request: Request):
97-
body = None
98-
body_truncated = False
99-
if RAY_SERVE_INGRESS_REQUEST_ROUTER_FORWARD_BODY:
100-
body = await request.body()
101-
body_truncated = _BODY_TRUNCATED_HEADER in request.headers
95+
body = await request.body()
96+
body_truncated = _BODY_TRUNCATED_HEADER in request.headers
10297
try:
10398
host, port, replica_id = self._pick_replica(
10499
request_body=body, body_truncated=body_truncated

python/ray/llm/tests/serve/cpu/deployments/routers/test_builder_ingress.py

Lines changed: 0 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -438,54 +438,6 @@ def test_direct_streaming_rejects_ingress_config(
438438
with pytest.raises(ValueError, match=match):
439439
build_openai_app(LLMServingArgs(llm_configs=[llm_config], **builder_kwargs))
440440

441-
def test_direct_streaming_rejects_multiple_llm_configs(
442-
self, llm_config, disable_placement_bundles, monkeypatch
443-
):
444-
monkeypatch.setattr(
445-
"ray.llm._internal.serve.core.ingress.builder."
446-
"RAY_SERVE_LLM_ENABLE_DIRECT_STREAMING",
447-
True,
448-
)
449-
other_llm_config = LLMConfig(
450-
model_loading_config=ModelLoadingConfig(model_id="other-model")
451-
)
452-
453-
with pytest.raises(
454-
ValueError,
455-
match="currently supports exactly one LLM config",
456-
):
457-
build_openai_app(LLMServingArgs(llm_configs=[llm_config, other_llm_config]))
458-
459-
@pytest.mark.parametrize(
460-
("builder_kwargs", "match"),
461-
[
462-
(
463-
{"ingress_deployment_config": {"num_replicas": 2}},
464-
"does not support ingress_deployment_config",
465-
),
466-
(
467-
{"ingress_cls_config": {"ingress_extra_kwargs": {"key": "value"}}},
468-
"does not support ingress_cls_config",
469-
),
470-
],
471-
)
472-
def test_direct_streaming_rejects_ingress_config(
473-
self,
474-
llm_config,
475-
disable_placement_bundles,
476-
monkeypatch,
477-
builder_kwargs,
478-
match,
479-
):
480-
monkeypatch.setattr(
481-
"ray.llm._internal.serve.core.ingress.builder."
482-
"RAY_SERVE_LLM_ENABLE_DIRECT_STREAMING",
483-
True,
484-
)
485-
486-
with pytest.raises(ValueError, match=match):
487-
build_openai_app(LLMServingArgs(llm_configs=[llm_config], **builder_kwargs))
488-
489441

490442
class TestIngressScaleToZero:
491443
"""Tests for ingress scale-to-zero behavior when all models have min_replicas=0."""

python/ray/serve/_private/controller.py

Lines changed: 0 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1552,73 +1552,6 @@ def _get_target_groups_for_app(
15521552

15531553
return target_groups
15541554

1555-
def _get_target_groups_for_app_with_ingress_request_router(
1556-
self,
1557-
app_name: str,
1558-
route_prefix: str,
1559-
ingress_request_router_deployment_name: str,
1560-
) -> List[TargetGroup]:
1561-
"""Create target groups for ingress bypass mode.
1562-
1563-
Ingress request router targets serve /internal/route for Lua
1564-
routing decisions. Main targets serve data plane traffic via
1565-
direct ingress.
1566-
"""
1567-
ingress_request_router_targets = self._get_targets_for_protocol(
1568-
self._get_running_replica_details_for_deployment(
1569-
app_name, ingress_request_router_deployment_name
1570-
),
1571-
RequestProtocol.HTTP,
1572-
)
1573-
1574-
backend_deployment_names = (
1575-
deployment_name
1576-
for deployment_name in self.application_state_manager.get_deployments(
1577-
app_name
1578-
)
1579-
ingress_request_router_targets = None
1580-
else:
1581-
ingress_request_router_targets = self._get_targets_for_protocol(
1582-
self._get_running_replica_details_for_deployment(
1583-
app_name, ingress_request_router_deployment_name
1584-
),
1585-
RequestProtocol.HTTP,
1586-
)
1587-
1588-
target_groups = []
1589-
1590-
# Create targets for each protocol
1591-
http_targets = self._get_targets_for_protocol(
1592-
replica_details, RequestProtocol.HTTP
1593-
)
1594-
if http_targets:
1595-
target_groups.append(
1596-
TargetGroup(
1597-
protocol=RequestProtocol.HTTP,
1598-
route_prefix=route_prefix,
1599-
targets=http_targets,
1600-
app_name=app_name,
1601-
ingress_request_router_targets=ingress_request_router_targets,
1602-
)
1603-
)
1604-
1605-
# Add gRPC targets if enabled
1606-
if is_grpc_enabled(self.get_grpc_config()):
1607-
grpc_targets = self._get_targets_for_protocol(
1608-
replica_details, RequestProtocol.GRPC
1609-
)
1610-
if grpc_targets:
1611-
target_groups.append(
1612-
TargetGroup(
1613-
protocol=RequestProtocol.GRPC,
1614-
route_prefix=route_prefix,
1615-
targets=grpc_targets,
1616-
app_name=app_name,
1617-
)
1618-
)
1619-
1620-
return target_groups
1621-
16221555
def _get_target_groups_for_app_with_no_running_replicas(
16231556
self, route_prefix: str, app_name: str
16241557
) -> List[TargetGroup]:

0 commit comments

Comments
 (0)