Skip to content

Commit e80211d

Browse files
committed
Cleanup
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
1 parent 697bcad commit e80211d

4 files changed

Lines changed: 186 additions & 364 deletions

File tree

python/ray/llm/_internal/serve/core/ingress/builder.py

Lines changed: 25 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -28,70 +28,32 @@
2828
logger = get_logger(__name__)
2929

3030

31-
def _get_llm_deployment_names(llm_deployments: List[Application]) -> List[str]:
32-
return [app._bound_deployment.name for app in llm_deployments]
33-
34-
35-
def _validate_direct_streaming_llm_configs(llm_configs: List[LLMConfig]) -> None:
36-
"""Validate temporary direct-streaming app-shape constraints."""
37-
if len(llm_configs) > 1:
38-
raise ValueError(
39-
"RAY_SERVE_LLM_ENABLE_DIRECT_STREAMING currently supports exactly one "
40-
"LLM config. Multi-model direct streaming requires composing multiple "
41-
"LLMServer deployments into the main application graph, which is not "
42-
"supported yet."
43-
)
44-
45-
46-
def _build_direct_streaming_llm_deployments(
47-
llm_configs: List[LLMConfig],
48-
) -> List[Application]:
49-
"""Build LLMServer deployments with late-bound ASGI ingress enabled."""
50-
deployments = []
51-
for llm_config in llm_configs:
52-
server_cls = llm_config.server_cls or LLMServer
53-
deployments.append(
54-
build_llm_deployment(
55-
llm_config,
56-
deployment_cls=serve.ingress(FastAPI())(server_cls),
57-
)
58-
)
59-
return deployments
def _build_direct_streaming_llm_deployment(llm_config: LLMConfig) -> Application:
    """Build the LLMServer deployment with late-bound ASGI ingress enabled.

    Args:
        llm_config: Config of the model to serve. Its ``server_cls`` is used
            when truthy; otherwise ``LLMServer`` is used.

    Returns:
        The result of ``build_llm_deployment`` for ``llm_config``, with the
        server class wrapped by ``serve.ingress(FastAPI())`` passed as
        ``deployment_cls``.
    """
    base_cls = llm_config.server_cls
    if not base_cls:
        base_cls = LLMServer

    # The server class is wrapped with a new, empty FastAPI app so the
    # deployment itself acts as the ingress.
    ingress_cls = serve.ingress(FastAPI())(base_cls)
    return build_llm_deployment(llm_config, deployment_cls=ingress_cls)
6038

6139

62-
def build_openai_ingress_request_router(
63-
builder_config: dict,
64-
*,
65-
llm_deployment_names: List[str],
66-
) -> Application:
40+
def _build_openai_ingress_request_router(*, llm_deployment_name: str) -> Application:
6741
"""Build the ingress request router peer for OpenAI compatible LLM apps.
6842
69-
The returned Application should be attached to the ingress application with
43+
The returned Application is attached to the ingress application with
7044
``Application._with_ingress_request_router``.
7145
"""
72-
builder_config = LLMServingArgs.model_validate(builder_config)
73-
llm_configs = builder_config.llm_configs
74-
_validate_direct_streaming_builder_config(builder_config)
75-
7646
from ray.llm._internal.serve.core.ingress.router import LLMRouter
7747

78-
num_ingress_request_router_replicas = 1
79-
logger.info(
80-
"Creating "
81-
f"{num_ingress_request_router_replicas} ingress request router "
82-
"replicas (LLMRouter)"
83-
)
48+
logger.info("Creating 1 ingress request router replica (LLMRouter)")
8449

85-
# Late-bind by deployment name to avoid pulling LLMServer Applications into
86-
# the router's recursive build.
50+
# Late-bind by deployment name to avoid pulling the LLMServer Application
51+
# into the router's recursive build.
8752
return serve.deployment(
8853
LLMRouter,
89-
num_replicas=num_ingress_request_router_replicas,
54+
num_replicas=1,
9055
max_ongoing_requests=1000,
91-
).bind(
92-
llm_deployment_names=llm_deployment_names,
93-
llm_configs_pre=llm_configs,
94-
)
56+
).bind(llm_deployment_name=llm_deployment_name)
9557

9658

9759
class IngressClsConfig(BaseModelExtended):
@@ -176,7 +138,13 @@ def _validate_model_ids(self):
176138
def _validate_direct_streaming_builder_config(
177139
builder_config: LLMServingArgs,
178140
) -> None:
179-
_validate_direct_streaming_llm_configs(builder_config.llm_configs)
141+
if len(builder_config.llm_configs) > 1:
142+
raise ValueError(
143+
"RAY_SERVE_LLM_ENABLE_DIRECT_STREAMING currently supports exactly one "
144+
"LLM config. Multi-model direct streaming requires composing multiple "
145+
"LLMServer deployments into the main application graph, which is not "
146+
"supported yet."
147+
)
180148

181149
if builder_config.ingress_deployment_config:
182150
raise ValueError(
@@ -218,15 +186,14 @@ def build_openai_app(builder_config: dict) -> Application:
218186
# before the regular OpenAiIngress wiring.
219187
if RAY_SERVE_LLM_ENABLE_DIRECT_STREAMING:
220188
_validate_direct_streaming_builder_config(builder_config)
221-
direct_deployments = _build_direct_streaming_llm_deployments(llm_configs)
189+
direct_deployment = _build_direct_streaming_llm_deployment(llm_configs[0])
222190
logger.info(
223191
"Direct streaming enabled: "
224192
"LLMServer=ingress, LLMRouter=ingress_request_router"
225193
)
226-
return direct_deployments[0]._with_ingress_request_router(
227-
build_openai_ingress_request_router(
228-
builder_config,
229-
llm_deployment_names=_get_llm_deployment_names(direct_deployments),
194+
return direct_deployment._with_ingress_request_router(
195+
_build_openai_ingress_request_router(
196+
llm_deployment_name=direct_deployment._bound_deployment.name,
230197
)
231198
)
232199

0 commit comments

Comments
 (0)