Skip to content

Commit 294549f

Browse files
nvyutwu and claude committed
fix: address review feedback for multi-model-name support
- Deduplicate aliases fully (not just primary) using ordered set logic
- Document gRPC single-model limitation in docstring

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cdda8d8 commit 294549f

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

tensorrt_llm/commands/serve.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,9 @@ def launch_grpc_server(host: str,
335335
host: Host to bind to
336336
port: Port to bind to
337337
llm_args: Arguments for LLM initialization (from get_llm_args)
338-
served_model_name: Custom model name for API responses (defaults to model path)
338+
served_model_name: Model name(s) for API responses (defaults to model path).
339+
Note: the gRPC server only uses the first (primary) name. Multiple
340+
aliases are supported by the HTTP/OpenAI server only.
339341
"""
340342
import grpc
341343

tensorrt_llm/serve/openai_server.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,12 @@ def __init__(
205205
if model_dir.exists() and model_dir.is_dir():
206206
primary = model_dir.name
207207
self.model = primary
208-
self.served_model_names: List[str] = [primary] + [
209-
n for n in names[1:] if n != primary
210-
]
208+
seen = {primary}
209+
self.served_model_names: List[str] = [primary]
210+
for n in names[1:]:
211+
if n not in seen:
212+
seen.add(n)
213+
self.served_model_names.append(n)
211214
self.metrics_collector = None
212215
self.perf_metrics = None
213216
self.perf_metrics_lock = None

0 commit comments

Comments
 (0)