feat: autodetect model name

lianakoleva · lianakoleva · commit a2dcf43c5499 · 2026-05-08T15:42:18.000-07:00
Signed-off-by: Liana Koleva <43767763+lianakoleva@users.noreply.github.com>
diff --git a/docs/cli-options.md b/docs/cli-options.md
@@ -116,9 +116,10 @@ aiperf profile --model your_model --url localhost:8000 --goodput "request_latenc
 
 ### Endpoint
 
-#### `-m`, `--model-names`, `--model` `<list>` _(Required)_
+#### `-m`, `--model-names`, `--model` `<list>`
 
 Model name(s) to be benchmarked. Can be a comma-separated list or a single model name.
+If omitted, `aiperf profile` attempts to auto-detect a model from `GET {url}/v1/models`.
 
 #### `--model-selection-strategy` `<str>`
 
diff --git a/src/aiperf/cli_commands/profile.py b/src/aiperf/cli_commands/profile.py
@@ -2,9 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 """CLI command for running the Profile subcommand."""
 
+import asyncio
 from cyclopts import App
 
 from aiperf.common.config import ServiceConfig, UserConfig
+from aiperf.common.config.cli_parameter import CLIParameter
 
 app = App(name="profile")
 
@@ -53,8 +55,46 @@ def profile(
 
         service_config = service_config or load_service_config()
 
+        # If the user didn't provide --model/--model-names, try to discover
+        # one from the server's OpenAI-compatible model list.
+        if not user_config.endpoint.model_names:
+            import logging
+
+            from aiperf.common.config.config_defaults import OutputDefaults
+            from aiperf.common.models.model_autodetect import (
+                autodetect_model_names_from_v1_models,
+            )
+
+            # Install a basic stderr handler so the autodetect log messages are
+            # visible even when `--wait-for-model-timeout` is left at the default (0).
+            if not logging.getLogger().handlers:
+                logging.basicConfig(
+                    level=logging.INFO,
+                    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                )
+
+            raw_headers = user_config.input.headers or []
+            headers = {str(k): str(v) for k, v in raw_headers}
+            if user_config.endpoint.api_key:
+                headers["Authorization"] = f"Bearer {user_config.endpoint.api_key}"
+
+            user_config.endpoint.model_names = asyncio.run(
+                autodetect_model_names_from_v1_models(
+                    urls=user_config.endpoint.urls,
+                    headers=headers,
+                )
+            )
+
+            # `UserConfig` computed an artifact directory during config-load.
+            # If it used the default artifact directory (not overridden by the
+            # user), update it to reflect the discovered model name.
+            if "artifact_directory" not in user_config.output.model_fields_set:
+                user_config.output.artifact_directory = OutputDefaults.ARTIFACT_DIRECTORY
+                user_config.output.artifact_directory = (
+                    user_config._compute_artifact_directory()
+                )
+
         if user_config.endpoint.wait_for_model_timeout > 0:
-            import asyncio
             import logging
 
             from aiperf.common.readiness_probe import wait_for_endpoint
diff --git a/src/aiperf/common/config/endpoint_config.py b/src/aiperf/common/config/endpoint_config.py
@@ -85,8 +85,12 @@ def validate_wait_for_model_coherent(self) -> Self:
     model_names: Annotated[
         list[str],
         Field(
-            ...,  # This must be set by the user
-            description="Model name(s) to be benchmarked. Can be a comma-separated list or a single model name.",
+            default_factory=list,
+            description=(
+                "Model name(s) to be benchmarked. Can be a comma-separated list or a "
+                "single model name. If omitted, `aiperf profile` will attempt to "
+                "auto-detect a model from `GET {url}/v1/models`."
+            ),
         ),
         BeforeValidator(parse_str_or_list),
         CLIParameter(
diff --git a/src/aiperf/common/config/user_config.py b/src/aiperf/common/config/user_config.py
@@ -760,6 +760,11 @@ def _compute_artifact_directory(self) -> Path:
 
     def _get_artifact_model_name(self) -> str:
         """Get the artifact model name based on the user selected options."""
+        if not self.endpoint.model_names:
+            # When --model is omitted, `aiperf profile` will auto-detect models
+            # later. Use a safe placeholder so config-load doesn't crash.
+            return "auto"
+
         model_name: str = self.endpoint.model_names[0]
         if len(self.endpoint.model_names) > 1:
             model_name = f"{model_name}_multi"
diff --git a/src/aiperf/common/models/model_autodetect.py b/src/aiperf/common/models/model_autodetect.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Model autodetection helpers
+
+Used by `aiperf profile` when `--model/--model-names` is omitted:
+attempt to fetch the server's model list from `GET {base_url}/v1/models`.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import orjson
+
+from aiperf.common.aiperf_logger import AIPerfLogger
+from aiperf.transports.aiohttp_client import AioHttpClient
+
+_logger = AIPerfLogger(__name__)
+
+
+async def autodetect_model_names_from_v1_models(
+    *,
+    urls: list[str],
+    headers: dict[str, str],
+    timeout_s: float = 10.0,
+) -> list[str]:
+    """Query `GET {url}/v1/models` and return a single-element model list.
+
+    Selection strategy: return only the first discovered model id.
+    """
+
+    if not urls:
+        raise ValueError("Autodetection requires at least one --url base URL")
+
+    # Use the first URL for discovery. If you have multiple URLs with
+    # different model sets, you should pass --model explicitly.
+    base_url = urls[0].rstrip("/")
+    models_url = base_url + "/v1/models"
+
+    client = AioHttpClient(timeout=timeout_s)
+    try:
+        record = await client.get_request(models_url, headers=headers)
+    finally:
+        await client.close()
+
+    status = record.status
+    if status != 200:
+        raise ValueError(
+            f"Failed to auto-detect models from {models_url}: HTTP status={status}"
+        )
+
+    if not record.responses:
+        raise ValueError(f"Empty response body while autodetecting {models_url}")
+
+    response_obj: Any = record.responses[0]
+    body_text = getattr(response_obj, "text", None)
+    if not isinstance(body_text, str) or not body_text:
+        raise ValueError(f"Non-text response while autodetecting {models_url}")
+
+    try:
+        payload = orjson.loads(body_text)
+    except orjson.JSONDecodeError as e:
+        raise ValueError(
+            f"Invalid JSON returned from {models_url} while autodetecting models"
+        ) from e
+
+    if not isinstance(payload, dict):
+        raise ValueError(f"Unexpected /v1/models response shape from {models_url}")
+
+    data = payload.get("data")
+    if not isinstance(data, list):
+        raise ValueError(f"Unexpected /v1/models response: missing data[] in {models_url}")
+
+    ids: list[str] = []
+    for entry in data:
+        if isinstance(entry, dict):
+            model_id = entry.get("id")
+            if isinstance(model_id, str) and model_id:
+                ids.append(model_id)
+
+    if not ids:
+        raise ValueError(f"No model ids found in /v1/models response from {models_url}")
+
+    chosen = ids[0]
+    if len(ids) > 1:
+        _logger.warning(
+            f"{len(ids)} models returned by {models_url}; "
+            "pass --model to select one explicitly"
+        )
+        _logger.warning(
+            f"No --model provided; using first listed model '{chosen}'"
+        )
+    else:
+        _logger.info(f"Auto-detected model '{chosen}' from {models_url}")
+    return [chosen]
+
diff --git a/tests/unit/common/config/test_endpoint_config.py b/tests/unit/common/config/test_endpoint_config.py
@@ -18,7 +18,8 @@ def test_endpoint_config_defaults():
     the configuration is initialized correctly with expected default values.
     """
 
-    # NOTE: Model names must be filled out
+    # Model names now default to [] (so `aiperf profile` can autodetect when
+    # `--model` is omitted); this test still passes one explicitly.
     config = EndpointConfig(model_names=["gpt2"])
 
     assert config.model_selection_strategy == EndpointDefaults.MODEL_SELECTION_STRATEGY
diff --git a/tests/unit/common/test_model_autodetect.py b/tests/unit/common/test_model_autodetect.py
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any
+
+import orjson
+import pytest
+
+from aiperf.common.models.model_autodetect import (
+    autodetect_model_names_from_v1_models,
+)
+
+
+class _FakeRecord:
+    def __init__(self, *, status: int, body_text: str) -> None:
+        self.status = status
+        resp = type("_Resp", (), {"text": body_text})()
+        self.responses = [resp]
+
+
+class _FakeClient:
+    def __init__(self, *, status: int, body_text: str) -> None:
+        self._status = status
+        self._body_text = body_text
+        self.urls: list[str] = []
+        self.headers: list[dict[str, str]] = []
+        self.closed = False
+
+    async def get_request(
+        self, url: str, headers: dict[str, str], **_: Any
+    ) -> _FakeRecord:
+        self.urls.append(url)
+        self.headers.append(headers)
+        return _FakeRecord(status=self._status, body_text=self._body_text)
+
+    async def close(self) -> None:
+        self.closed = True
+
+
+def _install_fake_aiohttp(
+    monkeypatch: pytest.MonkeyPatch, *, status: int, body_text: str
+) -> _FakeClient:
+    fake = _FakeClient(status=status, body_text=body_text)
+
+    def _factory(*_: Any, **__: Any) -> _FakeClient:
+        return fake
+
+    monkeypatch.setattr(
+        "aiperf.common.models.model_autodetect.AioHttpClient",
+        _factory,
+    )
+    return fake
+
+
+def test_autodetect_picks_first_id_from_data(
+    monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
+) -> None:
+    import logging
+
+    caplog.set_level(logging.WARNING, logger="aiperf.common.models.model_autodetect")
+    body_text = orjson.dumps(
+        {"data": [{"id": "model-a"}, {"id": "model-b"}]}
+    ).decode("utf-8")
+    fake = _install_fake_aiohttp(
+        monkeypatch, status=200, body_text=body_text
+    )
+
+    result = asyncio.run(
+        autodetect_model_names_from_v1_models(
+            urls=["http://localhost:8000"],
+            headers={"Authorization": "Bearer token"},
+            timeout_s=1.0,
+        )
+    )
+
+    assert result == ["model-a"]
+    assert fake.urls == ["http://localhost:8000/v1/models"]
+    assert fake.headers[0]["Authorization"] == "Bearer token"
+    assert fake.closed is True
+    assert "2 models returned" in caplog.text
+    assert "pass --model" in caplog.text
+    assert "first listed model 'model-a'" in caplog.text
+
+
+def test_autodetect_single_model_logs_info_not_warning(
+    monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
+) -> None:
+    import logging
+
+    caplog.set_level(logging.INFO, logger="aiperf.common.models.model_autodetect")
+    body_text = orjson.dumps({"data": [{"id": "only-one"}]}).decode("utf-8")
+    _install_fake_aiohttp(monkeypatch, status=200, body_text=body_text)
+
+    asyncio.run(
+        autodetect_model_names_from_v1_models(
+            urls=["http://localhost:8000"],
+            headers={},
+            timeout_s=1.0,
+        )
+    )
+
+    assert "Auto-detected model 'only-one'" in caplog.text
+    assert "pass --model" not in caplog.text
+
+
+def test_autodetect_raises_on_non_200(monkeypatch: pytest.MonkeyPatch) -> None:
+    fake_body_text = "oops"
+    _install_fake_aiohttp(
+        monkeypatch, status=404, body_text=fake_body_text
+    )
+
+    with pytest.raises(ValueError, match="Failed to auto-detect models"):
+        asyncio.run(
+            autodetect_model_names_from_v1_models(
+                urls=["http://localhost:8000"],
+                headers={},
+                timeout_s=1.0,
+            )
+        )