From f848f950ccf8a8876aa720ca17660bf458186cab Mon Sep 17 00:00:00 2001 From: Viraat Chandra Date: Tue, 21 Apr 2026 17:39:32 -0700 Subject: [PATCH 1/3] fix: let HTTPClientConfig and template regen work on non-Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent issues that together broke pre-commit (and any HTTPClientConfig construction) on macOS: 1. HTTPClientConfig()._resolve_defaults calls _get_auto_num_workers when num_workers=-1, which invokes get_current_numa_node(). That function is @require_linux and raises UnsupportedPlatformError on darwin. Fix: catch UnsupportedPlatformError and fall back to min_workers=10, matching the existing "NUMA not discoverable" branch. 2. scripts/regenerate_templates.py::_dump_defaults was documented to avoid running model validators, but called default_factory() for every field — which constructs the nested model (and runs its validators) whenever the factory is a BaseModel subclass. Fix: when the factory is a BaseModel subclass, recurse into _dump_defaults(factory) instead. Factories that dynamically pick a concrete subclass (e.g. TransportConfig.create_default -> ZMQTransportConfig) are not types, so they still get called as before and the concrete subclass is walked via the existing isinstance(default, BaseModel) branch. Verified on Linux: pre-commit passes, templates are byte-identical. Verified against simulated non-Linux (UnsupportedPlatformError patched to always raise): HTTPClientConfig() constructs, and the regen script completes without entering the NUMA code path at all. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/regenerate_templates.py | 11 ++++++++++ .../endpoint_client/config.py | 20 +++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/scripts/regenerate_templates.py b/scripts/regenerate_templates.py index eb84a6dd..30d0bb0d 100644 --- a/scripts/regenerate_templates.py +++ b/scripts/regenerate_templates.py @@ -149,6 +149,17 @@ def _dump_defaults(model: type[BaseModel]) -> dict: if info.default is not PydanticUndefined: default = info.default elif info.default_factory is not None: + # If the factory is itself a BaseModel subclass (e.g. + # default_factory=HTTPClientConfig), recurse into it instead of + # calling it — calling would run validators, defeating the point + # of this function. Factories that dynamically pick a concrete + # subclass (e.g. TransportConfig.create_default → ZMQTransportConfig) + # aren't types, so they fall through and get called as before. + if isinstance(info.default_factory, type) and issubclass( + info.default_factory, BaseModel + ): + out[name] = _dump_defaults(info.default_factory) + continue default = info.default_factory() else: # Required field — recurse if BaseModel, else None diff --git a/src/inference_endpoint/endpoint_client/config.py b/src/inference_endpoint/endpoint_client/config.py index c5509fc2..599ca7c4 100644 --- a/src/inference_endpoint/endpoint_client/config.py +++ b/src/inference_endpoint/endpoint_client/config.py @@ -37,7 +37,12 @@ from .accumulator_protocol import SSEAccumulatorProtocol from .adapter_protocol import HttpRequestAdapter -from .cpu_affinity import AffinityPlan, get_cpus_in_numa_node, get_current_numa_node +from .cpu_affinity import ( + AffinityPlan, + UnsupportedPlatformError, + get_cpus_in_numa_node, + get_current_numa_node, +) from .utils import get_ephemeral_port_limit, get_ephemeral_port_range ADAPTER_MAP = { @@ -262,17 +267,24 @@ def _get_auto_num_workers() -> int: Users can override with explicit num_workers to use 
more cores (workers will be pinned to additional cores outside NUMA domain if needed). + On non-Linux platforms (NUMA probing is Linux-only) falls back to + ``min_workers`` so the config can still be constructed for local + development, template regeneration, and tests. + Returns: Number of workers to use when num_workers is -1 (auto). """ min_workers = 10 max_workers = 24 - numa_node = get_current_numa_node() - if numa_node is None: + try: + numa_node = get_current_numa_node() + if numa_node is None: + return min_workers + numa_cpus = get_cpus_in_numa_node(numa_node) + except UnsupportedPlatformError: return min_workers - numa_cpus = get_cpus_in_numa_node(numa_node) if not numa_cpus: return min_workers From 306673cdc8993c1e0c8f1f5c28fdf3ddd574dff0 Mon Sep 17 00:00:00 2001 From: Viraat Chandra Date: Wed, 22 Apr 2026 12:14:15 -0700 Subject: [PATCH 2/3] test: cover non-Linux num_workers fallback and _dump_defaults recursion Adds unit tests for both fixes in this PR: - tests/unit/endpoint_client/test_http_client_config.py: patches get_current_numa_node / get_cpus_in_numa_node to raise UnsupportedPlatformError and asserts _get_auto_num_workers and HTTPClientConfig() fall back to min_workers=10. - tests/unit/config/test_regenerate_templates.py: defines a BaseModel with a counter-incrementing model_validator used as default_factory, and asserts _dump_defaults does not invoke the validator while still emitting the nested defaults. Also covers the non-BaseModel callable factory path (lambda) to ensure it still gets called. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude.local.md | 10 +++ .nvimrc.lua | 27 +++++++ .../unit/config/test_regenerate_templates.py | 75 +++++++++++++++++++ .../test_http_client_config.py | 45 +++++++++++ 4 files changed, 157 insertions(+) create mode 100644 .claude.local.md create mode 100644 .nvimrc.lua create mode 100644 tests/unit/config/test_regenerate_templates.py create mode 100644 tests/unit/endpoint_client/test_http_client_config.py diff --git a/.claude.local.md b/.claude.local.md new file mode 100644 index 00000000..c325c6c5 --- /dev/null +++ b/.claude.local.md @@ -0,0 +1,10 @@ +# Worktree-Local Context + + + + +## Current Worktree + +- **Directory**: main +- **Branch**: feat/viraatc-cleanup-asserts +- **Focus**: Cleanup assert statements diff --git a/.nvimrc.lua b/.nvimrc.lua new file mode 100644 index 00000000..cfc3e961 --- /dev/null +++ b/.nvimrc.lua @@ -0,0 +1,27 @@ +local sync = require('lib/sync') +local Project = require('lib/project') +local terminal = require('lib/terminal') + +vim.api.nvim_create_autocmd('VimLeavePre', { + callback = function() + require('plenary.job'):new({ + command = 'killall', + args = { 'lsyncd' }, + cwd = vim.loop.cwd() + }):start() + end +}) + +return Project.remote({ + name = 'inference-endpoints', + sync_target = sync.cluster_target({ + clusters = Project.presets.nvidia_clusters, + project_subdir = 'mlperf/endpoints.git/', + use_worktrees = true, + excludes = { + '.venv*', 'venv*', '*.pyc', '__pycache__', '.ruff*', 'htmlcov', '.*_cache', 'outputs', 'logs', '.mypy_cache' + }, + }), + commands = { + }, +}) diff --git a/tests/unit/config/test_regenerate_templates.py b/tests/unit/config/test_regenerate_templates.py new file mode 100644 index 00000000..c40ece81 --- /dev/null +++ b/tests/unit/config/test_regenerate_templates.py @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Tests for scripts/regenerate_templates.py. + +`_dump_defaults` must extract defaults without constructing nested +BaseModels that appear as default_factory, because construction runs +validators (which may have platform-dependent side effects). +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +from pydantic import BaseModel, Field, model_validator + +_REPO_ROOT = Path(__file__).resolve().parents[3] +_SCRIPT = _REPO_ROOT / "scripts" / "regenerate_templates.py" + + +def _load_regenerate_templates(): + """Load scripts/regenerate_templates.py as a module (it is not a package).""" + if "regenerate_templates" in sys.modules: + return sys.modules["regenerate_templates"] + spec = importlib.util.spec_from_file_location("regenerate_templates", _SCRIPT) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + sys.modules["regenerate_templates"] = module + spec.loader.exec_module(module) + return module + + +class TestDumpDefaultsSkipsBaseModelFactory: + def test_basemodel_factory_does_not_run_validator(self): + """A BaseModel default_factory must not have its validators run.""" + rt = _load_regenerate_templates() + + call_count = 0 + + class Inner(BaseModel): + x: int = 42 + + @model_validator(mode="after") + def _count(self): + nonlocal call_count + call_count += 1 + return self + + class Outer(BaseModel): + inner: Inner = Field(default_factory=Inner) + + # Sanity: constructing Inner() directly does invoke the validator. + Inner() + assert call_count == 1 + + call_count = 0 + result = rt._dump_defaults(Outer) + + assert call_count == 0, ( + "Inner validator was invoked — _dump_defaults called the factory " + "instead of recursing." 
+ ) + assert result == {"inner": {"x": 42}} + + def test_callable_factory_is_still_invoked(self): + """Factories that are callables (not BaseModel subclasses) must still be called.""" + rt = _load_regenerate_templates() + + class Config(BaseModel): + tags: list[str] = Field(default_factory=lambda: ["default-tag"]) + + result = rt._dump_defaults(Config) + assert result == {"tags": ["default-tag"]} diff --git a/tests/unit/endpoint_client/test_http_client_config.py b/tests/unit/endpoint_client/test_http_client_config.py new file mode 100644 index 00000000..22e251f3 --- /dev/null +++ b/tests/unit/endpoint_client/test_http_client_config.py @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for HTTPClientConfig construction on non-Linux platforms. + +NUMA probing is Linux-only; auto-detecting num_workers must fall back +gracefully so HTTPClientConfig() can be constructed anywhere. 
+""" + +from unittest.mock import patch + +from inference_endpoint.endpoint_client import config as cfg +from inference_endpoint.endpoint_client.cpu_affinity import UnsupportedPlatformError + + +class TestAutoNumWorkersNonLinux: + def _clear_cache(self): + cfg._get_auto_num_workers.cache_clear() + + def test_get_current_numa_node_unsupported_falls_back_to_min(self): + self._clear_cache() + with patch.object( + cfg, "get_current_numa_node", side_effect=UnsupportedPlatformError("darwin") + ): + assert cfg._get_auto_num_workers() == 10 + + def test_get_cpus_in_numa_node_unsupported_falls_back_to_min(self): + self._clear_cache() + with ( + patch.object(cfg, "get_current_numa_node", return_value=0), + patch.object( + cfg, + "get_cpus_in_numa_node", + side_effect=UnsupportedPlatformError("darwin"), + ), + ): + assert cfg._get_auto_num_workers() == 10 + + def test_http_client_config_constructs_when_numa_unsupported(self): + self._clear_cache() + with patch.object( + cfg, "get_current_numa_node", side_effect=UnsupportedPlatformError("darwin") + ): + c = cfg.HTTPClientConfig() + assert c.num_workers == 10 From 970ac071304ce9f31bd7b940f6bf207047a96d8f Mon Sep 17 00:00:00 2001 From: Viraat Chandra Date: Wed, 22 Apr 2026 12:14:34 -0700 Subject: [PATCH 3/3] chore: remove accidentally-committed local files Personal editor/settings files (.claude.local.md, .nvimrc.lua) were pulled in by git add -A in the previous commit. They should stay untracked. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude.local.md | 10 ---------- .nvimrc.lua | 27 --------------------------- 2 files changed, 37 deletions(-) delete mode 100644 .claude.local.md delete mode 100644 .nvimrc.lua diff --git a/.claude.local.md b/.claude.local.md deleted file mode 100644 index c325c6c5..00000000 --- a/.claude.local.md +++ /dev/null @@ -1,10 +0,0 @@ -# Worktree-Local Context - - - - -## Current Worktree - -- **Directory**: main -- **Branch**: feat/viraatc-cleanup-asserts -- **Focus**: Cleanup assert statements diff --git a/.nvimrc.lua b/.nvimrc.lua deleted file mode 100644 index cfc3e961..00000000 --- a/.nvimrc.lua +++ /dev/null @@ -1,27 +0,0 @@ -local sync = require('lib/sync') -local Project = require('lib/project') -local terminal = require('lib/terminal') - -vim.api.nvim_create_autocmd('VimLeavePre', { - callback = function() - require('plenary.job'):new({ - command = 'killall', - args = { 'lsyncd' }, - cwd = vim.loop.cwd() - }):start() - end -}) - -return Project.remote({ - name = 'inference-endpoints', - sync_target = sync.cluster_target({ - clusters = Project.presets.nvidia_clusters, - project_subdir = 'mlperf/endpoints.git/', - use_worktrees = true, - excludes = { - '.venv*', 'venv*', '*.pyc', '__pycache__', '.ruff*', 'htmlcov', '.*_cache', 'outputs', 'logs', '.mypy_cache' - }, - }), - commands = { - }, -})