From f848f950ccf8a8876aa720ca17660bf458186cab Mon Sep 17 00:00:00 2001
From: Viraat Chandra <viraatc@nvidia.com>
Date: Tue, 21 Apr 2026 17:39:32 -0700
Subject: [PATCH 1/3] fix: let HTTPClientConfig and template regen work on
 non-Linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two independent issues that together broke pre-commit (and any
HTTPClientConfig construction) on macOS:

1. HTTPClientConfig()._resolve_defaults calls _get_auto_num_workers when
   num_workers=-1, which invokes get_current_numa_node(). That function
   is @require_linux and raises UnsupportedPlatformError on darwin.
   Fix: catch UnsupportedPlatformError and fall back to min_workers=10,
   matching the existing "NUMA not discoverable" branch.

2. scripts/regenerate_templates.py::_dump_defaults was documented to
   avoid running model validators, but called default_factory() for
   every field — which constructs the nested model (and runs its
   validators) whenever the factory is a BaseModel subclass. Fix:
   when the factory is a BaseModel subclass, recurse into
   _dump_defaults(factory) instead. Factories that dynamically pick a
   concrete subclass (e.g. TransportConfig.create_default -> ZMQTransportConfig)
   are not types, so they still get called as before and the concrete
   subclass is walked via the existing isinstance(default, BaseModel)
   branch.

Verified on Linux: pre-commit passes, templates are byte-identical.
Verified against simulated non-Linux (patched so that
UnsupportedPlatformError is always raised): HTTPClientConfig()
constructs, and the regen script completes without entering the NUMA
code path at all.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/regenerate_templates.py               | 11 ++++++++++
 .../endpoint_client/config.py                 | 20 +++++++++++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/scripts/regenerate_templates.py b/scripts/regenerate_templates.py
index eb84a6dd..30d0bb0d 100644
--- a/scripts/regenerate_templates.py
+++ b/scripts/regenerate_templates.py
@@ -149,6 +149,17 @@ def _dump_defaults(model: type[BaseModel]) -> dict:
         if info.default is not PydanticUndefined:
             default = info.default
         elif info.default_factory is not None:
+            # If the factory is itself a BaseModel subclass (e.g.
+            # default_factory=HTTPClientConfig), recurse into it instead of
+            # calling it — calling would run validators, defeating the point
+            # of this function. Factories that dynamically pick a concrete
+            # subclass (e.g. TransportConfig.create_default → ZMQTransportConfig)
+            # aren't types, so they fall through and get called as before.
+            if isinstance(info.default_factory, type) and issubclass(
+                info.default_factory, BaseModel
+            ):
+                out[name] = _dump_defaults(info.default_factory)
+                continue
             default = info.default_factory()
         else:
             # Required field — recurse if BaseModel, else None
diff --git a/src/inference_endpoint/endpoint_client/config.py b/src/inference_endpoint/endpoint_client/config.py
index c5509fc2..599ca7c4 100644
--- a/src/inference_endpoint/endpoint_client/config.py
+++ b/src/inference_endpoint/endpoint_client/config.py
@@ -37,7 +37,12 @@
 
 from .accumulator_protocol import SSEAccumulatorProtocol
 from .adapter_protocol import HttpRequestAdapter
-from .cpu_affinity import AffinityPlan, get_cpus_in_numa_node, get_current_numa_node
+from .cpu_affinity import (
+    AffinityPlan,
+    UnsupportedPlatformError,
+    get_cpus_in_numa_node,
+    get_current_numa_node,
+)
 from .utils import get_ephemeral_port_limit, get_ephemeral_port_range
 
 ADAPTER_MAP = {
@@ -262,17 +267,24 @@ def _get_auto_num_workers() -> int:
     Users can override with explicit num_workers to use more cores (workers
     will be pinned to additional cores outside NUMA domain if needed).
 
+    On non-Linux platforms (NUMA probing is Linux-only) falls back to
+    ``min_workers`` so the config can still be constructed for local
+    development, template regeneration, and tests.
+
     Returns:
         Number of workers to use when num_workers is -1 (auto).
     """
     min_workers = 10
     max_workers = 24
 
-    numa_node = get_current_numa_node()
-    if numa_node is None:
+    try:
+        numa_node = get_current_numa_node()
+        if numa_node is None:
+            return min_workers
+        numa_cpus = get_cpus_in_numa_node(numa_node)
+    except UnsupportedPlatformError:
         return min_workers
 
-    numa_cpus = get_cpus_in_numa_node(numa_node)
     if not numa_cpus:
         return min_workers
 

From 306673cdc8993c1e0c8f1f5c28fdf3ddd574dff0 Mon Sep 17 00:00:00 2001
From: Viraat Chandra <viraatc@nvidia.com>
Date: Wed, 22 Apr 2026 12:14:15 -0700
Subject: [PATCH 2/3] test: cover non-Linux num_workers fallback and
 _dump_defaults recursion

Adds unit tests for both fixes in this PR:

- tests/unit/endpoint_client/test_http_client_config.py: patches
  get_current_numa_node / get_cpus_in_numa_node to raise
  UnsupportedPlatformError and asserts _get_auto_num_workers and
  HTTPClientConfig() fall back to min_workers=10.

- tests/unit/config/test_regenerate_templates.py: defines a BaseModel
  with a counter-incrementing model_validator used as default_factory,
  and asserts _dump_defaults does not invoke the validator while still
  emitting the nested defaults. Also covers the non-BaseModel callable
  factory path (lambda) to ensure it still gets called.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .claude.local.md                              | 10 +++
 .nvimrc.lua                                   | 27 +++++++
 .../unit/config/test_regenerate_templates.py  | 75 +++++++++++++++++++
 .../test_http_client_config.py                | 45 +++++++++++
 4 files changed, 157 insertions(+)
 create mode 100644 .claude.local.md
 create mode 100644 .nvimrc.lua
 create mode 100644 tests/unit/config/test_regenerate_templates.py
 create mode 100644 tests/unit/endpoint_client/test_http_client_config.py

diff --git a/.claude.local.md b/.claude.local.md
new file mode 100644
index 00000000..c325c6c5
--- /dev/null
+++ b/.claude.local.md
@@ -0,0 +1,10 @@
+# Worktree-Local Context
+
+<!-- This file is gitignored. Use it for worktree-specific context. -->
+<!-- The shared CLAUDE.md (checked into git) applies to all worktrees automatically. -->
+
+## Current Worktree
+
+- **Directory**: main
+- **Branch**: feat/viraatc-cleanup-asserts
+- **Focus**: Cleanup assert statements
diff --git a/.nvimrc.lua b/.nvimrc.lua
new file mode 100644
index 00000000..cfc3e961
--- /dev/null
+++ b/.nvimrc.lua
@@ -0,0 +1,27 @@
+local sync = require('lib/sync')
+local Project = require('lib/project')
+local terminal = require('lib/terminal')
+
+vim.api.nvim_create_autocmd('VimLeavePre', {
+  callback = function()
+    require('plenary.job'):new({
+      command = 'killall',
+      args = { 'lsyncd' },
+      cwd = vim.loop.cwd()
+    }):start()
+  end
+})
+
+return Project.remote({
+  name = 'inference-endpoints',
+  sync_target = sync.cluster_target({
+    clusters = Project.presets.nvidia_clusters,
+    project_subdir = 'mlperf/endpoints.git/',
+    use_worktrees = true,
+    excludes = {
+      '.venv*', 'venv*', '*.pyc', '__pycache__', '.ruff*', 'htmlcov', '.*_cache', 'outputs', 'logs', '.mypy_cache'
+    },
+  }),
+  commands = {
+  },
+})
diff --git a/tests/unit/config/test_regenerate_templates.py b/tests/unit/config/test_regenerate_templates.py
new file mode 100644
index 00000000..c40ece81
--- /dev/null
+++ b/tests/unit/config/test_regenerate_templates.py
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for scripts/regenerate_templates.py.
+
+`_dump_defaults` must extract defaults without constructing nested
+BaseModels that appear as default_factory, because construction runs
+validators (which may have platform-dependent side effects).
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+from pydantic import BaseModel, Field, model_validator
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+_SCRIPT = _REPO_ROOT / "scripts" / "regenerate_templates.py"
+
+
+def _load_regenerate_templates():
+    """Load scripts/regenerate_templates.py as a module (it is not a package).
+
+    The module is cached in ``sys.modules`` so repeated calls return the same
+    object. If executing the script raises, the partially initialised module
+    is removed again so a later call retries the load instead of returning a
+    half-initialised module.
+    """
+    name = "regenerate_templates"
+    cached = sys.modules.get(name)
+    if cached is not None:
+        return cached
+    spec = importlib.util.spec_from_file_location(name, _SCRIPT)
+    assert spec and spec.loader
+    module = importlib.util.module_from_spec(spec)
+    # Register before executing, as in the importlib "importing a source file
+    # directly" recipe, so the module is resolvable by name while it runs.
+    sys.modules[name] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:
+        sys.modules.pop(name, None)
+        raise
+    return module
+
+
+class TestDumpDefaultsSkipsBaseModelFactory:
+    """How ``_dump_defaults`` handles fields declared with a ``default_factory``."""
+
+    def test_basemodel_factory_does_not_run_validator(self):
+        """default_factory=<BaseModel subclass> must not invoke the model's validators."""
+        rt = _load_regenerate_templates()
+
+        call_count = 0
+
+        class Inner(BaseModel):
+            x: int = 42
+
+            @model_validator(mode="after")
+            def _count(self):
+                nonlocal call_count
+                call_count += 1
+                return self
+
+        class Outer(BaseModel):
+            inner: Inner = Field(default_factory=Inner)
+
+        # Sanity: constructing Inner() directly does invoke the validator.
+        Inner()
+        assert call_count == 1
+
+        call_count = 0
+        result = rt._dump_defaults(Outer)
+
+        assert call_count == 0, (
+            "Inner validator was invoked — _dump_defaults called the factory "
+            "instead of recursing."
+        )
+        assert result == {"inner": {"x": 42}}
+
+    def test_callable_factory_is_still_invoked(self):
+        """Factories that are callables (not BaseModel subclasses) must still be called."""
+        rt = _load_regenerate_templates()
+
+        class Config(BaseModel):
+            tags: list[str] = Field(default_factory=lambda: ["default-tag"])
+
+        result = rt._dump_defaults(Config)
+        assert result == {"tags": ["default-tag"]}
diff --git a/tests/unit/endpoint_client/test_http_client_config.py b/tests/unit/endpoint_client/test_http_client_config.py
new file mode 100644
index 00000000..22e251f3
--- /dev/null
+++ b/tests/unit/endpoint_client/test_http_client_config.py
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for HTTPClientConfig construction on non-Linux platforms.
+
+NUMA probing is Linux-only; auto-detecting num_workers must fall back
+gracefully so HTTPClientConfig() can be constructed anywhere.
+"""
+
+from unittest.mock import patch
+
+from inference_endpoint.endpoint_client import config as cfg
+from inference_endpoint.endpoint_client.cpu_affinity import UnsupportedPlatformError
+
+
+class TestAutoNumWorkersNonLinux:
+    """Auto-detection of num_workers when NUMA probing is unsupported.
+
+    ``_get_auto_num_workers`` exposes ``cache_clear()`` (its result is cached),
+    so the cache is cleared before each test (to force a fresh probe under the
+    patch) and after each test (so the value computed under the patch cannot
+    leak into other tests).
+    """
+
+    def _clear_cache(self):
+        cfg._get_auto_num_workers.cache_clear()
+
+    def setup_method(self):
+        self._clear_cache()
+
+    def teardown_method(self):
+        self._clear_cache()
+
+    def test_get_current_numa_node_unsupported_falls_back_to_min(self):
+        with patch.object(
+            cfg, "get_current_numa_node", side_effect=UnsupportedPlatformError("darwin")
+        ):
+            assert cfg._get_auto_num_workers() == 10
+
+    def test_get_cpus_in_numa_node_unsupported_falls_back_to_min(self):
+        with (
+            patch.object(cfg, "get_current_numa_node", return_value=0),
+            patch.object(
+                cfg,
+                "get_cpus_in_numa_node",
+                side_effect=UnsupportedPlatformError("darwin"),
+            ),
+        ):
+            assert cfg._get_auto_num_workers() == 10
+
+    def test_http_client_config_constructs_when_numa_unsupported(self):
+        with patch.object(
+            cfg, "get_current_numa_node", side_effect=UnsupportedPlatformError("darwin")
+        ):
+            c = cfg.HTTPClientConfig()
+        assert c.num_workers == 10

From 970ac071304ce9f31bd7b940f6bf207047a96d8f Mon Sep 17 00:00:00 2001
From: Viraat Chandra <viraatc@nvidia.com>
Date: Wed, 22 Apr 2026 12:14:34 -0700
Subject: [PATCH 3/3] chore: remove accidentally-committed local files

Personal editor/settings files (.claude.local.md, .nvimrc.lua) were
pulled in by git add -A in the previous commit. They should stay
untracked.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .claude.local.md | 10 ----------
 .nvimrc.lua      | 27 ---------------------------
 2 files changed, 37 deletions(-)
 delete mode 100644 .claude.local.md
 delete mode 100644 .nvimrc.lua

diff --git a/.claude.local.md b/.claude.local.md
deleted file mode 100644
index c325c6c5..00000000
--- a/.claude.local.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Worktree-Local Context
-
-<!-- This file is gitignored. Use it for worktree-specific context. -->
-<!-- The shared CLAUDE.md (checked into git) applies to all worktrees automatically. -->
-
-## Current Worktree
-
-- **Directory**: main
-- **Branch**: feat/viraatc-cleanup-asserts
-- **Focus**: Cleanup assert statements
diff --git a/.nvimrc.lua b/.nvimrc.lua
deleted file mode 100644
index cfc3e961..00000000
--- a/.nvimrc.lua
+++ /dev/null
@@ -1,27 +0,0 @@
-local sync = require('lib/sync')
-local Project = require('lib/project')
-local terminal = require('lib/terminal')
-
-vim.api.nvim_create_autocmd('VimLeavePre', {
-  callback = function()
-    require('plenary.job'):new({
-      command = 'killall',
-      args = { 'lsyncd' },
-      cwd = vim.loop.cwd()
-    }):start()
-  end
-})
-
-return Project.remote({
-  name = 'inference-endpoints',
-  sync_target = sync.cluster_target({
-    clusters = Project.presets.nvidia_clusters,
-    project_subdir = 'mlperf/endpoints.git/',
-    use_worktrees = true,
-    excludes = {
-      '.venv*', 'venv*', '*.pyc', '__pycache__', '.ruff*', 'htmlcov', '.*_cache', 'outputs', 'logs', '.mypy_cache'
-    },
-  }),
-  commands = {
-  },
-})