Skip to content

Commit 69d3a1f

Browse files
authored
refactor(pathfinder): unify dynamic-lib subprocess probing (production, test code) (#1779)
* refactor(pathfinder): unify dynamic-lib subprocess probing Consolidate canary and test subprocess entrypoints behind a shared module and JSON payload, update call sites/tests accordingly, and remove the old test-only subprocess helpers. Made-with: Cursor * refactor(pathfinder-tests): avoid double parse Finding 1 from Claude 4.6 Opus (Thinking) (Cursor): avoid parsing the child subprocess JSON twice by using a single payload per test. Made-with: Cursor * refactor(pathfinder): streamline canary probe return Finding 2 from Claude 4.6 Opus (Thinking) (Cursor): drop redundant post-parse validation while keeping mypy satisfied via a typed local. Made-with: Cursor * slim subprocess helpers (the removed checks in production code were too much on the paranoid side) * test(pathfinder): cover canary subprocess mode Finding 4 from Claude 4.6 Opus (Thinking) (Cursor): add direct canary mode coverage and validate invalid-mode handling. Made-with: Cursor * docs(pathfinder): explain subprocess entrypoint scope Finding 5 from Claude 4.6 Opus (Thinking) (Cursor): clarify why test logic lives alongside the production entrypoint and note the negligible runtime impact. Made-with: Cursor * Minor simplification of mypy workaround * inline subprocess payload parse for simplicity Replace the helper wrapper by invoking the payload parser directly in the two call sites. Made-with: Cursor * refactor(pathfinder-tests): include not-found error details Capture DynamicLibNotFoundError subclasses in the load-mode subprocess payload and parametrize coverage across the error variants. Made-with: Cursor
1 parent 811ec27 commit 69d3a1f

12 files changed

Lines changed: 330 additions & 183 deletions

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env python
2+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from __future__ import annotations
6+
7+
import os
8+
import sys
9+
from collections.abc import Sequence
10+
11+
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
12+
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL
13+
from cuda.pathfinder._dynamic_libs.platform_loader import LOADER
14+
from cuda.pathfinder._dynamic_libs.subprocess_protocol import (
15+
MODE_CANARY,
16+
MODE_LOAD,
17+
STATUS_NOT_FOUND,
18+
STATUS_OK,
19+
VALID_MODES,
20+
format_dynamic_lib_subprocess_payload,
21+
)
22+
23+
# NOTE: The main entrypoint (below) serves both production (canary probe)
24+
# and tests (full loader). Keeping them together ensures a single subprocess
25+
# protocol and CLI surface, so the test subprocess stays aligned with the
26+
# production flow while avoiding a separate test-only module.
27+
# Any production-code impact is negligible since the extra logic only runs
28+
# in the subprocess entrypoint and only in test mode.
29+
30+
31+
def _probe_canary_abs_path(libname: str) -> str | None:
    """Return the absolute path of the canary library ``libname``, if it loads.

    The descriptor for ``libname`` is looked up in ``LIB_DESCRIPTORS`` and
    handed to ``LOADER.load_with_system_search``.

    Returns:
        The ``abs_path`` of the loaded library, or ``None`` when the loader
        returns ``None`` or raises ``DynamicLibNotFoundError``.

    Raises:
        ValueError: If ``libname`` has no entry in ``LIB_DESCRIPTORS``.
    """
    descriptor = LIB_DESCRIPTORS.get(libname)
    if descriptor is None:
        raise ValueError(f"Unsupported canary library name: {libname!r}")

    try:
        loaded_dl: LoadedDL | None = LOADER.load_with_system_search(descriptor)
    except DynamicLibNotFoundError:
        return None

    # The explicitly typed local keeps the return type checkable as ``str | None``.
    resolved: str | None = None if loaded_dl is None else loaded_dl.abs_path
    return resolved
43+
44+
45+
def _validate_abs_path(abs_path: str) -> None:
    """Check that ``abs_path`` is a non-empty absolute path to an existing file.

    The checks are raised explicitly instead of being written as ``assert``
    statements, so they still run when the interpreter strips asserts
    (``python -O`` / ``PYTHONOPTIMIZE``). The exception type and messages are
    the same ones the ``assert`` form produced.

    Raises:
        AssertionError: If ``abs_path`` is empty, is not absolute, or does not
            name an existing regular file.
    """
    if not abs_path:
        raise AssertionError(f"empty path: {abs_path=!r}")
    if not os.path.isabs(abs_path):
        raise AssertionError(f"not absolute: {abs_path=!r}")
    if not os.path.isfile(abs_path):
        raise AssertionError(f"not a file: {abs_path=!r}")
49+
50+
51+
def _load_nvidia_dynamic_lib_for_test(libname: str) -> str:
    """Test-only loader used by the subprocess entrypoint.

    Loads ``libname`` three times and cross-checks the results:

    1. A first ``load_nvidia_dynamic_lib`` call, which must not report the
       library as already loaded from elsewhere and must yield a valid
       absolute path and a non-``None`` ``found_via``.
    2. A second ``load_nvidia_dynamic_lib`` call, which must return the very
       same object as the first.
    3. A ``_load_lib_no_cache`` call, whose path must refer to the same file
       as the first load.

    Returns:
        The absolute path reported by the first load.

    Raises:
        RuntimeError: If one of the consistency checks fails.
        AssertionError: If ``found_via`` is ``None`` or a path fails
            ``_validate_abs_path``.

    Exceptions raised by the loader calls themselves are not caught here.
    """
    # Keep imports inside the subprocess body so startup stays focused on the
    # code under test rather than the parent test module.
    from cuda.pathfinder import load_nvidia_dynamic_lib
    from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _load_lib_no_cache
    from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
        SUPPORTED_LINUX_SONAMES,
        SUPPORTED_WINDOWS_DLLS,
    )
    from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

    loaded_dl_fresh = load_nvidia_dynamic_lib(libname)
    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")

    abs_path = loaded_dl_fresh.abs_path
    if not isinstance(abs_path, str):
        raise RuntimeError(f"loaded_dl_fresh.abs_path is not a string: {abs_path!r}")
    _validate_abs_path(abs_path)
    # Explicit raise (not ``assert``) so the check survives ``python -O``.
    if loaded_dl_fresh.found_via is None:
        raise AssertionError("loaded_dl_fresh.found_via is None")

    # The second call must return the identical object, not merely an equal one.
    loaded_dl_from_cache = load_nvidia_dynamic_lib(libname)
    if loaded_dl_from_cache is not loaded_dl_fresh:
        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

    loaded_dl_no_cache = _load_lib_no_cache(libname)
    supported_libs = SUPPORTED_WINDOWS_DLLS if IS_WINDOWS else SUPPORTED_LINUX_SONAMES
    # For libraries listed for this platform, the uncached load must report
    # the library as already loaded.
    if not loaded_dl_no_cache.was_already_loaded_from_elsewhere and libname in supported_libs:
        raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
    abs_path_no_cache = loaded_dl_no_cache.abs_path
    if not isinstance(abs_path_no_cache, str):
        raise RuntimeError(f"loaded_dl_no_cache.abs_path is not a string: {abs_path_no_cache!r}")
    if not os.path.samefile(abs_path_no_cache, abs_path):
        raise RuntimeError(f"not os.path.samefile({abs_path_no_cache=!r}, {abs_path=!r})")
    _validate_abs_path(abs_path_no_cache)
    return abs_path
88+
89+
90+
def probe_dynamic_lib_and_print_json(libname: str, mode: str) -> None:
    """Probe ``libname`` in the given ``mode`` and print one JSON payload to stdout.

    In canary mode the payload status is "ok" when a path was resolved and
    "not-found" otherwise. In load mode the full test loader runs; a
    ``DynamicLibNotFoundError`` becomes a "not-found" payload carrying the
    exception's class name and message under "error".

    Raises:
        ValueError: If ``mode`` is neither ``MODE_CANARY`` nor ``MODE_LOAD``.
    """
    if mode not in (MODE_CANARY, MODE_LOAD):
        raise ValueError(f"Unsupported subprocess probe mode: {mode!r}")

    if mode == MODE_CANARY:
        canary_path = _probe_canary_abs_path(libname)
        canary_status = STATUS_NOT_FOUND if canary_path is None else STATUS_OK
        print(format_dynamic_lib_subprocess_payload(canary_status, canary_path))
        return

    # Test-only path: exercises full loader behavior in isolation.
    try:
        loaded_path = _load_nvidia_dynamic_lib_for_test(libname)
    except DynamicLibNotFoundError as exc:
        details = {
            "type": type(exc).__name__,
            "message": str(exc),
        }
        output = format_dynamic_lib_subprocess_payload(STATUS_NOT_FOUND, None, error=details)
    else:
        output = format_dynamic_lib_subprocess_payload(STATUS_OK, loaded_path)
    print(output)
112+
113+
114+
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: ``<mode> <libname>``.

    Args:
        argv: Arguments to use instead of ``sys.argv[1:]``.

    Returns:
        ``0`` after the probe has printed its payload.

    Raises:
        SystemExit: With a usage message when there are not exactly two
            arguments or the first one is not in ``VALID_MODES``.
    """
    cli_args = list(argv) if argv is not None else sys.argv[1:]
    well_formed = len(cli_args) == 2 and cli_args[0] in VALID_MODES
    if not well_formed:
        modes = ", ".join(VALID_MODES)
        raise SystemExit(
            f"Usage: python -m cuda.pathfinder._dynamic_libs.dynamic_lib_subprocess <mode> <libname>\nModes: {modes}"
        )
    selected_mode, target_lib = cli_args
    probe_dynamic_lib_and_print_json(target_lib, selected_mode)
    return 0
124+
125+
126+
# Script entry: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
from __future__ import annotations
55

66
import functools
7-
import json
87
import struct
98
import subprocess
109
import sys
11-
from pathlib import Path
1210
from typing import TYPE_CHECKING
1311

1412
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
@@ -28,6 +26,14 @@
2826
find_via_ctk_root,
2927
run_find_steps,
3028
)
29+
from cuda.pathfinder._dynamic_libs.subprocess_protocol import (
30+
DYNAMIC_LIB_SUBPROCESS_CWD,
31+
MODE_CANARY,
32+
STATUS_OK,
33+
DynamicLibSubprocessPayload,
34+
build_dynamic_lib_subprocess_command,
35+
parse_dynamic_lib_subprocess_payload,
36+
)
3137
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
3238

3339
if TYPE_CHECKING:
@@ -40,9 +46,7 @@
4046
name for name, desc in LIB_DESCRIPTORS.items() if (desc.windows_dlls if IS_WINDOWS else desc.linux_sonames)
4147
)
4248
_PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux"
43-
_CANARY_PROBE_MODULE = "cuda.pathfinder._dynamic_libs.canary_probe_subprocess"
4449
_CANARY_PROBE_TIMEOUT_SECONDS = 10.0
45-
_CANARY_PROBE_IMPORT_ROOT = Path(__file__).resolve().parents[3]
4650

4751
# Driver libraries: shipped with the NVIDIA display driver, always on the
4852
# system linker path. These skip all CTK search steps (site-packages,
@@ -99,34 +103,28 @@ def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
99103
"""Resolve a canary library's absolute path in a fresh Python subprocess."""
100104
try:
101105
result = subprocess.run( # noqa: S603 - trusted argv: current interpreter + internal probe module
102-
[sys.executable, "-m", _CANARY_PROBE_MODULE, libname],
106+
build_dynamic_lib_subprocess_command(MODE_CANARY, libname),
103107
capture_output=True,
104108
text=True,
105109
timeout=_CANARY_PROBE_TIMEOUT_SECONDS,
106110
check=False,
107-
cwd=_CANARY_PROBE_IMPORT_ROOT,
111+
cwd=DYNAMIC_LIB_SUBPROCESS_CWD,
108112
)
109113
except subprocess.TimeoutExpired as exc:
110114
_raise_canary_probe_child_process_error(timeout=exc.timeout, stderr=exc.stderr)
111115

112116
if result.returncode != 0:
113117
_raise_canary_probe_child_process_error(returncode=result.returncode, stderr=result.stderr)
114118

115-
# Use the final non-empty line in case earlier output lines are emitted.
116-
lines = [line for line in result.stdout.splitlines() if line.strip()]
117-
if not lines:
118-
raise RuntimeError(f"Canary probe child process produced no stdout payload for {libname!r}")
119-
try:
120-
payload = json.loads(lines[-1])
121-
except json.JSONDecodeError:
122-
raise RuntimeError(
123-
f"Canary probe child process emitted invalid JSON payload for {libname!r}: {lines[-1]!r}"
124-
) from None
125-
if isinstance(payload, str):
126-
return payload
127-
if payload is None:
128-
return None
129-
raise RuntimeError(f"Canary probe child process emitted unexpected payload for {libname!r}: {payload!r}")
119+
payload: DynamicLibSubprocessPayload = parse_dynamic_lib_subprocess_payload(
120+
result.stdout,
121+
libname=libname,
122+
error_label="Canary probe child process",
123+
)
124+
abs_path: str | None = payload.abs_path
125+
if payload.status == STATUS_OK:
126+
return abs_path
127+
return None
130128

131129

132130
def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from __future__ import annotations
5+
6+
import json
7+
import sys
8+
from dataclasses import dataclass
9+
from pathlib import Path
10+
from typing import Literal
11+
12+
# Probe modes passed to the subprocess module as its first argument.
MODE_CANARY: Literal["canary"] = "canary"
MODE_LOAD: Literal["load"] = "load"
VALID_MODES: tuple[Literal["canary"], Literal["load"]] = (MODE_CANARY, MODE_LOAD)

# Values of the "status" field in the JSON payload exchanged with the subprocess.
STATUS_OK: Literal["ok"] = "ok"
STATUS_NOT_FOUND: Literal["not-found"] = "not-found"

# Dotted module name that build_dynamic_lib_subprocess_command() runs via ``-m``.
DYNAMIC_LIB_SUBPROCESS_MODULE = "cuda.pathfinder._dynamic_libs.dynamic_lib_subprocess"
# Fourth ancestor directory of this file, used as the child process's working
# directory (presumably so the ``cuda`` package resolves for ``-m`` -- confirm).
DYNAMIC_LIB_SUBPROCESS_CWD = Path(__file__).resolve().parents[3]
21+
22+
23+
@dataclass(frozen=True)
class DynamicLibSubprocessPayload:
    """Immutable, parsed form of the JSON payload emitted by a probe subprocess."""

    # Outcome reported by the child process: "ok" or "not-found".
    status: Literal["ok", "not-found"]
    # Path reported by the child process; None when no path was reported.
    abs_path: str | None
27+
28+
29+
def format_dynamic_lib_subprocess_payload(
    status: Literal["ok", "not-found"],
    abs_path: str | None,
    *,
    error: dict[str, str] | None = None,
) -> str:
    """Serialize a probe result as a JSON object string.

    The object always contains "status" and "abs_path", in that order; an
    "error" entry is appended only when ``error`` is not ``None``.
    """
    optional_fields: dict[str, object] = {} if error is None else {"error": error}
    return json.dumps({"status": status, "abs_path": abs_path, **optional_fields})
39+
40+
41+
def build_dynamic_lib_subprocess_command(mode: str, libname: str) -> list[str]:
    """Return the argv list that runs the probe module with ``mode`` and ``libname``.

    The command uses the current interpreter (``sys.executable``) and ``-m``
    with ``DYNAMIC_LIB_SUBPROCESS_MODULE``.
    """
    module_invocation = [sys.executable, "-m", DYNAMIC_LIB_SUBPROCESS_MODULE]
    return [*module_invocation, mode, libname]
43+
44+
45+
def parse_dynamic_lib_subprocess_payload(
    stdout: str,
    *,
    libname: str,
    error_label: str,
) -> DynamicLibSubprocessPayload:
    """Parse the JSON payload found on the last non-blank line of ``stdout``.

    Args:
        stdout: Captured standard output of the probe subprocess.
        libname: Library name, used only in error messages.
        error_label: Prefix describing the child process in error messages.

    Returns:
        A payload with status "ok" and a string ``abs_path``, or status
        "not-found" and ``abs_path`` of ``None``.

    Raises:
        RuntimeError: If there is no non-blank line, the line is not valid
            JSON, or the decoded value is not one of the two accepted shapes.
    """
    # Use the final non-empty line in case earlier output lines are emitted.
    non_blank = [text for text in stdout.splitlines() if text.strip()]
    if not non_blank:
        raise RuntimeError(f"{error_label} produced no stdout payload for {libname!r}")
    last_line = non_blank[-1]
    try:
        decoded = json.loads(last_line)
    except json.JSONDecodeError:
        raise RuntimeError(f"{error_label} emitted invalid JSON payload for {libname!r}: {last_line!r}") from None

    def _unexpected() -> RuntimeError:
        # Single place that builds the error shared by every malformed-payload case.
        return RuntimeError(f"{error_label} emitted unexpected payload for {libname!r}: {decoded!r}")

    if not isinstance(decoded, dict):
        raise _unexpected()

    status = decoded.get("status")
    abs_path = decoded.get("abs_path")
    if status == STATUS_OK and isinstance(abs_path, str):
        return DynamicLibSubprocessPayload(status=STATUS_OK, abs_path=abs_path)
    if status == STATUS_NOT_FOUND and abs_path is None:
        return DynamicLibSubprocessPayload(status=STATUS_NOT_FOUND, abs_path=None)
    raise _unexpected()

cuda_pathfinder/cuda/pathfinder/_testing/__init__.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)