Skip to content

Commit 15cfd84

Browse files
authored
fix(pathfinder): keep canary probes script-safe (#1768)
1 parent 3ed5217 commit 15cfd84

File tree

5 files changed

+169
-24
lines changed

5 files changed

+169
-24
lines changed

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
import json
6+
import sys
7+
from collections.abc import Sequence
68

79
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
810
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL
@@ -27,3 +29,15 @@ def _probe_canary_abs_path(libname: str) -> str | None:
2729

2830
def probe_canary_abs_path_and_print_json(libname: str) -> None:
    """Print the canary probe result for ``libname`` to stdout as a single JSON value.

    The value is whatever ``_probe_canary_abs_path`` returns (a path string or
    ``None``), serialized with ``json.dumps``.
    """
    abs_path = _probe_canary_abs_path(libname)
    payload = json.dumps(abs_path)
    print(payload)
32+
33+
34+
def main(argv: Sequence[str] | None = None) -> int:
35+
args = list(sys.argv[1:] if argv is None else argv)
36+
if len(args) != 1:
37+
raise SystemExit("Usage: python -m cuda.pathfinder._dynamic_libs.canary_probe_subprocess <libname>")
38+
probe_canary_abs_path_and_print_json(args[0])
39+
return 0
40+
41+
42+
# Script entry point (``python -m ...canary_probe_subprocess <libname>``):
# main()'s return value becomes the process exit status via SystemExit.
if __name__ == "__main__":
    raise SystemExit(main())

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
import functools
77
import json
88
import struct
9+
import subprocess
910
import sys
11+
from pathlib import Path
1012
from typing import TYPE_CHECKING
1113

12-
from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json
1314
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
1415
from cuda.pathfinder._dynamic_libs.load_dl_common import (
1516
DynamicLibNotAvailableError,
@@ -28,7 +29,6 @@
2829
run_find_steps,
2930
)
3031
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
31-
from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
3232

3333
if TYPE_CHECKING:
3434
from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor
@@ -40,6 +40,9 @@
4040
name for name, desc in LIB_DESCRIPTORS.items() if (desc.windows_dlls if IS_WINDOWS else desc.linux_sonames)
4141
)
4242
_PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux"
43+
_CANARY_PROBE_MODULE = "cuda.pathfinder._dynamic_libs.canary_probe_subprocess"
44+
_CANARY_PROBE_TIMEOUT_SECONDS = 10.0
45+
_CANARY_PROBE_IMPORT_ROOT = Path(__file__).resolve().parents[3]
4346

4447
# Driver libraries: shipped with the NVIDIA display driver, always on the
4548
# system linker path. These skip all CTK search steps (site-packages,
@@ -67,15 +70,47 @@ def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
6770
)
6871

6972

73+
def _coerce_subprocess_output(output: str | bytes | None) -> str:
74+
if isinstance(output, bytes):
75+
return output.decode(errors="replace")
76+
return "" if output is None else output
77+
78+
79+
def _raise_canary_probe_child_process_error(
    *,
    returncode: int | None = None,
    timeout: float | None = None,
    stderr: str | bytes | None = None,
) -> None:
    """Raise ``ChildProcessError`` describing a failed canary probe child process.

    This function never returns normally; it always raises.

    Args:
        returncode: Exit code of the child; reported when ``timeout`` is ``None``.
        timeout: Timeout in seconds that expired. When given, the message
            reports the timeout instead of an exit code.
        stderr: Captured stderr of the child (``str``, ``bytes`` or ``None``),
            embedded in the message between marker lines.

    Raises:
        ChildProcessError: Always.
    """
    if timeout is None:
        error_line = f"Canary probe child process exited with code {returncode}."
    else:
        error_line = f"Canary probe child process timed out after {timeout} seconds."
    stderr_text = _coerce_subprocess_output(stderr)
    # Captured stderr may end without a newline (e.g. a partial last line);
    # terminate it so the end marker always starts on its own line.
    if stderr_text and not stderr_text.endswith("\n"):
        stderr_text += "\n"
    raise ChildProcessError(
        f"{error_line}\n"
        "--- stderr-from-child-process ---\n"
        f"{stderr_text}"
        "<end-of-stderr-from-child-process>\n"
    )
95+
96+
7097
@functools.cache
7198
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
72-
"""Resolve a canary library's absolute path in a spawned child process."""
73-
result = run_in_spawned_child_process(
74-
probe_canary_abs_path_and_print_json,
75-
args=(libname,),
76-
timeout=10.0,
77-
rethrow=True,
78-
)
99+
"""Resolve a canary library's absolute path in a fresh Python subprocess."""
100+
try:
101+
result = subprocess.run( # noqa: S603 - trusted argv: current interpreter + internal probe module
102+
[sys.executable, "-m", _CANARY_PROBE_MODULE, libname],
103+
capture_output=True,
104+
text=True,
105+
timeout=_CANARY_PROBE_TIMEOUT_SECONDS,
106+
check=False,
107+
cwd=_CANARY_PROBE_IMPORT_ROOT,
108+
)
109+
except subprocess.TimeoutExpired as exc:
110+
_raise_canary_probe_child_process_error(timeout=exc.timeout, stderr=exc.stderr)
111+
112+
if result.returncode != 0:
113+
_raise_canary_probe_child_process_error(returncode=result.returncode, stderr=result.stderr)
79114

80115
# Use the final non-empty line in case earlier output lines are emitted.
81116
lines = [line for line in result.stdout.splitlines() if line.strip()]

cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def find_via_ctk_root_canary(desc: HeaderDescriptor) -> LocatedHeaderDir | None:
115115
"""Try CTK header lookup via CTK-root canary probing.
116116
117117
Skips immediately if the descriptor does not opt in (``use_ctk_root_canary``).
118-
Otherwise, system-loads ``cudart`` in a spawned child process, derives
118+
Otherwise, system-loads ``cudart`` in a fully isolated Python subprocess, derives
119119
CTK root from the resolved library path, and searches the expected include
120120
layout under that root.
121121
"""
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
.. SPDX-License-Identifier: Apache-2.0
3+
4+
.. py:currentmodule:: cuda.pathfinder
5+
6+
``cuda-pathfinder`` 1.4.3 Release notes
7+
=======================================
8+
9+
Highlights
10+
----------
11+
12+
* Fix CTK canary probing when it is reached from plain Python scripts that do
13+
not use an ``if __name__ == "__main__"`` guard. The canary now runs in a
14+
fully isolated Python subprocess without re-entering the caller's script
15+
during child-process startup.
16+
(`PR #1768 <https://github.com/NVIDIA/cuda-python/pull/1768>`_)
17+
18+
* Make the canary subprocess resolve ``cuda.pathfinder`` from the same import
19+
root as the parent process. This avoids mixed source-tree versus wheel imports
20+
in wheel-based test environments and keeps the probe behavior consistent
21+
across source and installed-package workflows.
22+
(`PR #1768 <https://github.com/NVIDIA/cuda-python/pull/1768>`_)

cuda_pathfinder/tests/test_ctk_root_discovery.py

Lines changed: 88 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44

5+
import os
6+
import subprocess
7+
import sys
8+
import textwrap
9+
from pathlib import Path
10+
511
import pytest
612

713
from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_mod
@@ -24,6 +30,7 @@
2430

2531
_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib"
2632
_STEPS_MODULE = "cuda.pathfinder._dynamic_libs.search_steps"
33+
_PACKAGE_ROOT = Path(load_mod.__file__).resolve().parents[3]
2734

2835

2936
def _ctx(libname: str = "nvvm") -> SearchContext:
@@ -184,53 +191,120 @@ def test_try_via_ctk_root_regular_lib(tmp_path):
184191

185192

186193
def test_subprocess_probe_returns_abs_path_on_string_payload(mocker):
187-
result = mocker.Mock(stdout='"/usr/local/cuda/lib64/libcudart.so.13"\n')
188-
run_mock = mocker.patch(f"{_MODULE}.run_in_spawned_child_process", return_value=result)
194+
result = subprocess.CompletedProcess(
195+
args=[],
196+
returncode=0,
197+
stdout='"/usr/local/cuda/lib64/libcudart.so.13"\n',
198+
stderr="",
199+
)
200+
run_mock = mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
189201

190202
assert _resolve_system_loaded_abs_path_in_subprocess("cudart") == "/usr/local/cuda/lib64/libcudart.so.13"
191-
assert run_mock.call_args.kwargs.get("rethrow") is True
203+
run_mock.assert_called_once_with(
204+
[sys.executable, "-m", "cuda.pathfinder._dynamic_libs.canary_probe_subprocess", "cudart"],
205+
capture_output=True,
206+
text=True,
207+
timeout=10.0,
208+
check=False,
209+
cwd=_PACKAGE_ROOT,
210+
)
192211

193212

194213
def test_subprocess_probe_returns_none_on_null_payload(mocker):
195-
result = mocker.Mock(stdout="null\n")
196-
mocker.patch(f"{_MODULE}.run_in_spawned_child_process", return_value=result)
214+
result = subprocess.CompletedProcess(args=[], returncode=0, stdout="null\n", stderr="")
215+
mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
197216

198217
assert _resolve_system_loaded_abs_path_in_subprocess("cudart") is None
199218

200219

201220
def test_subprocess_probe_raises_on_child_failure(mocker):
221+
result = subprocess.CompletedProcess(args=[], returncode=1, stdout="", stderr="child failed\n")
222+
mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
223+
224+
with pytest.raises(ChildProcessError, match="child failed"):
225+
_resolve_system_loaded_abs_path_in_subprocess("cudart")
226+
227+
228+
def test_subprocess_probe_raises_on_timeout(mocker):
202229
mocker.patch(
203-
f"{_MODULE}.run_in_spawned_child_process",
204-
side_effect=ChildProcessError("child failed"),
230+
f"{_MODULE}.subprocess.run",
231+
side_effect=subprocess.TimeoutExpired(cmd=["python"], timeout=10.0, stderr="probe hung\n"),
205232
)
206-
with pytest.raises(ChildProcessError, match="child failed"):
233+
with pytest.raises(ChildProcessError, match="timed out after 10.0 seconds"):
207234
_resolve_system_loaded_abs_path_in_subprocess("cudart")
208235

209236

210237
def test_subprocess_probe_raises_on_empty_stdout(mocker):
211-
result = mocker.Mock(stdout=" \n \n")
212-
mocker.patch(f"{_MODULE}.run_in_spawned_child_process", return_value=result)
238+
result = subprocess.CompletedProcess(args=[], returncode=0, stdout=" \n \n", stderr="")
239+
mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
213240

214241
with pytest.raises(RuntimeError, match="produced no stdout payload"):
215242
_resolve_system_loaded_abs_path_in_subprocess("cudart")
216243

217244

218245
def test_subprocess_probe_raises_on_invalid_json_payload(mocker):
219-
result = mocker.Mock(stdout="not-json\n")
220-
mocker.patch(f"{_MODULE}.run_in_spawned_child_process", return_value=result)
246+
result = subprocess.CompletedProcess(args=[], returncode=0, stdout="not-json\n", stderr="")
247+
mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
221248

222249
with pytest.raises(RuntimeError, match="invalid JSON payload"):
223250
_resolve_system_loaded_abs_path_in_subprocess("cudart")
224251

225252

226253
def test_subprocess_probe_raises_on_unexpected_json_payload(mocker):
227-
result = mocker.Mock(stdout='{"path": "/usr/local/cuda/lib64/libcudart.so.13"}\n')
228-
mocker.patch(f"{_MODULE}.run_in_spawned_child_process", return_value=result)
254+
result = subprocess.CompletedProcess(
255+
args=[],
256+
returncode=0,
257+
stdout='{"path": "/usr/local/cuda/lib64/libcudart.so.13"}\n',
258+
stderr="",
259+
)
260+
mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)
229261

230262
with pytest.raises(RuntimeError, match="unexpected payload"):
231263
_resolve_system_loaded_abs_path_in_subprocess("cudart")
232264

233265

266+
def test_subprocess_probe_does_not_reenter_calling_script(tmp_path):
    """Run the probe from a guard-less script and check the script body executes once."""
    run_count_path = tmp_path / "run_count.txt"
    # The script increments a counter file each time its top level runs, so a
    # child process that re-executed the script would leave a count above 1.
    script_source = textwrap.dedent(
        f"""
        from pathlib import Path

        from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
            _resolve_system_loaded_abs_path_in_subprocess,
        )

        marker_path = Path({str(run_count_path)!r})
        run_count = int(marker_path.read_text()) if marker_path.exists() else 0
        marker_path.write_text(str(run_count + 1))

        try:
            _resolve_system_loaded_abs_path_in_subprocess("not_a_real_lib")
        except Exception:
            pass
        """
    )
    caller_script = tmp_path / "call_probe.py"
    caller_script.write_text(script_source, encoding="utf-8")

    # Put the package root first on PYTHONPATH, keeping any inherited entries after it.
    child_env = os.environ.copy()
    pythonpath_entries = [str(_PACKAGE_ROOT)]
    inherited_pythonpath = child_env.get("PYTHONPATH")
    if inherited_pythonpath:
        pythonpath_entries.append(inherited_pythonpath)
    child_env["PYTHONPATH"] = os.pathsep.join(pythonpath_entries)

    completed = subprocess.run(  # noqa: S603 - trusted argv: current interpreter + temp script created by this test
        [sys.executable, str(caller_script)],
        capture_output=True,
        text=True,
        check=False,
        env=child_env,
    )

    assert completed.returncode == 0, completed.stderr
    assert run_count_path.read_text(encoding="utf-8") == "1"
307+
234308
# ---------------------------------------------------------------------------
235309
# _try_ctk_root_canary
236310
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)