Skip to content

Commit cad93d0

Browse files
committed
fixup! feat(core.utils): address review findings on cache key and eviction
High: make_program_cache_key() now refuses to build an NVRTC key when options.name carries a directory component and neither extra_digest nor no_source_include=True is set. NVRTC searches the directory portion of the name for #include "..." lookups, and the cache cannot fingerprint those files -- a stale header would otherwise yield a stale cache hit. Names without a directory separator fall back to CWD (the same search root every NVRTC compile sees) and remain accepted unchanged. Medium: FileStreamProgramCache._enforce_size_cap() now decrements the running ``total`` whether it unlinks the candidate itself or a concurrent pruner already removed the file. Previously the suppressed FileNotFoundError left ``total`` inflated, so the loop kept evicting newer entries to "hit" a cap that had already been met -- turning ordinary contention into cache data loss. Tests cover: path-like names on POSIX/Windows separators with and without extra_digest / no_source_include; names without a directory component (default_program, bare labels, filenames) must stay allowed; non-NVRTC backends are exempt from the new guard. The eviction test monkeypatches Path.unlink to simulate a concurrent deleter winning the race and verifies the freshly-committed entry survives.
1 parent 2e9fe1b commit cad93d0

2 files changed

Lines changed: 189 additions & 5 deletions

File tree

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,25 @@ def _nvvm_fingerprint() -> str:
422422
_STR_OR_SEQUENCE_OPTION_FIELDS = frozenset({"include_path", "pre_include"})
423423

424424

425+
def _name_has_dir_component(name) -> bool:
426+
"""Return True when ``name`` is a non-empty string that contains a
427+
directory separator.
428+
429+
NVRTC interprets the ``name`` argument to ``nvrtcCreateProgram`` as a
430+
virtual filename and searches its directory for ``#include "..."``
431+
lookups (unless ``--no-source-include`` is set). Only names that carry a
432+
directory component actually introduce a new include search path that
433+
the cache cannot fingerprint; bare labels like ``"default_program"`` or
434+
``"kernel-a"`` fall back to the process CWD, which is the same search
435+
root every NVRTC compile sees regardless of ``options.name``. Checking
436+
both POSIX and Windows separators keeps the guard consistent across
437+
platforms.
438+
"""
439+
if not isinstance(name, str) or not name:
440+
return False
441+
return "/" in name or "\\" in name
442+
443+
425444
def _option_is_set(options: ProgramOptions, name: str) -> bool:
426445
"""Match how ``_program.pyx`` gates option emission, per field shape.
427446
@@ -530,10 +549,14 @@ def make_program_cache_key(
530549
``pch``, ``use_pch``, ``pch_dir``; and ``use_libdevice=True`` on the
531550
NVVM path) require ``extra_digest`` -- fingerprint the bytes the
532551
compiler will pull in and pass that digest so changes to those files
533-
force a cache miss. Options that have compile-time side effects
534-
(``create_pch``, ``time``, ``fdevice_time_trace``) cannot be cached
535-
and raise ``ValueError``; compile directly, or disable the flag, for
536-
those cases.
552+
force a cache miss. On the NVRTC path, ``options.name`` with a
553+
directory component (e.g. ``"/path/to/kernel.cu"``) also requires
554+
``extra_digest`` because NVRTC searches that directory for
555+
``#include "..."`` lookups; pass ``options.no_source_include=True``
556+
to disable the search instead of fingerprinting the headers. Options
557+
that have compile-time side effects (``create_pch``, ``time``,
558+
``fdevice_time_trace``) cannot be cached and raise ``ValueError``;
559+
compile directly, or disable the flag, for those cases.
537560
"""
538561
# Mirror Program.compile (_program.pyx Program_init lowercases code_type
539562
# before dispatch); a caller that passes "PTX" or "C++" must get the
@@ -599,6 +622,31 @@ def make_program_cache_key(
599622
f"compile will read and pass it as extra_digest=..."
600623
)
601624

625+
# NVRTC implicitly searches the directory portion of ``options.name`` for
626+
# ``#include "..."`` lookups unless ``no_source_include=True``. When the
627+
# caller sets ``options.name`` to a path-like string (contains a directory
628+
# separator), local headers in that directory can feed the compile without
629+
# the cache seeing them -- a change to one of those headers would
630+
# otherwise yield a stale cache hit. Require ``extra_digest`` (fingerprint
631+
# over the local headers) or ``no_source_include=True`` (disable the
632+
# source-directory lookup entirely) to make the dependency explicit.
633+
if (
634+
backend == "nvrtc"
635+
and extra_digest is None
636+
and not _option_is_set(options, "no_source_include")
637+
and _name_has_dir_component(getattr(options, "name", None))
638+
):
639+
raise ValueError(
640+
"make_program_cache_key() refuses to build an NVRTC key when "
641+
"options.name looks like a filesystem path (it contains a directory "
642+
"separator) and neither extra_digest nor no_source_include=True "
643+
"is set: NVRTC searches the directory portion of options.name for "
644+
'#include "..." lookups, and the cache cannot fingerprint those '
645+
"files. Either pass extra_digest=... covering any local headers "
646+
"the compile may pull in, or set options.no_source_include=True "
647+
"to disable source-directory lookup."
648+
)
649+
602650
# PTX compiles go through Linker. When the driver (cuLink) backend is
603651
# selected (nvJitLink unavailable), Program.compile rejects a subset of
604652
# options that nvJitLink would accept; reject them here too so we never
@@ -1527,6 +1575,12 @@ def _enforce_size_cap(self) -> None:
15271575
# below could declare us done while still over the limit.
15281576
total += stat_now.st_size - size
15291577
continue
1578+
# Decrement ``total`` whether we unlinked the entry ourselves or
1579+
# a concurrent pruner beat us to it -- either way the bytes no
1580+
# longer contribute to the on-disk footprint. Without this,
1581+
# ``total`` stays inflated after a suppressed FileNotFoundError
1582+
# and the loop keeps evicting newer entries to "hit" a cap that
1583+
# has already been met.
15301584
with contextlib.suppress(FileNotFoundError):
15311585
path.unlink()
1532-
total -= size
1586+
total -= size

cuda_core/tests/test_program_cache.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,92 @@ def test_make_program_cache_key_accepts_empty_external_content(option_kw):
794794
_make_key(options=_opts(**option_kw)) # Should not raise.
795795

796796

797+
@pytest.mark.parametrize(
798+
"name",
799+
[
800+
pytest.param("/abs/path/kernel.cu", id="posix_absolute"),
801+
pytest.param("rel/path/kernel.cu", id="posix_relative"),
802+
pytest.param("C:\\src\\kernel.cu", id="windows_absolute"),
803+
pytest.param("src\\kernel.cu", id="windows_relative"),
804+
pytest.param("a/b", id="no_suffix"),
805+
],
806+
)
807+
def test_make_program_cache_key_rejects_path_name_without_extra_digest(name):
808+
"""NVRTC searches the directory portion of ``options.name`` for
809+
``#include "..."`` lookups unless ``no_source_include=True``. Any name
810+
that carries a directory separator can therefore pull in local header
811+
bytes the cache cannot observe -- require either an ``extra_digest``
812+
covering those headers or explicit opt-out via ``no_source_include``."""
813+
with pytest.raises(ValueError, match="no_source_include"):
814+
_make_key(options=_opts(name=name))
815+
816+
817+
@pytest.mark.parametrize(
818+
"name",
819+
[
820+
pytest.param("default_program", id="default_program"),
821+
pytest.param("kernel-a", id="synthetic_label"),
822+
pytest.param("kernel.cu", id="bare_filename"),
823+
pytest.param("", id="empty_string"),
824+
],
825+
)
826+
def test_make_program_cache_key_accepts_non_path_name(name):
827+
"""Names without a directory component don't introduce a new search
828+
path -- NVRTC falls back to CWD, same as every other compile. The
829+
guard must only fire on names that *add* a fingerprinting gap, so
830+
synthetic labels and bare filenames stay allowed without extra_digest."""
831+
_make_key(options=_opts(name=name)) # Should not raise.
832+
833+
834+
@pytest.mark.parametrize(
835+
"name",
836+
[
837+
pytest.param("/abs/path/kernel.cu", id="posix_absolute"),
838+
pytest.param("src\\kernel.cu", id="windows_relative"),
839+
],
840+
)
841+
def test_make_program_cache_key_path_name_accepts_no_source_include(name):
842+
"""``no_source_include=True`` disables NVRTC's source-directory search
843+
entirely, so the cache no longer has a fingerprinting gap -- the guard
844+
must accept a path-like name in that case."""
845+
_make_key(options=_opts(name=name, no_source_include=True)) # Should not raise.
846+
847+
848+
@pytest.mark.parametrize(
849+
"name",
850+
[
851+
pytest.param("/abs/path/kernel.cu", id="posix_absolute"),
852+
pytest.param("src\\kernel.cu", id="windows_relative"),
853+
],
854+
)
855+
def test_make_program_cache_key_path_name_accepts_extra_digest(name):
856+
"""``extra_digest`` is the caller's fingerprint over the header bytes
857+
the compile will pull in; once supplied, the path-name guard has no
858+
reason to reject."""
859+
_make_key(options=_opts(name=name), extra_digest=b"\x00" * 32) # Should not raise.
860+
861+
862+
@pytest.mark.parametrize(
863+
"code_type, target_type, code",
864+
[
865+
pytest.param("ptx", "cubin", ".version 7.0", id="ptx"),
866+
pytest.param("nvvm", "ptx", b"\x00", id="nvvm"),
867+
],
868+
)
869+
def test_make_program_cache_key_path_name_guard_is_nvrtc_only(code_type, target_type, code):
870+
"""The Linker path runs ``_translate_program_options`` which never
871+
routes through NVRTC's source-directory search, and NVVM ignores
872+
``options.name`` for include lookups. The path-name guard must be
873+
scoped to the NVRTC backend so it doesn't block legitimate PTX/NVVM
874+
compiles that happen to reuse a ProgramOptions carrying a real path."""
875+
_make_key(
876+
code=code,
877+
code_type=code_type,
878+
target_type=target_type,
879+
options=_opts(name="/abs/path/kernel.cu"),
880+
) # Should not raise.
881+
882+
797883
def test_make_program_cache_key_ptx_ignores_nvrtc_only_options():
798884
"""PTX compiles go through ``_translate_program_options`` which drops
799885
NVRTC-only fields (include_path, pch_*, frandom_seed, ...). Those
@@ -1972,6 +2058,50 @@ def _iter_entry_paths(self):
19722058
assert path_a.read_bytes().startswith(b"\x80\x05fresh-by-other-writer-")
19732059

19742060

2061+
def test_filestream_cache_size_cap_over_eviction_on_concurrent_delete(tmp_path, monkeypatch):
2062+
"""When a concurrent process unlinks an eviction candidate between the
2063+
eviction scan's stat and our own unlink, ``_enforce_size_cap`` must
2064+
still decrement ``total`` by the missing file's size. Forgetting to do
2065+
so leaves ``total`` inflated, the loop keeps evicting entries that are
2066+
still within the cap, and newer entries get deleted unnecessarily.
2067+
"""
2068+
from pathlib import Path
2069+
2070+
from cuda.core.utils import FileStreamProgramCache
2071+
2072+
cap = 3000
2073+
root = tmp_path / "fc"
2074+
with FileStreamProgramCache(root, max_size_bytes=cap) as cache:
2075+
cache[b"old"] = _fake_object_code(b"O" * 2000, name="old")
2076+
time.sleep(0.02)
2077+
old_path = cache._path_for_key(b"old")
2078+
assert old_path.exists()
2079+
2080+
real_unlink = Path.unlink
2081+
state = {"fired": False}
2082+
2083+
def flaky_unlink(self, *args, **kwargs):
2084+
# Simulate another process winning the unlink race exactly once
2085+
# on the ``old`` entry: perform the real unlink (the file is gone
2086+
# after this call) but surface FileNotFoundError to the caller so
2087+
# we exercise the suppressed branch in _enforce_size_cap.
2088+
if self == old_path and not state["fired"]:
2089+
state["fired"] = True
2090+
real_unlink(self, *args, **kwargs)
2091+
raise FileNotFoundError(str(self))
2092+
return real_unlink(self, *args, **kwargs)
2093+
2094+
monkeypatch.setattr(Path, "unlink", flaky_unlink)
2095+
2096+
with FileStreamProgramCache(root, max_size_bytes=cap) as cache:
2097+
# Writing ``new`` pushes total over the cap and triggers eviction.
2098+
# Only ``old`` needs to go -- ``new`` (just committed) must survive.
2099+
cache[b"new"] = _fake_object_code(b"N" * 2000, name="new")
2100+
assert state["fired"], "monkeypatched unlink never fired; test did not exercise the race"
2101+
assert b"new" in cache, "over-eviction: freshly committed entry deleted after FileNotFoundError was swallowed"
2102+
assert b"old" not in cache
2103+
2104+
19752105
def test_filestream_cache_size_cap_counts_tmp_files(tmp_path):
19762106
"""Surviving temp files occupy disk too; the soft cap must include
19772107
them, otherwise an attacker (or a flurry of crashed writers) could

0 commit comments

Comments
 (0)