test(core.utils): multiprocess stress for FileStreamProgramCache

cpcloud · cpcloud · commit ae8f470ad698 · 2026-04-14T18:15:27.000-04:00
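Spawns multiple processes to hammer the cache. Writers racing on a shared key check that the entry stays readable and still holds a writer's payload (last write wins, no corruption); writers on distinct keys check that no entry is lost under contention; and a reader running alongside a writer of unrelated keys checks that a seeded entry is found on every read, relying on os.replace being atomic so that torn files are never observed. Part of issue #178.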
diff --git a/cuda_core/tests/test_program_cache_multiprocess.py b/cuda_core/tests/test_program_cache_multiprocess.py
@@ -0,0 +1,118 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+"""Multiprocess stress tests for FileStreamProgramCache.
+
+These run without a GPU. They exercise the atomic-rename write path from
+multiple processes launched via ``multiprocessing.get_context("spawn")``.
+"""
+
+from __future__ import annotations
+
+import multiprocessing as _mp
+
+
+def _worker_write(root: str, key: bytes, payload: bytes, name: str) -> None:
+    """Store one cubin entry under ``key``; used as a child-process target.
+
+    Opens a ``FileStreamProgramCache`` on ``root`` as a context manager and
+    assigns ``cache[key]`` an ``ObjectCode`` created with
+    ``ObjectCode._init(payload, "cubin", name=name)``.
+
+    Args:
+        root: Location handed to ``FileStreamProgramCache``.
+        key: Cache key to write.
+        payload: Raw bytes wrapped in the ``ObjectCode``.
+        name: Name forwarded to ``ObjectCode._init``.
+    """
+    from cuda.core._module import ObjectCode
+    from cuda.core.utils import FileStreamProgramCache
+
+    with FileStreamProgramCache(root) as store:
+        entry = ObjectCode._init(payload, "cubin", name=name)
+        store[key] = entry
+
+
+def _worker_write_many(root: str, base: int, n: int) -> None:
+    """Write ``n`` entries with keys unique to ``base`` through one cache handle.
+
+    For each ``i`` in ``range(n)`` the key is ``proc-{base}-key-{i}``, the
+    payload is ``payload-{base}-{i}`` and the object name is ``p{base}-{i}``
+    (keys and payloads are encoded to bytes). All writes go through a single
+    ``FileStreamProgramCache(root)`` context.
+
+    Args:
+        root: Location handed to ``FileStreamProgramCache``.
+        base: Integer embedded in every key, payload and name.
+        n: Number of entries to write.
+    """
+    from cuda.core._module import ObjectCode
+    from cuda.core.utils import FileStreamProgramCache
+
+    with FileStreamProgramCache(root) as store:
+        for idx in range(n):
+            payload = f"payload-{base}-{idx}".encode()
+            label = f"p{base}-{idx}"
+            store[f"proc-{base}-key-{idx}".encode()] = ObjectCode._init(
+                payload, "cubin", name=label
+            )
+
+
+def _worker_reader(root: str, key: bytes, rounds: int, result_queue) -> None:
+    """Look up ``key`` ``rounds`` times and report how many lookups hit.
+
+    Every round opens a fresh ``FileStreamProgramCache(root)`` context and
+    calls ``cache.get(key)``; a round counts as a hit when the result is not
+    ``None``. The final count is sent back with a single
+    ``result_queue.put(...)``.
+
+    Args:
+        root: Location handed to ``FileStreamProgramCache``.
+        key: Cache key to look up.
+        rounds: Number of lookups to perform.
+        result_queue: Object with a ``put`` method that receives the hit count.
+    """
+    from cuda.core.utils import FileStreamProgramCache
+
+    found = 0
+    for _round in range(rounds):
+        with FileStreamProgramCache(root) as store:
+            # A bool adds as 0 or 1, so this counts non-None results.
+            found += store.get(key) is not None
+    result_queue.put(found)
+
+
+def test_concurrent_writers_same_key_no_corruption(tmp_path):
+    """Six spawned writers race on one key; the surviving entry must be intact.
+
+    Each worker writes a distinct payload (``b"v<i>"`` repeated 64 times) under
+    the key ``b"shared"``. After all workers finish, the entry must be readable
+    and must equal exactly one of the written payloads. A prefix check alone
+    would also accept a payload that mixes bytes from several writers.
+    """
+    from cuda.core.utils import FileStreamProgramCache
+
+    root = str(tmp_path / "fc")
+    payloads = [f"v{i}".encode() * 64 for i in range(6)]
+    ctx = _mp.get_context("spawn")
+    procs = [
+        ctx.Process(target=_worker_write, args=(root, b"shared", payload, f"p{i}"))
+        for i, payload in enumerate(payloads)
+    ]
+    try:
+        for p in procs:
+            p.start()
+        for p in procs:
+            p.join(timeout=60)
+        # Snapshot before cleanup so a worker that timed out still shows None.
+        exit_codes = [p.exitcode for p in procs]
+    finally:
+        # join(timeout=...) returns without stopping a worker that is still
+        # running; kill stragglers so a hang cannot outlive this test.
+        for p in procs:
+            if p.is_alive():
+                p.kill()
+                p.join()
+    assert exit_codes == [0] * len(procs), f"worker exit codes: {exit_codes}"
+
+    with FileStreamProgramCache(root) as cache:
+        got = cache[b"shared"]  # must not raise
+        # The stored bytes must be exactly what one of the writers wrote.
+        assert bytes(got._module) in payloads
+
+
+def test_concurrent_writers_distinct_keys_all_survive(tmp_path):
+    """Four spawned writers each store 25 distinct keys; none may be lost.
+
+    Every worker runs ``_worker_write_many`` with its own ``base``, so the key
+    sets do not overlap. Once all workers have exited cleanly, every expected
+    key must be present in the cache.
+    """
+    from cuda.core.utils import FileStreamProgramCache
+
+    root = str(tmp_path / "fc")
+    n_procs = 4
+    per_proc = 25
+    ctx = _mp.get_context("spawn")
+    procs = [
+        ctx.Process(target=_worker_write_many, args=(root, base, per_proc))
+        for base in range(n_procs)
+    ]
+    try:
+        for p in procs:
+            p.start()
+        for p in procs:
+            p.join(timeout=60)
+        # Snapshot before cleanup so a worker that timed out still shows None.
+        exit_codes = [p.exitcode for p in procs]
+    finally:
+        # join(timeout=...) returns without stopping a worker that is still
+        # running; kill stragglers so a hang cannot outlive this test.
+        for p in procs:
+            if p.is_alive():
+                p.kill()
+                p.join()
+    assert exit_codes == [0] * n_procs, f"worker exit codes: {exit_codes}"
+
+    expected = [
+        f"proc-{base}-key-{i}".encode()
+        for base in range(n_procs)
+        for i in range(per_proc)
+    ]
+    with FileStreamProgramCache(root) as cache:
+        missing = [key for key in expected if key not in cache]
+    # Report how many keys were lost and a sample of them, not just the first.
+    assert not missing, (
+        f"{len(missing)} of {len(expected)} keys missing, e.g. {missing[:5]}"
+    )
+
+
+def test_concurrent_reader_never_sees_torn_file(tmp_path):
+    """A reader polling a seeded key must hit on every read while a writer runs.
+
+    The key ``b"k"`` is written once before any child process starts. A writer
+    process then stores 50 unrelated keys while a reader process looks up
+    ``b"k"`` ``rounds`` times and reports its hit count through a queue. Both
+    processes must exit with code 0 and every lookup must have hit.
+    """
+    from cuda.core._module import ObjectCode
+    from cuda.core.utils import FileStreamProgramCache
+
+    root = str(tmp_path / "fc")
+    rounds = 200  # single source for the reader argument and the final check
+    # Seed 'k' so the reader can hit; the writer writes unrelated keys so 'k'
+    # is never overwritten while the reader is active.
+    with FileStreamProgramCache(root) as cache:
+        cache[b"k"] = ObjectCode._init(b"seed" * 256, "cubin", name="seed")
+
+    ctx = _mp.get_context("spawn")
+    queue = ctx.Queue()
+    writer = ctx.Process(target=_worker_write_many, args=(root, 99, 50))
+    reader = ctx.Process(
+        target=_worker_reader, args=(root, b"k", rounds, queue)
+    )
+    try:
+        reader.start()
+        writer.start()
+        writer.join(timeout=60)
+        reader.join(timeout=60)
+        # Snapshot before cleanup so a process that timed out still shows None.
+        writer_code = writer.exitcode
+        reader_code = reader.exitcode
+    finally:
+        # join(timeout=...) returns without stopping a process that is still
+        # running; kill stragglers so a hang cannot outlive this test.
+        for p in (writer, reader):
+            if p.is_alive():
+                p.kill()
+                p.join()
+    assert writer_code == 0, f"writer exit code: {writer_code}"
+    assert reader_code == 0, f"reader exit code: {reader_code}"
+    hits = queue.get(timeout=5)
+    # 'k' was never overwritten, so every read must hit.
+    assert hits == rounds