|
7 | 7 |
|
import os
import pathlib

import numpy as np
import pytest
from conftest import skipif_need_cuda_headers

from cuda.core.experimental import Device, LaunchConfig, Program, ProgramOptions, launch
from cuda.core.experimental._memory import _DefaultPinnedMemorySource
@@ -152,3 +153,48 @@ def test_launch_scalar_argument(python_type, cpp_type, init_value): |
152 | 153 |
|
153 | 154 | # Check result |
154 | 155 | assert arr[0] == init_value, f"Expected {init_value}, got {arr[0]}" |
| 156 | + |
| 157 | + |
@skipif_need_cuda_headers  # cooperative_groups.h comes from the CUDA Toolkit headers
def test_cooperative_launch():
    """Exercise cooperative kernel launches (grid-wide sync).

    Verifies that:
      * an oversized grid is rejected with ``ValueError`` before launch, and
      * a well-sized cooperative launch of a ``grid.sync()`` kernel completes.
    """
    dev = Device()
    dev.set_current()
    s = dev.create_stream(options={"nonblocking": True})

    # Minimal cooperative kernel: a whole-grid barrier via cooperative_groups.
    code = r"""
    #include <cooperative_groups.h>

    extern "C" __global__ void test_grid_sync() {
        namespace cg = cooperative_groups;
        auto grid = cg::this_grid();
        grid.sync();
    }
    """

    # Compile for the current device's architecture; cooperative_groups
    # requires the real CUDA headers, located via CUDA_PATH.
    arch = "".join(str(i) for i in dev.compute_capability)
    include_path = str(pathlib.Path(os.environ["CUDA_PATH"]) / "include")
    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}", include_path=include_path)
    prog = Program(code, code_type="c++", options=pro_opts)
    ker = prog.compile("cubin").get_kernel("test_grid_sync")

    # # Launch without setting cooperative_launch
    # # Commented out as this seems to be a sticky error...
    # config = LaunchConfig(grid=1, block=1)
    # launch(s, config, ker)
    # from cuda.core.experimental._utils.cuda_utils import CUDAError
    # with pytest.raises(CUDAError) as e:
    #     s.sync()
    # assert "CUDA_ERROR_LAUNCH_FAILED" in str(e)

    # A grid larger than the device limit must be rejected up front.
    block = 128
    config = LaunchConfig(grid=dev.properties.max_grid_dim_x // block + 1, block=block, cooperative_launch=True)
    with pytest.raises(ValueError):
        launch(s, config, ker)

    # A 1x1 cooperative launch always fits and must run to completion.
    config = LaunchConfig(grid=1, block=1, cooperative_launch=True)
    launch(s, config, ker)
    s.sync()
0 commit comments