forked from NVIDIA/cuda-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_linker.py
More file actions
176 lines (141 loc) · 6.92 KB
/
test_linker.py
File metadata and controls
176 lines (141 loc) · 6.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
#
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
import pytest
from conftest import skipif_testing_with_compute_sanitizer
from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker
from cuda.core.experimental._module import ObjectCode
from cuda.core.experimental._utils.cuda_utils import CUDAError
ARCH = "sm_" + "".join(f"{i}" for i in Device().compute_capability)
kernel_a = """
extern __device__ int B();
extern __device__ int C(int a, int b);
__global__ void A() { int result = C(B(), 1);}
"""
device_function_b = "__device__ int B() { return 0; }"
device_function_c = "__device__ int C(int a, int b) { return a + b; }"
is_culink_backend = _linker._decide_nvjitlink_or_driver()
if not is_culink_backend:
from cuda.bindings import nvjitlink
nvJitLinkError = nvjitlink.nvJitLinkError
else:
class nvJitLinkError(Exception):
pass
@pytest.fixture(scope="function")
def compile_ptx_functions(init_cuda):
# Without -rdc (relocatable device code) option, the generated ptx will not included any unreferenced
# device functions, causing the link to fail
object_code_a_ptx = Program(kernel_a, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
object_code_b_ptx = Program(device_function_b, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
object_code_c_ptx = Program(device_function_c, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
return object_code_a_ptx, object_code_b_ptx, object_code_c_ptx
@pytest.fixture(scope="function")
def compile_ltoir_functions(init_cuda):
object_code_a_ltoir = Program(kernel_a, "c++", ProgramOptions(link_time_optimization=True)).compile("ltoir")
object_code_b_ltoir = Program(device_function_b, "c++", ProgramOptions(link_time_optimization=True)).compile(
"ltoir"
)
object_code_c_ltoir = Program(device_function_c, "c++", ProgramOptions(link_time_optimization=True)).compile(
"ltoir"
)
return object_code_a_ltoir, object_code_b_ltoir, object_code_c_ltoir
options = [
LinkerOptions(),
LinkerOptions(arch=ARCH, verbose=True),
LinkerOptions(arch=ARCH, max_register_count=32),
LinkerOptions(arch=ARCH, optimization_level=3),
LinkerOptions(arch=ARCH, debug=True),
LinkerOptions(arch=ARCH, lineinfo=True),
]
if not is_culink_backend:
options += [
LinkerOptions(arch=ARCH, time=True),
LinkerOptions(arch=ARCH, optimize_unused_variables=True),
LinkerOptions(arch=ARCH, ptxas_options="-v"),
LinkerOptions(arch=ARCH, ptxas_options=["-v", "--verbose"]),
LinkerOptions(arch=ARCH, ptxas_options=("-v", "--verbose")),
LinkerOptions(arch=ARCH, split_compile=0),
LinkerOptions(arch=ARCH, split_compile_extended=1),
# The following options are supported by nvjitlink and deprecated by culink
LinkerOptions(arch=ARCH, ftz=True),
LinkerOptions(arch=ARCH, prec_div=True),
LinkerOptions(arch=ARCH, prec_sqrt=True),
LinkerOptions(arch=ARCH, fma=True),
LinkerOptions(arch=ARCH, kernels_used="A"),
LinkerOptions(arch=ARCH, kernels_used=["C", "B"]),
LinkerOptions(arch=ARCH, kernels_used=("C", "B")),
LinkerOptions(arch=ARCH, variables_used="var1"),
LinkerOptions(arch=ARCH, variables_used=["var1", "var2"]),
LinkerOptions(arch=ARCH, variables_used=("var1", "var2")),
]
version = nvjitlink.version()
if version >= (12, 5):
options.append(LinkerOptions(arch=ARCH, no_cache=True))
@pytest.mark.parametrize("options", options)
def test_linker_init(compile_ptx_functions, options):
linker = Linker(*compile_ptx_functions, options=options)
object_code = linker.link("cubin")
assert isinstance(object_code, ObjectCode)
assert linker.backend == ("driver" if is_culink_backend else "nvJitLink")
def test_linker_init_invalid_arch(compile_ptx_functions):
err = AttributeError if is_culink_backend else nvjitlink.nvJitLinkError
with pytest.raises(err):
options = LinkerOptions(arch="99", ptx=True)
Linker(*compile_ptx_functions, options=options)
@pytest.mark.skipif(is_culink_backend, reason="culink does not support ptx option")
def test_linker_link_ptx_nvjitlink(compile_ltoir_functions):
options = LinkerOptions(arch=ARCH, link_time_optimization=True, ptx=True)
linker = Linker(*compile_ltoir_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name
@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking")
def test_linker_link_ptx_culink(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name
def test_linker_link_cubin(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name
def test_linker_link_ptx_multiple(compile_ptx_functions):
ptxes = tuple(ObjectCode.from_ptx(obj.code) for obj in compile_ptx_functions)
options = LinkerOptions(arch=ARCH)
linker = Linker(*ptxes, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name
def test_linker_link_invalid_target_type(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
with pytest.raises(ValueError):
linker.link("invalid_target")
# this test causes an API error when using the culink API
@skipif_testing_with_compute_sanitizer
def test_linker_get_error_log(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
replacement_kernel = """
extern __device__ int Z();
extern __device__ int C(int a, int b);
__global__ void A() { int result = C(Z(), 1);}
"""
dummy_program = Program(replacement_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
linker = Linker(dummy_program, *(compile_ptx_functions[1:]), options=options)
try:
linker.link("cubin")
except (nvJitLinkError, CUDAError):
log = linker.get_error_log()
assert isinstance(log, str)
# TODO when 4902246 is addressed, we can update this to cover nvjitlink as well
if is_culink_backend:
assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'None_ptx'"
def test_linker_get_info_log(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
linker.link("cubin")
log = linker.get_info_log()
assert isinstance(log, str)