Skip to content

Commit 76f463e

Browse files
committed
Tests for a PTX call with no params. Remove redundant comments.
1 parent 411d428 commit 76f463e

3 files changed

Lines changed: 124 additions & 1 deletion

File tree

src/cleanup/ptx.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ function cleanup_code(::Val{:ptx}, c, dbinfo, cleanup_opts)
5050
# Remove the "begin" and "end function" comments
5151
r"\s+// -- Begin function.+$"m => "",
5252
r"^\s+// -- End function\R"m => "",
53+
# Remove the redundant comments mentioning the main function's name
54+
r"^\s+// \.globl.+\R"m => "",
55+
r"^\s+// @.+\R"m => "",
5356
# Remove empty lines
5457
r"\R{2,}" => "\n",
5558
extra_patterns...
@@ -173,7 +176,7 @@ function indent_ptx_function_calls()
173176
indent = " "^8
174177
call_attrs = strip(m[1]) # remove any '\r' on Windows
175178
func_name = strip(m[2]) # and again
176-
if m[3] == "\n"
179+
if all(isspace, m[3])
177180
# No parameters
178181
params = ""
179182
else

test/cleanup.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,27 @@ end
231231
# Make sure we didn't remove any instruction by mistake
232232
@test count(';', ptx_sample) == count(';', cleaned_ptx)
233233
end
234+
235+
@testset "Sample 3" begin
236+
# This sample calls a `@noinline` function that takes no arguments and returns a value
237+
ptx_sample = readchomp("./samples/extern_func_with_no_params.ptx")
238+
cleaned_ptx = CDC.cleanup_code(Val(:ptx), ptx_sample)
239+
240+
println(TEST_IO, "\nCleaned PTX sample 3:")
241+
println(TEST_IO, cleaned_ptx)
242+
243+
# Cleanup of the `@noinline f()` external function: a whitespace-only parameter list is presumably expected to be gone from the cleaned output (confirm against `is_removed`)
244+
@test is_removed(r"\(\s+\)", ptx_sample, cleaned_ptx)
245+
@test count("()", cleaned_ptx) == 2 # once for the `.extern .func` declaration, once for the call
246+
247+
# General formatting checks on the cleaned output
248+
@test !has_trailing_spaces(cleaned_ptx)
249+
@test count(r"\R{2,}", cleaned_ptx) == 0 # no empty lines
250+
@test !endswith(cleaned_ptx, r"\R") # no trailing newlines
251+
252+
# Make sure we didn't remove any instruction by mistake: the number of ';' must be unchanged
253+
@test count(';', ptx_sample) == count(';', cleaned_ptx)
254+
end
234255
end
235256

236257

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
//
2+
// Generated by LLVM NVPTX Back-End
3+
//
4+
5+
.version 8.5
6+
.target sm_86
7+
.address_size 64
8+
9+
// .globl _Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE // -- Begin function _Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE
10+
.extern .func (.param .b64 func_retval0) julia_noinline_func_22249
11+
()
12+
;
13+
.extern .func julia__throw_inexacterror_22243
14+
(
15+
.param .align 8 .b8 julia__throw_inexacterror_22243_param_0[16]
16+
)
17+
;
18+
.extern .func julia_throw_boundserror_22232
19+
(
20+
.param .align 8 .b8 julia_throw_boundserror_22232_param_0[16]
21+
)
22+
;
23+
// @_Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE
24+
.visible .entry _Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE(
25+
.param .align 8 .b8 _Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_0[16],
26+
.param .align 8 .b8 _Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_1[32]
27+
)
28+
.maxntid 1, 1, 1
29+
{
30+
.reg .pred %p<4>;
31+
.reg .b32 %r<6>;
32+
.reg .b64 %rd<6>;
33+
34+
// %bb.0: // %conversion
35+
mov.u32 %r2, %ctaid.x;
36+
mov.u32 %r3, %tid.x;
37+
add.s32 %r4, %r3, %r2;
38+
add.s32 %r5, %r4, 1;
39+
setp.gt.u32 %p1, %r5, 1;
40+
@%p1 bra $L__BB0_6;
41+
// %bb.1: // %L85
42+
ld.param.u32 %r1, [_Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_0+8];
43+
ld.param.u64 %rd4, [_Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_0];
44+
{ // callseq 932, 0
45+
.reg .b32 temp_param_reg;
46+
.param .b64 retval0;
47+
call.uni (retval0),
48+
julia_noinline_func_22249,
49+
(
50+
);
51+
ld.param.b64 %rd5, [retval0+0];
52+
} // callseq 932
53+
setp.gt.s64 %p2, %rd5, -1;
54+
@%p2 bra $L__BB0_3;
55+
bra.uni $L__BB0_2;
56+
$L__BB0_3: // %L98
57+
ld.param.u64 %rd2, [_Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_1+24];
58+
setp.gt.s64 %p3, %rd2, 0;
59+
@%p3 bra $L__BB0_5;
60+
bra.uni $L__BB0_4;
61+
$L__BB0_5: // %L106
62+
ld.param.u64 %rd1, [_Z17gpu_test_noinline16CompilerMetadataI10StaticSizeI4_1__E12DynamicCheckvv7NDRangeILi1ES1_S1_vvEE13CuDeviceArrayI5Int64Li1ELi1EE_param_1];
63+
st.global.u64 [%rd1], %rd5;
64+
$L__BB0_6: // %L113
65+
ret;
66+
$L__BB0_2: // %L90
67+
{ // callseq 933, 0
68+
.reg .b32 temp_param_reg;
69+
.param .align 8 .b8 param0[16];
70+
st.param.b64 [param0+0], %rd4;
71+
st.param.b32 [param0+8], %r1;
72+
call.uni
73+
julia__throw_inexacterror_22243,
74+
(
75+
param0
76+
);
77+
} // callseq 933
78+
trap;
79+
// begin inline asm
80+
exit;
81+
// end inline asm
82+
$L__BB0_4: // %L102
83+
{ // callseq 934, 0
84+
.reg .b32 temp_param_reg;
85+
.param .align 8 .b8 param0[16];
86+
st.param.b64 [param0+0], %rd4;
87+
st.param.b32 [param0+8], %r1;
88+
call.uni
89+
julia_throw_boundserror_22232,
90+
(
91+
param0
92+
);
93+
} // callseq 934
94+
trap;
95+
// begin inline asm
96+
exit;
97+
// end inline asm
98+
// -- End function
99+
}

0 commit comments

Comments
 (0)