Skip to content

Commit ac6b47c

Browse files
committed
Remove opt_level=0 workaround for print functions
Signed-off-by: Ziheng Deng <zihengd@nvidia.com>
1 parent 4a27fb7 commit ac6b47c

File tree

3 files changed

+21
-24
lines changed

3 files changed

+21
-24
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!--- SPDX-FileCopyrightText: Copyright (c) <2026> NVIDIA CORPORATION & AFFILIATES. All rights reserved. -->
2+
<!--- SPDX-License-Identifier: Apache-2.0 -->
3+
4+
- Removed the `opt_level=0` workaround note from the `ct.printf()` and `ct.print()` documentation; the workaround is no longer required as of tileiras 13.2.

src/cuda/tile/_stub.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,15 +2023,6 @@ def printf(format, *args) -> None:
20232023
Notes:
20242024
This operation has significant overhead, and should only be used
20252025
for debugging purposes.
2026-
2027-
When printing from multiple tile blocks, outputs will be interleaved.
2028-
One workaround is to set optimization level to 0:
2029-
2030-
.. code-block:: python
2031-
2032-
@ct.kernel(opt_level=0)
2033-
def my_print_kernel():
2034-
ct.printf("%d", 123)
20352026
"""
20362027

20372028

@@ -2062,8 +2053,6 @@ def print(*args, sep: str = ' ', end: str = '\n') -> None:
20622053
20632054
F-string expressions must evaluate to tile values. Constant compile-time
20642055
values are supported as string-formatted segments.
2065-
2066-
Use ``opt_level=0`` to prevent block-level output interleaving.
20672056
"""
20682057

20692058

test/test_print.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,68 +12,72 @@
1212

1313
from math import ceil
1414
import cuda.tile as ct
15+
from cuda.tile._bytecode.version import BytecodeVersion
16+
from cuda.tile._compiler_options import CompilerOptions
17+
from conftest import get_tileiras_version
1518

16-
# FIXME: Default opt_level causes print to be out of order.
17-
# Remove when it is fixed in tile compiler.
19+
# opt_level=0 required for correct print ordering in tileiras < 13.2
20+
_DEFAULT_OPT_LEVEL = CompilerOptions.__dataclass_fields__['opt_level'].default
21+
_OPT_LEVEL = 0 if get_tileiras_version() < BytecodeVersion.V_13_2 else _DEFAULT_OPT_LEVEL
1822

1923

20-
@ct.kernel(opt_level=0)
24+
@ct.kernel(opt_level=_OPT_LEVEL)
2125
def kernel_printf_float(x, TILE: ct.Constant[int]):
2226
bid = ct.bid(0)
2327
tx = ct.load(x, index=(bid,), shape=(TILE,))
2428
ct.printf("tile[%d]:%.5f\n", bid, tx)
2529

2630

27-
@ct.kernel(opt_level=0)
31+
@ct.kernel(opt_level=_OPT_LEVEL)
2832
def kernel_printf_int(x, TILE: ct.Constant[int]):
2933
bid = ct.bid(0)
3034
tx = ct.load(x, index=(bid,), shape=(TILE,))
3135
ct.printf("tile[%d]:%d\n", bid, tx)
3236

3337

34-
@ct.kernel(opt_level=0)
38+
@ct.kernel(opt_level=_OPT_LEVEL)
3539
def kernel_print_int(x, TILE: ct.Constant[int]):
3640
bid = ct.bid(0)
3741
tx = ct.load(x, index=(bid,), shape=(TILE,))
3842
ct.print(f"tile[{bid}]:{tx}")
3943

4044

41-
@ct.kernel(opt_level=0)
45+
@ct.kernel(opt_level=_OPT_LEVEL)
4246
def kernel_print_float(x, TILE: ct.Constant[int]):
4347
bid = ct.bid(0)
4448
tx = ct.load(x, index=(bid,), shape=(TILE,))
4549
ct.print(f"tile[{bid}]:{tx:.5f}")
4650

4751

48-
@ct.kernel(opt_level=0)
52+
@ct.kernel(opt_level=_OPT_LEVEL)
4953
def kernel_print_sep(x, TILE: ct.Constant[int]):
5054
bid = ct.bid(0)
5155
tx = ct.load(x, index=(bid,), shape=(TILE,))
5256
ct.print("tile:", tx, sep='')
5357

5458

55-
@ct.kernel(opt_level=0)
59+
@ct.kernel(opt_level=_OPT_LEVEL)
5660
def kernel_print_two_vars_with_expr(x, TILE: ct.Constant[int]):
5761
bid = ct.bid(0)
5862
tx = ct.load(x, index=(bid,), shape=(TILE,))
5963
ct.print(f"tile[{bid}]: a={tx:.6f} b={tx + tx:.6f}")
6064

6165

62-
@ct.kernel(opt_level=0)
66+
@ct.kernel(opt_level=_OPT_LEVEL)
6367
def kernel_print_no_end(x, TILE: ct.Constant[int]):
6468
bid = ct.bid(0)
6569
tx = ct.load(x, index=(bid,), shape=(TILE,))
6670
ct.print(tx, end='')
6771

6872

69-
@ct.kernel(opt_level=0)
73+
@ct.kernel(opt_level=_OPT_LEVEL)
7074
def kernel_builtin_print_int(x, TILE: ct.Constant[int]):
7175
bid = ct.bid(0)
7276
tx = ct.load(x, index=(bid,), shape=(TILE,))
7377
print(f"tile[{bid}]:{tx}")
7478

7579

76-
@ct.kernel(opt_level=0)
80+
@ct.kernel(opt_level=_OPT_LEVEL)
7781
def kernel_builtin_print_float(x, TILE: ct.Constant[int]):
7882
bid = ct.bid(0)
7983
tx = ct.load(x, index=(bid,), shape=(TILE,))
@@ -213,7 +217,7 @@ def test_builtin_print(shape, tile, dtype_str):
213217
def test_ct_print_error_conversion():
214218
from cuda.tile._exception import TileSyntaxError
215219

216-
@ct.kernel(opt_level=0)
220+
@ct.kernel(opt_level=_OPT_LEVEL)
217221
def bad_kernel(x, TILE: ct.Constant[int]):
218222
tx = ct.load(x, index=(0,), shape=(TILE,))
219223
ct.print(f"{tx!r}")
@@ -226,7 +230,7 @@ def bad_kernel(x, TILE: ct.Constant[int]):
226230
def test_ct_print_error_dynamic_format_spec():
227231
from cuda.tile._exception import TileSyntaxError
228232

229-
@ct.kernel(opt_level=0)
233+
@ct.kernel(opt_level=_OPT_LEVEL)
230234
def bad_kernel(x, TILE: ct.Constant[int]):
231235
width = 5
232236
tx = ct.load(x, index=(0,), shape=(TILE,))

0 commit comments

Comments
 (0)