Skip to content

Commit 6b7f693

Browse files
committed
Add optional tileiras dependency to pip install
- Update cupy example to use random API that doesn't depend on CTK Signed-off-by: Jay Gu <jagu@nvidia.com>
1 parent 381c4ea commit 6b7f693

7 files changed

Lines changed: 107 additions & 17 deletions

File tree

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ def vector_add_kernel(a, b, result):
3131
ct.store(result, index=(block_id,), tile=result_tile)
3232

3333
# Generate input arrays
34-
a = cupy.random.uniform(-5, 5, 128)
35-
b = cupy.random.uniform(-5, 5, 128)
34+
rng = cupy.random.default_rng()
35+
a = rng.random(128)
36+
b = rng.random(128)
3637
expected = cupy.asnumpy(a) + cupy.asnumpy(b)
3738

3839
# Allocate an output array and launch the kernel
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!--- SPDX-FileCopyrightText: Copyright (c) <2026> NVIDIA CORPORATION & AFFILIATES. All rights reserved. -->
2+
<!--- SPDX-License-Identifier: Apache-2.0 -->
3+
4+
- Support `pip install cuda-tile[tileiras]` to use `tileiras` from the Python environment instead of a system-wide CUDA Toolkit (CTK) installation.

docs/source/quickstart.rst

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,34 @@ This page will guide you through getting setup and running with cuTile Python, i
1414
Prerequisites
1515
-------------------------------------------------------------------------------
1616

17-
cuTile Python requires the following:
17+
cuTile Python requires the following:
1818

1919
- Linux x86_64, Linux aarch64 or Windows x86_64
20-
- A GPU with compute capability 10.x or 12.x
20+
- A GPU with compute capability 10.x, 11.x or 12.x
2121
- NVIDIA Driver r580 or later
22-
- CUDA Toolkit 13.1 or later
2322
- Python version 3.10, 3.11, 3.12 or 3.13
2423

2524

2625
Installing cuTile Python
2726
-------------------------------------------------------------------------------
2827

29-
With the :ref:`prerequisites <quickstart-prereqs>` met, installing cuTile Python is a simple pip install:
28+
cuTile Python depends on the CUDA TileIR compiler ``tileiras``, which further depends on ``ptxas`` and ``libnvvm``
29+
from the CUDA Toolkit.
30+
31+
If your system does not have a system-wide CUDA Toolkit (13.1+) installed, you can install cuTile Python along with ``[tileiras]``,
32+
which installs ``nvidia-cuda-tileiras``, ``nvidia-cuda-nvcc`` and
33+
``nvidia-nvvm`` into your Python virtual environment.
34+
35+
.. code-block:: bash
36+
37+
pip install cuda-tile[tileiras]
38+
39+
Note: the package versions for ``nvidia-cuda-tileiras``, ``nvidia-cuda-nvcc`` and
40+
``nvidia-nvvm`` must all share the same major.minor version.
41+
42+
43+
Alternatively, if you already have a system-wide CUDA Toolkit (13.1+) installed, you can install cuTile Python as a
44+
standalone package. cuTile automatically searches for ``tileiras`` in the CUDA Toolkit installation.
3045

3146
.. code-block:: bash
3247

pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ dependencies = [
3737
"typing-extensions",
3838
]
3939

40+
[project.optional-dependencies]
41+
tileiras = [
42+
"nvidia-cuda-tileiras>=13.2,<13.3",
43+
"nvidia-cuda-nvcc>=13.2,<13.3",
44+
"nvidia-nvvm>=13.2,<13.3",
45+
]
46+
4047
[project.urls]
4148
Homepage = "https://github.com/nvidia/cutile-python"
4249
Repository = "https://github.com/nvidia/cutile-python"

samples/quickstart/VectorAdd_quickstart.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ def test():
3434
tile_size = 2**4
3535
grid = (ct.cdiv(vector_size, tile_size), 1, 1)
3636

37-
a = cp.random.uniform(-1, 1, vector_size)
38-
b = cp.random.uniform(-1, 1, vector_size)
37+
rng = cp.random.default_rng()
38+
a = rng.random(vector_size)
39+
b = rng.random(vector_size)
3940
c = cp.zeros_like(a)
4041

4142
# Launch kernel

src/cuda/tile/_compile.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-FileCopyrightText: Copyright (c) <2025> NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
import importlib.metadata
45
import inspect
56
import math
67
import re
@@ -321,6 +322,7 @@ class _CompilerBinary:
321322
path: str
322323
bin_path: str
323324
ld_path: str
325+
pass_cuda_home_var: bool
324326

325327
def run(self,
326328
args: list[str],
@@ -335,6 +337,9 @@ def run(self,
335337
env = os.environ.copy()
336338
env['LD_LIBRARY_PATH'] = self.ld_path
337339
env['PATH'] = self.bin_path
340+
if not self.pass_cuda_home_var:
341+
for key in {"CUDA_HOME", "CUDA_PATH"}:
342+
env.pop(key, None)
338343
subprocess.run(command + flags, env=env, check=True, capture_output=True,
339344
timeout=timeout_sec)
340345
except subprocess.CalledProcessError as e:
@@ -347,6 +352,54 @@ def run(self,
347352
_try_get_compiler_version(self.path))
348353

349354

355+
# Pip distributions looked up by `_find_pip_tileiras`: every one of them must
# be installed, and all must report the same major.minor version, before the
# pip-provided `tileiras` is used.
_PIP_TILEIRAS_PACKAGES = (
356+
"nvidia-cuda-tileiras",
357+
"nvidia-cuda-nvcc",
358+
"nvidia-nvvm",
359+
)
360+
361+
362+
def _get_major_minor(version_str: str) -> tuple[int, int]:
363+
parts = version_str.split(".")
364+
return int(parts[0]), int(parts[1])
365+
366+
367+
def _find_pip_tileiras() -> Optional[str]:
    """Locate the ``tileiras`` binary shipped by the NVIDIA pip packages.

    The pip-provided compiler is only used when every distribution listed in
    ``_PIP_TILEIRAS_PACKAGES`` is installed and all of them report the same
    major.minor version. The binary is then looked up in the ``bin``
    directory of the ``nvidia.cu13`` package.

    Returns:
        The path to ``tileiras``, or ``None`` when the pip packages are
        missing, their versions are mismatched or unparsable, or the binary
        cannot be found. ``None`` lets the caller fall back to other
        search locations.
    """
    versions: dict[str, str] = {}
    for pkg in _PIP_TILEIRAS_PACKAGES:
        try:
            versions[pkg] = importlib.metadata.version(pkg)
        except importlib.metadata.PackageNotFoundError:
            # A partially installed toolchain is not usable; give up quietly.
            return None

    details = ", ".join(f"{pkg} {versions[pkg]}" for pkg in _PIP_TILEIRAS_PACKAGES)

    try:
        unique = {_get_major_minor(v) for v in versions.values()}
    except (ValueError, IndexError):
        # An unexpected version format must not abort compiler discovery;
        # treat it like a mismatch and let the caller fall back.
        warnings.warn(
            f"Cannot parse versions of installed NVIDIA pip packages ({details}). "
            "Falling back to system tileiras.",
            stacklevel=3,
        )
        return None

    if len(unique) != 1:
        warnings.warn(
            f"Installed NVIDIA pip packages have mismatched versions ({details}). "
            "Falling back to system tileiras.",
            stacklevel=3,
        )
        return None

    try:
        import nvidia.cu13 as cu13_pkg
        cu13_root = cu13_pkg.__path__[0]
    except (ImportError, AttributeError, IndexError):
        logger.debug("Fail to get nvidia.cu13 package path.", exc_info=True)
        return None

    pip_bin_dir = os.path.join(cu13_root, "bin")
    res = shutil.which("tileiras", path=pip_bin_dir)
    if res is None:
        logger.debug("Fail to find tileiras under nvidia.cu13 path.")
        return None

    logger.debug("Found tileiras from pip package: %s", res)
    return res
401+
402+
350403
@cache
351404
def _find_compiler_bin() -> _CompilerBinary:
352405
# search under cuda/tile/_deps
@@ -360,30 +413,39 @@ def _find_compiler_bin() -> _CompilerBinary:
360413
if (res := shutil.which("tileiras", path=deps_bin_dir)):
361414
bin_path = deps_bin_dir + ":" + bin_path
362415
ld_path = deps_lib_dir + ":" + ld_path
363-
return _CompilerBinary(res, bin_path, ld_path)
416+
return _CompilerBinary(res, bin_path, ld_path, pass_cuda_home_var=False)
417+
418+
# search from nvidia-cuda-tileiras pip package
419+
logger.debug("Searching tileiras from nvidia pip package")
420+
res = _find_pip_tileiras()
421+
if res is not None:
422+
return _CompilerBinary(res, bin_path, ld_path, pass_cuda_home_var=False)
364423

365424
# search under PATH
366425
logger.debug(f"Searching tileiras: {bin_path}")
367426
if (res := shutil.which("tileiras")):
368-
return _CompilerBinary(res, bin_path, ld_path)
427+
return _CompilerBinary(res, bin_path, ld_path, pass_cuda_home_var=True)
369428

370429
# search under CUDA_HOME
371430
if (cuda_home := _get_cuda_home()):
372431
cuda_bin_path = os.path.join(cuda_home, 'bin')
373432
logger.debug(f"Searching tileiras: {cuda_bin_path}")
374433
if (res := shutil.which("tileiras", path=cuda_bin_path)):
375434
bin_path = bin_path + ":" + cuda_bin_path
376-
return _CompilerBinary(res, bin_path, ld_path)
435+
return _CompilerBinary(res, bin_path, ld_path, pass_cuda_home_var=True)
377436

378437
# Try default CUDA Toolkit installation paths as a fallback
379438
res = _find_compiler_in_default_cuda_toolkit_paths()
380439
if res is not None:
381440
tileiras_path, bin_path = res
382-
return _CompilerBinary(tileiras_path, bin_path, ld_path)
441+
return _CompilerBinary(tileiras_path, bin_path, ld_path, pass_cuda_home_var=False)
383442

384443
cuda_home_var = "CUDA_PATH" if is_windows() else "CUDA_HOME"
385-
raise FileNotFoundError(f"'tileiras' compiler not found, "
386-
f"make sure it is available in $PATH or ${cuda_home_var}/bin")
444+
raise FileNotFoundError("'tileiras' compiler not found, "
445+
"make sure it is available as a python package via "
446+
"`pip install cuda-tile[tileiras]` or "
447+
f"available in $PATH or ${cuda_home_var}/bin via system CTK (13.1+)"
448+
" installation.")
387449

388450

389451
@cache

test/test_frontpage_example.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ def vector_add(a: cupy.ndarray, b: cupy.ndarray, result: cupy.ndarray):
3030
import numpy as np
3131

3232
def test_vector_add():
33-
a = cupy.random.uniform(-5, 5, 128)
34-
b = cupy.random.uniform(-5, 5, 128)
33+
rng = cupy.random.default_rng()
34+
a = rng.random(128)
35+
b = rng.random(128)
3536
result = cupy.zeros_like(a)
3637

3738
vector_add(a, b, result)
@@ -42,4 +43,3 @@ def test_vector_add():
4243

4344
expected = a_np + b_np
4445
np.testing.assert_array_almost_equal(result_np, expected)
45-

0 commit comments

Comments
 (0)