Skip to content

Commit d62c7fa

Browse files
authored
Merge pull request #237 from NVIDIA/ksimpson/add_program_options
Add the options data class to program
2 parents fc8188a + c7cb7eb commit d62c7fa

19 files changed

Lines changed: 574 additions & 131 deletions

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from cuda.core.experimental._event import EventOptions
88
from cuda.core.experimental._launcher import LaunchConfig, launch
99
from cuda.core.experimental._linker import Linker, LinkerOptions
10-
from cuda.core.experimental._program import Program
10+
from cuda.core.experimental._program import Program, ProgramOptions
1111
from cuda.core.experimental._stream import Stream, StreamOptions
1212
from cuda.core.experimental._system import System
1313

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from dataclasses import dataclass
1010
from typing import List, Optional
1111

12+
from cuda.core.experimental._device import Device
1213
from cuda.core.experimental._module import ObjectCode
1314
from cuda.core.experimental._utils import check_or_create_options, driver, handle_return
1415

@@ -91,10 +92,10 @@ class LinkerOptions:
9192
9293
Attributes
9394
----------
94-
arch : str
95-
Pass the SM architecture value, such as ``-arch=sm_<CC>`` (for generating CUBIN) or
96-
``compute_<CC>`` (for generating PTX).
97-
This is a required option.
95+
arch : str, optional
96+
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
97+
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
98+
will be used.
9899
max_register_count : int, optional
99100
Maximum register count.
100101
Maps to: ``-maxrregcount=<N>``.
@@ -172,7 +173,7 @@ class LinkerOptions:
172173
Default: False.
173174
"""
174175

175-
arch: str
176+
arch: Optional[str] = None
176177
max_register_count: Optional[int] = None
177178
time: Optional[bool] = None
178179
verbose: Optional[bool] = None
@@ -204,6 +205,8 @@ def __post_init__(self):
204205
def _init_nvjitlink(self):
205206
if self.arch is not None:
206207
self.formatted_options.append(f"-arch={self.arch}")
208+
else:
209+
self.formatted_options.append("-arch=sm_" + "".join(f"{i}" for i in Device().compute_capability))
207210
if self.max_register_count is not None:
208211
self.formatted_options.append(f"-maxrregcount={self.max_register_count}")
209212
if self.time is not None:

cuda_core/cuda/core/experimental/_program.py

Lines changed: 372 additions & 12 deletions
Large diffs are not rendered by default.

cuda_core/cuda/core/experimental/_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import functools
66
import importlib.metadata
77
from collections import namedtuple
8+
from collections.abc import Sequence
89
from typing import Callable, Dict
910

1011
try:
@@ -93,6 +94,13 @@ def check_or_create_options(cls, options, options_description, *, keep_none=Fals
9394
return options
9495

9596

97+
def _handle_boolean_option(option: bool) -> str:
98+
"""
99+
Convert a boolean option to a string representation.
100+
"""
101+
return "true" if bool(option) else "false"
102+
103+
96104
def precondition(checker: Callable[..., None], what: str = "") -> Callable:
97105
"""
98106
A decorator that adds checks to ensure any preconditions are met.
@@ -142,6 +150,20 @@ def get_device_from_ctx(ctx_handle) -> int:
142150
return device_id
143151

144152

153+
def is_sequence(obj):
    """
    Check if the given object is a sequence.

    The test is ``isinstance(obj, collections.abc.Sequence)``, so it accepts
    lists and tuples but also other registered sequences such as ``str``,
    ``bytes`` and ``range``. Sets, dicts and generators are not sequences.
    """
    matches_sequence_abc = isinstance(obj, Sequence)
    return matches_sequence_abc
158+
159+
160+
def is_nested_sequence(obj):
    """
    Check if the given object is a sequence with at least one sequence element.

    Both the outer object and its elements are classified with
    ``is_sequence``, so whatever that helper accepts as a sequence counts
    here as well. An empty sequence is not nested.
    """
    if not is_sequence(obj):
        return False
    # Stop at the first element that is itself a sequence.
    for item in obj:
        if is_sequence(item):
            return True
    return False
165+
166+
145167
def get_binding_version():
146168
try:
147169
major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2]

cuda_core/docs/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ CUDA compilation toolchain
3535

3636
:template: dataclass.rst
3737

38+
ProgramOptions
3839
LinkerOptions
3940

4041

cuda_core/docs/source/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ and other functionalities.
88
:maxdepth: 2
99
:caption: Contents:
1010

11-
release.md
11+
release.rst
1212
install.md
1313
interoperability.rst
1414
api.rst

cuda_core/docs/source/release.md

Lines changed: 0 additions & 12 deletions
This file was deleted.

cuda_core/docs/source/release.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Release Notes
2+
=============
3+
4+
.. toctree::
5+
:maxdepth: 3
6+
7+
release/0.2.0-notes
8+
release/0.1.1-notes
9+
release/0.1.0-notes
Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
1-
# `cuda.core` v0.1.0 Release notes
1+
``cuda.core`` 0.1.0 Release Notes
2+
=================================
23

34
Released on Nov 8, 2024
45

5-
## Hightlights
6+
Highlights
7+
----------
8+
69
- Initial beta release
710
- Supports all platforms on which CUDA is supported
811
- Supports all CUDA 11.x/12.x drivers
912
- Supports all CUDA 11.x/12.x Toolkits
1013
- Pythonic CUDA runtime and other core functionalities
1114

12-
## Limitations
15+
Limitations
16+
-----------
1317

1418
- All APIs are currently *experimental* and subject to change without deprecation notice.
15-
Please kindly share your feedbacks with us so that we can make `cuda.core` better!
19+
Please kindly share your feedback with us so that we can make ``cuda.core`` better!
1620
- Source code release only; ``pip``/``conda`` support is coming in a future release
17-
- Windows TCC mode is [not yet supported](https://github.com/NVIDIA/cuda-python/issues/206)
21+
- Windows TCC mode is `not yet supported <https://github.com/NVIDIA/cuda-python/issues/206>`_

cuda_core/docs/source/release/0.1.1-notes.md

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)