Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cuda_core/cuda/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,15 @@ def _import_versioned_module():


from cuda.core import system, utils
from cuda.core._context import Context, ContextOptions
from cuda.core._device import Device
from cuda.core._device_resources import (
DeviceResources,
SMResource,
SMResourceOptions,
WorkqueueResource,
WorkqueueResourceOptions,
)
from cuda.core._event import Event, EventOptions
from cuda.core._graphics import GraphicsResource
from cuda.core._launch_config import LaunchConfig
Expand Down
9 changes: 8 additions & 1 deletion cuda_core/cuda/core/_context.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

from cuda.core._resource_handles cimport ContextHandle
from cuda.core._resource_handles cimport ContextHandle, GreenCtxHandle

cdef class Context:
"""Cython declaration for Context class.
Expand All @@ -13,8 +13,15 @@ cdef class Context:

cdef:
ContextHandle _h_context
GreenCtxHandle _h_green_ctx
int _device_id
bint _is_green
object __weakref__

@staticmethod
cdef Context _from_handle(type cls, ContextHandle h_context, int device_id)

@staticmethod
cdef Context _from_green_ctx(type cls, GreenCtxHandle h_green_ctx, int device_id)

cpdef close(self)
61 changes: 57 additions & 4 deletions cuda_core/cuda/core/_context.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,32 @@
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from collections.abc import Sequence
from dataclasses import dataclass

from cuda.bindings cimport cydriver
from cuda.core._device_resources import SMResource, WorkqueueResource
from cuda.core._resource_handles cimport (
ContextHandle,
GreenCtxHandle,
as_cu,
create_context_handle_from_green_ctx,
get_context_green_ctx,
get_last_error,
as_intptr,
as_py,
)
from cuda.core._utils.cuda_utils cimport HANDLE_RETURN


__all__ = ['Context', 'ContextOptions']


DeviceResourcesT = SMResource | WorkqueueResource | Sequence[SMResource | WorkqueueResource]


cdef class Context:
"""CUDA context wrapper.

Expand All @@ -29,20 +43,56 @@ cdef class Context:
"""Create Context from existing ContextHandle (cdef-only factory)."""
cdef Context ctx = cls.__new__(cls)
ctx._h_context = h_context
ctx._h_green_ctx = get_context_green_ctx(h_context)
ctx._device_id = device_id
ctx._is_green = ctx._h_green_ctx.get() != NULL
return ctx

@staticmethod
cdef Context _from_green_ctx(type cls, GreenCtxHandle h_green_ctx, int device_id):
    """Build a Context that owns ``h_green_ctx`` and exposes its CUcontext view.

    Raises through ``HANDLE_RETURN`` with the last recorded driver error when
    the context view cannot be created, falling back to ``RuntimeError``.
    """
    cdef Context green = cls.__new__(cls)
    green._h_green_ctx = h_green_ctx
    # Derive the context handle from the green context.
    cdef ContextHandle h_view = create_context_handle_from_green_ctx(h_green_ctx)
    green._h_context = h_view
    if not green._h_context:
        HANDLE_RETURN(get_last_error())
        raise RuntimeError("Failed to create CUDA context view from green context")
    green._is_green = True
    green._device_id = device_id
    return green

@property
def handle(self):
"""Return the underlying CUcontext handle."""
if self._h_context.get() == NULL:
if not self._h_context:
return None
if as_cu(self._h_context) == NULL:
return None
return as_py(self._h_context)

@property
def _handle(self):
"""Private alias that returns the same value as :attr:`handle`."""
return self.handle

@property
def is_green(self) -> bool:
"""True if this context is backed by a green context.

The flag is set when the wrapper is built from a green context handle,
and also when it is built from a context handle that already carries a
green context handle.
"""
return bool(self._is_green)

cpdef close(self):
"""Release this context wrapper's underlying CUDA handles.

Both the context handle and the green context handle held by this
wrapper are reset. When the wrapper still holds a non-NULL context, the
driver is first asked which context is current.

Raises
------
RuntimeError
    If the context reported by ``cuCtxGetCurrent`` is this wrapper's
    context.
"""
cdef cydriver.CUcontext current_ctx
# Only query the driver when there is a live context to compare against.
if self._h_context and as_cu(self._h_context) != NULL:
with nogil:
# NOTE(review): HANDLE_RETURN presumably raises on a driver error - confirm.
HANDLE_RETURN(cydriver.cuCtxGetCurrent(&current_ctx))
if current_ctx == as_cu(self._h_context):
raise RuntimeError(
"Cannot close a CUDA context while it is current. "
"Restore a previous context before closing this context."
)
# Drop this wrapper's references to both handles.
self._h_context.reset()
self._h_green_ctx.reset()

def __eq__(self, other):
if not isinstance(other, Context):
return NotImplemented
Expand All @@ -57,9 +107,12 @@ cdef class Context:


@dataclass
class ContextOptions:
cdef class ContextOptions:
"""Options for context creation.

Currently unused, reserved for future use.
Attributes
----------
resources : :obj:`~_context.DeviceResourcesT`
Device resources used to create a green context.
"""
pass # TODO
resources: DeviceResourcesT
3 changes: 2 additions & 1 deletion cuda_core/cuda/core/_cpp/REGISTRY_DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ carries timing/IPC flags, `KernelBox` carries the library dependency).
Without this level, a round-tripped handle would produce a new Box
with default metadata, losing information that was set at creation.

Instances: `event_registry`, `kernel_registry`, `graph_node_registry`.
Instances: `context_registry`, `stream_registry`, `event_registry`,
`kernel_registry`, `graph_node_registry`.

## Level 2: Resource Handle -> Python Object (Cython)

Expand Down
166 changes: 156 additions & 10 deletions cuda_core/cuda/core/_cpp/resource_handles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ namespace cuda_core {
decltype(&cuDevicePrimaryCtxRetain) p_cuDevicePrimaryCtxRetain = nullptr;
decltype(&cuDevicePrimaryCtxRelease) p_cuDevicePrimaryCtxRelease = nullptr;
decltype(&cuCtxGetCurrent) p_cuCtxGetCurrent = nullptr;
decltype(&cuGreenCtxCreate) p_cuGreenCtxCreate = nullptr;
decltype(&cuGreenCtxDestroy) p_cuGreenCtxDestroy = nullptr;
decltype(&cuCtxFromGreenCtx) p_cuCtxFromGreenCtx = nullptr;
decltype(&cuDevResourceGenerateDesc) p_cuDevResourceGenerateDesc = nullptr;

decltype(&cuStreamCreateWithPriority) p_cuStreamCreateWithPriority = nullptr;
decltype(&cuStreamDestroy) p_cuStreamDestroy = nullptr;
Expand Down Expand Up @@ -223,12 +227,112 @@ void clear_last_error() noexcept {
namespace {
// Storage behind a ContextHandle: the driver context plus the green-context
// handle it was derived from. The second member is left empty by the
// factories that wrap a plain CUcontext.
struct ContextBox {
CUcontext resource;
GreenCtxHandle h_green_ctx;
};

// Storage behind a GreenCtxHandle: the driver green-context value.
struct GreenCtxBox {
CUgreenCtx resource;
};

// Recover the enclosing ContextBox from a handle. Handles are constructed to
// point at the box's `resource` member, so subtracting that member's offset
// from the stored pointer yields the box address. The handle's pointer is not
// checked here; the visible caller tests for an empty handle first.
static const ContextBox* get_box(const ContextHandle& h) noexcept {
const CUcontext* p = h.get();
return reinterpret_cast<const ContextBox*>(
reinterpret_cast<const char*>(p) - offsetof(ContextBox, resource)
);
}

// See REGISTRY_DESIGN.md (Level 1: Driver Handle -> Resource Handle)
// Maps a CUcontext value to the ContextHandle registered for it.
static HandleRegistry<CUcontext, ContextHandle> context_registry;
} // namespace

ContextHandle create_context_handle_ref(CUcontext ctx) {
auto box = std::make_shared<const ContextBox>(ContextBox{ctx});
return ContextHandle(box, &box->resource);
if (!ctx) {
return {};
}
if (auto h = context_registry.lookup(ctx)) {
return h;
}
auto box = std::shared_ptr<const ContextBox>(
new ContextBox{ctx, {}},
[](const ContextBox* b) {
context_registry.unregister_handle(b->resource);
delete b;
}
);
ContextHandle h(box, &box->resource);
context_registry.register_handle(ctx, h);
return h;
}

// Create a ContextHandle for the CUcontext obtained from a green context.
//
// Returns an empty handle when `h_green_ctx` is empty, when the
// cuCtxFromGreenCtx pointer is null (err is set to CUDA_ERROR_NOT_SUPPORTED),
// or when the driver call fails (err holds the driver result). On success the
// new box stores `h_green_ctx` alongside the context and the handle is
// registered in context_registry.
ContextHandle create_context_handle_from_green_ctx(const GreenCtxHandle& h_green_ctx) {
    if (!h_green_ctx) {
        return {};
    }
    if (p_cuCtxFromGreenCtx == nullptr) {
        err = CUDA_ERROR_NOT_SUPPORTED;
        return {};
    }

    GILReleaseGuard gil;
    CUcontext derived_ctx = nullptr;
    err = p_cuCtxFromGreenCtx(&derived_ctx, as_cu(h_green_ctx));
    if (err != CUDA_SUCCESS) {
        return {};
    }

    // The deleter only drops the registry entry and frees the box; it makes
    // no driver call for the context itself.
    const auto release_box = [](const ContextBox* b) {
        context_registry.unregister_handle(b->resource);
        delete b;
    };
    std::shared_ptr<const ContextBox> owner(new ContextBox{derived_ctx, h_green_ctx}, release_box);

    ContextHandle result(owner, &owner->resource);
    context_registry.register_handle(derived_ctx, result);
    return result;
}

// Return the green-context handle stored in the box behind `h`, or an empty
// handle when `h` itself is empty.
GreenCtxHandle get_context_green_ctx(const ContextHandle& h) noexcept {
    return h ? get_box(h)->h_green_ctx : GreenCtxHandle{};
}

// Create an owning GreenCtxHandle from a set of device resources.
//
// A resource descriptor is generated from `resources`/`nbResources`, then a
// green context is created on `dev` with `flags`. Returns an empty handle and
// leaves the failure code in err when a required driver pointer is null
// (CUDA_ERROR_NOT_SUPPORTED) or when either driver call fails. The returned
// handle's deleter calls cuGreenCtxDestroy on the created green context.
GreenCtxHandle create_green_ctx_handle(CUdevResource* resources, unsigned int nbResources,
                                       CUdevice dev, unsigned int flags) {
    const bool driver_ready =
        p_cuDevResourceGenerateDesc && p_cuGreenCtxCreate && p_cuGreenCtxDestroy;
    if (!driver_ready) {
        err = CUDA_ERROR_NOT_SUPPORTED;
        return {};
    }

    GILReleaseGuard gil;

    CUdevResourceDesc resource_desc = nullptr;
    err = p_cuDevResourceGenerateDesc(&resource_desc, resources, nbResources);
    if (err != CUDA_SUCCESS) {
        return {};
    }

    CUgreenCtx created = nullptr;
    err = p_cuGreenCtxCreate(&created, resource_desc, dev, flags);
    if (err != CUDA_SUCCESS) {
        return {};
    }

    // The destroy result is not inspected, matching best-effort cleanup.
    const auto destroy_box = [](const GreenCtxBox* b) {
        GILReleaseGuard gil;
        p_cuGreenCtxDestroy(b->resource);
        delete b;
    };
    std::shared_ptr<const GreenCtxBox> owner(new GreenCtxBox{created}, destroy_box);
    return GreenCtxHandle(owner, &owner->resource);
}

// Wrap an existing CUgreenCtx in a handle whose deleter makes no driver call.
// A null `green_ctx` yields an empty handle.
GreenCtxHandle create_green_ctx_handle_ref(CUgreenCtx green_ctx) {
    if (green_ctx == nullptr) {
        return {};
    }
    const auto ref_box = std::make_shared<const GreenCtxBox>(GreenCtxBox{green_ctx});
    return GreenCtxHandle(ref_box, &ref_box->resource);
}

// Thread-local cache of primary contexts indexed by device ID
Expand All @@ -250,14 +354,16 @@ ContextHandle get_primary_context(int device_id) {
}

auto box = std::shared_ptr<const ContextBox>(
new ContextBox{ctx},
new ContextBox{ctx, {}},
[device_id](const ContextBox* b) {
context_registry.unregister_handle(b->resource);
GILReleaseGuard gil;
p_cuDevicePrimaryCtxRelease(device_id);
delete b;
}
);
auto h = ContextHandle(box, &box->resource);
context_registry.register_handle(ctx, h);

// Update cache
if (static_cast<size_t>(device_id) >= primary_context_cache.size()) {
Expand Down Expand Up @@ -286,7 +392,18 @@ ContextHandle get_current_context() {
namespace {
// Storage behind a StreamHandle: the driver stream plus a context handle.
// The context handle is filled in when the stream is created for a given
// context and left empty by the factories that wrap an existing CUstream.
struct StreamBox {
CUstream resource;
ContextHandle h_context;
};

// Recover the enclosing StreamBox from a handle. Handles are constructed to
// point at the box's `resource` member, so subtracting that member's offset
// from the stored pointer yields the box address. The handle's pointer is not
// checked here.
static const StreamBox* get_box(const StreamHandle& h) noexcept {
const CUstream* p = h.get();
return reinterpret_cast<const StreamBox*>(
reinterpret_cast<const char*>(p) - offsetof(StreamBox, resource)
);
}

// See REGISTRY_DESIGN.md (Level 1: Driver Handle -> Resource Handle)
// Maps a CUstream value to the StreamHandle registered for it.
static HandleRegistry<CUstream, StreamHandle> stream_registry;
} // namespace

StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority) {
Expand All @@ -297,22 +414,44 @@ StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags
}

auto box = std::shared_ptr<const StreamBox>(
new StreamBox{stream},
[h_ctx](const StreamBox* b) {
new StreamBox{stream, h_ctx},
[](const StreamBox* b) {
stream_registry.unregister_handle(b->resource);
GILReleaseGuard gil;
p_cuStreamDestroy(b->resource);
delete b;
}
);
return StreamHandle(box, &box->resource);
StreamHandle h(box, &box->resource);
stream_registry.register_handle(stream, h);
return h;
}

StreamHandle create_stream_handle_ref(CUstream stream) {
auto box = std::make_shared<const StreamBox>(StreamBox{stream});
return StreamHandle(box, &box->resource);
if (auto h = stream_registry.lookup(stream)) {
return h;
}
auto box = std::shared_ptr<const StreamBox>(
new StreamBox{stream, {}},
[](const StreamBox* b) {
stream_registry.unregister_handle(b->resource);
delete b;
}
);
StreamHandle h(box, &box->resource);
stream_registry.register_handle(stream, h);
return h;
}

StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) {
if (auto h = stream_registry.lookup(stream)) {
// Reuse handles that already carry structural context metadata, e.g.
// cuda-core-owned streams. Owner-backed foreign streams still need a
// fresh handle so the supplied owner is retained.
if (get_box(h)->h_context) {
return h;
}
}
if (!owner) {
return create_stream_handle_ref(stream);
}
Expand All @@ -324,16 +463,23 @@ StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) {
}
Py_INCREF(owner);
auto box = std::shared_ptr<const StreamBox>(
new StreamBox{stream},
new StreamBox{stream, {}},
[owner](const StreamBox* b) {
stream_registry.unregister_handle(b->resource);
GILAcquireGuard gil;
if (gil.acquired()) {
Py_DECREF(owner);
}
delete b;
}
);
return StreamHandle(box, &box->resource);
StreamHandle h(box, &box->resource);
stream_registry.register_handle(stream, h);
return h;
}

// Return the context handle stored in the box behind `h`, or an empty handle
// when `h` itself is empty.
ContextHandle get_stream_context(const StreamHandle& h) noexcept {
    if (!h) {
        return ContextHandle{};
    }
    return get_box(h)->h_context;
}

StreamHandle get_legacy_stream() {
Expand Down
Loading