NVIDIA
diff --git a/.gitattributes b/.gitattributes
Lines changed: 3 additions & 0 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/ci/tools/merge_cuda_core_wheels.py b/ci/tools/merge_cuda_core_wheels.py
Lines changed: 6 additions & 0 deletions
diff --git a/cuda_core/MANIFEST.in b/cuda_core/MANIFEST.in
Lines changed: 1 addition & 0 deletions
diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py
Lines changed: 17 additions & 2 deletions
diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py
Lines changed: 9 additions & 0 deletions
diff --git a/cuda_core/cuda/core/_context.pxd b/cuda_core/cuda/core/_context.pxd
Lines changed: 19 additions & 0 deletions
diff --git a/cuda_core/cuda/core/_context.pyx b/cuda_core/cuda/core/_context.pyx
Lines changed: 33 additions & 13 deletions
@@ -6,6 +6,9 @@ cuda/_version.py export-subst
 # we do not own any headers checked in, don't touch them
 *.h binary
 *.hpp binary
+# Exception: headers we own (cuda_core C++ implementation)
+cuda_core/cuda/core/_cpp/*.h -binary text diff
+cuda_core/cuda/core/_cpp/*.hpp -binary text diff
 # git should not convert line endings in PNG files
 *.png binary
 *.svg binary
 
@@ -11,6 +11,7 @@ __pycache__/
 .pytest_cache/
 .benchmarks/
 *.cpp
+!*_impl.cpp
 !cuda_bindings/cuda/bindings/_lib/param_packer.cpp
 !cuda_bindings/cuda/bindings/_bindings/loader.cpp
 cache_driver
 
@@ -150,15 +150,21 @@ def merge_wheels(wheels: List[Path], output_dir: Path, show_wheel_contents: bool
             "__init__.py",
             "_version.py",
             "_include",
+            "_cpp",  # Headers for Cython development
             "cu12",
             "cu13",
         )
+        # _resource_handles is shared (not CUDA-version-specific) and must stay
+        # at top level. It's imported early in __init__.py before versioned code.
+        items_to_keep_prefix = ("_resource_handles",)
         all_items = os.scandir(base_wheel / base_dir)
         removed_count = 0
         for f in all_items:
             f_abspath = f.path
             if f.name in items_to_keep:
                 continue
+            if any(f.name.startswith(prefix) for prefix in items_to_keep_prefix):
+                continue
             if f.is_dir():
                 print(f"  Removing directory: {f.name}", file=sys.stderr)
                 shutil.rmtree(f_abspath)
 
@@ -3,3 +3,4 @@
 # SPDX-License-Identifier: Apache-2.0
 
 recursive-include cuda/core *.pyx *.pxd
+recursive-include cuda/core/_cpp *.cpp *.hpp
@@ -100,6 +100,17 @@ def module_names():
         for filename in glob.glob(f"{root_path}/**/*.pyx", recursive=True):
             yield filename[len(root_path) : -4]
 
+    def get_sources(mod_name):
+        """Get source files for a module, including any .cpp files."""
+        sources = [f"cuda/core/{mod_name}.pyx"]
+
+        # Add module-specific .cpp file from _cpp/ directory if it exists
+        cpp_file = f"cuda/core/_cpp/{mod_name.lstrip('_')}.cpp"
+        if os.path.exists(cpp_file):
+            sources.append(cpp_file)
+
+        return sources
+
     all_include_dirs = list(os.path.join(root, "include") for root in _get_cuda_paths())
     extra_compile_args = []
     if COMPILE_FOR_COVERAGE:
@@ -110,8 +121,12 @@ def module_names():
     ext_modules = tuple(
         Extension(
             f"cuda.core.{mod.replace(os.path.sep, '.')}",
-            sources=[f"cuda/core/{mod}.pyx"],
-            include_dirs=all_include_dirs,
+            sources=get_sources(mod),
+            include_dirs=[
+                "cuda/core/_include",
+                "cuda/core/_cpp",
+            ]
+            + all_include_dirs,
             language="c++",
             extra_compile_args=extra_compile_args,
         )
 
@@ -15,6 +15,15 @@
 
 import importlib
 
+# The _resource_handles module exports a PyCapsule dispatch table that other
+# extension modules access via PyCapsule_Import. We import it here to ensure
+# it's loaded before other modules try to use it.
+#
+# We use importlib.import_module with the full path to avoid triggering
+# circular import issues that can occur with relative imports during
+# package initialization.
+_resource_handles = importlib.import_module("cuda.core._resource_handles")
+
 subdir = f"cu{cuda_major}"
 try:
     versioned_mod = importlib.import_module(f".{subdir}", __package__)
 
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from cuda.core._resource_handles cimport ContextHandle
+
+cdef class Context:
+    """Cython declaration for Context class.
+
+    This class provides access to CUDA contexts. Context objects cannot be
+    instantiated directly - use factory methods or Device/Stream APIs.
+    """
+
+    cdef:
+        ContextHandle _h_context
+        int _device_id
+
+    @staticmethod
+    cdef Context _from_handle(type cls, ContextHandle h_context, int device_id)
@@ -4,35 +4,55 @@
 
 from dataclasses import dataclass
 
-from cuda.core._utils.cuda_utils import driver
+from cuda.core._resource_handles cimport (
+    ContextHandle,
+    as_intptr,
+    as_py,
+)
 
 
-@dataclass
-class ContextOptions:
-    pass  # TODO
+__all__ = ['Context', 'ContextOptions']
 
 
 cdef class Context:
+    """CUDA context wrapper.
 
-    cdef:
-        readonly object _handle
-        int _device_id
+    Context objects represent CUDA contexts and cannot be instantiated directly.
+    Use Device or Stream APIs to obtain context objects.
+    """
 
     def __init__(self, *args, **kwargs):
         raise RuntimeError("Context objects cannot be instantiated directly. Please use Device or Stream APIs.")
 
-    @classmethod
-    def _from_ctx(cls, handle: driver.CUcontext, int device_id):
-        cdef Context ctx = Context.__new__(Context)
-        ctx._handle = handle
+    @staticmethod
+    cdef Context _from_handle(type cls, ContextHandle h_context, int device_id):
+        """Create Context from existing ContextHandle (cdef-only factory)."""
+        cdef Context ctx = cls.__new__(cls)
+        ctx._h_context = h_context
         ctx._device_id = device_id
         return ctx
 
+    @property
+    def handle(self):
+        """Return the underlying CUcontext handle."""
+        if self._h_context.get() == NULL:
+            return None
+        return as_py(self._h_context)
+
     def __eq__(self, other):
         if not isinstance(other, Context):
             return NotImplemented
         cdef Context _other = <Context>other
-        return int(self._handle) == int(_other._handle)
+        return as_intptr(self._h_context) == as_intptr(_other._h_context)
 
     def __hash__(self) -> int:
-        return hash(int(self._handle))
+        return hash((type(self), as_intptr(self._h_context)))
+
+
+@dataclass
+class ContextOptions:
+    """Options for context creation.
+
+    Currently unused, reserved for future use.
+    """
+    pass  # TODO
Original file line number	Diff line number	Diff line change
`@@ -3,3 +3,4 @@`
`3`	`3`	`# SPDX-License-Identifier: Apache-2.0`
`4`	`4`
`5`	`5`	`recursive-include cuda/core *.pyx *.pxd`
	`6`	`+recursive-include cuda/core/_cpp *.cpp *.hpp`