Skip to content

Commit 9813c20

Browse files
committed
Add destroy() method with handle invalidation, remove GRAPH_NODE_SENTINEL
Replace discard() with destroy(), which calls cuGraphDestroyNode and then zeroes the CUgraphNode resource in the handle box via invalidate_graph_node_handle. This prevents stale memory access on destroyed nodes. Properties (type, pred, succ, handle) degrade gracefully to None or an empty set for destroyed nodes. Remove the GRAPH_NODE_SENTINEL (0x1) approach in favor of using NULL for both sentinels and destroyed nodes, which is simpler and avoids the risk of passing 0x1 to driver APIs that treat it as a valid pointer. Made-with: Cursor
1 parent 8554d30 commit 9813c20

File tree

6 files changed

+58
-26
lines changed

6 files changed

+58
-26
lines changed

cuda_core/cuda/core/_cpp/resource_handles.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ GraphHandle create_graph_handle_ref(CUgraph graph, const GraphHandle& h_parent)
957957

958958
namespace {
959959
struct GraphNodeBox {
960-
CUgraphNode resource;
960+
mutable CUgraphNode resource;
961961
GraphHandle h_graph;
962962
};
963963
} // namespace
@@ -978,6 +978,12 @@ GraphHandle graph_node_get_graph(const GraphNodeHandle& h) noexcept {
978978
return h ? get_box(h)->h_graph : GraphHandle{};
979979
}
980980

981+
void invalidate_graph_node_handle(const GraphNodeHandle& h) noexcept {
982+
if (h) {
983+
get_box(h)->resource = nullptr;
984+
}
985+
}
986+
981987
// ============================================================================
982988
// Graphics Resource Handles
983989
// ============================================================================

cuda_core/cuda/core/_cpp/resource_handles.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,9 @@ GraphNodeHandle create_graph_node_handle(CUgraphNode node, const GraphHandle& h_
415415
// Extract the owning graph handle from a node handle.
416416
GraphHandle graph_node_get_graph(const GraphNodeHandle& h) noexcept;
417417

418+
// Zero the CUgraphNode resource inside the handle, marking it invalid.
419+
void invalidate_graph_node_handle(const GraphNodeHandle& h) noexcept;
420+
418421
// ============================================================================
419422
// Graphics resource handle functions
420423
// ============================================================================

cuda_core/cuda/core/_graph/_graph_def/_graph_node.pyx

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ from cuda.core._resource_handles cimport (
4848
create_graph_handle_ref,
4949
create_graph_node_handle,
5050
graph_node_get_graph,
51+
invalidate_graph_node_handle,
5152
)
5253
from cuda.core._utils.cuda_utils cimport HANDLE_RETURN, _parse_fill_value
5354

@@ -123,16 +124,27 @@ cdef class GraphNode:
123124
"""
124125
return as_py(self._h_node)
125126

126-
def discard(self):
127-
"""Discard this node and remove all its edges from the parent graph.
127+
@property
128+
def is_valid(self):
129+
"""Whether this node is valid (not destroyed).
128130
129-
Safe to call on an already-discarded node (no-op).
131+
Returns ``False`` after :meth:`destroy` has been called.
132+
"""
133+
return as_intptr(self._h_node) != 0
134+
135+
def destroy(self):
136+
"""Destroy this node and remove all its edges from the parent graph.
137+
138+
After this call, :attr:`is_valid` returns ``False`` and the node
139+
cannot be re-added to any graph. Safe to call on an
140+
already-destroyed node (no-op).
130141
"""
131-
if self not in self.graph.nodes():
132-
return
133142
cdef cydriver.CUgraphNode node = as_cu(self._h_node)
143+
if node == NULL:
144+
return
134145
with nogil:
135146
HANDLE_RETURN(cydriver.cuGraphDestroyNode(node))
147+
invalidate_graph_node_handle(self._h_node)
136148

137149
@property
138150
def pred(self):

cuda_core/cuda/core/_resource_handles.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ cdef GraphHandle create_graph_handle_ref(cydriver.CUgraph graph, const GraphHand
186186
# Graph node handles
187187
cdef GraphNodeHandle create_graph_node_handle(cydriver.CUgraphNode node, const GraphHandle& h_graph) except+ nogil
188188
cdef GraphHandle graph_node_get_graph(const GraphNodeHandle& h) noexcept nogil
189+
cdef void invalidate_graph_node_handle(const GraphNodeHandle& h) noexcept nogil
189190

190191
# Graphics resource handles
191192
cdef GraphicsResourceHandle create_graphics_resource_handle(

cuda_core/cuda/core/_resource_handles.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
159159
cydriver.CUgraphNode node, const GraphHandle& h_graph) except+ nogil
160160
GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" (
161161
const GraphNodeHandle& h) noexcept nogil
162+
void invalidate_graph_node_handle "cuda_core::invalidate_graph_node_handle" (
163+
const GraphNodeHandle& h) noexcept nogil
162164

163165
# Graphics resource handles
164166
GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (

cuda_core/tests/graph/test_graphdef_mutation.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from helpers.graph_kernels import compile_parallel_kernels
1010
from helpers.marks import requires_module
1111

12-
from cuda.bindings import driver
1312
from cuda.core import Device, LaunchConfig, LegacyPinnedMemoryResource
1413
from cuda.core._graph._graph_def import GraphDef, KernelNode, MemsetNode
1514
from cuda.core._utils.cuda_utils import CUDAError
@@ -153,11 +152,11 @@ def test_baseline(self, init_cuda):
153152
assert rig.nodes == rig.initial_nodes
154153
rig.close()
155154

156-
def test_discard_a1(self, init_cuda):
157-
"""Discard a1 (creates a race on arm a). Arm b yields the expected
155+
def test_destroy_a1(self, init_cuda):
156+
"""Destroy a1 (creates a race on arm a). Arm b yields the expected
158157
value, and the final step is correctly ordered after b completes."""
159158
rig = YRig()
160-
rig.a[1].discard()
159+
rig.a[1].destroy()
161160
rig.run()
162161
_, b_exp, _ = rig.expected_output
163162
assert rig.B_out == b_exp
@@ -167,10 +166,10 @@ def test_discard_a1(self, init_cuda):
167166
assert rig.nodes == rig.initial_nodes - {a1}
168167
rig.close()
169168

170-
def test_discard_a2(self, init_cuda):
171-
"""Discard a2, connect a1--r"""
169+
def test_destroy_a2(self, init_cuda):
170+
"""Destroy a2, connect a1--r"""
172171
rig = YRig()
173-
rig.a[2].discard()
172+
rig.a[2].destroy()
174173
rig.a[1].succ.add(rig.r)
175174
rig.A_OPS.pop()
176175
rig.run()
@@ -180,11 +179,11 @@ def test_discard_a2(self, init_cuda):
180179
assert rig.nodes == rig.initial_nodes - {a2}
181180
rig.close()
182181

183-
def test_discard_joint(self, init_cuda):
182+
def test_destroy_joint(self, init_cuda):
184183
"""Remove the joining node and instead add edges directly to r."""
185184
rig = YRig()
186185
_, _, a2, _, b1, j, r = rig.a + rig.b + [rig.j, rig.r]
187-
j.discard()
186+
j.destroy()
188187
r.pred = {a2, b1}
189188
rig.run()
190189
assert rig.output == rig.expected_output
@@ -266,8 +265,8 @@ def test_adjacency_set_property_setter(init_cuda):
266265
assert hub.pred == set()
267266

268267

269-
def test_discarded_node(init_cuda):
270-
"""Test uses of discarded nodes."""
268+
def test_destroyed_node(init_cuda):
269+
"""Test that destroy() invalidates a node."""
271270
mr = LegacyPinnedMemoryResource()
272271
buf = mr.allocate(4)
273272
arr = np.from_dlpack(buf).view(np.int32)
@@ -278,25 +277,34 @@ def test_discarded_node(init_cuda):
278277
a = g.memset(ptr, 0, 4)
279278
b = a.memset(ptr, 42, 4)
280279

280+
assert a.is_valid
281+
assert b.is_valid
281282
assert b in g.nodes()
282283
assert (a, b) in g.edges()
283284

284-
b.discard()
285+
b.destroy()
285286

286-
# b is removed from the graph but still usable
287+
assert not b.is_valid
287288
assert b not in g.nodes()
288289
assert (a, b) not in g.edges()
290+
291+
# Python object is invalid but using it does not crash.
289292
assert isinstance(b, MemsetNode)
290-
assert b.type == driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_KERNEL
293+
assert b.type is None
291294
assert b.pred == set()
292295
assert b.succ == set()
293-
assert b.handle != 0
294-
assert b.dptr == ptr
295-
assert b.value == 42
296-
assert b.width == 4
296+
assert b.handle is None
297+
assert b.dptr == ptr # tolerable
298+
assert b.value == 42 # tolerable
299+
assert b.width == 4 # tolerable
300+
301+
# Adding an edge to a destroyed node fails.
302+
with pytest.raises(CUDAError):
303+
a.succ.add(b)
297304

298-
# Repeated discard succeeds quietly.
299-
b.discard()
305+
# Repeated destroy succeeds quietly.
306+
b.destroy()
307+
assert not b.is_valid
300308

301309

302310
def test_add_wrong_type(init_cuda):

0 commit comments

Comments
 (0)