Skip to content
Merged
24 changes: 21 additions & 3 deletions cuda_core/cuda/core/experimental/_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,27 @@ def __sub__(self, other):
try:
timing = handle_return(driver.cuEventElapsedTime(other.handle, self.handle))
except CUDAError as e:
raise RuntimeError(
"Timing capability must be enabled in order to subtract two Events; timing is disabled by default."
) from e
error_message = str(e)
if "CUDA_ERROR_INVALID_HANDLE" in error_message:
Comment thread
leofang marked this conversation as resolved.
Outdated
if self.is_timing_disabled or other.is_timing_disabled:
explanation = (
"Both Events must be created with timing enabled in order to subtract them; "
"use EventOptions(enable_timing=True) when creating both events."
)
else:
explanation = (
"Both Events must be recorded before they can be subtracted; "
"use Stream.record() to record both events to a stream."
)
elif "CUDA_ERROR_NOT_READY" in error_message:
explanation = (
"One or both events have not completed; "
"use Event.sync(), Stream.sync(), or Device.sync() to wait for the events to complete "
"before subtracting them."
)
else:
raise e
raise RuntimeError(explanation) from e
return timing

@property
Expand Down
60 changes: 60 additions & 0 deletions cuda_core/tests/test_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,63 @@ def test_is_done(init_cuda):
# Without a sync, the captured work might not have yet completed
# Therefore this check should never raise an exception
assert event.is_done in (True, False)


def test_error_timing_disabled():
Comment thread
leofang marked this conversation as resolved.
device = Device()
device.set_current()
enabled = EventOptions(enable_timing=True)
disabled = EventOptions(enable_timing=False)
stream = device.create_stream()

event1 = stream.record(options=enabled)
event2 = stream.record(options=disabled)
stream.sync()
with pytest.raises(RuntimeError, match="^Both Events must be created with timing enabled"):
event2 - event1

event1 = stream.record(options=disabled)
event2 = stream.record(options=disabled)
stream.sync()
with pytest.raises(RuntimeError, match="^Both Events must be created with timing enabled"):
event2 - event1

event1 = stream.record(options=enabled)
event2 = stream.record(options=enabled)
stream.sync()
event2 - event1
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd delete this last block, because that's already covered more comprehensively under the existing test_timing (test_timing_success).

Copy link
Copy Markdown
Contributor Author

@carterbox carterbox Apr 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you maybe forget to push the commits?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. I'm still working on addressing feedback and would prefer to push the commits all at once instead of triggering the CI multiple times.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our CI is manual trigger only, so don't worry about excessive pushes 🙂



def test_error_timing_recorded():
device = Device()
device.set_current()
enabled = EventOptions(enable_timing=True)
stream = device.create_stream()

event1 = stream.record(options=enabled)
event2 = device.create_event(options=enabled)
event3 = device.create_event(options=enabled)

stream.sync()
with pytest.raises(RuntimeError, match="^Both Events must be recorded"):
event2 - event1
with pytest.raises(RuntimeError, match="^Both Events must be recorded"):
event1 - event2
with pytest.raises(RuntimeError, match="^Both Events must be recorded"):
event3 - event2


def test_error_timing_incomplete():
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, this is an amazingly sophisticated unit test setup.

device = Device()
device.set_current()
enabled = EventOptions(enable_timing=True)
stream = device.create_stream()

event1 = stream.record(options=enabled)
event2 = device.create_event(options=enabled)
stream.wait(event2)
event3 = stream.record(options=enabled)

# event3 will never complete because the stream is waiting on event2 which is never recorded
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't there be no work recorded in event2 here so the stream.wait(event2) wouldn't actually wait on anything? Then event3 recording on the stream there again wouldn't be any actual work recorded so it very well could be finished?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting, I would think that because event2 has no work recorded, stream.wait(event2) should raise (maybe cudaErrorInvalidResourceHandle?) instead of no-op.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Took a quick look at the actual implementation, and Keith was right here. It's a no-op and cudaSuccess is returned if event2 is not recorded.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what's the solution for testing this case? Launching a kernel that takes a long time to complete or never returns? I believe cuda.core now has all the necessary parts to compile and launch such a kernel from this test?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@carterbox It is nerve wrecking to leave the kernel spinning and not clean it up after the event test is completed. I pushed commit add0ba1 to allow a signal passed from host to the busy kernel.

with pytest.raises(RuntimeError, match="^One or both events have not completed."):
event3 - event1
Comment thread
carterbox marked this conversation as resolved.
Loading