Skip to content

Commit 79f930d

Browse files
committed
Add bench_event and bench_stream and compare script for a summary table
1 parent f09b4f2 commit 79f930d

File tree

6 files changed

+345
-48
lines changed

6 files changed

+345
-48
lines changed

cuda_bindings/benchmarks/benchmarks/cpp/CMakeLists.txt

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,26 @@ find_library(
3535
"${CONDA_PREFIX_HINT}/lib/stubs"
3636
)
3737

38+
# Locate nvrtc.h and libnvrtc. They are needed only by the benchmarks that
# compile kernels at runtime; callers decide what to do when either lookup
# fails (the result variables are then left as *-NOTFOUND).
#
# Hints are searched in the order listed: an explicit CUDA_HOME first, then the
# per-target directories inside a conda prefix, then the conda prefix itself.
# Both the x86_64 ("x86_64-linux") and the aarch64 server ("sbsa-linux") target
# directory names are listed so the lookup is not tied to one architecture.
find_path(
  NVRTC_INCLUDE_DIR
  NAMES nvrtc.h
  HINTS
    "${CUDA_HOME_HINT}/include"
    "${CONDA_PREFIX_HINT}/targets/x86_64-linux/include"
    "${CONDA_PREFIX_HINT}/targets/sbsa-linux/include"
    "${CONDA_PREFIX_HINT}/include"
)

find_library(
  NVRTC_LIBRARY
  NAMES nvrtc
  HINTS
    "${CUDA_HOME_HINT}/lib64"
    "${CUDA_HOME_HINT}/lib"
    "${CONDA_PREFIX_HINT}/targets/x86_64-linux/lib"
    "${CONDA_PREFIX_HINT}/targets/sbsa-linux/lib"
    "${CONDA_PREFIX_HINT}/lib"
)
57+
3858
if(NOT CUDA_DRIVER_INCLUDE_DIR)
3959
message(FATAL_ERROR "Could not find cuda.h. Ensure CUDA_HOME is set or install cuda-crt-dev.")
4060
endif()
@@ -43,10 +63,29 @@ if(NOT CUDA_DRIVER_LIBRARY)
4363
message(FATAL_ERROR "Could not find libcuda. Ensure the NVIDIA driver is installed.")
4464
endif()
4565

46-
add_executable(bench_pointer_attributes_cpp bench_pointer_attributes.cpp)
47-
target_include_directories(bench_pointer_attributes_cpp PRIVATE "${CUDA_DRIVER_INCLUDE_DIR}")
48-
target_link_libraries(bench_pointer_attributes_cpp PRIVATE "${CUDA_DRIVER_LIBRARY}")
66+
# add_driver_benchmark(<name>)
#
# Builds <name>.cpp into an executable target named <name>_cpp that is compiled
# against the CUDA driver headers and linked against the driver library only.
function(add_driver_benchmark name)
  set(bench_target "${name}_cpp")
  add_executable(${bench_target} "${name}.cpp")
  target_include_directories(${bench_target} PRIVATE "${CUDA_DRIVER_INCLUDE_DIR}")
  target_link_libraries(${bench_target} PRIVATE "${CUDA_DRIVER_LIBRARY}")
endfunction()
4972

50-
add_executable(bench_ctx_device_cpp bench_ctx_device.cpp)
51-
target_include_directories(bench_ctx_device_cpp PRIVATE "${CUDA_DRIVER_INCLUDE_DIR}")
52-
target_link_libraries(bench_ctx_device_cpp PRIVATE "${CUDA_DRIVER_LIBRARY}")
73+
# add_nvrtc_benchmark(<name>)
#
# Same as add_driver_benchmark(<name>), with the NVRTC include directory and
# library appended after the driver ones on the <name>_cpp target.
function(add_nvrtc_benchmark name)
  # Creates <name>_cpp with the driver include directory and library.
  add_driver_benchmark(${name})
  target_include_directories(${name}_cpp PRIVATE "${NVRTC_INCLUDE_DIR}")
  target_link_libraries(${name}_cpp PRIVATE "${NVRTC_LIBRARY}")
endfunction()
79+
80+
# Benchmarks that need nothing beyond the CUDA driver API. Each entry <name>
# is built from <name>.cpp into the target <name>_cpp.
foreach(driver_bench IN ITEMS
    bench_pointer_attributes
    bench_ctx_device
    bench_stream
    bench_event)
  add_driver_benchmark(${driver_bench})
endforeach()
85+
86+
# Benchmarks that compile kernels through NVRTC. They are optional: when either
# the header or the library was not located, warn and leave them out of the
# build instead of failing configuration.
if(NOT NVRTC_INCLUDE_DIR OR NOT NVRTC_LIBRARY)
  message(WARNING "NVRTC not found — skipping bench_launch. Install cuda-nvrtc-dev.")
else()
  add_nvrtc_benchmark(bench_launch)
endif()

cuda_bindings/benchmarks/benchmarks/cpp/bench_ctx_device.cpp

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ static void check_cu(CUresult status, const char* message) {
2222

2323
int main(int argc, char** argv) {
2424
bench::Options options = bench::parse_args(argc, argv);
25-
if (options.benchmark_name.empty()) {
26-
options.benchmark_name = "ctx_device.ctx_get_current";
27-
}
2825

2926
// Setup: init CUDA and create a context
3027
check_cu(cuInit(0), "cuInit failed");
@@ -36,30 +33,55 @@ int main(int argc, char** argv) {
3633
CUctxCreateParams ctxParams = {};
3734
check_cu(cuCtxCreate(&ctx, &ctxParams, 0, device), "cuCtxCreate failed");
3835

39-
CUcontext current_ctx = nullptr;
36+
bench::BenchmarkSuite suite(options);
4037

41-
// Run benchmark
42-
auto results = bench::run_benchmark(options, [&]() {
43-
check_cu(
44-
cuCtxGetCurrent(&current_ctx),
45-
"cuCtxGetCurrent failed"
46-
);
47-
});
38+
// --- ctx_get_current ---
39+
{
40+
CUcontext current_ctx = nullptr;
41+
suite.run("ctx_device.ctx_get_current", [&]() {
42+
check_cu(cuCtxGetCurrent(&current_ctx), "cuCtxGetCurrent failed");
43+
});
44+
}
4845

49-
// Sanity check: the call actually returned our context
50-
if (current_ctx != ctx) {
51-
std::cerr << "unexpected: cuCtxGetCurrent returned a different context\n";
46+
// --- ctx_set_current ---
47+
{
48+
suite.run("ctx_device.ctx_set_current", [&]() {
49+
check_cu(cuCtxSetCurrent(ctx), "cuCtxSetCurrent failed");
50+
});
5251
}
5352

54-
// Cleanup
55-
check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");
53+
// --- ctx_get_device ---
54+
{
55+
CUdevice dev;
56+
suite.run("ctx_device.ctx_get_device", [&]() {
57+
check_cu(cuCtxGetDevice(&dev), "cuCtxGetDevice failed");
58+
});
59+
}
5660

57-
// Output
58-
bench::print_summary(options.benchmark_name, results);
61+
// --- device_get ---
62+
{
63+
CUdevice dev;
64+
suite.run("ctx_device.device_get", [&]() {
65+
check_cu(cuDeviceGet(&dev, 0), "cuDeviceGet failed");
66+
});
67+
}
5968

60-
if (!options.output_path.empty()) {
61-
bench::write_pyperf_json(options.output_path, options.benchmark_name, options.loops, results);
69+
// --- device_get_attribute ---
70+
{
71+
int value = 0;
72+
suite.run("ctx_device.device_get_attribute", [&]() {
73+
check_cu(
74+
cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device),
75+
"cuDeviceGetAttribute failed"
76+
);
77+
});
6278
}
6379

80+
// Cleanup
81+
check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");
82+
83+
// Write all results
84+
suite.write();
85+
6486
return 0;
6587
}
cuda_bindings/benchmarks/benchmarks/cpp/bench_event.cpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <cuda.h>
6+
7+
#include "bench_support.hpp"
8+
9+
#include <cstdlib>
10+
#include <iostream>
11+
12+
13+
// Terminates the process when a CUDA driver call did not succeed.
//
// status:  result code returned by the driver call.
// message: text printed in front of the error name.
//
// On CUDA_SUCCESS this returns normally. On any other status it prints
// "<message>: <error name>" to stderr ("unknown" when the name pointer is
// still null after cuGetErrorName) and exits with code 1.
static void check_cu(CUresult status, const char* message) {
    if (status == CUDA_SUCCESS) {
        return;
    }

    const char* name = nullptr;
    cuGetErrorName(status, &name);
    if (name == nullptr) {
        name = "unknown";
    }
    std::cerr << message << ": " << name << '\n';
    std::exit(1);
}
21+
22+
23+
int main(int argc, char** argv) {
24+
bench::Options options = bench::parse_args(argc, argv);
25+
26+
// Setup
27+
check_cu(cuInit(0), "cuInit failed");
28+
29+
CUdevice device;
30+
check_cu(cuDeviceGet(&device, 0), "cuDeviceGet failed");
31+
32+
CUcontext ctx;
33+
CUctxCreateParams ctxParams = {};
34+
check_cu(cuCtxCreate(&ctx, &ctxParams, 0, device), "cuCtxCreate failed");
35+
36+
CUstream stream;
37+
check_cu(cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING), "cuStreamCreate failed");
38+
39+
// Persistent event for query/synchronize/record benchmarks
40+
CUevent event;
41+
check_cu(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "cuEventCreate failed");
42+
43+
// Record and sync so the event starts in a completed state
44+
check_cu(cuEventRecord(event, stream), "cuEventRecord failed");
45+
check_cu(cuStreamSynchronize(stream), "cuStreamSynchronize failed");
46+
47+
bench::BenchmarkSuite suite(options);
48+
49+
// --- event_create_destroy ---
50+
{
51+
CUevent e;
52+
suite.run("event.event_create_destroy", [&]() {
53+
check_cu(cuEventCreate(&e, CU_EVENT_DISABLE_TIMING), "cuEventCreate failed");
54+
check_cu(cuEventDestroy(e), "cuEventDestroy failed");
55+
});
56+
}
57+
58+
// --- event_record ---
59+
{
60+
suite.run("event.event_record", [&]() {
61+
check_cu(cuEventRecord(event, stream), "cuEventRecord failed");
62+
});
63+
}
64+
65+
// --- event_query ---
66+
{
67+
suite.run("event.event_query", [&]() {
68+
// Returns CUDA_SUCCESS if complete, CUDA_ERROR_NOT_READY if not
69+
cuEventQuery(event);
70+
});
71+
}
72+
73+
// --- event_synchronize ---
74+
{
75+
suite.run("event.event_synchronize", [&]() {
76+
check_cu(cuEventSynchronize(event), "cuEventSynchronize failed");
77+
});
78+
}
79+
80+
// Cleanup
81+
check_cu(cuEventDestroy(event), "cuEventDestroy failed");
82+
check_cu(cuStreamDestroy(stream), "cuStreamDestroy failed");
83+
check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");
84+
85+
suite.write();
86+
87+
return 0;
88+
}

cuda_bindings/benchmarks/benchmarks/cpp/bench_pointer_attributes.cpp

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,6 @@ static void check_cu(CUresult status, const char* message) {
2222

2323
int main(int argc, char** argv) {
2424
bench::Options options = bench::parse_args(argc, argv);
25-
if (options.benchmark_name.empty()) {
26-
options.benchmark_name = "pointer_attributes.pointer_get_attribute";
27-
}
2825

2926
// Setup: init CUDA, allocate memory
3027
check_cu(cuInit(0), "cuInit failed");
@@ -39,31 +36,24 @@ int main(int argc, char** argv) {
3936
CUdeviceptr ptr;
4037
check_cu(cuMemAlloc(&ptr, 1 << 18), "cuMemAlloc failed");
4138

42-
unsigned int memory_type = 0;
43-
44-
// Run benchmark
45-
auto results = bench::run_benchmark(options, [&]() {
46-
check_cu(
47-
cuPointerGetAttribute(&memory_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr),
48-
"cuPointerGetAttribute failed"
49-
);
50-
});
51-
52-
// Sanity check: the call actually did something
53-
if (memory_type == 0) {
54-
std::cerr << "unexpected memory_type=0\n";
39+
bench::BenchmarkSuite suite(options);
40+
41+
// --- pointer_get_attribute ---
42+
{
43+
unsigned int memory_type = 0;
44+
suite.run("pointer_attributes.pointer_get_attribute", [&]() {
45+
check_cu(
46+
cuPointerGetAttribute(&memory_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptr),
47+
"cuPointerGetAttribute failed"
48+
);
49+
});
5550
}
5651

5752
// Cleanup
5853
check_cu(cuMemFree(ptr), "cuMemFree failed");
5954
check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");
6055

61-
// Output
62-
bench::print_summary(options.benchmark_name, results);
63-
64-
if (!options.output_path.empty()) {
65-
bench::write_pyperf_json(options.output_path, options.benchmark_name, options.loops, results);
66-
}
56+
suite.write();
6757

6858
return 0;
6959
}
cuda_bindings/benchmarks/benchmarks/cpp/bench_stream.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <cuda.h>
6+
7+
#include "bench_support.hpp"
8+
9+
#include <cstdlib>
10+
#include <iostream>
11+
12+
13+
// Guard for CUDA driver calls: returns on success, otherwise reports and exits.
//
// status:  result code returned by the driver call.
// message: text printed in front of the error name.
//
// For any status other than CUDA_SUCCESS, writes "<message>: <error name>" to
// stderr, falling back to "unknown" when cuGetErrorName leaves the name
// pointer null, and then exits the process with code 1.
static void check_cu(CUresult status, const char* message) {
    const bool succeeded = (status == CUDA_SUCCESS);
    if (succeeded) {
        return;
    }

    const char* resolved_name = nullptr;
    cuGetErrorName(status, &resolved_name);
    const char* shown_name = (resolved_name != nullptr) ? resolved_name : "unknown";
    std::cerr << message << ": " << shown_name << '\n';
    std::exit(1);
}
21+
22+
23+
int main(int argc, char** argv) {
24+
bench::Options options = bench::parse_args(argc, argv);
25+
26+
// Setup
27+
check_cu(cuInit(0), "cuInit failed");
28+
29+
CUdevice device;
30+
check_cu(cuDeviceGet(&device, 0), "cuDeviceGet failed");
31+
32+
CUcontext ctx;
33+
CUctxCreateParams ctxParams = {};
34+
check_cu(cuCtxCreate(&ctx, &ctxParams, 0, device), "cuCtxCreate failed");
35+
36+
// Persistent stream for query/synchronize benchmarks
37+
CUstream stream;
38+
check_cu(cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING), "cuStreamCreate failed");
39+
40+
bench::BenchmarkSuite suite(options);
41+
42+
// --- stream_create_destroy ---
43+
{
44+
CUstream s;
45+
suite.run("stream.stream_create_destroy", [&]() {
46+
check_cu(cuStreamCreate(&s, CU_STREAM_NON_BLOCKING), "cuStreamCreate failed");
47+
check_cu(cuStreamDestroy(s), "cuStreamDestroy failed");
48+
});
49+
}
50+
51+
// --- stream_query ---
52+
{
53+
suite.run("stream.stream_query", [&]() {
54+
// cuStreamQuery returns CUDA_SUCCESS if stream is idle,
55+
// CUDA_ERROR_NOT_READY if busy — both are valid here.
56+
cuStreamQuery(stream);
57+
});
58+
}
59+
60+
// --- stream_synchronize ---
61+
{
62+
suite.run("stream.stream_synchronize", [&]() {
63+
check_cu(cuStreamSynchronize(stream), "cuStreamSynchronize failed");
64+
});
65+
}
66+
67+
// Cleanup
68+
check_cu(cuStreamDestroy(stream), "cuStreamDestroy failed");
69+
check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");
70+
71+
suite.write();
72+
73+
return 0;
74+
}

0 commit comments

Comments
 (0)