Skip to content

Commit 65b30e4

Browse files
committed
Add bench_ctx_device and fix JSON output
1 parent 5e688a3 commit 65b30e4

File tree

8 files changed

+323
-3
lines changed

8 files changed

+323
-3
lines changed

cuda_bindings/benchmarks/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ __pycache__/
1111

1212
# Override root .gitignore *.cpp rule (which targets Cython-generated files)
1313
!benchmarks/cpp/*.cpp
14+
15+
16+
results-python.json
17+
results-cpp.json
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Benchmarks for CUDA context queries via the Python driver bindings."""

import time

from runner.runtime import ensure_context

from cuda.bindings import driver as cuda

# A current CUDA context must exist before the benchmarks below time
# driver calls; ensure_context() is the shared runner helper for that.
ensure_context()
12+
13+
14+
def bench_ctx_get_current(loops: int) -> float:
    """Return the wall-clock seconds taken by `loops` calls to cuCtxGetCurrent."""
    # Bind the driver entry point to a local so the attribute lookup cost
    # stays out of the timed loop.
    get_current = cuda.cuCtxGetCurrent
    start = time.perf_counter()
    for _ in range(loops):
        get_current()
    return time.perf_counter() - start

cuda_bindings/benchmarks/benchmarks/cpp/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,7 @@ endif()
4646
add_executable(bench_pointer_attributes_cpp bench_pointer_attributes.cpp)
4747
target_include_directories(bench_pointer_attributes_cpp PRIVATE "${CUDA_DRIVER_INCLUDE_DIR}")
4848
target_link_libraries(bench_pointer_attributes_cpp PRIVATE "${CUDA_DRIVER_LIBRARY}")
49+
50+
add_executable(bench_ctx_device_cpp bench_ctx_device.cpp)
51+
target_include_directories(bench_ctx_device_cpp PRIVATE "${CUDA_DRIVER_INCLUDE_DIR}")
52+
target_link_libraries(bench_ctx_device_cpp PRIVATE "${CUDA_DRIVER_LIBRARY}")
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <cuda.h>
6+
7+
#include "bench_support.hpp"
8+
9+
#include <cstdlib>
10+
#include <iostream>
11+
12+
13+
// Terminate the process with a readable diagnostic when a CUDA driver
// API call fails; does nothing on CUDA_SUCCESS.
static void check_cu(CUresult status, const char* message) {
    if (status == CUDA_SUCCESS) {
        return;
    }
    const char* name = nullptr;
    cuGetErrorName(status, &name);
    std::cerr << message << ": " << ((name != nullptr) ? name : "unknown") << '\n';
    std::exit(1);
}
21+
22+
23+
// Entry point: times cuCtxGetCurrent in a loop (driven by
// bench::run_benchmark) against a context created on device 0, prints a
// summary, and optionally writes pyperf-compatible JSON.
int main(int argc, char** argv) {
    bench::Options options = bench::parse_args(argc, argv);
    if (options.benchmark_name.empty()) {
        // Default identifier used in the summary and JSON output.
        options.benchmark_name = "cpp.ctx_device.ctx_get_current";
    }

    // Setup: init CUDA and create a context
    check_cu(cuInit(0), "cuInit failed");

    CUdevice device;
    check_cu(cuDeviceGet(&device, 0), "cuDeviceGet failed");

    CUcontext ctx;
    // NOTE(review): this is the params-taking cuCtxCreate overload;
    // zero-initialized creation parameters request default behavior.
    CUctxCreateParams ctxParams = {};
    check_cu(cuCtxCreate(&ctx, &ctxParams, 0, device), "cuCtxCreate failed");

    CUcontext current_ctx = nullptr;

    // Run benchmark: each iteration queries the current context.
    auto results = bench::run_benchmark(options, [&]() {
        check_cu(
            cuCtxGetCurrent(&current_ctx),
            "cuCtxGetCurrent failed"
        );
    });

    // Sanity check: the call actually returned our context.
    if (current_ctx != ctx) {
        // Report but do not fail the run; timings were already collected.
        std::cerr << "unexpected: cuCtxGetCurrent returned a different context\n";
    }

    // Cleanup
    check_cu(cuCtxDestroy(ctx), "cuCtxDestroy failed");

    // Output
    bench::print_summary(options.benchmark_name, results);

    if (!options.output_path.empty()) {
        // JSON is written only when the caller supplied an output path.
        bench::write_pyperf_json(options.output_path, options.benchmark_name, options.loops, results);
    }

    return 0;
}

cuda_bindings/benchmarks/pixi.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ cmd = ["cmake", "--build", "$PIXI_PROJECT_ROOT/.build/cpp"]
7676
depends-on = [{ task = "bench-cpp-configure" }]
7777

7878
[target.linux.tasks.bench-cpp]
79-
cmd = ["$PIXI_PROJECT_ROOT/.build/cpp/bench_pointer_attributes_cpp"]
79+
cmd = ["python", "$PIXI_PROJECT_ROOT/run_cpp.py"]
8080
depends-on = [{ task = "bench-cpp-build" }]
8181

8282
[target.linux.tasks.lint]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Thin command-line entry point that delegates to the C++ benchmark runner."""

from runner.cpp import main

if __name__ == "__main__":
    main()
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

"""Runner that executes C++ benchmark binaries and merges their pyperf JSON."""

import argparse
import json
import subprocess
import sys
import tempfile
from pathlib import Path

# This file lives in <benchmarks>/runner/, so parent.parent is the
# benchmarks project root.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
BUILD_DIR = PROJECT_ROOT / ".build" / "cpp"
DEFAULT_OUTPUT = PROJECT_ROOT / "results-cpp.json"

# Binaries follow the bench_<name>_cpp naming convention (see CMakeLists).
BINARY_PREFIX = "bench_"
BINARY_SUFFIX = "_cpp"
18+
19+
20+
def discover_binaries() -> dict[str, Path]:
    """Map benchmark names to their compiled binaries found in BUILD_DIR.

    A file named ``bench_<name>_cpp`` is registered under ``<name>``;
    an empty mapping is returned when the build directory does not exist.
    """
    if not BUILD_DIR.is_dir():
        return {}

    return {
        entry.name.removeprefix(BINARY_PREFIX).removesuffix(BINARY_SUFFIX): entry
        for entry in sorted(BUILD_DIR.iterdir())
        if entry.is_file()
        and entry.name.startswith(BINARY_PREFIX)
        and entry.name.endswith(BINARY_SUFFIX)
    }
34+
35+
36+
def strip_output_args(argv: list[str]) -> list[str]:
    """Return *argv* minus any ``-o``/``--output`` flag and its value."""
    kept: list[str] = []
    args = iter(argv)
    for arg in args:
        if arg in ("-o", "--output"):
            # The flag consumes the following token as its value; drop both.
            next(args, None)
            continue
        if arg.startswith(("-o=", "--output=")):
            continue
        kept.append(arg)
    return kept
50+
51+
52+
def merge_pyperf_json(individual_files: list[Path], output_path: Path) -> int:
    """Combine per-binary pyperf JSON files into one BenchmarkSuite file.

    Each input file has the shape
    ``{"version": "1.0", "metadata": {...}, "benchmarks": [{...}]}``.
    The file-level name/loops/unit metadata is pushed down onto every run
    that does not already carry it, and all benchmark entries are written
    to *output_path* as a single suite.

    Returns the number of benchmark entries in the merged file.
    """
    collected = []

    for source in individual_files:
        document = json.loads(Path(source).read_text())

        meta = document.get("metadata", {})
        default_name = meta.get("name", "")
        default_loops = meta.get("loops")
        default_unit = meta.get("unit", "second")

        for entry in document.get("benchmarks", []):
            for run in entry.get("runs", []):
                run_meta = run.setdefault("metadata", {})
                # setdefault: never overwrite metadata a run already has.
                if default_name:
                    run_meta.setdefault("name", default_name)
                if default_loops is not None:
                    run_meta.setdefault("loops", default_loops)
                run_meta.setdefault("unit", default_unit)

            collected.append(entry)

    suite = {
        "version": "1.0",
        "benchmarks": collected,
    }
    Path(output_path).write_text(json.dumps(suite))

    return len(collected)
91+
92+
93+
def parse_args(argv: list[str]) -> tuple[argparse.Namespace, list[str]]:
    """Split *argv* into recognised runner options and pass-through args."""
    parser = argparse.ArgumentParser(description="Run C++ CUDA benchmarks", add_help=False)
    parser.add_argument(
        "--benchmark",
        action="append",
        default=[],
        help="Benchmark name to run (e.g. 'ctx_device'). Repeat for multiple. Defaults to all.",
    )
    parser.add_argument("--list", action="store_true", help="Print discovered benchmark names and exit.")
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"JSON output file path (default: {DEFAULT_OUTPUT.name})",
    )
    # parse_known_args already yields the (namespace, leftovers) pair we return.
    return parser.parse_known_args(argv)
118+
119+
120+
def main() -> None:
    """Discover, run, and aggregate the C++ benchmark binaries.

    Each selected binary writes JSON into a temporary directory; the
    per-binary files are then merged into a single pyperf-style suite.
    Exits non-zero when no binaries exist, a name is unknown, or any
    binary fails.
    """
    opts, extra_argv = parse_args(sys.argv[1:])

    binaries = discover_binaries()
    if not binaries:
        print(
            f"No C++ benchmark binaries found in {BUILD_DIR}.\n"
            "Run 'pixi run bench-cpp-build' first.",
            file=sys.stderr,
        )
        sys.exit(1)

    if opts.list:
        print("\n".join(sorted(binaries)))
        return

    if opts.benchmark:
        unknown_names = sorted(set(opts.benchmark) - set(binaries))
        if unknown_names:
            known = ", ".join(sorted(binaries))
            unknown = ", ".join(unknown_names)
            print(
                f"Unknown benchmark(s): {unknown}. Known benchmarks: {known}",
                file=sys.stderr,
            )
            sys.exit(1)
        selected = opts.benchmark
    else:
        selected = sorted(binaries)

    # Strip any --output args to avoid conflicts with our output handling.
    forwarded = strip_output_args(extra_argv)

    destination = opts.output.resolve()
    any_failed = False
    produced: list[Path] = []

    with tempfile.TemporaryDirectory(prefix="cuda_bench_cpp_") as scratch:
        scratch_dir = Path(scratch)

        for name in selected:
            result_file = scratch_dir / f"{name}.json"
            command = [str(binaries[name]), "-o", str(result_file), *forwarded]
            completed = subprocess.run(command)
            if completed.returncode != 0:
                print(f"FAILED: {name} (exit code {completed.returncode})", file=sys.stderr)
                any_failed = True
            elif result_file.exists():
                produced.append(result_file)

        # Merge inside the context manager: the temp files vanish on exit.
        if produced:
            merged_count = merge_pyperf_json(produced, destination)
            print(f"\nResults saved to {destination} ({merged_count} benchmark(s))")

    if any_failed:
        sys.exit(1)


if __name__ == "__main__":
    main()

cuda_bindings/benchmarks/runner/main.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212

1313
import pyperf
1414

15-
BENCH_DIR = Path(__file__).resolve().parent.parent / "benchmarks"
15+
PROJECT_ROOT = Path(__file__).resolve().parent.parent
16+
BENCH_DIR = PROJECT_ROOT / "benchmarks"
17+
DEFAULT_OUTPUT = PROJECT_ROOT / "results-python.json"
1618

1719

1820
def load_module(module_path: Path) -> ModuleType:
@@ -54,6 +56,22 @@ def discover_benchmarks() -> dict[str, Callable[[int], float]]:
5456
return registry
5557

5658

59+
def strip_pyperf_output_args(argv: list[str]) -> list[str]:
60+
cleaned: list[str] = []
61+
skip_next = False
62+
for i, arg in enumerate(argv):
63+
if skip_next:
64+
skip_next = False
65+
continue
66+
if arg in ("-o", "--output", "--append"):
67+
skip_next = True
68+
continue
69+
if arg.startswith("-o=") or arg.startswith("--output=") or arg.startswith("--append="):
70+
continue
71+
cleaned.append(arg)
72+
return cleaned
73+
74+
5775
def parse_args(argv: list[str]) -> tuple[argparse.Namespace, list[str]]:
5876
parser = argparse.ArgumentParser(add_help=False)
5977
parser.add_argument(
@@ -67,13 +85,19 @@ def parse_args(argv: list[str]) -> tuple[argparse.Namespace, list[str]]:
6785
action="store_true",
6886
help="Print discovered benchmark IDs and exit.",
6987
)
88+
parser.add_argument(
89+
"-o",
90+
"--output",
91+
type=Path,
92+
default=DEFAULT_OUTPUT,
93+
help=f"JSON output file path (default: {DEFAULT_OUTPUT.name})",
94+
)
7095
parsed, remaining = parser.parse_known_args(argv)
7196
return parsed, remaining
7297

7398

7499
def main() -> None:
75100
parsed, remaining_argv = parse_args(sys.argv[1:])
76-
sys.argv = [sys.argv[0], *remaining_argv]
77101

78102
registry = discover_benchmarks()
79103
if not registry:
@@ -94,10 +118,24 @@ def main() -> None:
94118
else:
95119
benchmark_ids = sorted(registry)
96120

121+
# Strip any --output args to avoid conflicts with our output handling
122+
output_path = parsed.output.resolve()
123+
remaining_argv = strip_pyperf_output_args(remaining_argv)
124+
is_worker = "--worker" in remaining_argv
125+
126+
# Delete the file so this run starts fresh
127+
if not is_worker:
128+
output_path.unlink(missing_ok=True)
129+
130+
sys.argv = [sys.argv[0], "--append", str(output_path), *remaining_argv]
131+
97132
runner = pyperf.Runner()
98133
for bench_id in benchmark_ids:
99134
runner.bench_time_func(bench_id, registry[bench_id])
100135

136+
if not is_worker:
137+
print(f"\nResults saved to {output_path}")
138+
101139

102140
if __name__ == "__main__":
103141
main()

0 commit comments

Comments
 (0)