Skip to content

Commit fade363

Browse files
Sébastien Loisel
authored and committed
Simplify CPU vs GPU benchmark: remove GPU_MIN_SIZE threshold logic
1 parent c0d06fa commit fade363

1 file changed

Lines changed: 22 additions & 39 deletions

File tree

tools/benchmark_cpu_vs_gpu.jl

Lines changed: 22 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,12 @@
11
#!/usr/bin/env julia
22
#
3-
# Benchmark: CPU vs Auto (GPU with size threshold) for fem2d_mpi_solve
3+
# Benchmark: CPU vs GPU for fem2d_mpi_solve
44
#
55
# Run with:
6-
# mpiexec -n 1 julia --project=MultiGridBarrierMPI.jl MultiGridBarrierMPI.jl/tools/benchmark_cpu_vs_gpu.jl
6+
# mpiexec -n 1 julia --project=. tools/benchmark_cpu_vs_gpu.jl
77
#
88
# Note: Metal only supports Float32, so we use Float32 for both CPU and GPU
99
# to ensure a fair comparison.
10-
#
11-
# Two modes:
12-
# - CPU: Pure CPU (no backend parameter)
13-
# - Auto: Automatic GPU/CPU selection based on GPU_MIN_SIZE threshold
1410

1511
using MPI
1612
MPI.Init()
@@ -23,85 +19,72 @@ using Metal
2319
using MultiGridBarrierMPI
2420
using MultiGridBarrier
2521
using LinearAlgebraMPI
26-
using LinearAlgebraMPI: GPU_MIN_SIZE
2722
using BenchmarkTools
2823
using Printf
2924

30-
# Configurable threshold for "Auto" mode
31-
const AUTO_THRESHOLD = 1000
32-
3325
println("\n" * "="^70)
34-
println("Benchmark: fem2d_mpi_solve - CPU vs Auto")
26+
println("Benchmark: fem2d_mpi_solve - CPU vs GPU")
3527
println(" MPI ranks: $(MPI.Comm_size(comm))")
3628
println(" Element type: Float32 (Metal requirement)")
37-
println(" Auto threshold: GPU_MIN_SIZE = $AUTO_THRESHOLD")
38-
println(" Running L = 1:6")
29+
println(" Running L = 1:7")
3930
println("="^70)
4031

4132
# Store results
4233
results = Vector{NamedTuple}()
4334

44-
for L in 1:6
35+
for L in 1:7
4536
# Get grid size
4637
g = fem2d(Float32; L=L)
4738
n = size(g.x, 1)
4839

4940
println("\n--- L = $L (n = $n) ---")
5041

51-
# Benchmark CPU (pure CPU, no backend)
42+
# Benchmark CPU
5243
println(" Benchmarking CPU...")
5344
LinearAlgebraMPI.clear_plan_cache!()
5445
b_cpu = @benchmark fem2d_mpi_solve(Float32; L=$L, verbose=false) samples=1 evals=1
5546
cpu_time = median(b_cpu.times) / 1e9
5647

57-
# Benchmark Auto (GPU_MIN_SIZE threshold)
58-
println(" Benchmarking Auto (threshold=$AUTO_THRESHOLD)...")
48+
# Benchmark GPU
49+
println(" Benchmarking GPU...")
5950
LinearAlgebraMPI.clear_plan_cache!()
60-
GPU_MIN_SIZE[] = AUTO_THRESHOLD
61-
b_auto = @benchmark fem2d_mpi_solve(Float32; L=$L, backend=LinearAlgebraMPI.mtl, verbose=false) samples=1 evals=1
62-
auto_time = median(b_auto.times) / 1e9
63-
64-
# Determine which arrays went to GPU in auto mode
65-
GPU_MIN_SIZE[] = AUTO_THRESHOLD
66-
g_test = fem2d_mpi(Float32; L=L, backend=LinearAlgebraMPI.mtl)
67-
auto_is_gpu = !(g_test.x.A isa Matrix)
51+
b_gpu = @benchmark fem2d_mpi_solve(Float32; L=$L, backend=LinearAlgebraMPI.mtl, verbose=false) samples=1 evals=1
52+
gpu_time = median(b_gpu.times) / 1e9
6853

69-
push!(results, (L=L, n=n, cpu=cpu_time, auto=auto_time, auto_gpu=auto_is_gpu))
54+
push!(results, (L=L, n=n, cpu=cpu_time, gpu=gpu_time))
7055

7156
# Print results
72-
speedup = cpu_time / auto_time
57+
speedup = cpu_time / gpu_time
7358
println(" CPU: $(round(cpu_time, digits=3))s")
74-
println(" Auto: $(round(auto_time, digits=3))s [$(auto_is_gpu ? "GPU" : "CPU")]")
59+
println(" GPU: $(round(gpu_time, digits=3))s")
7560
if speedup > 1
76-
println(" Speedup: $(round(speedup, digits=2))x (Auto faster)")
61+
println(" Speedup: $(round(speedup, digits=2))x (GPU faster)")
7762
else
78-
println(" Speedup: $(round(1/speedup, digits=2))x (CPU faster)")
63+
println(" Slowdown: $(round(1/speedup, digits=2))x (CPU faster)")
7964
end
8065
end
8166

8267
# Summary table
8368
println("\n" * "="^70)
8469
println("Summary")
8570
println("="^70)
86-
println("\n L n CPU Auto Speedup Auto backend")
87-
println(" - - --- ---- ------- ------------")
71+
println("\n L n CPU GPU Speedup")
72+
println(" - - --- --- -------")
8873
for r in results
8974
n_str = lpad(r.n, 7)
9075
cpu_str = @sprintf("%6.3fs", r.cpu)
91-
auto_str = @sprintf("%6.3fs", r.auto)
76+
gpu_str = @sprintf("%6.3fs", r.gpu)
9277

93-
speedup = r.cpu / r.auto
78+
speedup = r.cpu / r.gpu
9479
if speedup > 1
95-
speedup_str = @sprintf("%.2fx Auto", speedup)
80+
speedup_str = @sprintf("%.2fx GPU", speedup)
9681
else
9782
speedup_str = @sprintf("%.2fx CPU", 1/speedup)
9883
end
9984
speedup_str = lpad(speedup_str, 10)
10085

101-
auto_backend = r.auto_gpu ? "GPU" : "CPU"
102-
println(" $(r.L) $n_str $cpu_str $auto_str $speedup_str $auto_backend")
86+
println(" $(r.L) $n_str $cpu_str $gpu_str $speedup_str")
10387
end
10488

105-
println("\n Auto threshold: GPU_MIN_SIZE = $AUTO_THRESHOLD")
106-
println(" Speedup = CPU time / Auto time (>1 means Auto is faster)")
89+
println("\n Speedup = CPU time / GPU time (>1 means GPU is faster)")
10790
println("="^70)

0 commit comments

Comments
 (0)