|
70 | 70 | pub fn benchmarks<M: Measurement>(c: &mut Criterion<M>) { |
71 | 71 | bench_state_vec_scaling(c); |
72 | 72 | bench_individual_gates(c); |
| 73 | + bench_measurement_scaling(c); |
73 | 74 | #[cfg(feature = "parallel")] |
74 | 75 | bench_parallel_execution(c); |
75 | 76 | } |
@@ -240,6 +241,61 @@ fn bench_individual_gates<M: Measurement>(c: &mut Criterion<M>) { |
240 | 241 | group.finish(); |
241 | 242 | } |
242 | 243 |
|
| 244 | +/// Benchmark measurement performance scaling across qubit counts. |
| 245 | +/// Measures all qubits after applying H to each (maximum uncertainty). |
| 246 | +/// This isolates the GPU measurement optimization (workgroup reduction vs full readback). |
| 247 | +fn bench_measurement_scaling<M: Measurement>(c: &mut Criterion<M>) { |
| 248 | + let mut group = c.benchmark_group("Measurement Scaling"); |
| 249 | + group.sample_size(20); |
| 250 | + |
| 251 | + let qubit_counts = [10, 14, 18, 20, 22]; |
| 252 | + |
| 253 | + for &nq in &qubit_counts { |
| 254 | + // CPU baseline: StateVec |
| 255 | + group.bench_with_input( |
| 256 | + BenchmarkId::new("StateVec_CPU", nq), |
| 257 | + &nq, |
| 258 | + |b, &nq| { |
| 259 | + let mut sim = StateVecSoA::new(nq); |
| 260 | + b.iter(|| { |
| 261 | + sim.reset(); |
| 262 | + for q in 0..nq { |
| 263 | + sim.h(&[QubitId(q)]); |
| 264 | + } |
| 265 | + for q in 0..nq { |
| 266 | + black_box(sim.mz(&[QubitId(q)])); |
| 267 | + } |
| 268 | + }); |
| 269 | + }, |
| 270 | + ); |
| 271 | + |
| 272 | + // GPU: GpuStateVec (wgpu) |
| 273 | + #[cfg(feature = "gpu-sims")] |
| 274 | + { |
| 275 | + #[allow(clippy::cast_possible_truncation)] |
| 276 | + if let Ok(mut sim) = GpuStateVec::new(nq as u32) { |
| 277 | + group.bench_with_input( |
| 278 | + BenchmarkId::new("GpuStateVec_wgpu", nq), |
| 279 | + &nq, |
| 280 | + |b, &nq| { |
| 281 | + b.iter(|| { |
| 282 | + sim.reset(); |
| 283 | + for q in 0..nq { |
| 284 | + sim.h(&[QubitId(q)]); |
| 285 | + } |
| 286 | + for q in 0..nq { |
| 287 | + black_box(sim.mz(&[QubitId(q)])); |
| 288 | + } |
| 289 | + }); |
| 290 | + }, |
| 291 | + ); |
| 292 | + } |
| 293 | + } |
| 294 | + } |
| 295 | + |
| 296 | + group.finish(); |
| 297 | +} |
| 298 | + |
243 | 299 | /// Benchmark parallel vs sequential execution for large state vectors. |
244 | 300 | /// Only runs when the `parallel` feature is enabled on pecos-simulators. |
245 | 301 | #[cfg(feature = "parallel")] |
|
0 commit comments