Skip to content

Commit 9dd1a34

Browse files
committed
gpu optimizations
1 parent 3a31332 commit 9dd1a34

3 files changed

Lines changed: 250 additions & 159 deletions

File tree

crates/benchmarks/benches/modules/state_vec_sims.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ where
7070
/// Entry point for this benchmark module: calls each benchmark-group function
/// in turn, passing along the Criterion instance `c`.
pub fn benchmarks<M: Measurement>(c: &mut Criterion<M>) {
7171
bench_state_vec_scaling(c);
7272
bench_individual_gates(c);
73+
bench_measurement_scaling(c);
7374
// The parallel-execution benchmark is only compiled when the `parallel` feature is enabled.
#[cfg(feature = "parallel")]
7475
bench_parallel_execution(c);
7576
}
@@ -240,6 +241,61 @@ fn bench_individual_gates<M: Measurement>(c: &mut Criterion<M>) {
240241
group.finish();
241242
}
242243

244+
/// Benchmark measurement performance scaling across qubit counts.
245+
/// Measures all qubits after applying H to each (maximum uncertainty).
246+
/// Intended to exercise the GPU measurement optimization (workgroup reduction vs full readback).
///
/// Note: each timed iteration also includes `reset()` and the H gates, not just the
/// `mz` calls, so the reported times cover state preparation plus measurement.
/// Qubits are measured one at a time, with one `mz` call per qubit.
247+
fn bench_measurement_scaling<M: Measurement>(c: &mut Criterion<M>) {
248+
let mut group = c.benchmark_group("Measurement Scaling");
249+
// Use 20 samples per benchmark in this group.
group.sample_size(20);
250+
251+
// Qubit counts to sweep; each count is benchmarked on the CPU and (if enabled) the GPU.
let qubit_counts = [10, 14, 18, 20, 22];
252+
253+
for &nq in &qubit_counts {
254+
// CPU baseline: StateVecSoA
255+
group.bench_with_input(
256+
BenchmarkId::new("StateVec_CPU", nq),
257+
&nq,
258+
|b, &nq| {
259+
// The simulator is constructed once, outside the timed closure, and reset per iteration.
let mut sim = StateVecSoA::new(nq);
260+
b.iter(|| {
261+
sim.reset();
262+
// Apply H to every qubit.
for q in 0..nq {
263+
sim.h(&[QubitId(q)]);
264+
}
265+
// Measure every qubit in Z; black_box keeps the result from being optimized away.
for q in 0..nq {
266+
black_box(sim.mz(&[QubitId(q)]));
267+
}
268+
});
269+
},
270+
);
271+
272+
// GPU: GpuStateVec (wgpu) -- only compiled with the `gpu-sims` feature.
273+
#[cfg(feature = "gpu-sims")]
274+
{
275+
// `nq` comes from `qubit_counts` above (max 22), so the `as u32` cast does not truncate.
#[allow(clippy::cast_possible_truncation)]
276+
// If construction fails, the GPU benchmark for this qubit count is silently skipped
// (presumably when no usable GPU device is available -- TODO confirm).
if let Ok(mut sim) = GpuStateVec::new(nq as u32) {
277+
group.bench_with_input(
278+
BenchmarkId::new("GpuStateVec_wgpu", nq),
279+
&nq,
280+
|b, &nq| {
281+
// Same workload as the CPU baseline: reset, H on every qubit, then measure every qubit.
b.iter(|| {
282+
sim.reset();
283+
for q in 0..nq {
284+
sim.h(&[QubitId(q)]);
285+
}
286+
for q in 0..nq {
287+
black_box(sim.mz(&[QubitId(q)]));
288+
}
289+
});
290+
},
291+
);
292+
}
293+
}
294+
}
295+
296+
group.finish();
297+
}
298+
243299
/// Benchmark parallel vs sequential execution for large state vectors.
244300
/// Only runs when the `parallel` feature is enabled on pecos-simulators.
245301
#[cfg(feature = "parallel")]

0 commit comments

Comments
 (0)