From 93ba84bb03db4c689e40d24092864a953015916d Mon Sep 17 00:00:00 2001 From: Ciaran Ryan-Anderson Date: Fri, 27 Feb 2026 21:23:35 -0700 Subject: [PATCH 1/5] fix QuantumCircuit --- Cargo.lock | 117 ++--- Cargo.toml | 1 + crates/benchmarks/benches/benchmarks.rs | 6 + .../benches/modules/cuquantum.rs} | 46 +- crates/pecos-core/Cargo.toml | 6 + crates/pecos-core/src/gate_registry.rs | 406 ++++++++++++++++++ crates/pecos-core/src/gate_type.rs | 12 +- crates/pecos-core/src/gates.rs | 6 + crates/pecos-core/src/lib.rs | 7 + crates/pecos-core/src/value.rs | 249 +++++++++++ crates/pecos-cuquantum-sys/Cargo.toml | 2 +- crates/pecos-cuquantum/Cargo.toml | 9 - .../src/noise/biased_depolarizing.rs | 3 +- .../pecos-engines/src/noise/depolarizing.rs | 3 +- crates/pecos-engines/src/noise/utils.rs | 3 + crates/pecos-engines/src/quantum.rs | 4 +- .../pecos-experimental/src/hugr_executor.rs | 3 +- crates/pecos-qasm/src/engine.rs | 3 +- crates/pecos-quantum/src/lib.rs | 3 +- crates/pecos-quantum/src/tick_circuit.rs | 305 ++++++++++++- crates/pecos-quest/src/quantum_engine.rs | 9 +- crates/pecos/src/lib.rs | 4 +- .../pecos-rslib/src/dag_circuit_bindings.rs | 155 ++++++- .../pecos-rslib/src/gate_registry_bindings.rs | 329 ++++++++++++++ python/pecos-rslib/src/lib.rs | 4 + python/pecos-rslib/src/types_module.rs | 5 + python/quantum-pecos/src/pecos/__init__.py | 6 + .../src/pecos/circuits/quantum_circuit.py | 133 +++--- .../src/pecos/simulators/default_simulator.py | 41 +- .../src/pecos/simulators/sim_class_types.py | 48 --- uv.lock | 48 +-- 31 files changed, 1701 insertions(+), 275 deletions(-) rename crates/{pecos-cuquantum/benches/cuquantum_benchmark.rs => benchmarks/benches/modules/cuquantum.rs} (91%) create mode 100644 crates/pecos-core/src/gate_registry.rs create mode 100644 crates/pecos-core/src/value.rs create mode 100644 python/pecos-rslib/src/gate_registry_bindings.rs diff --git a/Cargo.lock b/Cargo.lock index e17914ca3..c6f85b994 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ 
-319,7 +319,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" name = "benchmarks" version = "0.1.1" dependencies = [ - "criterion 0.8.2", + "criterion", "num-complex 0.4.6", "pecos", "pecos-core", @@ -341,9 +341,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.71.1" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ "bitflags", "cexpr", @@ -1026,32 +1026,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot 0.5.0", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - [[package]] name = "criterion" version = "0.8.2" @@ -1063,7 +1037,7 @@ dependencies = [ "cast", "ciborium", "clap", - "criterion-plot 0.8.2", + "criterion-plot", "itertools 0.13.0", "num-traits", "oorandom", @@ -1077,16 +1051,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - [[package]] name = "criterion-plot" version = "0.8.2" @@ -2576,32 +2540,12 @@ dependencies = [ "serde", ] -[[package]] -name = "is-terminal" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" -dependencies = [ - "hermit-abi", - 
"libc", - "windows-sys 0.61.2", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2684,9 +2628,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.90" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -2779,7 +2723,7 @@ checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags", "libc", - "redox_syscall 0.7.2", + "redox_syscall 0.7.3", ] [[package]] @@ -3403,6 +3347,8 @@ dependencies = [ "rand 0.10.0", "rand_core 0.10.0", "rand_xoshiro 0.8.0", + "serde", + "serde_json", "smallvec", "thiserror 2.0.18", ] @@ -3422,7 +3368,6 @@ dependencies = [ name = "pecos-cuquantum" version = "0.1.1" dependencies = [ - "criterion 0.5.1", "env_logger", "fastrand", "log", @@ -4012,9 +3957,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -4546,9 +4491,9 @@ checksum = "77fbdd1602101dbbd6da38e7dd8d7bd47d864a23dd1b552d5ca3c20a8f41b2a3" [[package]] name = "range-alloc" -version = "0.1.4" +version = "0.1.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d6831663a5098ea164f89cff59c6284e95f4e3c76ce9848d4529f5ccca9bde" +checksum = "ca45419789ae5a7899559e9512e58ca889e41f04f1f2445e9f4b290ceccd1d08" [[package]] name = "rapidhash" @@ -4614,9 +4559,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d94dd2f7cd932d4dc02cc8b2b50dfd38bd079a4e5d79198b99743d7fcf9a4b4" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" dependencies = [ "bitflags", ] @@ -6049,9 +5994,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -6062,9 +6007,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.63" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a89f4650b770e4521aa6573724e2aed4704372151bd0de9d16a3bbabb87441a" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", "futures-util", @@ -6076,9 +6021,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6086,9 +6031,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", @@ -6099,9 +6044,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.113" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] @@ -6393,9 +6338,9 @@ checksum = "323f4da9523e9a669e1eaf9c6e763892769b1d38c623913647bfdc1532fe4549" [[package]] name = "web-sys" -version = "0.3.90" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -7130,18 +7075,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.39" +version = "0.8.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.39" +version = "0.8.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c5266ceee..cd9e39d68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ wat = "1" ron = "0.12" tket = { version = "0.17", default-features = false } tket-qsystem = { version = "0.23", default-features = false } +bindgen = "0.72" 
cc = "1" cxx = "1" cxx-build = "1" diff --git a/crates/benchmarks/benches/benchmarks.rs b/crates/benchmarks/benches/benchmarks.rs index 5919aa7ab..97bdfed4c 100644 --- a/crates/benchmarks/benches/benchmarks.rs +++ b/crates/benchmarks/benches/benchmarks.rs @@ -18,6 +18,8 @@ mod modules { pub mod dem_sampler; pub mod dod_statevec; // TODO: pub mod hadamard_ops; + #[cfg(feature = "cuquantum")] + pub mod cuquantum; #[cfg(feature = "gpu-sims")] pub mod gpu_influence_sampler; pub mod measurement_sampling; @@ -34,6 +36,8 @@ mod modules { pub mod trig; } +#[cfg(feature = "cuquantum")] +use modules::cuquantum; #[cfg(feature = "gpu-sims")] use modules::gpu_influence_sampler; #[cfg(feature = "cppsparsesim")] @@ -47,6 +51,8 @@ use modules::{ fn all_benchmarks(c: &mut Criterion) { allocation_overhead::benchmarks(c); cpu_stabilizer_comparison::benchmarks(c); + #[cfg(feature = "cuquantum")] + cuquantum::benchmarks(c); dem_sampler::benchmarks(c); dod_statevec::benchmarks(c); #[cfg(feature = "gpu-sims")] diff --git a/crates/pecos-cuquantum/benches/cuquantum_benchmark.rs b/crates/benchmarks/benches/modules/cuquantum.rs similarity index 91% rename from crates/pecos-cuquantum/benches/cuquantum_benchmark.rs rename to crates/benchmarks/benches/modules/cuquantum.rs index c4a0ff7c1..e1cd8081b 100644 --- a/crates/pecos-cuquantum/benches/cuquantum_benchmark.rs +++ b/crates/benchmarks/benches/modules/cuquantum.rs @@ -1,17 +1,29 @@ -//! Benchmarks for pecos-cuquantum GPU simulators +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License.You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under +// the License. + +//! Benchmarks for pecos-cuquantum GPU simulators. //! -//! These benchmarks compare cuQuantum GPU simulation performance against -//! other backends (e.g., wgpu-based simulators). +//! Benchmarks cuQuantum state vector and stabilizer simulation performance. //! -//! Run with: `cargo bench -p pecos-cuquantum --features integration-tests` +//! Run with: `cargo bench -p benchmarks --features cuquantum` //! //! **Requires cuQuantum to be installed.** -use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; +use criterion::{BenchmarkId, Criterion, Throughput}; use pecos_core::Angle64; use pecos_cuquantum::{CuStabilizer, CuStateVec, QubitId, TryClone, is_cuquantum_available}; use pecos_qsim::{ArbitraryRotationGateable, CliffordGateable, QuantumSimulator}; use std::f64::consts::PI; +use std::hint::black_box; /// Benchmark state vector simulation for different qubit counts fn bench_statevec_gates(c: &mut Criterion) { @@ -368,7 +380,7 @@ fn bench_sampling(c: &mut Criterion) { group.finish(); } -/// Benchmark Clone and TryClone operations +/// Benchmark Clone and `TryClone` operations fn bench_clone(c: &mut Criterion) { if !is_cuquantum_available() { eprintln!("Skipping clone benchmarks: cuQuantum not available"); @@ -424,15 +436,13 @@ fn bench_clone(c: &mut Criterion) { group.finish(); } -criterion_group!( - benches, - bench_statevec_gates, - bench_stabilizer_gates, - bench_bell_state, - bench_surface_code_syndrome, - bench_rotation_gates, - bench_two_qubit_rotation_gates, - bench_sampling, - bench_clone, -); -criterion_main!(benches); +pub fn benchmarks(c: &mut Criterion) { + bench_statevec_gates(c); + bench_stabilizer_gates(c); + bench_bell_state(c); + bench_surface_code_syndrome(c); + bench_rotation_gates(c); + bench_two_qubit_rotation_gates(c); + bench_sampling(c); + bench_clone(c); +} diff --git 
a/crates/pecos-core/Cargo.toml b/crates/pecos-core/Cargo.toml index 1b55e0cfb..7861f1a7b 100644 --- a/crates/pecos-core/Cargo.toml +++ b/crates/pecos-core/Cargo.toml @@ -22,13 +22,19 @@ smallvec.workspace = true thiserror.workspace = true # Optional dependencies for error conversions anyhow = { workspace = true, optional = true } +# Optional serde support +serde = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } [dev-dependencies] rand_xoshiro.workspace = true +serde_json.workspace = true [features] default = [] anyhow = ["dep:anyhow"] +serde = ["dep:serde"] +json = ["serde", "dep:serde_json"] [lints] workspace = true diff --git a/crates/pecos-core/src/gate_registry.rs b/crates/pecos-core/src/gate_registry.rs new file mode 100644 index 000000000..aa50eda41 --- /dev/null +++ b/crates/pecos-core/src/gate_registry.rs @@ -0,0 +1,406 @@ +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License.You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +//! Gate registration system for ahead-of-time custom gate definitions. +//! +//! This module provides a registry where users can define custom gates with +//! decompositions into base gates. Registered gates are decomposed at simulation +//! time, not at circuit construction time. + +use crate::gate_type::GateType; +use crate::value::Value; +use crate::{Angle64, QubitId}; +use std::collections::HashMap; + +/// A concrete decomposition step: (`gate_type`, qubits, angles, metadata). 
+pub type ConcreteStep = (GateType, Vec, Vec, HashMap); + +/// The signature of a gate: its quantum and angle arities. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GateSignature { + pub quantum_arity: usize, + pub angle_arity: usize, +} + +/// Where a decomposition step gets its angle value. +#[derive(Debug, Clone, PartialEq)] +pub enum AngleSource { + /// Use the i-th angle from the parent gate's input angles. + Input(u8), + /// A fixed angle value. + Fixed(Angle64), + /// Negate the i-th input angle. + NegInput(u8), +} + +/// A single gate in a decomposition sequence. +/// Qubit indices are positional -- index 0 is the first qubit of the custom gate, etc. +#[derive(Debug, Clone, PartialEq)] +pub struct DecompStep { + pub gate_type: GateType, + pub qubit_indices: Vec, + pub angles: Vec, + pub metadata: HashMap, +} + +/// Definition of a registered custom gate. +#[derive(Debug, Clone, PartialEq)] +pub struct GateDefinition { + pub name: String, + pub quantum_arity: usize, + pub angle_arity: usize, + pub decomposition: Vec, +} + +/// Registry mapping gate names to definitions with decompositions. +#[derive(Debug, Clone, Default)] +pub struct GateRegistry { + gates: HashMap, +} + +impl GateRegistry { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + pub fn register(&mut self, def: GateDefinition) { + self.gates.insert(def.name.clone(), def); + } + + #[must_use] + pub fn get(&self, name: &str) -> Option<&GateDefinition> { + self.gates.get(name) + } + + #[must_use] + pub fn contains(&self, name: &str) -> bool { + self.gates.contains_key(name) + } + + #[must_use] + pub fn len(&self) -> usize { + self.gates.len() + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.gates.is_empty() + } + + /// Extract signatures from all registered gates. 
+ #[must_use] + pub fn signatures(&self) -> HashMap { + self.gates + .iter() + .map(|(name, def)| { + ( + name.clone(), + GateSignature { + quantum_arity: def.quantum_arity, + angle_arity: def.angle_arity, + }, + ) + }) + .collect() + } + + /// Expand a custom gate into concrete (`GateType`, qubits, angles, metadata) tuples. + /// Returns None if not registered or decomposition is empty. + #[must_use] + pub fn decompose( + &self, + name: &str, + qubits: &[QubitId], + input_angles: &[Angle64], + ) -> Option> { + let def = self.gates.get(name)?; + if def.decomposition.is_empty() { + return None; + } + let mut result = Vec::with_capacity(def.decomposition.len()); + for step in &def.decomposition { + let concrete_qubits: Vec = step + .qubit_indices + .iter() + .map(|&idx| qubits[idx as usize]) + .collect(); + let concrete_angles: Vec = step + .angles + .iter() + .map(|src| match src { + AngleSource::Input(i) => input_angles[*i as usize], + AngleSource::Fixed(a) => *a, + AngleSource::NegInput(i) => -input_angles[*i as usize], + }) + .collect(); + result.push(( + step.gate_type, + concrete_qubits, + concrete_angles, + step.metadata.clone(), + )); + } + Some(result) + } +} + +/// Builder for constructing gate definitions with a fluent API. 
+pub struct GateDefinitionBuilder { + name: String, + quantum_arity: usize, + angle_arity: usize, + decomposition: Vec, +} + +impl GateDefinitionBuilder { + #[must_use] + pub fn new(name: impl Into, quantum_arity: usize) -> Self { + Self { + name: name.into(), + quantum_arity, + angle_arity: 0, + decomposition: Vec::new(), + } + } + + #[must_use] + pub fn angle_arity(mut self, arity: usize) -> Self { + self.angle_arity = arity; + self + } + + #[must_use] + pub fn step(mut self, gate_type: GateType, qubit_indices: &[u8]) -> Self { + self.decomposition.push(DecompStep { + gate_type, + qubit_indices: qubit_indices.to_vec(), + angles: Vec::new(), + metadata: HashMap::new(), + }); + self + } + + #[must_use] + pub fn step_with_angles( + mut self, + gate_type: GateType, + qubit_indices: &[u8], + angles: &[AngleSource], + ) -> Self { + self.decomposition.push(DecompStep { + gate_type, + qubit_indices: qubit_indices.to_vec(), + angles: angles.to_vec(), + metadata: HashMap::new(), + }); + self + } + + #[must_use] + pub fn step_with_metadata( + mut self, + gate_type: GateType, + qubit_indices: &[u8], + angles: &[AngleSource], + metadata: HashMap, + ) -> Self { + self.decomposition.push(DecompStep { + gate_type, + qubit_indices: qubit_indices.to_vec(), + angles: angles.to_vec(), + metadata, + }); + self + } + + #[must_use] + pub fn build(self) -> GateDefinition { + GateDefinition { + name: self.name, + quantum_arity: self.quantum_arity, + angle_arity: self.angle_arity, + decomposition: self.decomposition, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_builder_round_trip() { + let def = GateDefinitionBuilder::new("MY_SWAP", 2) + .step(GateType::CX, &[0, 1]) + .step(GateType::CX, &[1, 0]) + .step(GateType::CX, &[0, 1]) + .build(); + + assert_eq!(def.name, "MY_SWAP"); + assert_eq!(def.quantum_arity, 2); + assert_eq!(def.angle_arity, 0); + assert_eq!(def.decomposition.len(), 3); + assert_eq!(def.decomposition[0].gate_type, GateType::CX); + 
assert_eq!(def.decomposition[0].qubit_indices, vec![0, 1]); + assert_eq!(def.decomposition[1].qubit_indices, vec![1, 0]); + } + + #[test] + fn test_decompose_positional_qubits() { + let mut registry = GateRegistry::new(); + let def = GateDefinitionBuilder::new("MY_SWAP", 2) + .step(GateType::CX, &[0, 1]) + .step(GateType::CX, &[1, 0]) + .step(GateType::CX, &[0, 1]) + .build(); + registry.register(def); + + let qubits = [QubitId::from(5usize), QubitId::from(10usize)]; + let result = registry.decompose("MY_SWAP", &qubits, &[]).unwrap(); + assert_eq!(result.len(), 3); + assert_eq!( + result[0].1, + vec![QubitId::from(5usize), QubitId::from(10usize)] + ); + assert_eq!( + result[1].1, + vec![QubitId::from(10usize), QubitId::from(5usize)] + ); + assert_eq!( + result[2].1, + vec![QubitId::from(5usize), QubitId::from(10usize)] + ); + assert!(result[0].3.is_empty()); + } + + #[test] + fn test_angle_source_resolution() { + let mut registry = GateRegistry::new(); + let fixed_angle = Angle64::from_turns(0.25); + let def = GateDefinitionBuilder::new("CRZ_LIKE", 2) + .angle_arity(1) + .step_with_angles(GateType::RZ, &[1], &[AngleSource::Input(0)]) + .step(GateType::CX, &[0, 1]) + .step_with_angles(GateType::RZ, &[1], &[AngleSource::NegInput(0)]) + .step(GateType::CX, &[0, 1]) + .step_with_angles(GateType::RZ, &[0], &[AngleSource::Fixed(fixed_angle)]) + .build(); + registry.register(def); + + let qubits = [QubitId::from(0usize), QubitId::from(1usize)]; + let input_angle = Angle64::from_turns(0.125); + let result = registry + .decompose("CRZ_LIKE", &qubits, &[input_angle]) + .unwrap(); + + assert_eq!(result.len(), 5); + assert_eq!(result[0].2, vec![input_angle]); + assert!(result[1].2.is_empty()); + assert_eq!(result[2].2, vec![-input_angle]); + assert_eq!(result[4].2, vec![fixed_angle]); + } + + #[test] + fn test_step_with_metadata() { + let mut registry = GateRegistry::new(); + let mut meta = HashMap::new(); + meta.insert("duration".to_string(), Value::Float(100.0)); + 
meta.insert("label".to_string(), Value::String("fast".to_string())); + meta.insert("count".to_string(), Value::Int(3)); + meta.insert("noisy".to_string(), Value::Bool(true)); + + let def = GateDefinitionBuilder::new("ANNOTATED", 1) + .step_with_metadata(GateType::H, &[0], &[], meta) + .build(); + registry.register(def); + + let result = registry + .decompose("ANNOTATED", &[QubitId::from(0usize)], &[]) + .unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result[0].3.get("duration"), Some(&Value::Float(100.0))); + assert_eq!( + result[0].3.get("label"), + Some(&Value::String("fast".to_string())) + ); + assert_eq!(result[0].3.get("count"), Some(&Value::Int(3))); + assert_eq!(result[0].3.get("noisy"), Some(&Value::Bool(true))); + } + + #[test] + fn test_empty_decomposition_returns_none() { + let mut registry = GateRegistry::new(); + let def = GateDefinitionBuilder::new("EMPTY", 1).build(); + registry.register(def); + + let result = registry.decompose("EMPTY", &[QubitId::from(0usize)], &[]); + assert!(result.is_none()); + } + + #[test] + fn test_unregistered_gate_returns_none() { + let registry = GateRegistry::new(); + let result = registry.decompose("NONEXISTENT", &[QubitId::from(0usize)], &[]); + assert!(result.is_none()); + } + + #[test] + fn test_signatures() { + let mut registry = GateRegistry::new(); + let def1 = GateDefinitionBuilder::new("MY_SWAP", 2) + .step(GateType::CX, &[0, 1]) + .build(); + let def2 = GateDefinitionBuilder::new("MY_RZ", 1) + .angle_arity(1) + .step_with_angles(GateType::RZ, &[0], &[AngleSource::Input(0)]) + .build(); + registry.register(def1); + registry.register(def2); + + let sigs = registry.signatures(); + assert_eq!(sigs.len(), 2); + assert_eq!( + sigs.get("MY_SWAP"), + Some(&GateSignature { + quantum_arity: 2, + angle_arity: 0 + }) + ); + assert_eq!( + sigs.get("MY_RZ"), + Some(&GateSignature { + quantum_arity: 1, + angle_arity: 1 + }) + ); + } + + #[test] + fn test_registry_operations() { + let mut registry = GateRegistry::new(); 
+ assert!(registry.is_empty()); + assert_eq!(registry.len(), 0); + assert!(!registry.contains("SWAP")); + + let def = GateDefinitionBuilder::new("SWAP", 2) + .step(GateType::CX, &[0, 1]) + .build(); + registry.register(def); + + assert!(!registry.is_empty()); + assert_eq!(registry.len(), 1); + assert!(registry.contains("SWAP")); + assert!(registry.get("SWAP").is_some()); + assert_eq!(registry.get("SWAP").unwrap().quantum_arity, 2); + } +} diff --git a/crates/pecos-core/src/gate_type.rs b/crates/pecos-core/src/gate_type.rs index 0348f51d3..f42e02d57 100644 --- a/crates/pecos-core/src/gate_type.rs +++ b/crates/pecos-core/src/gate_type.rs @@ -101,6 +101,8 @@ pub enum GateType { Idle = 200, MeasCrosstalkGlobalPayload = 218, MeasCrosstalkLocalPayload = 219, + /// Custom/unrecognized gate type, with actual name stored in metadata + Custom = 255, } impl From for GateType { @@ -145,6 +147,7 @@ impl From for GateType { 200 => GateType::Idle, 218 => GateType::MeasCrosstalkGlobalPayload, 219 => GateType::MeasCrosstalkLocalPayload, + 255 => GateType::Custom, _ => panic!("Invalid gate type ID: {value}"), } } @@ -188,7 +191,8 @@ impl GateType { | GateType::MeasCrosstalkLocalPayload | GateType::Prep | GateType::QAlloc - | GateType::QFree => 0, + | GateType::QFree + | GateType::Custom => 0, // Gates with one parameter GateType::RX @@ -244,7 +248,8 @@ impl GateType { | GateType::QFree | GateType::Idle | GateType::MeasCrosstalkGlobalPayload - | GateType::MeasCrosstalkLocalPayload => 1, + | GateType::MeasCrosstalkLocalPayload + | GateType::Custom => 1, // Two-qubit gates GateType::CX @@ -356,6 +361,7 @@ impl fmt::Display for GateType { GateType::Idle => write!(f, "Idle"), GateType::MeasCrosstalkGlobalPayload => write!(f, "MeasCrosstalkGlobalPayload"), GateType::MeasCrosstalkLocalPayload => write!(f, "MeasCrosstalkLocalPayload"), + GateType::Custom => write!(f, "Custom"), } } } @@ -384,6 +390,7 @@ mod tests { assert_eq!(GateType::Idle as u8, 200); 
assert_eq!(GateType::MeasCrosstalkGlobalPayload as u8, 218); assert_eq!(GateType::MeasCrosstalkLocalPayload as u8, 219); + assert_eq!(GateType::Custom as u8, 255); assert_eq!(GateType::from(0u8), GateType::I); assert_eq!(GateType::from(1u8), GateType::X); @@ -403,6 +410,7 @@ mod tests { assert_eq!(GateType::from(200u8), GateType::Idle); assert_eq!(GateType::from(218u8), GateType::MeasCrosstalkGlobalPayload); assert_eq!(GateType::from(219u8), GateType::MeasCrosstalkLocalPayload); + assert_eq!(GateType::from(255u8), GateType::Custom); } #[test] diff --git a/crates/pecos-core/src/gates.rs b/crates/pecos-core/src/gates.rs index ff582cbcd..95eb66fdc 100644 --- a/crates/pecos-core/src/gates.rs +++ b/crates/pecos-core/src/gates.rs @@ -110,6 +110,12 @@ impl Gate { .collect() } + /// Create a Custom gate on the given qubits + #[must_use] + pub fn custom(qubits: impl Into) -> Self { + Self::simple(GateType::Custom, qubits) + } + /// Create Identity gate on multiple qubits #[must_use] pub fn i(qubits: &[impl Into + Copy]) -> Self { diff --git a/crates/pecos-core/src/lib.rs b/crates/pecos-core/src/lib.rs index 0c5e47a4c..a8604af38 100644 --- a/crates/pecos-core/src/lib.rs +++ b/crates/pecos-core/src/lib.rs @@ -19,6 +19,7 @@ pub mod clifford_rep; pub mod duration; pub mod element; pub mod errors; +pub mod gate_registry; pub mod gate_type; pub mod gates; pub mod index_set; @@ -30,6 +31,7 @@ pub mod qubit_id; pub mod rng; pub mod sets; pub mod sorted_vec_set; +pub mod value; pub use angle::{Angle, Angle8, Angle16, Angle32, Angle64, Angle128, LossyInto}; pub use bit::{Bit, Bits}; @@ -53,6 +55,10 @@ pub use rng::{choose_weighted, coin_flip, gen_bools}; // Random utilities struct for improved RNG API pub use rng::RandomUtils; +pub use gate_registry::{ + AngleSource, ConcreteStep, DecompStep, GateDefinition, GateDefinitionBuilder, GateRegistry, + GateSignature, +}; pub use gates::{Gate, GateAngles, GateParams, GateQubits}; pub use pauli::pauli_bitmap::PauliBitmap; pub use 
pauli::pauli_sparse::PauliSparse; @@ -60,6 +66,7 @@ pub use pauli::pauli_string::{ParsePauliStringError, PauliString}; pub use pauli::{Pauli, PauliOperator}; pub use phase::Phase; pub use rng::choices::Choices; +pub use value::Value; // Operator algebra pub use operator::{I, Is, Operator, X, Xs, Y, Ys, Z, Zs}; diff --git a/crates/pecos-core/src/value.rs b/crates/pecos-core/src/value.rs new file mode 100644 index 000000000..c59a39915 --- /dev/null +++ b/crates/pecos-core/src/value.rs @@ -0,0 +1,249 @@ +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License.You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +//! A general-purpose typed value for structured data. +//! +//! `Value` provides a canonical enum for carrying heterogeneous data +//! (strings, numbers, booleans, and nested structures) across the PECOS crate +//! ecosystem. Optional serde/JSON support is available behind feature flags. + +use std::collections::HashMap; +use std::fmt; + +/// A general-purpose typed value for structured data. 
+#[derive(Debug, Clone, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub enum Value { + String(String), + Int(i64), + Float(f64), + Bool(bool), + List(Vec), + Dict(HashMap), +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Value::String(s) => write!(f, "\"{s}\""), + Value::Int(i) => write!(f, "{i}"), + Value::Float(v) => write!(f, "{v}"), + Value::Bool(b) => write!(f, "{b}"), + Value::List(items) => { + write!(f, "[")?; + for (i, item) in items.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{item}")?; + } + write!(f, "]") + } + Value::Dict(map) => { + write!(f, "{{")?; + for (i, (k, v)) in map.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "\"{k}\": {v}")?; + } + write!(f, "}}") + } + } + } +} + +impl From for Value { + fn from(s: String) -> Self { + Value::String(s) + } +} + +impl From<&str> for Value { + fn from(s: &str) -> Self { + Value::String(s.to_string()) + } +} + +impl From for Value { + fn from(i: i64) -> Self { + Value::Int(i) + } +} + +impl From for Value { + fn from(f: f64) -> Self { + Value::Float(f) + } +} + +impl From for Value { + fn from(b: bool) -> Self { + Value::Bool(b) + } +} + +impl From> for Value { + fn from(v: Vec) -> Self { + Value::List(v) + } +} + +impl From> for Value { + fn from(m: HashMap) -> Self { + Value::Dict(m) + } +} + +#[cfg(feature = "json")] +impl From for Value { + fn from(json: serde_json::Value) -> Self { + match json { + serde_json::Value::Null => Value::String(String::new()), + serde_json::Value::Bool(b) => Value::Bool(b), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Value::Int(i) + } else { + Value::Float(n.as_f64().unwrap_or(0.0)) + } + } + serde_json::Value::String(s) => Value::String(s), + serde_json::Value::Array(arr) => { + Value::List(arr.into_iter().map(Value::from).collect()) + } + serde_json::Value::Object(obj) => { + 
Value::Dict(obj.into_iter().map(|(k, v)| (k, Value::from(v))).collect()) + } + } + } +} + +#[cfg(feature = "json")] +impl From for serde_json::Value { + fn from(val: Value) -> Self { + match val { + Value::String(s) => serde_json::Value::String(s), + Value::Int(i) => serde_json::Value::Number(i.into()), + Value::Float(f) => serde_json::Number::from_f64(f) + .map_or(serde_json::Value::Null, serde_json::Value::Number), + Value::Bool(b) => serde_json::Value::Bool(b), + Value::List(items) => { + serde_json::Value::Array(items.into_iter().map(serde_json::Value::from).collect()) + } + Value::Dict(map) => serde_json::Value::Object( + map.into_iter() + .map(|(k, v)| (k, serde_json::Value::from(v))) + .collect(), + ), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_display() { + assert_eq!(Value::String("hello".into()).to_string(), "\"hello\""); + assert_eq!(Value::Int(42).to_string(), "42"); + assert_eq!(Value::Float(2.78).to_string(), "2.78"); + assert_eq!(Value::Bool(true).to_string(), "true"); + assert_eq!( + Value::List(vec![Value::Int(1), Value::Int(2)]).to_string(), + "[1, 2]" + ); + } + + #[test] + fn test_from_conversions() { + assert_eq!(Value::from("hello"), Value::String("hello".into())); + assert_eq!(Value::from(42i64), Value::Int(42)); + assert_eq!(Value::from(2.78f64), Value::Float(2.78)); + assert_eq!(Value::from(true), Value::Bool(true)); + } + + #[test] + fn test_nested_structures() { + let mut inner = HashMap::new(); + inner.insert("x".to_string(), Value::Int(1)); + let val = Value::Dict(inner); + + let list = Value::List(vec![val.clone(), Value::String("test".into())]); + if let Value::List(items) = &list { + assert_eq!(items.len(), 2); + if let Value::Dict(d) = &items[0] { + assert_eq!(d.get("x"), Some(&Value::Int(1))); + } else { + panic!("Expected Dict"); + } + } else { + panic!("Expected List"); + } + } + + #[cfg(feature = "json")] + #[test] + fn test_json_round_trip() { + let val = Value::Dict(HashMap::from([ + 
("name".to_string(), Value::String("test".into())), + ("count".to_string(), Value::Int(42)), + ("rate".to_string(), Value::Float(2.78)), + ("active".to_string(), Value::Bool(true)), + ( + "tags".to_string(), + Value::List(vec![Value::String("a".into()), Value::String("b".into())]), + ), + ])); + + let json: serde_json::Value = val.clone().into(); + let back: Value = json.into(); + + // Int and Float round-trip correctly + assert_eq!(back.clone(), val); + + // Check JSON structure + let json2: serde_json::Value = back.into(); + assert_eq!(json2["name"], "test"); + assert_eq!(json2["count"], 42); + assert_eq!(json2["active"], true); + } + + #[cfg(feature = "json")] + #[test] + fn test_json_null_becomes_empty_string() { + let json = serde_json::Value::Null; + let val: Value = json.into(); + assert_eq!(val, Value::String(String::new())); + } + + #[cfg(feature = "json")] + #[test] + fn test_json_nested_objects() { + let json: serde_json::Value = serde_json::json!({ + "outer": { + "inner": [1, 2, 3] + } + }); + let val: Value = json.into(); + if let Value::Dict(map) = &val + && let Some(Value::Dict(inner_map)) = map.get("outer") + && let Some(Value::List(items)) = inner_map.get("inner") + { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Int(1)); + return; + } + panic!("Unexpected structure: {val:?}"); + } +} diff --git a/crates/pecos-cuquantum-sys/Cargo.toml b/crates/pecos-cuquantum-sys/Cargo.toml index cc89978a0..b742a6758 100644 --- a/crates/pecos-cuquantum-sys/Cargo.toml +++ b/crates/pecos-cuquantum-sys/Cargo.toml @@ -14,7 +14,7 @@ readme = "README.md" [dependencies] [build-dependencies] -bindgen = "0.71" +bindgen.workspace = true pecos-build = { path = "../pecos-build" } log.workspace = true env_logger.workspace = true diff --git a/crates/pecos-cuquantum/Cargo.toml b/crates/pecos-cuquantum/Cargo.toml index 875502661..e1a328a13 100644 --- a/crates/pecos-cuquantum/Cargo.toml +++ b/crates/pecos-cuquantum/Cargo.toml @@ -24,10 +24,6 @@ pecos-build = { path = 
"../pecos-build" } log.workspace = true env_logger.workspace = true -[dev-dependencies] -# For testing, we don't actually need CUDA - tests use stub mode -criterion = { version = "0.5", features = ["html_reports"] } - [features] default = [] # Enable this feature when cuQuantum is installed and you want to run integration tests @@ -36,8 +32,3 @@ integration-tests = [] [package.metadata.docs.rs] # Don't try to build docs on docs.rs (no CUDA/cuQuantum available) targets = [] - -[[bench]] -name = "cuquantum_benchmark" -harness = false -required-features = ["integration-tests"] diff --git a/crates/pecos-engines/src/noise/biased_depolarizing.rs b/crates/pecos-engines/src/noise/biased_depolarizing.rs index 97988241f..30943f26e 100644 --- a/crates/pecos-engines/src/noise/biased_depolarizing.rs +++ b/crates/pecos-engines/src/noise/biased_depolarizing.rs @@ -213,7 +213,8 @@ impl BiasedDepolarizingNoiseModel { | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => {} + | GateType::QFree + | GateType::Custom => {} } } diff --git a/crates/pecos-engines/src/noise/depolarizing.rs b/crates/pecos-engines/src/noise/depolarizing.rs index eded2c7b2..6d8b6539a 100644 --- a/crates/pecos-engines/src/noise/depolarizing.rs +++ b/crates/pecos-engines/src/noise/depolarizing.rs @@ -218,7 +218,8 @@ impl DepolarizingNoiseModel { | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => { + | GateType::QFree + | GateType::Custom => { // Just pass through with no added noise // QFree has no physical operation to apply noise to } diff --git a/crates/pecos-engines/src/noise/utils.rs b/crates/pecos-engines/src/noise/utils.rs index 5ff0dd208..e6a97af52 100644 --- a/crates/pecos-engines/src/noise/utils.rs +++ b/crates/pecos-engines/src/noise/utils.rs @@ -250,6 +250,9 @@ impl NoiseUtils { builder.add_idle(gate.params[0], &qubits_usize); } + // Custom is a placeholder (actual gate 
name is in metadata); skip it + GateType::Custom => {} + // Invalid cases (not enough qubits, missing parameters, etc.) _ => panic!( "Invalid gate type {:?} or insufficient parameters/qubits", diff --git a/crates/pecos-engines/src/quantum.rs b/crates/pecos-engines/src/quantum.rs index a305827dc..ba8ac761b 100644 --- a/crates/pecos-engines/src/quantum.rs +++ b/crates/pecos-engines/src/quantum.rs @@ -496,10 +496,12 @@ where | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => { + | GateType::QFree + | GateType::Custom => { // Just let the system naturally evolve for the specified duration // No active operation needed in the simulator // QFree is a no-op for state vector simulation (qubit tracking is handled elsewhere) + // Custom is a no-op placeholder (actual gate name is in metadata) } GateType::SY | GateType::SYdg | GateType::RXX | GateType::RYY => { return Err(quantum_error(format!( diff --git a/crates/pecos-experimental/src/hugr_executor.rs b/crates/pecos-experimental/src/hugr_executor.rs index b1f8564ba..3db8bc720 100644 --- a/crates/pecos-experimental/src/hugr_executor.rs +++ b/crates/pecos-experimental/src/hugr_executor.rs @@ -308,7 +308,8 @@ where | GateType::SWAP | GateType::CRZ | GateType::CH - | GateType::CCX => { + | GateType::CCX + | GateType::Custom => { return Err(HugrExecutionError::UnsupportedGate { gate_type: gate.gate_type, gate_index: gate_idx, diff --git a/crates/pecos-qasm/src/engine.rs b/crates/pecos-qasm/src/engine.rs index 0925dce9e..2d62d3ff3 100644 --- a/crates/pecos-qasm/src/engine.rs +++ b/crates/pecos-qasm/src/engine.rs @@ -627,7 +627,8 @@ impl QASMEngine { | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => Ok(()), // No-op gates (QFree is just a marker) + | GateType::QFree + | GateType::Custom => Ok(()), // No-op gates (QFree is just a marker, Custom is a placeholder) GateType::X | GateType::Z | GateType::Y 
diff --git a/crates/pecos-quantum/src/lib.rs b/crates/pecos-quantum/src/lib.rs index 46b734f65..2f1058619 100644 --- a/crates/pecos-quantum/src/lib.rs +++ b/crates/pecos-quantum/src/lib.rs @@ -77,7 +77,8 @@ pub use dag_circuit::{ Attribute, DagCircuit, DagTraversalIndex, MeasureHandle, PrepHandle, TraversalWorkBuffers, }; pub use tick_circuit::{ - QubitConflictError, Tick, TickCircuit, TickHandle, TickMeasureHandle, TickPrepHandle, + CustomGateError, GateSignatureMismatchError, QubitConflictError, Tick, TickCircuit, TickHandle, + TickMeasureHandle, TickPrepHandle, }; pub use tick_circuit_soa::{ CircuitIndexes, GateBatch, GateId, GateStorage, MetadataStorage, TickBatches, TickCircuitSoA, diff --git a/crates/pecos-quantum/src/tick_circuit.rs b/crates/pecos-quantum/src/tick_circuit.rs index 4806f8f82..0e655ace7 100644 --- a/crates/pecos-quantum/src/tick_circuit.rs +++ b/crates/pecos-quantum/src/tick_circuit.rs @@ -61,8 +61,8 @@ //! ``` use pecos_core::gate_type::GateType; -use pecos_core::{Angle64, Gate, GateQubits, Nanoseconds, QubitId}; -use std::collections::{BTreeMap, BTreeSet}; +use pecos_core::{Angle64, Gate, GateQubits, GateSignature, Nanoseconds, QubitId}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use crate::Attribute; use crate::dag_circuit::DagCircuit; @@ -103,6 +103,62 @@ impl fmt::Display for QubitConflictError { impl std::error::Error for QubitConflictError {} +/// Error when a custom gate is used with a different signature than previously established. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GateSignatureMismatchError { + pub name: String, + pub expected_quantum_arity: usize, + pub actual_quantum_arity: usize, + pub expected_angle_arity: usize, + pub actual_angle_arity: usize, +} + +impl fmt::Display for GateSignatureMismatchError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Gate '{}' signature mismatch: expected ({} qubits, {} angles), got ({} qubits, {} angles)", + self.name, + self.expected_quantum_arity, + self.expected_angle_arity, + self.actual_quantum_arity, + self.actual_angle_arity, + ) + } +} + +impl std::error::Error for GateSignatureMismatchError {} + +/// Error when adding a custom gate to a tick. +#[derive(Debug, Clone)] +pub enum CustomGateError { + SignatureMismatch(GateSignatureMismatchError), + QubitConflict(QubitConflictError), +} + +impl fmt::Display for CustomGateError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::SignatureMismatch(e) => write!(f, "{e}"), + Self::QubitConflict(e) => write!(f, "{e}"), + } + } +} + +impl std::error::Error for CustomGateError {} + +impl From for CustomGateError { + fn from(e: GateSignatureMismatchError) -> Self { + Self::SignatureMismatch(e) + } +} + +impl From for CustomGateError { + fn from(e: QubitConflictError) -> Self { + Self::QubitConflict(e) + } +} + /// A single time slice containing gates that execute in parallel. #[derive(Debug, Clone, Default)] pub struct Tick { @@ -372,6 +428,8 @@ pub struct TickCircuit { next_tick: usize, /// Circuit-level metadata. circuit_attrs: BTreeMap, + /// Gate signatures for custom gate validation (JIT + AOT). + gate_signatures: HashMap, } /// Handle to a specific tick for adding gates. @@ -452,17 +510,14 @@ impl TickCircuit { ticks: Vec::new(), next_tick: 0, circuit_attrs: BTreeMap::new(), + gate_signatures: HashMap::new(), } } - /// Get the number of ticks (excluding trailing empty ticks). + /// Get the number of ticks in the circuit. 
#[must_use] pub fn num_ticks(&self) -> usize { - let mut count = self.ticks.len(); - while count > 0 && self.ticks[count - 1].is_empty() { - count -= 1; - } - count + self.ticks.len() } /// Get the total number of gates across all ticks. @@ -593,6 +648,7 @@ impl TickCircuit { self.ticks.clear(); self.next_tick = 0; self.circuit_attrs.clear(); + self.gate_signatures.clear(); } /// Reserve empty ticks in advance. @@ -731,6 +787,57 @@ impl TickCircuit { .map(|tick| tick.discard(&qubit_ids)) } + // ========================================================================= + // Gate signature validation + // ========================================================================= + + /// Import gate signatures in bulk (e.g., from a `GateRegistry`). + pub fn import_signatures(&mut self, sigs: &HashMap) { + self.gate_signatures + .extend(sigs.iter().map(|(name, sig)| (name.clone(), sig.clone()))); + } + + /// Get read access to the gate signatures. + #[must_use] + pub fn gate_signatures(&self) -> &HashMap { + &self.gate_signatures + } + + /// Validate a custom gate against its previously established signature, + /// or register it if this is the first use. + /// + /// # Errors + /// + /// Returns `GateSignatureMismatchError` if the gate has been seen before + /// with a different quantum or angle arity. 
+ pub fn validate_or_register_gate( + &mut self, + name: &str, + quantum_arity: usize, + angle_arity: usize, + ) -> Result<(), GateSignatureMismatchError> { + if let Some(existing) = self.gate_signatures.get(name) { + if existing.quantum_arity != quantum_arity || existing.angle_arity != angle_arity { + return Err(GateSignatureMismatchError { + name: name.to_string(), + expected_quantum_arity: existing.quantum_arity, + actual_quantum_arity: quantum_arity, + expected_angle_arity: existing.angle_arity, + actual_angle_arity: angle_arity, + }); + } + } else { + self.gate_signatures.insert( + name.to_string(), + GateSignature { + quantum_arity, + angle_arity, + }, + ); + } + Ok(()) + } + // ========================================================================= // Iteration helpers // ========================================================================= @@ -1442,6 +1549,52 @@ impl<'a> TickHandle<'a> { qubits.iter().map(|&q| q.into()).collect::(), )) } + + // ========================================================================= + // Custom gates with signature validation + // ========================================================================= + + /// Add a custom gate with signature validation. + /// + /// On first use, the gate name's signature (quantum arity, angle arity) + /// is recorded. Subsequent uses are validated against this signature. + /// + /// The `_symbol` metadata is automatically set to the gate name. + /// + /// # Errors + /// + /// Returns `CustomGateError::SignatureMismatch` if the arity does not match + /// a previous use, or `CustomGateError::QubitConflict` if a qubit is already + /// in use in this tick. 
+ pub fn custom_gate( + &mut self, + name: &str, + qubits: &[usize], + angles: &[Angle64], + ) -> Result<&mut Self, CustomGateError> { + self.circuit + .validate_or_register_gate(name, qubits.len(), angles.len())?; + + let qubit_ids: GateQubits = qubits.iter().map(|&q| QubitId::from(q)).collect(); + let gate = Gate::new(GateType::Custom, angles.to_vec(), vec![], qubit_ids); + + match self.circuit.ticks[self.tick_idx].try_add_gate(gate) { + Ok(idx) => { + self.last_gate_idx = Some(idx); + // Auto-store _symbol metadata + self.circuit.ticks[self.tick_idx].set_gate_attr( + idx, + "_symbol", + Attribute::String(name.to_string()), + ); + Ok(self) + } + Err(mut err) => { + err.tick_idx = Some(self.tick_idx); + Err(CustomGateError::QubitConflict(err)) + } + } + } } // ============================================================================ @@ -2361,4 +2514,140 @@ mod tests { let removed = tc.discard(&[0], 5); assert_eq!(removed, None); } + + // ========================================================================= + // Gate signature validation tests + // ========================================================================= + + #[test] + fn test_custom_gate_jit_registration() { + let mut tc = TickCircuit::new(); + tc.tick() + .custom_gate("MY_GATE", &[0, 1], &[]) + .expect("first use should succeed"); + + assert!(tc.gate_signatures().contains_key("MY_GATE")); + let sig = &tc.gate_signatures()["MY_GATE"]; + assert_eq!(sig.quantum_arity, 2); + assert_eq!(sig.angle_arity, 0); + } + + #[test] + fn test_custom_gate_consistent_use_ok() { + let mut tc = TickCircuit::new(); + tc.tick() + .custom_gate("MY_GATE", &[0, 1], &[]) + .expect("first use"); + tc.tick() + .custom_gate("MY_GATE", &[2, 3], &[]) + .expect("consistent use should succeed"); + } + + #[test] + fn test_custom_gate_mismatch_quantum_arity() { + let mut tc = TickCircuit::new(); + tc.tick() + .custom_gate("MY_GATE", &[0, 1], &[]) + .expect("first use"); + let mut handle = tc.tick(); + let result = 
handle.custom_gate("MY_GATE", &[0, 1, 2], &[]); + if let Err(CustomGateError::SignatureMismatch(e)) = result { + assert_eq!(e.expected_quantum_arity, 2); + assert_eq!(e.actual_quantum_arity, 3); + } else { + panic!("expected SignatureMismatch error"); + } + } + + #[test] + fn test_custom_gate_mismatch_angle_arity() { + let mut tc = TickCircuit::new(); + let angle = Angle64::from_radians(1.0); + tc.tick() + .custom_gate("PARAM_GATE", &[0], &[angle]) + .expect("first use"); + let mut handle = tc.tick(); + let result = handle.custom_gate("PARAM_GATE", &[0], &[]); + if let Err(CustomGateError::SignatureMismatch(e)) = result { + assert_eq!(e.expected_angle_arity, 1); + assert_eq!(e.actual_angle_arity, 0); + } else { + panic!("expected SignatureMismatch error"); + } + } + + #[test] + fn test_custom_gate_stores_symbol_metadata() { + let mut tc = TickCircuit::new(); + tc.tick() + .custom_gate("FOOBAR", &[0], &[]) + .expect("should succeed"); + + let tick = tc.get_tick(0).unwrap(); + let symbol = tick.get_gate_attr(0, "_symbol"); + assert_eq!(symbol, Some(&Attribute::String("FOOBAR".to_string()))); + } + + #[test] + fn test_custom_gate_with_angles() { + let mut tc = TickCircuit::new(); + let a1 = Angle64::from_radians(0.5); + let a2 = Angle64::from_radians(1.0); + tc.tick() + .custom_gate("PARAM2", &[0], &[a1, a2]) + .expect("should succeed"); + + let tick = tc.get_tick(0).unwrap(); + let gate = &tick.gates()[0]; + assert_eq!(gate.gate_type, GateType::Custom); + assert_eq!(gate.angles.len(), 2); + assert_eq!(gate.angles[0], a1); + assert_eq!(gate.angles[1], a2); + } + + #[test] + fn test_custom_gate_qubit_conflict() { + let mut tc = TickCircuit::new(); + let mut handle = tc.tick(); + handle.h(&[0]); + let result = handle.custom_gate("MY_GATE", &[0], &[]); + assert!(matches!(result, Err(CustomGateError::QubitConflict(_)))); + } + + #[test] + fn test_import_signatures() { + let mut tc = TickCircuit::new(); + let mut sigs = HashMap::new(); + sigs.insert( + 
"AOT_GATE".to_string(), + GateSignature { + quantum_arity: 2, + angle_arity: 1, + }, + ); + tc.import_signatures(&sigs); + + // Now using AOT_GATE with correct arity succeeds + let angle = Angle64::from_radians(0.5); + tc.tick() + .custom_gate("AOT_GATE", &[0, 1], &[angle]) + .expect("correct arity"); + + // Wrong arity fails + let mut handle = tc.tick(); + let result = handle.custom_gate("AOT_GATE", &[0], &[angle]); + assert!(matches!(result, Err(CustomGateError::SignatureMismatch(_)))); + } + + #[test] + fn test_reset_clears_signatures() { + let mut tc = TickCircuit::new(); + tc.tick() + .custom_gate("MY_GATE", &[0, 1], &[]) + .expect("first use"); + assert!(!tc.gate_signatures().is_empty()); + + tc.reset(); + assert!(tc.gate_signatures().is_empty()); + } } diff --git a/crates/pecos-quest/src/quantum_engine.rs b/crates/pecos-quest/src/quantum_engine.rs index 9b110d3e0..a1e4d9938 100644 --- a/crates/pecos-quest/src/quantum_engine.rs +++ b/crates/pecos-quest/src/quantum_engine.rs @@ -192,7 +192,8 @@ impl Engine for QuestStateVecEngine { | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => { + | GateType::QFree + | GateType::Custom => { // No operation needed (QFree is just a marker for qubit lifecycle) } GateType::U => { @@ -412,7 +413,8 @@ impl Engine for QuestDensityMatrixEngine { | GateType::Idle | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload - | GateType::QFree => { + | GateType::QFree + | GateType::Custom => { // No operation needed (QFree is just a marker for qubit lifecycle) } GateType::U => { @@ -1157,10 +1159,11 @@ impl Engine for QuestCudaStateVecEngine { } GateType::I | GateType::Idle + | GateType::Custom | GateType::MeasCrosstalkLocalPayload | GateType::MeasCrosstalkGlobalPayload | GateType::QFree => { - // No operation needed (QFree is just a marker for qubit lifecycle) + // No operation needed (Custom is a placeholder whose actual gate name is in metadata) } 
GateType::SY | GateType::SYdg | GateType::RXX | GateType::RYY => { return Err(PecosError::Processing(format!( diff --git a/crates/pecos/src/lib.rs b/crates/pecos/src/lib.rs index 35946ac8f..0ff590510 100644 --- a/crates/pecos/src/lib.rs +++ b/crates/pecos/src/lib.rs @@ -191,8 +191,8 @@ pub mod engines { pub mod quantum { // Circuit representation from pecos-quantum pub use pecos_quantum::{ - Attribute, Circuit, CircuitMut, DagCircuit, DagWouldCycleError, Gate, GateHandle, GateType, - GateView, QubitId, Tick, TickCircuit, + Attribute, Circuit, CircuitMut, CustomGateError, DagCircuit, DagWouldCycleError, Gate, + GateHandle, GateType, GateView, QubitId, Tick, TickCircuit, }; // HUGR conversion (requires hugr feature) diff --git a/python/pecos-rslib/src/dag_circuit_bindings.rs b/python/pecos-rslib/src/dag_circuit_bindings.rs index eb74f1322..6e92e6ec9 100644 --- a/python/pecos-rslib/src/dag_circuit_bindings.rs +++ b/python/pecos-rslib/src/dag_circuit_bindings.rs @@ -23,10 +23,12 @@ //! from the pecos-quantum crate, as well as HUGR conversion utilities. use crate::dtypes::AngleParam; -use pecos::core::{Angle64, Nanoseconds, TimeUnits}; +use crate::gate_registry_bindings::PyGateRegistry; +use pecos::core::{Angle64, GateQubits, GateSignature, Nanoseconds, TimeUnits}; use pecos::quantum::{Attribute, DagCircuit, Gate, GateType, QubitId, Tick, TickCircuit}; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; +use std::collections::HashMap; /// Convert a Rust Attribute to a Python object. 
fn attribute_to_py(py: Python<'_>, attr: &Attribute) -> Py { @@ -407,6 +409,14 @@ impl PyGateType { inner: GateType::QFree, } } + + #[classattr] + #[pyo3(name = "Custom")] + fn custom() -> Self { + Self { + inner: GateType::Custom, + } + } } impl From for PyGateType { @@ -1342,6 +1352,12 @@ pyo3::create_exception!( ); // Qubit conflict exception +pyo3::create_exception!( + pecos_rslib, + GateSignatureMismatchError, + pyo3::exceptions::PyValueError +); + pyo3::create_exception!( pecos_rslib, QubitConflictError, @@ -2154,6 +2170,52 @@ impl PyTickCircuit { } } + // ========================================================================= + // Gate signature validation + // ========================================================================= + + /// Import gate signatures for validation. + /// + /// Args: + /// sigs: A dictionary mapping gate names to (`quantum_arity`, `angle_arity`) tuples. + fn import_gate_signatures(&mut self, sigs: &Bound<'_, PyDict>) -> PyResult<()> { + let mut sig_map = HashMap::new(); + for (key, value) in sigs.iter() { + let name: String = key.extract()?; + let (quantum_arity, angle_arity): (usize, usize) = value.extract()?; + sig_map.insert( + name, + GateSignature { + quantum_arity, + angle_arity, + }, + ); + } + self.inner.import_signatures(&sig_map); + Ok(()) + } + + /// Get gate signatures as a dictionary. + /// + /// Returns: + /// A dictionary mapping gate names to (`quantum_arity`, `angle_arity`) tuples. + fn gate_signatures(&self, py: Python<'_>) -> PyResult> { + let dict = PyDict::new(py); + for (name, sig) in self.inner.gate_signatures() { + dict.set_item(name, (sig.quantum_arity, sig.angle_arity))?; + } + Ok(dict.into()) + } + + /// Import signatures from a `GateRegistry`. + /// + /// Extracts signatures from all registered gates and imports them + /// for validation when adding custom gates. 
+ fn import_registry(&mut self, registry: &PyGateRegistry) { + let sigs = registry.inner.signatures(); + self.inner.import_signatures(&sigs); + } + fn __repr__(&self) -> String { format!( "TickCircuit(ticks={}, gates={})", @@ -2596,6 +2658,93 @@ impl PyTickHandle { Ok(slf) } + // ========================================================================= + // Custom (unrecognized) gates + // ========================================================================= + + /// Add a custom (unrecognized) gate on the given qubits. + fn custom(slf: Py, py: Python<'_>, qubits: Vec) -> PyResult> { + let qubit_ids: GateQubits = qubits.into_iter().map(QubitId::from).collect(); + slf.borrow_mut(py) + .add_gate_internal(py, Gate::custom(qubit_ids))?; + Ok(slf) + } + + /// Add a custom gate with signature validation. + /// + /// On first use, the gate name's signature (quantum arity, angle arity) + /// is recorded. Subsequent uses are validated against this signature. + /// + /// Args: + /// name: The gate name. + /// qubits: List of qubit IDs. + /// angles: Optional list of angle values (radians). + /// + /// Raises: + /// `GateSignatureMismatchError`: If the arity does not match a previous use. + /// `QubitConflictError`: If a qubit is already in use in this tick. 
+ #[pyo3(signature = (name, qubits, angles=None))] + fn custom_gate( + slf: Py, + py: Python<'_>, + name: &str, + qubits: Vec, + angles: Option>, + ) -> PyResult> { + let angle_vals: Vec = angles + .unwrap_or_default() + .into_iter() + .map(Angle64::from_radians) + .collect(); + + let handle = slf.borrow_mut(py); + let tick_idx = handle.tick_idx; + let circuit_py = handle.circuit.clone_ref(py); + + // Validate/register and add gate + let mut circuit = circuit_py.borrow_mut(py); + match circuit + .inner + .validate_or_register_gate(name, qubits.len(), angle_vals.len()) + { + Ok(()) => {} + Err(e) => { + return Err(PyErr::new::(e.to_string())); + } + } + + let qubit_ids: GateQubits = qubits.into_iter().map(QubitId::from).collect(); + let gate = Gate::new(GateType::Custom, angle_vals, vec![], qubit_ids); + + if let Some(tick) = circuit.inner.get_tick_mut(tick_idx) { + match tick.try_add_gate(gate) { + Ok(idx) => { + tick.set_gate_attr(idx, "_symbol", Attribute::String(name.to_string())); + drop(circuit); + drop(handle); + // Update last_gate_idx through a fresh borrow + slf.borrow_mut(py).last_gate_idx = Some(idx); + Ok(slf) + } + Err(err) => { + let msg = format!( + "Qubit(s) {:?} already in use in tick {}", + err.conflicting_qubits + .iter() + .map(std::string::ToString::to_string) + .collect::>(), + tick_idx + ); + Err(PyErr::new::(msg)) + } + } + } else { + drop(circuit); + drop(handle); + Ok(slf) + } + } + // ========================================================================= // State preparation and measurement // ========================================================================= @@ -2729,6 +2878,10 @@ pub fn register_quantum_circuit_types(parent_module: &Bound<'_, PyModule>) -> Py )?; parent_module.add("HugrConversionError", py.get_type::())?; parent_module.add("QubitConflictError", py.get_type::())?; + parent_module.add( + "GateSignatureMismatchError", + py.get_type::(), + )?; // Add HUGR conversion functions 
parent_module.add_function(wrap_pyfunction!(py_hugr_to_dag_circuit, parent_module)?)?; diff --git a/python/pecos-rslib/src/gate_registry_bindings.rs b/python/pecos-rslib/src/gate_registry_bindings.rs new file mode 100644 index 000000000..24e0cc205 --- /dev/null +++ b/python/pecos-rslib/src/gate_registry_bindings.rs @@ -0,0 +1,329 @@ +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +//! Python bindings for the gate registration system. + +use pecos::core::Value; +use pecos::core::gate_type::GateType; +use pecos::core::{Angle64, AngleSource, GateDefinitionBuilder, GateRegistry, QubitId}; +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyList}; +use std::collections::HashMap; + +/// Parse a gate name string into a `GateType`.
+fn parse_gate_type(name: &str) -> PyResult { + match name { + "I" => Ok(GateType::I), + "X" => Ok(GateType::X), + "Y" => Ok(GateType::Y), + "Z" => Ok(GateType::Z), + "SX" => Ok(GateType::SX), + "SXdg" => Ok(GateType::SXdg), + "SY" => Ok(GateType::SY), + "SYdg" => Ok(GateType::SYdg), + "SZ" | "S" => Ok(GateType::SZ), + "SZdg" | "Sdg" => Ok(GateType::SZdg), + "H" => Ok(GateType::H), + "RX" => Ok(GateType::RX), + "RY" => Ok(GateType::RY), + "RZ" => Ok(GateType::RZ), + "T" => Ok(GateType::T), + "Tdg" => Ok(GateType::Tdg), + "U" => Ok(GateType::U), + "R1XY" => Ok(GateType::R1XY), + "CX" | "CNOT" => Ok(GateType::CX), + "CY" => Ok(GateType::CY), + "CZ" => Ok(GateType::CZ), + "CH" => Ok(GateType::CH), + "SZZ" => Ok(GateType::SZZ), + "SZZdg" => Ok(GateType::SZZdg), + "SWAP" => Ok(GateType::SWAP), + "CRZ" => Ok(GateType::CRZ), + "RXX" => Ok(GateType::RXX), + "RYY" => Ok(GateType::RYY), + "RZZ" => Ok(GateType::RZZ), + "CCX" | "Toffoli" => Ok(GateType::CCX), + "Measure" => Ok(GateType::Measure), + "MeasureLeaked" => Ok(GateType::MeasureLeaked), + "MeasureFree" => Ok(GateType::MeasureFree), + "Prep" => Ok(GateType::Prep), + "QAlloc" => Ok(GateType::QAlloc), + "QFree" => Ok(GateType::QFree), + "Idle" => Ok(GateType::Idle), + _ => Err(pyo3::exceptions::PyValueError::new_err(format!( + "Unknown gate type: '{name}'" + ))), + } +} + +/// Convert a Python object to a `Value`. 
+fn py_to_value(obj: &Bound<'_, PyAny>) -> PyResult { + // Try bool before int since Python bools are ints + if let Ok(b) = obj.extract::() { + return Ok(Value::Bool(b)); + } + if let Ok(i) = obj.extract::() { + return Ok(Value::Int(i)); + } + if let Ok(f) = obj.extract::() { + return Ok(Value::Float(f)); + } + if let Ok(s) = obj.extract::() { + return Ok(Value::String(s)); + } + if let Ok(dict) = obj.cast::() { + return Ok(Value::Dict(py_dict_to_value_map(dict)?)); + } + if let Ok(list) = obj.cast::() { + let items: PyResult> = list.iter().map(|item| py_to_value(&item)).collect(); + return Ok(Value::List(items?)); + } + Err(pyo3::exceptions::PyTypeError::new_err(format!( + "Metadata values must be str, int, float, bool, list, or dict, got {}", + obj.get_type().name()? + ))) +} + +/// Convert a `Value` to a Python object. +fn value_to_py(py: Python<'_>, val: &Value) -> PyResult> { + match val { + Value::String(s) => Ok(s.into_pyobject(py)?.into_any().unbind()), + Value::Int(i) => Ok(i.into_pyobject(py)?.into_any().unbind()), + Value::Float(f) => Ok(f.into_pyobject(py)?.into_any().unbind()), + Value::Bool(b) => Ok(b.into_pyobject(py)?.to_owned().into_any().unbind()), + Value::List(items) => { + let py_list = PyList::empty(py); + for item in items { + py_list.append(value_to_py(py, item)?)?; + } + Ok(py_list.unbind().into_any()) + } + Value::Dict(map) => { + let py_dict = PyDict::new(py); + for (k, v) in map { + py_dict.set_item(k, value_to_py(py, v)?)?; + } + Ok(py_dict.unbind().into_any()) + } + } +} + +/// Convert a Python dict to a `HashMap`. +fn py_dict_to_value_map(dict: &Bound<'_, PyDict>) -> PyResult> { + let mut metadata = HashMap::new(); + for (key, val) in dict.iter() { + let k: String = key.extract()?; + let v = py_to_value(&val)?; + metadata.insert(k, v); + } + Ok(metadata) +} + +/// Python-friendly angle source specification for decomposition steps. 
+#[pyclass(name = "AngleSource", from_py_object)] +#[derive(Clone)] +pub struct PyAngleSource { + inner: AngleSource, +} + +#[pymethods] +impl PyAngleSource { + /// Forward the i-th input angle from the parent gate. + #[staticmethod] + fn input(index: u8) -> Self { + Self { + inner: AngleSource::Input(index), + } + } + + /// Use a fixed angle value (in turns, where 1.0 = full turn). + #[staticmethod] + fn fixed(value: f64) -> Self { + Self { + inner: AngleSource::Fixed(Angle64::from_turns(value)), + } + } + + /// Negate the i-th input angle from the parent gate. + #[staticmethod] + fn neg_input(index: u8) -> Self { + Self { + inner: AngleSource::NegInput(index), + } + } + + fn __repr__(&self) -> String { + match &self.inner { + AngleSource::Input(i) => format!("AngleSource.input({i})"), + AngleSource::Fixed(a) => format!("AngleSource.fixed({a})"), + AngleSource::NegInput(i) => format!("AngleSource.neg_input({i})"), + } + } +} + +/// Builder for constructing gate definitions with a fluent API. +#[pyclass(name = "GateDefBuilder")] +pub struct PyGateDefBuilder { + inner: Option, +} + +#[pymethods] +impl PyGateDefBuilder { + /// Set the number of angle parameters this gate accepts. + fn angle_arity(slf: Py, py: Python<'_>, arity: usize) -> Py { + let mut this = slf.borrow_mut(py); + let builder = this.inner.take().expect("Builder already consumed"); + this.inner = Some(builder.angle_arity(arity)); + drop(this); + slf + } + + /// Add a non-parameterized gate step to the decomposition. + fn step( + slf: Py, + py: Python<'_>, + gate_name: &str, + qubit_indices: Vec, + ) -> PyResult> { + let gate_type = parse_gate_type(gate_name)?; + let mut this = slf.borrow_mut(py); + let builder = this.inner.take().expect("Builder already consumed"); + this.inner = Some(builder.step(gate_type, &qubit_indices)); + drop(this); + Ok(slf) + } + + /// Add a parameterized gate step to the decomposition. 
+ fn step_with_angles( + slf: Py, + py: Python<'_>, + gate_name: &str, + qubit_indices: Vec, + angle_sources: Vec, + ) -> PyResult> { + let gate_type = parse_gate_type(gate_name)?; + let sources: Vec = angle_sources.into_iter().map(|s| s.inner).collect(); + let mut this = slf.borrow_mut(py); + let builder = this.inner.take().expect("Builder already consumed"); + this.inner = Some(builder.step_with_angles(gate_type, &qubit_indices, &sources)); + drop(this); + Ok(slf) + } + + /// Add a gate step with angles and per-step metadata. + /// + /// Metadata values can be str, int, float, or bool. + fn step_with_metadata( + slf: Py, + py: Python<'_>, + gate_name: &str, + qubit_indices: Vec, + angle_sources: Vec, + metadata: &Bound<'_, PyDict>, + ) -> PyResult> { + let gate_type = parse_gate_type(gate_name)?; + let sources: Vec = angle_sources.into_iter().map(|s| s.inner).collect(); + let meta = py_dict_to_value_map(metadata)?; + let mut this = slf.borrow_mut(py); + let builder = this.inner.take().expect("Builder already consumed"); + this.inner = Some(builder.step_with_metadata(gate_type, &qubit_indices, &sources, meta)); + drop(this); + Ok(slf) + } + + /// Finalize and register this gate definition into a registry. + fn register_into(&mut self, registry: &mut PyGateRegistry) -> PyResult<()> { + let builder = self + .inner + .take() + .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("Builder already consumed"))?; + registry.inner.register(builder.build()); + Ok(()) + } +} + +/// Registry mapping gate names to definitions with decompositions. +#[pyclass(name = "GateRegistry")] +pub struct PyGateRegistry { + pub(crate) inner: GateRegistry, +} + +#[pymethods] +impl PyGateRegistry { + #[new] + fn new() -> Self { + Self { + inner: GateRegistry::new(), + } + } + + /// Start building a gate definition. 
+ fn define(&self, name: String, quantum_arity: usize) -> PyGateDefBuilder { + PyGateDefBuilder { + inner: Some(GateDefinitionBuilder::new(name, quantum_arity)), + } + } + + /// Check if a gate is registered. + fn contains(&self, name: &str) -> bool { + self.inner.contains(name) + } + + fn __len__(&self) -> usize { + self.inner.len() + } + + /// Decompose a registered gate into concrete steps. + /// + /// Returns a list of (`gate_name`, qubits, angles, metadata) tuples, or None if + /// the gate is not registered or has no decomposition. + fn decompose( + &self, + py: Python<'_>, + name: &str, + qubits: Vec, + angles: Vec, + ) -> PyResult> { + let qubit_ids: Vec = qubits.into_iter().map(QubitId::from).collect(); + let angle_vals: Vec = angles.into_iter().map(Angle64::from_turns).collect(); + + match self.inner.decompose(name, &qubit_ids, &angle_vals) { + None => Ok(py.None()), + Some(steps) => { + let result = PyList::empty(py); + for (gate_type, step_qubits, step_angles, step_meta) in steps { + let gate_name = format!("{gate_type}"); + let py_qubits: Vec = + step_qubits.iter().map(|q| usize::from(*q)).collect(); + let py_angles: Vec = step_angles + .iter() + .map(|a| a.to_radians() / std::f64::consts::TAU) + .collect(); + let py_meta = PyDict::new(py); + for (k, v) in &step_meta { + py_meta.set_item(k, value_to_py(py, v)?)?; + } + result.append((gate_name, py_qubits, py_angles, py_meta))?; + } + Ok(result.unbind().into_any()) + } + } + } +} + +/// Register gate registry types into a Python module. 
+pub fn register_gate_registry_types(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/python/pecos-rslib/src/lib.rs b/python/pecos-rslib/src/lib.rs index 9ca833a8c..0f796da1c 100644 --- a/python/pecos-rslib/src/lib.rs +++ b/python/pecos-rslib/src/lib.rs @@ -40,6 +40,7 @@ mod pecos_rng_bindings; mod phir_json_bridge; // mod qir_bindings; // Removed - replaced by llvm_bindings mod engines_module; +mod gate_registry_bindings; mod llvm_bindings; mod programs_module; mod quest_bindings; @@ -268,6 +269,9 @@ fn pecos_rslib(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // Register quantum circuit types (DagCircuit, Gate, GateType, QubitId) dag_circuit_bindings::register_quantum_circuit_types(m)?; + // Register gate registry types (GateRegistry, GateDefBuilder, AngleSource) + gate_registry_bindings::register_gate_registry_types(m)?; + // Register time unit types at top level (Nanoseconds, TimeUnits) dag_circuit_bindings::register_time_unit_types(m)?; diff --git a/python/pecos-rslib/src/types_module.rs b/python/pecos-rslib/src/types_module.rs index 7c198d02b..d19dfd7a0 100644 --- a/python/pecos-rslib/src/types_module.rs +++ b/python/pecos-rslib/src/types_module.rs @@ -60,6 +60,11 @@ pub fn register_types_module(parent: &Bound<'_, PyModule>) -> PyResult<()> { types.add("ByteMessage", parent.getattr("ByteMessage")?)?; types.add("ByteMessageBuilder", parent.getattr("ByteMessageBuilder")?)?; + // Gate registry types + types.add("GateRegistry", parent.getattr("GateRegistry")?)?; + types.add("GateDefBuilder", parent.getattr("GateDefBuilder")?)?; + types.add("AngleSource", parent.getattr("AngleSource")?)?; + // Foreign object types (conditionally compiled) #[cfg(feature = "wasm")] types.add("WasmForeignObject", parent.getattr("WasmForeignObject")?)?; diff --git a/python/quantum-pecos/src/pecos/__init__.py b/python/quantum-pecos/src/pecos/__init__.py index c95f6746d..98dc10357 100644 --- 
a/python/quantum-pecos/src/pecos/__init__.py +++ b/python/quantum-pecos/src/pecos/__init__.py @@ -32,8 +32,11 @@ import pecos_rslib from pecos_rslib import ( + AngleSource, # Angle source specification for gate decomposition Array, # Array type with generic dtype support (Array[f64], etc.) BitInt, # Fixed-width binary integer type + GateRegistry, # Gate registration system for custom gate decomposition + GateSignatureMismatchError, # Raised when custom gate arity mismatches Nanoseconds, # Time duration in nanoseconds Pauli, # Quantum Pauli operators (I, X, Y, Z) PauliString, # Multi-qubit Pauli operators @@ -249,6 +252,7 @@ "NUMERIC_TYPES", "SIGNED_INTEGER_TYPES", "UNSIGNED_INTEGER_TYPES", + "AngleSource", # Core types "Array", # Deprecated @@ -257,6 +261,8 @@ # Type categories "Complex", "Float", + "GateRegistry", + "GateSignatureMismatchError", "GeneralNoiseModelBuilder", # Program wrapper classes for sim() - also available via pecos.programs "Guppy", diff --git a/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py b/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py index 2bad7d6c9..63b2d9b5a 100644 --- a/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py +++ b/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py @@ -190,6 +190,7 @@ class QuantumCircuit(MutableSequence): def __init__( self, circuit_setup: CircuitSetup = None, + gate_registry: object | None = None, **metadata: JSONValue, ) -> None: """Initialize a QuantumCircuit. @@ -197,6 +198,7 @@ def __init__( Args: circuit_setup (None, int, list of dict): Initial circuit configuration. Can be None (empty circuit), int (number of initial ticks), or list of dicts (pre-configured ticks). + gate_registry: Optional GateRegistry for ahead-of-time custom gate signature validation. **metadata: Additional metadata to associate with the circuit as keyword arguments. 
""" if TickCircuit is None: @@ -209,6 +211,10 @@ def __init__( # Track logically reserved ticks (for backwards compatibility with empty tick creation) self._reserved_ticks = 0 + if gate_registry is not None: + self._inner.import_registry(gate_registry) + self._gate_registry = gate_registry + if "tracked_qudits" in metadata: msg = "tracked_qudits is not a valid metadata key" raise ValueError(msg) @@ -259,6 +265,9 @@ def _add_gate_to_tick( # Convert locations to list, filtering out None values (placeholders for logical gates) loc_list = [loc for loc in locations if loc is not None] if not loc_list: + # No qubit operands -- store symbol as tick-level metadata + # (e.g., global barriers or marker gates) + tick_handle.meta("_symbol", symbol) return # Serialize params for storage (handle tuples -> lists) @@ -492,36 +501,17 @@ def add_with_symbol( else: add_with_symbol(method, loc) else: - # Store unrecognized gates using a no-op gate with metadata - # This allows round-trip preservation for simulator-specific gates - # Use I gate (identity) as carrier for unknown single-qubit gates + # Store unrecognized gates using validated custom_gate method. + # First use of a name establishes its signature; subsequent uses are validated. 
+ angles = self._extract_angles(params) for loc in loc_list: - if isinstance(loc, tuple): - if len(loc) == 2: - # Two-qubit gate - use CX as carrier - result = tick_handle.cx(loc[0], loc[1]) - if hasattr(result, "meta"): - result.meta("_symbol", symbol) - result.meta("_custom_gate", "true") - if params_json: - result.meta("_params", params_json) - else: - # Multi-qubit locations as individual qubits - for q in loc: - result = tick_handle.i(q) - if hasattr(result, "meta"): - result.meta("_symbol", symbol) - result.meta("_custom_gate", "true") - if params_json: - result.meta("_params", params_json) - else: - # Single-qubit gate - use I (identity) as carrier - result = tick_handle.i(loc) - if hasattr(result, "meta"): - result.meta("_symbol", symbol) - result.meta("_custom_gate", "true") - if params_json: - result.meta("_params", params_json) + qubits = list(loc) if isinstance(loc, tuple) else [loc] + try: + result = tick_handle.custom_gate(symbol, qubits, angles if angles else None) + except QubitConflictError: + continue + if hasattr(result, "meta") and params_json: + result.meta("_params", params_json) def append( self, @@ -543,13 +533,12 @@ def append( tick_handle = self._inner.tick() for gate_symbol, gate_locations in gate_dict.items(): - if gate_locations: - self._add_gate_to_tick( - tick_handle, - gate_symbol, - gate_locations, - **params, - ) + self._add_gate_to_tick( + tick_handle, + gate_symbol, + gate_locations, + **params, + ) def update( self, @@ -575,8 +564,6 @@ def update( # Get logical and physical tick counts logical_ticks = len(self) # includes reserved ticks - # Use next_tick_index() to get actual tick count including empty ticks - # (num_ticks() excludes trailing empty ticks which breaks reserved ticks) physical_ticks = self._inner.next_tick_index() # Handle empty circuit case with negative tick index @@ -595,13 +582,12 @@ def update( tick_handle = self._inner.tick() if actual_tick >= physical_ticks else self._inner.tick_at(actual_tick) for 
gate_symbol, gate_locations in gate_dict.items(): - if gate_locations: - self._add_gate_to_tick( - tick_handle, - gate_symbol, - gate_locations, - **params, - ) + self._add_gate_to_tick( + tick_handle, + gate_symbol, + gate_locations, + **params, + ) def discard(self, locations: LocationSet, tick: int = -1) -> None: """Discards ``locations`` for tick ``tick``. @@ -759,6 +745,13 @@ def _iter_tick( # Create new group grouped[key] = ({location}, params) + # Handle ticks with no gates but a tick-level symbol (e.g., global barriers) + if not grouped: + tick_symbol = tick_obj.get_attr("_symbol") + if tick_symbol is not None: + yield tick_symbol, set(), {} + return + # Yield grouped results for (symbol, _), (locations, params) in grouped.items(): yield symbol, locations, params @@ -791,13 +784,12 @@ def insert( tick_handle = self._inner.insert_tick(tick) for gate_symbol, gate_locations in gate_dict.items(): - if gate_locations: - self._add_gate_to_tick( - tick_handle, - gate_symbol, - gate_locations, - **params, - ) + self._add_gate_to_tick( + tick_handle, + gate_symbol, + gate_locations, + **params, + ) def _circuit_setup(self, circuit_setup: CircuitSetup) -> None: if isinstance(circuit_setup, int): @@ -834,6 +826,17 @@ def to_json_str(self) -> str: return json.dumps(prog) + @staticmethod + def _extract_angles(params: dict) -> list[float]: + """Extract angle values from gate parameters.""" + if not params: + return [] + if "angles" in params: + return list(params["angles"]) + if "angle" in params: + return [params["angle"]] + return [] + @staticmethod def _fix_json_meta(meta: JSONDict) -> JSONDict: """Fix some of the type issues for converting json rep back to a QuantumCircuit.""" @@ -894,13 +897,12 @@ def __setitem__(self, tick: int, item: tuple[GateDict, JSONDict]) -> None: # Add new gates tick_handle = self._inner.tick_at(actual_tick) for gate_symbol, gate_locations in gate_dict.items(): - if gate_locations: - self._add_gate_to_tick( - tick_handle, - gate_symbol, - 
gate_locations, - **params, - ) + self._add_gate_to_tick( + tick_handle, + gate_symbol, + gate_locations, + **params, + ) def __len__(self) -> int: """Used to return number of ticks when len() is used on an instance of this class.""" @@ -1055,13 +1057,12 @@ def add( if gate_dict: tick_handle = self._circuit._inner.tick_at(self._tick_idx) for gate_symbol, gate_locations in gate_dict.items(): - if gate_locations: - self._circuit._add_gate_to_tick( - tick_handle, - gate_symbol, - gate_locations, - **params, - ) + self._circuit._add_gate_to_tick( + tick_handle, + gate_symbol, + gate_locations, + **params, + ) return self diff --git a/python/quantum-pecos/src/pecos/simulators/default_simulator.py b/python/quantum-pecos/src/pecos/simulators/default_simulator.py index 27109e3f4..ab62163a5 100644 --- a/python/quantum-pecos/src/pecos/simulators/default_simulator.py +++ b/python/quantum-pecos/src/pecos/simulators/default_simulator.py @@ -21,6 +21,8 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: + from pecos_rslib import GateRegistry + from pecos.circuits import QuantumCircuit JSONType = dict[str, Any] | list[Any] | str | int | float | bool | None @@ -82,12 +84,17 @@ def run_circuit( self, circuit: QuantumCircuit, removed_locations: set | None = None, + gate_registry: GateRegistry | None = None, ) -> dict[int | tuple[int, ...], JSONType]: """Run a quantum circuit on the simulator. + If a gate_registry is provided and a gate symbol is registered in it, + the gate is decomposed into base gates before being passed to run_gate. + Args: circuit (QuantumCircuit): A circuit instance or object with an appropriate items() generator. removed_locations (set | None): Optional set of locations to skip when running the circuit. + gate_registry: Optional GateRegistry for custom gate decomposition at simulation time. Returns: dict[int | tuple[int, ...], JSONType]: Circuit output. 
Note that this output format may differ @@ -102,9 +109,41 @@ def run_circuit( gate_locations = set(locations) - removed_locations # TODO: need to handle multi-qubit ops that are partially removed - gate_output = self.run_gate(symbol, gate_locations, **params) + if gate_registry and symbol not in self.bindings and gate_registry.contains(symbol): + gate_output = self._run_decomposed_gate(gate_registry, symbol, gate_locations, **params) + else: + gate_output = self.run_gate(symbol, gate_locations, **params) if gate_output: output.update(gate_output) return output + + def _run_decomposed_gate( + self, + gate_registry: GateRegistry, + symbol: str, + locations: set[int] | set[tuple[int, ...]], + **params: JSONType, + ) -> dict[int | tuple[int, ...], JSONType]: + """Decompose a registered gate and run each step via run_gate.""" + output = {} + for location in locations: + qubits = list(location) if isinstance(location, tuple) else [location] + angles = list(params.get("angles", ())) + steps = gate_registry.decompose(symbol, qubits, angles) + for step_symbol, step_qubits, step_angles, step_meta in steps: + step_loc = {step_qubits[0] if len(step_qubits) == 1 else tuple(step_qubits)} + step_params = dict(params) + if step_angles: + step_params["angles"] = tuple(step_angles) + if len(step_angles) == 1: + step_params["angle"] = step_angles[0] + else: + step_params.pop("angles", None) + step_params.pop("angle", None) + step_params.update(step_meta) + step_output = self.run_gate(step_symbol, step_loc, **step_params) + if step_output: + output.update(step_output) + return output diff --git a/python/quantum-pecos/src/pecos/simulators/sim_class_types.py b/python/quantum-pecos/src/pecos/simulators/sim_class_types.py index d3f83cc41..db3a8edf0 100644 --- a/python/quantum-pecos/src/pecos/simulators/sim_class_types.py +++ b/python/quantum-pecos/src/pecos/simulators/sim_class_types.py @@ -22,70 +22,22 @@ class PauliPropagation(DefaultSimulator): """Base class for Pauli-propagation 
simulators.""" - def __init__(self) -> None: - """Initialize the PauliPropagation simulator. - - Initializes the base DefaultSimulator and sets up bindings for - Pauli propagation simulation. - """ - super().__init__() - class Stabilizer(DefaultSimulator): """Base class for stabilizer simulators.""" - def __init__(self) -> None: - """Initialize the Stabilizer simulator. - - Initializes the base DefaultSimulator and sets up bindings for - stabilizer state simulation. - """ - super().__init__() - class StateVector(DefaultSimulator): """Base class for state-vector simulators.""" - def __init__(self) -> None: - """Initialize the StateVector simulator. - - Initializes the base DefaultSimulator and sets up bindings for - state vector simulation. - """ - super().__init__() - class StateTN(DefaultSimulator): """Base class for simulators whose state is represented as a tensor network.""" - def __init__(self) -> None: - """Initialize the StateTN simulator. - - Initializes the base DefaultSimulator and sets up bindings for - tensor network state simulation. - """ - super().__init__() - class DensityMatrix(DefaultSimulator): """Base class for density-matrix simulators.""" - def __init__(self) -> None: - """Initialize the DensityMatrix simulator. - - Initializes the base DefaultSimulator and sets up bindings for - density matrix simulation. - """ - super().__init__() - class ProcessMatrix(DefaultSimulator): """Base class for process-matrix simulators.""" - - def __init__(self) -> None: - """Initialize the ProcessMatrix simulator. - - Initializes the base DefaultSimulator and sets up bindings for - process matrix simulation. 
- """ - super().__init__() diff --git a/uv.lock b/uv.lock index 301f5e471..b731b7ac5 100644 --- a/uv.lock +++ b/uv.lock @@ -3891,27 +3891,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3c/3b/20d9a0bc954d51b63f20cf710cf506bfe675d1e6138139342dd5ccc90326/ruff-0.15.3.tar.gz", hash = "sha256:78757853320d8ddb9da24e614ef69a37bcbcfd477e5a6435681188d4bce4eaa1", size = 4569031, upload-time = "2026-02-26T15:39:38.015Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/00/c544ab1d70f86dc50a2f2a8e1262e5af5025897ccd820415f559f9f2f63f/ruff-0.15.3-py3-none-linux_armv6l.whl", hash = "sha256:f7df0fd6f889a8d8de2ddb48a9eb55150954400f2157ea15b21a2f49ecaaf988", size = 10444066, upload-time = "2026-02-26T15:39:47.708Z" }, - { url = "https://files.pythonhosted.org/packages/fb/15/9dee3f4e891261adbd690f8c6f075418a7cd76e845601b00a0da2ae2ad6e/ruff-0.15.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:0198b5445197d443c3bbf2cc358f4bd477fb3951e3c7f2babc13e9bb490614a8", size = 10853125, upload-time = "2026-02-26T15:40:18.943Z" }, - { url = "https://files.pythonhosted.org/packages/88/ba/fc5aeda852c89faf821d36c951df866117342e88439e1b1e1e762a07b7fd/ruff-0.15.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:adf95b5be57b25fbbbc07cd68d37414bee8729e807ad0217219558027186967e", size = 10180833, upload-time = "2026-02-26T15:40:13.282Z" }, - { url = "https://files.pythonhosted.org/packages/06/87/e2f80a39164476fac4d45752a0d4721d6645f40b7f851e48add12af9947e/ruff-0.15.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b56dbd9cd86489ccbad96bb58fa4c958342b5510fdeb60ea13d9d3566bd845c", size = 10536806, upload-time = "2026-02-26T15:40:24.129Z" }, - { url = "https://files.pythonhosted.org/packages/fd/89/2e5bf0ed30ea3778460ea4d8cc6cb4d88ba96d9732d2c0cc33349cd65196/ruff-0.15.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:c6f263ce511871955d8c5401b62c7e863988ea4d0527aa0a3b1b7ecff4d4abc4", size = 10276093, upload-time = "2026-02-26T15:39:44.654Z" }, - { url = "https://files.pythonhosted.org/packages/82/cb/318206d778c7f42917ca7b0f9436cf27652d1731fe434d3c9990c4a611fa/ruff-0.15.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e90fa1bed82ffede5768232b9bd23212c547ab7cd74c752007ecade1d895ee1a", size = 11051593, upload-time = "2026-02-26T15:39:35.157Z" }, - { url = "https://files.pythonhosted.org/packages/58/8f/65ee4c1b88e49dd4c0a3fc43e81832536c7942f0c702b6f3d25db0f95d6c/ruff-0.15.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e9d53760b7061ddbe5ea9e25381332c607fc14c40bde78f8a25392a93a68d74", size = 11885820, upload-time = "2026-02-26T15:39:59.504Z" }, - { url = "https://files.pythonhosted.org/packages/db/04/d4261f6729ad9a356bc6e3223ba297acf3b66118cef4795b4a8953b255ff/ruff-0.15.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ec90e3b78c56c4acca4264d371dd48e29215ecb673cc2fa3c4b799b72050e491", size = 11340583, upload-time = "2026-02-26T15:39:50.781Z" }, - { url = "https://files.pythonhosted.org/packages/24/84/490f38b2bc104e0fdc9496c2a66a48fb2d24a01de46ba0c60c4f6c4d4590/ruff-0.15.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7ce448fd395f822e34c8f6f7dfcd84b6726340082950858f92c4daa6baf8915", size = 11160701, upload-time = "2026-02-26T15:40:02.447Z" }, - { url = "https://files.pythonhosted.org/packages/ad/25/eae9cb7b6c28b425ed8cbe797da89c78146071102181ba74c4cdfd06bbeb/ruff-0.15.3-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14f7d763962d385f75b9b3b57fcc5661c56c20d8b1ddc9f5c881b5fa0ba499fa", size = 11111482, upload-time = "2026-02-26T15:39:56.462Z" }, - { url = "https://files.pythonhosted.org/packages/95/18/16d0b5ef143cb9e52724f18cbccb4b3c5cd4d4e2debbd95e2be3aeb64c9e/ruff-0.15.3-py3-none-musllinux_1_2_aarch64.whl", hash = 
"sha256:b57084e3a3d65418d376c7023711c37cce023cd2fb038a76ba15ee21f3c2c2ee", size = 10497151, upload-time = "2026-02-26T15:40:10.64Z" }, - { url = "https://files.pythonhosted.org/packages/bf/b4/1829314241ddba07c54a742ab387da343fe56a0267a6b6498f3e2ae99821/ruff-0.15.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d567523ff7dcf3112b0f71231d18c3506dd06943359476ee64dea0f9c8f63976", size = 10281955, upload-time = "2026-02-26T15:40:16.033Z" }, - { url = "https://files.pythonhosted.org/packages/d7/93/80a4ec4bd3cf58ca9b49dccf2bd232b520db14184912fb7e0eb6f3ecc484/ruff-0.15.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4223088d255bf31a50b6640445b39f668164d64c23e5fa403edfb1e0b11122e5", size = 10766613, upload-time = "2026-02-26T15:40:21.55Z" }, - { url = "https://files.pythonhosted.org/packages/da/92/fe016b862295dc57499997e7f2edc58119469b210f4f03ccb763fa65f130/ruff-0.15.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:32399ddae088970b2db6efd8d3f49981375cb828075359b6c088ed1fe63d64e1", size = 11262113, upload-time = "2026-02-26T15:39:41.5Z" }, - { url = "https://files.pythonhosted.org/packages/42/b1/77dcd05940388d9ba3de03ac4b8b598826d57935728071e1be9f2ef5b714/ruff-0.15.3-py3-none-win32.whl", hash = "sha256:1f1eb95ff614351e3a89a862b6d94e6c42c170e61916e1f20facd6c38477f5f3", size = 10509423, upload-time = "2026-02-26T15:40:05.217Z" }, - { url = "https://files.pythonhosted.org/packages/29/d5/76aab0fabbd54e8c77d02fcff2494906ba85b539d22aa9b7124f7100f008/ruff-0.15.3-py3-none-win_amd64.whl", hash = "sha256:2b22dffe5f5e1e537097aa5208684f069e495f980379c4491b1cfb198a444d0c", size = 11637739, upload-time = "2026-02-26T15:39:53.951Z" }, - { url = "https://files.pythonhosted.org/packages/f2/61/9b4e3682dfd26054321e1b2fdb67a51361dd6ec2fb63f2b50d711f8832ae/ruff-0.15.3-py3-none-win_arm64.whl", hash = "sha256:82443c14d694d4cbd9e598ede27ef5d6f08389ccad91c933be775ea2f4e66f76", size = 10957794, upload-time = "2026-02-26T15:40:08.045Z" }, +version = "0.15.4" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" }, + { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" }, + { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" }, + { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" }, ] [[package]] @@ -4449,7 +4449,7 @@ wheels = [ [[package]] name = "virtualenv" -version = "21.0.0" +version = "21.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = 
"distlib" }, @@ -4458,9 +4458,9 @@ dependencies = [ { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/4f/d6a5ff3b020c801c808b14e2d2330cdc8ebefe1cdfbc457ecc368e971fec/virtualenv-21.0.0.tar.gz", hash = "sha256:e8efe4271b4a5efe7a4dce9d60a05fd11859406c0d6aa8464f4cf451bc132889", size = 5836591, upload-time = "2026-02-25T20:21:07.691Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/c9/18d4b36606d6091844daa3bd93cf7dc78e6f5da21d9f21d06c221104b684/virtualenv-21.1.0.tar.gz", hash = "sha256:1990a0188c8f16b6b9cf65c9183049007375b26aad415514d377ccacf1e4fb44", size = 5840471, upload-time = "2026-02-27T08:49:29.702Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/d1/3f62e4f9577b28c352c11623a03fb916096d5c131303d4861b4914481b6b/virtualenv-21.0.0-py3-none-any.whl", hash = "sha256:d44e70637402c7f4b10f48491c02a6397a3a187152a70cba0b6bc7642d69fb05", size = 5817167, upload-time = "2026-02-25T20:21:05.476Z" }, + { url = "https://files.pythonhosted.org/packages/78/55/896b06bf93a49bec0f4ae2a6f1ed12bd05c8860744ac3a70eda041064e4d/virtualenv-21.1.0-py3-none-any.whl", hash = "sha256:164f5e14c5587d170cf98e60378eb91ea35bf037be313811905d3a24ea33cc07", size = 5825072, upload-time = "2026-02-27T08:49:27.516Z" }, ] [[package]] From 7368da1a4f4b710846251a30028b16f618a2a253 Mon Sep 17 00:00:00 2001 From: Ciaran Ryan-Anderson Date: Sat, 28 Feb 2026 12:37:53 -0700 Subject: [PATCH 2/5] Updating dependencies --- Cargo.lock | 201 +++++++++++--------------- Cargo.toml | 4 +- crates/benchmarks/Cargo.toml | 8 +- crates/pecos-cuquantum-sys/Cargo.toml | 2 +- crates/pecos-cuquantum/Cargo.toml | 14 +- crates/pecos-qec/Cargo.toml | 10 +- python/pecos-rslib/Cargo.toml | 2 +- uv.lock | 32 ++-- 8 files changed, 125 insertions(+), 148 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6f85b994..ff51170f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ 
version = 4 [[package]] name = "addr2line" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +checksum = "9698bf0769c641b18618039fe2ebd41eb3541f98433000f64e663fab7cea2c87" dependencies = [ "gimli", ] @@ -866,46 +866,47 @@ dependencies = [ [[package]] name = "cranelift-assembler-x64" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50a04121a197fde2fe896f8e7cac9812fc41ed6ee9c63e1906090f9f497845f6" +checksum = "40630d663279bc855bff805d6f5e8a0b6a1867f9df95b010511ac6dc894e9395" dependencies = [ "cranelift-assembler-x64-meta", ] [[package]] name = "cranelift-assembler-x64-meta" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09e699a94f477303820fb2167024f091543d6240783a2d3b01a3f21c42bc744" +checksum = "3ee6aec5ceb55e5fdbcf7ef677d7c7195531360ff181ce39b2b31df11d57305f" dependencies = [ "cranelift-srcgen", ] [[package]] name = "cranelift-bforest" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f07732c662a9755529e332d86f8c5842171f6e98ba4d5976a178043dad838654" +checksum = "9a92d78cc3f087d7e7073828f08d98c7074a3a062b6b29a1b7783ce74305685e" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-bitset" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18391da761cf362a06def7a7cf11474d79e55801dd34c2e9ba105b33dc0aef88" +checksum = "edcc73d756f2e0d7eda6144fe64a2bc69c624de893cb1be51f1442aed77881d2" dependencies = [ "serde", "serde_derive", + "wasmtime-internal-core", ] [[package]] name = "cranelift-codegen" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b3a09b3042c69810d255aef59ddc3b3e4c0644d1d90ecfd6e3837798cc88a3c" +checksum = "683d94c2cd0d73b41369b88da1129589bc3a2d99cf49979af1d14751f35b7a1b" dependencies = [ "bumpalo", "cranelift-assembler-x64", @@ -918,6 +919,7 @@ dependencies = [ "cranelift-isle", "gimli", "hashbrown 0.15.5", + "libm", "log", "pulley-interpreter", "regalloc2", @@ -925,14 +927,14 @@ dependencies = [ "serde", "smallvec", "target-lexicon", - "wasmtime-internal-math", + "wasmtime-internal-core", ] [[package]] name = "cranelift-codegen-meta" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75817926ec812241889208d1b190cadb7fedded4592a4bb01b8524babb9e4849" +checksum = "235da0e52ee3a0052d0e944c3470ff025b1f4234f6ec4089d3109f2d2ffa6cbd" dependencies = [ "cranelift-assembler-x64-meta", "cranelift-codegen-shared", @@ -943,35 +945,36 @@ dependencies = [ [[package]] name = "cranelift-codegen-shared" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859158f87a59476476eda3884d883c32e08a143cf3d315095533b362a3250a63" +checksum = "20c07c6c440bd1bf920ff7597a1e743ede1f68dcd400730bd6d389effa7662af" [[package]] name = "cranelift-control" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b65a9aec442d715cbf54d14548b8f395476c09cef7abe03e104a378291ab88" +checksum = "8797c022e02521901e1aee483dea3ed3c67f2bf0a26405c9dd48e8ee7a70944b" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8334c99a7e86060c24028732efd23bac84585770dcb752329c69f135d64f2fc1" +checksum = "59d8e72637246edd2cba337939850caa8b201f6315925ec4c156fdd089999699" dependencies = [ "cranelift-bitset", "serde", "serde_derive", + "wasmtime-internal-core", ] [[package]] name = "cranelift-frontend" -version = 
"0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ac6c095aa5b3e845d7ca3461e67e2b65249eb5401477a5ff9100369b745111" +checksum = "4c31db0085c3dfa131e739c3b26f9f9c84d69a9459627aac1ac4ef8355e3411b" dependencies = [ "cranelift-codegen", "log", @@ -981,15 +984,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3d992870ed4f0f2e82e2175275cb3a123a46e9660c6558c46417b822c91fa" +checksum = "524d804c1ebd8c542e6f64e71aa36934cec17c5da4a9ae3799796220317f5d23" [[package]] name = "cranelift-native" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee32e36beaf80f309edb535274cfe0349e1c5cf5799ba2d9f42e828285c6b52e" +checksum = "dc9598f02540e382e1772416eba18e93c5275b746adbbf06ac1f3cf149415270" dependencies = [ "cranelift-codegen", "libc", @@ -998,9 +1001,9 @@ dependencies = [ [[package]] name = "cranelift-srcgen" -version = "0.128.4" +version = "0.129.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "903adeaf4938e60209a97b53a2e4326cd2d356aab9764a1934630204bae381c9" +checksum = "d953932541249c91e3fa70a75ff1e52adc62979a2a8132145d4b9b3e6d1a9b6a" [[package]] name = "crc" @@ -1517,12 +1520,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - [[package]] name = "fastrand" version = "2.3.0" @@ -1815,11 +1812,12 @@ dependencies = [ [[package]] name = "gimli" -version = "0.32.3" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +checksum = 
"0bf7f043f89559805f8c7cacc432749b2fa0d0a0a9ee46ce47164ed5ba7f126c" dependencies = [ - "fallible-iterator", + "fnv", + "hashbrown 0.16.1", "indexmap 2.13.0", "stable_deref_trait", ] @@ -2717,12 +2715,13 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ "bitflags", "libc", + "plain", "redox_syscall 0.7.3", ] @@ -3973,6 +3972,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "plotters" version = "0.3.7" @@ -4188,21 +4193,21 @@ checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773" [[package]] name = "pulley-interpreter" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9812652c1feb63cf39f8780cecac154a32b22b3665806c733cd4072547233a4" +checksum = "bc2d61e068654529dc196437f8df0981db93687fdc67dec6a5de92363120b9da" dependencies = [ "cranelift-bitset", "log", "pulley-macros", - "wasmtime-internal-math", + "wasmtime-internal-core", ] [[package]] name = "pulley-macros" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56000349b6896e3d44286eb9c330891237f40b27fd43c1ccc84547d0b463cb40" +checksum = "c3f210c61b6ecfaebbba806b6d9113a222519d4e5cc4ab2d5ecca047bb7927ae" dependencies = [ "proc-macro2", "quote", @@ -6051,16 +6056,6 @@ dependencies = [ "unicode-ident", ] 
-[[package]] -name = "wasm-encoder" -version = "0.243.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55db9c896d70bd9fa535ce83cd4e1f2ec3726b0edd2142079f594fc3be1cb35" -dependencies = [ - "leb128fmt", - "wasmparser 0.243.0", -] - [[package]] name = "wasm-encoder" version = "0.244.0" @@ -6093,19 +6088,6 @@ dependencies = [ "wasmparser 0.244.0", ] -[[package]] -name = "wasmparser" -version = "0.243.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6d8db401b0528ec316dfbe579e6ab4152d61739cfe076706d2009127970159d" -dependencies = [ - "bitflags", - "hashbrown 0.15.5", - "indexmap 2.13.0", - "semver", - "serde", -] - [[package]] name = "wasmparser" version = "0.244.0" @@ -6116,6 +6098,7 @@ dependencies = [ "hashbrown 0.15.5", "indexmap 2.13.0", "semver", + "serde", ] [[package]] @@ -6131,30 +6114,27 @@ dependencies = [ [[package]] name = "wasmprinter" -version = "0.243.0" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2b6035559e146114c29a909a3232928ee488d6507a1504d8934e8607b36d7b" +checksum = "09390d7b2bd7b938e563e4bff10aa345ef2e27a3bc99135697514ef54495e68f" dependencies = [ "anyhow", "termcolor", - "wasmparser 0.243.0", + "wasmparser 0.244.0", ] [[package]] name = "wasmtime" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2a83182bf04af87571b4c642300479501684f26bab5597f68f68cded5b098fd" +checksum = "39bef52be4fb4c5b47d36f847172e896bc94b35c9c6a6f07117686bd16ed89a7" dependencies = [ "addr2line", - "anyhow", "async-trait", "bitflags", "bumpalo", "cc", "cfg-if", - "hashbrown 0.15.5", - "indexmap 2.13.0", "libc", "log", "mach2", @@ -6168,14 +6148,13 @@ dependencies = [ "serde_derive", "smallvec", "target-lexicon", - "wasmparser 0.243.0", + "wasmparser 0.244.0", "wasmtime-environ", + "wasmtime-internal-core", "wasmtime-internal-cranelift", "wasmtime-internal-fiber", 
"wasmtime-internal-jit-debug", "wasmtime-internal-jit-icache-coherence", - "wasmtime-internal-math", - "wasmtime-internal-slab", "wasmtime-internal-unwinder", "wasmtime-internal-versioned-export-macros", "wat", @@ -6184,14 +6163,15 @@ dependencies = [ [[package]] name = "wasmtime-environ" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb201c41aa23a3642365cfb2e4a183573d85127a3c9d528f56b9997c984541ab" +checksum = "bb637d5aa960ac391ca5a4cbf3e45807632e56beceeeb530e14dfa67fdfccc62" dependencies = [ "anyhow", "cranelift-bitset", "cranelift-entity", "gimli", + "hashbrown 0.15.5", "indexmap 2.13.0", "log", "object", @@ -6200,16 +6180,26 @@ dependencies = [ "serde_derive", "smallvec", "target-lexicon", - "wasm-encoder 0.243.0", - "wasmparser 0.243.0", + "wasm-encoder 0.244.0", + "wasmparser 0.244.0", "wasmprinter", + "wasmtime-internal-core", +] + +[[package]] +name = "wasmtime-internal-core" +version = "42.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a4a3f055a804a2f3d86e816a9df78a8fa57762212a8506164959224a40cd48" +dependencies = [ + "libm", ] [[package]] name = "wasmtime-internal-cranelift" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633e889cdae76829738db0114ab3b02fce51ea4a1cd9675a67a65fce92e8b418" +checksum = "55154a91d22ad51f9551124ce7fb49ddddc6a82c4910813db4c790c97c9ccf32" dependencies = [ "cfg-if", "cranelift-codegen", @@ -6225,18 +6215,18 @@ dependencies = [ "smallvec", "target-lexicon", "thiserror 2.0.18", - "wasmparser 0.243.0", + "wasmparser 0.244.0", "wasmtime-environ", - "wasmtime-internal-math", + "wasmtime-internal-core", "wasmtime-internal-unwinder", "wasmtime-internal-versioned-export-macros", ] [[package]] name = "wasmtime-internal-fiber" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"deb126adc5d0c72695cfb77260b357f1b81705a0f8fa30b3944e7c2219c17341" +checksum = "05decfad1021ad2efcca5c1be9855acb54b6ee7158ac4467119b30b7481508e3" dependencies = [ "cc", "cfg-if", @@ -6249,9 +6239,9 @@ dependencies = [ [[package]] name = "wasmtime-internal-jit-debug" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e66ff7f90a8002187691ff6237ffd09f954a0ebb9de8b2ff7f5c62632134120" +checksum = "924980c50427885fd4feed2049b88380178e567768aaabf29045b02eb262eaa7" dependencies = [ "cc", "wasmtime-internal-versioned-export-macros", @@ -6259,36 +6249,21 @@ dependencies = [ [[package]] name = "wasmtime-internal-jit-icache-coherence" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b96df23179ae16d54fb3a420f84ffe4383ec9dd06fad3e5bc782f85f66e8e08" +checksum = "c57d24e8d1334a0e5a8b600286ffefa1fc4c3e8176b110dff6fbc1f43c4a599b" dependencies = [ - "anyhow", "cfg-if", "libc", + "wasmtime-internal-core", "windows-sys 0.61.2", ] -[[package]] -name = "wasmtime-internal-math" -version = "41.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d1380926682b44c383e9a67f47e7a95e60c6d3fa8c072294dab2c7de6168a0" -dependencies = [ - "libm", -] - -[[package]] -name = "wasmtime-internal-slab" -version = "41.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b63cbea1c0192c7feb7c0dfb35f47166988a3742f29f46b585ef57246c65764" - [[package]] name = "wasmtime-internal-unwinder" -version = "41.0.4" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f25c392c7e5fb891a7416e3c34cfbd148849271e8c58744fda875dde4bec4d6a" +checksum = "3a1a144bd4393593a868ba9df09f34a6a360cb5db6e71815f20d3f649c6e6735" dependencies = [ "cfg-if", "cranelift-codegen", @@ -6299,9 +6274,9 @@ dependencies = [ [[package]] name = "wasmtime-internal-versioned-export-macros" -version = "41.0.4" 
+version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70f8b9796a3f0451a7b702508b303d654de640271ac80287176de222f187a237" +checksum = "9a6948b56bb00c62dbd205ea18a4f1ceccbe1e4b8479651fdb0bab2553790f20" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index cd9e39d68..1c097f8d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ pest = "2" pest_derive = "2" tempfile = "3" assert_cmd = "2" -wasmtime = { version = "41", default-features = false, features = [ +wasmtime = { version = "42", default-features = false, features = [ "cranelift", "runtime", "wat", @@ -99,6 +99,7 @@ bitvec = { version = "1", features = ["serde"] } ndarray = "0.17" # RNG +fastrand = "2" rand = "0.10" rand_core = "0.10" rand_xoshiro = "0.8" @@ -146,6 +147,7 @@ pecos-num = { version = "0.1.1", path = "crates/pecos-num" } pecos-quantum = { version = "0.1.1", path = "crates/pecos-quantum" } pecos-gpu-sims = { version = "0.1.1", path = "crates/pecos-gpu-sims" } pecos-cuquantum = { version = "0.1.1", path = "crates/pecos-cuquantum" } +pecos-cuquantum-sys = { version = "0.1.1", path = "crates/pecos-cuquantum-sys" } # Decoder crates pecos-decoder-core = { version = "0.1.1", path = "crates/pecos-decoder-core" } diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index 567e575ba..266705d9c 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -25,10 +25,10 @@ all-sims = ["gpu-sims", "cuquantum", "quest", "qulacs", "cppsparsesim"] [dependencies] # Optional simulator dependencies for benchmarking -pecos-gpu-sims = { path = "../pecos-gpu-sims", optional = true } -pecos-cuquantum = { path = "../pecos-cuquantum", optional = true } -pecos-quest = { path = "../pecos-quest", optional = true } -pecos-qulacs = { path = "../pecos-qulacs", optional = true } +pecos-gpu-sims = { workspace = true, optional = true } +pecos-cuquantum = { workspace = true, optional = true } +pecos-quest = { workspace 
= true, optional = true } +pecos-qulacs = { workspace = true, optional = true } pecos-cppsparsesim = { workspace = true, optional = true } pecos-core.workspace = true pecos-qsim.workspace = true diff --git a/crates/pecos-cuquantum-sys/Cargo.toml b/crates/pecos-cuquantum-sys/Cargo.toml index b742a6758..3c80927c2 100644 --- a/crates/pecos-cuquantum-sys/Cargo.toml +++ b/crates/pecos-cuquantum-sys/Cargo.toml @@ -15,7 +15,7 @@ readme = "README.md" [build-dependencies] bindgen.workspace = true -pecos-build = { path = "../pecos-build" } +pecos-build.workspace = true log.workspace = true env_logger.workspace = true diff --git a/crates/pecos-cuquantum/Cargo.toml b/crates/pecos-cuquantum/Cargo.toml index e1a328a13..237814c28 100644 --- a/crates/pecos-cuquantum/Cargo.toml +++ b/crates/pecos-cuquantum/Cargo.toml @@ -12,15 +12,15 @@ categories = ["science", "simulation"] readme = "README.md" [dependencies] -pecos-cuquantum-sys = { path = "../pecos-cuquantum-sys" } -pecos-build = { path = "../pecos-build" } -pecos-core = { path = "../pecos-core" } -pecos-qsim = { path = "../pecos-qsim" } -thiserror = "2.0" -fastrand = "2.3" +pecos-cuquantum-sys.workspace = true +pecos-build.workspace = true +pecos-core.workspace = true +pecos-qsim.workspace = true +thiserror.workspace = true +fastrand.workspace = true [build-dependencies] -pecos-build = { path = "../pecos-build" } +pecos-build.workspace = true log.workspace = true env_logger.workspace = true diff --git a/crates/pecos-qec/Cargo.toml b/crates/pecos-qec/Cargo.toml index 07b1d75c4..32bd01c4f 100644 --- a/crates/pecos-qec/Cargo.toml +++ b/crates/pecos-qec/Cargo.toml @@ -13,11 +13,11 @@ readme = "README.md" [dependencies] ndarray.workspace = true -pecos-core = { path = "../pecos-core" } -pecos-decoder-core = { path = "../pecos-decoder-core" } -pecos-quantum = { path = "../pecos-quantum" } -pecos-qsim = { path = "../pecos-qsim" } -pecos-rng = { path = "../pecos-rng" } +pecos-core.workspace = true +pecos-decoder-core.workspace = true 
+pecos-quantum.workspace = true +pecos-qsim.workspace = true +pecos-rng.workspace = true rand.workspace = true rand_core.workspace = true rayon.workspace = true diff --git a/python/pecos-rslib/Cargo.toml b/python/pecos-rslib/Cargo.toml index 37a228104..745fc2cea 100644 --- a/python/pecos-rslib/Cargo.toml +++ b/python/pecos-rslib/Cargo.toml @@ -35,7 +35,7 @@ pecos-qasm = { workspace = true, features = ["wasm"] } pyo3 = { workspace=true, features = ["extension-module", "abi3-py310", "generate-import-lib", "num-complex"] } rand.workspace = true -pecos-rng = { path = "../../crates/pecos-rng" } +pecos-rng.workspace = true ndarray.workspace = true num-complex.workspace = true parking_lot.workspace = true diff --git a/uv.lock b/uv.lock index b731b7ac5..fb525a643 100644 --- a/uv.lock +++ b/uv.lock @@ -2044,26 +2044,26 @@ wheels = [ [[package]] name = "maturin" -version = "1.12.4" +version = "1.12.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2f/a6/54e73f0ec0224488ae25196ce8b4df298cae613b099ad0c4f39dd7e3a8d2/maturin-1.12.4.tar.gz", hash = "sha256:06f6438be7e723aaf4b412fb34839854b540a1350f7614fadf5bd1db2b98d5f7", size = 262134, upload-time = "2026-02-21T10:24:25.64Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/cd/8285f37bf968b8485e3c7eb43349a5adbccfddfc487cd4327fb9104578cc/maturin-1.12.4-py3-none-linux_armv6l.whl", hash = "sha256:cf8a0eddef9ab8773bc823c77aed3de9a5c85fb760c86448048a79ef89794c81", size = 9758449, upload-time = "2026-02-21T10:24:35.382Z" }, - { url = "https://files.pythonhosted.org/packages/d9/91/f51191db83735f77bc988c8034730bb63b750a4a1a04f9c8cba10f44ad45/maturin-1.12.4-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:eba1bd1c1513d00fec75228da98622c68a9f50f9693aaa6fb7dacb244e7bbf26", size = 18938848, upload-time = "2026-02-21T10:24:10.701Z" }, - { url 
= "https://files.pythonhosted.org/packages/65/47/03c422adeac93b903354b322bba632754fdb134b27ace71b5603feba5906/maturin-1.12.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:89749cfc0e6baf5517fa370729a98955552e42fefc406b95732d5c8e85bc90c0", size = 9791641, upload-time = "2026-02-21T10:24:21.72Z" }, - { url = "https://files.pythonhosted.org/packages/5e/30/dd78acf6afc48d358512b5ed928fd24e2bc6b68db69b1f6bba3ffd7bcaed/maturin-1.12.4-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:4d68664e5b81f282144a3b717a7e8593ec94ac87d7ae563a4c464e93d6cde877", size = 9811625, upload-time = "2026-02-21T10:24:08.152Z" }, - { url = "https://files.pythonhosted.org/packages/e3/9a/a6e358a18815ab090ef55187da0066df01a955c7c44a61fb83b127055f23/maturin-1.12.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:88e09e6c386b08974fab0c7e4c07d7c7c50a0ba63095d31e930d80568488e1be", size = 10255812, upload-time = "2026-02-21T10:24:15.117Z" }, - { url = "https://files.pythonhosted.org/packages/4a/c5/84dfcce1f3475237cba6e6201a1939980025afbb41c076aa5147b10ac202/maturin-1.12.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:5cc56481b0f360571587c35a1d960ce6d0a0258d49aebb6af98fff9db837c337", size = 9645462, upload-time = "2026-02-21T10:24:28.814Z" }, - { url = "https://files.pythonhosted.org/packages/de/82/0845fff86ea044028302db17bc611e9bfe1b7b2c992756162cbe71267df5/maturin-1.12.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:8fd7eb0c9bb017e98d81aa86a1d440b912fe4f7f219571035dd6ab330c82071c", size = 9593649, upload-time = "2026-02-21T10:24:33.376Z" }, - { url = "https://files.pythonhosted.org/packages/2b/14/6e8969cd48c7c8ea27d7638e572d46eeba9aa0cb370d3031eb6a3f10ff8d/maturin-1.12.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = 
"sha256:5bb07c349dd066277a61e017a6d6e0860cd54b7b33f8ead10b9e5a4ffb740a0a", size = 12681515, upload-time = "2026-02-21T10:24:31.097Z" }, - { url = "https://files.pythonhosted.org/packages/ac/8d/2ad86623dca3cfa394049f4220188dececa6e4cefd73ac1f1385fc79c876/maturin-1.12.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c21baaed066b5bec893db2d261bfe3b9da054d99c018326f0bdcf1dc4c3a1eb9", size = 10448453, upload-time = "2026-02-21T10:24:26.827Z" }, - { url = "https://files.pythonhosted.org/packages/9c/eb/c66e2d3272e74dd590ae81bb51590bd98c3cd4e3f6629d4e4218bd6a5c28/maturin-1.12.4-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:939c4c57efa8ea982a991ee3ccb3992364622e9cbd1ede922b5cfb0f652bf517", size = 9970879, upload-time = "2026-02-21T10:24:12.881Z" }, - { url = "https://files.pythonhosted.org/packages/38/a0/998f8063d67fa19639179af7e8ea46016ceaa12f85b9720a2e4846449f43/maturin-1.12.4-py3-none-win32.whl", hash = "sha256:d72f626616292cb3e283941f47835ffc608207ebd8f95f4c50523a6631ffcb2e", size = 8518146, upload-time = "2026-02-21T10:24:17.296Z" }, - { url = "https://files.pythonhosted.org/packages/69/14/6ceea315db6e47093442ec70c2d01bb011d69f5243de5fc0e6a5fab97513/maturin-1.12.4-py3-none-win_amd64.whl", hash = "sha256:ab32c5ff7579a549421cae03e6297d3b03d7b81fa2934e3bdf24a102d99eb378", size = 9863686, upload-time = "2026-02-21T10:24:19.35Z" }, - { url = "https://files.pythonhosted.org/packages/d4/28/73e14739c6f7605ff9b9d108726d3ff529d4f91a7838739b4dd0afd33ec1/maturin-1.12.4-py3-none-win_arm64.whl", hash = "sha256:b8c05d24209af50ed9ae9e5de473c84866b9676c637fcfad123ee57f4a9ed098", size = 8557843, upload-time = "2026-02-21T10:24:23.894Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/a4/15/4c41c4c951718f8c17ed1621b7999afb8d72d69c731c987b47e3c138d4ad/maturin-1.12.5.tar.gz", hash = "sha256:236943b7aff3e85ccd2b7a35ca10b64595f2c169bbb605e349e54534ff561a15", size = 267392, upload-time = "2026-02-28T12:18:14.632Z" } +wheels = [ 
+ { url = "https://files.pythonhosted.org/packages/b5/9e/4b910f5e0a46d7ab0050b58eecd2eaf4e0df8665982a0c5925371d4b0593/maturin-1.12.5-py3-none-linux_armv6l.whl", hash = "sha256:e5945534107439cf4f3734f195bc54f56515f5d465e96041f4866a2f15605ee8", size = 9793120, upload-time = "2026-02-28T12:18:19.707Z" }, + { url = "https://files.pythonhosted.org/packages/c5/e0/d670dfd96dc331d4a7cedeee49705f0dc5b5d0d6e78d46b92ada2825711c/maturin-1.12.5-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:74bc126c4d4606cd526aedc993de320b1529c2659b4d3f4029a824c96ef39b92", size = 19018577, upload-time = "2026-02-28T12:18:39.779Z" }, + { url = "https://files.pythonhosted.org/packages/e2/86/91d829eb28f2d21f001df29b98f73b017e3adfaa49a3b4b2666ffcf7c12f/maturin-1.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:98e49546bff6319c3c59b22f9de43161fa09d3a756dc2f04829ea22ed00e2ed6", size = 9838517, upload-time = "2026-02-28T12:18:22.916Z" }, + { url = "https://files.pythonhosted.org/packages/bb/37/9a17341679710b79c530557703eb1fd2732aa41a5c46810633cf3305c225/maturin-1.12.5-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:e1a3ceb55349a16fef6e1662c170af2f2636a690a6fdae8edb5e71edcf5a3a5a", size = 9827192, upload-time = "2026-02-28T12:18:34.031Z" }, + { url = "https://files.pythonhosted.org/packages/b7/95/cec17826cdbfcbcc8482540de1559053a26e2c3cf4df5ee8515e04bd2cf2/maturin-1.12.5-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:ed59f6a24a9b107a2812b8c0ef48a0f0abddcfa18e120dd028bfc8fa2883ff2c", size = 10242722, upload-time = "2026-02-28T12:18:12.52Z" }, + { url = "https://files.pythonhosted.org/packages/f4/8a/63f992c82bfbeb79293c1db294e8e4045cd8b3560ce1139aebde47c53be6/maturin-1.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:841b243e4212d343aac1e6b02a523d14bd8ae1291594fa1d875b08448863742a", size = 9686989, upload-time = 
"2026-02-28T12:18:16.939Z" }, + { url = "https://files.pythonhosted.org/packages/db/05/b03a7cbfa019a3cce3fc9acd47426494cd906bd19068d995e6c49e7a75cd/maturin-1.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:7e647fad0236b80fd28b82b6f8bfe7771c7dbd8f41b96cff1f8b3d06e2ebb188", size = 9632401, upload-time = "2026-02-28T12:18:42.698Z" }, + { url = "https://files.pythonhosted.org/packages/59/ab/2d799e638df24b13ca74cc1c7b0d657653c0660d4241fd386c516ebdbc97/maturin-1.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:8e3ff2729a0ea5853ff000041701d3f0284d73ef0c9c29dd3568116bd5936e38", size = 12724579, upload-time = "2026-02-28T12:18:45.626Z" }, + { url = "https://files.pythonhosted.org/packages/93/1c/0eb3a9382ec11eec76451e2560812a600245e33ca3245c4ef45e32a664d5/maturin-1.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90a499ec738f3cfbb7ad0256c357b4d5b8e74ec98568c8ffbe4911830fc8e233", size = 10446845, upload-time = "2026-02-28T12:18:28.837Z" }, + { url = "https://files.pythonhosted.org/packages/75/df/1779773ca5561abb22d968289285a6ca2f40d87472f28a7318eff26b9f24/maturin-1.12.5-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:710240583c0431c63975bd975b3c54987fe3f014081cac2b18d68e5176fb54c2", size = 10005785, upload-time = "2026-02-28T12:18:26.197Z" }, + { url = "https://files.pythonhosted.org/packages/c8/1b/b02bec7f44b48f2ef7c6729c931fe1329bd95074b7b85abb547146912a9b/maturin-1.12.5-py3-none-win32.whl", hash = "sha256:91c163cd96978eba35137284714065052357f6d73096956b39bce38e0e62f81a", size = 8555052, upload-time = "2026-02-28T12:18:09.819Z" }, + { url = "https://files.pythonhosted.org/packages/65/b2/ff02747eedc86972822e2c06ffb2e0cff2f01a71d626122ba289529b256d/maturin-1.12.5-py3-none-win_amd64.whl", hash = "sha256:452382b0ccd9416df5a4eabe77efad7d7ac204eabff61d043f243b888f59bc46", size = 9894151, upload-time = 
"2026-02-28T12:18:36.752Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a4/ba4160439870cd434525943c19eedbc7a761d3dba91ee8e0b5caf56320e3/maturin-1.12.5-py3-none-win_arm64.whl", hash = "sha256:91dcb25b0d6e2c76d18af4491ca6d7df34b91f9a026415b447908b8c5c2a7fc0", size = 8590220, upload-time = "2026-02-28T12:18:31.462Z" }, ] [[package]] From 2510158d61145d596ee007342417df46836496e2 Mon Sep 17 00:00:00 2001 From: Ciaran Ryan-Anderson Date: Sat, 28 Feb 2026 15:30:50 -0700 Subject: [PATCH 3/5] benchmark against native code --- Cargo.lock | 1 + crates/benchmarks/Cargo.toml | 1 + crates/benchmarks/benches/benchmarks.rs | 6 +- .../modules/native_statevec_comparison.rs | 902 ++++++++++++++++++ crates/pecos-cuquantum/src/statevec.rs | 10 +- crates/pecos-gpu-sims/src/gpu.rs | 37 + crates/pecos-qsim/src/state_vec_soa.rs | 201 ++-- crates/pecos-quest/build_quest.rs | 269 ++---- crates/pecos-qulacs/build.rs | 11 + crates/pecos-qulacs/src/bridge.rs | 6 + crates/pecos-qulacs/src/lib.rs | 2 +- crates/pecos-qulacs/src/qulacs_wrapper.cpp | 18 + crates/pecos-qulacs/src/qulacs_wrapper.h | 6 + crates/pecos/src/bin/cli.rs | 10 +- crates/pecos/src/bin/cli/rust_cmd.rs | 43 +- scripts/native_bench/.gitignore | 1 + scripts/native_bench/bench_quest.c | 188 ++++ scripts/native_bench/bench_qulacs.cpp | 299 ++++++ scripts/native_bench/run.sh | 208 ++++ 19 files changed, 1961 insertions(+), 258 deletions(-) create mode 100644 crates/benchmarks/benches/modules/native_statevec_comparison.rs create mode 100644 scripts/native_bench/.gitignore create mode 100644 scripts/native_bench/bench_quest.c create mode 100644 scripts/native_bench/bench_qulacs.cpp create mode 100755 scripts/native_bench/run.sh diff --git a/Cargo.lock b/Cargo.lock index ff51170f8..f1146dc9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,7 @@ name = "benchmarks" version = "0.1.1" dependencies = [ "criterion", + "cxx", "num-complex 0.4.6", "pecos", "pecos-core", diff --git a/crates/benchmarks/Cargo.toml 
b/crates/benchmarks/Cargo.toml index 266705d9c..b83c325e4 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -35,6 +35,7 @@ pecos-qsim.workspace = true [dev-dependencies] criterion.workspace = true +cxx.workspace = true num-complex.workspace = true pecos = { workspace = true, features = ["runtime"] } pecos-engines.workspace = true diff --git a/crates/benchmarks/benches/benchmarks.rs b/crates/benchmarks/benches/benchmarks.rs index 97bdfed4c..3dbf686cd 100644 --- a/crates/benchmarks/benches/benchmarks.rs +++ b/crates/benchmarks/benches/benchmarks.rs @@ -23,6 +23,7 @@ mod modules { #[cfg(feature = "gpu-sims")] pub mod gpu_influence_sampler; pub mod measurement_sampling; + pub mod native_statevec_comparison; pub mod noise_models; #[cfg(feature = "cppsparsesim")] pub mod sparse_stab_vs_cpp; @@ -44,8 +45,8 @@ use modules::gpu_influence_sampler; use modules::sparse_stab_vs_cpp; use modules::{ allocation_overhead, cpu_stabilizer_comparison, dem_sampler, dod_statevec, - measurement_sampling, noise_models, rng, set_ops, sparse_state_vec, stabilizer_sims, - state_vec_sims, surface_code, trig, + measurement_sampling, native_statevec_comparison, noise_models, rng, set_ops, + sparse_state_vec, stabilizer_sims, state_vec_sims, surface_code, trig, }; fn all_benchmarks(c: &mut Criterion) { @@ -58,6 +59,7 @@ fn all_benchmarks(c: &mut Criterion) { #[cfg(feature = "gpu-sims")] gpu_influence_sampler::benchmarks(c); measurement_sampling::benchmarks(c); + native_statevec_comparison::benchmarks(c); noise_models::benchmarks(c); rng::benchmarks(c); set_ops::benchmarks(c); diff --git a/crates/benchmarks/benches/modules/native_statevec_comparison.rs b/crates/benchmarks/benches/modules/native_statevec_comparison.rs new file mode 100644 index 000000000..b9182369c --- /dev/null +++ b/crates/benchmarks/benches/modules/native_statevec_comparison.rs @@ -0,0 +1,902 @@ +// Copyright 2025 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); you may not use this file except +// in compliance with the License.You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +//! Native state vector comparison benchmarks. +//! +//! Calls QuEST and Qulacs FFI directly (bypassing the PECOS wrapper layer's qubit index +//! remapping, bounds checks, and `QubitId`/`Angle64` conversions) to give an apples-to-apples +//! comparison of raw gate computation performance against the pure-Rust PECOS simulators. +//! +//! GPU simulators (`GpuStateVec` via wgpu, `CuStateVec` via cuQuantum) are included when their +//! respective features (`gpu-sims`, `cuquantum`) are enabled. + +use criterion::{BenchmarkId, Criterion, measurement::Measurement}; +use pecos_core::{Angle64, QubitId}; +use pecos_qsim::{ + ArbitraryRotationGateable, CliffordGateable, QuantumSimulator, StateVecAoS, StateVecSoA, + StateVecSoA32, +}; +use std::hint::black_box; + +#[cfg(feature = "quest")] +use pecos_quest::bridge::ffi as quest_ffi; + +#[cfg(feature = "qulacs")] +use pecos_qulacs::bridge::ffi as qulacs_ffi; + +#[cfg(feature = "gpu-sims")] +use pecos_gpu_sims::{GpuStateVec, gates as gpu_gates}; + +#[cfg(feature = "cuquantum")] +use pecos_cuquantum::CuStateVec; + +// --------------------------------------------------------------------------- +// Helpers for PECOS simulators (trait-based calls) +// --------------------------------------------------------------------------- + +fn pecos_circuit( + sim: &mut S, + num_qubits: usize, + num_layers: usize, +) { + for _layer in 0..num_layers { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); 
+ } + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } +} + +// --------------------------------------------------------------------------- +// QuEST direct FFI helpers +// --------------------------------------------------------------------------- + +#[cfg(feature = "quest")] +struct QuestState { + env_ptr: *mut u8, + qureg_ptr: *mut u8, +} + +#[cfg(feature = "quest")] +impl QuestState { + fn new(num_qubits: usize) -> Self { + let env_ptr = quest_ffi::quest_create_env(); + assert!(!env_ptr.is_null(), "Failed to create QuEST environment"); + let qureg_ptr = unsafe { + quest_ffi::quest_create_qureg(env_ptr, num_qubits as i32) + }; + assert!(!qureg_ptr.is_null(), "Failed to create QuEST qureg"); + unsafe { quest_ffi::quest_init_zero_state(qureg_ptr) }; + Self { env_ptr, qureg_ptr } + } +} + +#[cfg(feature = "quest")] +impl Drop for QuestState { + fn drop(&mut self) { + unsafe { + quest_ffi::quest_destroy_qureg(self.qureg_ptr); + quest_ffi::quest_destroy_env(self.env_ptr); + } + } +} + +#[cfg(feature = "quest")] +fn quest_circuit(qs: &QuestState, num_qubits: usize, num_layers: usize) { + let qureg = qs.qureg_ptr; + unsafe { + for _layer in 0..num_layers { + for q in 0..num_qubits { + quest_ffi::quest_apply_hadamard(qureg, q as i32); + quest_ffi::quest_apply_rotation_z(qureg, q as i32, 0.1); + } + for q in 0..num_qubits - 1 { + quest_ffi::quest_apply_cnot(qureg, q as i32, (q + 1) as i32); + } + } + } +} + +// --------------------------------------------------------------------------- +// Qulacs direct FFI helpers +// --------------------------------------------------------------------------- + +#[cfg(feature = "qulacs")] +fn qulacs_circuit( + state: &mut cxx::UniquePtr, + num_qubits: usize, + num_layers: usize, +) { + for _layer in 0..num_layers { + for q in 0..num_qubits { + qulacs_ffi::csim_h(state.pin_mut(), q); + qulacs_ffi::csim_rz(state.pin_mut(), q, 0.1); + } + for q in 0..num_qubits - 1 { + qulacs_ffi::csim_cnot(state.pin_mut(), q, 
q + 1); + } + } +} + +// --------------------------------------------------------------------------- +// GpuStateVec direct helpers (bypasses trait layer, calls wgpu dispatch directly) +// --------------------------------------------------------------------------- + +#[cfg(feature = "gpu-sims")] +fn gpu_circuit(sim: &mut GpuStateVec, num_qubits: usize, num_layers: usize) { + let rz_matrix = gpu_gates::rz(0.1); + for _layer in 0..num_layers { + for q in 0..num_qubits { + sim.apply_single_gate(q as u32, gpu_gates::H); + sim.apply_single_gate(q as u32, rz_matrix); + } + for q in 0..num_qubits - 1 { + sim.apply_cx(q as u32, (q + 1) as u32); + } + } +} + +// --------------------------------------------------------------------------- +// CuStateVec direct helpers (bypasses trait layer, calls custatevecApplyMatrix directly) +// --------------------------------------------------------------------------- + +#[cfg(feature = "cuquantum")] +mod cuquantum_matrices { + use std::f64::consts::FRAC_1_SQRT_2; + + pub const H: [[f64; 2]; 4] = [ + [FRAC_1_SQRT_2, 0.0], + [FRAC_1_SQRT_2, 0.0], + [FRAC_1_SQRT_2, 0.0], + [-FRAC_1_SQRT_2, 0.0], + ]; + + pub const X: [[f64; 2]; 4] = [[0.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 0.0]]; + + pub const CX: [[f64; 2]; 16] = [ + [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], + [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], + [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], + [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], + ]; + + pub fn rz(theta: f64) -> [[f64; 2]; 4] { + let c = (theta / 2.0).cos(); + let s = (theta / 2.0).sin(); + [[c, -s], [0.0, 0.0], [0.0, 0.0], [c, s]] + } +} + +#[cfg(feature = "cuquantum")] +fn cuquantum_circuit(sim: &mut CuStateVec, num_qubits: usize, num_layers: usize) { + let rz_matrix = cuquantum_matrices::rz(0.1); + for _layer in 0..num_layers { + for q in 0..num_qubits { + sim.apply_matrix_1q(q, &cuquantum_matrices::H); + sim.apply_matrix_1q(q, &rz_matrix); + } + for q in 0..num_qubits - 1 { + sim.apply_matrix_2q(q, q 
+ 1, &cuquantum_matrices::CX); + } + } +} + +// --------------------------------------------------------------------------- +// Benchmark group 1: Layered circuit scaling +// --------------------------------------------------------------------------- + +fn bench_native_statevec_comparison(c: &mut Criterion) { + let mut group = c.benchmark_group("Native StateVec Comparison"); + group.sample_size(20); + + let configs = [ + (10, 20), + (14, 20), + (18, 20), + (20, 20), + (22, 10), + (24, 5), + ]; + + for (num_qubits, num_layers) in configs { + let label = format!("{num_qubits}q_{num_layers}l"); + + // -- StateVecSoA (default: fusion on) -- + group.bench_with_input( + BenchmarkId::new("StateVecSoA", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + let mut sim = StateVecSoA::new(nq); + sim.set_parallel(false); + b.iter(|| { + sim.reset(); + pecos_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + + // -- StateVecSoA (no fusion) -- + group.bench_with_input( + BenchmarkId::new("StateVecSoA/no_fusion", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + let mut sim = StateVecSoA::new(nq); + sim.set_parallel(false); + sim.set_fusion(false); + b.iter(|| { + sim.reset(); + pecos_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + + // -- StateVecAoS -- + group.bench_with_input( + BenchmarkId::new("StateVecAoS", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + let mut sim = StateVecAoS::new(nq); + b.iter(|| { + sim.reset(); + pecos_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + + // -- StateVecSoA32 (default: fusion on) -- + group.bench_with_input( + BenchmarkId::new("StateVecSoA32", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + let mut sim = StateVecSoA32::new(nq); + b.iter(|| { + sim.reset(); + pecos_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + + // -- StateVecSoA32 (no fusion) -- + group.bench_with_input( + BenchmarkId::new("StateVecSoA32/no_fusion", &label), + &(num_qubits, 
num_layers), + |b, &(nq, nl)| { + let mut sim = StateVecSoA32::new(nq); + sim.set_fusion(false); + b.iter(|| { + sim.reset(); + pecos_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + + // -- QuEST direct FFI -- + #[cfg(feature = "quest")] + { + let quest_name = if cfg!(feature = "quest-cuda") { + "QuEST_CUDA_direct" + } else { + "QuEST_direct" + }; + let qs = QuestState::new(num_qubits); + group.bench_with_input( + BenchmarkId::new(quest_name, &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + b.iter(|| { + unsafe { quest_ffi::quest_init_zero_state(qs.qureg_ptr) }; + quest_circuit(&qs, nq, nl); + black_box(()); + }); + }, + ); + } + + // -- Qulacs direct FFI -- + #[cfg(feature = "qulacs")] + { + let mut state = qulacs_ffi::create_quantum_state(num_qubits); + group.bench_with_input( + BenchmarkId::new("Qulacs_direct", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + b.iter(|| { + qulacs_ffi::reset(state.pin_mut()); + qulacs_circuit(&mut state, nq, nl); + black_box(()); + }); + }, + ); + } + + // -- GpuStateVec direct (wgpu) -- + #[cfg(feature = "gpu-sims")] + if let Ok(mut sim) = GpuStateVec::new(num_qubits as u32) { + group.bench_with_input( + BenchmarkId::new("GpuStateVec_direct", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + b.iter(|| { + sim.reset(); + gpu_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + } + + // -- CuStateVec direct (cuQuantum) -- + #[cfg(feature = "cuquantum")] + match CuStateVec::new(num_qubits) { + Ok(mut sim) => { + group.bench_with_input( + BenchmarkId::new("CuStateVec_direct", &label), + &(num_qubits, num_layers), + |b, &(nq, nl)| { + b.iter(|| { + sim.reset(); + cuquantum_circuit(&mut sim, nq, nl); + black_box(()); + }); + }, + ); + } + Err(e) => eprintln!("CuStateVec not available: {e}"), + } + + } + + group.finish(); +} + +// --------------------------------------------------------------------------- +// Benchmark group 2: Individual gate comparison +// 
--------------------------------------------------------------------------- + +fn bench_native_individual_gates(c: &mut Criterion) { + let mut group = c.benchmark_group("Native Individual Gates"); + group.sample_size(50); + + let num_qubits: usize = 18; + let iters: usize = 100; + + // ---- H gate ---- + + group.bench_function("H/StateVecSoA", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("H/StateVecSoA_fused", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("H/StateVecAoS", |b| { + let mut sim = StateVecAoS::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("H/StateVecSoA32", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("H/StateVecSoA32_fused", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.h(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "quest")] + { + let quest_h_name = if cfg!(feature = "quest-cuda") { + "H/QuEST_CUDA_direct" + } else { + "H/QuEST_direct" + }; + group.bench_function(quest_h_name, |b| { + let qs = QuestState::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + unsafe { quest_ffi::quest_apply_hadamard(qs.qureg_ptr, q as i32) }; + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "qulacs")] + group.bench_function("H/Qulacs_direct", |b| 
{ + let mut state = qulacs_ffi::create_quantum_state(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + qulacs_ffi::csim_h(state.pin_mut(), q); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "gpu-sims")] + if let Ok(mut sim) = GpuStateVec::new(num_qubits as u32) { + group.bench_function("H/GpuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_single_gate(q as u32, gpu_gates::H); + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "cuquantum")] + if let Ok(mut sim) = CuStateVec::new(num_qubits) { + group.bench_function("H/CuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_matrix_1q(q, &cuquantum_matrices::H); + } + } + black_box(()); + }); + }); + } + + // ---- X gate ---- + + group.bench_function("X/StateVecSoA", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.x(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("X/StateVecSoA_fused", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.x(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("X/StateVecAoS", |b| { + let mut sim = StateVecAoS::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.x(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("X/StateVecSoA32", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.x(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("X/StateVecSoA32_fused", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + 
sim.x(&[QubitId(q)]); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "quest")] + { + let quest_x_name = if cfg!(feature = "quest-cuda") { + "X/QuEST_CUDA_direct" + } else { + "X/QuEST_direct" + }; + group.bench_function(quest_x_name, |b| { + let qs = QuestState::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + unsafe { quest_ffi::quest_apply_pauli_x(qs.qureg_ptr, q as i32) }; + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "qulacs")] + group.bench_function("X/Qulacs_direct", |b| { + let mut state = qulacs_ffi::create_quantum_state(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + qulacs_ffi::csim_x(state.pin_mut(), q); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "gpu-sims")] + if let Ok(mut sim) = GpuStateVec::new(num_qubits as u32) { + group.bench_function("X/GpuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_single_gate(q as u32, gpu_gates::X); + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "cuquantum")] + if let Ok(mut sim) = CuStateVec::new(num_qubits) { + group.bench_function("X/CuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_matrix_1q(q, &cuquantum_matrices::X); + } + } + black_box(()); + }); + }); + } + + // ---- CX gate ---- + + group.bench_function("CX/StateVecSoA", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } + black_box(()); + }); + }); + + group.bench_function("CX/StateVecSoA_fused", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } + black_box(()); + }); + }); + + group.bench_function("CX/StateVecAoS", |b| { + let mut sim = 
StateVecAoS::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } + black_box(()); + }); + }); + + group.bench_function("CX/StateVecSoA32", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } + black_box(()); + }); + }); + + group.bench_function("CX/StateVecSoA32_fused", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.cx(&[QubitId(q), QubitId(q + 1)]); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "quest")] + { + let quest_cx_name = if cfg!(feature = "quest-cuda") { + "CX/QuEST_CUDA_direct" + } else { + "CX/QuEST_direct" + }; + group.bench_function(quest_cx_name, |b| { + let qs = QuestState::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + unsafe { + quest_ffi::quest_apply_cnot(qs.qureg_ptr, q as i32, (q + 1) as i32); + } + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "qulacs")] + group.bench_function("CX/Qulacs_direct", |b| { + let mut state = qulacs_ffi::create_quantum_state(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + qulacs_ffi::csim_cnot(state.pin_mut(), q, q + 1); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "gpu-sims")] + if let Ok(mut sim) = GpuStateVec::new(num_qubits as u32) { + group.bench_function("CX/GpuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.apply_cx(q as u32, (q + 1) as u32); + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "cuquantum")] + if let Ok(mut sim) = CuStateVec::new(num_qubits) { + group.bench_function("CX/CuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits - 1 { + sim.apply_matrix_2q(q, q + 1, &cuquantum_matrices::CX); + } + } + 
black_box(()); + }); + }); + } + + // ---- RZ gate ---- + + group.bench_function("RZ/StateVecSoA", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("RZ/StateVecSoA_fused", |b| { + let mut sim = StateVecSoA::new(num_qubits); + sim.set_parallel(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("RZ/StateVecAoS", |b| { + let mut sim = StateVecAoS::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("RZ/StateVecSoA32", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + sim.set_fusion(false); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); + } + } + black_box(()); + }); + }); + + group.bench_function("RZ/StateVecSoA32_fused", |b| { + let mut sim = StateVecSoA32::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.rz(Angle64::from_radians(0.1), &[QubitId(q)]); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "quest")] + { + let quest_rz_name = if cfg!(feature = "quest-cuda") { + "RZ/QuEST_CUDA_direct" + } else { + "RZ/QuEST_direct" + }; + group.bench_function(quest_rz_name, |b| { + let qs = QuestState::new(num_qubits); + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + unsafe { quest_ffi::quest_apply_rotation_z(qs.qureg_ptr, q as i32, 0.1) }; + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "qulacs")] + group.bench_function("RZ/Qulacs_direct", |b| { + let mut state = qulacs_ffi::create_quantum_state(num_qubits); + b.iter(|| { + for 
_ in 0..iters { + for q in 0..num_qubits { + qulacs_ffi::csim_rz(state.pin_mut(), q, 0.1); + } + } + black_box(()); + }); + }); + + #[cfg(feature = "gpu-sims")] + if let Ok(mut sim) = GpuStateVec::new(num_qubits as u32) { + let rz_matrix = gpu_gates::rz(0.1); + group.bench_function("RZ/GpuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_single_gate(q as u32, rz_matrix); + } + } + black_box(()); + }); + }); + } + + #[cfg(feature = "cuquantum")] + if let Ok(mut sim) = CuStateVec::new(num_qubits) { + let rz_matrix = cuquantum_matrices::rz(0.1); + group.bench_function("RZ/CuStateVec_direct", |b| { + b.iter(|| { + for _ in 0..iters { + for q in 0..num_qubits { + sim.apply_matrix_1q(q, &rz_matrix); + } + } + black_box(()); + }); + }); + } + + group.finish(); +} + +// --------------------------------------------------------------------------- +// Public entry point +// --------------------------------------------------------------------------- + +pub fn benchmarks(c: &mut Criterion) { + bench_native_statevec_comparison(c); + bench_native_individual_gates(c); +} diff --git a/crates/pecos-cuquantum/src/statevec.rs b/crates/pecos-cuquantum/src/statevec.rs index 3acc88b92..7f9ea7cd7 100644 --- a/crates/pecos-cuquantum/src/statevec.rs +++ b/crates/pecos-cuquantum/src/statevec.rs @@ -177,11 +177,14 @@ impl CuStateVec { /// Apply a single-qubit gate specified by a 2x2 matrix /// + /// This is the lowest-level gate application method, calling `custatevecApplyMatrix` + /// directly. Use this to bypass trait overhead in benchmarks. 
+ /// /// # Arguments /// * `qubit` - Target qubit index /// * `matrix` - 2x2 unitary matrix in row-major order [a, b, c, d] /// Each element is [real, imag] - fn apply_matrix_1q(&mut self, qubit: usize, matrix: &[[f64; 2]; 4]) { + pub fn apply_matrix_1q(&mut self, qubit: usize, matrix: &[[f64; 2]; 4]) { debug_assert!(qubit < self.num_qubits, "qubit index out of range"); // Convert to cuDoubleComplex format @@ -232,11 +235,14 @@ impl CuStateVec { /// Apply a two-qubit gate specified by a 4x4 matrix /// + /// This is the lowest-level two-qubit gate application method, calling + /// `custatevecApplyMatrix` directly. Use this to bypass trait overhead in benchmarks. + /// /// # Arguments /// * `qubit_a` - First qubit index (lower in ordering) /// * `qubit_b` - Second qubit index (higher in ordering) /// * `matrix` - 4x4 unitary matrix in row-major order (16 complex elements) - fn apply_matrix_2q(&mut self, qubit_a: usize, qubit_b: usize, matrix: &[[f64; 2]; 16]) { + pub fn apply_matrix_2q(&mut self, qubit_a: usize, qubit_b: usize, matrix: &[[f64; 2]; 16]) { debug_assert!(qubit_a < self.num_qubits, "qubit_a index out of range"); debug_assert!(qubit_b < self.num_qubits, "qubit_b index out of range"); debug_assert!(qubit_a != qubit_b, "qubits must be different"); diff --git a/crates/pecos-gpu-sims/src/gpu.rs b/crates/pecos-gpu-sims/src/gpu.rs index 848007501..c5578c572 100644 --- a/crates/pecos-gpu-sims/src/gpu.rs +++ b/crates/pecos-gpu-sims/src/gpu.rs @@ -606,6 +606,43 @@ impl GpuStateVec { self.queue.submit(std::iter::once(encoder.finish())); } + /// Apply a single CX (CNOT) gate directly. + /// + /// This bypasses the trait layer and dispatches directly to the GPU. 
+ pub fn apply_cx(&mut self, control: u32, target: u32) { + let params = GateParams { + target_qubit: target, + control_qubit: control, + num_qubits: self.num_qubits, + _padding: 0, + matrix_row0: [0.0; 4], + matrix_row1: [0.0; 4], + }; + + self.queue + .write_buffer(&self.params_buffer, 0, bytemuck::bytes_of(¶ms)); + + let mut encoder = self + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("CX encoder"), + }); + + { + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("CX pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&self.cx_pipeline); + pass.set_bind_group(0, &self.gate_bind_group, &[0]); + + let (wg_x, wg_y) = Self::compute_workgroups(self.num_amplitudes); + pass.dispatch_workgroups(wg_x, wg_y, 1); + } + + self.queue.submit(std::iter::once(encoder.finish())); + } + /// Apply CX gates to multiple qubit pairs in a single GPU submission. /// /// Takes qubits as interleaved pairs: [control0, target0, control1, target1, ...] 
diff --git a/crates/pecos-qsim/src/state_vec_soa.rs b/crates/pecos-qsim/src/state_vec_soa.rs index 1a8922349..5ad2039a7 100644 --- a/crates/pecos-qsim/src/state_vec_soa.rs +++ b/crates/pecos-qsim/src/state_vec_soa.rs @@ -493,7 +493,7 @@ where scratch_real: Vec::new(), scratch_imag: Vec::new(), pending_gates: vec![None; num_qubits], - fusion_enabled: false, + fusion_enabled: true, parallel_enabled: false, num_threads: None, } @@ -1443,21 +1443,49 @@ where // |0⟩ component: multiply by e^(-i*theta/2) = cos - i*sin // |1⟩ component: multiply by e^(i*theta/2) = cos + i*sin - for i in (0..n).step_by(step * 2) { - for j in i..(i + step) { - let paired_j = j + step; + if step >= 4 { + // SIMD path + let cos_v = f64x4::splat(cos_t); + let sin_v = f64x4::splat(sin_t); + for i in (0..n).step_by(step * 2) { + let mut j = i; + while j + 4 <= i + step { + let paired_j = j + step; - // |0⟩: (re, im) * (cos, -sin) = (re*cos + im*sin, im*cos - re*sin) - let a_re = self.real[j]; - let a_im = self.imag[j]; - self.real[j] = a_re * cos_t + a_im * sin_t; - self.imag[j] = a_im * cos_t - a_re * sin_t; + let a_re = f64x4::from(&self.real[j..j + 4]); + let a_im = f64x4::from(&self.imag[j..j + 4]); + let b_re = f64x4::from(&self.real[paired_j..paired_j + 4]); + let b_im = f64x4::from(&self.imag[paired_j..paired_j + 4]); - // |1⟩: (re, im) * (cos, sin) = (re*cos - im*sin, im*cos + re*sin) - let b_re = self.real[paired_j]; - let b_im = self.imag[paired_j]; - self.real[paired_j] = b_re * cos_t - b_im * sin_t; - self.imag[paired_j] = b_im * cos_t + b_re * sin_t; + let new_a_re: [f64; 4] = (a_re * cos_v + a_im * sin_v).into(); + let new_a_im: [f64; 4] = (a_im * cos_v - a_re * sin_v).into(); + let new_b_re: [f64; 4] = (b_re * cos_v - b_im * sin_v).into(); + let new_b_im: [f64; 4] = (b_im * cos_v + b_re * sin_v).into(); + + self.real[j..j + 4].copy_from_slice(&new_a_re); + self.imag[j..j + 4].copy_from_slice(&new_a_im); + self.real[paired_j..paired_j + 4].copy_from_slice(&new_b_re); + 
self.imag[paired_j..paired_j + 4].copy_from_slice(&new_b_im); + + j += 4; + } + } + } else { + // Scalar fallback + for i in (0..n).step_by(step * 2) { + for j in i..(i + step) { + let paired_j = j + step; + + let a_re = self.real[j]; + let a_im = self.imag[j]; + self.real[j] = a_re * cos_t + a_im * sin_t; + self.imag[j] = a_im * cos_t - a_re * sin_t; + + let b_re = self.real[paired_j]; + let b_im = self.imag[paired_j]; + self.real[paired_j] = b_re * cos_t - b_im * sin_t; + self.imag[paired_j] = b_im * cos_t + b_re * sin_t; + } } } } @@ -1472,21 +1500,49 @@ where let cos_t = half_theta.cos(); let sin_t = half_theta.sin(); - for i in (0..n).step_by(step * 2) { - for j in i..(i + step) { - let paired_j = j + step; + if step >= 4 { + // SIMD path + let cos_v = f64x4::splat(cos_t); + let sin_v = f64x4::splat(sin_t); + for i in (0..n).step_by(step * 2) { + let mut j = i; + while j + 4 <= i + step { + let paired_j = j + step; - let a_re = self.real[j]; - let a_im = self.imag[j]; - let b_re = self.real[paired_j]; - let b_im = self.imag[paired_j]; + let a_re = f64x4::from(&self.real[j..j + 4]); + let a_im = f64x4::from(&self.imag[j..j + 4]); + let b_re = f64x4::from(&self.real[paired_j..paired_j + 4]); + let b_im = f64x4::from(&self.imag[paired_j..paired_j + 4]); + + let new_a_re: [f64; 4] = (cos_v * a_re + sin_v * b_im).into(); + let new_a_im: [f64; 4] = (cos_v * a_im - sin_v * b_re).into(); + let new_b_re: [f64; 4] = (sin_v * a_im + cos_v * b_re).into(); + let new_b_im: [f64; 4] = (cos_v * b_im - sin_v * a_re).into(); + + self.real[j..j + 4].copy_from_slice(&new_a_re); + self.imag[j..j + 4].copy_from_slice(&new_a_im); + self.real[paired_j..paired_j + 4].copy_from_slice(&new_b_re); + self.imag[paired_j..paired_j + 4].copy_from_slice(&new_b_im); - // new_a = cos*a - i*sin*b = (cos*a_re + sin*b_im, cos*a_im - sin*b_re) - // new_b = -i*sin*a + cos*b = (sin*a_im + cos*b_re, -sin*a_re + cos*b_im) - self.real[j] = cos_t * a_re + sin_t * b_im; - self.imag[j] = cos_t * a_im - 
sin_t * b_re; - self.real[paired_j] = sin_t * a_im + cos_t * b_re; - self.imag[paired_j] = -sin_t * a_re + cos_t * b_im; + j += 4; + } + } + } else { + // Scalar fallback + for i in (0..n).step_by(step * 2) { + for j in i..(i + step) { + let paired_j = j + step; + + let a_re = self.real[j]; + let a_im = self.imag[j]; + let b_re = self.real[paired_j]; + let b_im = self.imag[paired_j]; + + self.real[j] = cos_t * a_re + sin_t * b_im; + self.imag[j] = cos_t * a_im - sin_t * b_re; + self.real[paired_j] = sin_t * a_im + cos_t * b_re; + self.imag[paired_j] = -sin_t * a_re + cos_t * b_im; + } } } } @@ -1501,21 +1557,49 @@ where let cos_t = half_theta.cos(); let sin_t = half_theta.sin(); - for i in (0..n).step_by(step * 2) { - for j in i..(i + step) { - let paired_j = j + step; + if step >= 4 { + // SIMD path + let cos_v = f64x4::splat(cos_t); + let sin_v = f64x4::splat(sin_t); + for i in (0..n).step_by(step * 2) { + let mut j = i; + while j + 4 <= i + step { + let paired_j = j + step; - let a_re = self.real[j]; - let a_im = self.imag[j]; - let b_re = self.real[paired_j]; - let b_im = self.imag[paired_j]; + let a_re = f64x4::from(&self.real[j..j + 4]); + let a_im = f64x4::from(&self.imag[j..j + 4]); + let b_re = f64x4::from(&self.real[paired_j..paired_j + 4]); + let b_im = f64x4::from(&self.imag[paired_j..paired_j + 4]); + + let new_a_re: [f64; 4] = (cos_v * a_re - sin_v * b_re).into(); + let new_a_im: [f64; 4] = (cos_v * a_im - sin_v * b_im).into(); + let new_b_re: [f64; 4] = (sin_v * a_re + cos_v * b_re).into(); + let new_b_im: [f64; 4] = (sin_v * a_im + cos_v * b_im).into(); + + self.real[j..j + 4].copy_from_slice(&new_a_re); + self.imag[j..j + 4].copy_from_slice(&new_a_im); + self.real[paired_j..paired_j + 4].copy_from_slice(&new_b_re); + self.imag[paired_j..paired_j + 4].copy_from_slice(&new_b_im); + + j += 4; + } + } + } else { + // Scalar fallback + for i in (0..n).step_by(step * 2) { + for j in i..(i + step) { + let paired_j = j + step; + + let a_re = self.real[j]; 
+ let a_im = self.imag[j]; + let b_re = self.real[paired_j]; + let b_im = self.imag[paired_j]; - // new_a = cos*a - sin*b - // new_b = sin*a + cos*b - self.real[j] = cos_t * a_re - sin_t * b_re; - self.imag[j] = cos_t * a_im - sin_t * b_im; - self.real[paired_j] = sin_t * a_re + cos_t * b_re; - self.imag[paired_j] = sin_t * a_im + cos_t * b_im; + self.real[j] = cos_t * a_re - sin_t * b_re; + self.imag[j] = cos_t * a_im - sin_t * b_im; + self.real[paired_j] = sin_t * a_re + cos_t * b_re; + self.imag[paired_j] = sin_t * a_im + cos_t * b_im; + } } } } @@ -1625,7 +1709,7 @@ where scratch_real: Vec::new(), scratch_imag: Vec::new(), pending_gates: vec![None; num_qubits], - fusion_enabled: false, + fusion_enabled: true, parallel_enabled: false, num_threads: None, } @@ -2235,9 +2319,7 @@ where let control = pair[0].index(); let target = pair[1].index(); - // Flush pending gates on both qubits before two-qubit operation - self.flush_qubit(control); - self.flush_qubit(target); + self.flush_two_qubit(control, target); let n = self.real.len(); let (q_lo, q_hi) = if control < target { @@ -2312,9 +2394,7 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); - // Flush pending gates on both qubits before two-qubit operation - self.flush_qubit(q1); - self.flush_qubit(q2); + self.flush_two_qubit(q1, q2); let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -2369,9 +2449,7 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); - // Flush pending gates on both qubits before two-qubit operation - self.flush_qubit(q1); - self.flush_qubit(q2); + self.flush_two_qubit(q1, q2); let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -2439,6 +2517,8 @@ where let control = pair[0].index(); let target = pair[1].index(); + self.flush_two_qubit(control, target); + let n = self.real.len(); let (q_lo, q_hi) = if control < target { (control, target) @@ -2541,6 +2621,8 @@ where let q1 = pair[0].index(); let q2 = 
pair[1].index(); + self.flush_two_qubit(q1, q2); + let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -2778,6 +2860,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -3019,6 +3103,9 @@ where for pair in qubits.chunks_exact(2) { let q1 = pair[0].index(); let q2 = pair[1].index(); + + self.flush_two_qubit(q1, q2); + let q_lo = q1.min(q2); // When both qubits >= 2, consecutive indices share the same phase @@ -3207,6 +3294,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -3609,8 +3698,7 @@ where d_im: 0.0, }; for &q in qubits { - self.flush_qubit(q.index()); - self.apply_fused_matrix(q.index(), &m); + self.queue_gate(q.index(), &m); } } else { for &q in qubits { @@ -3637,8 +3725,7 @@ where d_im: 0.0, }; for &q in qubits { - self.flush_qubit(q.index()); - self.apply_fused_matrix(q.index(), &m); + self.queue_gate(q.index(), &m); } } else { for &q in qubits { @@ -3666,8 +3753,7 @@ where d_im: sin, }; for &q in qubits { - self.flush_qubit(q.index()); - self.apply_fused_matrix(q.index(), &m); + self.queue_gate(q.index(), &m); } } else { for &q in qubits { @@ -3697,8 +3783,7 @@ where d_im: 0.0, }; for &q in qubits { - self.flush_qubit(q.index()); - self.apply_fused_matrix(q.index(), &m); + self.queue_gate(q.index(), &m); } self } diff --git a/crates/pecos-quest/build_quest.rs b/crates/pecos-quest/build_quest.rs index 3e60a13d1..7bca58638 100644 --- a/crates/pecos-quest/build_quest.rs +++ b/crates/pecos-quest/build_quest.rs @@ -1,11 +1,12 @@ //! Build script for `QuEST` integration //! -//! This build script produces: -//! 1. A static library (libquest-bridge.a) for CPU-only `QuEST` operations -//! 2. 
Optionally, a shared library (`libpecos_quest_cuda.so`) for CUDA operations (when cuda feature enabled) +//! This build script produces a static library (libquest-bridge.a) for `QuEST` operations. //! -//! The CUDA library is loaded at runtime via dlopen, allowing a single binary to work -//! on systems with and without CUDA installed. +//! When the `cuda` feature is enabled, GPU source files (`gpu_config.cpp`, `gpu_subroutines.cpp`) +//! are compiled with nvcc into a separate static archive (`libquest-gpu.a`) and linked in. +//! The remaining `QuEST` sources compile with the standard C++ compiler (they only contain +//! declarations with standard C++ types, no CUDA-specific syntax). This means the same +//! `quest_ffi` functions run on GPU transparently when CUDA is enabled. use log::{debug, info}; use pecos_build::{Manifest, Result, ensure_dep_ready, report_cache_config}; @@ -83,15 +84,18 @@ fn detect_cuda_path() -> Option { None } -/// Build the GPU shared library (`libpecos_quest_cuda.so`) +/// Build GPU source files with nvcc into a static archive (`libquest-gpu.a`) /// -/// This library contains the GPU-accelerated `QuEST` implementation and is loaded -/// at runtime via dlopen. This allows the main library to work on systems without CUDA. -#[allow(clippy::too_many_lines)] -fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> Option { - info!("Building GPU shared library (libpecos_quest_cuda.so)..."); +/// Only compiles the two GPU implementation files that require nvcc: +/// - `gpu_config.cpp` (GPU device management) +/// - `gpu_subroutines.cpp` (GPU kernel implementations) +/// +/// All other QuEST sources compile fine with the standard C++ compiler even +/// with `COMPILE_CUDA=1`, since the GPU headers only contain declarations +/// with standard C++ types. 
+fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> Option<()> { + info!("Building GPU static archive (libquest-gpu.a)..."); - // nvcc executable name differs by platform let nvcc_name = if cfg!(target_os = "windows") { "nvcc.exe" } else { @@ -99,71 +103,28 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - }; let nvcc_path = Path::new(cuda_path).join("bin").join(nvcc_name); info!("Using nvcc at: {}", nvcc_path.display()); + let quest_include_dir = quest_dir.join("include"); let quest_src_dir = quest_dir.join("src"); let gpu_dir = quest_src_dir.join("gpu"); - // Source files for the GPU library - let bridge_gpu = PathBuf::from("src/bridge_cuda.cpp"); - let gpu_config = gpu_dir.join("gpu_config.cpp"); - let gpu_subroutines = gpu_dir.join("gpu_subroutines.cpp"); - - // QuEST core files needed by the GPU library - let api_dir = quest_src_dir.join("api"); - let core_dir = quest_src_dir.join("core"); - let cpu_dir = quest_src_dir.join("cpu"); - let comm_dir = quest_src_dir.join("comm"); - - // Collect all source files - let source_files = vec![ - bridge_gpu, - gpu_config, - gpu_subroutines, - // API layer - api_dir.join("calculations.cpp"), - api_dir.join("channels.cpp"), - api_dir.join("debug.cpp"), - api_dir.join("decoherence.cpp"), - api_dir.join("environment.cpp"), - api_dir.join("initialisations.cpp"), - api_dir.join("matrices.cpp"), - api_dir.join("modes.cpp"), - api_dir.join("operations.cpp"), - api_dir.join("paulis.cpp"), - api_dir.join("qureg.cpp"), - api_dir.join("types.cpp"), - // Core utilities - core_dir.join("errors.cpp"), - core_dir.join("utilities.cpp"), - core_dir.join("validation.cpp"), - core_dir.join("memory.cpp"), - core_dir.join("printer.cpp"), - core_dir.join("randomiser.cpp"), - core_dir.join("parser.cpp"), - core_dir.join("localiser.cpp"), - core_dir.join("autodeployer.cpp"), - core_dir.join("accelerator.cpp"), - // CPU backend (still needed for some operations) - 
cpu_dir.join("cpu_config.cpp"), - cpu_dir.join("cpu_subroutines.cpp"), - // Communication - comm_dir.join("comm_config.cpp"), - comm_dir.join("comm_routines.cpp"), + // Only the GPU implementation files need nvcc + let gpu_sources = [ + gpu_dir.join("gpu_config.cpp"), + gpu_dir.join("gpu_subroutines.cpp"), ]; - // Compile all source files to object files let mut object_files = Vec::new(); - for src_file in &source_files { + for src_file in &gpu_sources { let file_stem = src_file.file_stem()?.to_str()?; - // Windows uses .obj extension, Unix uses .o let obj_ext = if cfg!(target_os = "windows") { "obj" } else { "o" }; - let obj_file = out_dir.join(format!("gpu_{file_stem}.{obj_ext}")); + let obj_file = out_dir.join(format!("gpu_static_{file_stem}.{obj_ext}")); - debug!("Compiling for GPU lib: {}", src_file.display()); + debug!("Compiling GPU source with nvcc: {}", src_file.display()); let mut compile_cmd = Command::new(&nvcc_path); compile_cmd .arg("-c") @@ -171,7 +132,7 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - .arg("-o") .arg(&obj_file) .arg("-x") - .arg("cu") // Treat .cpp files as CUDA source + .arg("cu") .arg("-I") .arg(&quest_include_dir) .arg("-I") @@ -179,7 +140,7 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - .arg("-I") .arg(quest_dir.parent()?) 
.arg("-I") - .arg("include") // For quest_ffi.h + .arg("include") .arg("--std=c++20") .arg("-DCOMPILE_GPU=1") .arg("-DCOMPILE_CUDA=1") @@ -188,19 +149,12 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - .arg("-DCOMPILE_MPI=0") .arg("-DCOMPILE_CUQUANTUM=0") .arg("-DFLOAT_PRECISION=2") - // Target compute capability 7.5 (Turing) which supports atomicAdd(double*, double) - // sm_75 is the minimum supported by both CUDA 12.x and 13.x .arg("-arch=sm_75") - // Allow newer GCC versions (e.g., GCC 14 in manylinux_2_28) .arg("-allow-unsupported-compiler"); - // Platform-specific compiler flags if cfg!(target_os = "windows") { - // Windows/MSVC: no -fPIC needed (not applicable) - // Use /EHsc for C++ exception handling compile_cmd.arg("-Xcompiler").arg("/EHsc"); } else { - // Unix: position-independent code for shared libraries compile_cmd.arg("-Xcompiler").arg("-fPIC"); } @@ -210,7 +164,7 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - let stdout_str = String::from_utf8_lossy(&output.stdout); let stderr_str = String::from_utf8_lossy(&output.stderr); eprintln!( - "ERROR: Failed to compile {} for GPU library", + "ERROR: Failed to compile {} with nvcc", src_file.display() ); eprintln!("Exit status: {:?}", output.status); @@ -226,90 +180,38 @@ fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) - object_files.push(obj_file); } - // Link into a shared library - let lib_name = if cfg!(target_os = "macos") { - "libpecos_quest_cuda.dylib" - } else if cfg!(target_os = "windows") { - "pecos_quest_cuda.dll" + // Archive into libquest-gpu.a + let archive_path = out_dir.join("libquest-gpu.a"); + info!("Archiving GPU objects into: {}", archive_path.display()); + + let ar_name = if cfg!(target_os = "windows") { + "lib.exe" } else { - "libpecos_quest_cuda.so" + "ar" }; - let gpu_lib_path = out_dir.join(lib_name); - - info!("Linking GPU shared library: {}", gpu_lib_path.display()); - - let mut 
link_cmd = Command::new(&nvcc_path); - link_cmd - .arg("-shared") - .arg("-o") - .arg(&gpu_lib_path) - .args(&object_files); - - // Platform-specific library paths and linking + let mut ar_cmd = Command::new(ar_name); if cfg!(target_os = "windows") { - // Windows: CUDA libraries are in lib\x64 - link_cmd - .arg(format!("-L{cuda_path}/lib/x64")) - .arg("-lcudart") - .arg("-lcublas"); - // Windows uses MSVC runtime, no need to explicitly link C++ stdlib + ar_cmd + .arg(format!("/OUT:{}", archive_path.display())) + .args(&object_files); } else { - // Unix: CUDA libraries are in lib64 - link_cmd - .arg(format!("-L{cuda_path}/lib64")) - .arg("-lcudart") - .arg("-lcublas"); - // Add C++ standard library - if cfg!(target_os = "macos") { - link_cmd.arg("-lc++"); - } else { - link_cmd.arg("-lstdc++"); - } + ar_cmd + .arg("rcs") + .arg(&archive_path) + .args(&object_files); } - let output = link_cmd.output().ok()?; - + let output = ar_cmd.output().ok()?; if !output.status.success() { let stderr_str = String::from_utf8_lossy(&output.stderr); - eprintln!("ERROR: Failed to link GPU shared library"); + eprintln!("ERROR: Failed to create GPU static archive"); eprintln!("{stderr_str}"); return None; } - info!( - "Successfully built GPU shared library: {}", - gpu_lib_path.display() - ); - - // Also copy to target directory for easier discovery - // Try CARGO_TARGET_DIR first, then derive from OUT_DIR - let target_lib_dir = if let Ok(target_dir) = env::var("CARGO_TARGET_DIR") { - let profile = get_build_profile(); - Some(Path::new(&target_dir).join(&profile)) - } else { - // OUT_DIR is something like: target/release/build/pecos-quest-xxx/out - // We want: target/release/ - out_dir - .parent() // build/pecos-quest-xxx - .and_then(|p| p.parent()) // build - .and_then(|p| p.parent()) // release or debug - .map(std::path::Path::to_path_buf) - }; - - if let Some(target_dir) = target_lib_dir { - let target_lib_path = target_dir.join(lib_name); - if let Some(parent) = 
target_lib_path.parent() { - let _ = fs::create_dir_all(parent); - } - if let Err(e) = fs::copy(&gpu_lib_path, &target_lib_path) { - debug!("Could not copy CUDA lib to target dir: {e}"); - } else { - info!("Copied CUDA lib to: {}", target_lib_path.display()); - } - } - - Some(gpu_lib_path) + info!("Successfully built GPU static archive: {}", archive_path.display()); + Some(()) } /// Patch `QuEST` GPU code for CUDA 13 compatibility @@ -401,10 +303,9 @@ fn generate_quest_header(quest_dir: &Path) -> Result<()> { // Since MULTI_LIB_HEADERS=0, we want the #if !0 block to be active // which means we need to process the #cmakedefine directives // - // IMPORTANT: The main library is ALWAYS CPU-only (COMPILE_CUDA=0). - // GPU support is provided via a separate shared library (libpecos_quest_cuda.so) - // which is compiled with nvcc and has its own COMPILE_CUDA=1 flag. - // This generated quest.h is only used by the main library. + // COMPILE_CUDA is set based on the `cuda` Cargo feature: + // - cuda feature enabled: COMPILE_CUDA=1 (GPU dispatch paths active) + // - cuda feature disabled: COMPILE_CUDA=0 (CPU only) // Process the template line by line to handle conditional blocks let mut in_multi_lib_block = false; @@ -431,7 +332,12 @@ fn generate_quest_header(quest_dir: &Path) -> Result<()> { return Some("#define COMPILE_OPENMP 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUDA") { - // Main library is always CPU-only; GPU library is separate + // When the cuda feature is enabled, COMPILE_CUDA=1 so QuEST's + // GPU dispatch paths are active in the generated header. 
+ let cuda_enabled = env::var("CARGO_FEATURE_CUDA").is_ok(); + if cuda_enabled { + return Some("#define COMPILE_CUDA 1".to_string()); + } return Some("#define COMPILE_CUDA 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUQUANTUM") { @@ -614,11 +520,11 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { let cpu_dir = quest_src_dir.join("cpu"); let comm_dir = quest_src_dir.join("comm"); - // IMPORTANT: The main library ALWAYS uses gpu_stubs.cpp (CPU only). - // GPU support is provided by a separate shared library (libpecos_quest_cuda.so) - // that is loaded at runtime via dlopen. This allows a single binary to work - // on systems with and without CUDA installed. - build.file("src/gpu_stubs.cpp"); + // When CUDA is enabled, GPU implementations come from the nvcc-compiled + // static archive (libquest-gpu.a). Otherwise, use gpu_stubs.cpp. + if !gpu_enabled { + build.file("src/gpu_stubs.cpp"); + } build .file("src/bridge.cpp") @@ -648,8 +554,7 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { // Accelerator.cpp contains dispatch logic for both CPU and GPU .file(core_dir.join("accelerator.cpp")); - // Build the separate GPU shared library if GPU feature is enabled - // This library will be loaded at runtime via dlopen + // Build the GPU static archive if CUDA feature is enabled if gpu_enabled { let gpu_dir = quest_src_dir.join("gpu"); if !gpu_dir.exists() { @@ -659,21 +564,11 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { std::process::exit(1); } - // Build the separate GPU shared library - if let Some(gpu_lib_path) = - build_gpu_shared_library(cuda_path.as_ref().unwrap(), quest_dir, out_dir) - { - info!( - "GPU shared library built successfully: {}", - gpu_lib_path.display() - ); - // Emit the GPU library path so downstream crates can find it - println!( - "cargo:rustc-env=PECOS_QUEST_CUDA_LIB={}", - gpu_lib_path.display() - ); + // Build GPU source files (gpu_config.cpp, gpu_subroutines.cpp) with nvcc + if 
build_quest_gpu_objects(cuda_path.as_ref().unwrap(), quest_dir, out_dir).is_some() { + info!("GPU static archive built successfully"); } else { - eprintln!("\nERROR: GPU feature enabled but GPU library build failed"); + eprintln!("\nERROR: GPU feature enabled but GPU archive build failed"); eprintln!(" See warnings above for compilation errors"); eprintln!(" Solutions:"); eprintln!(" 1. Use CUDA 11 or 12 instead of CUDA 13 (QuEST incompatibility)"); @@ -681,6 +576,12 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { eprintln!(" 3. Use Python GPU simulators (CuStateVec/MPS) which work with CUDA 13"); std::process::exit(1); } + + // Link the GPU static archive and CUDA runtime libraries + println!("cargo:rustc-link-lib=static=quest-gpu"); + println!("cargo:rustc-link-search=native={}/lib64", cuda_path.as_ref().unwrap()); + println!("cargo:rustc-link-lib=cudart"); + println!("cargo:rustc-link-lib=cublas"); } // CPU backend @@ -699,21 +600,23 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { .include("include"); // Define preprocessor flags based on features - // IMPORTANT: The main library is ALWAYS CPU-only. GPU support is provided via - // a separate shared library (libpecos_quest_cuda.so) loaded at runtime via dlopen. - // This allows a single binary to work on systems with and without CUDA. + // When CUDA is enabled, COMPILE_CUDA=1 and COMPILE_GPU=1 so the standard C++ + // compiler sees GPU dispatch declarations (which use only standard C++ types). + // The actual GPU kernel implementations are in the nvcc-compiled static archive. 
+ let (cuda_flag, gpu_flag) = if gpu_enabled { + ("1", "1") + } else { + ("0", "0") + }; build .define("COMPILE_CPU", "1") - .define("COMPILE_OPENMP", "0") // Disable OpenMP for simplicity initially - .define("COMPILE_MPI", "0") // Disable MPI for simplicity initially - .define("FLOAT_PRECISION", "2") // Double precision by default - .define("COMPILE_CUDA", "0") // Main library never uses CUDA directly - .define("COMPILE_GPU", "0") // GPU ops are in the separate GPU library + .define("COMPILE_OPENMP", "0") + .define("COMPILE_MPI", "0") + .define("FLOAT_PRECISION", "2") + .define("COMPILE_CUDA", cuda_flag) + .define("COMPILE_GPU", gpu_flag) .define("COMPILE_CUQUANTUM", "0"); - // Note: We do NOT link cudart/cublas here. The GPU library handles CUDA linking - // and is loaded at runtime only when GPU is requested. - // Use C++20 standard (QuEST v4 uses designated initializers which require C++20) // However, on macOS there's a known issue with C++20 and cxx crate's pointer_traits // specializations, so we use C++17 there (designated initializers are a GNU extension @@ -783,10 +686,6 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { build.compile("quest-bridge"); - // Note: GPU object files are now compiled into a separate shared library - // (libpecos_quest_cuda.so) which is built by build_gpu_shared_library() - // and loaded at runtime via dlopen. - // On macOS, ensure the C++ standard library is linked correctly // Use the system libc++ which is in the dyld shared cache (macOS Big Sur+) // We rely on the compiler's default behavior rather than explicit cargo directives diff --git a/crates/pecos-qulacs/build.rs b/crates/pecos-qulacs/build.rs index f7e8b956c..bda906591 100644 --- a/crates/pecos-qulacs/build.rs +++ b/crates/pecos-qulacs/build.rs @@ -315,6 +315,17 @@ fn configure_build( if is_release { build.define("EIGEN_NO_DEBUG", None); } + + // Enable SIMD-optimized gate kernels in Qulacs (matches Qulacs CMake USE_SIMD=Yes). 
+ // On x86/x86_64, _USE_SIMD activates hand-written AVX2 intrinsics for gates like H, X, + // CNOT, RZ, etc. Qulacs's type.hpp will #undef _USE_SIMD if the compiler doesn't define + // __AVX2__, so this is safe even when -march=native isn't used. + // On aarch64, Qulacs uses _USE_SVE for SVE intrinsics instead. + if target.contains("x86_64") || target.contains("x86") || target.contains("i686") { + build.define("_USE_SIMD", None); + } else if target.contains("aarch64") { + build.define("_USE_SIMD", None); + } } fn create_windows_boost_stub(out_dir: &Path) { diff --git a/crates/pecos-qulacs/src/bridge.rs b/crates/pecos-qulacs/src/bridge.rs index ce298f2de..4bd175efc 100644 --- a/crates/pecos-qulacs/src/bridge.rs +++ b/crates/pecos-qulacs/src/bridge.rs @@ -58,5 +58,11 @@ pub mod ffi { // Measurement fn measure_z(state: Pin<&mut QulacsState>, qubit: usize) -> u8; + + // Direct csim-level gate functions (bypass gate object allocation) + fn csim_x(state: Pin<&mut QulacsState>, qubit: usize); + fn csim_h(state: Pin<&mut QulacsState>, qubit: usize); + fn csim_rz(state: Pin<&mut QulacsState>, qubit: usize, angle: f64); + fn csim_cnot(state: Pin<&mut QulacsState>, control: usize, target: usize); } } diff --git a/crates/pecos-qulacs/src/lib.rs b/crates/pecos-qulacs/src/lib.rs index 1d50d2699..6f734c475 100644 --- a/crates/pecos-qulacs/src/lib.rs +++ b/crates/pecos-qulacs/src/lib.rs @@ -15,7 +15,7 @@ //! This crate provides Rust bindings to the Qulacs quantum simulator C++ library, //! enabling high-performance quantum circuit simulation. 
-mod bridge; +pub mod bridge; use bridge::ffi; use num_complex::Complex64; diff --git a/crates/pecos-qulacs/src/qulacs_wrapper.cpp b/crates/pecos-qulacs/src/qulacs_wrapper.cpp index a1ff400f2..946e828a3 100644 --- a/crates/pecos-qulacs/src/qulacs_wrapper.cpp +++ b/crates/pecos-qulacs/src/qulacs_wrapper.cpp @@ -1,6 +1,7 @@ #include "qulacs_wrapper.h" #include "cppsim/state.hpp" #include "cppsim/gate_factory.hpp" +#include "csim/update_ops.hpp" #include #include @@ -240,3 +241,20 @@ uint8_t measure_z(QulacsState& state, size_t qubit) { // Fallback: just return 0 return 0; } + +// Direct csim-level gate functions (bypass gate object allocation) +void csim_x(QulacsState& state, size_t qubit) { + X_gate(static_cast(qubit), state.get_state()->data_c(), state.get_state()->dim); +} + +void csim_h(QulacsState& state, size_t qubit) { + H_gate(static_cast(qubit), state.get_state()->data_c(), state.get_state()->dim); +} + +void csim_rz(QulacsState& state, size_t qubit, double angle) { + RZ_gate(static_cast(qubit), -angle, state.get_state()->data_c(), state.get_state()->dim); +} + +void csim_cnot(QulacsState& state, size_t control, size_t target) { + CNOT_gate(static_cast(control), static_cast(target), state.get_state()->data_c(), state.get_state()->dim); +} diff --git a/crates/pecos-qulacs/src/qulacs_wrapper.h b/crates/pecos-qulacs/src/qulacs_wrapper.h index ab428853d..acfc025e3 100644 --- a/crates/pecos-qulacs/src/qulacs_wrapper.h +++ b/crates/pecos-qulacs/src/qulacs_wrapper.h @@ -71,3 +71,9 @@ void apply_swap(QulacsState& state, size_t qubit1, size_t qubit2); // Measurement uint8_t measure_z(QulacsState& state, size_t qubit); + +// Direct csim-level gate functions (bypass gate object allocation) +void csim_x(QulacsState& state, size_t qubit); +void csim_h(QulacsState& state, size_t qubit); +void csim_rz(QulacsState& state, size_t qubit, double angle); +void csim_cnot(QulacsState& state, size_t control, size_t target); diff --git a/crates/pecos/src/bin/cli.rs 
b/crates/pecos/src/bin/cli.rs index 6428a4439..e54e61a3a 100644 --- a/crates/pecos/src/bin/cli.rs +++ b/crates/pecos/src/bin/cli.rs @@ -72,8 +72,16 @@ pub enum RustCommands { check: bool, }, - /// Run benchmarks with native CPU optimizations (AVX2, etc.) + /// Run benchmarks Bench { + /// Build profile: release (default) or native (adds -C target-cpu=native) + #[arg(long, default_value = "release")] + profile: String, + + /// Additional cargo features (e.g., "qulacs", "quest") + #[arg(long)] + features: Option, + /// Benchmark filter pattern (e.g., "`SoA` Comparison", "DOD") pattern: Option, }, diff --git a/crates/pecos/src/bin/cli/rust_cmd.rs b/crates/pecos/src/bin/cli/rust_cmd.rs index d7c02aa87..d42e3180f 100644 --- a/crates/pecos/src/bin/cli/rust_cmd.rs +++ b/crates/pecos/src/bin/cli/rust_cmd.rs @@ -19,7 +19,11 @@ pub fn run(command: &super::RustCommands) -> Result<()> { include_ffi, } => run_test(*release, *include_ffi), super::RustCommands::Fmt { check } => run_fmt(*check), - super::RustCommands::Bench { pattern } => run_bench(pattern.as_deref()), + super::RustCommands::Bench { + profile, + features, + pattern, + } => run_bench(profile, features.as_deref(), pattern.as_deref()), } } @@ -545,20 +549,41 @@ fn run_fmt(check: bool) -> Result<()> { Ok(()) } -/// Run cargo bench with native CPU optimizations (AVX2, etc.) 
-fn run_bench(pattern: Option<&str>) -> Result<()> { - println!("Running benchmarks with native CPU optimizations..."); - +/// Run cargo bench with configurable profile and features +fn run_bench(profile: &str, features: Option<&str>, pattern: Option<&str>) -> Result<()> { let mut cmd = Command::new("cargo"); - cmd.args(["bench", "-p", "benchmarks"]); + cmd.args(["bench", "-p", "benchmarks", "--bench", "benchmarks"]); + + match profile { + "native" => { + println!("Running benchmarks with native CPU optimizations..."); + cmd.arg("--profile=native"); + // Preserve any existing RUSTFLAGS while adding target-cpu=native + let mut rustflags = std::env::var("RUSTFLAGS").unwrap_or_default(); + if !rustflags.is_empty() { + rustflags.push(' '); + } + rustflags.push_str("-C target-cpu=native"); + cmd.env("RUSTFLAGS", rustflags); + } + "release" => { + println!("Running benchmarks in release mode..."); + } + other => { + return Err(Error::Config(format!( + "Unknown bench profile '{other}'. Use 'release' or 'native'." + ))); + } + } + + if let Some(feat) = features { + cmd.arg(format!("--features={feat}")); + } if let Some(pat) = pattern { cmd.args(["--", pat]); } - // Set RUSTFLAGS for native CPU features - cmd.env("RUSTFLAGS", "-C target-cpu=native"); - let status = cmd.status(); if !matches!(status, Ok(s) if s.success()) { return Err(Error::Config("cargo bench failed".to_string())); diff --git a/scripts/native_bench/.gitignore b/scripts/native_bench/.gitignore new file mode 100644 index 000000000..567609b12 --- /dev/null +++ b/scripts/native_bench/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/scripts/native_bench/bench_quest.c b/scripts/native_bench/bench_quest.c new file mode 100644 index 000000000..e5795a873 --- /dev/null +++ b/scripts/native_bench/bench_quest.c @@ -0,0 +1,188 @@ +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +// Standalone QuEST v4 benchmark using the native C API. +// Compiled and linked against a CMake-built QuEST library so that build flags +// are entirely under CMake's control (no Rust build.rs involvement). + +#define _POSIX_C_SOURCE 199309L + +#include +#include +#include +#include "quest.h" + +// --------------------------------------------------------------------------- +// Timing helpers +// --------------------------------------------------------------------------- + +static double now_sec(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9; +} + +static int cmp_double(const void *a, const void *b) { + double da = *(const double *)a; + double db = *(const double *)b; + return (da > db) - (da < db); +} + +static double median(double *vals, int n) { + qsort(vals, (size_t)n, sizeof(double), cmp_double); + if (n % 2 == 1) return vals[n / 2]; + return (vals[n / 2 - 1] + vals[n / 2]) / 2.0; +} + +// --------------------------------------------------------------------------- +// Circuit: layered H + RZ + CX +// --------------------------------------------------------------------------- + +static void run_circuit(Qureg q, int num_qubits, int num_layers) { + for (int layer = 0; layer < num_layers; layer++) { + for (int qb = 0; qb < num_qubits; qb++) { + applyHadamard(q, qb); + applyRotateZ(q, qb, 0.1); + } + for (int qb = 0; qb < num_qubits - 1; qb++) { + applyControlledPauliX(q, qb, qb + 1); + } + } +} + +// --------------------------------------------------------------------------- 
+// Layered circuit benchmark +// --------------------------------------------------------------------------- + +static void bench_circuit(int num_qubits, int num_layers, int reps) { + Qureg q = createQureg(num_qubits); + double times[reps]; + + for (int r = 0; r < reps; r++) { + initZeroState(q); + double t0 = now_sec(); + run_circuit(q, num_qubits, num_layers); + double t1 = now_sec(); + times[r] = t1 - t0; + } + + double med = median(times, reps); + printf("circuit %2dq %2dl %12.3f us\n", num_qubits, num_layers, med * 1e6); + destroyQureg(q); +} + +// --------------------------------------------------------------------------- +// Individual gate benchmarks (18 qubits, 100 iterations) +// --------------------------------------------------------------------------- + +static void bench_gate_h(int num_qubits, int iters, int reps) { + Qureg q = createQureg(num_qubits); + initZeroState(q); + double times[reps]; + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int qb = 0; qb < num_qubits; qb++) + applyHadamard(q, qb); + double t1 = now_sec(); + times[r] = t1 - t0; + } + + printf("gate H %12.3f us\n", median(times, reps) * 1e6); + destroyQureg(q); +} + +static void bench_gate_x(int num_qubits, int iters, int reps) { + Qureg q = createQureg(num_qubits); + initZeroState(q); + double times[reps]; + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int qb = 0; qb < num_qubits; qb++) + applyPauliX(q, qb); + double t1 = now_sec(); + times[r] = t1 - t0; + } + + printf("gate X %12.3f us\n", median(times, reps) * 1e6); + destroyQureg(q); +} + +static void bench_gate_cx(int num_qubits, int iters, int reps) { + Qureg q = createQureg(num_qubits); + initZeroState(q); + double times[reps]; + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int qb = 0; qb < num_qubits - 1; qb++) + applyControlledPauliX(q, qb, qb + 1); + double t1 
= now_sec(); + times[r] = t1 - t0; + } + + printf("gate CX %12.3f us\n", median(times, reps) * 1e6); + destroyQureg(q); +} + +static void bench_gate_rz(int num_qubits, int iters, int reps) { + Qureg q = createQureg(num_qubits); + initZeroState(q); + double times[reps]; + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int qb = 0; qb < num_qubits; qb++) + applyRotateZ(q, qb, 0.1); + double t1 = now_sec(); + times[r] = t1 - t0; + } + + printf("gate RZ %12.3f us\n", median(times, reps) * 1e6); + destroyQureg(q); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +int main(void) { + initQuESTEnv(); + + int reps = 5; + + printf("=== QuEST v4 standalone benchmarks ===\n"); + printf("\n-- Layered circuits (median of %d runs) --\n", reps); + + int configs[][2] = { + {10, 20}, {14, 20}, {18, 20}, {20, 20}, {22, 10}, {24, 5} + }; + int n_configs = sizeof(configs) / sizeof(configs[0]); + + for (int i = 0; i < n_configs; i++) { + bench_circuit(configs[i][0], configs[i][1], reps); + } + + printf("\n-- Individual gates at 18 qubits, 100 iters (median of %d runs) --\n", reps); + bench_gate_h(18, 100, reps); + bench_gate_x(18, 100, reps); + bench_gate_cx(18, 100, reps); + bench_gate_rz(18, 100, reps); + + finalizeQuESTEnv(); + return 0; +} diff --git a/scripts/native_bench/bench_qulacs.cpp b/scripts/native_bench/bench_qulacs.cpp new file mode 100644 index 000000000..4106d31cc --- /dev/null +++ b/scripts/native_bench/bench_qulacs.cpp @@ -0,0 +1,299 @@ +// Copyright 2026 The PECOS Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. + +// Standalone Qulacs benchmark using both the gate-object API and direct csim kernels. +// Compiled and linked against a CMake-built Qulacs library so that build flags +// are entirely under CMake's control (no Rust build.rs involvement). + +#include +#include +#include +#include +#include + +#include "cppsim/state.hpp" +#include "cppsim/gate_factory.hpp" +#include "csim/update_ops.hpp" + +// --------------------------------------------------------------------------- +// Timing helpers +// --------------------------------------------------------------------------- + +static double now_sec() { + auto tp = std::chrono::steady_clock::now(); + return std::chrono::duration(tp.time_since_epoch()).count(); +} + +static double median(std::vector& vals) { + std::sort(vals.begin(), vals.end()); + size_t n = vals.size(); + if (n % 2 == 1) return vals[n / 2]; + return (vals[n / 2 - 1] + vals[n / 2]) / 2.0; +} + +// --------------------------------------------------------------------------- +// Circuit: layered H + RZ + CX (gate-object API) +// --------------------------------------------------------------------------- + +static void run_circuit_gate_api(QuantumStateCpu& state, int num_qubits, int num_layers) { + for (int layer = 0; layer < num_layers; layer++) { + for (int q = 0; q < num_qubits; q++) { + auto* g1 = gate::H(q); + g1->update_quantum_state(&state); + delete g1; + // Qulacs uses opposite sign convention for rotations + auto* g2 = gate::RZ(q, -0.1); + g2->update_quantum_state(&state); + delete g2; + } + for (int q = 0; q < num_qubits - 1; q++) { + 
auto* g = gate::CNOT(q, q + 1); + g->update_quantum_state(&state); + delete g; + } + } +} + +// --------------------------------------------------------------------------- +// Circuit: layered H + RZ + CX (direct csim kernels) +// --------------------------------------------------------------------------- + +static void run_circuit_csim(QuantumStateCpu& state, int num_qubits, int num_layers) { + CTYPE* data = state.data_c(); + ITYPE dim = state.dim; + + for (int layer = 0; layer < num_layers; layer++) { + for (int q = 0; q < num_qubits; q++) { + H_gate((UINT)q, data, dim); + // Qulacs uses opposite sign convention for rotations + RZ_gate((UINT)q, -0.1, data, dim); + } + for (int q = 0; q < num_qubits - 1; q++) { + CNOT_gate((UINT)q, (UINT)(q + 1), data, dim); + } + } +} + +// --------------------------------------------------------------------------- +// Layered circuit benchmarks +// --------------------------------------------------------------------------- + +static void bench_circuit(int num_qubits, int num_layers, int reps, const char* tag, + void (*fn)(QuantumStateCpu&, int, int)) { + QuantumStateCpu state(num_qubits); + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + state.set_zero_state(); + double t0 = now_sec(); + fn(state, num_qubits, num_layers); + double t1 = now_sec(); + times[r] = t1 - t0; + } + + double med = median(times); + std::printf("circuit %2dq %2dl %-10s %12.3f us\n", + num_qubits, num_layers, tag, med * 1e6); +} + +// --------------------------------------------------------------------------- +// Individual gate benchmarks (18 qubits, 100 iterations) +// --------------------------------------------------------------------------- + +// -- Gate-object API variants -- + +static void bench_gate_h_api(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for 
(int q = 0; q < num_qubits; q++) { + auto* g = gate::H(q); + g->update_quantum_state(&state); + delete g; + } + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate H %-10s %12.3f us\n", "gate_api", median(times) * 1e6); +} + +static void bench_gate_x_api(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits; q++) { + auto* g = gate::X(q); + g->update_quantum_state(&state); + delete g; + } + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate X %-10s %12.3f us\n", "gate_api", median(times) * 1e6); +} + +static void bench_gate_cx_api(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits - 1; q++) { + auto* g = gate::CNOT(q, q + 1); + g->update_quantum_state(&state); + delete g; + } + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate CX %-10s %12.3f us\n", "gate_api", median(times) * 1e6); +} + +static void bench_gate_rz_api(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits; q++) { + auto* g = gate::RZ(q, -0.1); + g->update_quantum_state(&state); + delete g; + } + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate RZ %-10s %12.3f us\n", "gate_api", median(times) * 1e6); +} + +// -- Direct csim kernel variants -- + +static void bench_gate_h_csim(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + CTYPE* data = state.data_c(); + ITYPE dim = 
state.dim; + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits; q++) + H_gate((UINT)q, data, dim); + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate H %-10s %12.3f us\n", "csim", median(times) * 1e6); +} + +static void bench_gate_x_csim(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + CTYPE* data = state.data_c(); + ITYPE dim = state.dim; + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits; q++) + X_gate((UINT)q, data, dim); + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate X %-10s %12.3f us\n", "csim", median(times) * 1e6); +} + +static void bench_gate_cx_csim(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + CTYPE* data = state.data_c(); + ITYPE dim = state.dim; + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits - 1; q++) + CNOT_gate((UINT)q, (UINT)(q + 1), data, dim); + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate CX %-10s %12.3f us\n", "csim", median(times) * 1e6); +} + +static void bench_gate_rz_csim(int num_qubits, int iters, int reps) { + QuantumStateCpu state(num_qubits); + state.set_zero_state(); + CTYPE* data = state.data_c(); + ITYPE dim = state.dim; + std::vector times(reps); + + for (int r = 0; r < reps; r++) { + double t0 = now_sec(); + for (int i = 0; i < iters; i++) + for (int q = 0; q < num_qubits; q++) + RZ_gate((UINT)q, -0.1, data, dim); + double t1 = now_sec(); + times[r] = t1 - t0; + } + std::printf("gate RZ %-10s %12.3f us\n", "csim", median(times) * 1e6); +} + +// --------------------------------------------------------------------------- +// Main +// 
--------------------------------------------------------------------------- + +int main() { + int reps = 5; + + std::printf("=== Qulacs standalone benchmarks ===\n"); + std::printf("\n-- Layered circuits (median of %d runs) --\n", reps); + + int configs[][2] = { + {10, 20}, {14, 20}, {18, 20}, {20, 20}, {22, 10}, {24, 5} + }; + int n_configs = sizeof(configs) / sizeof(configs[0]); + + for (int i = 0; i < n_configs; i++) { + bench_circuit(configs[i][0], configs[i][1], reps, "gate_api", run_circuit_gate_api); + bench_circuit(configs[i][0], configs[i][1], reps, "csim", run_circuit_csim); + } + + std::printf("\n-- Individual gates at 18 qubits, 100 iters (median of %d runs) --\n", reps); + + bench_gate_h_api(18, 100, reps); + bench_gate_h_csim(18, 100, reps); + + bench_gate_x_api(18, 100, reps); + bench_gate_x_csim(18, 100, reps); + + bench_gate_cx_api(18, 100, reps); + bench_gate_cx_csim(18, 100, reps); + + bench_gate_rz_api(18, 100, reps); + bench_gate_rz_csim(18, 100, reps); + + return 0; +} diff --git a/scripts/native_bench/run.sh b/scripts/native_bench/run.sh new file mode 100755 index 000000000..1ba2cc5c0 --- /dev/null +++ b/scripts/native_bench/run.sh @@ -0,0 +1,208 @@ +#!/usr/bin/env bash +# Copyright 2026 The PECOS Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+ +# Standalone native benchmark: PECOS vs QuEST vs Qulacs +# +# Builds QuEST and Qulacs from source with their own CMake build systems, +# compiles standalone C/C++ benchmark programs, runs them, and compares +# the results against PECOS Rust criterion benchmarks. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +DEPS_DIR="$HOME/.pecos/deps" +BUILD_DIR="$SCRIPT_DIR/build" + +QUEST_SRC="$DEPS_DIR/quest-v4.1.0" +QULACS_SRC="$DEPS_DIR/qulacs-0.6.12" + +# --------------------------------------------------------------------------- +# Check sources exist +# --------------------------------------------------------------------------- + +missing=0 +if [ ! -d "$QUEST_SRC" ]; then + echo "ERROR: QuEST sources not found at $QUEST_SRC" + missing=1 +fi +if [ ! -d "$QULACS_SRC" ]; then + echo "ERROR: Qulacs sources not found at $QULACS_SRC" + missing=1 +fi +if [ "$missing" -eq 1 ]; then + echo "" + echo "Run the following to download the dependencies:" + echo " cargo build -p pecos-quest -p pecos-qulacs" + exit 1 +fi + +echo "=== Native Benchmark: PECOS vs QuEST vs Qulacs ===" +echo "" + +# --------------------------------------------------------------------------- +# Build QuEST via CMake (single-threaded CPU, no OpenMP/GPU/MPI) +# --------------------------------------------------------------------------- + +echo "--- Building QuEST (CMake, Release, single-threaded) ---" +QUEST_BUILD="$BUILD_DIR/quest" +mkdir -p "$QUEST_BUILD" +cmake -S "$QUEST_SRC" -B "$QUEST_BUILD" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_FLAGS="-march=native" \ + -DCMAKE_CXX_FLAGS="-march=native" \ + -DENABLE_MULTITHREADING=OFF \ + -DENABLE_CUDA=OFF \ + -DENABLE_HIP=OFF \ + -DENABLE_DISTRIBUTION=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + 2>&1 | tail -5 +cmake --build "$QUEST_BUILD" -j "$(nproc)" 2>&1 | tail -3 +echo "QuEST built." 
+echo "" + +# --------------------------------------------------------------------------- +# Build Qulacs via CMake (no OpenMP) +# --------------------------------------------------------------------------- + +echo "--- Building Qulacs (CMake, Release, single-threaded) ---" +QULACS_BUILD="$BUILD_DIR/qulacs" +mkdir -p "$QULACS_BUILD" + +# Qulacs needs Boost headers; use the copy already downloaded by PECOS +BOOST_DIR="$DEPS_DIR/boost-1.83.0" +if [ ! -d "$BOOST_DIR" ]; then + echo "ERROR: Boost not found at $BOOST_DIR" + echo "Run: cargo build -p pecos-qulacs" + exit 1 +fi + +cmake -S "$QULACS_SRC" -B "$QULACS_BUILD" \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_FLAGS="-march=native" \ + -DCMAKE_CXX_FLAGS="-march=native -DEIGEN_NO_DEBUG" \ + -DBoost_INCLUDE_DIR="$BOOST_DIR" \ + -DUSE_OMP=OFF \ + -DUSE_GPU=OFF \ + -DUSE_MPI=OFF \ + -DUSE_TEST=OFF \ + -DUSE_PYTHON=OFF \ + 2>&1 | tail -5 +cmake --build "$QULACS_BUILD" -j "$(nproc)" --target csim_static cppsim_static 2>&1 | tail -3 +echo "Qulacs built." 
+echo "" + +# --------------------------------------------------------------------------- +# Locate built libraries +# --------------------------------------------------------------------------- + +# QuEST: static library built by CMake +QUEST_LIB="$(find "$QUEST_BUILD" -name 'libQuEST.a' | head -1)" +if [ -z "$QUEST_LIB" ]; then + echo "ERROR: Could not find libQuEST.a in $QUEST_BUILD" + exit 1 +fi +QUEST_LIB_DIR="$(dirname "$QUEST_LIB")" + +# QuEST include paths: source headers + generated quest.h +QUEST_INC_GEN="$QUEST_BUILD/include" +QUEST_INC_SRC="$QUEST_SRC/quest/include" +QUEST_INC_ROOT="$QUEST_SRC" + +# Qulacs: static libraries (csim + cppsim) +# Qulacs CMakeLists sets CMAKE_ARCHIVE_OUTPUT_DIRECTORY to ${PROJECT_BINARY_DIR}/../lib +QULACS_CSIM_LIB="$(find "$BUILD_DIR" -name 'libcsim_static.a' | head -1)" +QULACS_CPPSIM_LIB="$(find "$BUILD_DIR" -name 'libcppsim_static.a' | head -1)" +if [ -z "$QULACS_CSIM_LIB" ] || [ -z "$QULACS_CPPSIM_LIB" ]; then + echo "ERROR: Could not find Qulacs static libraries in $BUILD_DIR" + exit 1 +fi + +# Qulacs include: source tree + Eigen (downloaded by CMake ExternalProject) +QULACS_INC="$QULACS_SRC/src" +QULACS_EIGEN_INC="$QULACS_SRC/include" +# If Eigen wasn't installed by CMake into the source tree, fall back to PECOS's copy +if [ ! -d "$QULACS_EIGEN_INC/Eigen" ]; then + QULACS_EIGEN_INC="$DEPS_DIR/eigen-3.4.0" +fi + +# --------------------------------------------------------------------------- +# Compile standalone benchmark programs +# --------------------------------------------------------------------------- + +echo "--- Compiling bench_quest ---" +cc -O3 -march=native -std=c11 \ + -I"$QUEST_INC_GEN" -I"$QUEST_INC_SRC" -I"$QUEST_INC_ROOT" \ + "$SCRIPT_DIR/bench_quest.c" \ + -L"$QUEST_LIB_DIR" -lQuEST \ + -lstdc++ -lm \ + -o "$BUILD_DIR/bench_quest" +echo "Compiled." 
+ +echo "--- Compiling bench_qulacs ---" +c++ -O3 -march=native -std=c++14 \ + -I"$QULACS_INC" -I"$QULACS_EIGEN_INC" -I"$BOOST_DIR" \ + -DEIGEN_NO_DEBUG \ + "$SCRIPT_DIR/bench_qulacs.cpp" \ + "$QULACS_CPPSIM_LIB" "$QULACS_CSIM_LIB" \ + -lm \ + -o "$BUILD_DIR/bench_qulacs" +echo "Compiled." +echo "" + +# --------------------------------------------------------------------------- +# Run standalone benchmarks +# --------------------------------------------------------------------------- + +echo "--- Running QuEST benchmark ---" +"$BUILD_DIR/bench_quest" | tee "$BUILD_DIR/quest_results.txt" +echo "" + +echo "--- Running Qulacs benchmark ---" +"$BUILD_DIR/bench_qulacs" | tee "$BUILD_DIR/qulacs_results.txt" +echo "" + +# --------------------------------------------------------------------------- +# Run PECOS Rust criterion benchmarks +# --------------------------------------------------------------------------- + +echo "--- Running PECOS criterion benchmarks (--quick mode) ---" +cd "$REPO_ROOT" + +# Capture criterion output; --quick runs minimal iterations for fast comparison +CRITERION_OUT="$BUILD_DIR/criterion_output.txt" +cargo bench -p benchmarks --profile native --bench benchmarks \ + --features quest,qulacs -- "Native" --quick 2>&1 | tee "$CRITERION_OUT" +echo "" + +# --------------------------------------------------------------------------- +# Parse criterion results and print comparison table +# --------------------------------------------------------------------------- + +echo "============================================================" +echo " COMPARISON SUMMARY" +echo "============================================================" +echo "" +echo "QuEST standalone results:" +cat "$BUILD_DIR/quest_results.txt" +echo "" +echo "Qulacs standalone results:" +cat "$BUILD_DIR/qulacs_results.txt" +echo "" +echo "PECOS criterion results (see above for full output):" +# Extract timing lines from criterion output +grep -E "time:.*\[" "$CRITERION_OUT" 2>/dev/null || echo 
"(parse criterion output above for timings)" +echo "" +echo "============================================================" +echo "Done. Full outputs saved in: $BUILD_DIR/" From dad78133302ec68d9821dc92c680e651c399bdb2 Mon Sep 17 00:00:00 2001 From: Ciaran Ryan-Anderson Date: Mon, 2 Mar 2026 19:05:42 -0700 Subject: [PATCH 4/5] lint and things --- crates/benchmarks/benches/benchmarks.rs | 4 +- .../modules/native_statevec_comparison.rs | 36 +++++++++--------- crates/pecos-qsim/src/state_vec_soa.rs | 14 +++++++ crates/pecos-quest/build_quest.rs | 38 ++++++++----------- crates/pecos-quest/src/bridge.cpp | 8 +++- crates/pecos-qulacs/build.rs | 13 ++++--- crates/pecos-qulacs/src/bridge.rs | 2 + .../src/pecos/circuits/quantum_circuit.py | 12 +++++- 8 files changed, 76 insertions(+), 51 deletions(-) diff --git a/crates/benchmarks/benches/benchmarks.rs b/crates/benchmarks/benches/benchmarks.rs index 3dbf686cd..538861e38 100644 --- a/crates/benchmarks/benches/benchmarks.rs +++ b/crates/benchmarks/benches/benchmarks.rs @@ -45,8 +45,8 @@ use modules::gpu_influence_sampler; use modules::sparse_stab_vs_cpp; use modules::{ allocation_overhead, cpu_stabilizer_comparison, dem_sampler, dod_statevec, - measurement_sampling, native_statevec_comparison, noise_models, rng, set_ops, - sparse_state_vec, stabilizer_sims, state_vec_sims, surface_code, trig, + measurement_sampling, native_statevec_comparison, noise_models, rng, set_ops, sparse_state_vec, + stabilizer_sims, state_vec_sims, surface_code, trig, }; fn all_benchmarks(c: &mut Criterion) { diff --git a/crates/benchmarks/benches/modules/native_statevec_comparison.rs b/crates/benchmarks/benches/modules/native_statevec_comparison.rs index b9182369c..84bcd6c56 100644 --- a/crates/benchmarks/benches/modules/native_statevec_comparison.rs +++ b/crates/benchmarks/benches/modules/native_statevec_comparison.rs @@ -12,7 +12,7 @@ //! Native state vector comparison benchmarks. //! -//! 
Calls QuEST and Qulacs FFI directly (bypassing the PECOS wrapper layer's qubit index +//! Calls `QuEST` and Qulacs FFI directly (bypassing the PECOS wrapper layer's qubit index //! remapping, bounds checks, and `QubitId`/`Angle64` conversions) to give an apples-to-apples //! comparison of raw gate computation performance against the pure-Rust PECOS simulators. //! @@ -74,9 +74,7 @@ impl QuestState { fn new(num_qubits: usize) -> Self { let env_ptr = quest_ffi::quest_create_env(); assert!(!env_ptr.is_null(), "Failed to create QuEST environment"); - let qureg_ptr = unsafe { - quest_ffi::quest_create_qureg(env_ptr, num_qubits as i32) - }; + let qureg_ptr = unsafe { quest_ffi::quest_create_qureg(env_ptr, num_qubits as i32) }; assert!(!qureg_ptr.is_null(), "Failed to create QuEST qureg"); unsafe { quest_ffi::quest_init_zero_state(qureg_ptr) }; Self { env_ptr, qureg_ptr } @@ -166,10 +164,22 @@ mod cuquantum_matrices { pub const X: [[f64; 2]; 4] = [[0.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 0.0]]; pub const CX: [[f64; 2]; 16] = [ - [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], - [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], [0.0, 0.0], - [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], - [0.0, 0.0], [0.0, 0.0], [1.0, 0.0], [0.0, 0.0], + [1.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [1.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [1.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + [1.0, 0.0], + [0.0, 0.0], ]; pub fn rz(theta: f64) -> [[f64; 2]; 4] { @@ -201,14 +211,7 @@ fn bench_native_statevec_comparison(c: &mut Criterion) { let mut group = c.benchmark_group("Native StateVec Comparison"); group.sample_size(20); - let configs = [ - (10, 20), - (14, 20), - (18, 20), - (20, 20), - (22, 10), - (24, 5), - ]; + let configs = [(10, 20), (14, 20), (18, 20), (20, 20), (22, 10), (24, 5)]; for (num_qubits, num_layers) in configs { let label = format!("{num_qubits}q_{num_layers}l"); @@ -360,7 +363,6 @@ fn bench_native_statevec_comparison(c: &mut 
Criterion) { } Err(e) => eprintln!("CuStateVec not available: {e}"), } - } group.finish(); diff --git a/crates/pecos-qsim/src/state_vec_soa.rs b/crates/pecos-qsim/src/state_vec_soa.rs index 5ad2039a7..8e3195df1 100644 --- a/crates/pecos-qsim/src/state_vec_soa.rs +++ b/crates/pecos-qsim/src/state_vec_soa.rs @@ -2742,6 +2742,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -2981,6 +2983,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + let n = self.real.len(); let (q_lo, q_hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; @@ -3199,6 +3203,9 @@ where for pair in qubits.chunks_exact(2) { let q1 = pair[0].index(); let q2 = pair[1].index(); + + self.flush_two_qubit(q1, q2); + let q_lo = q1.min(q2); // When both qubits >= 2, consecutive indices share the same phase @@ -3803,6 +3810,9 @@ where for pair in qubits.chunks_exact(2) { let q1 = pair[0].index(); let q2 = pair[1].index(); + + self.flush_two_qubit(q1, q2); + let q_lo = q1.min(q2); // When both qubits >= 2, consecutive indices share the same phase @@ -3862,6 +3872,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + // Use strided iteration for cache efficiency let (lo, hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; let step_lo = 1 << lo; @@ -3925,6 +3937,8 @@ where let q1 = pair[0].index(); let q2 = pair[1].index(); + self.flush_two_qubit(q1, q2); + // Use strided iteration for cache efficiency let (lo, hi) = if q1 < q2 { (q1, q2) } else { (q2, q1) }; let step_lo = 1 << lo; diff --git a/crates/pecos-quest/build_quest.rs b/crates/pecos-quest/build_quest.rs index 7bca58638..776f0c6d1 100644 --- a/crates/pecos-quest/build_quest.rs +++ b/crates/pecos-quest/build_quest.rs @@ -90,7 +90,7 @@ fn detect_cuda_path() -> Option { /// - `gpu_config.cpp` (GPU device management) /// - 
`gpu_subroutines.cpp` (GPU kernel implementations) /// -/// All other QuEST sources compile fine with the standard C++ compiler even +/// All other `QuEST` sources compile fine with the standard C++ compiler even /// with `COMPILE_CUDA=1`, since the GPU headers only contain declarations /// with standard C++ types. fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> Option<()> { @@ -163,10 +163,7 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> if !output.status.success() { let stdout_str = String::from_utf8_lossy(&output.stdout); let stderr_str = String::from_utf8_lossy(&output.stderr); - eprintln!( - "ERROR: Failed to compile {} with nvcc", - src_file.display() - ); + eprintln!("ERROR: Failed to compile {} with nvcc", src_file.display()); eprintln!("Exit status: {:?}", output.status); if !stdout_str.is_empty() { eprintln!("stdout:\n{stdout_str}"); @@ -196,10 +193,7 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> .arg(format!("/OUT:{}", archive_path.display())) .args(&object_files); } else { - ar_cmd - .arg("rcs") - .arg(&archive_path) - .args(&object_files); + ar_cmd.arg("rcs").arg(&archive_path).args(&object_files); } let output = ar_cmd.output().ok()?; @@ -210,7 +204,10 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> return None; } - info!("Successfully built GPU static archive: {}", archive_path.display()); + info!( + "Successfully built GPU static archive: {}", + archive_path.display() + ); Some(()) } @@ -332,12 +329,10 @@ fn generate_quest_header(quest_dir: &Path) -> Result<()> { return Some("#define COMPILE_OPENMP 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUDA") { - // When the cuda feature is enabled, COMPILE_CUDA=1 so QuEST's - // GPU dispatch paths are active in the generated header. 
- let cuda_enabled = env::var("CARGO_FEATURE_CUDA").is_ok(); - if cuda_enabled { - return Some("#define COMPILE_CUDA 1".to_string()); - } + // The embedded QuEST library always runs in CPU-only mode. + // GPU acceleration is handled by the CUDA engine builder + // (QuestCudaStateVecEngine) which loads a separate GPU backend + // at runtime via dlopen. return Some("#define COMPILE_CUDA 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUQUANTUM") { @@ -579,7 +574,10 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { // Link the GPU static archive and CUDA runtime libraries println!("cargo:rustc-link-lib=static=quest-gpu"); - println!("cargo:rustc-link-search=native={}/lib64", cuda_path.as_ref().unwrap()); + println!( + "cargo:rustc-link-search=native={}/lib64", + cuda_path.as_ref().unwrap() + ); println!("cargo:rustc-link-lib=cudart"); println!("cargo:rustc-link-lib=cublas"); } @@ -603,11 +601,7 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { // When CUDA is enabled, COMPILE_CUDA=1 and COMPILE_GPU=1 so the standard C++ // compiler sees GPU dispatch declarations (which use only standard C++ types). // The actual GPU kernel implementations are in the nvcc-compiled static archive. - let (cuda_flag, gpu_flag) = if gpu_enabled { - ("1", "1") - } else { - ("0", "0") - }; + let (cuda_flag, gpu_flag) = if gpu_enabled { ("1", "1") } else { ("0", "0") }; build .define("COMPILE_CPU", "1") .define("COMPILE_OPENMP", "0") diff --git a/crates/pecos-quest/src/bridge.cpp b/crates/pecos-quest/src/bridge.cpp index 925feaf0d..799286c3f 100644 --- a/crates/pecos-quest/src/bridge.cpp +++ b/crates/pecos-quest/src/bridge.cpp @@ -29,8 +29,12 @@ class GlobalQuestEnv { std::lock_guard lock(init_mutex); if (!is_initialized.load()) { - // Initialize QuEST environment only once per process - initQuESTEnv(); + // Initialize QuEST environment in CPU-only mode. 
+ // GPU acceleration is handled separately via the CUDA engine builder + // (QuestCudaStateVecEngine) which loads a dedicated GPU backend at + // runtime via dlopen, allowing a single binary to work on systems + // with and without CUDA. + initCustomQuESTEnv(/*useDistrib=*/0, /*useGpuAccel=*/0, /*useMultithread=*/0); global_env_ptr = new QuESTEnv(getQuESTEnv()); is_initialized = true; } diff --git a/crates/pecos-qulacs/build.rs b/crates/pecos-qulacs/build.rs index bda906591..fd80b0614 100644 --- a/crates/pecos-qulacs/build.rs +++ b/crates/pecos-qulacs/build.rs @@ -317,13 +317,14 @@ fn configure_build( } // Enable SIMD-optimized gate kernels in Qulacs (matches Qulacs CMake USE_SIMD=Yes). - // On x86/x86_64, _USE_SIMD activates hand-written AVX2 intrinsics for gates like H, X, - // CNOT, RZ, etc. Qulacs's type.hpp will #undef _USE_SIMD if the compiler doesn't define + // _USE_SIMD activates hand-written SIMD intrinsics for gates like H, X, CNOT, RZ, etc. + // On x86/x86_64, Qulacs's type.hpp will #undef _USE_SIMD if the compiler doesn't define // __AVX2__, so this is safe even when -march=native isn't used. - // On aarch64, Qulacs uses _USE_SVE for SVE intrinsics instead. 
- if target.contains("x86_64") || target.contains("x86") || target.contains("i686") { - build.define("_USE_SIMD", None); - } else if target.contains("aarch64") { + if target.contains("x86_64") + || target.contains("x86") + || target.contains("i686") + || target.contains("aarch64") + { build.define("_USE_SIMD", None); } } diff --git a/crates/pecos-qulacs/src/bridge.rs b/crates/pecos-qulacs/src/bridge.rs index 4bd175efc..2f5284e18 100644 --- a/crates/pecos-qulacs/src/bridge.rs +++ b/crates/pecos-qulacs/src/bridge.rs @@ -8,6 +8,7 @@ pub mod ffi { type QulacsState; // Constructor and destructor + #[must_use] fn create_quantum_state(num_qubits: usize) -> UniquePtr; fn clone_quantum_state(state: &QulacsState) -> UniquePtr; @@ -57,6 +58,7 @@ pub mod ffi { fn apply_swap(state: Pin<&mut QulacsState>, qubit1: usize, qubit2: usize); // Measurement + #[must_use] fn measure_z(state: Pin<&mut QulacsState>, qubit: usize) -> u8; // Direct csim-level gate functions (bypass gate object allocation) diff --git a/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py b/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py index 63b2d9b5a..84d44ae51 100644 --- a/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py +++ b/python/quantum-pecos/src/pecos/circuits/quantum_circuit.py @@ -319,7 +319,11 @@ def add_with_symbol( method_name = _ROTATION_GATES[symbol_upper] if hasattr(tick_handle, method_name): method = getattr(tick_handle, method_name) - angle = params.get("angle", params.get("theta", 0.0)) + angles_val = params.get("angles") + if angles_val is not None and len(angles_val) >= 1: + angle = angles_val[0] + else: + angle = params.get("angle", params.get("theta", 0.0)) for loc in loc_list: if isinstance(loc, tuple): for q in loc: @@ -345,7 +349,11 @@ def add_with_symbol( method_name = _TWO_QUBIT_ROTATION_GATES[symbol_upper] if hasattr(tick_handle, method_name): method = getattr(tick_handle, method_name) - angle = params.get("angle", params.get("theta", 0.0)) + 
angles_val = params.get("angles") + if angles_val is not None and len(angles_val) >= 1: + angle = angles_val[0] + else: + angle = params.get("angle", params.get("theta", 0.0)) for loc in loc_list: if isinstance(loc, tuple) and len(loc) == 2: add_with_symbol(method, angle, loc[0], loc[1]) From 5e544f25cef296b48310b6969df038deb8a77200 Mon Sep 17 00:00:00 2001 From: Ciaran Ryan-Anderson Date: Mon, 2 Mar 2026 19:43:44 -0700 Subject: [PATCH 5/5] Restore GPU shared library (dlopen) build in build_quest.rs, reverting incomplete static-archive migration that broke CI GPU tests. --- crates/pecos-quest/build_quest.rs | 265 +++++++++++++++++++++--------- 1 file changed, 186 insertions(+), 79 deletions(-) diff --git a/crates/pecos-quest/build_quest.rs b/crates/pecos-quest/build_quest.rs index 776f0c6d1..3e60a13d1 100644 --- a/crates/pecos-quest/build_quest.rs +++ b/crates/pecos-quest/build_quest.rs @@ -1,12 +1,11 @@ //! Build script for `QuEST` integration //! -//! This build script produces a static library (libquest-bridge.a) for `QuEST` operations. +//! This build script produces: +//! 1. A static library (libquest-bridge.a) for CPU-only `QuEST` operations +//! 2. Optionally, a shared library (`libpecos_quest_cuda.so`) for CUDA operations (when cuda feature enabled) //! -//! When the `cuda` feature is enabled, GPU source files (`gpu_config.cpp`, `gpu_subroutines.cpp`) -//! are compiled with nvcc into a separate static archive (`libquest-gpu.a`) and linked in. -//! The remaining `QuEST` sources compile with the standard C++ compiler (they only contain -//! declarations with standard C++ types, no CUDA-specific syntax). This means the same -//! `quest_ffi` functions run on GPU transparently when CUDA is enabled. +//! The CUDA library is loaded at runtime via dlopen, allowing a single binary to work +//! on systems with and without CUDA installed. 
use log::{debug, info}; use pecos_build::{Manifest, Result, ensure_dep_ready, report_cache_config}; @@ -84,18 +83,15 @@ fn detect_cuda_path() -> Option { None } -/// Build GPU source files with nvcc into a static archive (`libquest-gpu.a`) +/// Build the GPU shared library (`libpecos_quest_cuda.so`) /// -/// Only compiles the two GPU implementation files that require nvcc: -/// - `gpu_config.cpp` (GPU device management) -/// - `gpu_subroutines.cpp` (GPU kernel implementations) -/// -/// All other `QuEST` sources compile fine with the standard C++ compiler even -/// with `COMPILE_CUDA=1`, since the GPU headers only contain declarations -/// with standard C++ types. -fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> Option<()> { - info!("Building GPU static archive (libquest-gpu.a)..."); +/// This library contains the GPU-accelerated `QuEST` implementation and is loaded +/// at runtime via dlopen. This allows the main library to work on systems without CUDA. 
+#[allow(clippy::too_many_lines)] +fn build_gpu_shared_library(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> Option { + info!("Building GPU shared library (libpecos_quest_cuda.so)..."); + // nvcc executable name differs by platform let nvcc_name = if cfg!(target_os = "windows") { "nvcc.exe" } else { @@ -103,28 +99,71 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> }; let nvcc_path = Path::new(cuda_path).join("bin").join(nvcc_name); info!("Using nvcc at: {}", nvcc_path.display()); - let quest_include_dir = quest_dir.join("include"); let quest_src_dir = quest_dir.join("src"); let gpu_dir = quest_src_dir.join("gpu"); - // Only the GPU implementation files need nvcc - let gpu_sources = [ - gpu_dir.join("gpu_config.cpp"), - gpu_dir.join("gpu_subroutines.cpp"), + // Source files for the GPU library + let bridge_gpu = PathBuf::from("src/bridge_cuda.cpp"); + let gpu_config = gpu_dir.join("gpu_config.cpp"); + let gpu_subroutines = gpu_dir.join("gpu_subroutines.cpp"); + + // QuEST core files needed by the GPU library + let api_dir = quest_src_dir.join("api"); + let core_dir = quest_src_dir.join("core"); + let cpu_dir = quest_src_dir.join("cpu"); + let comm_dir = quest_src_dir.join("comm"); + + // Collect all source files + let source_files = vec![ + bridge_gpu, + gpu_config, + gpu_subroutines, + // API layer + api_dir.join("calculations.cpp"), + api_dir.join("channels.cpp"), + api_dir.join("debug.cpp"), + api_dir.join("decoherence.cpp"), + api_dir.join("environment.cpp"), + api_dir.join("initialisations.cpp"), + api_dir.join("matrices.cpp"), + api_dir.join("modes.cpp"), + api_dir.join("operations.cpp"), + api_dir.join("paulis.cpp"), + api_dir.join("qureg.cpp"), + api_dir.join("types.cpp"), + // Core utilities + core_dir.join("errors.cpp"), + core_dir.join("utilities.cpp"), + core_dir.join("validation.cpp"), + core_dir.join("memory.cpp"), + core_dir.join("printer.cpp"), + core_dir.join("randomiser.cpp"), + 
core_dir.join("parser.cpp"), + core_dir.join("localiser.cpp"), + core_dir.join("autodeployer.cpp"), + core_dir.join("accelerator.cpp"), + // CPU backend (still needed for some operations) + cpu_dir.join("cpu_config.cpp"), + cpu_dir.join("cpu_subroutines.cpp"), + // Communication + comm_dir.join("comm_config.cpp"), + comm_dir.join("comm_routines.cpp"), ]; + // Compile all source files to object files let mut object_files = Vec::new(); - for src_file in &gpu_sources { + for src_file in &source_files { let file_stem = src_file.file_stem()?.to_str()?; + // Windows uses .obj extension, Unix uses .o let obj_ext = if cfg!(target_os = "windows") { "obj" } else { "o" }; - let obj_file = out_dir.join(format!("gpu_static_{file_stem}.{obj_ext}")); + let obj_file = out_dir.join(format!("gpu_{file_stem}.{obj_ext}")); - debug!("Compiling GPU source with nvcc: {}", src_file.display()); + debug!("Compiling for GPU lib: {}", src_file.display()); let mut compile_cmd = Command::new(&nvcc_path); compile_cmd .arg("-c") @@ -132,7 +171,7 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> .arg("-o") .arg(&obj_file) .arg("-x") - .arg("cu") + .arg("cu") // Treat .cpp files as CUDA source .arg("-I") .arg(&quest_include_dir) .arg("-I") @@ -140,7 +179,7 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> .arg("-I") .arg(quest_dir.parent()?) 
.arg("-I") - .arg("include") + .arg("include") // For quest_ffi.h .arg("--std=c++20") .arg("-DCOMPILE_GPU=1") .arg("-DCOMPILE_CUDA=1") @@ -149,12 +188,19 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> .arg("-DCOMPILE_MPI=0") .arg("-DCOMPILE_CUQUANTUM=0") .arg("-DFLOAT_PRECISION=2") + // Target compute capability 7.5 (Turing) which supports atomicAdd(double*, double) + // sm_75 is the minimum supported by both CUDA 12.x and 13.x .arg("-arch=sm_75") + // Allow newer GCC versions (e.g., GCC 14 in manylinux_2_28) .arg("-allow-unsupported-compiler"); + // Platform-specific compiler flags if cfg!(target_os = "windows") { + // Windows/MSVC: no -fPIC needed (not applicable) + // Use /EHsc for C++ exception handling compile_cmd.arg("-Xcompiler").arg("/EHsc"); } else { + // Unix: position-independent code for shared libraries compile_cmd.arg("-Xcompiler").arg("-fPIC"); } @@ -163,7 +209,10 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> if !output.status.success() { let stdout_str = String::from_utf8_lossy(&output.stdout); let stderr_str = String::from_utf8_lossy(&output.stderr); - eprintln!("ERROR: Failed to compile {} with nvcc", src_file.display()); + eprintln!( + "ERROR: Failed to compile {} for GPU library", + src_file.display() + ); eprintln!("Exit status: {:?}", output.status); if !stdout_str.is_empty() { eprintln!("stdout:\n{stdout_str}"); @@ -177,38 +226,90 @@ fn build_quest_gpu_objects(cuda_path: &str, quest_dir: &Path, out_dir: &Path) -> object_files.push(obj_file); } - // Archive into libquest-gpu.a - let archive_path = out_dir.join("libquest-gpu.a"); - info!("Archiving GPU objects into: {}", archive_path.display()); - - let ar_name = if cfg!(target_os = "windows") { - "lib.exe" + // Link into a shared library + let lib_name = if cfg!(target_os = "macos") { + "libpecos_quest_cuda.dylib" + } else if cfg!(target_os = "windows") { + "pecos_quest_cuda.dll" } else { - "ar" + 
"libpecos_quest_cuda.so" }; - let mut ar_cmd = Command::new(ar_name); + let gpu_lib_path = out_dir.join(lib_name); + + info!("Linking GPU shared library: {}", gpu_lib_path.display()); + + let mut link_cmd = Command::new(&nvcc_path); + link_cmd + .arg("-shared") + .arg("-o") + .arg(&gpu_lib_path) + .args(&object_files); + + // Platform-specific library paths and linking if cfg!(target_os = "windows") { - ar_cmd - .arg(format!("/OUT:{}", archive_path.display())) - .args(&object_files); + // Windows: CUDA libraries are in lib\x64 + link_cmd + .arg(format!("-L{cuda_path}/lib/x64")) + .arg("-lcudart") + .arg("-lcublas"); + // Windows uses MSVC runtime, no need to explicitly link C++ stdlib } else { - ar_cmd.arg("rcs").arg(&archive_path).args(&object_files); + // Unix: CUDA libraries are in lib64 + link_cmd + .arg(format!("-L{cuda_path}/lib64")) + .arg("-lcudart") + .arg("-lcublas"); + // Add C++ standard library + if cfg!(target_os = "macos") { + link_cmd.arg("-lc++"); + } else { + link_cmd.arg("-lstdc++"); + } } - let output = ar_cmd.output().ok()?; + let output = link_cmd.output().ok()?; + if !output.status.success() { let stderr_str = String::from_utf8_lossy(&output.stderr); - eprintln!("ERROR: Failed to create GPU static archive"); + eprintln!("ERROR: Failed to link GPU shared library"); eprintln!("{stderr_str}"); return None; } info!( - "Successfully built GPU static archive: {}", - archive_path.display() + "Successfully built GPU shared library: {}", + gpu_lib_path.display() ); - Some(()) + + // Also copy to target directory for easier discovery + // Try CARGO_TARGET_DIR first, then derive from OUT_DIR + let target_lib_dir = if let Ok(target_dir) = env::var("CARGO_TARGET_DIR") { + let profile = get_build_profile(); + Some(Path::new(&target_dir).join(&profile)) + } else { + // OUT_DIR is something like: target/release/build/pecos-quest-xxx/out + // We want: target/release/ + out_dir + .parent() // build/pecos-quest-xxx + .and_then(|p| p.parent()) // build + 
.and_then(|p| p.parent()) // release or debug + .map(std::path::Path::to_path_buf) + }; + + if let Some(target_dir) = target_lib_dir { + let target_lib_path = target_dir.join(lib_name); + if let Some(parent) = target_lib_path.parent() { + let _ = fs::create_dir_all(parent); + } + if let Err(e) = fs::copy(&gpu_lib_path, &target_lib_path) { + debug!("Could not copy CUDA lib to target dir: {e}"); + } else { + info!("Copied CUDA lib to: {}", target_lib_path.display()); + } + } + + Some(gpu_lib_path) } /// Patch `QuEST` GPU code for CUDA 13 compatibility @@ -300,9 +401,10 @@ fn generate_quest_header(quest_dir: &Path) -> Result<()> { // Since MULTI_LIB_HEADERS=0, we want the #if !0 block to be active // which means we need to process the #cmakedefine directives // - // COMPILE_CUDA is set based on the `cuda` Cargo feature: - // - cuda feature enabled: COMPILE_CUDA=1 (GPU dispatch paths active) - // - cuda feature disabled: COMPILE_CUDA=0 (CPU only) + // IMPORTANT: The main library is ALWAYS CPU-only (COMPILE_CUDA=0). + // GPU support is provided via a separate shared library (libpecos_quest_cuda.so) + // which is compiled with nvcc and has its own COMPILE_CUDA=1 flag. + // This generated quest.h is only used by the main library. // Process the template line by line to handle conditional blocks let mut in_multi_lib_block = false; @@ -329,10 +431,7 @@ fn generate_quest_header(quest_dir: &Path) -> Result<()> { return Some("#define COMPILE_OPENMP 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUDA") { - // The embedded QuEST library always runs in CPU-only mode. - // GPU acceleration is handled by the CUDA engine builder - // (QuestCudaStateVecEngine) which loads a separate GPU backend - // at runtime via dlopen. 
+ // Main library is always CPU-only; GPU library is separate return Some("#define COMPILE_CUDA 0".to_string()); } if line.contains("#cmakedefine01 COMPILE_CUQUANTUM") { @@ -515,11 +614,11 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { let cpu_dir = quest_src_dir.join("cpu"); let comm_dir = quest_src_dir.join("comm"); - // When CUDA is enabled, GPU implementations come from the nvcc-compiled - // static archive (libquest-gpu.a). Otherwise, use gpu_stubs.cpp. - if !gpu_enabled { - build.file("src/gpu_stubs.cpp"); - } + // IMPORTANT: The main library ALWAYS uses gpu_stubs.cpp (CPU only). + // GPU support is provided by a separate shared library (libpecos_quest_cuda.so) + // that is loaded at runtime via dlopen. This allows a single binary to work + // on systems with and without CUDA installed. + build.file("src/gpu_stubs.cpp"); build .file("src/bridge.cpp") @@ -549,7 +648,8 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { // Accelerator.cpp contains dispatch logic for both CPU and GPU .file(core_dir.join("accelerator.cpp")); - // Build the GPU static archive if CUDA feature is enabled + // Build the separate GPU shared library if GPU feature is enabled + // This library will be loaded at runtime via dlopen if gpu_enabled { let gpu_dir = quest_src_dir.join("gpu"); if !gpu_dir.exists() { @@ -559,11 +659,21 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { std::process::exit(1); } - // Build GPU source files (gpu_config.cpp, gpu_subroutines.cpp) with nvcc - if build_quest_gpu_objects(cuda_path.as_ref().unwrap(), quest_dir, out_dir).is_some() { - info!("GPU static archive built successfully"); + // Build the separate GPU shared library + if let Some(gpu_lib_path) = + build_gpu_shared_library(cuda_path.as_ref().unwrap(), quest_dir, out_dir) + { + info!( + "GPU shared library built successfully: {}", + gpu_lib_path.display() + ); + // Emit the GPU library path so downstream crates can find it + println!( + 
"cargo:rustc-env=PECOS_QUEST_CUDA_LIB={}", + gpu_lib_path.display() + ); } else { - eprintln!("\nERROR: GPU feature enabled but GPU archive build failed"); + eprintln!("\nERROR: GPU feature enabled but GPU library build failed"); eprintln!(" See warnings above for compilation errors"); eprintln!(" Solutions:"); eprintln!(" 1. Use CUDA 11 or 12 instead of CUDA 13 (QuEST incompatibility)"); @@ -571,15 +681,6 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { eprintln!(" 3. Use Python GPU simulators (CuStateVec/MPS) which work with CUDA 13"); std::process::exit(1); } - - // Link the GPU static archive and CUDA runtime libraries - println!("cargo:rustc-link-lib=static=quest-gpu"); - println!( - "cargo:rustc-link-search=native={}/lib64", - cuda_path.as_ref().unwrap() - ); - println!("cargo:rustc-link-lib=cudart"); - println!("cargo:rustc-link-lib=cublas"); } // CPU backend @@ -598,19 +699,21 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { .include("include"); // Define preprocessor flags based on features - // When CUDA is enabled, COMPILE_CUDA=1 and COMPILE_GPU=1 so the standard C++ - // compiler sees GPU dispatch declarations (which use only standard C++ types). - // The actual GPU kernel implementations are in the nvcc-compiled static archive. - let (cuda_flag, gpu_flag) = if gpu_enabled { ("1", "1") } else { ("0", "0") }; + // IMPORTANT: The main library is ALWAYS CPU-only. GPU support is provided via + // a separate shared library (libpecos_quest_cuda.so) loaded at runtime via dlopen. + // This allows a single binary to work on systems with and without CUDA. 
build .define("COMPILE_CPU", "1") - .define("COMPILE_OPENMP", "0") - .define("COMPILE_MPI", "0") - .define("FLOAT_PRECISION", "2") - .define("COMPILE_CUDA", cuda_flag) - .define("COMPILE_GPU", gpu_flag) + .define("COMPILE_OPENMP", "0") // Disable OpenMP for simplicity initially + .define("COMPILE_MPI", "0") // Disable MPI for simplicity initially + .define("FLOAT_PRECISION", "2") // Double precision by default + .define("COMPILE_CUDA", "0") // Main library never uses CUDA directly + .define("COMPILE_GPU", "0") // GPU ops are in the separate GPU library .define("COMPILE_CUQUANTUM", "0"); + // Note: We do NOT link cudart/cublas here. The GPU library handles CUDA linking + // and is loaded at runtime only when GPU is requested. + // Use C++20 standard (QuEST v4 uses designated initializers which require C++20) // However, on macOS there's a known issue with C++20 and cxx crate's pointer_traits // specializations, so we use C++17 there (designated initializers are a GNU extension @@ -680,6 +783,10 @@ fn build_cxx_bridge(quest_dir: &Path, out_dir: &Path) { build.compile("quest-bridge"); + // Note: GPU object files are now compiled into a separate shared library + // (libpecos_quest_cuda.so) which is built by build_gpu_shared_library() + // and loaded at runtime via dlopen. + // On macOS, ensure the C++ standard library is linked correctly // Use the system libc++ which is in the dyld shared cache (macOS Big Sur+) // We rely on the compiler's default behavior rather than explicit cargo directives