feat: add walnuts implementation

aseyboldt · aseyboldt · commit 72e6d0ae3153 · 2026-04-20T18:50:20.000+02:00
diff --git a/src/adapt_strategy.rs b/src/adapt_strategy.rs
@@ -326,11 +326,12 @@ where
         start: &State<M, P>,
         end: &State<M, P>,
         divergence_info: Option<&DivergenceInfo>,
+        num_substeps: u64,
     ) {
         self.collector1
-            .register_leapfrog(math, start, end, divergence_info);
+            .register_leapfrog(math, start, end, divergence_info, num_substeps);
         self.collector2
-            .register_leapfrog(math, start, end, divergence_info);
+            .register_leapfrog(math, start, end, divergence_info, num_substeps);
     }
 
     fn register_draw(&mut self, math: &mut M, state: &State<M, P>, info: &crate::nuts::SampleInfo) {
@@ -458,6 +459,7 @@ mod test {
             target_integration_time: None,
             extra_doublings: 0,
             max_energy_error: 1000.0,
+            walnuts_options: None,
         };
 
         let rng = {
diff --git a/src/chain.rs b/src/chain.rs
@@ -158,6 +158,7 @@ where
             &mut self.hamiltonian,
             &self.options,
             &mut self.collector,
+            self.draw_count < 70,
         )?;
         let mut position: Box<[f64]> = vec![0f64; math.dim()].into();
         state.write_position(math, &mut position);
@@ -226,6 +227,17 @@ pub struct NutsStats<P: HasDims, H: Storable<P>, A: Storable<P>, D: Storable<P>>
     pub point: D,
     #[storable(flatten)]
     pub divergence: DivergenceStats,
+    pub diverging: bool,
+    #[storable(dims("unconstrained_parameter"))]
+    pub divergence_start: Option<Vec<f64>>,
+    #[storable(dims("unconstrained_parameter"))]
+    pub divergence_start_gradient: Option<Vec<f64>>,
+    #[storable(dims("unconstrained_parameter"))]
+    pub divergence_end: Option<Vec<f64>>,
+    #[storable(dims("unconstrained_parameter"))]
+    pub divergence_momentum: Option<Vec<f64>>,
+    non_reversible: Option<bool>,
+    //pub divergence_message: Option<String>,
     #[storable(ignore)]
     _phantom: PhantomData<fn() -> P>,
 }
@@ -279,6 +291,17 @@ impl<M: Math, R: rand::Rng, A: AdaptStrategy<M>> SamplerStats<M> for NutsChain<M
             adapt: adapt_stats,
             point: point_stats,
             divergence: (div_info, options.divergence, self.draw_count).into(),
+            diverging: div_info.is_some(),
+            divergence_start: div_info
+                .and_then(|d| d.start_location.as_ref().map(|v| v.as_ref().to_vec())),
+            divergence_start_gradient: div_info
+                .and_then(|d| d.start_gradient.as_ref().map(|v| v.as_ref().to_vec())),
+            divergence_end: div_info
+                .and_then(|d| d.end_location.as_ref().map(|v| v.as_ref().to_vec())),
+            divergence_momentum: div_info
+                .and_then(|d| d.start_momentum.as_ref().map(|v| v.as_ref().to_vec())),
+            //divergence_message: self.divergence_msg.clone(),
+            non_reversible: div_info.and_then(|d| Some(d.non_reversible)),
             _phantom: PhantomData,
         }
     }
diff --git a/src/dynamics/hamiltonian.rs b/src/dynamics/hamiltonian.rs
@@ -22,6 +22,7 @@ use crate::{
 ///   a cutoff value or nan.
 /// - The logp function caused a recoverable error (eg if an ODE solver
 ///   failed)
+#[non_exhaustive]
 #[derive(Debug, Clone)]
 pub struct DivergenceInfo {
     pub start_momentum: Option<Box<[f64]>>,
@@ -32,6 +33,86 @@ pub struct DivergenceInfo {
     pub end_idx_in_trajectory: Option<i64>,
     pub start_idx_in_trajectory: Option<i64>,
     pub logp_function_error: Option<Arc<dyn std::error::Error + Send + Sync>>,
+    pub non_reversible: bool,
+}
+impl DivergenceInfo {
+    /// Creates an empty `DivergenceInfo`: every optional field is `None` and
+    /// `non_reversible` is `false`.
+    pub fn new() -> Self {
+        DivergenceInfo {
+            start_momentum: None,
+            start_location: None,
+            start_gradient: None,
+            end_location: None,
+            energy_error: None,
+            end_idx_in_trajectory: None,
+            start_idx_in_trajectory: None,
+            logp_function_error: None,
+            non_reversible: false,
+        }
+    }
+
+    /// Builds the divergence record for a step from `start` to `stop` whose
+    /// energy error was too large.
+    ///
+    /// Stores the position and gradient of `start`, the position of `stop`
+    /// and the trajectory indices of both states. `start_momentum` and
+    /// `energy_error` are not filled in yet (see the TODOs below).
+    pub fn new_energy_error_too_large<M: Math>(
+        math: &mut M,
+        start: &State<M, impl Point<M>>,
+        stop: &State<M, impl Point<M>>,
+    ) -> Self {
+        DivergenceInfo {
+            logp_function_error: None,
+            start_location: Some(math.box_array(start.point().position())),
+            start_gradient: Some(math.box_array(start.point().gradient())),
+            // TODO: the start momentum is not recorded yet.
+            start_momentum: None,
+            start_idx_in_trajectory: Some(start.index_in_trajectory()),
+            end_location: Some(math.box_array(stop.point().position())),
+            end_idx_in_trajectory: Some(stop.index_in_trajectory()),
+            // TODO: the energy error itself is not recorded yet.
+            energy_error: None,
+            non_reversible: false,
+        }
+    }
+
+    /// Builds the divergence record for a step starting at `start` during
+    /// which the logp function reported `logp_function_error`.
+    ///
+    /// Stores the position, gradient and trajectory index of `start` together
+    /// with the error; no end state is recorded.
+    pub fn new_logp_function_error<M: Math>(
+        math: &mut M,
+        start: &State<M, impl Point<M>>,
+        logp_function_error: Arc<dyn std::error::Error + Send + Sync>,
+    ) -> Self {
+        DivergenceInfo {
+            logp_function_error: Some(logp_function_error),
+            start_location: Some(math.box_array(start.point().position())),
+            start_gradient: Some(math.box_array(start.point().gradient())),
+            // TODO: the start momentum is not recorded yet.
+            start_momentum: None,
+            start_idx_in_trajectory: Some(start.index_in_trajectory()),
+            end_location: None,
+            end_idx_in_trajectory: None,
+            energy_error: None,
+            non_reversible: false,
+        }
+    }
+
+    /// Builds the divergence record for a step starting at `start` that was
+    /// found not to be reversible.
+    ///
+    /// Stores the position, gradient and trajectory index of `start` and sets
+    /// `non_reversible` to `true`; no end state is recorded.
+    pub fn new_not_reversible<M: Math>(math: &mut M, start: &State<M, impl Point<M>>) -> Self {
+        // TODO add info about what went wrong
+        DivergenceInfo {
+            logp_function_error: None,
+            start_location: Some(math.box_array(start.point().position())),
+            start_gradient: Some(math.box_array(start.point().gradient())),
+            // TODO: the start momentum is not recorded yet.
+            start_momentum: None,
+            start_idx_in_trajectory: Some(start.index_in_trajectory()),
+            end_location: None,
+            end_idx_in_trajectory: None,
+            energy_error: None,
+            non_reversible: true,
+        }
+    }
+
+    /// Placeholder constructor: currently returns `info` unchanged.
+    ///
+    /// `math` and `num_steps` are accepted so that the signature is already in
+    /// place, but they are not used yet.
+    pub fn new_max_step_size_halvings<M: Math>(math: &mut M, num_steps: u64, info: Self) -> Self {
+        // TODO: attach the step count to the divergence info.
+        // Explicitly discard the inputs that are not used yet so the stub
+        // compiles without unused-variable warnings.
+        let _ = (math, num_steps);
+        info
+    }
+}
+
+impl DivergenceInfo {
+    /// Creates a `DivergenceInfo` that only flags the step as non-reversible.
+    ///
+    /// No state is available here, so every optional field stays `None`; use
+    /// `new_not_reversible` when the start state should be recorded as well.
+    pub(crate) fn new_non_reversible() -> DivergenceInfo {
+        // Previously a `todo!()` stub, which panicked on every call.
+        DivergenceInfo {
+            non_reversible: true,
+            ..DivergenceInfo::new()
+        }
+    }
 }
 
 /// Per-draw divergence statistics, suitable for storage.
@@ -108,6 +189,15 @@ pub enum Direction {
     Backward,
 }
 
+impl Direction {
+    /// Returns the opposite direction: `Forward` becomes `Backward` and
+    /// `Backward` becomes `Forward`.
+    pub fn reverse(&self) -> Self {
+        match *self {
+            Self::Backward => Self::Forward,
+            Self::Forward => Self::Backward,
+        }
+    }
+}
+
 impl Distribution<Direction> for StandardUniform {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Direction {
         if rng.random::<bool>() {
@@ -167,6 +257,40 @@ pub trait Hamiltonian<M: Math>: SamplerStats<M> + Sized {
         collector: &mut C,
     ) -> LeapfrogResult<M, Self::Point>;
 
+    /// Advances `start` by `num_steps` consecutive calls to [`Self::leapfrog`]
+    /// in direction `dir`.
+    ///
+    /// Each call is also handed `num_steps` (presumably the number of
+    /// step-size splits; confirm against the signature of `leapfrog`).
+    ///
+    /// The energies of `start` and of every state reached so far are tracked.
+    /// As soon as their spread (largest minus smallest) exceeds `max_error`,
+    /// a divergence between `start` and the state just reached is returned.
+    /// A divergence or error produced by an individual leapfrog call is
+    /// passed on unchanged; otherwise the final state is returned.
+    fn split_leapfrog<C: Collector<M, Self::Point>>(
+        &mut self,
+        math: &mut M,
+        start: &State<M, Self::Point>,
+        dir: Direction,
+        num_steps: u64,
+        collector: &mut C,
+        max_error: f64,
+    ) -> LeapfrogResult<M, Self::Point> {
+        let mut current = start.clone();
+
+        // Both bounds of the observed energy range start at the initial energy.
+        let initial_energy = start.energy();
+        let mut lowest = initial_energy;
+        let mut highest = initial_energy;
+
+        for _substep in 0..num_steps {
+            current = match self.leapfrog(math, &current, dir, num_steps, collector) {
+                LeapfrogResult::Ok(advanced) => advanced,
+                // Divergences and errors of a sub-step end the whole step.
+                failure => return failure,
+            };
+
+            let current_energy = current.energy();
+            lowest = lowest.min(current_energy);
+            highest = highest.max(current_energy);
+
+            // TODO: the WALNUTS paper says to use the absolute value here,
+            // but the C++ code does not?
+            if highest - lowest > max_error {
+                return LeapfrogResult::Divergence(DivergenceInfo::new_energy_error_too_large(
+                    math, start, &current,
+                ));
+            }
+        }
+
+        LeapfrogResult::Ok(current)
+    }
+
     fn is_turning(
         &self,
         math: &mut M,
@@ -255,4 +379,5 @@ pub trait Hamiltonian<M: Math>: SamplerStats<M> + Sized {
         let _ = (math, state, noise, rng, factor);
         Ok(())
     }
+    /// Returns the maximum energy error associated with this Hamiltonian.
+    ///
+    /// NOTE(review): presumably the energy-error threshold beyond which a
+    /// leapfrog step counts as divergent; confirm against the callers.
+    fn max_energy_error(&self) -> f64;
 }
diff --git a/src/dynamics/transformed_hamiltonian.rs b/src/dynamics/transformed_hamiltonian.rs
@@ -572,8 +572,9 @@ impl<M: Math, T: Transformation<M>> Hamiltonian<M> for TransformedHamiltonian<M,
                 start_idx_in_trajectory: Some(start.point().index_in_trajectory()),
                 end_idx_in_trajectory: None,
                 energy_error: None,
+                non_reversible: false,
             };
-            collector.register_leapfrog(math, start, &out, Some(&div_info));
+            collector.register_leapfrog(math, start, &out, Some(&div_info), step_size_splits);
             return LeapfrogResult::Divergence(div_info);
         }
 
@@ -604,12 +605,19 @@ impl<M: Math, T: Transformation<M>> Hamiltonian<M> for TransformedHamiltonian<M,
                 start_idx_in_trajectory: Some(start.index_in_trajectory()),
                 end_idx_in_trajectory: Some(out.index_in_trajectory()),
                 energy_error: Some(energy_error),
+                non_reversible: false,
             };
-            collector.register_leapfrog(math, start, &out, Some(&divergence_info));
+            collector.register_leapfrog(
+                math,
+                start,
+                &out,
+                Some(&divergence_info),
+                step_size_splits,
+            );
             return LeapfrogResult::Divergence(divergence_info);
         }
 
-        collector.register_leapfrog(math, start, &out, None);
+        collector.register_leapfrog(math, start, &out, None, step_size_splits);
 
         LeapfrogResult::Ok(out)
     }
@@ -824,4 +832,8 @@ impl<M: Math, T: Transformation<M>> Hamiltonian<M> for TransformedHamiltonian<M,
 
         Ok(())
     }
+
+    /// Returns the `max_energy_error` value stored on this Hamiltonian.
+    fn max_energy_error(&self) -> f64 {
+        self.max_energy_error
+    }
 }
diff --git a/src/external_adapt_strategy.rs b/src/external_adapt_strategy.rs
@@ -97,6 +97,7 @@ impl<M: Math, P: Point<M>> Collector<M, P> for DrawCollector<M> {
         _start: &State<M, P>,
         end: &State<M, P>,
         divergence_info: Option<&crate::DivergenceInfo>,
+        _num_substeps: u64,
     ) {
         if divergence_info.is_some() {
             return;
diff --git a/src/lib.rs b/src/lib.rs
@@ -122,7 +122,7 @@ pub use dynamics::{DivergenceInfo, KineticEnergyKind};
 pub use math::{CpuLogpFunc, CpuMath, CpuMathError, LogpError, Math};
 pub use mclmc::{MclmcChain, MclmcInfo, MclmcStats, MclmcTrajectoryKind};
 pub use model::Model;
-pub use nuts::NutsError;
+pub use nuts::{NutsError, WalnutsOptions};
 
 #[allow(deprecated)]
 pub use sampler::{
diff --git a/src/nuts.rs b/src/nuts.rs
diff --git a/src/sampler.rs b/src/sampler.rs
diff --git a/src/stepsize/dual_avg.rs b/src/stepsize/dual_avg.rs