Update Laplace Newton loop and Wolfe line search with last review feedback

SteveBronder · SteveBronder · commit eab35ec8228b · 2026-02-05T17:58:28.000-05:00
diff --git a/stan/math/mix/functor/laplace_marginal_density_estimator.hpp b/stan/math/mix/functor/laplace_marginal_density_estimator.hpp
@@ -13,6 +13,7 @@
 #include <stan/math/prim/functor/iter_tuple_nested.hpp>
 #include <unsupported/Eigen/MatrixFunctions>
 #include <cmath>
+#include <sstream>
 
 /**
  * @file
@@ -443,8 +444,12 @@ inline void llt_with_jitter(LLT& llt_B, B_t& B, double min_jitter = 1e-10,
       }
     }
     if (llt_B.info() != Eigen::Success) {
-      throw std::domain_error(
-          "laplace_marginal_density: Cholesky (Diag) failed");
+      std::stringstream msg;
+      msg << "laplace_marginal_density: Cholesky decomposition failed on "
+          << "Hessian matrix B after attempting jitter values from "
+          << min_jitter << " to " << max_jitter
+          << ". Matrix may not be positive definite.";
+      throw std::domain_error(msg.str());
     }
   }
 }
@@ -942,16 +947,13 @@ inline auto run_newton_loop(SolverPolicy& solver, NewtonStateT& state,
       scratch.alpha() = 1.0;
       update_fun(scratch, state.curr(), state.prev(), scratch.eval_,
                  state.wolfe_info.p_);
-      bool run_convergence_check = true;
+      bool force_finish = false;
       if (scratch.alpha() <= options.line_search.min_alpha) {
         state.wolfe_status.accept_ = false;
-        finish_update = true;
-        run_convergence_check = false;
+        force_finish = true;
       } else if (options.line_search.max_iterations == 0) {
         state.curr().update(scratch);
         state.wolfe_status.accept_ = true;
-        finish_update = false;
-        run_convergence_check = false;
       } else {
         Eigen::VectorXd s = scratch.a() - state.prev().a();
         auto full_step_grad
@@ -964,16 +966,15 @@ inline auto run_newton_loop(SolverPolicy& solver, NewtonStateT& state,
         state.wolfe_status = internal::wolfe_line_search(
             state.wolfe_info, update_fun, options.line_search, msgs);
       }
-      if (run_convergence_check) {
-        /**
-         * Stop when objective change is small, or when a rejected Wolfe step
-         * fails to improve; finish_update then exits the Newton loop.
-         */
-        finish_update = std::abs(state.curr().obj() - state.prev().obj())
-                            < options.tolerance
-                        || (!state.wolfe_status.accept_
-                            && state.curr().obj() <= state.prev().obj());
-      }
+      /**
+       * Stop when alpha <= min_alpha, when objective change is small, or when
+       * a rejected Wolfe step fails to improve; finish_update exits the loop.
+       */
+      const bool obj_below_tol = std::abs(state.curr().obj() - state.prev().obj()) <
+                          options.tolerance;
+      const bool wolfe_failed = !state.wolfe_status.accept_
+                                  && state.curr().obj() <= state.prev().obj();
+      finish_update = force_finish || obj_below_tol || wolfe_failed;
     }
     if (finish_update) {
       if (!state.final_loop && state.wolfe_status.accept_) {
diff --git a/stan/math/mix/functor/wolfe_line_search.hpp b/stan/math/mix/functor/wolfe_line_search.hpp
@@ -156,7 +156,7 @@ namespace internal {
  *         (x_left + x_right) / 2 is returned instead.
  */
 template <typename Scalar>
-[[nodiscard]] inline Scalar cubic_or_bisect_max(Scalar x_left, Scalar f_left,
+[[nodiscard]] inline Scalar cubic_interpolation(Scalar x_left, Scalar f_left,
                                                 Scalar df_left, Scalar x_right,
                                                 Scalar f_right,
                                                 Scalar df_right) noexcept {
@@ -283,8 +283,8 @@ template <typename Scalar>
 }
 
 template <typename Eval, typename Options>
-inline auto cubic_or_bisect_max(Eval&& low, Eval&& high, Options&& opt) {
-  auto alpha = cubic_or_bisect_max(low.alpha(), low.obj(), low.dir(),
+inline auto cubic_interpolation(Eval&& low, Eval&& high, Options&& opt) {
+  auto alpha = cubic_interpolation(low.alpha(), low.obj(), low.dir(),
                                    high.alpha(), high.obj(), high.dir());
   const double width = high.alpha() - low.alpha();
   const double guard = 1e-3 * width;  // or make this an option
@@ -714,7 +714,7 @@ inline auto retry_evaluate(Update&& update, Proposal&& proposal, Curr&& curr,
  *
  *    - If `low.dir()` and `high.dir()` have opposite signs and the right
  *      endpoint `high` satisfies Armijo, a cubic interpolation of the endpoints
- *      is used (`cubic_or_bisect_max(low, high, opt)`).
+ *      is used (`cubic_interpolation(low, high, opt)`).
  *    - Otherwise the trial is the simple bisection midpoint
  *      \f$\tfrac{1}{2}(\alpha_\text{low} + \alpha_\text{high})\f$.
  *
@@ -864,10 +864,6 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
   Eval low{0.0, prev.obj(), dir_deriv_init};
   prev.dir() = dir_deriv_init;
   int total_updates = 0;
-  auto eval_finite = [](const Eval& e, const WolfeData& state) {
-    return std::isfinite(e.obj()) && std::isfinite(e.dir())
-           && state.theta().allFinite() && state.theta_grad().allFinite();
-  };
   Eval best = low;  // keep the best Armijo-OK in case strong-Wolfe fails
   auto update_with_tick = [&total_updates, &opt, &best, &update_fun](
                               auto&& proposal, auto&& curr, auto&& prev,
@@ -895,7 +891,6 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
       = std::clamp(curr.alpha() * opt.scale_up, opt.min_alpha, opt.max_alpha);
   Eval high{alpha_start, curr.obj(), dir_deriv_init};
   WolfeStatus wolfe_check{WolfeReturn::Continue, 0, 0, false};
-  bool high_has_eval = true;
   // Initial check for numerical trouble
   {
     wolfe_check = update_with_tick(scratch, curr, prev, high, p);
@@ -920,7 +915,6 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
           if (wolfe_check.stop_ != WolfeReturn::Continue) {
             return wolfe_check;
           }
-          high_has_eval = true;
         }
         wolfe_check = update_with_tick(scratch, curr, prev, best, p);
         if (wolfe_check.stop_ != WolfeReturn::Continue) {
@@ -935,55 +929,50 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
       }
     }
   }
-  bool found_right = false;
   int num_backtracks = 0;
   /**
-   * For each case
+   * From Nocedal–Wright (2006), Algorithm 3.5:
+   * https://www.math.uci.edu/~qnie/Publications/NumericalOptimization.pdf
    * | armijo     | wolfe | sign(g) | Action
    * -------+-------+---------+--------------------------------
    * | [1]  T     |   T   |         | Accept alpha
    * | [2]  T     |   F   |   > 0   | set low=high, expand high
-   * | [3]  T     |   F   |   < 0   | Set alpha_high <- alpha, stop
-   * | [4]  F     |   T   |         | Set alpha_high <- alpha, stop
-   * | [5]  F     |   F   |         | Set alpha_high <- alpha, stop
+   * | [3]  T     |   F   |   < 0   | Bracket found: stop
+   * | [4]  F     |   T   |         | Bracket found: stop
+   * | [5]  F     |   F   |         | Bracket found: stop
+   * NOTE: Ideally the bracket has a positive directional derivative at low
+   * and a negative one at high (shape /\), which cubic interpolation requires.
+   * This scheme does not guarantee such a bracket, so the zoom phase checks
+   * whether cubic interpolation applies and otherwise falls back to bisection.
    **/
-  while (!found_right && high.alpha() < opt.max_alpha) {
+  while (high.alpha() < opt.max_alpha) {
     num_backtracks++;
-    // 1. Evaluate f(alpha) and g(alpha)
     wolfe_check = update_with_tick(scratch, curr, prev, high, p);
     if (wolfe_check.stop_ != WolfeReturn::Continue) {
       return wolfe_check;
     }
-    high_has_eval = true;
-    const bool finite_ok = eval_finite(high, scratch);
-    // 2. Handle numerical trouble first
-    if (!finite_ok) {  //   f or g is NaN/Inf → shrink
-      high.alpha() *= 0.5;
-      high_has_eval = false;
-      if (high.alpha() < opt.min_alpha) {
-        break;
-      }
-      continue;
-    }
     const bool armijo = check_armijo(high, prev, opt);
     const bool wolfe = check_wolfe(high, prev, opt);
-    if (armijo && wolfe) {  // [1]
+    // [1]
+    if (armijo && wolfe) {
       curr.update(scratch, high);
       return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
                          true};
+    } else if (armijo) {
+      if (best.obj() < high.obj()) {
+        best = high;
+      }
+      // [2]
+      if (high.dir() > 0) {
+        low = high;
+        high.alpha() *= opt.scale_up;
+        continue;
+      }
+      // [3]
+      break;
     }
-    if (armijo && best.obj() < high.obj()) {
-      best = high;
-    }
-    const bool dir_pos = high.dir() > 0;
-    if (armijo && !wolfe && dir_pos) {  // [2]
-      low = high;
-      high.alpha() *= opt.scale_up;
-      high_has_eval = false;
-      continue;
-    }
-    // [3,4,5]
-    found_right = true;
+    // [3, 4, 5]
+    break;
   }
   const double grad_tol
       = std::max(opt.abs_grad_threshold,
@@ -1018,13 +1007,6 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
     return WolfeStatus{WolfeReturn::Continue, total_updates, num_backtracks,
                        false};
   };
-  if (!high_has_eval) {
-    wolfe_check = update_with_tick(scratch, curr, prev, high, p);
-    if (wolfe_check.stop_ != WolfeReturn::Continue) {
-      return wolfe_check;
-    }
-    high_has_eval = true;
-  }
   auto check_b = check_bounds(high);
   if (check_b.stop_ != WolfeReturn::Continue) {
     if (check_b.accept_) {
@@ -1036,7 +1018,19 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
   if (wolfe_check.stop_ != WolfeReturn::Continue) {
     return wolfe_check;
   }
-  // Zoom phase
+  /**
+   * Zoom Step: (Alg 3.6, adapted to maximization via phi=-obj)
+   *
+   * Exit/update table (evaluated at `mid`, with `low` = best Armijo endpoint):
+   * | Armijo? | obj(mid) >= obj(low)? | Wolfe? | dir(mid) >= 0? | Action
+   * |---------|-----------------------|--------|----------------|--------------------------|
+   * |   T     |           *           |   T    |       *        | accept mid [1]           |
+   * |   T     |           T           |   F    |       *        | high = mid [2]           |
+   * |   T     |           F           |   F    |       T        | high = low; low = mid [3]|
+   * |   T     |           F           |   F    |       F        | low = mid [4]            |
+   * |   F     |           *           |   *    |       *        | high = mid [5]           |
+   * ----------------------------------------------------------------------------------------
+   **/
   while ((high.alpha() - low.alpha() > opt.min_alpha)
          && high.alpha() > opt.min_alpha) {
     num_backtracks++;
@@ -1046,9 +1040,12 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
     const bool use_cubic = have_sign_change && high_armijo_ok;
 
     // Choose trial alpha: cubic when bracket is good, else bisection.
-    double alpha_mid = use_cubic ? cubic_or_bisect_max(low, high, opt)
-                                 : 0.5 * (low.alpha() + high.alpha());
-
+    double alpha_mid{0};
+    if (use_cubic) {
+      alpha_mid = cubic_interpolation(low, high, opt);
+    } else {
+      alpha_mid = 0.5 * (low.alpha() + high.alpha());
+    }
     if (alpha_mid <= opt.min_alpha) {
       break;
     }
@@ -1063,6 +1060,7 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
     }
     if (check_armijo(mid, prev, opt)) {
       if (check_wolfe(mid, prev, opt)) {
+        // [1]
         curr.update(scratch, mid);
         return WolfeStatus{WolfeReturn::Wolfe, total_updates, num_backtracks,
                            true};
@@ -1071,17 +1069,17 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
       if (mid.obj() > best.obj()) {
         best = mid;
       }
-    }
-
-    // Update bracket based on derivative sign
-    if (mid.dir() * low.dir() < 0) {
-      // sign change between low and mid -> [low, mid]
-      high = mid;
-    } else {
-      // otherwise shift left endpoint -> [mid, high]
+      if (mid.obj() >= low.obj()) {
+        // [2]
+        high = mid;
+      } else if (mid.dir() >= 0) {
+        // [3]
+        high = low;
+        low = mid;
+      }
+      // [4]
       low = mid;
     }
-
     // Convergence/guard-rail checks (uses prev/grad_tol/obj_tol etc.)
     auto bounds_check = check_bounds(mid);
     if (bounds_check.stop_ != WolfeReturn::Continue) {
@@ -1090,6 +1088,8 @@ inline WolfeStatus wolfe_line_search(Info& wolfe_info, UpdateFun&& update_fun,
       }
       return bounds_check;
     }
+    // [5]
+    high = mid;
   }
   // On failure, use the best point we have found so far that at least satisfies
   // armijo