start LM update

MaxenceGollier · MaxenceGollier · commit 58f26a16b684 · 2025-08-04T08:53:24.000+02:00
diff --git a/src/LMModel.jl b/src/LMModel.jl
@@ -0,0 +1,55 @@
+export LMModel
+
+@doc raw"""
+    LMModel(J, F, σ, x0)
+
+Given the unconstrained optimization problem:
+```math
+\min \tfrac{1}{2} \| F(x) \|^2,
+```
+this model represents the smooth LM subproblem:
+```math
+\min_s \ \tfrac{1}{2} \| F(x) + J(x)s \|^2 + \tfrac{1}{2} σ \|s\|^2
+```
+where `J` is the Jacobian of `F` at `x0` in sparse format or as a linear operator.
+`σ > 0` is a regularization parameter; the field `v`, a vector of the same size as `F(x0)` used for intermediary computations, is allocated by the constructor.
+"""
+mutable struct LMModel{T <: Real, V <: AbstractVector{T}, G <: Union{AbstractMatrix{T}, AbstractLinearOperator{T}}} <:
+               AbstractNLPModel{T, V}
+  J::G # Jacobian, stored as a matrix or as a linear operator
+  F::V # residual vector; obj and grad! form F + J * s from it
+  v::V # workspace overwritten by obj and grad!
+  σ::T # regularization parameter
+  meta::NLPModelMeta{T, V} # built by the outer constructor from length(x0) and x0
+  counters::Counters # evaluation counters updated through increment!
+end
+
+# Outer constructor: checks that `J`, `F` and `x0` have compatible sizes, allocates the
+# workspace `v` and wraps everything with fresh metadata and evaluation counters.
+function LMModel(J::G, F::V, σ::T, x0::V) where {T, V, G}
+  # Explicit checks rather than `@assert`, which may be disabled and must not guard user input.
+  length(x0) == size(J, 2) || throw(DimensionMismatch("length(x0) must equal size(J, 2)"))
+  length(F) == size(J, 1) || throw(DimensionMismatch("length(F) must equal size(J, 1)"))
+  meta = NLPModelMeta(length(x0), x0 = x0) # Perhaps we should add lvar and uvar as well here.
+  v = similar(F) # same size as F; overwritten by obj and grad!
+  return LMModel(J, F, v, σ, meta, Counters())
+end
+
+function NLPModels.obj(nlp::LMModel, x::AbstractVector{T}) where {T}
+  @lencheck nlp.meta.nvar x # `x` plays the role of the step s of the subproblem
+  increment!(nlp, :neval_obj)
+  r = nlp.v .= nlp.F # r aliases the workspace nlp.v, reset to F
+  mul!(r, nlp.J, x, one(T), one(T)) # r ← J * x + F
+  return (dot(r, r) + nlp.σ * dot(x, x)) / 2 # ‖F + J x‖² / 2 + σ ‖x‖² / 2
+end
+
+function NLPModels.grad!(nlp::LMModel, x::AbstractVector{T}, g::AbstractVector{T}) where {T}
+  @lencheck nlp.meta.nvar x
+  @lencheck nlp.meta.nvar g
+  increment!(nlp, :neval_grad) # gradient of the LM subproblem: J' * (F + J * x) + σ * x
+  nlp.v .= nlp.F # reset the workspace to F
+  @. g = nlp.σ * x # write into the output `g`; LMModel has no field `g`
+  mul!(nlp.v, nlp.J, x, one(T), one(T)) # v ← J * x + F
+  mul!(g, nlp.J', nlp.v, one(T), one(T)) # g ← J' * v + σ * x
+  return g
+end
diff --git a/src/LM_alg.jl b/src/LM_alg.jl
@@ -1,4 +1,195 @@
-export LM
+export LM, LMSolver, solve!
+
+import SolverCore.solve!
+
+mutable struct TRDHSolver # FIXME: empty placeholder type; the LMSolver constructor compares `subsolver` against it
+end
+
+mutable struct LMSolver{
+  T <: Real,
+  G <: ShiftedProximableFunction,
+  V <: AbstractVector{T},
+  M <: AbstractLinearOperator{T},
+  ST <: AbstractOptimizationSolver,
+  PB <: AbstractRegularizedNLPModel,
+} <: AbstractOptimizationSolver # workspace of the LM solver, filled by the LMSolver(reg_nls) constructor
+  xk::V # current iterate; solve! copies its starting point here
+  ∇fk::V # buffer for Jk' * Fk
+  mν∇fk::V # buffer for -ν * ∇fk
+  Fk::V # residual-sized buffer (length nequ)
+  Fkn::V # second residual-sized buffer (presumably the residual at the trial point — TODO confirm)
+  Jk::M # Jacobian operator obtained from jac_op_residual
+  ψ::G # nonsmooth term shifted to xk
+  xkn::V # iterate-sized buffer (presumably the trial point — TODO confirm)
+  s::V # iterate-sized buffer (presumably the step — TODO confirm)
+  has_bnds::Bool # true when a bound is finite or the subsolver is TRDHSolver
+  l_bound::V # lower bounds read from the model meta (lvar)
+  u_bound::V # upper bounds read from the model meta (uvar)
+  l_bound_m_x::V # l_bound - x0 when has_bnds, otherwise an empty vector
+  u_bound_m_x::V # u_bound - x0 when has_bnds, otherwise an empty vector
+  subsolver::ST # solver instance built for subpb
+  subpb::PB # regularized subproblem wrapping an LMModel and ψ
+  substats::GenericExecutionStats{T, V, V, T} # statistics object created for subpb
+end
+
+"""
+    LMSolver(reg_nls; subsolver = R2Solver)
+
+Allocate the LM workspace for `reg_nls` and build its subproblem; `subsolver` is a solver constructor called on that subproblem.
+"""
+function LMSolver(reg_nls::AbstractRegularizedNLPModel{T, V}; subsolver = R2Solver) where {T, V}
+  nls = reg_nls.model
+  x0 = nls.meta.x0
+  l_bound = nls.meta.lvar
+  u_bound = nls.meta.uvar
+
+  # Iterate-sized buffers; their contents are left uninitialized here.
+  xk = similar(x0)
+  xkn = similar(x0)
+  s = similar(x0)
+  ∇fk = similar(x0)
+  mν∇fk = similar(x0)
+
+  # Residual-sized buffers and the Jacobian operator requested at `xk`.
+  Fk = similar(x0, nls.nls_meta.nequ)
+  Fkn = similar(Fk)
+  Jk = jac_op_residual(nls, xk)
+
+  # Bounds count as present when any of them is finite, or when TRDHSolver is the subsolver.
+  has_bnds =
+    any(!=(T(-Inf)), l_bound) || any(!=(T(Inf)), u_bound) || subsolver == TRDHSolver
+
+  # Bound offsets relative to x0 are only filled when needed; otherwise they stay empty.
+  if has_bnds
+    l_bound_m_x = similar(xk)
+    u_bound_m_x = similar(xk)
+    l_bound_m_x .= l_bound .- x0
+    u_bound_m_x .= u_bound .- x0
+    ψ = shifted(reg_nls.h, xk, l_bound_m_x, u_bound_m_x, reg_nls.selected)
+  else
+    l_bound_m_x = similar(xk, 0)
+    u_bound_m_x = similar(xk, 0)
+    ψ = shifted(reg_nls.h, xk)
+  end
+
+  # Subproblem: LM model with σ = 1 plus the shifted nonsmooth term, and its solver.
+  lm_model = LMModel(Jk, Fk, T(1), x0)
+  subpb = RegularizedNLPModel(lm_model, ψ)
+  substats = RegularizedExecutionStats(subpb)
+  inner_solver = subsolver(subpb)
+
+  # Positional arguments follow the field order of LMSolver.
+  return LMSolver(
+    xk, ∇fk, mν∇fk,
+    Fk, Fkn, Jk,
+    ψ, xkn, s,
+    has_bnds, l_bound, u_bound,
+    l_bound_m_x, u_bound_m_x,
+    inner_solver, subpb, substats,
+  )
+end
+
+# Levenberg–Marquardt driver (work in progress): set up the workspace and the first-order
+# quantities at the starting point, then return `stats`; the outer loop is not written yet.
+function SolverCore.solve!(
+  solver::LMSolver{T, G, V},
+  reg_nls::AbstractRegularizedNLPModel{T, V},
+  stats::GenericExecutionStats{T, V};
+  callback = (args...) -> nothing,
+  x::V = reg_nls.model.meta.x0,
+  atol::T = √eps(T),
+  rtol::T = √eps(T),
+  verbose::Int = 0,
+  max_iter::Int = 10000,
+  max_time::Float64 = 30.0,
+  max_eval::Int = -1,
+  σk::T = T(eps(T)^(1 / 5)), # convert back to T: the Float64 exponent promotes narrower floats
+  σmin::T = eps(T),
+  η1::T = √√eps(T),
+  η2::T = T(0.9),
+  γ::T = T(3),
+  θ::T = T(1 / (1 + eps(T)^(1 / 5))), # same conversion as for σk
+) where {T, V, G}
+  reset!(stats)
+
+  # Retrieve workspace
+  selected = reg_nls.selected
+  h = reg_nls.h
+  nls = reg_nls.model
+
+  xk = solver.xk .= x
+
+  # Make sure ψ has the correct shift 
+  shift!(solver.ψ, xk)
+
+  Fk = solver.Fk
+  Fkn = solver.Fkn
+  Jk = solver.Jk
+  ∇fk = solver.∇fk
+  mν∇fk = solver.mν∇fk # buffer written below; LMSolver has no JdFk / Jt_Fk fields
+  ψ = solver.ψ
+  xkn = solver.xkn
+  s = solver.s
+
+  has_bnds = solver.has_bnds
+  if has_bnds
+    l_bound = solver.l_bound
+    u_bound = solver.u_bound
+    l_bound_m_x = solver.l_bound_m_x
+    u_bound_m_x = solver.u_bound_m_x
+  end
+
+  # initialize parameters
+  hk = @views h(xk[selected])
+  if hk == Inf
+    verbose > 0 && @info "LM: finding initial guess where nonsmooth term is finite"
+    prox!(xk, h, xk, one(T)) # unit prox step; `x0` is not defined in this function
+    hk = @views h(xk[selected])
+    hk < Inf || error("prox computation must be erroneous")
+    verbose > 0 && @debug "LM: found point where h has value" hk
+  end
+  improper = (hk == -Inf)
+  improper == true && @warn "LM: Improper term detected"
+  improper == true && return stats
+
+  if verbose > 0
+    @info log_header(
+      [:outer, :inner, :fx, :hx, :xi, :ρ, :σ, :normx, :norms, :normJ, :arrow],
+      [Int, Int, T, T, T, T, T, T, T, T, Char],
+      hdr_override = Dict{Symbol, String}(
+        :fx => "f(x)",
+        :hx => "h(x)",
+        :xi => "√(ξ1/ν)",
+        :normx => "‖x‖",
+        :norms => "‖s‖",
+        :normJ => "‖J‖²", # key must match the :normJ column declared above
+        :arrow => "LM",
+      ),
+      colsep = 1,
+    )
+  end
+
+  local ξ1::T
+  local ρk::T = zero(T)
+
+  residual!(nls, xk, Fk)
+  Jk = jac_op_residual(nls, xk)
+  mul!(∇fk, Jk', Fk) # ∇fk ← Jk' * Fk
+  fk = dot(Fk, Fk) / 2
+
+  σmax, found_σ = opnorm(Jk)
+  found_σ || error("operator norm computation failed")
+  ν = θ / (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ
+  sqrt_ξ1_νInv = one(T)
+
+  @. mν∇fk = -ν * ∇fk
+
+  φ1 = let Fk = Fk, ∇fk = ∇fk
+    d -> dot(Fk, Fk) / 2 + dot(∇fk, d) # ‖Fk‖²/2 + d'Jk'Fk, since ∇fk = Jk'Fk
+  end
+
+  return stats
+end
 
 """
     LM(nls, h, options; kwargs...)
@@ -143,7 +334,7 @@ function LM(
     Resid_hist[k] = nls.counters.neval_residual
 
     # model for first prox-gradient iteration
-    φ1(d) = begin
+    φ1(d) = begin # || Fk ||^2/2 + d*Jk'*Fk
       jtprod_residual!(nls, xk, Fk, Jt_Fk)
       dot(Fk, Fk) / 2 + dot(Jt_Fk, d)
     end
diff --git a/src/RegularizedOptimization.jl b/src/RegularizedOptimization.jl
@@ -19,6 +19,7 @@ include("splitting.jl")
 include("TR_alg.jl")
 include("TRDH_alg.jl")
 include("R2_alg.jl")
+include("LMModel.jl")
 include("LM_alg.jl")
 include("LMTR_alg.jl")
 include("R2DH.jl")