Skip to content

Commit fd5daa3

Browse files
LM & LMTR: add cauchy step stopping criterion
1 parent 786841c commit fd5daa3

File tree

3 files changed

+73
-18
lines changed

3 files changed

+73
-18
lines changed

src/LMTR_alg.jl

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
127127
- `atol::T = √eps(T)`: absolute tolerance;
128128
- `sub_atol::T = atol`: subsolver absolute tolerance;
129129
- `rtol::T = √eps(T)`: relative tolerance;
130+
- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below);
131+
- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below);
132+
- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below);
133+
- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below);
130134
- `neg_tol::T = zero(T): negative tolerance;
131135
- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited);
132136
- `max_time::Float64 = 30.0`: maximum time limit in seconds;
@@ -142,7 +146,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
142146
- `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration.
143147
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
144148
145-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
149+
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀‖/ν₀` where `sₖ₁` is the Cauchy step.
146150
147151
# Output
148152
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -193,6 +197,10 @@ function SolverCore.solve!(
193197
atol::T = eps(T),
194198
sub_atol::T = atol,
195199
rtol::T = eps(T),
200+
atol_decr::T = atol,
201+
rtol_decr::T = rtol,
202+
atol_step::T = atol,
203+
rtol_step::T = rtol,
196204
neg_tol::T = zero(T),
197205
verbose::Int = 0,
198206
max_iter::Int = 10000,
@@ -251,12 +259,13 @@ function SolverCore.solve!(
251259

252260
if verbose > 0
253261
@info log_header(
254-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, , :arrow],
255-
[Int, Int, T, T, T, T, T, T, T, T, Char],
262+
[:outer, :inner, :fx, :hx, :xi, :norms1dnu, :ρ, , :normx, :norms, , :arrow],
263+
[Int, Int, T, T, T, T, T, T, T, T, T, Char],
256264
hdr_override = Dict{Symbol, String}(
257265
:fx => "f(x)",
258266
:hx => "h(x)",
259267
:xi => "√(ξ1/ν)",
268+
:norms1dnu => "‖s₁‖/ν",
260269
:normx => "‖x‖",
261270
:norms => "‖s‖",
262271
:arrow => "LMTR",
@@ -302,14 +311,21 @@ function SolverCore.solve!(
302311
# Take first proximal gradient step s1 and see if current xk is nearly stationary.
303312
# s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0))
304313
prox!(s, ψ, mν∇fk, ν)
314+
315+
# Estimate optimality and check stopping criteria
305316
ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
306317
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
307-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
308318
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
309319
error("LMTR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
310-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
320+
atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative
311321
sub_atol += rtol * sqrt_ξ1_νInv
312322

323+
norm_s_cauchy = norm(s)
324+
norm_s_cauchydν = norm_s_cauchy / ν
325+
atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative
326+
327+
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol_decr) || (norm_s_cauchydν atol_step)
328+
313329
set_status!(
314330
stats,
315331
get_status(
@@ -390,6 +406,7 @@ function SolverCore.solve!(
390406
fk,
391407
hk,
392408
sqrt_ξ1_νInv,
409+
norm_s_cauchydν,
393410
ρk,
394411
∆_effective,
395412
χ(xk),
@@ -453,12 +470,17 @@ function SolverCore.solve!(
453470
prox!(s, ψ, mν∇fk, ν)
454471
mks = mk1(s)
455472

473+
# Estimate optimality and check stopping criteria
456474
ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps()
457475
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
458-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
459476
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
460477
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
461478

479+
norm_s_cauchy = norm(s)
480+
norm_s_cauchydν = norm_s_cauchy / ν
481+
482+
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol_decr) || (norm_s_cauchydν atol_step)
483+
462484
set_status!(
463485
stats,
464486
get_status(
@@ -479,8 +501,8 @@ function SolverCore.solve!(
479501
end
480502

481503
if verbose > 0 && stats.status == :first_order
482-
@info log_row(Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1)
483-
@info "LMTR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
504+
@info log_row(Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1)
505+
@info "LMTR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s₁‖/ν = $(norm_s_cauchydν)"
484506
end
485507

486508
set_solution!(stats, xk)

src/LM_alg.jl

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
127127
- `nonlinear::Bool = true`: whether the function `F` is nonlinear or not.
128128
- `atol::T = √eps(T)`: absolute tolerance;
129129
- `rtol::T = √eps(T)`: relative tolerance;
130+
- `atol_decr::T = atol`: (advanced) absolute tolerance for the optimality measure `√(ξₖ/νₖ)` (see below);
131+
- `rtol_decr::T = rtol`: (advanced) relative tolerance for the optimality measure `√(ξₖ/νₖ)` (see below);
132+
- `atol_step::T = atol`: (advanced) absolute tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below);
133+
- `rtol_step::T = rtol`: (advanced) relative tolerance for the optimality measure `‖sₖ₁‖/ν₁` (see below);
130134
- `neg_tol::T = zero(T): negative tolerance;
131135
- `max_eval::Int = -1`: maximum number of evaluation of the objective function (negative number means unlimited);
132136
- `max_time::Float64 = 30.0`: maximum time limit in seconds;
@@ -142,7 +146,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
142146
- `subsolver = R2Solver`: the solver used to solve the subproblems.
143147
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
144148
145-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
149+
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure or when `‖sₖ₁‖/νₖ < atol_step + rtol_step*‖s₀₁‖/ν₀` where `sₖ₁` is the Cauchy step.
146150
147151
# Output
148152
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -192,6 +196,10 @@ function SolverCore.solve!(
192196
nonlinear::Bool = true,
193197
atol::T = eps(T),
194198
rtol::T = eps(T),
199+
atol_decr::T = atol,
200+
rtol_decr::T = rtol,
201+
atol_step::T = atol,
202+
rtol_step::T = rtol,
195203
neg_tol::T = zero(T),
196204
verbose::Int = 0,
197205
max_iter::Int = 10000,
@@ -254,12 +262,13 @@ function SolverCore.solve!(
254262

255263
if verbose > 0
256264
@info log_header(
257-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, :normJ, :arrow],
258-
[Int, Int, T, T, T, T, T, T, T, T, Char],
265+
[:outer, :inner, :fx, :hx, :xi, :normsdnu, :ρ, , :normx, :norms, :normJ, :arrow],
266+
[Int, Int, T, T, T, T, T, T, T, T, T, Char],
259267
hdr_override = Dict{Symbol, String}(
260268
:fx => "f(x)",
261269
:hx => "h(x)",
262270
:xi => "√(ξ1/ν)",
271+
:normsdnu => "‖s₁‖/ν",
263272
:normx => "‖x‖",
264273
:norms => "‖s‖",
265274
:normJ => "‖J‖²",
@@ -306,12 +315,19 @@ function SolverCore.solve!(
306315
end
307316

308317
prox!(s, ψ, mν∇fk, ν)
318+
319+
# Estimate optimality and check stopping criteria
309320
ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
310321
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
311-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
312322
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
313323
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
314-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
324+
atol_decr += rtol_decr * sqrt_ξ1_νInv # make stopping test absolute and relative
325+
326+
norm_s_cauchy = norm(s)
327+
norm_s_cauchydν = norm_s_cauchy / ν
328+
atol_step += rtol_step * norm_s_cauchydν # make stopping test absolute and relative
329+
330+
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol_decr) || (norm_s_cauchydν atol_step)
315331

316332
set_status!(
317333
stats,
@@ -384,6 +400,7 @@ function SolverCore.solve!(
384400
fk,
385401
hk,
386402
sqrt_ξ1_νInv,
403+
norm_s_cauchydν,
387404
ρk,
388405
σk,
389406
norm(xk),
@@ -441,14 +458,17 @@ function SolverCore.solve!(
441458
prox!(s, ψ, mν∇fk, ν)
442459
mks = mk1(s)
443460

461+
# Estimate optimality and check stopping criteria
444462
ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps()
445-
446463
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
447-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
448-
449464
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
450465
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
451466

467+
norm_s_cauchy = norm(s)
468+
norm_s_cauchydν = norm_s_cauchy / ν
469+
470+
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol_decr) || (norm_s_cauchydν atol_step)
471+
452472
set_status!(
453473
stats,
454474
get_status(
@@ -470,10 +490,10 @@ function SolverCore.solve!(
470490

471491
if verbose > 0 && stats.status == :first_order
472492
@info log_row(
473-
Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), 1 / ν, ""],
493+
Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, norm_s_cauchydν, ρk, σk, norm(xk), norm(s), 1 / ν, ""],
474494
colsep = 1,
475495
)
476-
@info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
496+
@info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv) and ‖s‖/ν = $(norm_s_cauchydν)"
477497
end
478498

479499
set_solution!(stats, xk)

test/runtests.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,19 @@ for (h, h_name) ∈ ((NormL0(λ), "l0"), (NormL1(λ), "l1"), (IndBallL0(10 * com
129129
@test length(out.solution) == bpdn.meta.nvar
130130
@test typeof(out.dual_feas) == eltype(out.solution)
131131
@test out.status == :first_order
132+
133+
# Test with the different stopping criteria
134+
out = solver(bpdn_nls, h, args..., options, x0 = x0, atol_decr = 1e-6, rtol_decr = 1e-6, atol_step = 0.0, rtol_step = 0.0)
135+
@test typeof(out.solution) == typeof(bpdn.meta.x0)
136+
@test length(out.solution) == bpdn.meta.nvar
137+
@test typeof(out.dual_feas) == eltype(out.solution)
138+
@test out.status == :first_order
139+
140+
out = solver(bpdn_nls, h, args..., options, x0 = x0, atol_decr = 0.0, rtol_decr = 0.0, atol_step = 1e-6, rtol_step = 1e-6)
141+
@test typeof(out.solution) == typeof(bpdn.meta.x0)
142+
@test length(out.solution) == bpdn.meta.nvar
143+
@test typeof(out.dual_feas) == eltype(out.solution)
144+
@test out.status == :first_order
132145
end
133146
end
134147
end

0 commit comments

Comments
 (0)