Skip to content

Commit 1ce02a6

Browse files
LM & LMTR: change stopping criterion from xi to the norm of the cauchy point
1 parent 84b36bc commit 1ce02a6

File tree

2 files changed

+36
-62
lines changed

2 files changed

+36
-62
lines changed

src/LMTR_alg.jl

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
142142
- `subsolver::S = R2Solver`: subsolver used to solve the subproblem that appears at each iteration.
143143
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
144144
145-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
145+
The algorithm stops when `‖sᶜᵖ‖/ν < atol + rtol*‖s₀ᶜᵖ‖/ν ` where sᶜᵖ ∈ argminₛ f(xₖ) + ∇f(xₖ)ᵀs + ψ(s; xₖ) ½ ν⁻¹ ‖s‖².
146146
147147
# Output
148148
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -251,12 +251,12 @@ function SolverCore.solve!(
251251

252252
if verbose > 0
253253
@info log_header(
254-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, , :arrow],
254+
[:outer, :inner, :fx, :hx, :norm_s_cauchydν, , , :normx, :norms, , :arrow],
255255
[Int, Int, T, T, T, T, T, T, T, T, Char],
256256
hdr_override = Dict{Symbol, String}(
257257
:fx => "f(x)",
258258
:hx => "h(x)",
259-
:xi => "√(ξ1/ν)",
259+
:norm_s_cauchydν => "‖sᶜᵖ‖/ν",
260260
:normx => "‖x‖",
261261
:norms => "‖s‖",
262262
:arrow => "LMTR",
@@ -277,7 +277,6 @@ function SolverCore.solve!(
277277
found_σ || error("operator norm computation failed")
278278
ν = α * Δk / (1 + σmax^2 ** Δk + 1))
279279
@. mν∇fk = -∇fk * ν
280-
sqrt_ξ1_νInv = one(T)
281280

282281
set_iter!(stats, 0)
283282
start_time = time()
@@ -287,28 +286,20 @@ function SolverCore.solve!(
287286
set_solver_specific!(stats, :nonsmooth_obj, hk)
288287
set_solver_specific!(stats, :prox_evals, prox_evals + 1)
289288

290-
φ1 = let Fk = Fk, ∇fk = ∇fk
291-
d -> dot(Fk, Fk) / 2 + dot(∇fk, d) # ∇fk = Jk^T Fk
292-
end
293-
294-
mk1 = let φ1 = φ1, ψ = ψ
295-
d -> φ1(d) + ψ(d)
296-
end
297-
298289
mk = let ψ = ψ, solver = solver
299290
d -> obj(solver.subpb.model, d) + ψ(d)
300291
end
301292

302293
# Take first proximal gradient step s1 and see if current xk is nearly stationary.
303294
# s1 minimizes φ1(d) + ‖d‖² / 2 / ν + ψ(d) ⟺ s1 ∈ prox{νψ}(-ν∇φ1(0))
304295
prox!(s, ψ, mν∇fk, ν)
305-
ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
306-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
307-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
308-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
309-
error("LMTR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
310-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
311-
sub_atol += rtol * sqrt_ξ1_νInv
296+
norm_s_cauchy = norm(s)
297+
norm_s_cauchydν = norm_s_cauchy / ν
298+
299+
atol += rtol * norm_s_cauchydν # make stopping test absolute and relative
300+
sub_atol += rtol * norm_s_cauchydν
301+
302+
solved = norm_s_cauchydν atol
312303

313304
set_status!(
314305
stats,
@@ -347,7 +338,7 @@ function SolverCore.solve!(
347338
solver.subpb,
348339
solver.substats;
349340
x = s,
350-
atol = stats.iter == 0 ? 1.0e-5 : max(sub_atol, min(1.0e-1, ξ1 / 10)),
341+
atol = stats.iter == 0 ? 1.0e-5 : max(sub_atol, min(1e-2, norm_s_cauchydν)),
351342
Δk = ∆_effective / 10,
352343
sub_kwargs...,
353344
)
@@ -357,7 +348,7 @@ function SolverCore.solve!(
357348
solver.subpb,
358349
solver.substats;
359350
x = s,
360-
atol = stats.iter == 0 ? 1.0e-5 : max(sub_atol, min(1.0e-1, ξ1 / 10)),
351+
atol = stats.iter == 0 ? 1.0e-5 : max(sub_atol, min(1e-2, norm_s_cauchydν)),
361352
ν = ν,
362353
sub_kwargs...,
363354
)
@@ -389,7 +380,7 @@ function SolverCore.solve!(
389380
solver.substats.iter,
390381
fk,
391382
hk,
392-
sqrt_ξ1_νInv,
383+
norm_s_cauchydν,
393384
ρk,
394385
∆_effective,
395386
χ(xk),
@@ -451,13 +442,10 @@ function SolverCore.solve!(
451442
@. mν∇fk = -∇fk * ν
452443

453444
prox!(s, ψ, mν∇fk, ν)
454-
mks = mk1(s)
455-
456-
ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps()
457-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
458-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
459-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
460-
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
445+
norm_s_cauchy = norm(s)
446+
norm_s_cauchydν = norm_s_cauchy / ν
447+
448+
solved = norm_s_cauchydν atol
461449

462450
set_status!(
463451
stats,
@@ -479,11 +467,11 @@ function SolverCore.solve!(
479467
end
480468

481469
if verbose > 0 && stats.status == :first_order
482-
@info log_row(Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1)
483-
@info "LMTR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
470+
@info log_row(Any[stats.iter, 0, fk, hk, norm_s_cauchydν, ρk, Δk, χ(xk), χ(s), ν, ""], colsep = 1)
471+
@info "LMTR: terminating with ‖sᶜᵖ‖/ν = $(norm_s_cauchydν)"
484472
end
485473

486474
set_solution!(stats, xk)
487-
set_residuals!(stats, zero(T), sqrt_ξ1_νInv)
475+
set_residuals!(stats, zero(T), norm_s_cauchydν)
488476
return stats
489477
end

src/LM_alg.jl

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ For advanced usage, first define a solver "LMSolver" to preallocate the memory u
142142
- `subsolver = R2Solver`: the solver used to solve the subproblems.
143143
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
144144
145-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
145+
The algorithm stops when `‖sᶜᵖ‖/ν < atol + rtol*‖s₀ᶜᵖ‖/ν ` where sᶜᵖ ∈ argminₛ f(xₖ) + ∇f(xₖ)ᵀs + ψ(s; xₖ) ½ ν⁻¹ ‖s‖².
146146
147147
# Output
148148
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -254,12 +254,12 @@ function SolverCore.solve!(
254254

255255
if verbose > 0
256256
@info log_header(
257-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, :normJ, :arrow],
257+
[:outer, :inner, :fx, :hx, :norm_s_cauchydν, , , :normx, :norms, :normJ, :arrow],
258258
[Int, Int, T, T, T, T, T, T, T, T, Char],
259259
hdr_override = Dict{Symbol, String}(
260260
:fx => "f(x)",
261261
:hx => "h(x)",
262-
:xi => "√(ξ1/ν)",
262+
:norm_s_cauchydν => "‖sᶜᵖ‖/ν",
263263
:normx => "‖x‖",
264264
:norms => "‖s‖",
265265
:normJ => "‖J‖²",
@@ -293,26 +293,17 @@ function SolverCore.solve!(
293293
set_solver_specific!(stats, :prox_evals, prox_evals + 1)
294294
m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk)
295295

296-
φ1 = let Fk = Fk, ∇fk = ∇fk
297-
d -> dot(Fk, Fk) / 2 + dot(∇fk, d) # ∇fk = Jk^T Fk
298-
end
299-
300-
mk1 = let φ1 = φ1, ψ = ψ
301-
d -> φ1(d) + ψ(d)
302-
end
303-
304296
mk = let ψ = ψ, solver = solver
305297
d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)
306298
end
307299

308300
prox!(s, ψ, mν∇fk, ν)
309-
ξ1 = fk + hk - mk1(s) + max(1, abs(fk + hk)) * 10 * eps()
310-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
311-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
312-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
313-
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
314-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
301+
norm_s_cauchy = norm(s)
302+
norm_s_cauchydν = norm_s_cauchy / ν
303+
304+
atol += rtol * norm_s_cauchydν # make stopping test absolute and relative
315305

306+
solved = norm_s_cauchydν atol
316307
set_status!(
317308
stats,
318309
get_status(
@@ -332,7 +323,7 @@ function SolverCore.solve!(
332323
done = stats.status != :unknown
333324

334325
while !done
335-
sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
326+
sub_atol = stats.iter == 0 ? 1.0e-3 : min(norm_s_cauchydν ^ (1.5), norm_s_cauchydν * 1e-3)
336327
solver.subpb.model.σ = σk
337328
isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν)
338329
if isa(solver.subsolver, R2Solver) #FIXME
@@ -439,15 +430,10 @@ function SolverCore.solve!(
439430

440431
@. mν∇fk = - ν * ∇fk
441432
prox!(s, ψ, mν∇fk, ν)
442-
mks = mk1(s)
443-
444-
ξ1 = fk + hk - mks + max(1, abs(hk)) * 10 * eps()
445-
446-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν) : sqrt(-ξ1 / ν)
447-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
448-
449-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
450-
error("LM: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
433+
norm_s_cauchy = norm(s)
434+
norm_s_cauchydν = norm_s_cauchy / ν
435+
436+
solved = norm_s_cauchydν atol
451437

452438
set_status!(
453439
stats,
@@ -470,13 +456,13 @@ function SolverCore.solve!(
470456

471457
if verbose > 0 && stats.status == :first_order
472458
@info log_row(
473-
Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), 1 / ν, ""],
459+
Any[stats.iter, 0, fk, hk, norm_s_cauchydν, ρk, σk, norm(xk), norm(s), 1 / ν, ""],
474460
colsep = 1,
475461
)
476-
@info "LM: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
462+
@info "LM: terminating with ‖sᶜᵖ‖/ν = $(norm_s_cauchydν)"
477463
end
478464

479465
set_solution!(stats, xk)
480-
set_residuals!(stats, zero(T), sqrt_ξ1_νInv)
466+
set_residuals!(stats, zero(T), norm_s_cauchydν)
481467
return stats
482468
end

0 commit comments

Comments
 (0)