Skip to content

Commit 84b36bc

Browse files
change stopping criterion from xi to the norm of the cauchy point
1 parent 0871eee commit 84b36bc

3 files changed

Lines changed: 53 additions & 81 deletions

File tree

src/R2N.jl

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ For advanced usage, first define a solver "R2NSolver" to preallocate the memory
145145
- `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`;
146146
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
147147
148-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
148+
The algorithm stops when `‖sᶜᵖ‖/ν < atol + rtol*‖s₀ᶜᵖ‖/ν ` where sᶜᵖ ∈ argminₛ f(xₖ) + ∇f(xₖ)ᵀs + ψ(s; xₖ) ½ ν⁻¹ ‖s‖².
149149
150150
# Output
151151
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -270,12 +270,12 @@ function SolverCore.solve!(
270270

271271
if verbose > 0
272272
@info log_header(
273-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, :normB, :arrow],
273+
[:outer, :inner, :fx, :hx, :norm_s_cauchydν, , , :normx, :norms, :normB, :arrow],
274274
[Int, Int, T, T, T, T, T, T, T, T, Char],
275275
hdr_override = Dict{Symbol, String}(
276276
:fx => "f(x)",
277277
:hx => "h(x)",
278-
:xi => "√(ξ1/ν)",
278+
:norm_s_cauchydν => "‖sᶜᵖ‖/ν",
279279
:normx => "‖x‖",
280280
:norms => "‖s‖",
281281
:normB => "‖B‖",
@@ -321,27 +321,17 @@ function SolverCore.solve!(
321321
set_solver_specific!(stats, :prox_evals, prox_evals + 1)
322322
m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk)
323323

324-
φ1 = let ∇fk = ∇fk
325-
d -> dot(∇fk, d)
326-
end
327-
328-
mk1 = let ψ = ψ
329-
d -> φ1(d) + ψ(d)::T
330-
end
331-
332324
mk = let ψ = ψ, solver = solver
333325
d -> obj(solver.subpb.model, d, skip_sigma = true) + ψ(d)::T
334326
end
335327

336328
prox!(s1, ψ, mν∇fk, ν₁)
337-
mks = mk1(s1)
329+
norm_s_cauchy = norm(s1)
330+
norm_s_cauchydν = norm_s_cauchy / ν₁
331+
332+
atol += rtol * norm_s_cauchydν # make stopping test absolute and relative
338333

339-
ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps()
340-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁)
341-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
342-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
343-
error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
344-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
334+
solved = norm_s_cauchydν atol
345335

346336
set_status!(
347337
stats,
@@ -362,7 +352,7 @@ function SolverCore.solve!(
362352
done = stats.status != :unknown
363353

364354
while !done
365-
sub_atol = stats.iter == 0 ? 1.0e-3 : min(sqrt_ξ1_νInv ^ (1.5), sqrt_ξ1_νInv * 1e-3)
355+
sub_atol = stats.iter == 0 ? 1.0e-3 : min(norm_s_cauchydν ^ (1.5), norm_s_cauchydν * 1e-3)
366356

367357
solver.subpb.model.σ = σk
368358
isa(solver.subsolver, R2DHSolver) && (solver.subsolver.D.d[1] = 1/ν₁)
@@ -391,7 +381,7 @@ function SolverCore.solve!(
391381
prox_evals += solver.substats.iter
392382
s .= solver.substats.solution
393383

394-
if norm(s) > β * norm(s1)
384+
if norm(s) > β * norm_s_cauchy
395385
s .= s1
396386
end
397387

@@ -419,7 +409,7 @@ function SolverCore.solve!(
419409
solver.substats.iter,
420410
fk,
421411
hk,
422-
sqrt_ξ1_νInv,
412+
norm_s_cauchydν,
423413
ρk,
424414
σk,
425415
norm(xk),
@@ -479,15 +469,11 @@ function SolverCore.solve!(
479469

480470
@. mν∇fk = - ν₁ * ∇fk
481471
prox!(s1, ψ, mν∇fk, ν₁)
482-
mks = mk1(s1)
483-
484-
ξ1 = hk - mks + max(1, abs(hk)) * 10 * eps()
472+
norm_s_cauchy = norm(s1)
473+
norm_s_cauchydν = norm_s_cauchy / ν₁
485474

486-
sqrt_ξ1_νInv = ξ1 0 ? sqrt(ξ1 / ν₁) : sqrt(-ξ1 / ν₁)
487-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
475+
solved = norm_s_cauchydν atol
488476

489-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
490-
error("R2N: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
491477
set_status!(
492478
stats,
493479
get_status(
@@ -509,14 +495,14 @@ function SolverCore.solve!(
509495

510496
if verbose > 0 && stats.status == :first_order
511497
@info log_row(
512-
Any[stats.iter, 0, fk, hk, sqrt_ξ1_νInv, ρk, σk, norm(xk), norm(s), λmax, ""],
498+
Any[stats.iter, 0, fk, hk, norm_s_cauchydν, ρk, σk, norm(xk), norm(s), λmax, ""],
513499
colsep = 1,
514500
)
515-
@info "R2N: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
501+
@info "R2N: terminating with ‖sᶜᵖ‖/ν = $(norm_s_cauchydν)"
516502
end
517503

518504
set_solution!(stats, xk)
519-
set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv)
505+
set_residuals!(stats, zero(eltype(xk)), norm_s_cauchydν)
520506
return stats
521507
end
522508

src/R2_alg.jl

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ For advanced usage, first define a solver "R2Solver" to preallocate the memory u
156156
- `compute_obj::Bool = true`: (advanced) whether `f(x₀)` should be computed or not. If set to false, then the value is retrieved from `stats.solver_specific[:smooth_obj]`;
157157
- `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`;
158158
159-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
159+
The algorithm stops when `‖sᶜᵖ‖/ν < atol + rtol*‖s₀ᶜᵖ‖/ν ` where sᶜᵖ ∈ argminₛ f(xₖ) + ∇f(xₖ)ᵀs + ψ(s; xₖ) ½ ν⁻¹ ‖s‖².
160160
161161
# Output
162162
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -367,13 +367,13 @@ function SolverCore.solve!(
367367

368368
if verbose > 0
369369
@info log_header(
370-
[:iter, :fx, :hx, :xi, , , :normx, :norms, :arrow],
370+
[:iter, :fx, :hx, :norm_s_cauchydν, , , :normx, :norms, :arrow],
371371
[Int, Float64, Float64, Float64, Float64, Float64, Float64, Float64, Char],
372372
hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere
373373
:iter => "iter",
374374
:fx => "f(x)",
375375
:hx => "h(x)",
376-
:xi => "√(ξ/ν)",
376+
:norm_s_cauchydν => "‖sᶜᵖ‖/ν",
377377
=> "ρ",
378378
=> "σ",
379379
:normx => "‖x‖",
@@ -407,15 +407,14 @@ function SolverCore.solve!(
407407

408408
prox!(s, ψ, mν∇fk, ν)
409409
mks = mk(s)
410+
norm_s_cauchy = norm(s)
411+
norm_s_cauchydν = norm_s_cauchy / ν
410412

411413
ξ = hk - mks + max(1, abs(hk)) * 10 * eps()
412414

413-
sqrt_ξ_νInv = ξ 0 ? sqrt/ ν) : sqrt(-ξ / ν)
414-
atol += rtol * sqrt_ξ_νInv # make stopping test absolute and relative
415+
atol += rtol * norm_s_cauchydν # make stopping test absolute and relative
415416

416-
solved =< 0 && sqrt_ξ_νInv neg_tol) || 0 && sqrt_ξ_νInv atol)
417-
< 0 && sqrt_ξ_νInv > neg_tol) &&
418-
error("R2: prox-gradient step should produce a decrease but ξ = $(ξ)")
417+
solved = norm_s_cauchydν atol
419418

420419
set_status!(
421420
stats,
@@ -453,11 +452,11 @@ function SolverCore.solve!(
453452
stats.iter,
454453
fk,
455454
hk,
456-
sqrt_ξ_νInv,
455+
norm_s_cauchydν,
457456
ρk,
458457
σk,
459458
norm(xk),
460-
norm(s),
459+
norm_s_cauchy,
461460
(η2 ρk < Inf) ? '' : (ρk < η1 ? '' : '='),
462461
],
463462
colsep = 1,
@@ -494,12 +493,11 @@ function SolverCore.solve!(
494493

495494
prox!(s, ψ, mν∇fk, ν)
496495
mks = mk(s)
496+
norm_s_cauchy = norm(s)
497+
norm_s_cauchydν = norm_s_cauchy / ν
497498

498499
ξ = hk - mks + max(1, abs(hk)) * 10 * eps()
499-
sqrt_ξ_νInv = ξ 0 ? sqrt/ ν) : sqrt(-ξ / ν)
500-
solved =< 0 && sqrt_ξ_νInv neg_tol) || 0 && sqrt_ξ_νInv atol)
501-
< 0 && sqrt_ξ_νInv > neg_tol) &&
502-
error("R2: prox-gradient step should produce a decrease but ξ = $(ξ)")
500+
solved = norm_s_cauchydν atol
503501

504502
set_status!(
505503
stats,
@@ -521,11 +519,11 @@ function SolverCore.solve!(
521519
end
522520

523521
if verbose > 0 && stats.status == :first_order
524-
@info log_row(Any[stats.iter, fk, hk, sqrt_ξ_νInv, ρk, σk, norm(xk), norm(s), ""], colsep = 1)
525-
@info "R2: terminating with √(ξ/ν) = $(sqrt_ξ_νInv)"
522+
@info log_row(Any[stats.iter, fk, hk, norm_s_cauchydν, ρk, σk, norm(xk), norm_s_cauchy, ""], colsep = 1)
523+
@info "R2: terminating with ‖sᶜᵖ‖/ν = $(norm_s_cauchydν)"
526524
end
527525

528526
set_solution!(stats, xk)
529-
set_residuals!(stats, zero(eltype(xk)), sqrt_ξ_νInv)
527+
set_residuals!(stats, zero(eltype(xk)), norm_s_cauchy)
530528
return stats
531529
end

src/TR_alg.jl

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ For advanced usage, first define a solver "TRSolver" to preallocate the memory u
145145
- `compute_grad::Bool = true`: (advanced) whether `∇f(x₀)` should be computed or not. If set to false, then the value is retrieved from `solver.∇fk`;
146146
- `sub_kwargs::NamedTuple = NamedTuple()`: a named tuple containing the keyword arguments to be sent to the subsolver. The solver will fail if invalid keyword arguments are provided to the subsolver. For example, if the subsolver is `R2Solver`, you can pass `sub_kwargs = (max_iter = 100, σmin = 1e-6,)`.
147147
148-
The algorithm stops either when `√(ξₖ/νₖ) < atol + rtol*√(ξ₀/ν₀) ` or `ξₖ < 0` and `√(-ξₖ/νₖ) < neg_tol` where ξₖ := f(xₖ) + h(xₖ) - φ(sₖ; xₖ) - ψ(sₖ; xₖ), and √(ξₖ/νₖ) is a stationarity measure.
148+
The algorithm stops when `‖sᶜᵖ‖/ν < atol + rtol*‖s₀ᶜᵖ‖/ν ` where sᶜᵖ ∈ argminₛ f(xₖ) + ∇f(xₖ)ᵀs + ψ(s; xₖ) ½ ν⁻¹ ‖s‖².
149149
150150
# Output
151151
The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -268,12 +268,12 @@ function SolverCore.solve!(
268268

269269
if verbose > 0
270270
@info log_header(
271-
[:outer, :inner, :fx, :hx, :xi, , , :normx, :norms, :normB, :arrow],
271+
[:outer, :inner, :fx, :hx, :norm_s_cauchydν, , , :normx, :norms, :normB, :arrow],
272272
[Int, Int, T, T, T, T, T, T, T, T, Char],
273273
hdr_override = Dict{Symbol, String}( # TODO: Add this as constant dict elsewhere
274274
:fx => "f(x)",
275275
:hx => "h(x)",
276-
:xi => "√(ξ1/ν)",
276+
:norm_s_cauchydν => "‖sᶜᵖ‖/ν",
277277
:normx => "‖x‖",
278278
:norms => "‖s‖",
279279
:normB => "‖B‖",
@@ -283,7 +283,6 @@ function SolverCore.solve!(
283283
)
284284
end
285285

286-
local ξ1::T
287286
local ρk = zero(T)
288287
local prox_evals::Int = 0
289288

@@ -306,7 +305,6 @@ function SolverCore.solve!(
306305
found_λ || error("operator norm computation failed")
307306

308307
ν₁ = α * Δk / (1 + λmax ** Δk + 1))
309-
sqrt_ξ1_νInv = one(T)
310308

311309
@. mν∇fk = -ν₁ * ∇fk
312310

@@ -320,27 +318,18 @@ function SolverCore.solve!(
320318
m_monotone > 1 && (m_fh_hist[stats.iter % (m_monotone - 1) + 1] = fk + hk)
321319

322320
# models
323-
φ1 = let ∇fk = ∇fk
324-
d -> dot(∇fk, d)
325-
end
326-
mk1 = let ψ = ψ, φ1 = φ1
327-
d -> φ1(d) + ψ(d)
328-
end
329-
330321
mk = let ψ = ψ, solver = solver
331322
d -> obj(solver.subpb.model, d) + ψ(d)::T
332323
end
333324

334325
prox!(s, ψ, mν∇fk, ν₁)
335-
ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps()
336-
ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
337-
sqrt_ξ1_νInv = sqrt(ξ1 / ν₁)
326+
norm_s_cauchy = norm(s)
327+
norm_s_cauchydν = norm_s_cauchy / ν₁
328+
329+
atol += rtol * norm_s_cauchydν # make stopping test absolute and relative
330+
sub_atol += rtol * norm_s_cauchydν
338331

339-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
340-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
341-
error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
342-
atol += rtol * sqrt_ξ1_νInv # make stopping test absolute and relative
343-
sub_atol += rtol * sqrt_ξ1_νInv
332+
solved = norm_s_cauchydν atol
344333

345334
set_status!(
346335
stats,
@@ -359,8 +348,9 @@ function SolverCore.solve!(
359348
callback(nlp, solver, stats)
360349

361350
done = stats.status != :unknown
351+
362352
while !done
363-
sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv))
353+
sub_atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, norm_s_cauchydν))
364354
∆_effective = min* χ(s), Δk)
365355

366356
if has_bnds || isa(solver.subsolver, TRDHSolver) #TODO elsewhere ?
@@ -379,7 +369,7 @@ function SolverCore.solve!(
379369
solver.subpb,
380370
solver.substats;
381371
x = s,
382-
atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)),
372+
atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, norm_s_cauchydν)),
383373
Δk = ∆_effective / 10,
384374
sub_kwargs...,
385375
)
@@ -389,7 +379,7 @@ function SolverCore.solve!(
389379
solver.subpb,
390380
solver.substats;
391381
x = s,
392-
atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, sqrt_ξ1_νInv)),
382+
atol = stats.iter == 0 ? 1e-5 : max(sub_atol, min(1e-2, norm_s_cauchydν)),
393383
ν = ν₁,
394384
sub_kwargs...,
395385
)
@@ -422,7 +412,7 @@ function SolverCore.solve!(
422412
solver.substats.iter,
423413
fk,
424414
hk,
425-
sqrt_ξ1_νInv,
415+
norm_s_cauchydν,
426416
ρk,
427417
Δk,
428418
χ(xk),
@@ -494,12 +484,10 @@ function SolverCore.solve!(
494484
@. mν∇fk = -ν₁ * ∇fk
495485

496486
prox!(s, ψ, mν∇fk, ν₁)
497-
ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps()
498-
sqrt_ξ1_νInv = sqrt(ξ1 / ν₁)
499-
500-
solved = (ξ1 < 0 && sqrt_ξ1_νInv neg_tol) || (ξ1 0 && sqrt_ξ1_νInv atol)
501-
(ξ1 < 0 && sqrt_ξ1_νInv > neg_tol) &&
502-
error("TR: prox-gradient step should produce a decrease but ξ1 = $(ξ1)")
487+
norm_s_cauchy = norm(s)
488+
norm_s_cauchydν = norm_s_cauchy / ν₁
489+
490+
solved = norm_s_cauchydν atol
503491

504492
set_status!(
505493
stats,
@@ -521,12 +509,12 @@ function SolverCore.solve!(
521509
end
522510
if verbose > 0 && stats.status == :first_order
523511
@info log_row(
524-
Any[stats.iter, solver.substats.iter, fk, hk, sqrt_ξ1_νInv, ρk, Δk, χ(xk), χ(s), λmax, ""],
512+
Any[stats.iter, solver.substats.iter, fk, hk, norm_s_cauchydν, ρk, Δk, χ(xk), χ(s), λmax, ""],
525513
colsep = 1,
526514
)
527-
@info "TR: terminating with √(ξ1/ν) = $(sqrt_ξ1_νInv)"
515+
@info "TR: terminating with ‖sᶜᵖ‖/ν = $(norm_s_cauchydν)"
528516
end
529517

530518
set_solution!(stats, xk)
531-
set_residuals!(stats, zero(eltype(xk)), sqrt_ξ1_νInv)
519+
set_residuals!(stats, zero(eltype(xk)), norm_s_cauchydν)
532520
end

0 commit comments

Comments
 (0)