diff --git a/Project.toml b/Project.toml index b8cf01b1..7e7e9f14 100644 --- a/Project.toml +++ b/Project.toml @@ -3,6 +3,10 @@ uuid = "10dff2fc-5484-5881-a0e0-c90441020f8a" version = "0.14.8" [deps] +Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +GenericLinearAlgebra = "14197337-ba66-59df-a3e3-ca00e7dcff7a" +HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50" +HSL_jll = "017b0a0e-03f4-516a-9b91-836bbd1904dd" Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" @@ -10,18 +14,29 @@ Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +QRMumps = "422b30a1-cc69-4d85-abe7-cc07b540c444" SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" SolverParameters = "d076d87d-d1f9-4ea3-a44b-64b4cdd1e470" SolverTools = "b5612192-2639-5dc1-abfe-fbedd65fab29" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseMatricesCOO = "fa32481b-f100-4b48-8dc8-c62f61b13870" +TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" [compat] -Krylov = "0.10.0" -LinearOperators = "2.0" +Arpack = "0.5.4" +GenericLinearAlgebra = "0.3.19" +HSL = "0.5.2" +Krylov = "0.10.1" +LinearOperators = "2.12.0" NLPModels = "0.21" NLPModelsModifiers = "0.7, 0.8" +QRMumps = "0.3.2" SolverCore = "0.3" SolverParameters = "0.1" SolverTools = "0.10" +SparseArrays = "1.11.0" +SparseMatricesCOO = "0.2.6" +TSVD = "0.4.4" julia = "1.10" [extras] diff --git a/README.md b/README.md index 938593e7..b2e54276 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ This package provides an implementation of four classic algorithms for unconstra > DOI: [10.1007/BF01589116](https://doi.org/10.1007/BF01589116) +- `R2N`: An inexact second-order quadratic regularization method for unconstrained optimization (with shifted L-BFGS or shifted Hessian operator); + - `R2`: a first-order 
quadratic regularization method for unconstrained optimization; > E. G. Birgin, J. L. Gardenghi, J. M. Martínez, S. A. Santos, Ph. L. Toint. (2017). @@ -33,6 +35,8 @@ This package provides an implementation of four classic algorithms for unconstra > high-order regularized models. *Mathematical Programming*, 163(1), 359-368. > DOI: [10.1007/s10107-016-1065-8](https://doi.org/10.1007/s10107-016-1065-8) +- `R2NLS`: an inexact second-order quadratic regularization method for nonlinear least-squares problems; + - `fomo`: a first-order method with momentum for unconstrained optimization; - `tron`: a pure Julia implementation of TRON, a trust-region solver for bound-constrained optimization described in @@ -68,7 +72,7 @@ using JSOSolvers, ADNLPModels # Rosenbrock nlp = ADNLPModel(x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, [-1.2; 1.0]) -stats = lbfgs(nlp) # or trunk, tron, R2 +stats = lbfgs(nlp) # or trunk, tron, R2, R2N ``` ## Documentation diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 1f74344b..8e24377a 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -7,11 +7,13 @@ - [`trunk`](@ref) - [`R2`](@ref) - [`fomo`](@ref) +- [`R2N`](@ref) +- [`R2NLS`](@ref) | Problem type | Solvers | | --------------------- | -------- | -| Unconstrained Nonlinear Optimization Problem | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)| -| Unconstrained Nonlinear Least Squares | [`trunk`](@ref), [`tron`](@ref) | +| Unconstrained Nonlinear Optimization Problem | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref), [`R2N`](@ref)| +| Unconstrained Nonlinear Least Squares | [`trunk`](@ref), [`tron`](@ref), [`R2NLS`](@ref)| | Bound-constrained Nonlinear Optimization Problem | [`tron`](@ref) | | Bound-constrained Nonlinear Least Squares | [`tron`](@ref) | @@ -23,4 +25,6 @@ tron trunk R2 fomo +R2N +R2NLS ``` diff --git a/my_R2N_tester/Project.toml b/my_R2N_tester/Project.toml new file mode 100644 index
00000000..e01a7a28 --- /dev/null +++ b/my_R2N_tester/Project.toml @@ -0,0 +1,20 @@ +[deps] +ADNLPModels = "54578032-b7ea-4c30-94aa-7cbd1cce6c9a" +Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +CUTEst = "1b53aba6-35b6-5f92-a507-53c67d53f819" +GenericLinearAlgebra = "14197337-ba66-59df-a3e3-ca00e7dcff7a" +HSL = "34c5aeac-e683-54a6-a0e9-6e0fdc586c50" +HSL_jll = "017b0a0e-03f4-516a-9b91-836bbd1904dd" +JSOSolvers = "10dff2fc-5484-5881-a0e0-c90441020f8a" +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" +LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" +NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" +OptimizationProblems = "5049e819-d29b-5fba-b941-0eee7e64c1c6" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +QRMumps = "422b30a1-cc69-4d85-abe7-cc07b540c444" +Quadmath = "be4d8f0f-7fa4-5f49-b795-2f01399ab2dd" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" diff --git a/my_R2N_tester/R2N_op.jl b/my_R2N_tester/R2N_op.jl new file mode 100644 index 00000000..1164c2fc --- /dev/null +++ b/my_R2N_tester/R2N_op.jl @@ -0,0 +1,77 @@ +using Revise +using JSOSolvers +using HSL +using Arpack, TSVD, GenericLinearAlgebra +using SparseArrays, LinearAlgebra +using ADNLPModels, Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore +using Printf +using CUTEst + +using OptimizationProblems, OptimizationProblems.ADNLPProblems +nlp = chainwoo(n=100) + +solvers_to_test = [ + ("GS (Goldstein)--minres", MinresR2NSubsolver, :ag, 0.0), + # ("GS (Goldstein)--cr", CRR2NSubsolver, :ag, 0.0), + # ("Sigma Increase", MinresR2NSubsolver, :sigma, 0.0), + # ("Previous Step", MinresR2NSubsolver, :prev, 0.0), + # ("Cauchy Point", MinresR2NSubsolver, :cp, 0.0), +] + + +# 3. 
Run R2N Variants +results = [] + +for (name, sub_type, handler, sigma_min) in solvers_to_test + println("\nRunning $name...") + stats = R2N( + nlp; + verbose = 1, + max_iter = 700, + subsolver = sub_type, + npc_handler = handler, + σmin = sigma_min, + always_accept_npc_ag = true, + ) + push!(results, (name, stats)) + print("stats: ") + print(stats) +end + +# 4. Run Benchmark Solver (Trunk from JSOSolvers) +println("\nRunning Trunk (JSOSolvers)...") +stats_trunk = trunk(nlp; verbose = 1, max_iter = 700) +push!(results, ("Trunk", stats_trunk)) + + +##### +# LBFGS +# 1. Wrap the model so R2N knows it's a Quasi-Newton problem +qn_nlp = LBFGSModel(nlp) + +R2N( + qn_nlp; + subsolver = MinresR2NSubsolver, # Just pass the type, R2N will construct it with qn_nlp + npc_handler = :ag, + max_iter = 750, + η1 = 1.0e-6, + verbose = 10, + fast_local_convergence = false +) + + + +qn_nlp = LBFGSModel(nlp) + +R2N( + qn_nlp; + subsolver = ShiftedLBFGSSolver, # Just pass the type, R2N will construct it with qn_nlp + npc_handler = :ag, + max_iter = 750, + η1 = 1.0e-6, + verbose = 10, + fast_local_convergence = false +) + + + diff --git a/my_R2N_tester/R2N_subsolver_test.jl b/my_R2N_tester/R2N_subsolver_test.jl new file mode 100644 index 00000000..6300fba5 --- /dev/null +++ b/my_R2N_tester/R2N_subsolver_test.jl @@ -0,0 +1,105 @@ +println("==============================================================") +println(" Exhaustive Testing R2N Combinations ") +println("==============================================================") + +# 1. Define the Problem +n = 30 +nlp = ADNLPModel( + x -> sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:(n - 1)), + collect(1:n) ./ (n + 1), + name = "Extended Rosenbrock" +) + +# 2. 
Define Parameter Grids for Combinations +subsolvers = [CGR2NSubsolver, CRR2NSubsolver, MinresR2NSubsolver, MinresQlpR2NSubsolver] + +# Automatically append HSL solvers if available +if LIBHSL_isfunctional() + push!(subsolvers, MA97R2NSubsolver, MA57R2NSubsolver) +end + +npc_handlers = [:ag, :sigma, :prev, :cp] +scp_flags = [true, false] +always_accept_npc_ags = [true, false] +fast_local_convergences = [true, false] + +# 3. Storage for Results +passed_runs = [] +failed_runs = [] + +# Calculate total combinations +total_combinations = length(subsolvers) * length(npc_handlers) * length(scp_flags) * length(always_accept_npc_ags) * length(fast_local_convergences) +println("Testing $total_combinations combinations...\n") + +# 4. Execution Loop +current_run = 1 +for params in Iterators.product(subsolvers, npc_handlers, scp_flags, always_accept_npc_ags, fast_local_convergences) + sub_type, handler, scp, accept_ag, fast_conv = params + + # Create a string identifying this exact setup + config_name = "Sub: $(string(sub_type)) | NPC: $(handler) | scp: $(scp) | accept_ag: $(accept_ag) | fast_conv: $(fast_conv)" + + print("\rProgress: $current_run / $total_combinations combinations tested...") + + try + # Run silently with a smaller max_iter so the mass-test finishes quickly + stats = R2N( + nlp; + verbose = 0, + max_iter = 100, + subsolver = sub_type, + npc_handler = handler, + scp_flag = scp, + always_accept_npc_ag = accept_ag, + fast_local_convergence = fast_conv + ) + push!(passed_runs, (config_name, stats.status, stats.iter, stats.objective)) + catch e + # If it crashes, catch the error and the backtrace so we can inspect it later + bt = catch_backtrace() + err_msg = sprint(showerror, e, bt) + push!(failed_runs, (config_name, err_msg)) + end + + current_run += 1 +end + +println("\n\n==============================================================") +println(" Testing Summary ") +println("==============================================================") +println("Total Tested: 
$total_combinations") +println("Passed: $(length(passed_runs))") +println("Failed: $(length(failed_runs))") +println("==============================================================\n") + +# 5. Print Error Report +if !isempty(failed_runs) + println("### 🚨 ERROR REPORT 🚨 ###\n") + for (config, err) in failed_runs + println("❌ CONFIGURATION:") + println(" ", config) + println("\n ERROR DETAILS:") + + # Print just the first few lines of the stacktrace to avoid terminal flooding + err_lines = split(err, '\n') + for line in err_lines[1:min(12, length(err_lines))] + println(" ", line) + end + println("-" ^ 80) + end +else + println("🎉 All configurations ran without throwing exceptions!") +end + +# Optional: Print a small sample of passed runs to verify it's working +if !isempty(passed_runs) + println("\nSample of Passed Runs:") + @printf("%-100s %-15s %-5s\n", "Configuration", "Status", "Iter") + println("-" ^ 125) + for (cfg, st, it, obj) in passed_runs[1:min(5, length(passed_runs))] + @printf("%-100s %-15s %-5d\n", cfg, st, it) + end +end + +# Clean up CUTEst model memory +finalize(nlp) \ No newline at end of file diff --git a/my_R2N_tester/R2N_test.jl b/my_R2N_tester/R2N_test.jl new file mode 100644 index 00000000..93d6c9d9 --- /dev/null +++ b/my_R2N_tester/R2N_test.jl @@ -0,0 +1,128 @@ +using Revise +using JSOSolvers +using HSL +using Arpack, TSVD, GenericLinearAlgebra +using SparseArrays, LinearAlgebra +using ADNLPModels, Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore +using Printf +using CUTEst + +println("==============================================================") +println(" Testing R2N with different NPC Handling Strategies ") +println("==============================================================") + +# 1. 
Define the Problem (Extended Rosenbrock) +# n = 30 +# nlp = ADNLPModel( +# x -> sum(100 * (x[i + 1] - x[i]^2)^2 + (x[i] - 1)^2 for i = 1:(n - 1)), +# collect(1:n) ./ (n + 1), +# name = "Extended Rosenbrock" +# ) + +nlp = CUTEstModel("TOINTQOR") +# nlp from optimization_problems.jl + + + +# bigfloat +using CUTEst, Quadmath + +nlp_big = CUTEstModel{Float128}("TOINTQOR") +R2N(nlp_big; subsolver=MinresR2NSubsolver(nlp_big), npc_handler=:ag, max_iter=105, η1=1.0e-6, verbose = 1 , fast_local_convergence= true) +R2N(nlp_big; subsolver=MinresR2NSubsolver(nlp_big), npc_handler=:ag, max_iter=105, η1=1.0e-6, verbose = 1 ) +# 2. Define Solver Configurations + + +##### +# LBFGS +# 1. Wrap the model so R2N knows it's a Quasi-Newton problem +qn_nlp = LBFGSModel(nlp) + +# 2. Pass the wrapped model as the main argument +R2N( + qn_nlp; + subsolver = MinresR2NSubsolver, # Just pass the type, R2N will construct it with qn_nlp + npc_handler = :ag, + max_iter = 105, + η1 = 1.0e-6, + verbose = 1, + fast_local_convergence = true +) + + +# List of strategies to test +solvers_to_test = [ + ("GS (Goldstein)", MinresQlpR2NSubsolver, :ag, 0.0), + ("Sigma Increase", MinresQlpR2NSubsolver, :sigma, 1.0), + ("Previous Step", MinresQlpR2NSubsolver, :prev, 0.0), + ("Cauchy Point", MinresQlpR2NSubsolver, :cp, 1.0), +] + +# 3. Run R2N Variants +results = [] + +for (name, sub_type, handler, sigma_min) in solvers_to_test + println("\nRunning $name...") + stats = R2N( + nlp; + verbose = 5, + max_iter = 700, + subsolver = sub_type, + npc_handler = handler, + σmin = sigma_min + ) + push!(results, (name, stats)) +end + +# 4. Run Benchmark Solver (Trunk from JSOSolvers) +println("\nRunning Trunk (JSOSolvers)...") +stats_trunk = trunk(nlp; verbose = 1, max_iter = 700) +push!(results, ("Trunk", stats_trunk)) + + +# 5. 
Print Summary Table +println("\n\n") +println("==============================================================") +println(" Benchmark Results ") +println("==============================================================") +@printf("%-20s %-15s %-10s %-15s\n", "Strategy", "Status", "Iter", "Final Obj") +println("--------------------------------------------------------------") + +for (name, st) in results + @printf("%-20s %-15s %-10d %-15.4e\n", + name, st.status, st.iter, st.objective) +end +println("==============================================================\n") + + +# ============================================================================== +# HSL Specific Unit Tests +# ============================================================================== + +if LIBHSL_isfunctional() + println("\nRunning HSL Unit Tests (MA97 & MA57)...") + + # Simple 2D Rosenbrock for unit testing + f_test(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp_test = ADNLPModel(f_test, [-1.2; 1.0]) + + for (sub_name, sub_type) in [("MA97", MA97R2NSubsolver), ("MA57", MA57R2NSubsolver)] + println(" Testing $sub_name...") + + # Reset problem + stats_test = R2N( + nlp_test; + subsolver = sub_type, + verbose = 0, + max_iter = 50 + ) + + is_solved = stats_test.status == :first_order + is_accurate = isapprox(stats_test.solution, [1.0; 1.0], atol = 1e-6) + + status_msg = (is_solved && is_accurate) ? "✅ PASSED" : "❌ FAILED" + println(" -> $status_msg (Iter: $(stats_test.iter))") + end +else + @warn "HSL library is not functional. Skipping HSL unit tests." +end \ No newline at end of file diff --git a/my_R2N_tester/test_R2NLS.jl b/my_R2N_tester/test_R2NLS.jl new file mode 100644 index 00000000..85e68514 --- /dev/null +++ b/my_R2N_tester/test_R2NLS.jl @@ -0,0 +1,29 @@ +using JSOSolvers +using ADNLPModels +using SolverCore +using LinearAlgebra +using SparseArrays +using Printf +using QRMumps +using Krylov +using LinearOperators + +# 1. 
Define the Rosenbrock Problem +# Residual F(x) = [10(x2 - x1^2); 1 - x1] +# Minimum at [1, 1] +rosenbrock_f(x) = [10 * (x[2] - x[1]^2); 1 - x[1]] +nls = ADNLSModel(rosenbrock_f, [-1.2; 1.0], 2, name="Rosenbrock") + +println("Problem: $(nls.meta.name)") +println("Initial x: $(nls.meta.x0)") + +# 2. Run R2NLS with default settings (QRMumps subsolver) +println("\nRunning R2NLS...") +stats = R2NLS(nls,max_iter = 100 ,verbose=1) + + +########### Additional tests can be added here, e.g., with different subsolvers or on different problems + +stats = R2NLS(nls, subsolver=LSMRSubsolver, verbose=1, max_iter=100) + + \ No newline at end of file diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 9c021e3d..a11ddd65 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -1,12 +1,14 @@ module JSOSolvers # stdlib -using LinearAlgebra, Logging, Printf +using LinearAlgebra, Logging, Printf, SparseArrays # JSO packages +using Arpack, TSVD, GenericLinearAlgebra using Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverParameters, SolverTools + import SolverTools.reset! import SolverCore.solve! export default_callback_quasi_newton, solve! @@ -58,14 +60,20 @@ function default_callback_quasi_newton( end end end +# subsolver interface +include("r2n_subsolver_common.jl") +include("R2N_subsolvers.jl") +include("R2NLS_subsolvers.jl") # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") include("fomo.jl") +include("R2N.jl") # Unconstrained solvers for NLS include("trunkls.jl") +include("R2NLS.jl") # List of keywords accepted by TRONTrustRegion const tron_keys = ( diff --git a/src/R2N.jl b/src/R2N.jl new file mode 100644 index 00000000..8cf2f8b8 --- /dev/null +++ b/src/R2N.jl @@ -0,0 +1,698 @@ +export R2N, R2NSolver, R2NParameterSet + +""" + R2NParameterSet([T=Float64]; θ1, θ2, η1, η2, γ1, γ2, γ3, σmin, non_mono_size) + +Parameter set for the R2N solver. Controls algorithmic tolerances and step acceptance. 
+ +# Keyword Arguments +- `θ1 = T(0.5)`: Cauchy step parameter (0 < θ1 < 1). +- `θ2 = eps(T)^(-1)`: Maximum allowed ratio between the step and the Cauchy step (θ2 > 1). +- `η1 = eps(T)^(1/4)`: Accept step if actual/predicted reduction ≥ η1 (0 < η1 ≤ η2 < 1). +- `η2 = T(0.95)`: Step is very successful if reduction ≥ η2 (0 < η1 ≤ η2 < 1). +- `γ1 = T(1.5)`: Regularization increase factor on successful (but not very successful) step (1 < γ1 ≤ γ2). +- `γ2 = T(2.5)`: Regularization increase factor on rejected step (γ1 ≤ γ2). +- `γ3 = T(0.5)`: Regularization decrease factor on very successful step (0 < γ3 ≤ 1). +- `δ1 = T(0.5)`: Cauchy point calculation parameter. +- `σmin = eps(T)`: Minimum regularization parameter. +- `non_mono_size = 1`: Window size for non-monotone acceptance. +""" +struct R2NParameterSet{T} <: AbstractParameterSet + θ1::Parameter{T, RealInterval{T}} + θ2::Parameter{T, RealInterval{T}} + η1::Parameter{T, RealInterval{T}} + η2::Parameter{T, RealInterval{T}} + γ1::Parameter{T, RealInterval{T}} + γ2::Parameter{T, RealInterval{T}} + γ3::Parameter{T, RealInterval{T}} + δ1::Parameter{T, RealInterval{T}} + σmin::Parameter{T, RealInterval{T}} + non_mono_size::Parameter{Int, IntegerRange{Int}} + ls_c::Parameter{T, RealInterval{T}} + ls_increase::Parameter{T, RealInterval{T}} + ls_decrease::Parameter{T, RealInterval{T}} + ls_min_alpha::Parameter{T, RealInterval{T}} + ls_max_alpha::Parameter{T, RealInterval{T}} +end + +# Default parameter values +const R2N_θ1 = DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.5), "T(0.5)") +const R2N_θ2 = DefaultParameter(nlp -> inv(eps(eltype(nlp.meta.x0))), "eps(T)^(-1)") +const R2N_η1 = DefaultParameter(nlp -> begin + T = eltype(nlp.meta.x0) + T(eps(T))^(T(1) / T(4)) +end, "eps(T)^(1/4)") +const R2N_η2 = DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.95), "T(0.95)") +const R2N_γ1 = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1.5), "T(1.5)") +const R2N_γ2 = DefaultParameter(nlp -> eltype(nlp.meta.x0)(2.5), "T(2.5)") +const R2N_γ3 
= DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.5), "T(0.5)") +const R2N_δ1 = DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.5), "T(0.5)") +const R2N_σmin = DefaultParameter(nlp -> begin + T = eltype(nlp.meta.x0) + T(eps(T)) +end, "eps(T)") +const R2N_non_mono_size = DefaultParameter(1) +# Line search parameters +const R2N_ls_c = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1e-4), "T(1e-4)") +const R2N_ls_increase = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1.5), "T(1.5)") +const R2N_ls_decrease = DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.5), "T(0.5)") +const R2N_ls_min_alpha = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1e-8), "T(1e-8)") +const R2N_ls_max_alpha = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1e2), "T(1e2)") + +function R2NParameterSet( + nlp::AbstractNLPModel; + θ1::T = get(R2N_θ1, nlp), + θ2::T = get(R2N_θ2, nlp), + η1::T = get(R2N_η1, nlp), + η2::T = get(R2N_η2, nlp), + γ1::T = get(R2N_γ1, nlp), + γ2::T = get(R2N_γ2, nlp), + γ3::T = get(R2N_γ3, nlp), + δ1::T = get(R2N_δ1, nlp), + σmin::T = get(R2N_σmin, nlp), + non_mono_size::Int = get(R2N_non_mono_size, nlp), + ls_c::T = get(R2N_ls_c, nlp), + ls_increase::T = get(R2N_ls_increase, nlp), + ls_decrease::T = get(R2N_ls_decrease, nlp), + ls_min_alpha::T = get(R2N_ls_min_alpha, nlp), + ls_max_alpha::T = get(R2N_ls_max_alpha, nlp), +) where {T} + @assert zero(T) < θ1 < one(T) "θ1 must satisfy 0 < θ1 < 1" + @assert θ2 > one(T) "θ2 must satisfy θ2 > 1" + @assert zero(T) < η1 <= η2 < one(T) "η1, η2 must satisfy 0 < η1 ≤ η2 < 1" + @assert one(T) < γ1 <= γ2 "γ1, γ2 must satisfy 1 < γ1 ≤ γ2" + @assert γ3 > zero(T) && γ3 <= one(T) "γ3 must satisfy 0 < γ3 ≤ 1" + @assert zero(T) < δ1 < one(T) "δ1 must satisfy 0 < δ1 < 1" + + R2NParameterSet{T}( + Parameter(θ1, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(θ2, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(η1, RealInterval(zero(T), one(T), lower_open = false, upper_open = true)), + 
Parameter(η2, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(γ1, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ2, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ3, RealInterval(zero(T), one(T), lower_open = false, upper_open = true)), + Parameter(δ1, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(σmin, RealInterval(zero(T), T(Inf), lower_open = false, upper_open = true)), + Parameter(non_mono_size, IntegerRange(1, typemax(Int))), + Parameter(ls_c, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(ls_increase, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(ls_decrease, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(ls_min_alpha, RealInterval(zero(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(ls_max_alpha, RealInterval(zero(T), T(Inf), lower_open = true, upper_open = true)), + ) +end + +const npc_handler_allowed = [:ag, :sigma, :prev, :cp] + +""" + R2N(nlp; kwargs...) + +A second-order quadratic regularization method for unconstrained optimization (with shifted L-BFGS or shifted Hessian operator). + + min f(x) + +For advanced usage, first define a `R2NSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = R2NSolver(nlp) + solve!(solver, nlp; kwargs...) + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `params::R2NParameterSet = R2NParameterSet(nlp)`: algorithm parameters, see [`R2NParameterSet`](@ref). +- `η1::T = $(R2N_η1)`: step acceptance parameter, see [`R2NParameterSet`](@ref). +- `η2::T = $(R2N_η2)`: step acceptance parameter, see [`R2NParameterSet`](@ref). +- `θ1::T = $(R2N_θ1)`: Cauchy step parameter, see [`R2NParameterSet`](@ref). 
+- `θ2::T = $(R2N_θ2)`: Cauchy step parameter, see [`R2NParameterSet`](@ref). +- `γ1::T = $(R2N_γ1)`: regularization update parameter, see [`R2NParameterSet`](@ref). +- `γ2::T = $(R2N_γ2)`: regularization update parameter, see [`R2NParameterSet`](@ref). +- `γ3::T = $(R2N_γ3)`: regularization update parameter, see [`R2NParameterSet`](@ref). +- `δ1::T = $(R2N_δ1)`: Cauchy point calculation parameter, see [`R2NParameterSet`](@ref). +- `σmin::T = $(R2N_σmin)`: minimum step parameter, see [`R2NParameterSet`](@ref). +- `non_mono_size::Int = $(R2N_non_mono_size)`: the size of the non-monotone behaviour. If > 1, the algorithm will use a non-monotone strategy to accept steps. +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `max_eval::Int = -1`: maximum number of evaluations of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `subsolver = CGR2NSubsolver`: the subproblem solver type or instance. +- `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver if KrylovWorkspace type is selected. +- `callback`: function called at each iteration, see [`Callbacks`](https://jso.dev/JSOSolvers.jl/stable/#Callbacks) section. +- `callback_quasi_newton`: function called at each iteration, specifically to update the Hessian approximation of quasi-Newton models, see [`Callbacks`](https://jso.dev/JSOSolvers.jl/stable/#Callbacks) section. +- `scp_flag::Bool = true`: if true, we compare the norm of the calculate step with `θ2 * norm(scp)`, each iteration, selecting the smaller step. 
+- `always_accept_npc_ag::Bool = false`: if true, we skip the computation of the reduction ratio ρ for Goldstein steps taken along directions of negative curvature, unconditionally accepting them as very successful steps to aggressively escape saddle points. +- `fast_local_convergence::Bool = false`: if true, we scale the regularization parameter σ by the norm of the current gradient on very successful iterations (using `γ3 * min(σ, norm(gx))`), which accelerates local convergence near a minimizer. +- `npc_handler::Symbol = :ag`: the non-positive curvature (NPC) handling strategy. + - `:ag`: run a line search along the NPC direction with Armijo-Goldstein conditions. + - `:sigma`: increase the regularization parameter σ. + - `:prev`: if the subsolver returns after its first iteration, increase σ; if it returns after a later iteration, set s_k = s_k^(t-1). + - `:cp`: set s_k to the Cauchy point. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. +- `callback`: function called at each iteration, see [`Callback`](https://jso.dev/JSOSolvers.jl/stable/#Callback) section.
+ +# Examples +```jldoctest; output = false +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2N(nlp) +# output +"Execution stats: first-order stationary" + +``` + +""" +mutable struct R2NSolver{T, V, Sub <: AbstractR2NSubsolver{T}, M <: AbstractNLPModel{T, V}} <: + AbstractOptimizationSolver + x::V # Current iterate x_k + xt::V # Trial iterate x_{k+1} + gx::V # Gradient ∇f(x) + rhs::V # RHS for subsolver (store -∇f) + Hs::V # Storage for H*s products + s::V # Step direction + scp::V # Cauchy point step + obj_vec::V # History of objective values for non-monotone strategy + subsolver::Sub # The subproblem solver + h::LineModel{T, V, M} # Line search model + subtol::T # Current tolerance for the subproblem + σ::T # Regularization parameter + params::R2NParameterSet{T} # Algorithmic parameters +end + +function R2NSolver( + nlp::AbstractNLPModel{T, V}; + η1 = get(R2N_η1, nlp), + η2 = get(R2N_η2, nlp), + θ1 = get(R2N_θ1, nlp), + θ2 = get(R2N_θ2, nlp), + γ1 = get(R2N_γ1, nlp), + γ2 = get(R2N_γ2, nlp), + γ3 = get(R2N_γ3, nlp), + δ1 = get(R2N_δ1, nlp), + σmin = get(R2N_σmin, nlp), + non_mono_size = get(R2N_non_mono_size, nlp), + subsolver::Union{Function, Type, AbstractR2NSubsolver} = CGR2NSubsolver(nlp), + ls_c = get(R2N_ls_c, nlp), + ls_increase = get(R2N_ls_increase, nlp), + ls_decrease = get(R2N_ls_decrease, nlp), + ls_min_alpha = get(R2N_ls_min_alpha, nlp), + ls_max_alpha = get(R2N_ls_max_alpha, nlp), +) where {T, V} + params = R2NParameterSet( + nlp; + η1 = η1, + η2 = η2, + θ1 = θ1, + θ2 = θ2, + γ1 = γ1, + γ2 = γ2, + γ3 = γ3, + δ1 = δ1, + σmin = σmin, + non_mono_size = non_mono_size, + ls_c = ls_c, + ls_increase = ls_increase, + ls_decrease = ls_decrease, + ls_min_alpha = ls_min_alpha, + ls_max_alpha = ls_max_alpha, + ) + + value(params.non_mono_size) >= 1 || error("non_mono_size must be greater than or equal to 1") + + nvar = nlp.meta.nvar + x = V(undef, nvar) + xt = V(undef, nvar) + gx = V(undef, nvar) + rhs = V(undef, nvar) + Hs = 
V(undef, nvar) + + x .= nlp.meta.x0 + + if subsolver isa ShiftedLBFGSSolver && !(nlp isa LBFGSModel) + error("ShiftedLBFGSSolver can only be used by LBFGSModel") + end + + σ = zero(T) + s = V(undef, nvar) + scp = V(undef, nvar) + subtol = one(T) + obj_vec = fill(typemin(T), non_mono_size) + + h = LineModel(nlp, x, s) + + return R2NSolver{T, V, typeof(subsolver), typeof(nlp)}( + x, + xt, + gx, + rhs, + Hs, + s, + scp, + obj_vec, + subsolver, + h, + subtol, + σ, + params, + ) +end + +function SolverCore.reset!(solver::R2NSolver{T}) where {T} + fill!(solver.obj_vec, typemin(T)) + return solver +end + +function SolverCore.reset!(solver::R2NSolver{T}, nlp::AbstractNLPModel) where {T} + fill!(solver.obj_vec, typemin(T)) + solver.h = LineModel(nlp, solver.x, solver.s) + return solver +end + +@doc (@doc R2NSolver) function R2N( + nlp::AbstractNLPModel{T, V}; + η1::Real = get(R2N_η1, nlp), + η2::Real = get(R2N_η2, nlp), + θ1::Real = get(R2N_θ1, nlp), + θ2::Real = get(R2N_θ2, nlp), + γ1::Real = get(R2N_γ1, nlp), + γ2::Real = get(R2N_γ2, nlp), + γ3::Real = get(R2N_γ3, nlp), + δ1::Real = get(R2N_δ1, nlp), + σmin::Real = get(R2N_σmin, nlp), + non_mono_size::Int = get(R2N_non_mono_size, nlp), + subsolver::Union{Function, Type, AbstractR2NSubsolver} = CGR2NSubsolver(nlp), + ls_c::Real = get(R2N_ls_c, nlp), + ls_increase::Real = get(R2N_ls_increase, nlp), + ls_decrease::Real = get(R2N_ls_decrease, nlp), + ls_min_alpha::Real = get(R2N_ls_min_alpha, nlp), + ls_max_alpha::Real = get(R2N_ls_max_alpha, nlp), + kwargs..., +) where {T, V} + sub_instance = (subsolver isa Type || subsolver isa Function) ? 
subsolver(nlp) : subsolver + solver = R2NSolver( + nlp; + η1 = convert(T, η1), + η2 = convert(T, η2), + θ1 = convert(T, θ1), + θ2 = convert(T, θ2), + γ1 = convert(T, γ1), + γ2 = convert(T, γ2), + γ3 = convert(T, γ3), + δ1 = convert(T, δ1), + σmin = convert(T, σmin), + non_mono_size = non_mono_size, + subsolver = sub_instance, + ls_c = convert(T, ls_c), + ls_increase = convert(T, ls_increase), + ls_decrease = convert(T, ls_decrease), + ls_min_alpha = convert(T, ls_min_alpha), + ls_max_alpha = convert(T, ls_max_alpha), + ) + return solve!(solver, nlp; kwargs...) +end + +function SolverCore.solve!( + solver::R2NSolver{T, V}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + callback_quasi_newton = default_callback_quasi_newton, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + verbose::Int = 0, + subsolver_verbose::Int = 0, + npc_handler::Symbol = :ag, + scp_flag::Bool = true, + always_accept_npc_ag::Bool = false, + fast_local_convergence::Bool = false, +) where {T, V} + unconstrained(nlp) || error("R2N should only be called on unconstrained problems.") + npc_handler in npc_handler_allowed || error("npc_handler must be one of $(npc_handler_allowed)") + + SolverCore.reset!(stats) + params = solver.params + η1 = value(params.η1) + η2 = value(params.η2) + θ1 = value(params.θ1) + θ2 = value(params.θ2) + γ1 = value(params.γ1) + γ2 = value(params.γ2) + γ3 = value(params.γ3) + δ1 = value(params.δ1) + σmin = value(params.σmin) + non_mono_size = value(params.non_mono_size) + + ls_c = value(params.ls_c) + ls_increase = value(params.ls_increase) + ls_decrease = value(params.ls_decrease) + + start_time = time() + set_time!(stats, 0.0) + + n = nlp.meta.nvar + x = solver.x .= x + xt = solver.xt + ∇fk = solver.gx # current gradient + rhs = solver.rhs # -∇f for subsolver + s = solver.s + scp = solver.scp + Hs = solver.Hs + σk = 
solver.σ + + subtol = solver.subtol + + initialize!(solver.subsolver, nlp, x) + H = get_operator(solver.subsolver) + + set_iter!(stats, 0) + f0 = obj(nlp, x) + set_objective!(stats, f0) + + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + set_dual_residual!(stats, norm_∇fk) + + σk = 2^round(log2(norm_∇fk + 1)) / norm_∇fk + ρk = zero(T) + + fmin = min(-one(T), f0) / eps(T) + unbounded = f0 < fmin + + ϵ = atol + rtol * norm_∇fk + optimal = norm_∇fk ≤ ϵ + + if optimal + @info "Optimal point found at initial point" + @info log_header( + [:iter, :f, :dual, :σ, :ρ], + [Int, Float64, Float64, Float64, Float64], + hdr_override = Dict(:f => "f(x)", :dual => "‖∇f‖"), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, σk, ρk]) + end + + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info log_header( + [:iter, :f, :dual, :norm_s, :σ, :ρ, :sub_iter, :dir, :sub_status], + [Int, Float64, Float64, Float64, Float64, Int, String, String], + hdr_override = Dict( + :f => "f(x)", + :dual => "‖∇f‖", + :norm_s => "‖s‖", + :sub_iter => "subiter", + :dir => "dir", + :sub_status => "status", + ), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, 0.0, σk, ρk, 0, " ", " "]) + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + subtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * subtol)) + solver.σ = σk + solver.subtol = subtol + + if solver.subsolver isa ShiftedLBFGSSolver + scp_flag = false + end + + callback_quasi_newton(nlp, solver, stats) + callback(nlp, solver, stats) + subtol = solver.subtol + σk = solver.σ + + done = stats.status != :unknown + ν_k = one(T) + γ_k = zero(T) + ft = f0 + + # Initialize scope variables + step_accepted = false + sub_stats = :unknown + subiter = 0 + dir_stat = "" + is_npc_ag_step = false # Determine if we took an NPC Goldstein step this iteration + + while 
!done + npcCount = 0 + fck_computed = false + + # Prepare RHS for subsolver (rhs = -∇f) + @. rhs = -∇fk + + subsolver_solved, sub_stats, subiter, npcCount = + solver.subsolver(s, rhs, σk, T(0.0), subtol, n; verbose = subsolver_verbose) # TODO atol = 0 when subsolver + + if !subsolver_solved && npcCount == 0 + @warn "Subsolver failed to solve the system. Terminating." + set_status!(stats, :stalled) + done = true + break + end + + calc_scp_needed = false + force_sigma_increase = false + if solver.subsolver isa HSLR2NSubsolver + num_neg, num_zero = get_inertia(solver.subsolver) + + if num_zero > 0 + force_sigma_increase = true + end + + if !force_sigma_increase && num_neg > 0 + mul!(Hs, H, s) + curv_s = dot(s, Hs) + + if curv_s < 0 + npcCount = 1 + if npc_handler == :prev + npc_handler = :ag #Force the npc_handler to be ag and not :prev since we can not have that behavior with HSL subsolver + end + else + calc_scp_needed = true + end + end + end + + if !(solver.subsolver isa ShiftedLBFGSSolver) && npcCount >= 1 + if npc_handler == :ag + is_npc_ag_step = true + npcCount = 0 + # If it's HSL, `s` is already our NPC direction + dir = solver.subsolver isa HSLR2NSubsolver ? s : get_npc_direction(solver.subsolver) + + # Ensure line search model points to current x and dir + SolverTools.redirect!(solver.h, x, dir) + f0_val = stats.objective + dot_ag = dot(∇fk, dir) # dot_ag = ∇f^T * d + + α, ft, nbk, nbG = armijo_goldstein( + solver.h, + f0_val, + dot_ag; + t = one(T), + τ₀ = ls_c, + τ₁ = 1 - ls_c, + γ₀ = ls_decrease, + γ₁ = ls_increase, + bk_max = 100, + bG_max = 100, + verbose = false#(verbose > 0), #TODO False? + ) + @. 
s = α * dir + fck_computed = true + elseif npc_handler == :prev + npcCount = 0 + # s is already populated by solver.subsolver + end + end + + # Compute Cauchy step + if scp_flag == true || npc_handler == :cp || calc_scp_needed + mul!(Hs, H, ∇fk) + dot_gHg = dot(∇fk, Hs) + σ_norm_g2 = σk * norm_∇fk^2 + γ_k = (dot_gHg + σ_norm_g2) / norm_∇fk^2 + + if γ_k > 0 + ν_k = 2*(1-δ1) / (γ_k) + else + λmax = get_operator_norm(solver.subsolver) + ν_k = θ1 / (λmax + σk) + end + + # scp = - ν_k * ∇f + @. scp = -ν_k * ∇fk + + if npc_handler == :cp && npcCount >= 1 + npcCount = 0 + s .= scp + fck_computed = false + elseif norm(s) > θ2 * norm(scp) + s .= scp + fck_computed = false + end + end + + + + + + if force_sigma_increase || (npc_handler == :sigma && npcCount >= 1) + step_accepted = false + σk = γ2 * σk + solver.σ = σk + npcCount = 0 + else + # 1. Determine if we took an NPC Armijo-Goldstein step this iteration + if is_npc_ag_step && always_accept_npc_ag + is_npc_ag_step = false # reset + ρk = η1 + @. xt = x + s + + # Safety Check: If the Cauchy step logic overwrote `s`, we MUST re-evaluate ft! + if !fck_computed + ft = obj(nlp, xt) + end + + step_accepted = true + + # 2. Standard Model Predicted Reduction + else + mul!(Hs, H, s) + dot_sHs = dot(s, Hs) # s' (H + σI) s + dot_ag = dot(s, ∇fk) # ∇f' s + + # Predicted Reduction: m(0) - m(s) = -g's - 0.5 s'Bs + ΔTk = -dot_ag - dot_sHs / 2 + + if ΔTk <= eps(T) * max(one(T), abs(stats.objective)) + step_accepted = false + else + @. xt = x + s + if !fck_computed + ft = obj(nlp, xt) + end + + if non_mono_size > 1 + k = mod(stats.iter, non_mono_size) + 1 + solver.obj_vec[k] = stats.objective + ft_max = maximum(solver.obj_vec) + ρk = (ft_max - ft) / (ft_max - stats.objective + ΔTk) + else + ρk = (stats.objective - ft) / ΔTk + end + step_accepted = ρk >= η1 + end + end + + # 3. 
Process the Step + if step_accepted + x .= xt + grad!(nlp, x, ∇fk) + + if !(solver.subsolver isa ShiftedLBFGSSolver) + update_subsolver!(solver.subsolver, nlp, x) + H = get_operator(solver.subsolver) + end + + set_objective!(stats, ft) + unbounded = ft < fmin + norm_∇fk = norm(∇fk) + + # Update Sigma on Success + if ρk >= η2 + if fast_local_convergence + σk = γ3 * min(σk, norm_∇fk) + else + σk = γ3 * σk + end + else + σk = γ1 * σk + end + + # Update Sigma on Rejection + else + σk = γ2 * σk + end + end + + set_step_status!(stats, step_accepted ? :accepted : :rejected) + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + subtol = max(√eps(T), min(T(0.1), √norm_∇fk, T(0.9) * subtol)) # TODO + set_dual_residual!(stats, norm_∇fk) + + solver.σ = σk + solver.subtol = subtol + + callback_quasi_newton(nlp, solver, stats) + callback(nlp, solver, stats) + + norm_∇fk = stats.dual_feas + σk = solver.σ + subtol = solver.subtol + optimal = norm_∇fk ≤ ϵ + + if verbose > 0 && mod(stats.iter, verbose) == 0 + dir_stat = step_accepted ? "↘" : "↗" + @info log_row([ + stats.iter, + stats.objective, + norm_∇fk, + norm(s), + σk, + ρk, + subiter, + dir_stat, + sub_stats, + ]) + end + + if stats.status == :user + done = true + else + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + done = stats.status != :unknown + end + end + + set_solution!(stats, x) + return stats +end diff --git a/src/R2NLS.jl b/src/R2NLS.jl new file mode 100644 index 00000000..61e9efcb --- /dev/null +++ b/src/R2NLS.jl @@ -0,0 +1,522 @@ +export R2NLS, R2NLSSolver, R2NLSParameterSet + +""" + R2NLSParameterSet([T=Float64]; η1, η2, θ1, θ2, γ1, γ2, γ3, δ1, σmin, non_mono_size) + +Parameter set for the R2NLS solver. Controls algorithmic tolerances and step acceptance. 
+
+# Keyword Arguments
+- `η1 = eps(T)^(1/4)`: Accept step if actual/predicted reduction ≥ η1 (0 < η1 ≤ η2 < 1).
+- `η2 = T(0.95)`: Step is very successful if reduction ≥ η2 (0 < η1 ≤ η2 < 1).
+- `θ1 = T(0.5)`: Controls Cauchy step size (0 < θ1 < 1).
+- `θ2 = eps(T)^(-1)`: Maximum allowed ratio between the step and the Cauchy step (θ2 > 1).
+- `γ1 = T(1.5)`: Regularization increase factor on successful (but not very successful) step (1 < γ1 ≤ γ2).
+- `γ2 = T(2.5)`: Regularization increase factor on rejected step (γ1 ≤ γ2).
+- `γ3 = T(0.5)`: Regularization decrease factor on very successful step (0 < γ3 ≤ 1).
+- `δ1 = T(0.5)`: Cauchy point scaling (0 < δ1 < 1). θ1 scales the step size when using the exact Cauchy point, while δ1 scales the step size when using the inexact Cauchy point.
+- `σmin = eps(T)`: Smallest allowed regularization.
+- `non_mono_size = 1`: Window size for non-monotone acceptance.
+- `compute_cauchy_point = false`: Whether to compute the Cauchy point.
+- `inexact_cauchy_point = true`: Whether to use an inexact Cauchy point.
+""" +struct R2NLSParameterSet{T} <: AbstractParameterSet + η1::Parameter{T, RealInterval{T}} + η2::Parameter{T, RealInterval{T}} + θ1::Parameter{T, RealInterval{T}} + θ2::Parameter{T, RealInterval{T}} + γ1::Parameter{T, RealInterval{T}} + γ2::Parameter{T, RealInterval{T}} + γ3::Parameter{T, RealInterval{T}} + δ1::Parameter{T, RealInterval{T}} + σmin::Parameter{T, RealInterval{T}} + non_mono_size::Parameter{Int, IntegerRange{Int}} + compute_cauchy_point::Parameter{Bool, BinaryRange{Bool}} + inexact_cauchy_point::Parameter{Bool, BinaryRange{Bool}} +end + +# Default parameter values +const R2NLS_η1 = DefaultParameter(nls -> begin + T = eltype(nls.meta.x0) + T(eps(T))^(T(1)/T(4)) +end, "eps(T)^(1/4)") +const R2NLS_η2 = DefaultParameter(nls -> eltype(nls.meta.x0)(0.95), "T(0.95)") +const R2NLS_θ1 = DefaultParameter(nls -> eltype(nls.meta.x0)(0.5), "T(0.5)") +const R2NLS_θ2 = DefaultParameter(nls -> inv(eps(eltype(nls.meta.x0))), "eps(T)^(-1)") +const R2NLS_γ1 = DefaultParameter(nls -> eltype(nls.meta.x0)(1.5), "T(1.5)") +const R2NLS_γ2 = DefaultParameter(nls -> eltype(nls.meta.x0)(2.5), "T(2.5)") +const R2NLS_γ3 = DefaultParameter(nls -> eltype(nls.meta.x0)(0.5), "T(0.5)") +const R2NLS_δ1 = DefaultParameter(nls -> eltype(nls.meta.x0)(0.5), "T(0.5)") +const R2NLS_σmin = DefaultParameter(nls -> eps(eltype(nls.meta.x0)), "eps(T)") +const R2NLS_non_mono_size = DefaultParameter(1) +const R2NLS_compute_cauchy_point = DefaultParameter(false) +const R2NLS_inexact_cauchy_point = DefaultParameter(true) + +function R2NLSParameterSet( + nls::AbstractNLSModel; + η1::T = get(R2NLS_η1, nls), + η2::T = get(R2NLS_η2, nls), + θ1::T = get(R2NLS_θ1, nls), + θ2::T = get(R2NLS_θ2, nls), + γ1::T = get(R2NLS_γ1, nls), + γ2::T = get(R2NLS_γ2, nls), + γ3::T = get(R2NLS_γ3, nls), + δ1::T = get(R2NLS_δ1, nls), + σmin::T = get(R2NLS_σmin, nls), + non_mono_size::Int = get(R2NLS_non_mono_size, nls), + compute_cauchy_point::Bool = get(R2NLS_compute_cauchy_point, nls), + inexact_cauchy_point::Bool = 
get(R2NLS_inexact_cauchy_point, nls), +) where {T} + @assert zero(T) < θ1 < one(T) "θ1 must satisfy 0 < θ1 < 1" + @assert θ2 > one(T) "θ2 must satisfy θ2 > 1" + @assert zero(T) < η1 <= η2 < one(T) "η1, η2 must satisfy 0 < η1 ≤ η2 < 1" + @assert one(T) < γ1 <= γ2 "γ1, γ2 must satisfy 1 < γ1 ≤ γ2" + @assert γ3 > zero(T) && γ3 <= one(T) "γ3 must satisfy 0 < γ3 ≤ 1" + @assert zero(T) < δ1 < one(T) "δ1 must satisfy 0 < δ1 < 1" + @assert θ1 <= 2(one(T) - δ1) "θ1 must be ≤ 2(1 - δ1) to ensure sufficient decrease condition is compatible with Cauchy point scaling" + + R2NLSParameterSet{T}( + Parameter(η1, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(η2, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(θ1, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(θ2, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ1, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ2, RealInterval(one(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ3, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(δ1, RealInterval(zero(T), one(T), lower_open = true, upper_open = true)), + Parameter(σmin, RealInterval(zero(T), T(Inf), lower_open = true, upper_open = true)), + Parameter(non_mono_size, IntegerRange(1, typemax(Int))), + Parameter(compute_cauchy_point, BinaryRange()), + Parameter(inexact_cauchy_point, BinaryRange()), + ) +end + +""" + R2NLS(nls; kwargs...) + +An implementation of the Levenberg-Marquardt method with regularization for nonlinear least-squares problems: + + min ½‖F(x)‖² + +where `F: ℝⁿ → ℝᵐ` is a vector-valued function defining the least-squares residuals. + +For advanced usage, first create a `R2NLSSolver` to preallocate the necessary memory for the algorithm, and then call `solve!`: + + solver = R2NLSSolver(nls) + solve!(solver, nls; kwargs...) 
+
+# Arguments
+
+- `nls::AbstractNLSModel{T, V}` is the nonlinear least-squares model to solve. See `NLPModels.jl` for additional details.
+
+# Keyword Arguments
+
+- `x::V = nls.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute stopping tolerance.
+- `rtol::T = √eps(T)`: relative stopping tolerance; the algorithm stops when ‖J(x)ᵀF(x)‖ ≤ atol + rtol * ‖J(x₀)ᵀF(x₀)‖.
+- `Fatol::T = √eps(T)`: absolute tolerance for the residual.
+- `Frtol::T = eps(T)`: relative tolerance for the residual; the algorithm stops when ‖F(x)‖ ≤ Fatol + Frtol * ‖F(x₀)‖.
+- `params::R2NLSParameterSet = R2NLSParameterSet(nls)`: algorithm parameters, see [`R2NLSParameterSet`](@ref).
+- `η1::T = $(R2NLS_η1)`: step acceptance parameter, see [`R2NLSParameterSet`](@ref).
+- `η2::T = $(R2NLS_η2)`: step acceptance parameter, see [`R2NLSParameterSet`](@ref).
+- `θ1::T = $(R2NLS_θ1)`: Cauchy step parameter, see [`R2NLSParameterSet`](@ref).
+- `θ2::T = $(R2NLS_θ2)`: Cauchy step parameter, see [`R2NLSParameterSet`](@ref).
+- `γ1::T = $(R2NLS_γ1)`: regularization update parameter, see [`R2NLSParameterSet`](@ref).
+- `γ2::T = $(R2NLS_γ2)`: regularization update parameter, see [`R2NLSParameterSet`](@ref).
+- `γ3::T = $(R2NLS_γ3)`: regularization update parameter, see [`R2NLSParameterSet`](@ref).
+- `δ1::T = $(R2NLS_δ1)`: Cauchy point calculation parameter, see [`R2NLSParameterSet`](@ref).
+- `σmin::T = $(R2NLS_σmin)`: minimum regularization parameter, see [`R2NLSParameterSet`](@ref).
+- `non_mono_size::Int = $(R2NLS_non_mono_size)`: the size of the non-monotone history. If > 1, the algorithm will use a non-monotone strategy to accept steps.
+- `compute_cauchy_point::Bool = false`: if true, safeguards the step size by reverting to the Cauchy point `scp` if the calculated step `s` is too large relative to the Cauchy step (i.e., if `‖s‖ > θ2 * ‖scp‖`).
+- `inexact_cauchy_point::Bool = true`: if true and `compute_cauchy_point` is true, the Cauchy point is calculated using a computationally cheaper inexact formula; otherwise, it is calculated using the operator norm of the Jacobian. +- `subsolver = QRMumpsSubsolver`: the subproblem solver type or instance. Pass a type (e.g., `QRMumpsSubsolver`, `LSMRSubsolver`, `LSQRSubsolver`, `CGLSSubsolver`) to let the solver instantiate it, or pass a pre-allocated instance of `AbstractR2NLSSubsolver`. +- `subsolver_verbose::Int = 0`: if > 0, display subsolver iteration details every `subsolver_verbose` iterations (only applicable for iterative subsolvers). +- `max_eval::Int = -1`: maximum number of objective function evaluations. +- `max_time::Float64 = 30.0`: maximum allowed time in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, displays iteration details every `verbose` iterations. + +# Output + +Returns a `GenericExecutionStats` object containing statistics and information about the optimization process (see `SolverCore.jl`). + +- `callback`: function called at each iteration, see [`Callback`](https://jso.dev/JSOSolvers.jl/stable/#Callback) section. + +# Examples + +```jldoctest +using JSOSolvers, ADNLPModels +F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] +model = ADNLSModel(F, [-1.2; 1.0], 2) +solver = R2NLSSolver(model) +stats = solve!(solver, model) +# output +"Execution stats: first-order stationary" +``` +""" + +mutable struct R2NLSSolver{T, V, Sub <: AbstractR2NLSSubsolver{T}} <: AbstractOptimizationSolver + x::V # Current iterate x_k + xt::V # Trial iterate x_{k+1} + gx::V # Gradient of the objective function: J' * F(x) + r::V # Residual vector F(x) + rt::V # Residual vector at trial point F(xt) + temp::V # Temporary vector for intermediate calculations (e.g. 
J*v) + subsolver::Sub # The solver for the linear least-squares subproblem + obj_vec::V # History of objective values for non-monotone strategy + subtol::T # Current tolerance for the subproblem solver + s::V # The calculated step direction + scp::V # The Cauchy point step + σ::T # Regularization parameter (Levenberg-Marquardt parameter) + params::R2NLSParameterSet{T} # Algorithmic parameters +end + +function R2NLSSolver( + nls::AbstractNLSModel{T, V}; + subsolver::Union{Function, Type, AbstractR2NLSSubsolver{T}} = QRMumpsSubsolver(nls), # Default is an INSTANCE + η1::T = get(R2NLS_η1, nls), + η2::T = get(R2NLS_η2, nls), + θ1::T = get(R2NLS_θ1, nls), + θ2::T = get(R2NLS_θ2, nls), + γ1::T = get(R2NLS_γ1, nls), + γ2::T = get(R2NLS_γ2, nls), + γ3::T = get(R2NLS_γ3, nls), + δ1::T = get(R2NLS_δ1, nls), + σmin::T = get(R2NLS_σmin, nls), + non_mono_size::Int = get(R2NLS_non_mono_size, nls), + compute_cauchy_point::Bool = get(R2NLS_compute_cauchy_point, nls), + inexact_cauchy_point::Bool = get(R2NLS_inexact_cauchy_point, nls), +) where {T, V} + params = R2NLSParameterSet( + nls; + η1 = η1, + η2 = η2, + θ1 = θ1, + θ2 = θ2, + γ1 = γ1, + γ2 = γ2, + γ3 = γ3, + δ1 = δ1, + σmin = σmin, + non_mono_size = non_mono_size, + compute_cauchy_point = compute_cauchy_point, + inexact_cauchy_point = inexact_cauchy_point, + ) + + value(params.non_mono_size) >= 1 || error("non_mono_size must be greater than or equal to 1") + + nvar = nls.meta.nvar + nequ = nls.nls_meta.nequ + x = V(undef, nvar) + xt = V(undef, nvar) + gx = V(undef, nvar) + r = V(undef, nequ) + rt = V(undef, nequ) + temp = V(undef, nequ) + s = V(undef, nvar) + scp = V(undef, nvar) + obj_vec = fill(typemin(T), value(params.non_mono_size)) + + x .= nls.meta.x0 + + # We pass the subsolver instance directly into the struct. No if/else checks needed! 
+ R2NLSSolver(x, xt, gx, r, rt, temp, subsolver, obj_vec, one(T), s, scp, eps(T)^(1/5), params) +end + +function SolverCore.reset!(solver::R2NLSSolver{T}) where {T} + fill!(solver.obj_vec, typemin(T)) + solver.σ = eps(T)^(1 / 5) + solver.subtol = one(T) + solver +end + +function SolverCore.reset!(solver::R2NLSSolver{T}, nls::AbstractNLSModel) where {T} + fill!(solver.obj_vec, typemin(T)) + solver.σ = eps(T)^(1 / 5) + solver.subtol = one(T) + solver +end + +@doc (@doc R2NLSSolver) function R2NLS( + nls::AbstractNLSModel{T, V}; + η1::Real = get(R2NLS_η1, nls), + η2::Real = get(R2NLS_η2, nls), + θ1::Real = get(R2NLS_θ1, nls), + θ2::Real = get(R2NLS_θ2, nls), + γ1::Real = get(R2NLS_γ1, nls), + γ2::Real = get(R2NLS_γ2, nls), + γ3::Real = get(R2NLS_γ3, nls), + δ1::Real = get(R2NLS_δ1, nls), + σmin::Real = get(R2NLS_σmin, nls), + non_mono_size::Int = get(R2NLS_non_mono_size, nls), + compute_cauchy_point::Bool = get(R2NLS_compute_cauchy_point, nls), + inexact_cauchy_point::Bool = get(R2NLS_inexact_cauchy_point, nls), + subsolver::Union{Function, Type, AbstractR2NLSSubsolver} = QRMumpsSubsolver(nls), + kwargs..., +) where {T, V} + sub_instance = (subsolver isa Type || subsolver isa Function) ? subsolver(nls) : subsolver + solver = R2NLSSolver( + nls; + η1 = convert(T, η1), + η2 = convert(T, η2), + θ1 = convert(T, θ1), + θ2 = convert(T, θ2), + γ1 = convert(T, γ1), + γ2 = convert(T, γ2), + γ3 = convert(T, γ3), + δ1 = convert(T, δ1), + σmin = convert(T, σmin), + non_mono_size = non_mono_size, + compute_cauchy_point = compute_cauchy_point, + inexact_cauchy_point = inexact_cauchy_point, + subsolver = sub_instance, + ) + return solve!(solver, nls; kwargs...) +end + +function SolverCore.solve!( + solver::R2NLSSolver{T, V}, + nls::AbstractNLSModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) 
-> nothing, + x::V = nls.meta.x0, # user can reset the initial point here, but it will also be reset in the solver + atol::T = √eps(T), + rtol::T = √eps(T), + Fatol::T = √eps(T), + Frtol::T = eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + verbose::Int = 0, + scp_flag::Bool = true, + subsolver_verbose::Int = 0, +) where {T, V} + unconstrained(nls) || error("R2NLS should only be called on unconstrained problems.") + if !(nls.meta.minimize) + error("R2NLS only works for minimization problem") + end + + SolverCore.reset!(stats) + params = solver.params + η1 = value(params.η1) + η2 = value(params.η2) + θ1 = value(params.θ1) + θ2 = value(params.θ2) + γ1 = value(params.γ1) + γ2 = value(params.γ2) + γ3 = value(params.γ3) + δ1 = value(params.δ1) + σmin = value(params.σmin) + non_mono_size = value(params.non_mono_size) + + start_time = time() + set_time!(stats, 0.0) + + n = nls.nls_meta.nvar + m = nls.nls_meta.nequ + + x = solver.x .= x + xt = solver.xt + r, rt = solver.r, solver.rt + s = solver.s + scp = solver.scp + ∇f = solver.gx + + # Ensure subsolver is up to date with initial x + initialize!(solver.subsolver, nls, x) + + # Get accessor for Jacobian (abstracted away from solver details) + Jx = get_jacobian(solver.subsolver) + + # Initial Eval + residual!(nls, x, r) + resid_norm = norm(r) + f = resid_norm^2 / 2 + mul!(∇f, Jx', r) + norm_∇fk = norm(∇f) + + # Heuristic for initial σ + + solver.σ = 2^round(log2(norm_∇fk + 1)) / norm_∇fk + + # Stopping criterion: + unbounded = false + ρk = zero(T) + + ϵ = atol + rtol * norm_∇fk + ϵF = Fatol + Frtol * resid_norm + + temp = solver.temp + + stationary = norm_∇fk ≤ ϵ + small_residual = resid_norm ≤ ϵF + + set_iter!(stats, 0) + set_objective!(stats, f) + set_dual_residual!(stats, norm_∇fk) + + if stationary + @info "Stationary point found at initial point" + @info log_header( + [:iter, :resid_norm, :dual, :σ, :ρ], + [Int, Float64, Float64, Float64, Float64], + hdr_override = 
Dict(:resid_norm => "‖F(x)‖", :dual => "‖∇f‖"), + ) + @info log_row([stats.iter, resid_norm, norm_∇fk, solver.σ, ρk]) + end + + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info log_header( + [:iter, :resid_norm, :dual, :σ, :ρ, :sub_iter, :dir, :sub_status], + [Int, Float64, Float64, Float64, Float64, Int, String, String], + hdr_override = Dict( + :resid_norm => "‖F(x)‖", + :dual => "‖∇f‖", + :sub_iter => "sub_iter", + :dir => "dir", + :sub_status => "status", + ), + ) + @info log_row([stats.iter, stats.objective, norm_∇fk, solver.σ, ρk, 0, " ", " "]) + end + + set_status!( + stats, + get_status( + nls, + elapsed_time = stats.elapsed_time, + optimal = stationary, + unbounded = unbounded, + max_eval = max_eval, + iter = stats.iter, + small_residual = small_residual, + max_iter = max_iter, + max_time = max_time, + ), + ) + + solver.subtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * solver.subtol)) + + callback(nls, solver, stats) + + # retrieve values again in case the user changed them in the callback + + done = stats.status != :unknown + compute_cauchy_point = value(params.compute_cauchy_point) + inexact_cauchy_point = value(params.inexact_cauchy_point) + + while !done + + # 1. Solve Subproblem + # We pass -r as RHS. Subsolver handles its own temp/workspace for this. + @. temp = -r + + sub_solved, sub_stats, sub_iter = + solver.subsolver(s, temp, solver.σ, atol, solver.subtol, verbose = subsolver_verbose) + + # 2. Cauchy Point + if compute_cauchy_point + if inexact_cauchy_point + mul!(temp, Jx, ∇f) + curvature_gn = dot(temp, temp) + γ_k = curvature_gn / norm_∇fk^2 + solver.σ + ν_k = 2 * (1 - δ1) / γ_k + else + λmax = get_operator_norm(solver.subsolver) + ν_k = θ1 / (λmax + solver.σ) + end + + @. scp = -ν_k * ∇f + if norm(s) > θ2 * norm(scp) + s .= scp + end + end + + # 3. Acceptance + xt .= x .+ s + mul!(temp, Jx, s) + @. 
temp += r + pred_f = norm(temp)^2 / 2 + ΔTk = stats.objective - pred_f + + residual!(nls, xt, rt) + resid_norm_t = norm(rt) + ft = resid_norm_t^2 / 2 + + if non_mono_size > 1 + k = mod(stats.iter, non_mono_size) + 1 + solver.obj_vec[k] = stats.objective + ft_max = maximum(solver.obj_vec) + ρk = (ft_max - ft) / (ft_max - stats.objective + ΔTk) + else + ρk = (stats.objective - ft) / ΔTk + end + + # 4. Update regularization parameters and determine acceptance of the new candidate + step_accepted = ρk >= η1 + + if step_accepted # Step Accepted + x .= xt + r .= rt + f = ft + + # Update Subsolver Jacobian + update_subsolver!(solver.subsolver, nls, x) + + resid_norm = resid_norm_t + mul!(∇f, Jx', r) + norm_∇fk = norm(∇f) + set_objective!(stats, f) + + if ρk >= η2 + solver.σ = max(σmin, γ3 * solver.σ) + else + solver.σ = γ1 * solver.σ + end + else + solver.σ = γ2 * solver.σ + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + + solver.subtol = max(rtol, min(T(0.1), √norm_∇fk, T(0.9) * solver.subtol)) + + set_dual_residual!(stats, norm_∇fk) + + callback(nls, solver, stats) + + norm_∇fk = stats.dual_feas + + stationary = norm_∇fk ≤ ϵ + small_residual = 2 * √f ≤ ϵF + + if verbose > 0 && mod(stats.iter, verbose) == 0 + dir_stat = step_accepted ? 
"↘" : "↗" + @info log_row([stats.iter, resid_norm, norm_∇fk, solver.σ, ρk, sub_iter, dir_stat, sub_stats]) + end + + if stats.status == :user + done = true + else + set_status!( + stats, + get_status( + nls, + elapsed_time = stats.elapsed_time, + optimal = stationary, + unbounded = unbounded, + small_residual = small_residual, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + end + + done = stats.status != :unknown + end + + set_solution!(stats, x) + return stats +end diff --git a/src/R2NLS_subsolvers.jl b/src/R2NLS_subsolvers.jl new file mode 100644 index 00000000..7f624d35 --- /dev/null +++ b/src/R2NLS_subsolvers.jl @@ -0,0 +1,171 @@ +using QRMumps, SparseMatricesCOO, LinearOperators +export QRMumpsSubsolver, LSMRSubsolver, LSQRSubsolver, CGLSSubsolver + +# ============================================================================== +# QRMumps Subsolver +# ============================================================================== + +mutable struct QRMumpsSubsolver{T} <: AbstractR2NLSSubsolver{T} + spmat::qrm_spmat{T} + spfct::qrm_spfct{T} + irn::Vector{Int} + jcn::Vector{Int} + val::Vector{T} + b_aug::Vector{T} + m::Int + n::Int + nnzj::Int + + # Stored internally, initialized in constructor + Jx::SparseMatrixCOO{T, Int} + + function QRMumpsSubsolver(nls::AbstractNLSModel{T}) where {T} + qrm_init() + meta = nls.meta + n = meta.nvar + m = nls.nls_meta.nequ + nnzj = nls.nls_meta.nnzj + + # 1. Allocate Arrays + irn = Vector{Int}(undef, nnzj + n) + jcn = Vector{Int}(undef, nnzj + n) + val = Vector{T}(undef, nnzj + n) + + # 2. FILL STRUCTURE IMMEDIATELY + # This populates the first nnzj elements with the Jacobian pattern + jac_structure_residual!(nls, view(irn, 1:nnzj), view(jcn, 1:nnzj)) + + # 3. Fill Regularization Structure + # Rows m+1 to m+n, Columns 1 to n + @inbounds for i = 1:n + irn[nnzj + i] = m + i + jcn[nnzj + i] = i + end + + # 4. 
Create Jx + # Since irn/jcn are already populated, Jx is valid immediately. + # It copies the structure from irn/jcn. + Jx = SparseMatrixCOO(m, n, irn[1:nnzj], jcn[1:nnzj], val[1:nnzj]) + + # 5. Initialize QRMumps + spmat = qrm_spmat_init(m + n, n, irn, jcn, val; sym = false) + spfct = qrm_spfct_init(spmat) + b_aug = Vector{T}(undef, m + n) + + # 6. Analyze Sparsity + qrm_analyse!(spmat, spfct; transp = 'n') + + sub = new{T}(spmat, spfct, irn, jcn, val, b_aug, m, n, nnzj, Jx) + return sub + end +end + +function initialize!(sub::QRMumpsSubsolver, nls, x) + # Just update the values for the new x. + update_subsolver!(sub, nls, x) +end + +function update_subsolver!(sub::QRMumpsSubsolver, nls, x) + # 1. Compute Jacobian values into QRMumps 'val' array + jac_coord_residual!(nls, x, view(sub.val, 1:sub.nnzj)) + + # 2. Explicitly sync to Jx (Copy values) + # This ensures Jx.vals has the fresh Jacobian for the gradient calculation + sub.Jx.vals .= view(sub.val, 1:sub.nnzj) +end + +function (sub::QRMumpsSubsolver{T})(s, rhs, σ, atol, rtol; verbose = 0) where {T} + sqrt_σ = sqrt(σ) + + # 1. Update ONLY the regularization values + @inbounds for i = 1:sub.n + sub.val[sub.nnzj + i] = sqrt_σ + end + + # 2. Tell QRMumps values changed + qrm_update_subsolver!(sub.spmat, sub.val) + + # 3. Prepare RHS [-F(x); 0] + sub.b_aug[1:sub.m] .= rhs + sub.b_aug[(sub.m + 1):end] .= zero(T) + + # 4. Factorize and Solve + qrm_factorize!(sub.spmat, sub.spfct; transp = 'n') + qrm_apply!(sub.spfct, sub.b_aug; transp = 't') + qrm_solve!(sub.spfct, sub.b_aug, s; transp = 'n') + + return true, :solved, 1 +end + +get_jacobian(sub::QRMumpsSubsolver) = sub.Jx + +function get_operator_norm(sub::QRMumpsSubsolver) + # The Frobenius norm is extremely cheap to compute from the COO values + # and serves as a mathematically valid upper bound for the operator 2-norm. 
+ return norm(sub.Jx.vals) +end + +# ============================================================================== +# Krylov Subsolvers (LSMR, LSQR, CGLS) +# ============================================================================== + +mutable struct GenericKrylovSubsolver{T, V, Op, W} <: AbstractR2NLSSubsolver{T} + workspace::W + Jx::Op + solver_name::Symbol + Jv::V # Store buffer + Jtv::V # Store buffer + + function GenericKrylovSubsolver(nls::AbstractNLSModel{T, V}, solver_name::Symbol) where {T, V} + x_init = nls.meta.x0 + m = nls.nls_meta.nequ + n = nls.meta.nvar + + # Jx and its buffers allocated here inside the subsolver + Jv = V(undef, m) + Jtv = V(undef, n) + Jx = jac_op_residual!(nls, x_init, Jv, Jtv) + + workspace = krylov_workspace(Val(solver_name), m, n, V) + new{T, V, typeof(Jx), typeof(workspace)}(workspace, Jx, solver_name) + end +end + +# Specific Constructors for Uniform Signature (nls, x) +LSMRSubsolver(nls, x) = GenericKrylovSubsolver(nls, :lsmr) +LSQRSubsolver(nls, x) = GenericKrylovSubsolver(nls, :lsqr) +CGLSSubsolver(nls, x) = GenericKrylovSubsolver(nls, :cgls) + +function update_subsolver!(sub::GenericKrylovSubsolver, nls, x) + # Implicitly updated because Jx holds reference to x. + # We just ensure x is valid. 
+ nothing +end + +function (sub::GenericKrylovSubsolver)(s, rhs, σ, atol, rtol; verbose = 0) + # λ allocation/calculation happens here in the solve + krylov_solve!( + sub.workspace, + sub.Jx, + rhs, + atol = atol, + rtol = rtol, + λ = sqrt(σ), # λ allocated here + itmax = max(2 * (size(sub.Jx, 1) + size(sub.Jx, 2)), 50), + verbose = verbose, + ) + s .= sub.workspace.x + return Krylov.issolved(sub.workspace), sub.workspace.stats.status, sub.workspace.stats.niter +end + +get_jacobian(sub::GenericKrylovSubsolver) = sub.Jx +function initialize!(sub::GenericKrylovSubsolver, nls, x) + sub.Jx = jac_op_residual!(nls, x, sub.Jv, sub.Jtv) + return nothing +end + +function get_operator_norm(sub::GenericKrylovSubsolver) + # Jx is a LinearOperator, so we can use the specialized estimator + λmax, _ = LinearOperators.estimate_opnorm(sub.Jx) + return λmax +end diff --git a/src/R2N_subsolvers.jl b/src/R2N_subsolvers.jl new file mode 100644 index 00000000..e28b492a --- /dev/null +++ b/src/R2N_subsolvers.jl @@ -0,0 +1,241 @@ +using HSL +export ShiftedLBFGSSolver, HSLR2NSubsolver, KrylovR2NSubsolver +export CGR2NSubsolver, CRR2NSubsolver, MinresR2NSubsolver, MinresQlpR2NSubsolver +export AbstractR2NSubsolver +export MA97R2NSubsolver, MA57R2NSubsolver + +# ============================================================================== +# Krylov Subsolver (CG, CR, MINRES) +# ============================================================================== + +mutable struct KrylovR2NSubsolver{T, V, Op, W, ShiftOp} <: AbstractR2NSubsolver{T} + workspace::W + H::Op # The Hessian Operator + A::ShiftOp # The Shifted Operator (only for CG/CR) + solver_name::Symbol + npc_dir::V # Store NPC direction if needed + + function KrylovR2NSubsolver(nlp::AbstractNLPModel{T, V}, solver_name::Symbol = :cg) where {T, V} + x_init = nlp.meta.x0 + n = nlp.meta.nvar + H = isa(nlp, LBFGSModel) ? 
nlp.op : hess_op(nlp, x_init) + + A = nothing + A = ShiftedOperator(H) + + workspace = krylov_workspace(Val(solver_name), n, n, V) + + new{T, V, typeof(H), typeof(workspace), typeof(A)}(workspace, H, A, solver_name, V(undef, n)) + end +end + +CGR2NSubsolver(nlp) = KrylovR2NSubsolver(nlp, :cg) +CRR2NSubsolver(nlp) = KrylovR2NSubsolver(nlp, :cr) +MinresR2NSubsolver(nlp) = KrylovR2NSubsolver(nlp, :minres) +MinresQlpR2NSubsolver(nlp) = KrylovR2NSubsolver(nlp, :minres_qlp) + +function initialize!(sub::KrylovR2NSubsolver, nlp, x) + # x here is the live solver.x from the main loop! + sub.H = isa(nlp, LBFGSModel) ? nlp.op : hess_op(nlp, x) + sub.A = ShiftedOperator(sub.H) + return nothing +end + +function update_subsolver!(sub::KrylovR2NSubsolver, nlp, x) + # Standard hess_op updates internally if it holds the NLP reference + return nothing +end + +function (sub::KrylovR2NSubsolver)(s, rhs, σ, atol, rtol, n; verbose = 0) + sub.workspace.stats.niter = 0 + + sub.A.data.σ = σ + krylov_solve!( + sub.workspace, + sub.A, + rhs, + itmax = max(2 * n, 50), + atol = atol, + rtol = rtol, + verbose = verbose, + linesearch = true, + ) + + s .= sub.workspace.x + if isdefined(sub.workspace, :npc_dir) + sub.npc_dir .= sub.workspace.npc_dir + end + + # Return the tuple expected by the main loop + return Krylov.issolved(sub.workspace), + sub.workspace.stats.status, + sub.workspace.stats.niter, + sub.workspace.stats.npcCount +end + +get_operator(sub::KrylovR2NSubsolver) = sub.H +has_npc_direction(sub::KrylovR2NSubsolver) = + isdefined(sub.workspace, :npc_dir) && sub.workspace.stats.npcCount > 0 + +function get_npc_direction(sub::KrylovR2NSubsolver) + has_npc_direction(sub) || error("No NPC direction found.") + return sub.npc_dir +end +function get_operator_norm(sub::KrylovR2NSubsolver) + # Estimate norm of H. 
+ val, _ = LinearOperators.estimate_opnorm(sub.H) + return val +end + +# ============================================================================== +# Shifted LBFGS Subsolver +# ============================================================================== + +mutable struct ShiftedLBFGSSolver{T, Op} <: AbstractR2NSubsolver{T} + H::Op # The LBFGS Operator + + function ShiftedLBFGSSolver(nlp::AbstractNLPModel{T, V}) where {T, V} + if !(nlp isa LBFGSModel) + error("ShiftedLBFGSSolver can only be used by LBFGSModel") + end + new{T, typeof(nlp.op)}(nlp.op) + end +end + +ShiftedLBFGSSolver(nlp) = ShiftedLBFGSSolver(nlp) + +initialize!(sub::ShiftedLBFGSSolver, nlp, x) = nothing +update_subsolver!(sub::ShiftedLBFGSSolver, nlp, x) = nothing # LBFGS updates via push! in outer loop + +function (sub::ShiftedLBFGSSolver)(s, rhs, σ, atol, rtol, n; verbose = 0) + # rhs is usually -∇f. solve_shifted_system! expects negative gradient + solve_shifted_system!(s, sub.H, rhs, σ) + return true, :first_order, 1, 0 +end + +get_operator(sub::ShiftedLBFGSSolver) = sub.H + +function get_operator_norm(sub::ShiftedLBFGSSolver) + # Estimate norm of H. 
+ val, _ = LinearOperators.estimate_opnorm(sub.H) + return val +end + +# ============================================================================== +# HSL Subsolver (MA97 / MA57) +# ============================================================================== + +mutable struct HSLR2NSubsolver{T, S} <: AbstractR2NSubsolver{T} + hsl_obj::S + rows::Vector{Int} + cols::Vector{Int} + vals::Vector{T} + n::Int + nnzh::Int + work::Vector{T} # workspace for solves (used for MA57) +end + +function HSLR2NSubsolver(nlp::AbstractNLPModel{T, V}; hsl_constructor = ma97_coord) where {T, V} + LIBHSL_isfunctional() || error("HSL library is not functional") + n = nlp.meta.nvar + nnzh = nlp.meta.nnzh + total_nnz = nnzh + n + + rows = Vector{Int}(undef, total_nnz) + cols = Vector{Int}(undef, total_nnz) + vals = Vector{T}(undef, total_nnz) + + # Structure analysis must happen in constructor to define the object type S + hess_structure!(nlp, view(rows, 1:nnzh), view(cols, 1:nnzh)) + + # Initialize values to zero. Actual computation happens in initialize! 
+ fill!(vals, zero(T)) + + @inbounds for i = 1:n + rows[nnzh + i] = i + cols[nnzh + i] = i + # Diagonal shift will be updated during solve using σ + vals[nnzh + i] = one(T) + end + + hsl_obj = hsl_constructor(n, cols, rows, vals) + + if hsl_constructor == ma57_coord + work = Vector{T}(undef, n * size(nlp.meta.x0, 2)) + else + work = Vector{T}(undef, 0) + end + + return HSLR2NSubsolver{T, typeof(hsl_obj)}(hsl_obj, rows, cols, vals, n, nnzh, work) +end + +MA97R2NSubsolver(nlp) = HSLR2NSubsolver(nlp; hsl_constructor = ma97_coord) +MA57R2NSubsolver(nlp) = HSLR2NSubsolver(nlp; hsl_constructor = ma57_coord) + +function initialize!(sub::HSLR2NSubsolver, nlp, x) + # Compute the initial Hessian values at x + hess_coord!(nlp, x, view(sub.vals, 1:sub.nnzh)) + return nothing +end + +function update_subsolver!(sub::HSLR2NSubsolver, nlp, x) + hess_coord!(nlp, x, view(sub.vals, 1:sub.nnzh)) +end + +function get_inertia(sub::HSLR2NSubsolver{T, S}) where {T, S <: Ma97{T}} + n = sub.n + num_neg = sub.hsl_obj.info.num_neg + num_zero = n - sub.hsl_obj.info.matrix_rank + return num_neg, num_zero +end + +function get_inertia(sub::HSLR2NSubsolver{T, S}) where {T, S <: Ma57{T}} + n = sub.n + num_neg = sub.hsl_obj.info.num_negative_eigs + num_zero = n - sub.hsl_obj.info.rank + return num_neg, num_zero +end + +function _hsl_factor_and_solve!(sub::HSLR2NSubsolver{T, S}, g, s) where {T, S <: Ma97{T}} + ma97_factorize!(sub.hsl_obj) + if sub.hsl_obj.info.flag < 0 + return false, :err, 0, 0 + end + s .= g + ma97_solve!(sub.hsl_obj, s) + return true, :first_order, 1, 0 +end + +function _hsl_factor_and_solve!(sub::HSLR2NSubsolver{T, S}, g, s) where {T, S <: Ma57{T}} + ma57_factorize!(sub.hsl_obj) + s .= g + ma57_solve!(sub.hsl_obj, s, sub.work) + return true, :first_order, 1, 0 +end + +function (sub::HSLR2NSubsolver)(s, rhs, σ, atol, rtol, n; verbose = 0) + # Update diagonal shift in the vals array + @inbounds for i = 1:n + sub.vals[sub.nnzh + i] = σ + end + return _hsl_factor_and_solve!(sub, rhs, 
s) +end + +get_operator(sub::HSLR2NSubsolver) = sub + +function get_operator_norm(sub::HSLR2NSubsolver) + # Cheap estimate of norm using the stored values + # Exclude the shift values (last n elements) which are at indices nnzh+1:end + return norm(view(sub.vals, 1:sub.nnzh), Inf) +end + +# Helper to support `mul!` for HSL subsolver +function LinearAlgebra.mul!(y::AbstractVector, sub::HSLR2NSubsolver, x::AbstractVector) + coo_sym_prod!( + view(sub.rows, 1:sub.nnzh), + view(sub.cols, 1:sub.nnzh), + view(sub.vals, 1:sub.nnzh), + x, + y, + ) +end diff --git a/src/r2n_subsolver_common.jl b/src/r2n_subsolver_common.jl new file mode 100644 index 00000000..bf8a36b7 --- /dev/null +++ b/src/r2n_subsolver_common.jl @@ -0,0 +1,105 @@ +# ============================================================================== +# Subsolver Common Interface +# Defines abstract types and generic functions shared by R2N and R2NLS +# ============================================================================== + +export AbstractSubsolver, AbstractR2NSubsolver, AbstractR2NLSSubsolver +export initialize!, update_subsolver! +export get_operator, + get_jacobian, get_inertia, has_npc_direction, get_npc_direction, get_operator_norm + +""" + AbstractSubsolver{T} + +Base abstract type for all subproblem solvers. + +# Interface +Subtypes of `AbstractSubsolver` must be callable (act as functors) to solve the subproblem: + (subsolver::AbstractSubsolver)(s, rhs, σ, atol, rtol; verbose=0) +""" +abstract type AbstractSubsolver{T} end + +""" + AbstractR2NSubsolver{T} + +Abstract type for subsolvers used in the R2N (Newton-type) algorithm. +""" +abstract type AbstractR2NSubsolver{T} <: AbstractSubsolver{T} end + +""" + AbstractR2NLSSubsolver{T} + +Abstract type for subsolvers used in the R2NLS (Nonlinear Least Squares) algorithm. 
+""" +abstract type AbstractR2NLSSubsolver{T} <: AbstractSubsolver{T} end + +# ============================================================================== +# Generic Functions (Interface) +# ============================================================================== + +""" + initialize!(subsolver::AbstractSubsolver, args...) + +Perform initial setup for the subsolver (e.g., analyze sparsity, allocate workspace). +This is typically called once at the start of the optimization. +""" +function initialize! end + +""" + update_subsolver!(subsolver::AbstractSubsolver, model, x) + +Update the internal representation of the subsolver at point `x`. +For R2N solvers, this typically updates the Hessian or Operator. +For R2NLS solvers, this updates the Jacobian. +""" +function update_subsolver! end + +""" + get_operator(subsolver) + +Return the operator/matrix H for outer loop calculations (curvature, Cauchy point) in R2N. +""" +function get_operator end + +""" + get_jacobian(subsolver) + +Return the operator/matrix J for outer loop calculations in R2NLS. +""" +function get_jacobian end + +""" + get_operator_norm(subsolver) + +Return the norm (usually infinity norm or estimate) of the operator H or J. +Used for Cauchy point calculation. +""" +function get_operator_norm end + +# ============================================================================== +# Optional Interface Methods (Default Implementations) +# ============================================================================== + +""" + get_inertia(subsolver) + +Return (num_neg, num_zero) eigenvalues of the underlying operator. +Returns (-1, -1) if unknown or not applicable. +""" +function get_inertia(sub) + return -1, -1 +end + +""" + has_npc_direction(subsolver) + +Return `true` if a direction of negative curvature was found during the last solve, `false` otherwise. +""" +has_npc_direction(sub) = false + +""" + get_npc_direction(subsolver) + +Return a direction of negative curvature. 
Raises an error if none was found or if the subsolver does not support it. +""" +get_npc_direction(sub) = error("No NPC direction available for $(typeof(sub)).") diff --git a/test/allocs.jl b/test/allocs.jl index 70fbd214..570ce6ed 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,17 +30,26 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in - (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$name" for (name, symsolver) in ( + (:R2N, :R2NSolver), + (:R2N_exact, :R2NSolver), + (:R2, :FoSolver), + (:fomo, :FomoSolver), + (:lbfgs, :LBFGSSolver), + (:tron, :TronSolver), + (:trunk, :TrunkSolver), + ) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() - if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) - if (symsolver == :FoSolver || symsolver == :FomoSolver) + if unconstrained(nlp) || (bound_constrained(nlp) && (name == :TronSolver)) + if (name == :FoSolver || name == :FomoSolver) solver = eval(symsolver)(nlp; M = 2) # nonmonotone configuration allocates extra memory + elseif name == :R2N_exact + solver = eval(symsolver)(LBFGSModel(nlp), subsolver= :shifted_lbfgs) else solver = eval(symsolver)(nlp) end - if symsolver == :FomoSolver + if name == :FomoSolver T = eltype(nlp.meta.x0) stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0))) else @@ -48,8 +57,8 @@ if Sys.isunix() end with_logger(NullLogger()) do SolverCore.solve!(solver, nlp, stats) - SolverCore.reset!(solver) - NLPModels.reset!(nlp) + reset!(solver) + reset!(nlp) al = @wrappedallocs SolverCore.solve!(solver, nlp, stats) @test al == 0 end @@ -57,11 +66,21 @@ if Sys.isunix() end end - @testset "$symsolver" for symsolver in (:TrunkSolverNLS, :TronSolverNLS) + @testset "$name" for (name, symsolver) in ( + (:TrunkSolverNLS, :TrunkSolverNLS), + (:R2NLSSolver, :R2NLSSolver), + (:R2NLSSolver_QRMumps, :R2NLSSolver), + (:TronSolverNLS, :TronSolverNLS), + ) for model in 
NLPModelsTest.nls_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolverNLS)) - solver = eval(symsolver)(nlp) + if name == :R2NLSSolver_QRMumps + solver = eval(symsolver)(nlp, subsolver = :qrmumps) + else + solver = eval(symsolver)(nlp) + end + stats = GenericExecutionStats(nlp) with_logger(NullLogger()) do SolverCore.solve!(solver, nlp, stats) diff --git a/test/callback.jl b/test/callback.jl index 0418e9dc..4e063cc3 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -17,6 +17,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots end @test stats.iter == 8 + stats = with_logger(NullLogger()) do + R2N(nlp, callback = cb) + end + @test stats.iter == 8 + stats = with_logger(NullLogger()) do lbfgs(nlp, callback = cb) end @@ -56,6 +61,11 @@ end tron(nls, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + R2NLS(nls, callback = cb) + end + @test stats.iter == 8 end @testset "Test quasi-Newton callback" begin diff --git a/test/consistency.jl b/test/consistency.jl index 8ec758fa..fbe6a2a2 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -10,53 +10,82 @@ function consistency() @testset "Consistency" begin args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0] - @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] + @testset "NLP with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + ("R2N", R2N), + ( + "R2N_exact", + (nlp; kwargs...) -> + R2N(LBFGSModel(nlp), subsolver= :shifted_lbfgs; kwargs...), + ), + ("fomo", fomo), + ] with_logger(NullLogger()) do - NLPModels.reset!(unlp) - stats = mtd(unlp; args...) + reset!(unlp) + stats = solver(unlp; args...) 
@test stats isa GenericExecutionStats @test stats.status == :first_order - NLPModels.reset!(unlp) - stats = mtd(unlp; max_eval = 1) + reset!(unlp) + stats = solver(unlp; max_eval = 1) @test stats.status == :max_eval slow_nlp = ADNLPModel(x -> begin sleep(0.1) f(x) end, unlp.meta.x0) - stats = mtd(slow_nlp; max_time = 0.0) + stats = solver(slow_nlp; max_time = 0.0) @test stats.status == :max_time end end - @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] + @testset "Quasi-Newton NLP with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + ("R2N", R2N), + ( + "R2N_exact", + (nlp; kwargs...) -> + R2N(LBFGSModel(nlp), subsolver= :shifted_lbfgs; kwargs...), + ), + ("fomo", fomo), + ] with_logger(NullLogger()) do - NLPModels.reset!(qnlp) - stats = mtd(qnlp; args...) + reset!(qnlp) + stats = solver(qnlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order end end - @testset "NLS with $mtd" for mtd in [trunk] + @testset "NLS with $mtd" for (mtd, solver) in [ + ("trunk", trunk), + ("R2NLS", (unls; kwargs...) -> R2NLS(unls; kwargs...)), + ("R2NLS_CGLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :cgls; kwargs...)), + ("R2NLS_QRMumps", (unls; kwargs...) -> R2NLS(unls, subsolver = :qrmumps; kwargs...)), + ] with_logger(NullLogger()) do - stats = mtd(unls; args...) + stats = solver(unls; args...) 
@test stats isa GenericExecutionStats @test stats.status == :first_order NLPModels.reset!(unls) - stats = mtd(unls; max_eval = 1) + stats = solver(unls; max_eval = 1) @test stats.status == :max_eval slow_nls = ADNLSModel(x -> begin sleep(0.1) F(x) end, unls.meta.x0, nls_meta(unls).nequ) - stats = mtd(slow_nls; max_time = 0.0) + stats = solver(slow_nls; max_time = 0.0) @test stats.status == :max_time end end - @testset "Quasi-Newton NLS with $mtd" for mtd in [trunk] + @testset "Quasi-Newton NLS with $mtd" for (mtd, solver) in [("trunk", trunk)] with_logger(NullLogger()) do - stats = mtd(qnls; args...) + stats = solver(qnls; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order end diff --git a/test/restart.jl b/test/restart.jl index 38765465..e33dd1d7 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,4 +1,5 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( + (:R2N, :R2NSolver), (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), @@ -7,9 +8,11 @@ ) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) + + solver = eval(s)(nlp) + stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) @@ -25,12 +28,30 @@ end @testset "Test restart NLS with a different initial guess: $fun" for (fun, s) in ( (:tron, :TronSolverNLS), (:trunk, :TrunkSolverNLS), + (:R2NLSSolver, :R2NLSSolver), + (:R2NLSSolver_CG, :R2NLSSolver), + (:R2NLSSolver_LSQR, :R2NLSSolver), + (:R2NLSSolver_CR, :R2NLSSolver), + (:R2NLSSolver_LSMR, :R2NLSSolver), + (:R2NLSSolver_QRMumps, :R2NLSSolver), ) F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(F, [-1.2; 1.0], 2) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) + if fun == :R2NLSSolver_CG + solver = eval(s)(nlp, subsolver = :cgls) + elseif fun == :R2NLSSolver_LSQR + solver = eval(s)(nlp, subsolver = :lsqr) 
+ elseif fun == :R2NLSSolver_CR + solver = eval(s)(nlp, subsolver = :crls) + elseif fun == :R2NLSSolver_LSMR + solver = eval(s)(nlp, subsolver = :lsmr) + elseif fun == :R2NLSSolver_QRMumps + solver = eval(s)(nlp, subsolver = :qrmumps) + else + solver = eval(s)(nlp) + end stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) @@ -44,6 +65,7 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( + (:R2N, :R2NSolver), (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), @@ -52,15 +74,19 @@ end ) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) - + + solver = eval(s)(nlp) + stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) f2(x) = (x[1])^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f2, [-1.2; 1.0]) + + solver = eval(s)(nlp) + SolverCore.reset!(solver, nlp) stats = SolverCore.solve!(solver, nlp, stats, atol = 1e-10, rtol = 1e-10) @@ -71,12 +97,30 @@ end @testset "Test restart NLS with a different problem: $fun" for (fun, s) in ( (:tron, :TronSolverNLS), (:trunk, :TrunkSolverNLS), + (:R2NLSSolver, :R2NLSSolver), + (:R2NLSSolver_CG, :R2NLSSolver), + (:R2NLSSolver_LSQR, :R2NLSSolver), + (:R2NLSSolver_CR, :R2NLSSolver), + (:R2NLSSolver_LSMR, :R2NLSSolver), + (:R2NLSSolver_QRMumps, :R2NLSSolver), ) F(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(F, [-1.2; 1.0], 2) stats = GenericExecutionStats(nlp) - solver = eval(s)(nlp) + if fun == :R2NLSSolver_CG + solver = eval(s)(nlp, subsolver = :cgls) + elseif fun == :R2NLSSolver_LSQR + solver = eval(s)(nlp, subsolver = :lsqr) + elseif fun == :R2NLSSolver_CR + solver = eval(s)(nlp, subsolver = :crls) + elseif fun == :R2NLSSolver_LSMR + solver = eval(s)(nlp, subsolver = :lsmr) + elseif fun == :R2NLSSolver_QRMumps + solver = eval(s)(nlp, 
subsolver = :qrmumps) + else + solver = eval(s)(nlp) + end stats = SolverCore.solve!(solver, nlp, stats) @test stats.status == :first_order @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) diff --git a/test/runtests.jl b/test/runtests.jl index 7a0173a8..cfc865a4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,7 +2,7 @@ using Printf, LinearAlgebra, Logging, SparseArrays, Test using CUDA # additional packages -using ADNLPModels, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools +using ADNLPModels, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools, Krylov using NLPModelsTest, SolverParameters # this package @@ -16,6 +16,7 @@ include("test-gpu.jl") (TRONParameterSet, tron), (TRUNKParameterSet, trunk), (FOMOParameterSet, fomo), + (R2NParameterSet, R2N), ) nlp = BROWNDEN() params = eval(paramset)(nlp) @@ -50,7 +51,7 @@ end end @testset "Test iteration limit" begin - @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) + @testset "$fun" for fun in (R2, R2N,fomo, lbfgs, tron, trunk) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) @@ -58,7 +59,7 @@ end @test stats.status == :max_iter end - @testset "$(fun)-NLS" for fun in (tron, trunk) + @testset "$(fun)-NLS" for fun in (R2NLS, tron, trunk) f(x) = [x[1] - 1; 2 * (x[2] - x[1]^2)] nlp = ADNLSModel(f, [-1.2; 1.0], 2) @@ -68,20 +69,32 @@ end end @testset "Test unbounded below" begin - @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) + @testset "$name" for (name, solver) in [ + ("trunk", trunk), + ("lbfgs", lbfgs), + ("tron", tron), + ("R2", R2), + # ("R2N", R2N), + ( + "R2N_exact", + (nlp; kwargs...) 
-> + R2N(LBFGSModel(nlp), subsolver= :shifted_lbfgs; kwargs...), + ), + ("fomo", fomo), + ] T = Float64 x0 = [T(0)] f(x) = -exp(x[1]) nlp = ADNLPModel(f, x0) - stats = eval(fun)(nlp) + stats = solver(nlp) @test stats.status == :unbounded @test stats.objective < -one(T) / eps(T) end end -include("restart.jl") -include("callback.jl") +include("test_hsl_subsolver.jl") +# include("restart.jl") #TODO issue with rtol -10e-10include("callback.jl") include("consistency.jl") include("test_solvers.jl") include("incompatible.jl") diff --git a/test/test_hsl_subsolver.jl b/test/test_hsl_subsolver.jl new file mode 100644 index 00000000..fb2e8139 --- /dev/null +++ b/test/test_hsl_subsolver.jl @@ -0,0 +1,37 @@ +using HSL_jll +using HSL +if LIBHSL_isfunctional() + @testset "Testing HSL Subsolvers" begin + for (name, mySolver) in [ + ( + "R2N_ma97", + (nlp; kwargs...) -> R2N(nlp; subsolver = :ma97, kwargs...), + ), + ( + "R2N_ma97_armijo", + (nlp; kwargs...) -> R2N(nlp; subsolver = :ma97, npc_handler = :armijo, kwargs...), + ), + # ma57 + ( + "R2N_ma57", + (nlp; kwargs...) -> R2N(nlp; subsolver = :ma57, kwargs...), + ), + ( + "R2N_ma57_armijo", + (nlp; kwargs...) -> R2N(nlp; subsolver = :ma57, npc_handler = :armijo, kwargs...), + ), + ] + @testset "Testing solver: $name" begin + f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp = ADNLPModel(f, [-1.2; 1.0]) + + stats = mySolver(nlp) + @test stats.status == :first_order + @test isapprox(stats.solution, [1.0; 1.0], atol = 1e-6) + + end + end + end +else + println("Skipping HSL subsolver tests; LIBHSL is not functional.") +end diff --git a/test/test_solvers.jl b/test/test_solvers.jl index f836df3d..c7f3bcb7 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,6 +8,8 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), + ("R2N", R2N), + ("R2N_exact", (nlp; kwargs...) -> R2N(LBFGSModel(nlp), subsolver= :shifted_lbfgs; kwargs...)), ("fomo_r2", fomo), ("fomo_tr", (nlp; kwargs...) 
-> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)), ] @@ -41,6 +43,11 @@ function tests() ("trunk full Hessian", (nls; kwargs...) -> trunk(nls, variant = :Newton; kwargs...)), ("tron+cgls", (nls; kwargs...) -> tron(nls, subsolver = :cgls; kwargs...)), ("tron full Hessian", (nls; kwargs...) -> tron(nls, variant = :Newton; kwargs...)), + ("R2NLS", (unls; kwargs...) -> R2NLS(unls; kwargs...)), + ("R2NLS_CGLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :cgls; kwargs...)), + ("R2NLS_LSQR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsqr; kwargs...)), + ("R2NLS_CRLS", (unls; kwargs...) -> R2NLS(unls, subsolver = :crls; kwargs...)), + ("R2NLS_LSMR", (unls; kwargs...) -> R2NLS(unls, subsolver = :lsmr; kwargs...)), ] unconstrained_nls(solver) multiprecision_nls(solver, :unc)