From a2d51d25e5e7c289703d4e87b8345a255cea4e88 Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Tue, 14 Oct 2025 12:09:46 -0400
Subject: [PATCH 01/24] fix references

---
 paper/paper.bib | 99 ++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/paper/paper.bib b/paper/paper.bib
index 37da40fd..f5b195c2 100644
--- a/paper/paper.bib
+++ b/paper/paper.bib
@@ -1,7 +1,7 @@
 @Article{ aravkin-baraldi-orban-2022,
   Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
   Title = {A Proximal Quasi-{N}ewton Trust-Region Method for Nonsmooth Regularized Optimization},
-  Journal = siopt,
+  Journal = {SIAM J. Optim.},
   Year = 2022,
   Volume = 32,
   Number = 2,
@@ -12,7 +12,7 @@ @Article{ aravkin-baraldi-orban-2022
 @Article{ aravkin-baraldi-orban-2024,
   Author = {A. Y. Aravkin and R. Baraldi and D. Orban},
   Title = {A {L}evenberg–{M}arquardt Method for Nonsmooth Regularized Least Squares},
-  Journal = sisc,
+  Journal = {SIAM J. Sci. Comput.},
   Year = 2024,
   Volume = 46,
   Number = 4,
@@ -23,7 +23,7 @@ @Article{ aravkin-baraldi-orban-2024
 @Software{ leconte_linearoperators_jl_linear_operators_2023,
   Author = {Leconte, Geoffroy and Orban, Dominique and Soares Siqueira, Abel and contributors},
   license = {MPL-2.0},
-  Title = {{LinearOperators.jl: Linear Operators for Julia}},
+  Title = {{LinearOperators.jl}: Linear Operators for Julia},
   url = {https://github.com/JuliaSmoothOptimizers/LinearOperators.jl},
   version = {2.6.0},
   Year = 2023,
@@ -32,34 +32,33 @@ @Software{ leconte_linearoperators_jl_linear_operators_2023
 @Article{ leconte-orban-2023,
   Author = {G. Leconte and D. Orban},
   Title = {The Indefinite Proximal Gradient Method},
-  Journal = coap,
+  Journal = {Comput. Optim. Appl.},
   Year = 2025,
   Volume = 91,
   Number = 2,
-  Pages = 861--903,
+  Pages = {861--903},
   doi = {10.1007/s10589-024-00604-5},
 }
 
 @TechReport{ leconte-orban-2023-2,
   Author = {G. Leconte and D. Orban},
   Title = {Complexity of trust-region methods with unbounded {H}essian approximations for smooth and nonsmooth optimization},
-  Institution = gerad,
+  Institution = {GERAD},
   Year = 2023,
   Type = {Cahier},
   Number = {G-2023-65},
-  Address = gerad-address,
+  Address = {Montr\'eal, QC, Canada},
   url = {https://www.gerad.ca/fr/papers/G-2023-65},
 }
 
 @TechReport{ diouane-habiboullah-orban-2024,
   Author = {Youssef Diouane and Mohamed Laghdaf Habiboullah and Dominique Orban},
-  Title = {A proximal modified quasi-Newton method for nonsmooth regularized optimization},
+  Title = {A proximal modified quasi-{N}ewton method for nonsmooth regularized optimization},
   Institution = {GERAD},
   Year = 2024,
   Type = {Cahier},
   Number = {G-2024-64},
   Address = {Montr\'eal, Canada},
-  doi = {10.48550/arxiv.2409.19428},
   url = {https://www.gerad.ca/fr/papers/G-2024-64},
 }
 
@@ -74,47 +73,47 @@ @TechReport{ diouane-gollier-orban-2024
   doi = {10.13140/RG.2.2.16095.47527},
 }
 
-@Misc{orban-siqueira-cutest-2020,
-  author = {D. Orban and A. S. Siqueira and {contributors}},
-  title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface},
-  month = {October},
-  url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl},
-  year = {2020},
-  DOI = {10.5281/zenodo.1188851},
+@Software{ orban-siqueira-cutest-2020,
+  author = {D. Orban and A. S. Siqueira and {contributors}},
+  title = {{CUTEst.jl}: {J}ulia's {CUTEst} interface},
+  month = {October},
+  url = {https://github.com/JuliaSmoothOptimizers/CUTEst.jl},
+  year = {2020},
+  DOI = {10.5281/zenodo.1188851},
 }
 
-@Misc{orban-siqueira-nlpmodels-2020,
-  author = {D. Orban and A. S. Siqueira and {contributors}},
-  title = {{NLPModels.jl}: Data Structures for Optimization Models},
-  month = {July},
-  url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl},
-  year = {2020},
-  DOI = {10.5281/zenodo.2558627},
+@Software{ orban-siqueira-nlpmodels-2020,
+  author = {D. Orban and A. S. Siqueira and {contributors}},
+  title = {{NLPModels.jl}: Data Structures for Optimization Models},
+  month = {July},
+  url = {https://github.com/JuliaSmoothOptimizers/NLPModels.jl},
+  year = {2020},
+  DOI = {10.5281/zenodo.2558627},
 }
 
-@Misc{jso,
-  author = {T. Migot and D. Orban and A. S. Siqueira},
-  title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization},
-  year = {2021},
-  url = {https://juliasmoothoptimizers.github.io/},
-  doi = {10.5281/zenodo.2655082},
+@Software{ jso,
+  author = {T. Migot and D. Orban and A. S. Siqueira},
+  title = {The {JuliaSmoothOptimizers} Ecosystem for Linear and Nonlinear Optimization},
+  year = {2021},
+  url = {https://juliasmoothoptimizers.github.io/},
+  doi = {10.5281/zenodo.2655082},
 }
 
-@Misc{migot-orban-siqueira-optimizationproblems-2023,
+@Software{ migot-orban-siqueira-optimizationproblems-2023,
   author = {T. Migot and D. Orban and A. S. Siqueira},
-  title = {OptimizationProblems.jl: A collection of optimization problems in Julia},
+  title = {{OptimizationProblems.jl}: A collection of optimization problems in Julia},
   year = {2023},
   doi = {10.5281/zenodo.3672094},
   url = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl},
 }
 
-@techreport{kim-park-2008,
-  title = {Sparse Nonnegative Matrix Factorization for Clustering},
-  author = {Jingu Kim and Haesun Park},
-  institution = {Georgia Inst. of Technology},
-  number = {GT-CSE-08-01},
-  year = {2008},
-  url = {http://hdl.handle.net/1853/20058},
+@techreport{ kim-park-2008,
+  title = {Sparse Nonnegative Matrix Factorization for Clustering},
+  author = {Jingu Kim and Haesun Park},
+  institution = {Georgia Inst. of Technology},
+  number = {GT-CSE-08-01},
+  year = {2008},
+  url = {http://hdl.handle.net/1853/20058},
 }
 
 @InProceedings{ stella-themelis-sopasakis-patrinos-2017,
@@ -126,10 +125,10 @@ @InProceedings{ stella-themelis-sopasakis-patrinos-2017
   doi = {10.1109/CDC.2017.8263933},
 }
 
-@article{demarchi-jia-kanzow-mehlitz-2023,
+@article{ demarchi-jia-kanzow-mehlitz-2023,
   author = {De~Marchi, Alberto and Jia, Xiaoxi and Kanzow, Christian and Mehlitz, Patrick},
   title = {Constrained composite optimization and augmented {L}agrangian methods},
-  journal = {Mathematical Programming},
+  journal = {Math. Program.},
   year = {2023},
   month = {9},
   volume = {201},
@@ -141,7 +140,7 @@ @article{demarchi-jia-kanzow-mehlitz-2023
 @Article{ themelis-stella-patrinos-2017,
   Author = {Themelis, Andreas and Stella, Lorenzo and Patrinos, Panagiotis},
   Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line seach Algorithms},
-  Journal = siopt,
+  Journal = {SIAM J. Optim.},
   Year = 2018,
   Volume = 28,
   Number = 3,
@@ -149,13 +148,13 @@ @Article{ themelis-stella-patrinos-2017
   doi = {10.1137/16M1080240},
 }
 
-@article{eckstein1992douglas,
-  title={On the Douglas—Rachford splitting method and the proximal point algorithm for maximal monotone operators},
-  author={Eckstein, Jonathan and Bertsekas, Dimitri P},
-  journal={Mathematical programming},
-  volume={55},
-  number={1},
-  pages={293--318},
-  year={1992},
-  publisher={Springer}
+@article{ eckstein-bertsekas-1992,
+  title = {On the {D}ouglas—{R}achford splitting method and the proximal point algorithm for maximal monotone operators},
+  author = {Eckstein, Jonathan and Bertsekas, Dimitri P},
+  journal = {Math. Program.},
+  volume = {55},
+  number = {1},
+  pages = {293--318},
+  year = {1992},
+  publisher = {Springer}
 }

From 0447936fc993810fca3b4b49020a859b517ebb1d Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Tue, 14 Oct 2025 13:45:59 -0400
Subject: [PATCH 02/24] remove release on push

---
 .github/workflows/draft-pdf.yml | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
index 1f6fc01c..5bbf32d0 100644
--- a/.github/workflows/draft-pdf.yml
+++ b/.github/workflows/draft-pdf.yml
@@ -28,25 +28,4 @@ jobs:
         # This is the output path where Pandoc will write the compiled
         # PDF. Note, this should be the same directory as the input
         # paper.md
-        path: paper/paper.pdf
-      - name: Create release
-        if: github.event_name == 'push'
-        uses: rymndhng/release-on-push-action@master
-        id: release
-        with:
-          bump_version_scheme: patch
-          tag_prefix: v
-          release_body: ""
-          use_github_release_notes: true
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Upload PDF to release
-        if: github.event_name == 'push'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          repo_token: ${{ secrets.GITHUB_TOKEN }}
-          file: paper/paper.pdf
-          asset_name: joss-draft.pdf
-          tag: ${{ steps.release.outputs.tag_name }}
-          overwrite: true
-          body: ""
+        path: paper/paper.pdf
\ No newline at end of file

From 70f3e044b6ec51d5928b3e94deed6639a846b928 Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Tue, 14 Oct 2025 13:54:35 -0400
Subject: [PATCH 03/24] fix Acknowledgement name

---
 paper/paper.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paper/paper.md b/paper/paper.md
index 35f63522..d54aceb9 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -203,7 +203,7 @@ In ongoing research, the package will be extended with algorithms that enable to
 
 # Acknowledgements
 
-The authors would like to thank Alberto Demarchi for his implementation of the Augmented Lagrangian solver.
+The authors would like to thank Alberto De Marchi for his implementation of the Augmented Lagrangian solver.
 Mohamed Laghdaf Habiboullah is supported by an excellence FRQNT grant.
 Youssef Diouane, Maxence Gollier and Dominique Orban are partially supported by an NSERC Discovery Grant.
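PATCH 04 below removes, among other passages, the paper's description of the shifted proximal mappings of ShiftedProximalOperators.jl, of the form argmin over t of (1/2)||t - q||^2 + nu psi(t + s; x) + chi(s + t | Delta B). For a separable model such as psi(t + s; x) = lambda ||x + s + t||_1 with an l-infinity-norm ball, each one-dimensional piece of that mapping is convex, so a shifted soft-threshold followed by clamping is exact. The sketch below is illustrative only, under exactly those assumptions; the function names are invented and this is not the ShiftedProximalOperators.jl API.

```julia
# Minimal sketch of the shifted proximal mapping quoted above, assuming
# psi(t + s; x) = lambda*||x + s + t||_1 and an l-infinity trust region.
# Invented names; not the ShiftedProximalOperators.jl API.
soft(z, c) = sign(z) * max(abs(z) - c, zero(z))

function shifted_prox_l1_linf!(t, q, x, s, λ, ν, Δ)
    c = ν * λ
    @inbounds for i in eachindex(t, q, x, s)
        a = x[i] + s[i]                        # shift entering the l1 term
        ti = soft(q[i] + a, c) - a             # unconstrained 1-D minimizer
        t[i] = clamp(ti, -Δ - s[i], Δ - s[i])  # enforce |s[i] + t[i]| <= Δ
    end
    return t
end
```

Because each coordinate problem is convex and unimodal, clamping the unconstrained minimizer onto the interval gives the exact constrained minimizer, which is what makes the in-place, allocation-free evaluation cheap.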
From 974cc476034f0c6aaa77fcaccdbeeb38194de70c Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 09:48:53 -0400 Subject: [PATCH 04/24] paper: reduce number of words --- paper/paper.md | 49 +------------------------------------------------ 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index d54aceb9..92c66773 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -73,16 +73,6 @@ Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver ta By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022]. However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints (see examples below), so that the overall approach is efficient for large-scale problems. -When computing a step by (approximately) minimizing a model, [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) implements efficient allocation-free shifted proximal mappings. -Specifically, it supports shifted proximal operators of the form -$$ - \underset{t \in \mathbb{R}^n}{\arg\min} \, { \tfrac{1}{2} \|t - q\|_2^2 + \nu \psi(t + s; x) + \chi(s + t \mid \Delta \mathbb{B})} -$$ -where $q$ is given, $x$ and $s$ are fixed shifts, $\chi(\cdot \mid \Delta \mathbb{B})$ is the indicator of a ball of radius $\Delta > 0$ defined by a certain norm, and $\psi(\cdot; x)$ is a model of $h$ about $x$. -It is common to set $\psi(t + s; x) = h(x + s + t)$. - -These shifted operators allow to (i) incorporate bound or trust-region constraints via the indicator, which is required for the **TR** and **TRDH** solvers, and (ii) evaluate the above in place, without additional allocations, which is currently not possible with ProximalOperators.jl. - RegularizedOptimization.jl provides a consistent API to formulate optimization problems and apply different solvers. It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking. @@ -119,19 +109,11 @@ The package includes a comprehensive suite of unit tests that cover all function Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly. Documentation is built using Documenter.jl. -## Application - -A novel implementation of the exact penalty approach [@diouane-gollier-orban-2024] for equality-constrained smooth optimization is being developed based on RegularizedOptimization.jl. -In it, $h(x) = \|c(x)\|$ and the model $\psi(\cdot; x)$ differs from $h$ itself. -Specifically, $\psi(\cdot; x)$ is the norm of a linearization of $c$ about $x$. -This is not covered in the current version of [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl). 
- # Examples -We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on two nonsmooth and nonconvex problems: +We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a nonsmooth and nonconvex problem: - **Support Vector Machine (SVM) with $\ell_{1/2}^{1/2}$ penalty** for image classification [@aravkin-baraldi-orban-2024]. -- **Nonnegative Matrix Factorization (NNMF) with $\ell_0$ penalty and bound constraints** [@kim-park-2008]. Below is a condensed example showing how to define and solve SVM problem, and perform a solve followed by a re-solve: @@ -152,19 +134,6 @@ solve!(solver, reg_nlp, stats; atol=1e-4, rtol=1e-4, verbose=1, sub_kwargs=(max_ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_iter=200,)) ``` -The NNMF problem can be set up in a similar fashion: - -```julia -Random.seed!(1234) -m, n, k = 100, 50, 5 -model, nls_model, _, selected = nnmf_model(m, n, k) # Build NNMF model -x0 = rand(model.meta.nvar) # Initial point -λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 200 # Regularization parameter -h = NormL0(λ) # Nonsmooth term -reg_nls = RegularizedNLSModel(nls_model, h) # Regularized problem for LM -solver = LMSolver(reg_nls) # Choose solver -``` - ## Numerical results We compare **TR**, **R2N**, **LM** and **LMTR** from our library. @@ -184,22 +153,6 @@ Note that, the final objective values differ due to the nonconvexity of the prob However, it requires more proximal evaluations, but these are inexpensive. **LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest. Note that here, **LMTR** achieves the lowest objective value. -- **NNMF with constrained $\ell_0$ penalty:** **LMTR** is the fastest, and requires a fewer number of function evaluations than all the other solvers. Followed by **TR** which is the second fastest and requires the fewest gradient evaluations, however it achieves the highest objective value. -Note that both **LMTR** and **LM** achieve the lowest objective value. - -Additional tests (e.g., other regularizers, constraint types, and scaling dimensions) have also been conducted, and a full benchmarking campaign is currently underway. - -# Conclusion - -The experiments highlight the effectiveness of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). - - - - - - - -In ongoing research, the package will be extended with algorithms that enable to reduce the number of proximal evaluations, especially when the proximal mapping of $h$ is expensive to compute. 
# Acknowledgements From ce435f5d6dea7a51a93b292dfa330bccfc7b3dc1 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 09:55:48 -0400 Subject: [PATCH 05/24] paper: further reduce word count --- paper/examples/Benchmark.tex | 5 ----- paper/paper.md | 6 ++---- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex index 41685108..c6d883dd 100644 --- a/paper/examples/Benchmark.tex +++ b/paper/examples/Benchmark.tex @@ -5,9 +5,4 @@ R2N (LSR1, SVM) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\ LM (SVM) & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\ LMTR (SVM) & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\ - \hline - TR (LBFGS, NNMF) & first\_order & 0.1014 & 42 & 40 & 3160 & 976.06 \\ - R2N (LBFGS, NNMF) & first\_order & 0.4913 & 169 & 107 & 17789 & 411.727 \\ - LM (NNMF) & first\_order & 0.1157 & 14 & 7042 & 2601 & 131.184 \\ - LMTR (NNMF) & first\_order & 0.0697 & 9 & 4066 & 1435 & 131.186 \\\hline \end{tabular} diff --git a/paper/paper.md b/paper/paper.md index 92c66773..cf078a74 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -111,9 +111,8 @@ Documentation is built using Documenter.jl. # Examples -We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a nonsmooth and nonconvex problem: - -- **Support Vector Machine (SVM) with $\ell_{1/2}^{1/2}$ penalty** for image classification [@aravkin-baraldi-orban-2024]. +We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with a $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024]. +This problem is nonsmmooth and nonconvex. Below is a condensed example showing how to define and solve SVM problem, and perform a solve followed by a re-solve: @@ -139,7 +138,6 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ We compare **TR**, **R2N**, **LM** and **LMTR** from our library. We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. -On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively. The subproblem solver is **R2**. \input{examples/Benchmark.tex} From 3bc8a7b73cb289a151216c7b1486a38fcad1ff34 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 10:00:29 -0400 Subject: [PATCH 06/24] paper: rename "Examples" section to "Example" --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index cf078a74..fcd6f05b 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -109,7 +109,7 @@ The package includes a comprehensive suite of unit tests that cover all function Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly. Documentation is built using Documenter.jl. -# Examples +# Example We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with a $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024]. This problem is nonsmmooth and nonconvex. 
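The condensed SVM example retained by the two patches above pairs R2N with R2 as subproblem solver. Per the paper's solver overview and [@aravkin-baraldi-orban-2022], the step those quadratic-regularization methods compute minimizes a simple model of f plus the nonsmooth term; for h = lambda||.||_1 that minimizer reduces to a soft-thresholded proximal-gradient point. A minimal sketch under that assumption (illustrative names, not the RegularizedOptimization.jl internals):

```julia
# One R2-type step: minimize over s the model
#   g's + (σ/2)||s||^2 + h(x + s),   with g = ∇f(x),
# which for h = λ||.||_1 is the update s = soft(x - g/σ, λ/σ) - x.
# Simplified model of the method, not the package's implementation.
soft(z, c) = sign(z) * max(abs(z) - c, zero(z))

r2_step(x, g, σ, λ) = map((xi, gi) -> soft(xi - gi / σ, λ / σ) - xi, x, g)

x = [1.0, -0.2, 0.05]
g = [0.3, -0.1, 0.4]
s = r2_step(x, g, 2.0, 0.5)  # candidate step, then accepted or rejected
```

Each such step costs one proximal evaluation, which is why these methods trade many cheap proximal computations for fewer gradient evaluations.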
From e551d8b3c567ef610dacc7118fad82870d6ced12 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 13:32:01 -0400 Subject: [PATCH 07/24] paper: apply corrections from Mohamed --- paper/paper.bib | 4 ++-- paper/paper.md | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index f5b195c2..92939515 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -101,7 +101,7 @@ @Software{ jso @Software{ migot-orban-siqueira-optimizationproblems-2023, author = {T. Migot and D. Orban and A. S. Siqueira}, - title = {{OptimizationProblems.jl}: A collection of optimization problems in Julia}, + title = {{OptimizationProblems.jl}: A collection of optimization problems in {J}ulia}, year = {2023}, doi = {10.5281/zenodo.3672094}, url = {https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl}, @@ -139,7 +139,7 @@ @article{ demarchi-jia-kanzow-mehlitz-2023 @Article{ themelis-stella-patrinos-2017, Author = {Themelis, Andreas and Stella, Lorenzo and Patrinos, Panagiotis}, - Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line seach Algorithms}, + Title = {Forward-Backward Envelope for the Sum of Two Nonconvex Functions: Further Properties and Nonmonotone line search Algorithms}, Journal = {SIAM J. Optim.}, Year = 2018, Volume = 28, diff --git a/paper/paper.md b/paper/paper.md index fcd6f05b..d1738184 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -39,7 +39,7 @@ header-includes: | where $f: \mathbb{R}^n \to \mathbb{R}$ and $c: \mathbb{R}^n \to \mathbb{R}^m$ are continuously differentiable, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous. The nonsmooth objective $h$ can be a *regularizer* such as a sparsity-inducing penalty, model simple constraints such as $x$ belonging to a simple convex set, or be a combination of both. All $f$, $h$ and $c$ can be nonconvex. -Together with the companion library [ShiftedProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ShiftedProximalOperators.jl) described below, RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp}, and developing novel solvers. +RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp}, and developing novel solvers. Currently, the following solvers are implemented: - **Trust-region solvers TR and TRDH** [@aravkin-baraldi-orban-2022;@leconte-orban-2023] @@ -52,7 +52,7 @@ If second derivatives are not available or too costly to compute, quasi-Newton a In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. -All solvers have a non-monotone mode that enhance performance in practice on certain problems [@leconte-orban-2023;@diouane-habiboullah-orban-2024]. +All solvers have a non-monotone mode that enhances performance in practice on certain problems [@leconte-orban-2023;@diouane-habiboullah-orban-2024]. All are implemented in an in-place fashion, so that re-solves incur no allocations. 
To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor. @@ -77,7 +77,7 @@ RegularizedOptimization.jl provides a consistent API to formulate optimization p It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking. The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems. -Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023], but a use can easily interface or model their own smooth objective. +Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023], but a user can easily interface or model their own smooth objective. The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets. @@ -112,9 +112,9 @@ Documentation is built using Documenter.jl. # Example We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with a $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024]. -This problem is nonsmmooth and nonconvex. +This problem is nonsmooth and nonconvex. -Below is a condensed example showing how to define and solve SVM problem, and perform a solve followed by a re-solve: +Below is a condensed example showing how to define and solve an SVM problem, and perform a solve followed by a re-solve: ```julia using LinearAlgebra, Random, ProximalOperators @@ -135,7 +135,7 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ ## Numerical results -We compare **TR**, **R2N**, **LM** and **LMTR** from our library. +We compare **TR**, **R2N**, **LM** and **LMTR** from this library. We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. The subproblem solver is **R2**. @@ -145,9 +145,9 @@ The subproblem solver is **R2**. - Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products. All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point. -Note that, the final objective values differ due to the nonconvexity of the problems. +Note that the final objective values differ due to the nonconvexity of the problems. 
-- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations compared to all the other solvers. +- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations. However, it requires more proximal evaluations, but these are inexpensive. **LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest. Note that here, **LMTR** achieves the lowest objective value. From 485db85a8e1b3690aa4e5bd660409a123c9e6938 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 13:42:03 -0400 Subject: [PATCH 08/24] paper: add conclusion back --- paper/paper.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paper/paper.md b/paper/paper.md index d1738184..45afc5f1 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -152,6 +152,12 @@ However, it requires more proximal evaluations, but these are inexpensive. **LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest. Note that here, **LMTR** achieves the lowest objective value. +# Conclusion + +The experiments show the applicability of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). + +Ongoing research aims to reduce the number of proximal evaluations. + # Acknowledgements The authors would like to thank Alberto De Marchi for his implementation of the Augmented Lagrangian solver. From 14753babac8d3aa31e8e8483f802d6caff8ae010 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 13:43:11 -0400 Subject: [PATCH 09/24] paper: remove non-monotone --- paper/paper.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 45afc5f1..beb1f655 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -52,8 +52,7 @@ If second derivatives are not available or too costly to compute, quasi-Newton a In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. -All solvers have a non-monotone mode that enhances performance in practice on certain problems [@leconte-orban-2023;@diouane-habiboullah-orban-2024]. -All are implemented in an in-place fashion, so that re-solves incur no allocations. +All solvers are implemented in an in-place fashion, so that re-solves incur no allocations. To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor. 
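The results passage edited above notes that, for the LM and LMTR solvers, the gradient count tallies Jacobian-vector and adjoint-Jacobian-vector products. That convention follows from the least-squares structure f(x) = (1/2)||F(x)||^2 these solvers assume: the gradient is J(x)'F(x), one adjoint product per evaluation. A small self-contained illustration (the residual below is a made-up toy, not the paper's SVM problem):

```julia
using LinearAlgebra

# For f(x) = (1/2)||F(x)||^2 the gradient is ∇f(x) = J(x)'F(x): one
# adjoint-Jacobian-vector product per gradient, which is what the
# benchmark table counts under #∇f for LM and LMTR.
F(x) = [x[1]^2 + x[2] - 1.0, x[1] - x[2]^2]  # toy residual (illustrative)
J(x) = [2x[1] 1.0; 1.0 -2x[2]]               # Jacobian of the toy residual
f(x) = 0.5 * sum(abs2, F(x))
∇f(x) = J(x)' * F(x)

x0 = [0.5, 0.25]
f(x0), ∇f(x0)
```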
From ef6870c3f394a8cc6ed39f1e213fe5f5536c2887 Mon Sep 17 00:00:00 2001 From: Maxence Gollier <134112149+MaxenceGollier@users.noreply.github.com> Date: Fri, 17 Oct 2025 09:55:18 -0400 Subject: [PATCH 10/24] paper: Apply suggestions from dpo review Co-authored-by: Dominique --- paper/paper.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index beb1f655..30691bd4 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -52,7 +52,7 @@ If second derivatives are not available or too costly to compute, quasi-Newton a In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. -All solvers are implemented in an in-place fashion, so that re-solves incur no allocations. +All solvers are implemented in place, so re-solves incur no allocations. To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor. @@ -136,7 +136,7 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ We compare **TR**, **R2N**, **LM** and **LMTR** from this library. -We report the following solver statistics in the table: the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. +The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. The subproblem solver is **R2**. \input{examples/Benchmark.tex} @@ -151,9 +151,7 @@ However, it requires more proximal evaluations, but these are inexpensive. **LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest. Note that here, **LMTR** achieves the lowest objective value. -# Conclusion -The experiments show the applicability of the solvers implemented in [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). Ongoing research aims to reduce the number of proximal evaluations. From 6dd26ba8852a0fd93913cc4c773fd6a8fbda595d Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 09:59:22 -0400 Subject: [PATCH 11/24] paper: apply other suggestions from dpo review --- paper/paper.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 30691bd4..506d6695 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -134,9 +134,10 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ ## Numerical results -We compare **TR**, **R2N**, **LM** and **LMTR** from this library. +We compare **TR**, **R2N**, **LM** and **LMTR** from our library. The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. 
+On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively. The subproblem solver is **R2**. \input{examples/Benchmark.tex} @@ -144,14 +145,11 @@ The subproblem solver is **R2**. - Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products. All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point. -Note that the final objective values differ due to the nonconvexity of the problems. +Note that the final objective values differ due to the nonconvexity of the problem. -- **SVM with $\ell^{1/2}$ penalty:** **R2N** is the fastest, requiring the fewest gradient evaluations. +**R2N** is the fastest, requiring the fewest gradient evaluations. However, it requires more proximal evaluations, but these are inexpensive. **LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest. -Note that here, **LMTR** achieves the lowest objective value. - - Ongoing research aims to reduce the number of proximal evaluations. From 39bde6dc487f1dee0f5de46a3c1f0cb094fa1c57 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 10:03:22 -0400 Subject: [PATCH 12/24] paper: add workflow to count number of words --- .github/workflows/wordcount.yml | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/wordcount.yml diff --git a/.github/workflows/wordcount.yml b/.github/workflows/wordcount.yml new file mode 100644 index 00000000..b17c01e1 --- /dev/null +++ b/.github/workflows/wordcount.yml @@ -0,0 +1,34 @@ +name: Word Count + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + count-words: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Install Pandoc + run: sudo apt-get update && sudo apt-get install -y pandoc + + - name: Count words in paper.md + id: wordcount + run: | + COUNT=$(pandoc paper.md -t plain | wc -w) + echo "count=$COUNT" >> $GITHUB_OUTPUT + + - name: Comment on PR with word count + if: github.event_name == 'pull_request' + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: joss-wordcount + message: | + 📝 **JOSS Word Count** + + The current word count for `paper.md` is **${{ steps.wordcount.outputs.count }}** words. 
+ + _(JOSS recommends 250-1000 words for the main text.)_ \ No newline at end of file From df6364c0a86f54e246a698746c1d21dec84acb5b Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 10:13:12 -0400 Subject: [PATCH 13/24] fix wordcount workflow --- .github/workflows/wordcount.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wordcount.yml b/.github/workflows/wordcount.yml index b17c01e1..5d8af740 100644 --- a/.github/workflows/wordcount.yml +++ b/.github/workflows/wordcount.yml @@ -4,6 +4,11 @@ on: pull_request: types: [opened, synchronize, reopened] +permissions: + contents: read + pull-requests: write + issues: write + jobs: count-words: runs-on: ubuntu-latest @@ -18,7 +23,7 @@ jobs: - name: Count words in paper.md id: wordcount run: | - COUNT=$(pandoc paper.md -t plain | wc -w) + COUNT=$(pandoc paper/paper.md -t plain | wc -w) echo "count=$COUNT" >> $GITHUB_OUTPUT - name: Comment on PR with word count From 25fac28a09d837d2fbf8aaf8d17a194f58e28916 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 10:17:49 -0400 Subject: [PATCH 14/24] comment wordcount only if from same repo --- .github/workflows/wordcount.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/wordcount.yml b/.github/workflows/wordcount.yml index 5d8af740..5c7be5d7 100644 --- a/.github/workflows/wordcount.yml +++ b/.github/workflows/wordcount.yml @@ -25,9 +25,13 @@ jobs: run: | COUNT=$(pandoc paper/paper.md -t plain | wc -w) echo "count=$COUNT" >> $GITHUB_OUTPUT + echo "📝 JOSS Word Count: $COUNT words" + if [ "$COUNT" -gt 1000 ]; then + echo "::warning title=JOSS Word Count::Paper exceeds 1000 words ($COUNT)." + fi - name: Comment on PR with word count - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository uses: marocchino/sticky-pull-request-comment@v2 with: header: joss-wordcount From ca5113c354d4a994fd6b0e2003c9afbf5c17bee6 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 13:30:53 -0400 Subject: [PATCH 15/24] paper: apply suggestions from D1Lab --- paper/examples/Benchmark.tex | 8 ++++---- paper/paper.md | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex index c6d883dd..7f423056 100644 --- a/paper/examples/Benchmark.tex +++ b/paper/examples/Benchmark.tex @@ -1,8 +1,8 @@ \begin{tabular}{lcrrrrr} \hline \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline - TR (LSR1, SVM) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\ - R2N (LSR1, SVM) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\ - LM (SVM) & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\ - LMTR (SVM) & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\ + TR (LSR1) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\ + R2N (LSR1) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\ + LM & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\ + LMTR & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\ \end{tabular} diff --git a/paper/paper.md b/paper/paper.md index 506d6695..e08c288e 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -44,7 +44,7 @@ Currently, the following solvers are implemented: - **Trust-region solvers TR and TRDH** [@aravkin-baraldi-orban-2022;@leconte-orban-2023] - **Quadratic regularization 
solvers R2, R2DH and R2N** [@diouane-habiboullah-orban-2024;@aravkin-baraldi-orban-2022] -- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024] used when $f$ is a least-squares residual +- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024]. - **Augmented Lagrangian solver AL** [@demarchi-jia-kanzow-mehlitz-2023]. All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products. @@ -134,15 +134,15 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ ## Numerical results -We compare **TR**, **R2N**, **LM** and **LMTR** from our library. +We compare **TR**, **R2N**, **LM** and **LMTR** from our library on the SVM problem. -The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time in seconds and the final objective value. +The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time and the final objective value. On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively. The subproblem solver is **R2**. \input{examples/Benchmark.tex} -- Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products. +Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products. All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point. Note that the final objective values differ due to the nonconvexity of the problem. @@ -155,8 +155,8 @@ Ongoing research aims to reduce the number of proximal evaluations. # Acknowledgements -The authors would like to thank Alberto De Marchi for his implementation of the Augmented Lagrangian solver. -Mohamed Laghdaf Habiboullah is supported by an excellence FRQNT grant. -Youssef Diouane, Maxence Gollier and Dominique Orban are partially supported by an NSERC Discovery Grant. +The authors would like to thank A. De Marchi for the Augmented Lagrangian solver. +M. L. Habiboullah is supported by an excellence FRQNT grant. +Y. Diouane, M. Gollier and D. Orban are partially supported by an NSERC Discovery Grant. # References From 17f213036347b9bbdfa9e0ce5b4c1e0d3fdc1003 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 13:58:01 -0400 Subject: [PATCH 16/24] paper: remove more words --- paper/paper.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index e08c288e..08bb256d 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -76,7 +76,6 @@ RegularizedOptimization.jl provides a consistent API to formulate optimization p It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking. The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems. 
-Large collections of such problems are available in [CUTEst.jl](https://github.com/JuliaSmoothOptimizers/CUTEst.jl) [@orban-siqueira-cutest-2020] and [OptimizationProblems.jl](https://github.com/JuliaSmoothOptimizers/OptimizationProblems.jl) [@migot-orban-siqueira-optimizationproblems-2023], but a user can easily interface or model their own smooth objective. The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets. @@ -102,12 +101,6 @@ Hessian–vector products $v \mapsto Hv$ can be obtained via automatic different Limited-memory and diagonal quasi-Newton approximations can be selected from [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl). This design allows solvers to exploit second-order information without explicitly forming dense or sparse Hessians, which is often expensive in time and memory, particularly at large scale. -## Testing and documentation - -The package includes a comprehensive suite of unit tests that cover all functionalities, ensuring reliability and correctness. -Extensive documentation is provided, including a user guide, API reference, and examples to help users get started quickly. -Documentation is built using Documenter.jl. - # Example We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with a $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024]. From fffd47cd3903b127701bc097cfba96876e494436 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 14:35:48 -0400 Subject: [PATCH 17/24] paper: remove more words based on the suggestions of D1Lab --- paper/paper.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 08bb256d..472ff5cd 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -48,12 +48,13 @@ Currently, the following solvers are implemented: - **Augmented Lagrangian solver AL** [@demarchi-jia-kanzow-mehlitz-2023]. All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products. -If second derivatives are not available or too costly to compute, quasi-Newton approximations can be used. -In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. +If second derivatives are not available, quasi-Newton approximations can be used. +The proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. -The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. +The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve~\eqref{eq:nlp}. All solvers are implemented in place, so re-solves incur no allocations. -To illustrate our claim of extensibility, a first version of the AL solver was implemented and submitted by an external contributor. +To illustrate our claim of extensibility, a first version of the AL solver was implemented by an external contributor. 
+Furthermore, a nonsmooth penalty approach, described in [@diouane-gollier-orban-2024] is currently being developed, relying on the library’s solvers to efficiently solve its subproblems. @@ -67,32 +68,32 @@ To illustrate our claim of extensibility, a first version of the AL solver was i ## Model-based framework for nonsmooth methods In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements splitting schemes and line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017]. -Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the gradient of $f$ modified by a L-BFGS Quasi-Newton approximation, followed by proximal steps on the nonsmooth part $h$. +Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the gradient of $f$ modified by a L-BFGS Quasi-Newton approximation, followed by proximal steps on $h$. By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022]. -However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints (see examples below), so that the overall approach is efficient for large-scale problems. +However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints, so that the overall approach is efficient for large-scale problems. -RegularizedOptimization.jl provides a consistent API to formulate optimization problems and apply different solvers. -It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem, an academic organization for nonlinear optimization software development, testing, and benchmarking. +RegularizedOptimization.jl provides an API to formulate optimization problems and apply different solvers. +It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem. The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems. The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets. 
-With $f$ and $h$ modeled as discussed above, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a straightforward way to pair them into a *Regularized Nonlinear Programming Model* +With $f$ and $h$ modeled, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a way to pair them into a *Regularized Nonlinear Programming Model* ```julia reg_nlp = RegularizedNLPModel(f, h) ``` -They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, as would be the case with the **LM** and **LMTR** solvers. +They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, in the case of the **LM** and **LMTR** solvers. ```julia reg_nls = RegularizedNLSModel(f, h) ``` -RegularizedProblems.jl also provides a set of instances commonly used in data science and in the nonsmooth optimization literature, where several choices of $f$ can be paired with various nonsmooth terms $h$. -This design makes for a convenient source of reproducible problem instances for testing and benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). +RegularizedProblems.jl also provides a set of instances commonly used in data science and in the nonsmooth optimization, where several choices of $f$ can be paired with various nonsmooth terms $h$. +This design makes for a convenient source of reproducible problem instances for benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). ## Support for both exact and approximate Hessian @@ -104,7 +105,6 @@ This design allows solvers to exploit second-order information without explicitl # Example We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with a $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024]. -This problem is nonsmooth and nonconvex. Below is a condensed example showing how to define and solve an SVM problem, and perform a solve followed by a re-solve: From e176915565f5482d14a8aa27f1680703d79eed11 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 15:01:20 -0400 Subject: [PATCH 18/24] paper: remove more words, remove tilde --- paper/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 472ff5cd..0d7a0563 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -51,7 +51,7 @@ All solvers rely on first derivatives of $f$ and $c$, and optionally on their se If second derivatives are not available, quasi-Newton approximations can be used. The proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. -The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve~\eqref{eq:nlp}. 
+The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}. All solvers are implemented in place, so re-solves incur no allocations. To illustrate our claim of extensibility, a first version of the AL solver was implemented by an external contributor. Furthermore, a nonsmooth penalty approach, described in [@diouane-gollier-orban-2024] is currently being developed, relying on the library’s solvers to efficiently solve its subproblems. @@ -68,7 +68,7 @@ Furthermore, a nonsmooth penalty approach, described in [@diouane-gollier-orban- ## Model-based framework for nonsmooth methods In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements splitting schemes and line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017]. -Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the gradient of $f$ modified by a L-BFGS Quasi-Newton approximation, followed by proximal steps on $h$. +Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the L-BFGS Quasi-Newton approximation of $f$, followed by proximal steps on $h$. By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022]. However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints, so that the overall approach is efficient for large-scale problems. From d9ff1285b35f853b500960422dcba03be8a6f825 Mon Sep 17 00:00:00 2001 From: Maxence Gollier <134112149+MaxenceGollier@users.noreply.github.com> Date: Fri, 17 Oct 2025 15:34:39 -0400 Subject: [PATCH 19/24] Apply suggestions from code review Co-authored-by: Dominique --- paper/paper.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 0d7a0563..752d9c44 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -77,10 +77,9 @@ RegularizedOptimization.jl provides an API to formulate optimization problems an It integrates seamlessly with the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) [@jso] ecosystem. The smooth objective $f$ can be defined via [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl) [@orban-siqueira-nlpmodels-2020], which provides a standardized Julia API for representing nonlinear programming (NLP) problems. +The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl). -The nonsmooth term $h$ can be modeled using [ProximalOperators.jl](https://github.com/JuliaSmoothOptimizers/ProximalOperators.jl), which provides a broad collection of regularizers and indicators of simple sets. 
- -With $f$ and $h$ modeled, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a way to pair them into a *Regularized Nonlinear Programming Model* +Given $f$ and $h$, the companion package [RegularizedProblems.jl](https://github.com/JuliaSmoothOptimizers/RegularizedProblems.jl) provides a way to pair them into a *Regularized Nonlinear Programming Model* ```julia reg_nlp = RegularizedNLPModel(f, h) @@ -92,7 +91,7 @@ They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f reg_nls = RegularizedNLSModel(f, h) ``` -RegularizedProblems.jl also provides a set of instances commonly used in data science and in the nonsmooth optimization, where several choices of $f$ can be paired with various nonsmooth terms $h$. +RegularizedProblems.jl also provides a set of instances commonly used in data science and in nonsmooth optimization, where several choices of $f$ can be paired with various regularizers. This design makes for a convenient source of reproducible problem instances for benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl). ## Support for both exact and approximate Hessian @@ -130,12 +129,12 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_ We compare **TR**, **R2N**, **LM** and **LMTR** from our library on the SVM problem. The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time and the final objective value. -On the SVM and NNMF problems, we use limited-memory SR1 and BFGS Hessian approximations, respectively. +We use limited-memory SR1 Hessian approximations. The subproblem solver is **R2**. \input{examples/Benchmark.tex} -Note that for the **LM** and **LMTR** solvers, gradient evaluations count $\#\nabla f$ equals the number of Jacobian–vector and adjoint-Jacobian–vector products. +For the **LM** and **LMTR** solvers, $\#\nabla f$ counts the number of Jacobian–vector and adjoint-Jacobian–vector products. All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point. Note that the final objective values differ due to the nonconvexity of the problem. From 5e6d1e7293a9ff48660217bea73cb839d972dbea Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Fri, 17 Oct 2025 16:12:09 -0400 Subject: [PATCH 20/24] apply suggestion from dpo --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 752d9c44..0d2489f0 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -49,7 +49,7 @@ Currently, the following solvers are implemented: All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products. If second derivatives are not available, quasi-Newton approximations can be used. -The proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. +In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated. At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate. 
 The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}.
 All solvers are implemented in place, so re-solves incur no allocations.

From 94712ce259cc425d0f964dc0b1b396f0bef6193b Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Sun, 19 Oct 2025 15:59:59 -0400
Subject: [PATCH 21/24] remove LSR1 from benchmark table

---
 paper/examples/Benchmark.tex | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex
index 7f423056..92e483af 100644
--- a/paper/examples/Benchmark.tex
+++ b/paper/examples/Benchmark.tex
@@ -1,8 +1,8 @@
 \begin{tabular}{lcrrrrr}
 \hline
 \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline
-  TR (LSR1) & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
-  R2N (LSR1) & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
+  TR & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
+  R2N & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
   LM & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\
   LMTR & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\
 \end{tabular}

From c161c845671ee64ee33c0213edeb0855ed37880c Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Mon, 20 Oct 2025 12:44:35 -0400
Subject: [PATCH 22/24] numerical results: apply D1Lab comments

---
 paper/paper.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paper/paper.md b/paper/paper.md
index 0d2489f0..92f2130e 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -88,7 +88,7 @@ reg_nlp = RegularizedNLPModel(f, h)
 They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, in the case of the **LM** and **LMTR** solvers.
 
 ```julia
-reg_nls = RegularizedNLSModel(f, h)
+reg_nls = RegularizedNLSModel(F, h)
 ```
@@ -116,7 +116,7 @@ Random.seed!(1234)
 model, nls_model, _ = RegularizedProblems.svm_train_model() # Build SVM model
 f = LSR1Model(model) # L-SR1 Hessian approximation
 λ = 1.0 # Regularization parameter
-h = RootNormLhalf(λ)  # Nonsmooth term
+h = RootNormLhalf(λ)   # Nonsmooth term
 reg_nlp = RegularizedNLPModel(f, h) # Regularized problem
 solver = R2NSolver(reg_nlp) # Choose solver
 stats = RegularizedExecutionStats(reg_nlp)
@@ -129,7 +129,7 @@ solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=1, sub_kwargs=(max_
 
 We compare **TR**, **R2N**, **LM** and **LMTR** from our library on the SVM problem.
 The table reports the convergence status of each solver, the number of evaluations of $f$, the number of evaluations of $\nabla f$, the number of proximal operator evaluations, the elapsed time and the final objective value.
-We use limited-memory SR1 Hessian approximations.
+For TR and R2N, we use limited-memory SR1 Hessian approximations.
 The subproblem solver is **R2**.
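The `RegularizedNLSModel(f, h)` to `RegularizedNLSModel(F, h)` fix above concerns the least-squares pairing consumed by **LM** and **LMTR**. The following sketch spells that pairing out, reusing the `nls_model` returned by `svm_train_model()` in the patch; treating `nls_model` as the residual argument `F` is an assumption here, and the penalty weight is an illustrative value.

```julia
using ProximalOperators, RegularizedProblems

# svm_train_model() returns both an NLP model and a nonlinear
# least-squares model of the same problem (see the patch above).
model, nls_model, _ = RegularizedProblems.svm_train_model()

h = RootNormLhalf(1.0)                       # the ℓ^{1/2}_{1/2} penalty used in the paper
reg_nls = RegularizedNLSModel(nls_model, h)  # residual-based pairing for LM and LMTR
```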
 \input{examples/Benchmark.tex}

From 97e12c9fb03ec33ab36e0bd3c894a489ca1d9a28 Mon Sep 17 00:00:00 2001
From: Maxence Gollier
Date: Mon, 20 Oct 2025 12:46:38 -0400
Subject: [PATCH 23/24] revert some changes in the paper

---
 paper/paper.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paper/paper.md b/paper/paper.md
index 92f2130e..40e0ba7a 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -44,7 +44,7 @@ Currently, the following solvers are implemented:
 - **Trust-region solvers TR and TRDH** [@aravkin-baraldi-orban-2022;@leconte-orban-2023]
 - **Quadratic regularization solvers R2, R2DH and R2N** [@diouane-habiboullah-orban-2024;@aravkin-baraldi-orban-2022]
-- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024].
+- **Levenberg-Marquardt solvers LM and LMTR** [@aravkin-baraldi-orban-2024], used when $f$ is a least-squares term $\tfrac{1}{2} \|F(x)\|^2$.
 - **Augmented Lagrangian solver AL** [@demarchi-jia-kanzow-mehlitz-2023].
 
 All solvers rely on first derivatives of $f$ and $c$, and optionally on their second derivatives in the form of Hessian-vector products.
 If second derivatives are not available, quasi-Newton approximations can be used.
 In addition, the proximal mapping of the nonsmooth part $h$, or adequate models thereof, must be evaluated.
 At each iteration, a step is computed by solving a subproblem of the form \eqref{eq:nlp} inexactly, in which $f$, $h$, and $c$ are replaced with appropriate models about the current iterate.

From cf964284e6ace76a9f331aead39dcb2bcc59dc73 Mon Sep 17 00:00:00 2001
From: Maxence Gollier <134112149+MaxenceGollier@users.noreply.github.com>
Date: Tue, 21 Oct 2025 13:33:19 -0400
Subject: [PATCH 24/24] Apply suggestions from code review

Co-authored-by: Dominique

---
 paper/examples/Benchmark.tex | 5 +++--
 paper/paper.md | 16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/paper/examples/Benchmark.tex b/paper/examples/Benchmark.tex
index 92e483af..1c10c244 100644
--- a/paper/examples/Benchmark.tex
+++ b/paper/examples/Benchmark.tex
@@ -1,8 +1,9 @@
-\begin{tabular}{lcrrrrr}
+\begin{tabular}{llrrrrr}
 \hline
-  \textbf{Method} & \textbf{Status} & \textbf{$t$($s$)} & \textbf{$\#f$} & \textbf{$\#\nabla f$} & \textbf{$\#prox$} & \textbf{Objective} \\\hline
+  Method & Status & $t$($s$) & $\#f$ & $\#\nabla f$ & $\#prox$ & Objective \\\hline
   TR & first\_order & 3.9349 & 347 & 291 & 4037 & 179.837 \\
   R2N & first\_order & 1.9511 & 185 & 101 & 27932 & 192.493 \\
   LM & first\_order & 19.7826 & 6 & 2876 & 1001 & 201.186 \\
   LMTR & first\_order & 12.4967 & 11 & 1614 & 432 & 188.274 \\
+  \hline
 \end{tabular}
diff --git a/paper/paper.md b/paper/paper.md
index 40e0ba7a..f625dcea 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -37,7 +37,7 @@ header-includes: |
 \underset{x \in \mathbb{R}^n}{\text{minimize}} \quad f(x) + h(x) \quad \text{subject to} \quad c(x) = 0,
 \end{equation}
 where $f: \mathbb{R}^n \to \mathbb{R}$ and $c: \mathbb{R}^n \to \mathbb{R}^m$ are continuously differentiable, and $h: \mathbb{R}^n \to \mathbb{R} \cup \{+\infty\}$ is lower semi-continuous.
-The nonsmooth objective $h$ can be a *regularizer* such as a sparsity-inducing penalty, model simple constraints such as $x$ belonging to a simple convex set, or be a combination of both.
+The nonsmooth objective $h$ can be a *regularizer*, such as a sparsity-inducing penalty, model simple constraints, such as $x$ belonging to a simple convex set, or be a combination of both.
 All $f$, $h$ and $c$ can be nonconvex.
 RegularizedOptimization.jl provides a modular and extensible framework for solving \eqref{eq:nlp}, and developing novel solvers.
 Currently, the following solvers are implemented:
@@ -54,7 +54,7 @@ At each iteration, a step is computed by solving a subproblem of the form \eqref
 The solvers R2, R2DH and TRDH are particularly well suited to solve the subproblems, though they are general enough to solve \eqref{eq:nlp}.
 All solvers are implemented in place, so re-solves incur no allocations.
 To illustrate our claim of extensibility, a first version of the AL solver was implemented by an external contributor.
-Furthermore, a nonsmooth penalty approach, described in [@diouane-gollier-orban-2024] is currently being developed, relying on the library’s solvers to efficiently solve its subproblems.
+Furthermore, a nonsmooth penalty approach described in [@diouane-gollier-orban-2024], which relies on the library to solve its subproblems efficiently, is currently being developed.
@@ -68,7 +68,7 @@ Furthermore, a nonsmooth penalty approach, described in [@diouane-gollier-orban-
 ## Model-based framework for nonsmooth methods
 
 In Julia, \eqref{eq:nlp} can be solved using [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl), which implements splitting schemes and line-search–based methods [@stella-themelis-sopasakis-patrinos-2017;@themelis-stella-patrinos-2017].
-Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the L-BFGS Quasi-Newton approximation of $f$, followed by proximal steps on $h$.
+Among others, the **PANOC** [@stella-themelis-sopasakis-patrinos-2017] solver takes a step along a direction $d$, which depends on the L-BFGS quasi-Newton approximation of $f$, followed by proximal steps on $h$.
 By contrast, [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) focuses on model-based trust-region and quadratic regularization methods, which typically require fewer evaluations of $f$ and its gradient than first-order line search methods, at the expense of more evaluations of proximal operators [@aravkin-baraldi-orban-2022].
 However, each proximal computation is inexpensive for numerous commonly used choices of $h$, such as separable penalties and bound constraints, so that the overall approach is efficient for large-scale problems.
@@ -85,14 +85,14 @@ Given $f$ and $h$, the companion package [RegularizedProblems.jl](https://github
 reg_nlp = RegularizedNLPModel(f, h)
 ```
 
-They can also be paired into a *Regularized Nonlinear Least Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, in the case of the **LM** and **LMTR** solvers.
+They can also be paired into a *Regularized Nonlinear Least-Squares Model* if $f(x) = \tfrac{1}{2} \|F(x)\|^2$ for some residual $F: \mathbb{R}^n \to \mathbb{R}^m$, in the case of the **LM** and **LMTR** solvers.
 
 ```julia
 reg_nls = RegularizedNLSModel(F, h)
 ```
 
-RegularizedProblems.jl also provides a set of instances commonly used in data science and in nonsmooth optimization, where several choices of $f$ can be paired with various regularizers.
-This design makes for a convenient source of reproducible problem instances for benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
+RegularizedProblems.jl also provides a set of instances commonly used in data science and in nonsmooth optimization, where several choices of $f$ can be paired with various regularizers.
+This design makes for a convenient source of problem instances for benchmarking the solvers in [RegularizedOptimization.jl](https://www.github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl).
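The claim above that each proximal computation is inexpensive for separable penalties and bound constraints can be checked directly: both proximal mappings below act componentwise, in $O(n)$ operations. A minimal sketch with illustrative data, using two regularizers from ProximalOperators.jl:

```julia
using ProximalOperators

x = [1.5, -0.2, 0.8]

# Separable ℓ1 penalty: the proximal mapping is soft-thresholding.
h1 = NormL1(1.0)
y1, _ = prox(h1, x, 0.5)   # y1 == [1.0, 0.0, 0.3]

# Indicator of box constraints: the proximal mapping is a projection.
h2 = IndBox(-1.0, 1.0)
y2, _ = prox(h2, x, 0.5)   # y2 == clamp.(x, -1.0, 1.0)
```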
 ## Support for both exact and approximate Hessian
@@ -105,7 +105,7 @@ This design allows solvers to exploit second-order information without explicitl
 We illustrate the capabilities of [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) on a Support Vector Machine (SVM) model with an $\ell_{1/2}^{1/2}$ penalty for image classification [@aravkin-baraldi-orban-2024].
-Below is a condensed example showing how to define and solve an SVM problem, and perform a solve followed by a re-solve:
+Below is a condensed example showing how to define and solve the problem, and perform a solve followed by a re-solve:
 
 ```julia
 using LinearAlgebra, Random, ProximalOperators
@@ -139,9 +139,9 @@ For the **LM** and **LMTR** solvers, $\#\nabla f$ counts the number of Jacobian
 All methods successfully reduced the optimality measure below the specified tolerance of $10^{-4}$, and thus converged to an approximate first-order stationary point.
 Note that the final objective values differ due to the nonconvexity of the problem.
-**R2N** is the fastest, requiring the fewest gradient evaluations.
+**R2N** is the fastest, both in elapsed time and in number of gradient evaluations.
 It requires more proximal evaluations, but these are inexpensive.
-**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest.
+**LMTR** and **LM** require the fewest function evaluations, but incur many Jacobian–vector products, and are the slowest in elapsed time.
 Ongoing research aims to reduce the number of proximal evaluations.
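Finally, to make the solve-then-re-solve workflow of the patches concrete, here is a sketch continuing the SVM snippet above: the `solver` and `stats` objects are reused, so the second call allocates nothing new. The tighter tolerances are illustrative, `verbose=0` is assumed to silence logging, and `sub_kwargs` is omitted because its value is truncated in the patch.

```julia
# First solve, as in the paper's example.
solve!(solver, reg_nlp, stats; atol=1e-5, rtol=1e-5, verbose=0)

# Re-solve in place with tighter tolerances: the solver workspace and
# the stats structure are reused, so no new allocations are incurred.
solve!(solver, reg_nlp, stats; atol=1e-7, rtol=1e-7, verbose=0)
println(stats.status)   # e.g. :first_order
```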