diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..e8ef2c5 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,40 @@ +name: Documentation + +on: + push: + branches: + - main + tags: + - '*' + pull_request: + +jobs: + build: + permissions: + actions: write + contents: write + pull-requests: read + statuses: write + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + + - uses: julia-actions/setup-julia@v3 + with: + version: '1.12' + + - uses: julia-actions/cache@v3 + + - name: Install documentation dependencies + shell: julia --color=yes --project=docs {0} + run: | + using Pkg + Pkg.develop(PackageSpec(path = ".")) + Pkg.instantiate() + + - name: Build and deploy documentation + run: julia --color=yes --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.gitignore b/.gitignore index 3f40be2..c324dcf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,9 @@ *.log tutorials/.DS_Store .DS_Store +/Manifest*.toml +/docs/build/ +/docs/Manifest*.toml +/docs/src/generated_examples/ +/docs/src/examples.md +/docs/src/changelog.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e39ffae --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,59 @@ +# Changelog + +All notable changes to `PEPit.jl` are documented in this file. The format is +based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the +project aims to follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.2] - 2026-06-08 + +### Added +- Documenter.jl documentation scaffold with API reference pages, an executable + Quick Start, tutorials, contributing guide, release notes, and custom styling. +- GitHub Actions workflow for building and deploying the documentation site. 
+- Comprehensive source docstrings for the public API and key implementation + helpers across the core objects, function classes, operator classes, + primitive steps, and utilities. +- Mathematical docstrings for all `wc_*` example functions. +- Executable, expanded Quick Start covering worst-case instance recovery with + `evaluate`, the explicit dual certificate (`solve_dual!`, `eval_dual`, + `DualPEPCertificate`), and dimension-reduction heuristics (`tracetrick`, + `logdetiters`). +- Contributing guide with templates for adding function/operator classes, + primitive steps, and worked examples. +- Automatically generated Examples overview, category pages, and per-example + pages extracted from the corresponding `wc_*` function docstrings. +- This changelog and a "Release notes" page in the documentation. + +### Changed +- `evaluate` is now exported. It is the documented way to recover numerical + realizations of `Point`/`Expression` objects after solving. +- Standardized all documentation links and documentation deployment on + `github.com/PerformanceEstimation/PEPit.jl`. +- Tests now access internal counters and helper functions as `PEPit.<name>` + after those symbols were removed from the public export list. + +### Fixed +- `evaluate` was documented and shown in the Quick Start but not exported, so + user code following the Quick Start raised `UndefVarError`. +- Broken display-math block in the `inexact_gradient_step!` docstring: the + conditional definition is now contained in a single `math` fence (previously + part of it rendered as raw LaTeX / a code block). +- De-Pythonized the `inexact_gradient_step!` docstring: a `# Throws` section now + documents the real `ErrorException`, with Julia string/list syntax. 
+ +### Removed +- Internal symbols are no longer exported into the public namespace: the global + counters (`Point_counter`, `Expression_counter`, `Function_counter`, + `Global_Constraint_counter`, `PSDMatrix_counter`, `NEXT_ID`) and the internal + helpers (`_is_already_evaluated_on_point`, + `_separate_leaf_functions_regarding_their_need_on_point`, + `_get_nb_eigs_and_corrected`). They remain reachable as `PEPit.<name>`. + +## [0.1.1] + +- Initial documented release: the core PEP workflow (`PEP`, `Point`, + `Expression`, `Constraint`, `PSDMatrix`), function and operator interpolation + classes, primitive steps, `solve!` / `solve_dual!`, and ~96 worked examples + validated against the Python `PEPit` package. diff --git a/Project.toml b/Project.toml index 8223ca0..0db6516 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "PEPit" uuid = "362dabbc-d565-4ef8-96fa-aefabe1c072a" -version = "0.1.1" +version = "0.1.2" authors = ["Shuvomoy Das Gupta and contributors"] [deps] @@ -33,3 +33,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test"] + +[workspace] +projects = ["docs"] diff --git a/README.md b/README.md index ac8acf6..a1f38d5 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # PEPit.jl +[![Documentation](https://img.shields.io/badge/Documentation-stable-purple.svg)](https://PerformanceEstimation.github.io/PEPit.jl/stable/) +[![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) +[![Release](https://img.shields.io/badge/Release-v0.1.2-blue.svg)](https://github.com/PerformanceEstimation/PEPit.jl/releases/tag/v0.1.2) + `PEPit.jl` is a native Julia implementation of the Performance Estimation Programming (PEP) methodology [1,2,3] and the Python package `PEPit` [4] for worst-case analysis of first-order optimization algorithms. 
The core idea in PEP is to model the design and analysis of first-order optimization algorithms as higher-level optimization problems called performance estimation problems (PEPs), which are semidefinite programs (SDPs). We then solve these SDPs numerically to obtain tight worst-case bounds for known algorithms and also to discover new algorithms under suitable conditions. The intent of this Julia package is to be functionally equivalent to existing packages such as `PESTO` [5] and `PEPit` while providing a clean, Julia-native API along with a broader support of commercial and open-source solvers under the `JuMP` ecosystem [6]. @@ -139,7 +143,7 @@ After `solve_dual!`, `eval_dual(::Constraint)` and `eval_dual(::PSDMatrix)` are Examples are standard Julia scripts. For instance, you can run them as: ```julia -include("examples/unconstrained_convex_optimization/gradient_exact_line_search.jl") +include("examples/unconstrained_convex_minimization/gradient_exact_line_search.jl") ``` ## Notes and scope diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..141c492 --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,8 @@ +[deps] +Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +PEPit = "362dabbc-d565-4ef8-96fa-aefabe1c072a" + +[compat] +Documenter = "1.17" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..1a3f153 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,158 @@ +using Documenter +using PEPit + +const IS_CI = get(ENV, "CI", "false") == "true" +const DOCS_ROOT = @__DIR__ +const PACKAGE_ROOT = normpath(joinpath(DOCS_ROOT, "..")) +const EXAMPLES_ROOT = joinpath(PACKAGE_ROOT, "examples") +const GENERATED_EXAMPLES_ROOT = joinpath(DOCS_ROOT, "src", "generated_examples") +const REPO_EXAMPLES_URL = "https://github.com/PerformanceEstimation/PEPit.jl/blob/main/examples" + +function 
_title_from_slug(slug::AbstractString) + return join(uppercasefirst.(split(replace(slug, "_" => " "))), " ") +end + +function _extract_example_doc(path::AbstractString) + text = read(path, String) + m = match(r"(?s)@doc\s+raw\"\"\"(.*?)\"\"\"\s*function\s+(wc_[A-Za-z0-9_!]+)\s*\(", text) + m === nothing && return nothing + doc = replace(m.captures[1], r"^\n+" => "") + doc = replace(doc, r"\s+$" => "") + return (name = m.captures[2], doc = doc) +end + +function _demote_markdown_headings(text::AbstractString; levels::Integer=1) + return replace(text, r"(?m)^#{1,4}\s+" => heading -> begin + first_non_hash = findfirst(!=('#'), heading) + level = first_non_hash === nothing ? length(heading) : first_non_hash - 1 + repeat("#", level + levels) * heading[(level + 1):end] + end) +end + +function _prepare_example_doc(text::AbstractString) + return _demote_markdown_headings(text; levels=1) +end + +function _generate_example_doc_pages!() + isdir(GENERATED_EXAMPLES_ROOT) && rm(GENERATED_EXAMPLES_ROOT; recursive=true) + mkpath(GENERATED_EXAMPLES_ROOT) + + repo_tree_base = replace(REPO_EXAMPLES_URL, "/blob/" => "/tree/") + + # The Examples overview page is generated from the same scan as the per-example + # pages, so every family is listed uniformly and links never drift. + overview = IOBuffer() + println(overview, "# Examples") + println(overview) + println(overview, "The Julia examples are ordinary scripts under ", + "[`examples/`]($(repo_tree_base)), grouped by family. 
Each page below is ", + "generated from the corresponding `wc_*` function docstring and includes the ", + "problem statement, the algorithm, the performance metric, the reference ", + "guarantee when available, the Julia arguments and return values, and a link ", + "to the source file.") + println(overview) + + pages = Pair{String,Any}[] + for family in sort(filter(isdir, readdir(EXAMPLES_ROOT; join=true))) + family_slug = basename(family) + family_title = _title_from_slug(family_slug) + entries = [] + + for path in sort(filter(p -> endswith(p, ".jl"), readdir(family; join=true))) + extracted = _extract_example_doc(path) + extracted === nothing && continue + example_slug = splitext(basename(path))[1] + example_title = _title_from_slug(example_slug) + rel_source = replace(relpath(path, EXAMPLES_ROOT), "\\" => "/") + push!(entries, (title=example_title, + slug=example_slug, + source_url="$(REPO_EXAMPLES_URL)/$(rel_source)", + doc=extracted.doc)) + end + + isempty(entries) && continue + + family_output_root = joinpath(GENERATED_EXAMPLES_ROOT, family_slug) + mkpath(family_output_root) + + family_pages = Pair{String,String}["Overview" => "generated_examples/$(family_slug)/index.md"] + open(joinpath(family_output_root, "index.md"), "w") do io + println(io, "# $(family_title)") + println(io) + println(io, "Examples in this category.") + println(io) + for entry in entries + println(io, "- [$(entry.title)]($(entry.slug).md)") + end + end + + println(overview, "## $(family_title)") + println(overview) + println(overview, "[Source directory]($(repo_tree_base)/$(family_slug))") + println(overview) + + for entry in entries + output_path = joinpath(family_output_root, "$(entry.slug).md") + open(output_path, "w") do io + println(io, "# $(entry.title)") + println(io) + println(io, "[Source file]($(entry.source_url))") + println(io) + println(io, _prepare_example_doc(entry.doc)) + println(io) + end + push!(family_pages, entry.title => 
"generated_examples/$(family_slug)/$(entry.slug).md") + println(overview, "- [$(entry.title)](generated_examples/$(family_slug)/$(entry.slug).md)") + end + println(overview) + + push!(pages, family_title => family_pages) + end + + write(joinpath(DOCS_ROOT, "src", "examples.md"), String(take!(overview))) + + return pages +end + +const EXAMPLE_DOC_PAGES = _generate_example_doc_pages!() + +# Mirror the root changelog into the docs as the "Release notes" page. +cp(joinpath(PACKAGE_ROOT, "CHANGELOG.md"), joinpath(DOCS_ROOT, "src", "changelog.md"); force=true) + +makedocs(; + modules = [PEPit], + sitename = "PEPit.jl", + authors = "PEPit.jl contributors", + remotes = nothing, + linkcheck = false, + checkdocs = :exports, + warnonly = [:missing_docs], + pagesonly = true, + meta = Dict(:CurrentModule => PEPit), + format = Documenter.HTML(; + prettyurls = IS_CI, + edit_link = "main", + repolink = "https://github.com/PerformanceEstimation/PEPit.jl", + assets = ["assets/pepit.css"], + ), + pages = [ + "Home" => "index.md", + "Quick start" => "quickstart.md", + "API reference" => [ + "Core workflow" => "api/core.md", + "Function classes" => "api/functions.md", + "Operator classes" => "api/operators.md", + "Primitive steps" => "api/steps.md", + "Utilities" => "api/utilities.md", + ], + "Examples" => vcat(["Overview" => "examples.md"], EXAMPLE_DOC_PAGES), + "Tutorials" => "tutorials.md", + "Contributing" => "contributing.md", + "Release notes" => "changelog.md", + ], +) + +deploydocs(; + repo = "github.com/PerformanceEstimation/PEPit.jl.git", + devbranch = "main", +) diff --git a/docs/src/api/core.md b/docs/src/api/core.md new file mode 100644 index 0000000..a378995 --- /dev/null +++ b/docs/src/api/core.md @@ -0,0 +1,49 @@ +# Core workflow + +```@meta +CurrentModule = PEPit +``` + +## Main objects + +```@docs +PEP +Point +Expression +Constraint +PSDMatrix +BlockPartition +PEPFunction +DualPEPCertificate +``` + +## Problem construction + +```@docs +declare_function! 
+declare_block_partition! +set_initial_point! +set_initial_condition! +set_performance_metric! +add_constraint! +add_psd_matrix! +``` + +## Oracles and fixed points + +```@docs +oracle! +gradient! +value! +stationary_point! +fixed_point! +``` + +## Solving and evaluation + +```@docs +solve! +solve_dual! +evaluate +eval_dual +``` diff --git a/docs/src/api/functions.md b/docs/src/api/functions.md new file mode 100644 index 0000000..8363d91 --- /dev/null +++ b/docs/src/api/functions.md @@ -0,0 +1,29 @@ +# Function classes + +```@meta +CurrentModule = PEPit +``` + +The following exported types model interpolation constraints for common +classes of scalar functions. Parameters are supplied through an +`OrderedDict`, matching the examples in `PEPit.jl/examples`. + +```@docs +AbstractFunction +ConvexFunction +ConvexLipschitzFunction +SmoothFunction +SmoothConvexFunction +SmoothStronglyConvexFunction +StronglyConvexFunction +ConvexIndicatorFunction +ConvexQGFunction +ConvexSupportFunction +RsiEbFunction +SmoothConvexLipschitzFunction +SmoothStronglyConvexQuadraticFunction +SmoothQuadraticLojasiewiczFunctionCheap +SmoothQuadraticLojasiewiczFunctionExpensive +BlockSmoothConvexFunctionCheap +BlockSmoothConvexFunctionExpensive +``` diff --git a/docs/src/api/operators.md b/docs/src/api/operators.md new file mode 100644 index 0000000..61c8400 --- /dev/null +++ b/docs/src/api/operators.md @@ -0,0 +1,25 @@ +# Operator classes + +```@meta +CurrentModule = PEPit +``` + +Operator classes use the same oracle infrastructure as function classes, but +their interpolation constraints describe set-valued or single-valued operators +such as monotone, cocoercive, Lipschitz, or nonexpansive maps. 
+ +```@docs +LipschitzOperator +LinearOperator +NonexpansiveOperator +MonotoneOperator +StronglyMonotoneOperator +NegativelyComonotoneOperator +CocoerciveOperator +CocoerciveStronglyMonotoneOperatorCheap +CocoerciveStronglyMonotoneOperatorExpensive +LipschitzStronglyMonotoneOperatorCheap +LipschitzStronglyMonotoneOperatorExpensive +SymmetricLinearOperator +SkewSymmetricLinearOperator +``` diff --git a/docs/src/api/steps.md b/docs/src/api/steps.md new file mode 100644 index 0000000..1b86303 --- /dev/null +++ b/docs/src/api/steps.md @@ -0,0 +1,21 @@ +# Primitive steps + +```@meta +CurrentModule = PEPit +``` + +Primitive steps create symbolic points, gradients, function values, and +constraints for common algorithmic operations. They are intended to keep example +scripts close to the mathematical method being analyzed. + +```@docs +inexact_gradient_step! +bregman_gradient_step! +bregman_proximal_step! +epsilon_subgradient_step! +exact_linesearch_step! +inexact_proximal_step! +proximal_step! +linear_optimization_step! +shifted_optimization_step! +``` diff --git a/docs/src/api/utilities.md b/docs/src/api/utilities.md new file mode 100644 index 0000000..b083db6 --- /dev/null +++ b/docs/src/api/utilities.md @@ -0,0 +1,18 @@ +# Utilities + +```@meta +CurrentModule = PEPit +``` + +These helpers support the symbolic linear-combination representation used by +[`Point`](@ref) and [`Expression`](@ref). Most users interact with them +indirectly through overloaded arithmetic. 
+ +```@docs +merge_dicts +multiply_dicts +prune_dict +get_block +get_nb_blocks +get_is_leaf +``` diff --git a/docs/src/assets/pepit.css b/docs/src/assets/pepit.css new file mode 100644 index 0000000..89fffe8 --- /dev/null +++ b/docs/src/assets/pepit.css @@ -0,0 +1,62 @@ +:root { + --pepit-blue: #24568f; + --pepit-ink: #172033; + --pepit-muted: #5f6b7a; + --pepit-border: #d8dee8; + --pepit-surface: #f7f9fc; +} + +body { + color: var(--pepit-ink); +} + +.docs-sidebar { + border-right: 1px solid var(--pepit-border); +} + +.docs-sidebar .docs-logo { + color: var(--pepit-blue); +} + +.content h1, +.content h2, +.content h3 { + color: var(--pepit-ink); + letter-spacing: 0; +} + +.content h1 { + border-bottom: 2px solid var(--pepit-border); + padding-bottom: 0.25rem; +} + +.content a { + color: var(--pepit-blue); +} + +.content blockquote, +.admonition { + border-radius: 6px; +} + +.content table { + font-size: 0.94rem; +} + +.content table th { + background: var(--pepit-surface); + color: var(--pepit-ink); +} + +.content code { + border-radius: 4px; +} + +.content .api-index { + columns: 2 18rem; +} + +.content .api-index li { + break-inside: avoid; + margin-bottom: 0.2rem; +} diff --git a/docs/src/contributing.md b/docs/src/contributing.md new file mode 100644 index 0000000..de69e3f --- /dev/null +++ b/docs/src/contributing.md @@ -0,0 +1,161 @@ +# Contributing + +`PEPit.jl` is research software. Contributions should preserve mathematical +correctness, reproducibility, and parity with the intended PEP formulation. The +most common contributions are new **function/operator classes**, new +**primitive steps** (black-box oracles), and new **worked examples**. 
+ +## Local development + +Instantiate the package project and run the test suite (it uses the open-source +`Clarabel` solver, so no commercial license is required): + +```bash +julia --project=PEPit.jl -e 'using Pkg; Pkg.instantiate(); Pkg.test()' +``` + +Build the documentation: + +```bash +julia --project=PEPit.jl/docs -e 'using Pkg; Pkg.develop(PackageSpec(path="PEPit.jl")); Pkg.instantiate()' +julia --project=PEPit.jl/docs PEPit.jl/docs/make.jl +``` + +## General guidelines + +- Follow standard Julia conventions: `CamelCase` for types, a trailing `!` for + mutating functions, and parameters passed through an `OrderedDict`. +- Document every exported symbol with a docstring attached immediately above it. + Use `@doc raw"""..."""` whenever the docstring contains LaTeX, and write + display math inside a ` ```math ` fence (keep the whole expression — including + any `\left\{ ... \right.` — inside a single fence). +- Add a test for every new class, step, or example; keep examples deterministic + and solver-light. +- Update the documentation (the relevant `docs/src/api/*.md` page) and add a line + to `CHANGELOG.md` describing the change. + +## Adding a function or operator class + +Function classes live in `src/functions/`, operator classes in `src/operators/`. +A class is a small `struct` that wraps a [`PEPFunction`](@ref) and overrides +`add_class_constraints!` to add its interpolation (or operator) inequalities. +Using `ConvexFunction` (`src/functions/convex_function.jl`) as a template: + +```julia +@doc raw""" + MyFunction(param; reuse_gradient=false) + +One-line summary, then the interpolation conditions in a `math` fence, and a +clickable reference to the paper that introduces them. 
+""" +mutable struct MyFunction <: AbstractFunction + mu::Float64 # class parameters, if any + L::Float64 + _PEPit_func::PEPFunction # required: stores oracle calls and constraints + + function MyFunction(param=OrderedDict(); is_leaf=true, + decomposition_dict=nothing, reuse_gradient=false) + @assert is_leaf + func = PEPFunction(; is_leaf, decomposition_dict, reuse_gradient) + return new(param["mu"], param["L"], func) + end +end + +# Delegate the oracle/utility methods to the wrapped PEPFunction: +gradient!(f::MyFunction, p::Point) = gradient!(f._PEPit_func, p) +value!(f::MyFunction, p::Point) = value!(f._PEPit_func, p) +stationary_point!(f::MyFunction) = stationary_point!(f._PEPit_func) +add_constraint!(f::MyFunction, c::Constraint) = add_constraint!(f._PEPit_func, c) +_get_pep_func(f::MyFunction) = f._PEPit_func + +# The mathematical content: add one inequality per ordered pair of oracle points. +function add_class_constraints!(f::MyFunction) + pts = f._PEPit_func.list_of_points + for pi in pts, pj in pts + pi == pj && continue + xi, gi, fi = pi + xj, gj, fj = pj + add_constraint!(f, fi - fj >= gj * (xi - xj)) # replace with your class' inequality + end +end +``` + +Then: (1) `include` the file in `src/PEPit.jl`, (2) add the type to the `export` +list, and (3) list it in `docs/src/api/functions.md` (or `operators.md`). + +## Adding a primitive step + +Primitive steps live in `src/primitive_steps/`. A step is a `!`-suffixed function +that creates symbolic points/gradients/values, attaches the appropriate +constraints with `add_constraint!`, and returns the new symbolic objects. +Document it with `@doc raw"""..."""`, including the defining display-math relation +and a paper reference (see `src/primitive_steps/inexact_gradient_step.jl`). Export +it and list it in `docs/src/api/steps.md`. + +## Adding an example + +Examples live in `examples/<family>/`. 
Each file defines a single +`wc_<name>(...)` function whose `@doc raw"""..."""` docstring is extracted +verbatim into the documentation, so it must sit immediately above the +`function wc_...` line. Follow this structure (see +`examples/unconstrained_convex_minimization/gradient_descent.jl`): + +```julia +@doc raw""" + wc_my_method(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +## Problem statement +Define the function class and assumptions. + +## Algorithm +The iteration, in a `math` fence. + +## Performance metric and initial condition +The quantity being bounded and the normalization, e.g. +``f(x_n) - f_\star \le \tau(L, \gamma, n)\, \|x_0 - x_\star\|^2``. + +## Theoretical guarantee +State whether the bound is **tight**, an **upper**, or a **lower** bound, with +the closed-form ``\tau``. + +## References +Clickable reference(s) to the introducing paper(s). + +# Arguments +- `L`: smoothness parameter. +- `gamma`: step size. +- `n`: number of iterations. + +# Returns +- `pepit_tau`: the worst-case value computed by `PEPit.jl`. +- `theoretical_tau`: the reference value from the literature. +""" +function wc_my_method(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + # build the PEP, solve it, and return (pepit_tau, theoretical_tau) +end +``` + +The Examples overview and per-example pages are then generated automatically by +`docs/make.jl` from this docstring — no manual edit of `examples.md` is needed. + +## Writing the test + +Add a `@testset` that runs the example and compares the computed value to the +theoretical one within a relative tolerance: + +```julia +@testset "my_method" begin + pepit_tau, theoretical_tau = wc_my_method(1.0, 1.0, 5; verbose=false) + @test isapprox(pepit_tau, theoretical_tau; rtol=1e-4) # if the bound is tight + # or, for a (non-tight) upper bound: + # @test pepit_tau <= theoretical_tau * (1 + 1e-4) +end +``` + +## Documentation style + +- Prefer mathematically precise prose with Julia-specific syntax. 
+- Keep public docstrings concise and attach them immediately above the object. +- Use examples that are deterministic and solver-light. +- Link long-running examples instead of executing them in the documentation + build. diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..1ce27b2 --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,45 @@ +# PEPit.jl + +`PEPit.jl` is a Julia implementation of Performance Estimation Programming +for computer-assisted worst-case analysis of first-order algorithms. A user +describes an algorithm in symbolic Julia code, declares the class of functions +or operators under study, specifies initial conditions and performance +metrics, and asks `PEPit.jl` to build and solve the resulting semidefinite +program through JuMP-compatible solvers. + +The package follows the mathematical workflow of Python `PEPit`, but the +documentation here describes the Julia API and implementation. In particular, +points, gradients, and iterates are represented by [`Point`](@ref) objects; +function values and inner products are represented by [`Expression`](@ref) +objects; and [`solve!`](@ref) turns those symbolic relations into a JuMP SDP +with a Gram matrix variable. + +## Documentation map + +- [Quick start](@ref): build and solve a first PEP in Julia. +- [Core workflow](@ref): main PEP objects and solver entry points. +- [Function classes](@ref): interpolation models for convex, smooth, strongly + convex, Lipschitz, and related function classes. +- [Operator classes](@ref): monotone, cocoercive, Lipschitz, nonexpansive, + and linear operator models. +- [Primitive steps](@ref): reusable symbolic building blocks for algorithm + descriptions. +- [Examples](@ref): categorized links to Julia example scripts. +- [Tutorials](@ref): literate Julia, notebook, PDF, and Pluto tutorial assets. + +## Solvers + +The default solver in [`solve!`](@ref) is `Clarabel.Optimizer`. 
The package also +supports other JuMP-compatible conic solvers, including Mosek when a license is +available. Solver choice is passed as a keyword argument: + +```julia +tau = solve!(problem; solver = Clarabel.Optimizer, verbose = false) +``` + +## Explicit dual certificates + +Use [`solve_dual!`](@ref) to build the primal SDP, dualize it with +`Dualization.jl`, and return a [`DualPEPCertificate`](@ref). The certificate +stores multipliers for performance metrics, initial conditions, class +constraints, and PSD blocks. diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md new file mode 100644 index 0000000..4bf383a --- /dev/null +++ b/docs/src/quickstart.md @@ -0,0 +1,135 @@ +# Quick start + +```@meta +CurrentModule = PEPit +``` + +This page gives the standard `PEPit.jl` workflow for a worst-case analysis. The +example computes a guarantee for gradient descent on the class of `L`-smooth +convex functions. The code blocks below are *executed* when this documentation +is built, so the values shown are produced by `PEPit.jl` itself. 
+ +## Installation + +From the Julia package manager, add the package and instantiate its +dependencies: + +```julia +] add https://github.com/PerformanceEstimation/PEPit.jl +``` + +During development from this repository, use the package project: + +```bash +julia --project=PEPit.jl +``` + +## A first PEP + +Load the package, an ordered-dictionary type for the class parameters, and a +conic solver: + +```@example pep +using PEPit +using OrderedCollections +using Clarabel +``` + +Create a PEP and declare the function class (parameters are passed through an +`OrderedDict`): + +```@example pep +problem = PEP() +param = OrderedDict("L" => 3.0) +func = declare_function!(problem, SmoothConvexFunction, param) +nothing # hide +``` + +Declare a stationary point ``x_\star`` (where the gradient vanishes), its +function value ``f_\star``, an initial point ``x_0``, and the initial condition +``\|x_0 - x_\star\|^2 \le 1``: + +```@example pep +xs = stationary_point!(func) +fs = value!(func, xs) +x0 = set_initial_point!(problem) + +set_initial_condition!(problem, (x0 - xs)^2 <= 1) +nothing # hide +``` + +Describe the algorithm symbolically. Here `gradient!(func, x)` creates or reuses +an oracle evaluation according to the function class and its `reuse_gradient` +setting. (The loop assigns to the module-level `x`, hence the `global`.) + +```@example pep +L = 3.0 +gamma = 1 / L +n = 4 + +x = x0 +for _ in 1:n + global x = x - gamma * gradient!(func, x) +end +``` + +Set the performance metric ``f(x_n) - f_\star`` and solve the SDP: + +```@example pep +set_performance_metric!(problem, value!(func, x) - fs) + +pepit_tau = solve!(problem; solver = Clarabel.Optimizer, verbose = false) +``` + +The returned value is the worst-case constant ``\tau`` in the guarantee + +```math +f(x_n) - f_\star \;\leq\; \tau \, \|x_0 - x_\star\|^2 . 
+``` + +## Recovering the worst-case instance + +After [`solve!`](@ref), [`evaluate`](@ref) recovers one numerical realization of +any symbolic [`Point`](@ref) or [`Expression`](@ref) from the solved Gram +matrix. This is how the worst-case iterates and function values are +reconstructed: + +```@example pep +evaluate(x) # coordinates of the last iterate x_n +``` + +```@example pep +evaluate(value!(func, x) - fs) # the attained value f(x_n) - f_star +``` + +## Explicit dual certificate + +The dual multipliers of the SDP form a machine-checkable proof of the bound. +[`solve_dual!`](@ref) builds the primal SDP, dualizes it with `Dualization.jl`, +solves the dual, and returns a [`DualPEPCertificate`](@ref): + +```@example pep +certificate = solve_dual!(problem; verbose = false) +certificate.dual_value # dual objective: matches the primal tau +``` + +The certificate exposes the multiplier blocks — `α` for the performance metric, +`λ`/`ν` for inequality/equality conditions, `θ` for the interpolation +constraints, and `S`/`Y` for the PSD blocks. After solving, [`eval_dual`](@ref) +returns the dual value attached to an individual [`Constraint`](@ref) or +[`PSDMatrix`](@ref). + +## Dimension-reduction heuristics + +Worst-case Gram matrices are often low rank, so a low-dimensional worst-case +example usually exists. [`solve!`](@ref) can post-process the solution with a +trace-minimization step (`tracetrick`) or several log-det iterations +(`logdetiters`) while preserving the optimal value: + +```@example pep +solve!(problem; verbose = false, tracetrick = true) +``` + +Relevant keywords of [`solve!`](@ref): `tracetrick::Bool`, `logdetiters::Int`, +`eig_regularization`, and `tol_dimension_reduction` (the objective degradation +tolerated during reduction). 
diff --git a/docs/src/tutorials.md b/docs/src/tutorials.md new file mode 100644 index 0000000..8880349 --- /dev/null +++ b/docs/src/tutorials.md @@ -0,0 +1,17 @@ +# Tutorials + +The repository contains literate Julia tutorials and generated notebook, PDF, +and Pluto assets. The Documenter site links these checked-in artifacts rather +than executing notebooks during the docs build. + +| Topic | Markdown | Notebook | PDF | +| --- | --- | --- | --- | +| Accelerated gradient, smooth strongly convex minimization | [Markdown](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/unconstrained_convex_minimization/accelerated_gradient_strongly_convex/accelerated_gradient_strongly_convex.tutorial.md) | [Notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/unconstrained_convex_minimization/accelerated_gradient_strongly_convex/accelerated_gradient_strongly_convex.tutorial.ipynb) | [PDF](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/unconstrained_convex_minimization/accelerated_gradient_strongly_convex/accelerated_gradient_strongly_convex.tutorial.pdf) | +| Accelerated inexact forward-backward | [Markdown](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/inexact_proximal_methods/accelerated_inexact_forward_backward.tutorial.md) | [Notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/inexact_proximal_methods/accelerated_inexact_forward_backward.tutorial.ipynb) | [PDF](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/inexact_proximal_methods/accelerated_inexact_forward_backward.tutorial.pdf) | +| Accelerated proximal point for monotone inclusions | [Markdown](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/monotone_inclusions_variational_inequalities/accelerated_proximal_point.tutorial.md) | 
[Notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/monotone_inclusions_variational_inequalities/accelerated_proximal_point.tutorial.ipynb) | [PDF](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/monotone_inclusions_variational_inequalities/accelerated_proximal_point.tutorial.pdf) | +| Online gradient descent | [Markdown](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/online_learning/online_gradient_descent.tutorial.md) | [Notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/online_learning/online_gradient_descent.tutorial.ipynb) | [PDF](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/online_learning/online_gradient_descent.tutorial.pdf) | + +## Pluto tutorials + +- [PEPit Julia tutorial Pluto notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/Pluto_notebook/PEPit_jl_Tutorial.pluto.jl) +- [JuMP-dev 2026 PEPit Pluto notebook](https://github.com/PerformanceEstimation/PEPit.jl/blob/main/tutorials/JuMP_dev_2026/JuMP_dev_2026_PEPit.pluto.jl) diff --git a/examples/adaptive_methods/polyak_steps_in_distance_to_optimum.jl b/examples/adaptive_methods/polyak_steps_in_distance_to_optimum.jl index 6568d1c..1cab3ca 100644 --- a/examples/adaptive_methods/polyak_steps_in_distance_to_optimum.jl +++ b/examples/adaptive_methods/polyak_steps_in_distance_to_optimum.jl @@ -1,5 +1,90 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_polyak_steps_in_distance_to_optimum(L, mu, gamma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_polyak_steps_in_distance_to_optimum`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex, and $x_\star=\arg\min_x f(x)$. 
+ +# Performance metric + +This code computes a worst-case guarantee for a variant of a **gradient method** relying on **Polyak step-sizes** +(PS). That is, it computes the smallest possible $\tau(L, \mu, \gamma)$ such that the guarantee + +```math +\|x_{t+1} - x_\star\|^2 \leqslant \tau(L, \mu, \gamma) \|x_{t} - x_\star\|^2 +``` + +is valid, where $x_t$ is the output of the gradient method with PS and $\gamma$ is the effective +value of the step-size of the gradient method with PS. + +In short, for given values of $L$, $\mu$, and $\gamma$, $\tau(L, \mu, \gamma)$ is +computed as the worst-case value of $\|x_{t+1} - x_\star\|^2$ when +$\|x_{t} - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. The Polyak step-size rule under consideration here corresponds to choosing +a step-size $\gamma$ satisfying: + +```math +\gamma \|\nabla f(x_t)\|^2 = 2 (f(x_t) - f_\star). +``` + +# Theoretical guarantee +The gradient method with the variant of Polyak step-sizes under consideration enjoys the +**tight** theoretical guarantee [1, Proposition 1]: + +```math +\|x_{t+1} - x_\star\|^2 \leqslant \tau(L, \mu, \gamma) \|x_{t} - x_\star\|^2, +``` + + where $\gamma$ is the effective step-size used at iteration $t$ and + +```math + \begin{aligned} + \tau(L, \mu, \gamma) & = & \left\{\begin{array}{ll} \frac{(\gamma L-1)(1-\gamma \mu)}{\gamma(L+\mu)-1} & \text{if } \gamma\in[\tfrac{1}{L},\tfrac{1}{\mu}],\\ + 0 & \text{otherwise.} \end{array}\right. + \end{aligned} +``` +# References + + +[[1] M. Barre, A. Taylor, A. d'Aspremont (2020). +Complexity guarantees for Polyak steps with momentum. +In Conference on Learning Theory (COLT).](https://arxiv.org/pdf/2002.00915.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter.
+- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_polyak_steps_in_distance_to_optimum(1.0, 0.1, 2 / (1.0 + 0.1); solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_polyak_steps_in_distance_to_optimum(L, mu, gamma; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/adaptive_methods/polyak_steps_in_function_value.jl b/examples/adaptive_methods/polyak_steps_in_function_value.jl index 80ddc30..2d8edda 100644 --- a/examples/adaptive_methods/polyak_steps_in_function_value.jl +++ b/examples/adaptive_methods/polyak_steps_in_function_value.jl @@ -1,6 +1,90 @@ using PEPit, Clarabel using OrderedCollections +@doc raw""" + wc_polyak_steps_in_function_value(L, mu, gamma; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_polyak_steps_in_function_value`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex, and $x_\star=\arg\min_x f(x)$. + +# Performance metric + +This code computes a worst-case guarantee for a variant of a **gradient method** relying on **Polyak step-sizes**. +That is, it computes the smallest possible $\tau(L, \mu, \gamma)$ such that the guarantee + +```math +f(x_{t+1}) - f_\star \leqslant \tau(L, \mu, \gamma) (f(x_t) - f_\star) +``` + +is valid, where $x_t$ is the output of the gradient method with PS and $\gamma$ is the effective value +of the step-size of the gradient method. + +In short, for given values of $L$, $\mu$, and $\gamma$, $\tau(L, \mu, \gamma)$ is +computed as the worst-case value of $f(x_{t+1})-f_\star$ when $f(x_t)-f_\star \leqslant 1$. 
+ +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. The Polyak step-size rule under consideration here corresponds to choosing +a step-size $\gamma$ satisfying: + +```math +\|\nabla f(x_t)\|^2 = 2 L (2 - L \gamma) (f(x_t) - f_\star). +``` + +# Theoretical guarantee + +The gradient method with the variant of Polyak step-sizes under consideration enjoys the +**tight** theoretical guarantee [1, Proposition 2]: + +```math +f(x_{t+1})-f_\star \leqslant \tau(L,\mu,\gamma) (f(x_{t})-f_\star), +``` + +where $\gamma$ is the effective step-size used at iteration $t$ and + +```math + \begin{aligned} + \tau(L,\mu,\gamma) & = & \left\{\begin{array}{ll} (\gamma L - 1) (L \gamma (3 - \gamma (L + \mu)) - 1) & \text{if } \gamma\in[\tfrac{1}{L},\tfrac{2L-\mu}{L^2}],\\ + 0 & \text{otherwise.} \end{array}\right. + \end{aligned} +``` +# References + + +[[1] M. Barre, A. Taylor, A. d'Aspremont (2020). +Complexity guarantees for Polyak steps with momentum. +In Conference on Learning Theory (COLT).](https://arxiv.org/pdf/2002.00915.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_polyak_steps_in_function_value(1.0, 0.1, 2 / (1 + 0.1); verbose=true) +``` +""" function wc_polyak_steps_in_function_value(L, mu, gamma; verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/accelerated_douglas_rachford_splitting.jl b/examples/composite_convex_minimization/accelerated_douglas_rachford_splitting.jl index 78c8e84..261780f 100644 --- a/examples/composite_convex_minimization/accelerated_douglas_rachford_splitting.jl +++ b/examples/composite_convex_minimization/accelerated_douglas_rachford_splitting.jl @@ -1,6 +1,94 @@ using PEPit, OrderedCollections, Clarabel, OffsetArrays +@doc raw""" + wc_accelerated_douglas_rachford_splitting(mu, L, alpha, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_douglas_rachford_splitting`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is closed convex and proper, and $f_2$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for **accelerated Douglas-Rachford**. That is, it computes +the smallest possible $\tau(n, L, \mu, \alpha)$ such that the guarantee + +```math +F(y_n) - F(x_\star) \leqslant \tau(n,L,\mu,\alpha) \|w_0 - w_\star\|^2 +``` + +is valid, $\alpha$ is a parameter of the method, and where $y_n$ is the output +of the accelerated Douglas-Rachford Splitting method, where $x_\star$ is a minimizer of $F$, +and $w_\star$ defined such that + +```math +x_\star = \mathrm{prox}_{\alpha f_2}(w_\star) +``` + +is an optimal point. + +In short, for given values of $n$, $L$, $\mu$, $\alpha$, +$\tau(n, L, \mu, \alpha)$ is computed as the worst-case value of $F(y_n)-F_\star$ +when $\|w_0 - w_\star\|^2 \leqslant 1$. 
+ +# Algorithm + +The accelerated Douglas-Rachford splitting is described in [1, Section 4]. For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t} & = & \mathrm{prox}_{\alpha f_2} (u_t),\\ + y_{t} & = & \mathrm{prox}_{\alpha f_1}(2x_t-u_t),\\ + w_{t+1} & = & u_t + \theta (y_t-x_t),\\ + u_{t+1} & = & \left\{\begin{array}{ll} w_{t+1}+\frac{t-1}{t+2}(w_{t+1}-w_t)\, & \text{if } t >1,\\ + w_{t+1} & \text{otherwise.} \end{array}\right. + \end{aligned} +``` +# Theoretical guarantee + +There is no known worst-case guarantee for this method beyond quadratic minimization. +For quadratics, an **upper** bound is provided by [1, Theorem 5]: + +```math +F(y_n) - F_\star \leqslant \frac{2}{\alpha \theta (n + 3)^2} \|w_0-w_\star\|^2, +``` + +when $\theta=\frac{1-\alpha L}{1+\alpha L}$ and $\alpha < \frac{1}{L}$. + +# References + +An analysis of the accelerated Douglas-Rachford splitting is available in [1, Theorem 5] for when the convex +minimization problem is quadratic. + +[[1] P. Patrinos, L. Stella, A. Bemporad (2014). +Douglas-Rachford splitting: Complexity estimates and accelerated variants. +In 53rd IEEE Conference on Decision and Control (CDC).](https://arxiv.org/pdf/1407.6723.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `alpha`: algorithm parameter used in the update rule. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value (upper bound for quadratics; not directly comparable).
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_douglas_rachford_splitting(0.1, 1.0, 0.9, 2; verbose=true) +``` +""" function wc_accelerated_douglas_rachford_splitting(mu, L, alpha, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/composite_convex_minimization/accelerated_proximal_gradient.jl b/examples/composite_convex_minimization/accelerated_proximal_gradient.jl index 210aed0..c1abba0 100644 --- a/examples/composite_convex_minimization/accelerated_proximal_gradient.jl +++ b/examples/composite_convex_minimization/accelerated_proximal_gradient.jl @@ -1,6 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_proximal_gradient(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_proximal_gradient`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f(x) + h(x)\}, +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex, +and where $h$ is closed convex and proper. + +# Performance metric + +This code computes a worst-case guarantee for the **accelerated proximal gradient** method, +also known as **fast proximal gradient (FPGM)** method or FISTA [1]. +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the **accelerated proximal gradient** method, +and where $x_\star$ is a minimizer of $F$. + +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$F(x_n) - F(x_\star)$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +Initialize $\lambda_1=1$, $y_1=x_0$. 
One iteration of FISTA is described by + +```math +\begin{aligned} + \text{Set: }\lambda_{t+1} & = & \frac{1 + \sqrt{4\lambda_t^2 + 1}}{2}\\ + x_t & = & \arg\min_x \left\{h(x)+\frac{L}{2}\|x-\left(y_t - \frac{1}{L} \nabla f(y_t)\right)\|^2 \right\}\\ + y_{t+1} & = & x_t + \frac{\lambda_t-1}{\lambda_{t+1}} (x_t-x_{t-1}). +\end{aligned} +``` +# Theoretical guarantee +The following worst-case guarantee can be found in, e.g., [1, Theorem 4.4]: + +```math +F(x_n)-F_\star \leqslant \frac{L}{2}\frac{\|x_0-x_\star\|^2}{\lambda_n^2}. +``` + +# References + + +[[1] A. Beck, M. Teboulle (2009). +A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse Problems. +SIAM journal on imaging sciences, 2009, vol. 2, no 1, p. 183-202.](https://www.ceremade.dauphine.fr/~carlier/FISTA) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_proximal_gradient(0.0, 1.0, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_proximal_gradient(mu, L, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/composite_convex_minimization/accelerated_proximal_gradient_simplified.jl b/examples/composite_convex_minimization/accelerated_proximal_gradient_simplified.jl index 4335baa..1840749 100644 --- a/examples/composite_convex_minimization/accelerated_proximal_gradient_simplified.jl +++ b/examples/composite_convex_minimization/accelerated_proximal_gradient_simplified.jl @@ -1,6 +1,83 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_proximal_gradient_simplified(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_proximal_gradient_simplified`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f(x) + h(x)\}, +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex, +and where $h$ is closed convex and proper. + +# Performance metric + +This code computes a worst-case guarantee for the **accelerated proximal gradient** method, +also known as **fast proximal gradient (FPGM)** method. +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the **accelerated proximal gradient** method, +and where $x_\star$ is a minimizer of $F$. + +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$F(x_n) - F(x_\star)$ when $\|x_0 - x_\star\|^2 \leqslant 1$. 
+ +# Algorithm +Accelerated proximal gradient is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +\begin{aligned} + x_{t+1} & = & \arg\min_x \left\{h(x)+\frac{L}{2}\|x-\left(y_{t} - \frac{1}{L} \nabla f(y_t)\right)\|^2 \right\}, \\ + y_{t+1} & = & x_{t+1} + \frac{t}{t+3} (x_{t+1} - x_{t}), +\end{aligned} +``` +where $y_{0} = x_0$. + +# Theoretical guarantee +A **tight** (empirical) worst-case guarantee for FPGM is obtained in +[1, method FPGM1 in Sec. 4.2.1, Table 1 in Sec. 4.2.2], for $\mu=0$: + +```math +F(x_n) - F_\star \leqslant \frac{2 L}{n^2+5n+2} \|x_0 - x_\star\|^2, +``` + +which is attained on simple one-dimensional constrained linear optimization problems. + +# References + + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2017). +Exact worst-case performance of first-order methods for composite convex optimization. +SIAM Journal on Optimization, 27(3):1283-1313.](https://arxiv.org/pdf/1512.07516.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_proximal_gradient_simplified(0.0, 1.0, 4; verbose=true) +``` +""" function wc_accelerated_proximal_gradient_simplified(mu, L, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/composite_convex_minimization/bregman_proximal_point.jl b/examples/composite_convex_minimization/bregman_proximal_point.jl index abea970..6b0a005 100644 --- a/examples/composite_convex_minimization/bregman_proximal_point.jl +++ b/examples/composite_convex_minimization/bregman_proximal_point.jl @@ -1,6 +1,71 @@ using PEPit using OrderedCollections +@doc raw""" + wc_bregman_proximal_point(gamma, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_bregman_proximal_point`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x)+f_2(x) \} +``` + +where $f_1(x)$ and $f_2(x)$ are closed convex proper functions. + +# Performance metric + +This code computes a worst-case guarantee for **Bregman Proximal Point** method. +That is, it computes the smallest possible $\tau(n, \gamma)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, \gamma) D_{f_1}(x_\star; x_0) +``` + +is valid, where $x_n$ is the output of the **Bregman Proximal Point** (BPP) method, +where $x_\star$ is a minimizer of $F$, and when $D_{f_1}$ is the Bregman distance +generated by $f_1$. + +# Algorithm +Bregman proximal point is described in [1, Section 2, equation (9)]. For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t+1} & = & \arg\min_{u \in R^n} f_1(u) + \frac{1}{\gamma} D_{f_2}(u; x_t), \\ + D_h(x; y) & = & h(x) - h(y) - \nabla h (y)^T(x - y). + \end{aligned} +``` +# Theoretical guarantee +A **tight** empirical guarantee can be guessed from the numerics + +```math +F(x_n) - F(x_\star) \leqslant \frac{1}{\gamma n} D_{f_1}(x_\star, x_0). +``` + +# References + + +[[1] Y. Censor, S.A. Zenios (1992). 
+Proximal minimization algorithm with D-functions. +Journal of Optimization Theory and Applications, 73(3), 451-464.](https://link.springer.com/content/pdf/10.1007/BF00940051.pdf) + +# Arguments +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_bregman_proximal_point(3.0, 5; verbose=true) +``` +""" function wc_bregman_proximal_point(gamma, n; verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/douglas_rachford_splitting.jl b/examples/composite_convex_minimization/douglas_rachford_splitting.jl index e8c155d..1f52bf4 100644 --- a/examples/composite_convex_minimization/douglas_rachford_splitting.jl +++ b/examples/composite_convex_minimization/douglas_rachford_splitting.jl @@ -2,6 +2,84 @@ using PEPit using OrderedCollections using OffsetArrays +@doc raw""" + wc_douglas_rachford_splitting(L, alpha, theta, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_douglas_rachford_splitting`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x)+f_2(x) \} +``` + +where $f_1(x)$ is convex, closed and proper, and $f_2$ is $L$-smooth. +Both proximal operators are assumed to be available. + +# Performance metric + +This code computes a worst-case guarantee for the **Douglas Rachford Splitting (DRS)** method. +That is, it computes the smallest possible $\tau(n, L, \alpha, \theta)$ such that the guarantee + +```math +F(y_n) - F(x_\star) \leqslant \tau(n, L, \alpha, \theta) \|x_0 - x_\star\|^2. +``` + +is valid, where it is known that $x_k$ and $y_k$ converge to $x_\star$, but not $w_k$ +(see definitions in the Algorithm +section).
Hence we require the initial condition on $x_0$ +(arbitrary choice, partially justified by the fact we choose $f_2$ to be the smooth function). + +Note that $y_n$ is feasible as it +has a finite value for $f_1$ (output of the proximal operator on $f_1$) and as $f_2$ is smooth. + +# Algorithm + + +Our notations for the DRS method are as follows, for $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_t & = & \mathrm{prox}_{\alpha f_2}(w_t), \\ + y_t & = & \mathrm{prox}_{\alpha f_1}(2x_t - w_t), \\ + w_{t+1} & = & w_t + \theta (y_t - x_t). + \end{aligned} +``` +This description can be found in [1, Section 7.3]. + +# Theoretical guarantee +We compare the output with that of PESTO [2] for when $0\leqslant n \leqslant 10$ +in the case where $\alpha=\theta=L=1$. + +# References + + +[[1] E. Ryu, S. Boyd (2016). +A primer on monotone operator methods. +Applied and Computational Mathematics 15(1), 3-43.](https://web.stanford.edu/~boyd/papers/pdf/monotone_primer.pdf) + +[[2] A. Taylor, J. Hendrickx, F. Glineur (2017). +Performance Estimation Toolbox (PESTO): automated worst-case analysis of first-order optimization methods. +In 56th IEEE Conference on Decision and Control (CDC).](https://github.com/AdrienTaylor/Performance-Estimation-Toolbox) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_douglas_rachford_splitting(1.0, 1.0, 1.0, 9; verbose=true) +``` +""" function wc_douglas_rachford_splitting(L, alpha, theta, n; verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/douglas_rachford_splitting_contraction.jl b/examples/composite_convex_minimization/douglas_rachford_splitting_contraction.jl index f11e0fc..800c2ca 100644 --- a/examples/composite_convex_minimization/douglas_rachford_splitting_contraction.jl +++ b/examples/composite_convex_minimization/douglas_rachford_splitting_contraction.jl @@ -1,6 +1,95 @@ using PEPit using OrderedCollections +@doc raw""" + wc_douglas_rachford_splitting_contraction(mu, L, alpha, theta, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_douglas_rachford_splitting_contraction`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x) \} +``` + +where $f_1(x)$ is $L$-smooth and $\mu$-strongly convex, and $f_2$ is convex, +closed and proper. Both proximal operators are assumed to be available. + +# Performance metric + +This code computes a worst-case guarantee for the **Douglas Rachford Splitting (DRS)** method. +That is, it computes the smallest possible $\tau(\mu,L,\alpha,\theta,n)$ such that the guarantee + +```math +\|w_1 - w_1'\|^2 \leqslant \tau(\mu,L,\alpha,\theta,n) \|w_0 - w_0'\|^2. +``` + +is valid, where $x_n$ is the output of the **Douglas Rachford Splitting method**. It is a contraction +factor computed when the algorithm is started from two different points $w_0$ and $w_0'$. + +# Algorithm + + +Our notations for the DRS method are as follows [3, Section 7.3], for $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_t & = & \mathrm{prox}_{\alpha f_2}(w_t), \\ + y_t & = & \mathrm{prox}_{\alpha f_1}(2x_t - w_t), \\ + w_{t+1} & = & w_t + \theta (y_t - x_t).
+ \end{aligned} +``` +# Theoretical guarantee + + +The **tight** theoretical guarantee is obtained in [2, Theorem 2]: + +```math +\|w_1 - w_1'\|^2 \leqslant \max\left(\frac{1}{1 + \mu \alpha}, \frac{\alpha L }{1 + L \alpha}\right)^{2n} \|w_0 - w_0'\|^2 +``` + +for when $\theta=1$. + +# References + + +Details on the SDP formulations can be found in + +[[1] E. Ryu, A. Taylor, C. Bergeling, P. Giselsson (2020). +Operator splitting performance estimation: Tight contraction factors and optimal parameter selection. +SIAM Journal on Optimization, 30(3), 2251-2271.](https://arxiv.org/pdf/1812.00146.pdf) + +When $\theta = 1$, the bound can be compared with that of [2, Theorem 2] + +[[2] P. Giselsson, S. Boyd (2016). +Linear convergence and metric selection in Douglas-Rachford splitting and ADMM. +IEEE Transactions on Automatic Control, 62(2), 532-544.](https://arxiv.org/pdf/1410.8479.pdf) + +A description of the DRS method can be found in [3, Section 7.3]. + +[[3] E. Ryu, S. Boyd (2016). +A primer on monotone operator methods. +Applied and Computational Mathematics 15(1), 3-43.](https://web.stanford.edu/~boyd/papers/pdf/monotone_primer.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_douglas_rachford_splitting_contraction(0.1, 1.0, 3.0, 1.0, 2; verbose=true) +``` +""" function wc_douglas_rachford_splitting_contraction(mu, L, alpha, theta, n; verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/frank_wolfe.jl b/examples/composite_convex_minimization/frank_wolfe.jl index e7ec2df..2cc37f4 100644 --- a/examples/composite_convex_minimization/frank_wolfe.jl +++ b/examples/composite_convex_minimization/frank_wolfe.jl @@ -1,6 +1,86 @@ using PEPit using OrderedCollections +@doc raw""" + wc_frank_wolfe(L, D, R, center, n; verbose::Bool=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_frank_wolfe`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is $L$-smooth and convex +and where $f_2$ is a convex indicator function on $\mathcal{D}$ +of diameter at most $D$ and radius at most $R$ around `center`. + +# Performance metric + +This code computes a worst-case guarantee for the **conditional gradient** method, aka **Frank-Wolfe** method. +That is, it computes the smallest possible $\tau(n, L, D, R)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, L, D, R), +``` + +is valid, where $x_n$ is the output of the **conditional gradient** method, +and where $x_\star$ is a minimizer of $F$. +In short, for given values of $n$, $L$, $D$ and $R$, $\tau(n, L, D, R)$ +is computed as the worst-case value of $F(x_n) - F(x_\star)$. + +# Algorithm + + +This method was first presented in [1]. A more recent version can be found in, e.g., [2, Algorithm 1]. +For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + y_t & = & \arg\min_{s \in \mathcal{D}} \langle s \mid \nabla f_1(x_t) \rangle, \\ + x_{t+1} & = & \frac{t}{t + 2} x_t + \frac{2}{t + 2} y_t.
+ \end{aligned} +``` +# Theoretical guarantee + + +An **upper** bound obtained in [2, Theorem 1] when $R = \infty$ is + +```math +F(x_n) - F(x_\star) \leqslant \frac{2L D^2}{n+2}. +``` + +# References + + +[[1] M. Frank, P. Wolfe (1956). +An algorithm for quadratic programming. +Naval research logistics quarterly, 3(1-2), 95-110.](https://arxiv.org/pdf/1608.04826.pdf) + +[[2] M. Jaggi (2013). +Revisiting Frank-Wolfe: Projection-free sparse convex optimization. +In 30th International Conference on Machine Learning (ICML).](http://proceedings.mlr.press/v28/jaggi13.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `D`: diameter of $\mathcal{D}$. +- `R`: radius of $\mathcal{D}$. +- `center`: center of $\mathcal{D}$. If `nothing`, the radius constraint must be observed to one center. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_frank_wolfe(1.0, 1.0, Inf, nothing, 10; verbose=true) +``` +""" function wc_frank_wolfe(L, D, R, center, n; verbose::Bool=true) problem = PEP() diff --git a/examples/composite_convex_minimization/improved_interior_algorithm.jl b/examples/composite_convex_minimization/improved_interior_algorithm.jl index c217a62..4469338 100644 --- a/examples/composite_convex_minimization/improved_interior_algorithm.jl +++ b/examples/composite_convex_minimization/improved_interior_algorithm.jl @@ -1,6 +1,83 @@ using PEPit using OrderedCollections +@doc raw""" + wc_improved_interior_algorithm(L, mu, c, lam, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_improved_interior_algorithm`.
+ +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is a $L$-smooth convex function, and $f_2$ is a closed convex indicator function. +We use a kernel function $h$ that is assumed to be closed, proper, and strongly convex (see [1, Section 5]). + +# Performance metric + +This code computes a worst-case guarantee for **Improved interior gradient algorithm** (IGA). +That is, it computes the smallest possible $\tau(\mu,L,c,\lambda,n)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(\mu,L,c,\lambda,n) (c D_h(x_\star;x_0) + f_1(x_0) - f_1(x_\star)) +``` + +is valid, where $x_n$ is the output of the IGA and where $x_\star$ is a minimizer of $F$ and +$D_h$ is the Bregman distance generated by $h$. + +In short, for given values of $\mu$, $L$, $c$, $\lambda$ and $n$, +$\tau(\mu,L,c,\lambda,n)$ is computed as the worst-case value of $F(x_n)-F_\star$ +when $c D_h(x_\star;x_0) + f_1(x_0) - f_1(x_\star)\leqslant 1$. + +# Algorithm + +The IGA is described in [1, "Improved Interior Gradient Algorithm"]. For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + \alpha_t & = & \frac{\sqrt{(c_t\lambda)^2+4c_t\lambda}-\lambda c_t}{2},\\ + y_t & = & (1-\alpha_t) x_t + \alpha_t z_t,\\ + c_{t+1} & = & (1-\alpha_t)c_t,\\ + z_{t+1} & = & \arg\min_{z} \left\{ \left< z;\frac{\alpha_t}{c_{t+1}}\nabla f_1(y_t)\right> +f_2(z)+D_h(z;z_t)\right\}, \\ + x_{t+1} & = & (1-\alpha_t) x_t + \alpha_t z_{t+1}. + \end{aligned} +``` +# Theoretical guarantee + +The following **upper** bound can be found in [1, Theorem 5.2]: + +```math +F(x_n) - F_\star \leqslant \frac{4L}{c n^2}\left(c D_h(x_\star;x_0) + f_1(x_0) - f_1(x_\star) \right). +``` + +# References + + +[[1] A. Auslender, M. Teboulle (2006). +Interior gradient and proximal methods for convex and conic optimization. 
+SIAM Journal on Optimization 16.3 (2006): 697-725.](https://epubs.siam.org/doi/pdf/10.1137/S1052623403427823) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `c`: initial value. +- `lam`: the step-size. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_improved_interior_algorithm(1.0, 1.0, 1.0, 1.0, 5; verbose=true) +``` +""" function wc_improved_interior_algorithm(L, mu, c, lam, n; verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/no_lips_in_bregman_divergence.jl b/examples/composite_convex_minimization/no_lips_in_bregman_divergence.jl index 50516f9..d135fc6 100644 --- a/examples/composite_convex_minimization/no_lips_in_bregman_divergence.jl +++ b/examples/composite_convex_minimization/no_lips_in_bregman_divergence.jl @@ -1,5 +1,83 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_no_lips_in_bregman_divergence(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_no_lips_in_bregman_divergence`. + +Consider the constrained composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is convex and $L$-smooth relatively to $h$, +$h$ being closed proper and convex, +and where $f_2$ is a closed convex indicator function. + +# Performance metric + +This code computes a worst-case guarantee for the **NoLips** method.
+That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +\min_{t\leqslant n} D_h(x_{t-1}; x_t) \leqslant \tau(n, L) D_h(x_\star; x_0), +``` + +is valid, where $x_n$ is the output of the **NoLips** method, +where $x_\star$ is a minimizer of $F$, +and where $D_h$ is the Bregman divergence generated by $h$. +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case value of +$\min_{t\leqslant n} D_h(x_{t-1}; x_t)$ when $D_h(x_\star; x_0) \leqslant 1$. + +# Algorithm +This method (also known as Bregman Gradient, or Mirror descent) can be found in, +e.g., [2, Algorithm 1]. For $t \in \{0, \dots, n-1\}$, + +```math +x_{t+1} = \arg\min_{u} \{f_2(u)+\langle \nabla f_1(x_t) \mid u - x_t \rangle + \frac{1}{\gamma} D_h(u; x_t)\}. +``` + +# Theoretical guarantee + +The **upper** guarantee obtained in [2, Proposition 4] is + +```math +\min_{t\leqslant n} D_h(x_{t-1}; x_t) \leqslant \frac{2}{n (n - 1)} D_h(x_\star; x_0), +``` + +for any $\gamma \leq \frac{1}{L}$. It is empirically tight. + +# References + + +[[1] H.H. Bauschke, J. Bolte, M. Teboulle (2017). +A Descent Lemma Beyond Lipschitz Gradient Continuity: First-Order Methods Revisited and Applications. +Mathematics of Operations Research, 2017, vol. 42, no 2, p. 330-348.](https://cmps-people.ok.ubc.ca/bauschke/Research/103.pdf) + +[[2] R. Dragomir, A. Taylor, A. d'Aspremont, J. Bolte (2021). +Optimal complexity and certification of Bregman first-order methods. +Mathematical Programming, 1-43.](https://arxiv.org/pdf/1911.08510.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_no_lips_in_bregman_divergence(1.0, 1.0, 10; verbose=true) +``` +""" function wc_no_lips_in_bregman_divergence(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/no_lips_in_function_value.jl b/examples/composite_convex_minimization/no_lips_in_function_value.jl index 58e6116..caebdec 100644 --- a/examples/composite_convex_minimization/no_lips_in_function_value.jl +++ b/examples/composite_convex_minimization/no_lips_in_function_value.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_no_lips_in_function_value(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_no_lips_in_function_value`. + +Consider the constrained composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is convex and $L$-smooth relative to $h$, +$h$ being closed proper and convex, +and where $f_2$ is a closed convex indicator function. + +# Performance metric + +This code computes a worst-case guarantee for the **NoLips** method. +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +F(x_n) - F_\star \leqslant \tau(n, L) D_h(x_\star; x_0), +``` + +is valid, where $x_n$ is the output of the **NoLips** method, +where $x_\star$ is a minimizer of $F$, +and where $D_h$ is the Bregman divergence generated by $h$. +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case value of +$F(x_n) - F_\star$ when $D_h(x_\star; x_0) \leqslant 1$. + +# Algorithm +This method (also known as Bregman Gradient, or Mirror descent) can be found in, +e.g., [2, Algorithm 1]. For $t \in \{0, \dots, n-1\}$, + +```math +x_{t+1} = \arg\min_{u} \{f_2(u)+\langle \nabla f_1(x_t) \mid u - x_t \rangle + \frac{1}{\gamma} D_h(u; x_t)\}. 
+``` + +# Theoretical guarantee + + +The **tight** guarantee obtained in [2, Theorem 1] is + +```math +F(x_n) - F_\star \leqslant \frac{1}{\gamma n} D_h(x_\star; x_0), +``` + +for any $\gamma \leq \frac{1}{L}$; tightness is provided in [2, page 23]. + +# References +NoLips was proposed [1] for convex problems involving relative smoothness. +The worst-case analysis using a PEP, as well as the tightness are provided in [2]. + +[[1] H.H. Bauschke, J. Bolte, M. Teboulle (2017). +A Descent Lemma Beyond Lipschitz Gradient Continuity: First-Order Methods Revisited and Applications. +Mathematics of Operations Research, 2017, vol. 42, no 2, p. 330-348.](https://cmps-people.ok.ubc.ca/bauschke/Research/103.pdf) + +[[2] R. Dragomir, A. Taylor, A. d'Aspremont, J. Bolte (2021). +Optimal complexity and certification of Bregman first-order methods. +Mathematical Programming, 1-43.](https://arxiv.org/pdf/1911.08510.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_no_lips_in_function_value(1.0, 0.5, 3; verbose=true) +``` +""" function wc_no_lips_in_function_value(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/composite_convex_minimization/proximal_gradient.jl b/examples/composite_convex_minimization/proximal_gradient.jl index 7316a63..b7e6cb0 100644 --- a/examples/composite_convex_minimization/proximal_gradient.jl +++ b/examples/composite_convex_minimization/proximal_gradient.jl @@ -1,6 +1,94 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_proximal_gradient(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_proximal_gradient`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is $L$-smooth and $\mu$-strongly convex, +and where $f_2$ is closed convex and proper. + +# Performance metric + +This code computes a worst-case guarantee for the **proximal gradient** method (PGM). +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +\|x_n - x_\star\|^2 \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the **proximal gradient**, +and where $x_\star$ is a minimizer of $F$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$\|x_n - x_\star\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +Proximal gradient is described by + +```math + \begin{aligned} + y_t & = & x_t - \gamma \nabla f_1(x_t), \\ + x_{t+1} & = & \arg\min_x \left\{f_2(x)+\frac{1}{2\gamma}\|x-y_t\|^2 \right\}, + \end{aligned} +``` +for $t \in \{ 0, \dots, n-1\}$ and where $\gamma$ is a step-size. 
+ +# Theoretical guarantee +It is well known that a **tight** guarantee for PGM is provided by + +```math +\|x_n - x_\star\|^2 \leqslant \max\{(1-L\gamma)^2,(1-\mu\gamma)^2\}^n \|x_0 - x_\star\|^2, +``` + +which can be found in, e.g., [1, Theorem 3.1]. It is folk knowledge and the result can be found in many references +for gradient descent; see, e.g., [2, Section 1.4: Theorem 3], [3, Section 5.1] and [4, Section 4.4]. + +# References + + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2018). +Exact worst-case convergence rates of the proximal gradient method for composite convex minimization. +Journal of Optimization Theory and Applications, 178(2), 455-476.](https://arxiv.org/pdf/1705.04398.pdf) + +[[2] B. Polyak (1987). +Introduction to Optimization. +Optimization Software New York.](https://www.researchgate.net/profile/Boris-Polyak-2/publication/342978480_Introduction_to_Optimization/links/5f1033e5299bf1e548ba4636/Introduction-to-Optimization.pdf) + +[[3] E. Ryu, S. Boyd (2016). +A primer on monotone operator methods. +Applied and Computational Mathematics 15(1), 3-43.](https://web.stanford.edu/~boyd/papers/pdf/monotone_primer.pdf) + +[[4] L. Lessard, B. Recht, A. Packard (2016). +Analysis and design of optimization algorithms via integral quadratic constraints. +SIAM Journal on Optimization 26(1), 57-95.](https://arxiv.org/pdf/1408.3595.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_proximal_gradient(1.0, 0.1, 1.0, 2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_proximal_gradient(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/composite_convex_minimization/proximal_gradient_quadratics.jl b/examples/composite_convex_minimization/proximal_gradient_quadratics.jl index 1e0cb92..70988b2 100644 --- a/examples/composite_convex_minimization/proximal_gradient_quadratics.jl +++ b/examples/composite_convex_minimization/proximal_gradient_quadratics.jl @@ -1,6 +1,94 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_proximal_gradient_quadratics(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_proximal_gradient_quadratics`. + +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is $L$-smooth, $\mu$-strongly convex and quadratic, +and where $f_2$ is closed convex and proper. + +# Performance metric + +This code computes a worst-case guarantee for the **proximal gradient** method (PGM). +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +\|x_n - x_\star\|^2 \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the **proximal gradient**, +and where $x_\star$ is a minimizer of $F$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$\|x_n - x_\star\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +Proximal gradient is described by + +```math + \begin{aligned} + y_t & = & x_t - \gamma \nabla f_1(x_t), \\ + x_{t+1} & = & \arg\min_x \left\{f_2(x)+\frac{1}{2\gamma}\|x-y_t\|^2 \right\}, + \end{aligned} +``` +for $t \in \{ 0, \dots, n-1\}$ and where $\gamma$ is a step-size. 
+ +# Theoretical guarantee +It is well known that a **tight** guarantee for PGM is provided by + +```math +\|x_n - x_\star\|^2 \leqslant \max\{(1-L\gamma)^2,(1-\mu\gamma)^2\}^n \|x_0 - x_\star\|^2, +``` + +which can be found in, e.g., [1, Theorem 3.1]. It is folk knowledge and the result can be found in many references +for gradient descent; see, e.g., [2, Section 1.4: Theorem 3], [3, Section 5.1] and [4, Section 4.4]. + +# References + + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2018). +Exact worst-case convergence rates of the proximal gradient method for composite convex minimization. +Journal of Optimization Theory and Applications, 178(2), 455-476.](https://arxiv.org/pdf/1705.04398.pdf) + +[[2] B. Polyak (1987). +Introduction to Optimization. +Optimization Software New York.](https://www.researchgate.net/profile/Boris-Polyak-2/publication/342978480_Introduction_to_Optimization/links/5f1033e5299bf1e548ba4636/Introduction-to-Optimization.pdf) + +[[3] E. Ryu, S. Boyd (2016). +A primer on monotone operator methods. +Applied and Computational Mathematics 15(1), 3-43.](https://web.stanford.edu/~boyd/papers/pdf/monotone_primer.pdf) + +[[4] L. Lessard, B. Recht, A. Packard (2016). +Analysis and design of optimization algorithms via integral quadratic constraints. +SIAM Journal on Optimization 26(1), 57-95.](https://arxiv.org/pdf/1408.3595.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_proximal_gradient_quadratics(1.0, 0.1, 1.0, 2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_proximal_gradient_quadratics(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/composite_convex_minimization/three_operator_splitting.jl b/examples/composite_convex_minimization/three_operator_splitting.jl index 7344a06..334eff4 100644 --- a/examples/composite_convex_minimization/three_operator_splitting.jl +++ b/examples/composite_convex_minimization/three_operator_splitting.jl @@ -1,6 +1,77 @@ using PEPit, OrderedCollections +@doc raw""" + wc_three_operator_splitting(mu1, L1, L3, alpha, theta, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_three_operator_splitting`. + +Consider the composite convex minimization problem, + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x) + f_3(x)\} +``` + +where $f_1$ is $L_1$-smooth and $\mu_1$-strongly convex, +$f_2$ is closed, convex and proper, +and $f_3$ is $L_3$-smooth convex. +Proximal operators are assumed to be available for $f_1$ and $f_2$. + +# Performance metric + +This code computes the worst-case guarantee of the contraction achieved by the **Three Operator Splitting (TOS)**. +That is, it computes the smallest possible $\tau(n, L_1, L_3, \mu_1, \alpha, \theta)$ such that the guarantee + +```math +\|w^{(0)}_{n} - w^{(1)}_{n}\|^2 \leqslant \tau(n, L_1, L_3, \mu_1, \alpha, \theta) \|w^{(0)}_{0} - w^{(1)}_{0}\|^2 +``` + +is valid, where $w^{(0)}_{0}$ and $w^{(1)}_{0}$ are two different starting points +and $w^{(0)}_{n}$ and $w^{(1)}_{n}$ are the two corresponding $n^{\mathrm{th}}$ outputs of TOS. + +In short, for given values of $n$, $L_1$, $L_3$, $\mu_1$, $\alpha$ +and $\theta$, the contraction factor $\tau(n, L_1, L_3, \mu_1, \alpha, \theta)$ +is computed as the worst-case value of $\|w^{(0)}_{n} - w^{(1)}_{n}\|^2$ +when $\|w^{(0)}_{0} - w^{(1)}_{0}\|^2 \leqslant 1$. 
+ +# Algorithm + +One iteration of the algorithm is described in [1]. For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_t & = & \mathrm{prox}_{\alpha, f_2}(w_t), \\ + y_t & = & \mathrm{prox}_{\alpha, f_1}(2 x_t - w_t - \alpha \nabla f_3(x_t)), \\ + w_{t+1} & = & w_t + \theta (y_t - x_t). + \end{aligned} +``` +# References +The TOS was introduced in [1]. + +[[1] D. Davis, W. Yin (2017). +A three-operator splitting scheme and its optimization applications. +Set-valued and variational analysis, 25(4), 829-858.](https://arxiv.org/pdf/1504.01032.pdf) + +# Arguments +- `mu1`: the strong convexity parameter of function $f_1$. +- `L1`: the smoothness parameter of function $f_1$. +- `L3`: the smoothness parameter of function $f_3$. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_three_operator_splitting(0.1, 10.0, 1.0, 1.0, 1.0, 4; verbose=true) +``` +""" function wc_three_operator_splitting(mu1, L1, L3, alpha, theta, n; verbose=true) diff --git a/examples/continuous_time_models/accelerated_gradient_flow_convex.jl b/examples/continuous_time_models/accelerated_gradient_flow_convex.jl index 42dbec2..f3e175b 100644 --- a/examples/continuous_time_models/accelerated_gradient_flow_convex.jl +++ b/examples/continuous_time_models/accelerated_gradient_flow_convex.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_gradient_flow_convex(t; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_flow_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is convex. 
+ +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient** flow. +That is, it verifies the inequality + +```math +\frac{d}{dt}\mathcal{V}(X_t, t) \leqslant 0 , +``` + +is valid, where $\mathcal{V}(X_t, t) = t^2(f(X_t) - f(x_\star)) + 2 \|(X_t - x_\star) + \frac{t}{2}\frac{d}{dt}X_t \|^2$, +$X_t$ is the output of an **accelerated gradient** flow, and where $x_\star$ is the minimizer of $f$. + +In short, for given values of $t$, it verifies $\frac{d}{dt}\mathcal{V}(X_t, t) \leqslant 0$. + +# Algorithm + +For $t \geqslant 0$, + +```math +\frac{d^2}{dt^2}X_t + \frac{3}{t}\frac{d}{dt}X_t + \nabla f(X_t) = 0, +``` + +with some initialization $X_{0}\triangleq x_0$. + +# Theoretical guarantee + + + The following **tight** guarantee can be verified in [1, Section 2]: + +```math +\frac{d}{dt}\mathcal{V}(X_t, t) \leqslant 0. +``` + + After integrating between $0$ and $T$, + +```math +f(X_T) - f_\star \leqslant \frac{2}{T^2}\|x_0 - x_\star\|^2. +``` + + The detailed approach using PEPs is available in [2, Theorem 2.6]. + +# References + + +[[1] W. Su, S. Boyd, E. J. Candes (2016). +A differential equation for modeling Nesterov's accelerated gradient method: Theory and insights. +In the Journal of Machine Learning Research (JMLR).](https://jmlr.org/papers/volume17/15-084/15-084.pdf) + +[[2] C. Moucer, A. Taylor, F. Bach (2022). +A systematic approach to Lyapunov analyses of continuous-time models in convex optimization.](https://arxiv.org/pdf/2205.12772.pdf) + +# Arguments +- `t`: time step +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_gradient_flow_convex(3.4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_gradient_flow_convex(t; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/continuous_time_models/accelerated_gradient_flow_strongly_convex.jl b/examples/continuous_time_models/accelerated_gradient_flow_strongly_convex.jl index d6fb690..1716b54 100644 --- a/examples/continuous_time_models/accelerated_gradient_flow_strongly_convex.jl +++ b/examples/continuous_time_models/accelerated_gradient_flow_strongly_convex.jl @@ -1,5 +1,100 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_gradient_flow_strongly_convex(mu; psd=true, solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_flow_strongly_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_{x\in\mathbb{R}^d} f(x), +``` + +where $f$ is $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient** flow. +That is, it computes the smallest possible $\tau(\mu)$ such that the guarantee + +```math +\frac{d}{dt}\mathcal{V}_{P}(X_t) \leqslant -\tau(\mu)\mathcal{V}_P(X_t) , +``` + +is valid with + +```math +\mathcal{V}_{P}(X_t) = f(X_t) - f(x_\star) + (X_t - x_\star, \frac{d}{dt}X_t)^T(P \otimes I_d)(X_t - x_\star, \frac{d}{dt}X_t) , +``` + +where $I_d$ is the identity matrix, $X_t$ is the output of an **accelerated gradient** flow, +and where $x_\star$ is the minimizer of $f$. + +In short, for given values of $\mu$, $\tau(\mu)$ is computed as the worst-case value of +the derivative of $f(X_t)-f_\star$ when $f(X_t) - f(x_\star)\leqslant 1$. 
+ +# Algorithm + +For $t \geqslant 0$, + +```math +\frac{d^2}{dt^2}X_t + 2\sqrt{\mu}\frac{d}{dt}X_t + \nabla f(X_t) = 0, +``` + +with some initialization $X_{0}\triangleq x_0$. + +# Theoretical guarantee + + + The following **tight** guarantee for $P = \frac{1}{2}\begin{pmatrix} \mu & \sqrt{\mu} \\ \sqrt{\mu} & 1\end{pmatrix}$, + for which $\mathcal{V}_{P} \geqslant 0$ can be found in [1, Appendix B], [2, Theorem 4.3]: + +```math +\frac{d}{dt}\mathcal{V}_P(X_t) \leqslant -\sqrt{\mu}\mathcal{V}_P(X_t). +``` + + For $P = \begin{pmatrix} \frac{4}{9}\mu & \frac{4}{3}\sqrt{\mu} \\ \frac{4}{3}\sqrt{\mu} & \frac{1}{2}\end{pmatrix}$, + for which $\mathcal{V}_{P}(X_t) \geqslant 0$ along the trajectory, the following **tight** guarantee can + be found in [3, Corollary 2.5], + +```math +\frac{d}{dt}\mathcal{V}_P(X_t) \leqslant -\frac{4}{3}\sqrt{\mu}\mathcal{V}_P(X_t). +``` + + +# References + + +[[1] A. C. Wilson, B. Recht, M. I. Jordan (2021). +A Lyapunov analysis of accelerated methods in optimization. +In the Journal of Machine Learning Research (JMLR), 22(113):1-34, 2021.](https://jmlr.org/papers/volume22/20-195/20-195.pdf) + +[[2] J.M. Sanz-Serna and K. C. Zygalakis (2021). +The connections between Lyapunov functions for some optimization algorithms and differential equations. +In SIAM Journal on Numerical Analysis, 59 pp 1542-1565.](https://arxiv.org/pdf/2009.00673.pdf) + +[[3] C. Moucer, A. Taylor, F. Bach (2022). +A systematic approach to Lyapunov analyses of continuous-time models in convex optimization. +In SIAM Journal on Optimization 33 (3), 1558-1586.](https://arxiv.org/pdf/2205.12772.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `psd`: option for positivity of $P$ in the Lyapunov function $\mathcal{V}_{P}$ +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_gradient_flow_strongly_convex(0.1; psd=true, solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_gradient_flow_strongly_convex(mu; psd=true, solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/continuous_time_models/gradient_flow_convex.jl b/examples/continuous_time_models/gradient_flow_convex.jl index 8fa5709..b8a2542 100644 --- a/examples/continuous_time_models/gradient_flow_convex.jl +++ b/examples/continuous_time_models/gradient_flow_convex.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_flow_convex(t; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_flow_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is convex. + +# Performance metric + +This code computes a worst-case guarantee for a **gradient** flow. +That is, it verifies the following inequality + +```math +\frac{d}{dt}\mathcal{V}(X_t, t) \leqslant 0, +``` + +is valid, where $\mathcal{V}(X_t, t) = t(f(X_t) - f(x_\star)) + \frac{1}{2} \|X_t - x_\star\|^2$, +$X_t$ is the output of the **gradient** flow, and where $x_\star$ is the minimizer of $f$. +In short, for given values of $t$, it verifies $\frac{d}{dt}\mathcal{V}(X_t, t)\leqslant 0$. + +# Algorithm + +For $t \geqslant 0$, + +```math +\frac{d}{dt}X_t = -\nabla f(X_t), +``` + +with some initialization $X_{0}\triangleq x_0$. + +# Theoretical guarantee + + + The following **tight** guarantee can be found in [1, p. 7]: + +```math +\frac{d}{dt}\mathcal{V}(X_t, t) \leqslant 0. +``` + + After integrating between $0$ and $T$, + +```math +f(X_T) - f_\star \leqslant \frac{1}{2T}\|x_0 - x_\star\|^2. +``` + + The detailed approach using PEPs is available in [2, Theorem 2.3]. 
+ + +# References + + +[[1] W. Su, S. Boyd, E. J. Candes (2016). +A differential equation for modeling Nesterov's accelerated gradient method: Theory and insights. +In the Journal of Machine Learning Research (JMLR).](https://jmlr.org/papers/volume17/15-084/15-084.pdf) + +[[2] C. Moucer, A. Taylor, F. Bach (2022). +A systematic approach to Lyapunov analyses of continuous-time models in convex optimization.](https://arxiv.org/pdf/2205.12772.pdf) + +# Arguments +- `t`: time step +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_flow_convex(2.5; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_flow_convex(t; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/continuous_time_models/gradient_flow_strongly_convex.jl b/examples/continuous_time_models/gradient_flow_strongly_convex.jl index af3f1d1..de3c45e 100644 --- a/examples/continuous_time_models/gradient_flow_strongly_convex.jl +++ b/examples/continuous_time_models/gradient_flow_strongly_convex.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_flow_strongly_convex(mu; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_flow_strongly_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for a **gradient** flow. 
+That is, it computes the smallest possible $\tau(\mu)$ such that the guarantee + +```math +\frac{d}{dt}\mathcal{V}(X_t) \leqslant -\tau(\mu)\mathcal{V}(X_t) , +``` + +is valid, where $\mathcal{V}(X_t) = f(X_t) - f(x_\star)$, $X_t$ is the output of +the **gradient** flow, and where $x_\star$ is the minimizer of $f$. +In short, for given values of $\mu$, $\tau(\mu)$ is computed as the worst-case value +of the derivative $f(X_t)-f_\star$ when $f(X_t) - f(x_\star)\leqslant 1$. + +# Algorithm + +For $t \geqslant 0$, + +```math +\frac{d}{dt}X_t = -\nabla f(X_t), +``` + +with some initialization $X_{0}\triangleq x_0$. + +# Theoretical guarantee + + + The following **tight** guarantee can be found in [1, Proposition 11]: + +```math +\frac{d}{dt}\mathcal{V}(X_t) \leqslant -2\mu\mathcal{V}(X_t). +``` + + The detailed approach using PEPs is available in [2, Theorem 2.1]. + +# References + + +[[1] D. Scieur, V. Roulet, F. Bach and A. D'Aspremont (2017). +Integration methods and accelerated optimization algorithms. +In Advances in Neural Information Processing Systems (NIPS).](https://papers.nips.cc/paper/2017/file/bf62768ca46b6c3b5bea9515d1a1fc45-Paper.pdf) + +[[2] C. Moucer, A. Taylor, F. Bach (2022). +A systematic approach to Lyapunov analyses of continuous-time models in convex optimization.](https://arxiv.org/pdf/2205.12772.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_flow_strongly_convex(0.1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_flow_strongly_convex(mu; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/fixed_point_problems/halpern_iteration.jl b/examples/fixed_point_problems/halpern_iteration.jl index 8119430..b6cf8f7 100644 --- a/examples/fixed_point_problems/halpern_iteration.jl +++ b/examples/fixed_point_problems/halpern_iteration.jl @@ -1,6 +1,76 @@ using PEPit using OrderedCollections +@doc raw""" + wc_halpern_iteration(n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_halpern_iteration`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a non-expansive operator, +that is a $L$-Lipschitz operator with $L=1$. + +# Performance metric + +This code computes a worst-case guarantee for the **Halpern Iteration**. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|x_n - Ax_n\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Halpern iteration**, +and $x_\star$ the fixed point of $A$. + +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|x_n - Ax_n\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +The Halpern iteration can be written as + +```math +x_{t+1} = \frac{1}{t + 2} x_0 + \left(1 - \frac{1}{t + 2}\right) Ax_t. +``` + +# Theoretical guarantee +A **tight** worst-case guarantee for Halpern iteration can be found in [2, Theorem 2.1]: + +```math +\|x_n - Ax_n\|^2 \leqslant \left(\frac{2}{n+1}\right)^2 \|x_0 - x_\star\|^2. +``` + +# References +The method was first proposed in [1]. The detailed analysis and tight bound are available in [2]. + +[[1] B. Halpern (1967). 
+Fixed points of nonexpanding maps. +Bulletin of the American Mathematical Society, 73(6), 957-961.](https://www.ams.org/journals/bull/1967-73-06/S0002-9904-1967-11864-0/S0002-9904-1967-11864-0.pdf) + +[[2] F. Lieder (2021). +On the convergence rate of the Halpern-iteration. +Optimization Letters, 15(2), 405-418.](http://www.optimization-online.org/DB_FILE/2017/11/6336.pdf) + +# Arguments +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_halpern_iteration(25; verbose=true) +``` +""" function wc_halpern_iteration(n; verbose=true) problem = PEP() diff --git a/examples/fixed_point_problems/inconsistent_halpern_iteration.jl b/examples/fixed_point_problems/inconsistent_halpern_iteration.jl index 41292de..3fc4004 100644 --- a/examples/fixed_point_problems/inconsistent_halpern_iteration.jl +++ b/examples/fixed_point_problems/inconsistent_halpern_iteration.jl @@ -1,6 +1,80 @@ using PEPit using OrderedCollections +@doc raw""" + wc_inconsistent_halpern_iteration(n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_inconsistent_halpern_iteration`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a non-expansive operator, +that is a $L$-Lipschitz operator with $L=1$. +When a solution of the above problem, i.e., a fixed point, does not exist, +the behavior of the fixed-point iteration with $A$ can be characterized by +the infimal displacement vector $v$. + +# Performance metric + +This code computes a worst-case guarantee for the **Halpern Iteration**, +when `A` is not necessarily consistent, i.e., does not necessarily have a fixed point. 
+That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|x_n - Ax_n - v\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Halpern iteration** +and $x_\star$ is the point where $v$ is attained, i.e., + +```math +v = x_\star - Ax_\star +``` + +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|x_n - Ax_n - v\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +The Halpern iteration can be written as + +```math +x_{t+1} = \frac{1}{t + 2} x_0 + \left(1 - \frac{1}{t + 2}\right) Ax_t. +``` + +# Theoretical guarantee +A worst-case guarantee for Halpern iteration can be found in [1, Theorem 8], where $H_n = \sum_{k=1}^{n} \frac{1}{k}$ denotes the $n$-th harmonic number: + +```math +\|x_n - Ax_n - v\|^2 \leqslant \left(\frac{\sqrt{H_n + 12} + 1}{n + 1}\right)^2 \|x_0 - x_\star\|^2. +``` + +# References +The detailed approach is available in [1]. + +[[1] J. Park, E. Ryu (2023). +Accelerated Infeasibility Detection of Constrained Optimization and Fixed-Point Iterations. +International Conference on Machine Learning.](https://arxiv.org/pdf/2303.15876.pdf) + +# Arguments +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_inconsistent_halpern_iteration(25; verbose=true) +``` +""" function wc_inconsistent_halpern_iteration(n; verbose=true) problem = PEP() diff --git a/examples/fixed_point_problems/krasnoselskii_mann_constant_step_sizes.jl b/examples/fixed_point_problems/krasnoselskii_mann_constant_step_sizes.jl index 0e72efd..c3fa63e 100644 --- a/examples/fixed_point_problems/krasnoselskii_mann_constant_step_sizes.jl +++ b/examples/fixed_point_problems/krasnoselskii_mann_constant_step_sizes.jl @@ -1,5 +1,76 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_krasnoselskii_mann_constant_step_sizes(n, gamma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_krasnoselskii_mann_constant_step_sizes`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a non-expansive operator, that is a $L$-Lipschitz operator with $L=1$. + +# Performance metric + +This code computes a worst-case guarantee for the **Krasnoselskii-Mann** (KM) method with constant step-size. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\frac{1}{4}\|x_n - Ax_n\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the KM method, and $x_\star$ is some fixed point of $A$ +(i.e., $x_\star=Ax_\star$). + +# Algorithm +The constant step-size KM method is described by + +```math +x_{t+1} = \left(1 - \gamma\right) x_{t} + \gamma Ax_{t}. 
+``` + +# Theoretical guarantee +A theoretical **upper** bound is provided by [1, Theorem 4.9] + +```math +\tau(n) = \left\{ + \begin{aligned} + \frac{1}{n+1}\left(\frac{n}{n+1}\right)^n \frac{1}{4 \gamma (1 - \gamma)}\quad & \text{if } \frac{1}{2}\leqslant \gamma \leqslant \frac{1}{2}\left(1+\sqrt{\frac{n}{n+1}}\right) \\ + (\gamma - 1)^{2n} \quad & \text{if } \frac{1}{2}\left(1+\sqrt{\frac{n}{n+1}}\right) < \gamma \leqslant 1. + \end{aligned} + \right. +``` + +**Reference**: + +[[1] F. Lieder (2018). +Projection Based Methods for Conic Linear Programming +Optimal First Order Complexities and Norm Constrained Quasi Newton Methods. +PhD thesis, HHU Dusseldorf.](https://docserv.uni-duesseldorf.de/servlets/DerivateServlet/Derivate-49971/Dissertation.pdf) + +# References +The bibliographic reference [1] is listed at the end of the theoretical guarantee section above. + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_krasnoselskii_mann_constant_step_sizes(3, 3 / 4; verbose=true) +``` +""" function wc_krasnoselskii_mann_constant_step_sizes(n, gamma; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/fixed_point_problems/krasnoselskii_mann_increasing_step_sizes.jl b/examples/fixed_point_problems/krasnoselskii_mann_increasing_step_sizes.jl index 8d37dd8..05ef10b 100644 --- a/examples/fixed_point_problems/krasnoselskii_mann_increasing_step_sizes.jl +++ b/examples/fixed_point_problems/krasnoselskii_mann_increasing_step_sizes.jl @@ -1,5 +1,63 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_krasnoselskii_mann_increasing_step_sizes(n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_krasnoselskii_mann_increasing_step_sizes`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a non-expansive operator, that is a $L$-Lipschitz operator with $L=1$. + +# Performance metric + +This code computes a worst-case guarantee for the **Krasnoselskii-Mann** (KM) method. That is, it computes +the smallest possible $\tau(n)$ such that the guarantee + +```math +\frac{1}{4}\|x_n - Ax_n\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the KM method, and $x_\star$ is some fixed point of $A$ +(i.e., $x_\star=Ax_\star$). + +# Algorithm +The KM method is described by + +```math +x_{t+1} = \frac{1}{t + 2} x_{t} + \left(1 - \frac{1}{t + 2}\right) Ax_{t}. +``` + +**Reference**: This scheme was first studied using PEPs in [1]. + +[[1] F. Lieder (2018). +Projection Based Methods for Conic Linear Programming +Optimal First Order Complexities and Norm Constrained Quasi Newton Methods. 
+PhD thesis, HHU Dusseldorf.](https://docserv.uni-duesseldorf.de/servlets/DerivateServlet/Derivate-49971/Dissertation.pdf) + +# References +The bibliographic reference [1] is listed at the end of the Algorithm section above. + +# Arguments +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: no theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_krasnoselskii_mann_increasing_step_sizes(3; verbose=true) +``` +""" function wc_krasnoselskii_mann_increasing_step_sizes(n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/fixed_point_problems/optimal_contractive_halpern_iteration.jl b/examples/fixed_point_problems/optimal_contractive_halpern_iteration.jl index a100441..a8766e4 100644 --- a/examples/fixed_point_problems/optimal_contractive_halpern_iteration.jl +++ b/examples/fixed_point_problems/optimal_contractive_halpern_iteration.jl @@ -1,5 +1,74 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_optimal_contractive_halpern_iteration(n, gamma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimal_contractive_halpern_iteration`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a $1/\gamma$-contractive operator, +i.e. a $L$-Lipschitz operator with $L=1/\gamma$. + +# Performance metric + +This code computes a worst-case guarantee for the **Optimal Contractive Halpern Iteration**. +That is, it computes the smallest possible $\tau(n, \gamma)$ such that the guarantee + +```math +\|x_n - Ax_n\|^2 \leqslant \tau(n, \gamma) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Optimal Contractive Halpern iteration**, +and $x_\star$ is the fixed point of $A$. 
In short, for given values of $n$ and $\gamma$, +$\tau(n, \gamma)$ is computed as the worst-case value of +$\|x_n - Ax_n\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +The Optimal Contractive Halpern iteration can be written as + +```math +x_{t+1} = \left(1 - \frac{1}{\varphi_{t+1}} \right) Ax_t + \frac{1}{\varphi_{t+1}} x_0. +``` + +where $\varphi_k = \sum_{i=0}^k \gamma^{2i}$ and $x_0$ is a starting point. + +# Theoretical guarantee +A **tight** worst-case guarantee for the Optimal Contractive Halpern iteration +can be found in [1, Corollary 3.3, Theorem 4.1]: + +```math +\|x_n - Ax_n\|^2 \leqslant \left(1 + \frac{1}{\gamma}\right)^2 \left( \frac{1}{\sum_{k=0}^n \gamma^k} \right)^2 \|x_0 - x_\star\|^2. +``` + +# References +The detailed approach and tight bound are available in [1]. + +[[1] J. Park, E. Ryu (2022). +Exact Optimal Accelerated Complexity for Fixed-Point Iterations. +In 39th International Conference on Machine Learning (ICML).](https://proceedings.mlr.press/v162/park22c/park22c.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: inverse Lipschitz constant of $A$ (the operator $A$ is $1/\gamma$-Lipschitz). +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimal_contractive_halpern_iteration(10, 1.1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_optimal_contractive_halpern_iteration(n, gamma; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/inexact_proximal_methods/accelerated_inexact_forward_backward.jl b/examples/inexact_proximal_methods/accelerated_inexact_forward_backward.jl index 0059c1d..64dc6e0 100644 --- a/examples/inexact_proximal_methods/accelerated_inexact_forward_backward.jl +++ b/examples/inexact_proximal_methods/accelerated_inexact_forward_backward.jl @@ -1,6 +1,102 @@ using PEPit, OrderedCollections +@doc raw""" + wc_accelerated_inexact_forward_backward(L, zeta, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_inexact_forward_backward`. + +Consider the composite convex minimization problem, + +```math +F_\star \triangleq \min_x \left\{F(x) \equiv f(x) + g(x) \right\}, +``` + +where $f$ is $L$-smooth convex, and $g$ is closed, proper, and convex. +We further assume that one can readily evaluate the gradient of $f$ and that one has access to an inexact +version of the proximal operator of $g$ (whose level of accuracy is controlled by some +parameter $\zeta\in (0,1)$). + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated inexact forward backward** (AIFB) method (a.k.a., +inexact accelerated proximal gradient method). That is, it computes the smallest possible +$\tau(n, L, \zeta)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, L, \zeta) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the AIFB method, and where $x_\star$ is a minimizer of $F$. 
+ +In short, for given values of $n$, $L$ and $\zeta$, $\tau(n, L, \zeta)$ is computed as +the worst-case value of $F(x_n) - F(x_\star)$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +Let $t\in\{0,1,\ldots,n\}$. The method is presented in, e.g., [1, Algorithm 3.1]. +For simplicity, we instantiate [1, Algorithm 3.1] using simple values for its parameters and for the problem +setting (in the notation of [1]: $A_0\triangleq 0$, $\mu=0$, $\xi_t \triangleq0$, +$\sigma_t\triangleq 0$, $\lambda_t \triangleq\gamma\triangleq\tfrac{1}{L}$, +$\zeta_t\triangleq\zeta$, $\eta \triangleq (1-\zeta^2) \gamma$), and without backtracking, +arriving to: + +```math + \begin{aligned} + A_{t+1} && = A_t + \frac{\eta+\sqrt{\eta^2+4\eta A_t}}{2},\\ + y_{t} && = x_t + \frac{A_{t+1}-A_t}{A_{t+1}} (z_t-x_t),\\ + (x_{t+1},v_{t+1}) && \approx_{\varepsilon_t} \left(\mathrm{prox}_{\gamma g}\left(y_t-\gamma \nabla f(y_t)\right),\, + \mathrm{prox}_{ g^*/\gamma}\left(\frac{y_t-\gamma \nabla f(y_t)}{\gamma}\right)\right),\\ + && \text{with } \varepsilon_t = \frac{\zeta^2\gamma^2}{2}\|v_{t+1}+\nabla f(y_t) \|^2,\\ + z_{t+1} && = z_t-(A_{t+1}-A_t)\left(v_{t+1}+\nabla f(y_t)\right),\\ + \end{aligned} +``` +where $\{\varepsilon_t\}_{t\geqslant 0}$ is some sequence of accuracy parameters (whose values are fixed +within the algorithm as it runs), and $\{A_t\}_{t\geqslant 0}$ is some scalar sequence of parameters +for the method (typical of accelerated methods). + +The line with "$\approx_{\varepsilon}$" can be described as the pair $(x_{t+1},v_{t+1})$ satisfying +an accuracy requirement provided by [1, Definition 2.3]. More precisely (but without providing any intuition), +it requires the existence of some $w_{t+1}$ such that $v_{t+1} \in \partial g(w_{t+1})$ +and for which the accuracy requirement + +```math +\gamma^2 || x_{t+1} - y_t + \gamma v_{t+1} ||^2 + \gamma (g(x_{t+1}) - g(w_{t+1}) - v_{t+1}(x_{t+1} - w_{t+1})) \leqslant \varepsilon_t, +``` + +is valid. 
+ +# Theoretical guarantee +A theoretical upper bound is obtained in [1, Corollary 3.5]: + +```math +F(x_n)-F_\star\leqslant \frac{2L \|x_0-x_\star\|^2}{(1-\zeta^2)n^2}. +``` + +# References +The method and theoretical result can be found in [1, Section 3]. + +[[1] M. Barre, A. Taylor, F. Bach (2021). +A note on approximate accelerated forward-backward methods with +absolute and relative errors, and possibly strongly convex objectives. +arXiv:2106.15536v2.](https://arxiv.org/pdf/2106.15536v2.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `zeta`: relative approximation parameter in (0,1). +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_inexact_forward_backward(1.3, 0.45, 11; verbose=true) +``` +""" function wc_accelerated_inexact_forward_backward(L, zeta, n; verbose=true) diff --git a/examples/inexact_proximal_methods/partially_inexact_douglas_rachford_splitting.jl b/examples/inexact_proximal_methods/partially_inexact_douglas_rachford_splitting.jl index 4edba12..472cb5d 100644 --- a/examples/inexact_proximal_methods/partially_inexact_douglas_rachford_splitting.jl +++ b/examples/inexact_proximal_methods/partially_inexact_douglas_rachford_splitting.jl @@ -1,6 +1,97 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_partially_inexact_douglas_rachford_splitting(mu, L, n, gamma, sigma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_partially_inexact_douglas_rachford_splitting`. + +Consider the composite strongly convex minimization problem, + +```math +F_\star \triangleq \min_x \left\{ F(x) \equiv f(x) + g(x) \right\} +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex, and $g$ is closed convex and proper. 
We +denote by $x_\star = \arg\min_x F(x)$ the minimizer of $F$. +The (exact) proximal operator of $g$, and an approximate version of the proximal operator of +$f$ are assumed to be available. + +# Performance metric + +This code computes a worst-case guarantee for a **partially inexact Douglas-Rachford Splitting** (DRS). That is, it +computes the smallest possible $\tau(n,L,\mu,\sigma,\gamma)$ such that the guarantee + +```math +\|z_{n} - z_\star\|^2 \leqslant \tau(n,L,\mu,\sigma,\gamma) \|z_0 - z_\star\|^2 +``` + +is valid, where $z_n$ is the output of the DRS (initiated at $z_0$), +$z_\star$ is its fixed point, +$\gamma$ is a step-size, +and $\sigma$ is the level of inaccuracy. + +# Algorithm +The partially inexact Douglas-Rachford splitting under consideration is described by + +```math + \begin{aligned} + x_{t} && \approx_{\sigma} \arg\min_x \left\{ \gamma f(x)+\frac{1}{2} \|x-z_t\|^2 \right\},\\ + y_{t} && = \arg\min_y \left\{ \gamma g(y)+\frac{1}{2} \|y-(x_t-\gamma \nabla f(x_t))\|^2 \right\},\\ + z_{t+1} && = z_t + y_t - x_t. + \end{aligned} +``` +More precisely, the notation "$\approx_{\sigma}$" corresponds to requiring the existence of some +$e_{t}$ such that + +```math + \begin{aligned} + x_{t} && = z_t - \gamma (\nabla f(x_t) - e_t),\\ + y_{t} && = \arg\min_y \left\{ \gamma g(y)+\frac{1}{2} \|y-(x_t-\gamma \nabla f(x_t))\|^2 \right\},\\ + && \text{with } \|e_t\|^2 \leqslant \frac{\sigma^2}{\gamma^2}\|y_{t} - z_t + \gamma \nabla f(x_t) \|^2,\\ + z_{t+1} && = z_t + y_t - x_t. + \end{aligned} +``` +# Theoretical guarantee +The following **tight** theoretical bound is due to [2, Theorem 5.1]: + +```math +\|z_{n} - z_\star\|^2 \leqslant \max\left(\frac{1 - \sigma + \gamma \mu \sigma}{1 - \sigma + \gamma \mu}, + \frac{\sigma + (1 - \sigma) \gamma L}{1 + (1 - \sigma) \gamma L}\right)^{2n} \|z_0 - z_\star\|^2. +``` + +# References +The method is from [1], its PEP formulation and the worst-case analysis from [2], +see [2, Section 4.4] for more details. + +[[1] J. 
Eckstein and W. Yao (2018). +Relative-error approximate versions of Douglas-Rachford splitting and special cases of the ADMM. +Mathematical Programming, 170(2), 417-444.](https://link.springer.com/article/10.1007/s10107-017-1160-5) + +[[2] M. Barre, A. Taylor, F. Bach (2020). +Principled analyses and design of first-order methods with inexact proximal operators, +arXiv 2006.06041v2.](https://arxiv.org/pdf/2006.06041v2.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `sigma`: noise parameter. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_partially_inexact_douglas_rachford_splitting(0.1, 5, 5, 1.4, 0.2; verbose=true) +``` +""" function wc_partially_inexact_douglas_rachford_splitting(mu, L, n, gamma, sigma; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/inexact_proximal_methods/relatively_inexact_proximal_point_algorithm.jl b/examples/inexact_proximal_methods/relatively_inexact_proximal_point_algorithm.jl index 7896f3d..ff34fad 100644 --- a/examples/inexact_proximal_methods/relatively_inexact_proximal_point_algorithm.jl +++ b/examples/inexact_proximal_methods/relatively_inexact_proximal_point_algorithm.jl @@ -1,6 +1,82 @@ using PEPit, OrderedCollections, Clarabel, OffsetArrays +@doc raw""" + wc_relatively_inexact_proximal_point_algorithm(n, gamma, sigma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_relatively_inexact_proximal_point_algorithm`. 
+ +Consider the (possibly non-smooth) convex minimization problem, + +```math +f_\star \triangleq \min_x f(x) +``` + +where $f$ is closed, convex, and proper. We denote by $x_\star$ some optimal point of $f$ (hence +$0\in\partial f(x_\star)$). We further assume that one has access to an inexact version of the proximal +operator of $f$, whose level of accuracy is controlled by some parameter $\sigma\geqslant 0$. + +# Performance metric + +This code computes a worst-case guarantee for an **inexact proximal point method**. That is, it computes the +smallest possible $\tau(n, \gamma, \sigma)$ such that the guarantee + +```math +f(x_n) - f(x_\star) \leqslant \tau(n, \gamma, \sigma) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the method, $\gamma$ is some step-size, and $\sigma$ is +the level of accuracy of the approximate proximal point oracle. + +# Algorithm +The approximate proximal point method under consideration is described by + +```math +x_{t+1} \approx_{\sigma} \arg\min_x \left\{ \gamma f(x)+\frac{1}{2} \|x-x_t\|^2 \right\}, +``` + +where the notation "$\approx_{\sigma}$" corresponds to require the existence of some vector +$s_{t+1}\in\partial f(x_{t+1})$ and $e_{t+1}$ such that + +```math +x_{t+1} = x_t - \gamma s_{t+1} + e_{t+1} \quad \quad \text{with }\|e_{t+1}\|^2 \leqslant \sigma^2\|x_{t+1} - x_t\|^2. +``` + +We note that the case $\sigma=0$ implies $e_{t+1}=0$ and this operation reduces to a standard proximal +step with step-size $\gamma$. + +# Theoretical guarantee +The following (empirical) upper bound is provided in [1, Section 3.5.1], + +```math +f(x_n) - f(x_\star) \leqslant \frac{1 + \sigma}{4 \gamma n^{\sqrt{1 - \sigma^2}}}\|x_0 - x_\star\|^2. +``` + +# References +The precise formulation is presented in [1, Section 3.5.1]. + +[[1] M. Barre, A. Taylor, F. Bach (2020). +Principled analyses and design of first-order methods with inexact proximal operators. 
+arXiv 2006.06041v2.](https://arxiv.org/pdf/2006.06041.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `sigma`: accuracy parameter of the proximal point computation. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_relatively_inexact_proximal_point_algorithm(8, 10, 0.65; verbose=true) +``` +""" function wc_relatively_inexact_proximal_point_algorithm(n, gamma, sigma; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/low_dimensional_worst_cases_scenarios/alternate_projections.jl b/examples/low_dimensional_worst_cases_scenarios/alternate_projections.jl index cd78b4e..d0dc827 100644 --- a/examples/low_dimensional_worst_cases_scenarios/alternate_projections.jl +++ b/examples/low_dimensional_worst_cases_scenarios/alternate_projections.jl @@ -1,5 +1,71 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_alternate_projections(n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_alternate_projections`. + +Consider the convex feasibility problem: + +```math +\mathrm{Find}\, x\in Q_1\cap Q_2 +``` + +where $Q_1$ and $Q_2$ are two closed convex sets. + +# Performance metric + +This code computes a worst-case guarantee for the **alternate projection method**, and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **alternate projection method**, +and $x_\star\in Q_1\cap Q_2$ is a solution to the convex feasibility problem. 
+ +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2$ +when $\|x_0 - x_\star\|^2 \leqslant 1$. +Then, it looks for a low-dimensional worst-case example nearly achieving this performance. + +# Algorithm +The alternate projection method can be written as + +```math + \begin{aligned} + y_{t+1} & = & \mathrm{Proj}_{Q_1}(x_t), \\ + x_{t+1} & = & \mathrm{Proj}_{Q_2}(y_{t+1}). + \end{aligned} +``` +# References +The first results on this method are due to [1]. Its translation in PEPs is due to [2]. + +[[1] J. Von Neumann (1949). On rings of operators. Reduction theory. Annals of Mathematics, pp. 401-485.](https://www.jstor.org/stable/1969463) + +[[2] A. Taylor, J. Hendrickx, F. Glineur (2017). +Exact worst-case performance of first-order methods for composite convex optimization. +SIAM Journal on Optimization, 27(3):1283-1313.](https://arxiv.org/pdf/1512.07516.pdf) + +# Arguments +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: no theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_alternate_projections(10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_alternate_projections(n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/averaged_projections.jl b/examples/low_dimensional_worst_cases_scenarios/averaged_projections.jl index fb2e39e..49eadf7 100644 --- a/examples/low_dimensional_worst_cases_scenarios/averaged_projections.jl +++ b/examples/low_dimensional_worst_cases_scenarios/averaged_projections.jl @@ -1,5 +1,65 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_averaged_projections(n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_averaged_projections`. + +Consider the convex feasibility problem: + +```math +\mathrm{Find}\, x\in Q_1\cap Q_2 +``` + +where $Q_1$ and $Q_2$ are two closed convex sets. + +# Performance metric + +This code computes a worst-case guarantee for the **averaged projection method**, and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **averaged projection method**, +and $x_\star\in Q_1\cap Q_2$ is a solution to the convex feasibility problem. + +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2$ +when $\|x_0 - x_\star\|^2 \leqslant 1$. +Then, it looks for a low-dimensional worst-case example nearly achieving this performance. + +# Algorithm +The averaged projection method can be written as + +```math + \begin{aligned} + x_{t+1} & = & \frac{1}{2} \left(\mathrm{Proj}_{Q_1}(x_t) + \mathrm{Proj}_{Q_2}(x_t)\right). 
+ \end{aligned} +``` + +# References +No bibliographic reference was listed in the corresponding Python PEPit example docstring. + +# Arguments +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: no theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_averaged_projections(10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_averaged_projections(n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/dykstra.jl b/examples/low_dimensional_worst_cases_scenarios/dykstra.jl index 8c17c27..4323834 100644 --- a/examples/low_dimensional_worst_cases_scenarios/dykstra.jl +++ b/examples/low_dimensional_worst_cases_scenarios/dykstra.jl @@ -1,5 +1,71 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_dykstra(n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_dykstra`. + +Consider the convex feasibility problem: + +```math +\mathrm{Find}\, x\in Q_1\cap Q_2 +``` + +where $Q_1$ and $Q_2$ are two closed convex sets. + +# Performance metric + +This code computes a worst-case guarantee for the **Dykstra projection method**, and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Dykstra projection method**, +and $x_\star\in Q_1\cap Q_2$ is a solution to the convex feasibility problem. 
+ +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|\mathrm{Proj}_{Q_1}(x_n)-\mathrm{Proj}_{Q_2}(x_n)\|^2$ +when $\|x_0 - x_\star\|^2 \leqslant 1$. +Then, it looks for a low-dimensional worst-case example nearly achieving this performance. + +# Algorithm +The Dykstra projection method can be written as + +```math + \begin{aligned} + y_{t} & = & \mathrm{Proj}_{Q_1}(x_t+p_t), \\ + p_{t+1} & = & x_t + p_t - y_t,\\ + x_{t+1} & = & \mathrm{Proj}_{Q_2}(y_t+q_t),\\ + q_{t+1} & = & y_t + q_t - x_{t+1}. + \end{aligned} +``` +# References +This method is due to [1]. + +[[1] J.P. Boyle, R.L. Dykstra (1986). +A method for finding projections onto the intersection of convex sets in Hilbert spaces. +Lecture Notes in Statistics. Vol. 37. pp. 28-47.](https://link.springer.com/chapter/10.1007/978-1-4613-9940-7_3) + +# Arguments +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: no theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_dykstra(10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_dykstra(n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/frank_wolfe.jl b/examples/low_dimensional_worst_cases_scenarios/frank_wolfe.jl index 9e6c937..1a4db3b 100644 --- a/examples/low_dimensional_worst_cases_scenarios/frank_wolfe.jl +++ b/examples/low_dimensional_worst_cases_scenarios/frank_wolfe.jl @@ -1,5 +1,88 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_frank_wolfe(L, D, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_frank_wolfe`. 
+ +Consider the composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x) + f_2(x)\}, +``` + +where $f_1$ is $L$-smooth and convex +and where $f_2$ is a convex indicator function on $\mathcal{D}$ of diameter at most $D$. + +# Performance metric + +This code computes a worst-case guarantee for the **conditional gradient** method, aka **Frank-Wolfe** method, +and looks for a low-dimensional worst-case example nearly achieving this worst-case guarantee using +$12$ iterations of the logdet heuristic. +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +F(x_n) - F(x_\star) \leqslant \tau(n, L) D^2, +``` + +is valid, where $x_n$ is the output of the **conditional gradient** method, +and where $x_\star$ is a minimizer of $F$. +In short, for given values of $n$ and $L$, $\tau(n, L)$ is computed as the worst-case value of +$F(x_n) - F(x_\star)$ when $D \leqslant 1$. Then, it looks for a low-dimensional worst-case example nearly achieving this +performance. + +# Algorithm + + +This method was first presented in [1]. A more recent version can be found in, e.g., [2, Algorithm 1]. +For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + y_t & = & \arg\min_{s \in \mathcal{D}} \langle s \mid \nabla f_1(x_t) \rangle, \\ + x_{t+1} & = & \frac{t}{t + 2} x_t + \frac{2}{t + 2} y_t. + \end{aligned} +``` +# Theoretical guarantee + + +An **upper** guarantee obtained in [2, Theorem 1] is + +```math +F(x_n) - F(x_\star) \leqslant \frac{2L D^2}{n+2}. +``` + +# References +The algorithm is presented in, among others, [1, 2]. The logdet heuristic is presented in [3]. + +[1] M. Frank, P. Wolfe (1956). +An algorithm for quadratic programming. +Naval research logistics quarterly, 3(1-2), 95-110. + +[[2] M. Jaggi (2013). Revisiting Frank-Wolfe: Projection-free sparse convex optimization. +In 30th International Conference on Machine Learning (ICML).](http://proceedings.mlr.press/v28/jaggi13.pdf) + +[[3] M. Fazel, H. Hindi, S. 
Boyd (2003). Log-det heuristic for matrix rank minimization with applications to Hankel +and Euclidean distance matrices. American Control Conference (ACC).](https://web.stanford.edu/~boyd/papers/pdf/rank_min_heur_hankel.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `D`: diameter of $f_2$. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_frank_wolfe(1.0, 1.0, 10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_frank_wolfe(L, D, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/gradient_descent.jl b/examples/low_dimensional_worst_cases_scenarios/gradient_descent.jl index 5b292ff..fd21d09 100644 --- a/examples/low_dimensional_worst_cases_scenarios/gradient_descent.jl +++ b/examples/low_dimensional_worst_cases_scenarios/gradient_descent.jl @@ -1,5 +1,78 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$, +and looks for a low-dimensional worst-case example nearly achieving this worst-case guarantee. 
+That is, it computes the smallest possible $\tau(n, L, \gamma)$ such that the guarantee + +```math +\min_{t\leqslant n} \|\nabla f(x_t)\|^2 \leqslant \tau(n, L, \gamma) (f(x_0) - f(x_n)) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with the gradient method with fixed step-size. +Then, it looks for a low-dimensional worst-case example nearly achieving this performance. + +# Algorithm + +Gradient descent is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + +When $\gamma \leqslant \frac{1}{L}$, an empirically tight theoretical worst-case guarantee is + +```math +\min_{t\leqslant n} \|\nabla f(x_t)\|^2 \leqslant \frac{4}{3}\frac{L}{n} (f(x_0) - f(x_n)), +``` + +see discussions in [1, page 190] and [2]. + +# References + + +[[1] Taylor, A. B. (2017). Convex interpolation and performance estimation of first-order methods for +convex optimization. PhD Thesis, UCLouvain.](https://dial.uclouvain.be/downloader/downloader.php?pid=boreal:182881&datastream=PDF_01) + +[[2] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2021). The exact worst-case convergence rate of the +gradient method with fixed step lengths for L-smooth functions. Optimization Letters, 16(6), 1649-1661.](https://arxiv.org/pdf/2104.05468v3.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent(L, gamma, 5; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_descent(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/halpern_iteration.jl b/examples/low_dimensional_worst_cases_scenarios/halpern_iteration.jl index f894e46..c45193c 100644 --- a/examples/low_dimensional_worst_cases_scenarios/halpern_iteration.jl +++ b/examples/low_dimensional_worst_cases_scenarios/halpern_iteration.jl @@ -1,5 +1,75 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_halpern_iteration(n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_halpern_iteration`. + +Consider the fixed point problem + +```math +\mathrm{Find}\, x:\, x = Ax, +``` + +where $A$ is a non-expansive operator, +that is, an $L$-Lipschitz operator with $L=1$. + +# Performance metric + +This code computes a worst-case guarantee for the **Halpern Iteration**, and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|x_n - Ax_n\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Halpern iteration**, +and $x_\star$ is a fixed point of $A$. + +In short, for a given value of $n$, +$\tau(n)$ is computed as the worst-case value of +$\|x_n - Ax_n\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. Then, it looks for a low-dimensional +worst-case example nearly achieving this performance. + +# Algorithm +The Halpern iteration can be written as + +```math +x_{t+1} = \frac{1}{t + 2} x_0 + \left(1 - \frac{1}{t + 2}\right) Ax_t. +``` + +# Theoretical guarantee +A **tight** worst-case guarantee for Halpern iteration can be found in [1, Theorem 2.1]: + +```math +\|x_n - Ax_n\|^2 \leqslant \left(\frac{2}{n+1}\right)^2 \|x_0 - x_\star\|^2. 
+``` + +# References +The detailed approach and tight bound are available in [1]. + +[[1] F. Lieder (2021). On the convergence rate of the Halpern-iteration. Optimization Letters, 15(2), 405-418.](http://www.optimization-online.org/DB_FILE/2017/11/6336.pdf) + +[[2] F. Maryam, H. Hindi, S. Boyd (2003). Log-det heuristic for matrix rank minimization with applications to Hankel +and Euclidean distance matrices. American Control Conference (ACC).](https://web.stanford.edu/~boyd/papers/pdf/rank_min_heur_hankel.pdf) + +# Arguments +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_halpern_iteration(10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_halpern_iteration(n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/inexact_gradient.jl b/examples/low_dimensional_worst_cases_scenarios/inexact_gradient.jl index f8eca4e..40e4a0a 100644 --- a/examples/low_dimensional_worst_cases_scenarios/inexact_gradient.jl +++ b/examples/low_dimensional_worst_cases_scenarios/inexact_gradient.jl @@ -1,5 +1,99 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_inexact_gradient(L, mu, epsilon, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_inexact_gradient`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for an **inexact gradient method** and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee using $10$ iterations of the logdet heuristic. 
+ +That is, it computes the smallest possible $\tau(n,L,\mu,\varepsilon)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n,L,\mu,\varepsilon) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the output of the gradient descent with an inexact descent direction, +and where $x_\star$ is the minimizer of $f$. Then, it looks for a low-dimensional worst-case example nearly achieving this +performance. + +The inexact descent direction is assumed to satisfy a relative inaccuracy +described by (with $0 \leqslant \varepsilon \leqslant 1$) + +```math +\|\nabla f(x_t) - d_t\| \leqslant \varepsilon \|\nabla f(x_t)\|, +``` + +where $\nabla f(x_t)$ is the true gradient, +and $d_t$ is the approximate descent direction that is used. + +# Algorithm + + +The inexact gradient descent under consideration can be written as + +```math +x_{t+1} = x_t - \frac{2}{L_{\varepsilon} + \mu_{\varepsilon}} d_t +``` + +where $d_t$ is the inexact search direction, $L_{\varepsilon} = (1 + \varepsilon)L$ +and $\mu_{\varepsilon} = (1-\varepsilon) \mu$. + +# Theoretical guarantee + + +A **tight** worst-case guarantee obtained in [1, Theorem 5.3] or [2, Remark 1.6] is + +```math +f(x_n) - f_\star \leqslant \left(\frac{L_{\varepsilon} - \mu_{\varepsilon}}{L_{\varepsilon} + \mu_{\varepsilon}}\right)^{2n}(f(x_0) - f_\star ), +``` + +with $L_{\varepsilon} = (1 + \varepsilon)L$ and $\mu_{\varepsilon} = (1-\varepsilon) \mu$. This +guarantee is achieved on one-dimensional quadratic functions. + +# References +The detailed analyses can be found in [1, 2]. The logdet heuristic is presented in [3]. + +[[1] E. De Klerk, F. Glineur, A. Taylor (2020). Worst-case convergence analysis of +inexact gradient and Newton methods through semidefinite programming performance estimation. +SIAM Journal on Optimization, 30(3), 2053-2082.](https://arxiv.org/pdf/1709.05191.pdf) + +[[2] O. Gannot (2021). A frequency-domain analysis of inexact gradient methods. 
+Mathematical Programming.](https://arxiv.org/pdf/1912.13494.pdf) + +[[3] F. Maryam, H. Hindi, S. Boyd (2003). Log-det heuristic for matrix rank minimization with applications to Hankel +and Euclidean distance matrices. American Control Conference (ACC).](https://web.stanford.edu/~boyd/papers/pdf/rank_min_heur_hankel.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `epsilon`: level of inaccuracy +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_inexact_gradient(1.0, 0.1, 0.1, 6; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_inexact_gradient(L, mu, epsilon, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/optimized_gradient.jl b/examples/low_dimensional_worst_cases_scenarios/optimized_gradient.jl index f188062..912905b 100644 --- a/examples/low_dimensional_worst_cases_scenarios/optimized_gradient.jl +++ b/examples/low_dimensional_worst_cases_scenarios/optimized_gradient.jl @@ -1,6 +1,92 @@ using PEPit using OrderedCollections +@doc raw""" + wc_optimized_gradient(L, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimized_gradient`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for **optimized gradient method** (OGM), and applies the trace heuristic +for trying to find a low-dimensional worst-case example on which this guarantee is nearly achieved. 
+That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of OGM and where $x_\star$ is a minimizer of $f$. +Then, it applies the trace heuristic, which allows obtaining a one-dimensional function +on which the guarantee is nearly achieved. + +# Algorithm + +The optimized gradient method is described by + +```math + \begin{aligned} + x_{t+1} & = & y_t - \frac{1}{L} \nabla f(y_t)\\ + y_{t+1} & = & x_{t+1} + \frac{\theta_{t}-1}{\theta_{t+1}}(x_{t+1}-x_t)+\frac{\theta_{t}}{\theta_{t+1}}(x_{t+1}-y_t), + \end{aligned} +``` +with + +```math + \begin{aligned} + \theta_0 & = & 1 \\ + \theta_t & = & \frac{1 + \sqrt{4 \theta_{t-1}^2 + 1}}{2}, \forall t \in [|1, n-1|] \\ + \theta_n & = & \frac{1 + \sqrt{8 \theta_{n-1}^2 + 1}}{2}. + \end{aligned} +``` +# Theoretical guarantee + +The **tight** theoretical guarantee can be found in [2, Theorem 2]: + +```math +f(x_n)-f_\star \leqslant \frac{L\|x_0-x_\star\|^2}{2\theta_n^2}. +``` + +# References +The OGM was developed in [1,2]. +Low-dimensional worst-case functions for OGM were obtained in [3, 4]. + +[[1] Y. Drori, M. Teboulle (2014). Performance of first-order methods for smooth convex minimization: a novel +approach. Mathematical Programming 145(1-2), 451-482.](https://arxiv.org/pdf/1206.3209.pdf) + +[[2] D. Kim, J. Fessler (2016). Optimized first-order methods for smooth convex minimization. Mathematical +Programming 159.1-2: 81-107.](https://arxiv.org/pdf/1406.5468.pdf) + +[[3] A. Taylor, J. Hendrickx, F. Glineur (2017). Smooth strongly convex interpolation and exact worst-case +performance of first-order methods. Mathematical Programming, 161(1-2), 307-345.](https://arxiv.org/pdf/1502.05666.pdf) + +[[4] D. Kim, J. Fessler (2017). On the convergence analysis of the optimized gradient method. 
Journal of +Optimization Theory and Applications, 172(1), 187-205.](https://arxiv.org/pdf/1510.08573.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimized_gradient(3.0, 4; verbose=true) +``` +""" function wc_optimized_gradient(L, n; verbose=true) problem = PEP() diff --git a/examples/low_dimensional_worst_cases_scenarios/proximal_point.jl b/examples/low_dimensional_worst_cases_scenarios/proximal_point.jl index 61ca6fb..50bbf14 100644 --- a/examples/low_dimensional_worst_cases_scenarios/proximal_point.jl +++ b/examples/low_dimensional_worst_cases_scenarios/proximal_point.jl @@ -1,5 +1,73 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_proximal_point(alpha, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_proximal_point`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax, +``` + +where $A$ is maximally monotone. We denote by $J_A = (I + A)^{-1}$ the resolvent of $A$. + +# Performance metric + +This code computes a worst-case guarantee for the **proximal point** method, and looks for a low-dimensional +worst-case example nearly achieving this worst-case guarantee using the trace heuristic. + +That is, it computes the smallest possible $\tau(n, \alpha)$ such that the guarantee + +```math +\|x_n - x_{n-1}\|^2 \leqslant \tau(n, \alpha) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_\star$ is such that $0 \in Ax_\star$. +Then, it looks for a low-dimensional worst-case example nearly achieving this performance. 
+ +# Algorithm +The proximal point algorithm for monotone inclusions is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = J_{\alpha A}(x_t), +``` + +where $\alpha$ is a step-size. + +# Theoretical guarantee +A tight theoretical guarantee can be found in [1, section 4]: + +```math +\|x_n - x_{n-1}\|^2 \leqslant \frac{\left(1 - \frac{1}{n}\right)^{n - 1}}{n} \|x_0 - x_\star\|^2. +``` + +**Reference**: + +[[1] G. Gu, J. Yang (2020). Tight sublinear convergence rate of the proximal point algorithm for maximal +monotone inclusion problem. SIAM Journal on Optimization, 30(3), 1905-1921.](https://epubs.siam.org/doi/pdf/10.1137/19M1299049) + +# References +See [1] above, which establishes the tight guarantee quoted in this docstring. + +# Arguments +- `alpha`: step-size $\alpha$ used in the proximal point update rule. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_proximal_point(2.2, 11; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_proximal_point(alpha, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/monotone_inclusions_variational_inequalities/accelerated_proximal_point.jl b/examples/monotone_inclusions_variational_inequalities/accelerated_proximal_point.jl index 36596f4..dd8045b 100644 --- a/examples/monotone_inclusions_variational_inequalities/accelerated_proximal_point.jl +++ b/examples/monotone_inclusions_variational_inequalities/accelerated_proximal_point.jl @@ -1,5 +1,73 @@ using PEPit, OrderedCollections, Clarabel, OffsetArrays +@doc raw""" + wc_accelerated_proximal_point(alpha::Real, n::Int; solver=Clarabel.Optimizer, verbose::Int=1) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_proximal_point`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax, +``` + +where $A$ is maximally monotone. We denote by $J_A = (I + A)^{-1}$ the resolvent of $A$. + +# Performance metric + +This code computes a worst-case guarantee for the **accelerated proximal point** method proposed in [1]. +That is, it computes the smallest possible $\tau(n, \alpha)$ such that the guarantee + +```math +\|x_n - y_{n-1}\|^2 \leqslant \tau(n, \alpha) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_\star$ is such that $0 \in Ax_\star$. + +# Algorithm +Accelerated proximal point is described as follows, for $t \in \{ 0, \dots, n-1\}$ + +```math + \begin{aligned} + x_{t+1} & = & J_{\alpha A}(y_t), \\ + y_{t+1} & = & x_{t+1} + \frac{t}{t+2}(x_{t+1} - x_{t}) - \frac{t}{t+2}(x_t - y_{t-1}), + \end{aligned} +``` +where $x_0=y_0=y_{-1}$. + +# Theoretical guarantee +A tight theoretical worst-case guarantee can be found in [1, Theorem 4.1], +for $n \geqslant 1$, + +```math +\|x_n - y_{n-1}\|^2 \leqslant \frac{1}{n^2} \|x_0 - x_\star\|^2. +``` + +**Reference**: + +[[1] D. Kim (2021). 
Accelerated proximal point method for maximally monotone operators. +Mathematical Programming, 1-31.](https://arxiv.org/pdf/1905.05149v4.pdf) + +# References +See [1] above (D. Kim, 2021), which proposes the method and states the tight guarantee. + +# Arguments +- `alpha`: algorithm parameter used in the update rule. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +wc_accelerated_proximal_point(2.0, 10; verbose=1) +``` +""" function wc_accelerated_proximal_point(alpha::Real, n::Int; solver=Clarabel.Optimizer, verbose::Int=1) problem = PEP() diff --git a/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting.jl b/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting.jl index 69b5303..43b57f2 100644 --- a/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting.jl +++ b/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting.jl @@ -1,5 +1,82 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_douglas_rachford_splitting(L, mu, alpha, theta; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_douglas_rachford_splitting`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax + Bx, +``` + +where $A$ is $L$-Lipschitz and maximally monotone and $B$ is (maximally) $\mu$-strongly +monotone. We denote by $J_{\alpha A}$ and $J_{\alpha B}$ the resolvents of respectively +$\alpha A$ and $\alpha B$. + +# Performance metric + +This code computes a worst-case guarantee for the **Douglas-Rachford splitting** (DRS). 
+That is, given two initial points $w^{(0)}_t$ and $w^{(1)}_t$, +this code computes the smallest possible $\tau(L, \mu, \alpha, \theta)$ +(a.k.a. "contraction factor") such that the guarantee + +```math +\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2 \leqslant \tau(L, \mu, \alpha, \theta) \|w^{(0)}_{t} - w^{(1)}_{t}\|^2, +``` + +is valid, where $w^{(0)}_{t+1}$ and $w^{(1)}_{t+1}$ are obtained after one iteration of DRS from +respectively $w^{(0)}_{t}$ and $w^{(1)}_{t}$. + +In short, for given values of $L$, $\mu$, $\alpha$ and $\theta$, the contraction +factor $\tau(L, \mu, \alpha, \theta)$ is computed as the worst-case value of +$\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2$ when $\|w^{(0)}_{t} - w^{(1)}_{t}\|^2 \leqslant 1$. + +# Algorithm +One iteration of the Douglas-Rachford splitting is described as follows, +for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t+1} & = & J_{\alpha B} (w_t),\\ + y_{t+1} & = & J_{\alpha A} (2x_{t+1}-w_t),\\ + w_{t+1} & = & w_t - \theta (x_{t+1}-y_{t+1}). + \end{aligned} +``` +# Theoretical guarantee +Theoretical worst-case guarantees can be found in [1, section 4, Theorem 4.3]. +Since the results of [2] tighten that of [1], we compare with [2, Theorem 4.3] below. The theoretical results +are complicated and we do not copy them here. + +# References +The detailed PEP methodology for studying operator splitting is provided in [2]. + +[[1] W. Moursi, L. Vandenberghe (2019). Douglas-Rachford Splitting for the Sum of a Lipschitz Continuous and +a Strongly Monotone Operator. Journal of Optimization Theory and Applications 183, 179-198.](https://arxiv.org/pdf/1805.09396.pdf) + +[[2] E. Ryu, A. Taylor, C. Bergeling, P. Giselsson (2020). Operator splitting performance estimation: +Tight contraction factors and optimal parameter selection. SIAM Journal on Optimization, 30(3), 2251-2271.](https://arxiv.org/pdf/1812.00146.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. 
+- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +wc_douglas_rachford_splitting(1.0, 0.1, 1.3, 0.9; verbose=true) +``` +""" function wc_douglas_rachford_splitting(L, mu, alpha, theta; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting_2.jl b/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting_2.jl index 26dfa08..4e49425 100644 --- a/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting_2.jl +++ b/examples/monotone_inclusions_variational_inequalities/douglas_rachford_splitting_2.jl @@ -1,5 +1,78 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_douglas_rachford_splitting_2(beta, mu, alpha, theta; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_douglas_rachford_splitting_2`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax + Bx, +``` + +where $A$ is $\beta$-cocoercive and maximally monotone +and $B$ is (maximally) $\mu$-strongly monotone. +We denote by $J_{\alpha A}$ and $J_{\alpha B}$ the resolvents of respectively +$\alpha A$ and $\alpha B$. + +# Performance metric + +This code computes a worst-case guarantee for the **Douglas-Rachford splitting** (DRS). +That is, given two initial points $w^{(0)}_t$ and $w^{(1)}_t$, +this code computes the smallest possible $\tau(\beta, \mu, \alpha, \theta)$ +(a.k.a. 
"contraction factor") such that the guarantee + +```math +\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2 \leqslant \tau(\beta, \mu, \alpha, \theta) \|w^{(0)}_{t} - w^{(1)}_{t}\|^2, +``` + +is valid, where $w^{(0)}_{t+1}$ and $w^{(1)}_{t+1}$ are obtained after one iteration of DRS from +respectively $w^{(0)}_{t}$ and $w^{(1)}_{t}$. + +In short, for given values of $\beta$, $\mu$, $\alpha$ and $\theta$, the contraction +factor $\tau(\beta, \mu, \alpha, \theta)$ is computed as the worst-case value of +$\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2$ when $\|w^{(0)}_{t} - w^{(1)}_{t}\|^2 \leqslant 1$. + +# Algorithm +One iteration of the Douglas-Rachford splitting is described as follows, +for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t+1} & = & J_{\alpha B} (w_t),\\ + y_{t+1} & = & J_{\alpha A} (2x_{t+1}-w_t),\\ + w_{t+1} & = & w_t - \theta (x_{t+1}-y_{t+1}). + \end{aligned} +``` +# Theoretical guarantee +Theoretical worst-case guarantees can be found in [1, section 4, Theorem 4.1]. + +# References +The detailed PEP methodology for studying operator splitting is provided in [1]. + +[[1] E. Ryu, A. Taylor, C. Bergeling, P. Giselsson (2020). Operator splitting performance estimation: +Tight contraction factors and optimal parameter selection. SIAM Journal on Optimization, 30(3), 2251-2271.](https://arxiv.org/pdf/1812.00146.pdf) + +# Arguments +- `beta`: operator or algorithm parameter used in the model. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +wc_douglas_rachford_splitting_2(1.2, 0.1, 0.3, 1.5; verbose=true) +``` +""" function wc_douglas_rachford_splitting_2(beta, mu, alpha, theta; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/optimal_strongly_monotone_proximal_point.jl b/examples/monotone_inclusions_variational_inequalities/optimal_strongly_monotone_proximal_point.jl index 66befe1..e37a50a 100644 --- a/examples/monotone_inclusions_variational_inequalities/optimal_strongly_monotone_proximal_point.jl +++ b/examples/monotone_inclusions_variational_inequalities/optimal_strongly_monotone_proximal_point.jl @@ -8,6 +8,78 @@ function phi(mu, idx) return ((1 + 2 * mu)^(2 * idx + 2) - 1) / ((1 + 2 * mu)^2 - 1) end +@doc raw""" + wc_optimal_strongly_monotone_proximal_point(n, mu; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimal_strongly_monotone_proximal_point`. + +Compute a worst-case guarantee for optimal strongly monotone proximal point. + +# Performance metric + +This code computes a worst-case guarantee for the squared final residual + +```math +\|y_{n-1} - x_n\|^2, +``` + +under the normalization $\|x_0-x_\star\|^2 \leqslant 1$, where $x_\star$ is a zero +of the strongly monotone operator $A$. + +# Algorithm + +The method applies a unit-stepsize proximal point oracle for a $\mu$-strongly +monotone operator and then extrapolates the next query point. With + +```math +\phi_i(\mu) = +\begin{cases} +0, & i=-1,\\ +\frac{(1+2\mu)^{2i+2}-1}{(1+2\mu)^2-1}, & i\geq 0, +\end{cases} +``` + +the Julia implementation uses + +```math +\begin{aligned} +x_{i+1} &= (I + A)^{-1}(y_i),\\ +y_{i+1} &= x_{i+1} + + \frac{\phi_i(\mu)-1}{\phi_{i+1}(\mu)}(x_{i+1}-x_i) + - \frac{2\mu\phi_i(\mu)}{\phi_{i+1}(\mu)}(y_i-x_{i+1})\\ +&\quad + + \frac{(1+2\mu)\phi_{i-1}(\mu)}{\phi_{i+1}(\mu)}(y_{i-1}-x_i). 
+\end{aligned} +``` + +# Theoretical guarantee + +The reference value computed by the example is + +```math +\left(\frac{2\mu}{(1+2\mu)^n-1}\right)^2. +``` + +# References +No bibliographic reference was listed in the corresponding Python PEPit example docstring. + +# Arguments +- `n`: number of iterations. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value computed by PEPit.jl. +- `theoretical_tau`: reference theoretical value when the example provides one. + +# Julia usage +```julia +wc_optimal_strongly_monotone_proximal_point(10, 0.05; verbose=true) +``` +""" function wc_optimal_strongly_monotone_proximal_point(n, mu; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient.jl b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient.jl index a3713f9..c92d96b 100644 --- a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient.jl +++ b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient.jl @@ -1,5 +1,73 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_optimistic_gradient(n, gamma, L; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimistic_gradient`. + +Consider the monotone variational inequality + +```math +\mathrm{Find}\, x_\star \in C\text{ such that } \left\langle F(x_\star),\, x - x_\star \right\rangle \geqslant 0\,\,\forall x\in C, +``` + +where $C$ is a closed convex set and $F$ is maximally monotone and Lipschitz. + +# Performance metric + +This code computes a worst-case guarantee for the **optimistic gradient method**. 
+That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\tilde{x}_n - \tilde{x}_{n-1}\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2, +``` + +is valid, where $\tilde{x}_n$ is the output of the **optimistic gradient method** +and $x_0$ its starting point. + +# Algorithm +The optimistic gradient method is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + \tilde{x}_{t} & = & \mathrm{Proj}_{C} [x_t-\gamma F(\tilde{x}_{t-1})], \\ + {x}_{t+1} & = & \tilde{x}_t + \gamma (F(\tilde{x}_{t-1}) - F(\tilde{x}_t)). + \end{aligned} +``` +where $\gamma$ is some step-size. + +# Theoretical guarantee +The method and many variants of it are discussed in [1] and a PEP formulation suggesting +a worst-case guarantee in $O(1/n)$ can be found in [2, Appendix D]. + +# References + + +[[1] Y.-G. Hsieh, F. Iutzeler, J. Malick, P. Mertikopoulos (2019). +On the convergence of single-call stochastic extra-gradient methods. +Advances in Neural Information Processing Systems, 32:6938-6948, 2019](https://arxiv.org/pdf/1908.08465.pdf) + +[[2] E. Gorbunov, A. Taylor, G. Gidel (2022). +Last-Iterate Convergence of Optimistic Gradient Method for Monotone Variational Inequalities.](https://arxiv.org/pdf/2205.08446.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical bound. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimistic_gradient(5, 1 / 4, 1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_optimistic_gradient(n, gamma, L; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined.jl b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined.jl index 80ebeb1..2b0cc2e 100644 --- a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined.jl +++ b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined.jl @@ -1,5 +1,78 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_optimistic_gradient_refined(n::Int, gamma::Real, L::Real; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimistic_gradient_refined`. + +Consider the monotone variational inequality + +```math +\mathrm{Find}\, x_\star \in C\text{ such that } \left\langle F(x_\star),\, x - x_\star \right\rangle \geqslant 0\,\,\forall x\in C, +``` + +where $C$ is a closed convex set and $F$ is maximally monotone and Lipschitz. +In this example, we use the characterization of Lipschitz monotone operators provided in [3, Proposition 3.15] +(which results in more computationally expensive PEPs to be solved). + +# Performance metric + +This code computes a worst-case guarantee for the **optimistic gradient method**. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\tilde{x}_n - \tilde{x}_{n-1}\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2, +``` + +is valid, where $\tilde{x}_n$ is the output of the **optimistic gradient method** +and $x_0$ its starting point. + +# Algorithm +The optimistic gradient method is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + \tilde{x}_{t} & = & \mathrm{Proj}_{C} [x_t-\gamma F(\tilde{x}_{t-1})], \\ + {x}_{t+1} & = & \tilde{x}_t + \gamma (F(\tilde{x}_{t-1}) - F(\tilde{x}_t)). 
+ \end{aligned} +``` +where $\gamma$ is some step-size. + +# Theoretical guarantee +The method and many variants of it are discussed in [1] and a PEP formulation suggesting +a worst-case guarantee in $O(1/n)$ can be found in [2, Appendix D]. + +# References + + +[[1] Y.-G. Hsieh, F. Iutzeler, J. Malick, P. Mertikopoulos (2019). +On the convergence of single-call stochastic extra-gradient methods. +Advances in Neural Information Processing Systems, 32:6938-6948, 2019](https://arxiv.org/pdf/1908.08465.pdf) + +[[2] E. Gorbunov, A. Taylor, G. Gidel (2022). +Last-Iterate Convergence of Optimistic Gradient Method for Monotone Variational Inequalities.](https://arxiv.org/pdf/2205.08446.pdf) + +[[3] A. Rubbens, J.M. Hendrickx, A. Taylor (2025). +A constructive approach to strengthen algebraic descriptions of function and operator classes.](https://arxiv.org/pdf/2504.14377.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical bound. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimistic_gradient_refined(1, 1 / 4, 1; verbose=true) +``` +""" function wc_optimistic_gradient_refined(n::Int, gamma::Real, L::Real; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined_cocoercive.jl b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined_cocoercive.jl index fad0473..8fabad5 100644 --- a/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined_cocoercive.jl +++ b/examples/monotone_inclusions_variational_inequalities/optimistic_gradient_refined_cocoercive.jl @@ -1,5 +1,78 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_optimistic_gradient_refined_cocoercive(n, gamma, beta; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimistic_gradient_refined_cocoercive`. + +Consider the monotone variational inequality + +```math +\mathrm{Find}\, x_\star \in C\text{ such that } \left\langle F(x_\star),\, x - x_\star \right\rangle \geqslant 0\,\,\forall x\in C, +``` + +where $C$ is a closed convex set and $F$ is maximally monotone and cocoercive. +In this example, we use the characterization of cocoercive strongly monotone operators +provided in [3, Proposition F.3] (which results in more computationally expensive PEPs to be solved). + +# Performance metric + +This code computes a worst-case guarantee for the **optimistic gradient method**. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|\tilde{x}_n - \tilde{x}_{n-1}\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2, +``` + +is valid, where $\tilde{x}_n$ is the output of the **optimistic gradient method** +and $x_0$ its starting point. 
+ +# Algorithm +The optimistic gradient method is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + \tilde{x}_{t} & = & \mathrm{Proj}_{C} [x_t-\gamma F(\tilde{x}_{t-1})], \\ + {x}_{t+1} & = & \tilde{x}_t + \gamma (F(\tilde{x}_{t-1}) - F(\tilde{x}_t)). + \end{aligned} +``` +where $\gamma$ is some step-size. + +# Theoretical guarantee +The method and many variants of it are discussed in [1] and a PEP formulation suggesting +a worst-case guarantee in $O(1/n)$ (when $\mu=0$) can be found in [2, Appendix D]. + +# References + + +[[1] Y.-G. Hsieh, F. Iutzeler, J. Malick, P. Mertikopoulos (2019). +On the convergence of single-call stochastic extra-gradient methods. +Advances in Neural Information Processing Systems, 32:6938-6948, 2019](https://arxiv.org/pdf/1908.08465.pdf) + +[[2] E. Gorbunov, A. Taylor, G. Gidel (2022). +Last-Iterate Convergence of Optimistic Gradient Method for Monotone Variational Inequalities.](https://arxiv.org/pdf/2205.08446.pdf) + +[[3] A. Rubbens, J.M. Hendrickx, A. Taylor (2025). +A constructive approach to strengthen algebraic descriptions of function and operator classes.](https://arxiv.org/pdf/2504.14377.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `beta`: operator or algorithm parameter used in the model. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical bound. 
+ +# Julia usage +```julia +wc_optimistic_gradient_refined_cocoercive(1, 1 / 4, 1 / 4; verbose=true) +``` +""" function wc_optimistic_gradient_refined_cocoercive(n, gamma, beta; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/past_extragradient.jl b/examples/monotone_inclusions_variational_inequalities/past_extragradient.jl index 59f7590..254a363 100644 --- a/examples/monotone_inclusions_variational_inequalities/past_extragradient.jl +++ b/examples/monotone_inclusions_variational_inequalities/past_extragradient.jl @@ -1,5 +1,76 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_past_extragradient(n, gamma, L; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_past_extragradient`. + +Consider the monotone variational inequality + +```math +\mathrm{Find}\, x_\star \in C\text{ such that } \left\langle F(x_\star), x - x_\star \right\rangle \geqslant 0\,\,\forall x\in C, +``` + +where $C$ is a closed convex set and $F$ is maximally monotone and Lipschitz. + +# Performance metric + +This code computes a worst-case guarantee for the **past extragradient method**. +That is, it computes the smallest possible $\tau(n)$ such that the guarantee + +```math +\|x_n - x_{n-1}\|^2 \leqslant \tau(n) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_n$ is the output of the **past extragradient method** and $x_0$ its starting point. + +# Algorithm +The past extragradient method is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + \tilde{x}_{t} & = & \mathrm{Proj}_{C} [x_t-\gamma F(\tilde{x}_{t-1})], \\ + {x}_{t+1} & = & \mathrm{Proj}_{C} [x_t-\gamma F(\tilde{x}_{t})]. + \end{aligned} +``` +where $\gamma$ is some step-size. + +# Theoretical guarantee +The method and many variants of it are discussed in [1]. +A worst-case guarantee in $O(1/n)$ can be found in [2, 3]. + +# References + + +[[1] Y.-G. Hsieh, F. Iutzeler, J. Malick, P. Mertikopoulos (2019).
+On the convergence of single-call stochastic extra-gradient methods. +Advances in Neural Information Processing Systems, 32:6938-6948, 2019](https://arxiv.org/pdf/1908.08465.pdf) + +[[2] E. Gorbunov, A. Taylor, G. Gidel (2022). +Last-Iterate Convergence of Optimistic Gradient Method for Monotone Variational Inequalities.](https://arxiv.org/pdf/2205.08446.pdf) + +[[3] Y. Cai, A. Oikonomou, W. Zheng (2022). +Tight Last-Iterate Convergence of the Extragradient and the Optimistic Gradient Descent-Ascent Algorithm +for Constrained Monotone Variational Inequalities.](https://arxiv.org/pdf/2204.09228.pdf) + +# Arguments +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical bound. + +# Julia usage +```julia +wc_past_extragradient(5, 1 / 4, 1; verbose=true) +``` +""" function wc_past_extragradient(n, gamma, L; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/monotone_inclusions_variational_inequalities/proximal_point.jl b/examples/monotone_inclusions_variational_inequalities/proximal_point.jl index d8806af..a603190 100644 --- a/examples/monotone_inclusions_variational_inequalities/proximal_point.jl +++ b/examples/monotone_inclusions_variational_inequalities/proximal_point.jl @@ -1,5 +1,71 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_proximal_point(alpha::Real, n::Int; solver=Clarabel.Optimizer, verbose::Int=1) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_proximal_point`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax, +``` + +where $A$ is maximally monotone. We denote $J_A = (I + A)^{-1}$ the resolvent of $A$. 
+ +# Performance metric + +This code computes a worst-case guarantee for the **proximal point** method. +That is, it computes the smallest possible $\tau(n, \alpha)$ such that the guarantee + +```math +\|x_n - x_{n-1}\|^2 \leqslant \tau(n, \alpha) \|x_0 - x_\star\|^2, +``` + +is valid, where $x_\star$ is such that $0 \in Ax_\star$. + +# Algorithm +The proximal point algorithm for monotone inclusions is described as follows, +for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = J_{\alpha A}(x_t), +``` + +where $\alpha$ is a step-size. + +# Theoretical guarantee +A tight theoretical guarantee can be found in [1, section 4]. + +```math +\|x_n - x_{n-1}\|^2 \leqslant \frac{\left(1 - \frac{1}{n}\right)^{n - 1}}{n} \|x_0 - x_\star\|^2. +``` + +**Reference**: + +[[1] G. Gu, J. Yang (2020). Tight sublinear convergence rate of the proximal point algorithm for maximal +monotone inclusion problem. SIAM Journal on Optimization, 30(3), 1905-1921.](https://epubs.siam.org/doi/pdf/10.1137/19M1299049) + +# References +See reference [1], listed with the theoretical guarantee above. + +# Arguments +- `alpha`: step-size $\alpha$ of the resolvent $J_{\alpha A}$ used in the update rule. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: integer verbosity level (default `1`) controlling the printing of example and solver progress information. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +wc_proximal_point(2.0, 10; verbose=1) +``` +""" function wc_proximal_point(alpha::Real, n::Int; solver=Clarabel.Optimizer, verbose::Int=1) problem = PEP() diff --git a/examples/monotone_inclusions_variational_inequalities/three_operator_splitting.jl b/examples/monotone_inclusions_variational_inequalities/three_operator_splitting.jl index b816029..e94d51c 100644 --- a/examples/monotone_inclusions_variational_inequalities/three_operator_splitting.jl +++ b/examples/monotone_inclusions_variational_inequalities/three_operator_splitting.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_three_operator_splitting(L, mu, beta, alpha, theta; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_three_operator_splitting`. + +Consider the monotone inclusion problem + +```math +\mathrm{Find}\, x:\, 0\in Ax + Bx + Cx, +``` + +where $A$ is maximally monotone, $B$ is $\beta$-cocoercive and $C$ is the gradient of some +$L$-smooth $\mu$-strongly convex function. We denote by $J_{\alpha A}$ and $J_{\alpha B}$ +the resolvents of respectively $\alpha A$ and $\alpha B$. + +# Performance metric + +This code computes a worst-case guarantee for the **three operator splitting** (TOS). +That is, given two initial points $w^{(0)}_t$ and $w^{(1)}_t$, +this code computes the smallest possible $\tau(L, \mu, \beta, \alpha, \theta)$ +(a.k.a. "contraction factor") such that the guarantee + +```math +\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2 \leqslant \tau(L, \mu, \beta, \alpha, \theta) \|w^{(0)}_{t} - w^{(1)}_{t}\|^2, +``` + +is valid, where $w^{(0)}_{t+1}$ and $w^{(1)}_{t+1}$ are obtained after one iteration of TOS from +respectively $w^{(0)}_{t}$ and $w^{(1)}_{t}$.
+ +In short, for given values of $L$, $\mu$, $\beta$, $\alpha$ and $\theta$, +the contraction factor $\tau(L, \mu, \beta, \alpha, \theta)$ is computed as the worst-case value of +$\|w^{(0)}_{t+1} - w^{(1)}_{t+1}\|^2$ when $\|w^{(0)}_{t} - w^{(1)}_{t}\|^2 \leqslant 1$. + +# Algorithm + +One iteration of the algorithm is described in [1]. For $t \in \{ 0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t+1} & = & J_{\alpha B} (w_t),\\ + y_{t+1} & = & J_{\alpha A} (2x_{t+1} - w_t - C x_{t+1}),\\ + w_{t+1} & = & w_t - \theta (x_{t+1} - y_{t+1}). + \end{aligned} +``` +# References +The TOS was proposed in [1], +the analysis of such operator splitting methods using PEPs was proposed in [2]. + +[[1] D. Davis, W. Yin (2017). A three-operator splitting scheme and its optimization applications. +Set-valued and variational analysis, 25(4), 829-858.](https://arxiv.org/pdf/1504.01032.pdf) + +[[2] E. Ryu, A. Taylor, C. Bergeling, P. Giselsson (2020). Operator splitting performance estimation: +Tight contraction factors and optimal parameter selection. SIAM Journal on Optimization, 30(3), 2251-2271.](https://arxiv.org/pdf/1812.00146.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `beta`: operator or algorithm parameter used in the model. +- `alpha`: algorithm parameter used in the update rule. +- `theta`: relaxation or averaging parameter used in the update rule. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: no theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_three_operator_splitting(1.0, 0.1, 1.0, 0.9, 1.3; verbose=true) +``` +""" function wc_three_operator_splitting(L, mu, beta, alpha, theta; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/nonconvex_optimization/difference_of_convex_algorithm.jl b/examples/nonconvex_optimization/difference_of_convex_algorithm.jl index 23e8a10..feee06f 100644 --- a/examples/nonconvex_optimization/difference_of_convex_algorithm.jl +++ b/examples/nonconvex_optimization/difference_of_convex_algorithm.jl @@ -1,5 +1,77 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_difference_of_convex_algorithm(mu1, mu2, L1, L2, n, alpha=0; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_difference_of_convex_algorithm`. + +Consider the minimization problem + +```math +F_\star \triangleq \min_x f_1(x)-f_2(x), +``` + +where $f_1$ and $f_2$ are convex functions, respectively $L_1$-smooth and +$\mu_1$-strongly convex and $L_2$-smooth and $\mu_2$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for **DCA** (difference-of-convex algorithm, also known as the +convex-concave procedure). That is, it computes the smallest possible $\tau(n, \mu_1, L_1,\mu_2, L_2)$ +such that the guarantee + +```math +\min_{t\leqslant n} \|\nabla f_1(x_t)-\nabla f_2(x_t)\|^2 \leqslant \tau(n, \mu_1, L_1,\mu_2, L_2) (f_1(x_0)-f_2(x_0)-F_\star) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with DCA. + +# Algorithm + +DCA is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} \in \mathrm{argmin}_x\,\{ f_1(x) - \langle \nabla f_2(x_t), x\rangle\}. +``` + + +# Theoretical guarantee +The results are compared with [1, Theorem 3]; +a more complete picture can be obtained from [2], also by possibly allowing for non-convex functions +$f_1$ and $f_2$ (i.e., possibly negative values for $\mu_1$, $\mu_2$).
+ +# References + + +[[1] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2021). +On the rate of convergence of the difference-of-convex algorithm (DCA). +Journal of Optimization Theory and Applications, 202(1), 475-496.](https://arxiv.org/pdf/2109.13566) + +[[2] T. Rotaru, P. Patrinos, F. Glineur (2025). +Tight Analysis of Difference-of-Convex Algorithm (DCA) Improves Convergence Rates for Proximal Gradient Descent. +arXiv preprint arXiv:2503.04486.](https://arxiv.org/pdf/2503.04486) + +# Arguments +- `mu1`: strong convexity parameter for f1. +- `mu2`: strong convexity parameter for f2. +- `L1`: smoothness parameter for f1. +- `L2`: smoothness parameter for f2. +- `n`: number of iterations. +- `alpha`: algorithm parameter used in the update rule. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: reference theoretical value [1, Theorem 3]. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_difference_of_convex_algorithm(mu1, mu2, L1, L2, 5, 0; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_difference_of_convex_algorithm(mu1, mu2, L1, L2, n, alpha=0; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/gradient_descent.jl b/examples/nonconvex_optimization/gradient_descent.jl index db1c5d1..98f7196 100644 --- a/examples/nonconvex_optimization/gradient_descent.jl +++ b/examples/nonconvex_optimization/gradient_descent.jl @@ -1,6 +1,78 @@ using PEPit using OrderedCollections +@doc raw""" + wc_gradient_descent(L, gamma, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth.
+ +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \gamma)$ such that the guarantee + +```math +\min_{t\leqslant n} \|\nabla f(x_t)\|^2 \leqslant \tau(n, L, \gamma) (f(x_0) - f(x_n)) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with the gradient method with fixed step-size. + +# Algorithm + +Gradient descent is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + +When $\gamma \leqslant \frac{1}{L}$, an empirically tight theoretical worst-case guarantee is + +```math +\min_{t\leqslant n} \|\nabla f(x_t)\|^2 \leqslant \frac{4}{3}\frac{L}{n} (f(x_0) - f(x_n)), +``` + +see discussions in [1, page 190] and [2]. + +# References + + +[[1] Taylor, A. B. (2017). +Convex interpolation and performance estimation of first-order methods for convex optimization. +PhD Thesis, UCLouvain.](https://dial.uclouvain.be/downloader/downloader.php?pid=boreal:182881&datastream=PDF_01) + +[[2] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2021). +The exact worst-case convergence rate of the gradient method with fixed step lengths for L-smooth functions. +Optimization Letters, 16(6), 1649-1661.](https://arxiv.org/pdf/2104.05468v3.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +PEPit_val, theoretical_val = wc_gradient_descent(1.0, 1.0, 5; verbose=true) +``` +""" function wc_gradient_descent(L, gamma, n; verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_expensive.jl b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_expensive.jl index 38e7c5c..eecabb6 100644 --- a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_expensive.jl +++ b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_expensive.jl @@ -1,5 +1,89 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_quadratic_lojasiewicz_expensive(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_quadratic_lojasiewicz_expensive`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and satisfies a quadratic Lojasiewicz inequality: + +```math +f(x)-f_\star \leqslant \frac{1}{2\mu}\|\nabla f(x) \|^2, +``` + +details can be found in [1,2,3]. The example here relies on the `SmoothQuadraticLojasiewiczFunctionExpensive` +description of smooth Lojasiewicz functions (based on [5, Proposition 3.4]). + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \mu, \gamma)$ such that the guarantee + +```math +f(x_n)-f_\star \leqslant \tau(n, L, \mu, \gamma) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with the gradient method with fixed step-size. + +# Algorithm + +Gradient descent is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee +We compare with the guarantees from [4, Theorem 3].
+ +# References + + [[1] S. Lojasiewicz (1963). + Une propriete topologique des sous-ensembles analytiques reels. + Les equations aux derivees partielles, 117 (1963), 87-89.](https://aif.centre-mersenne.org/item/10.5802/aif.1384.pdf) + + [[2] B. Polyak (1963). + Gradient methods for the minimisation of functionals. + USSR Computational Mathematics and Mathematical Physics 3(4), 864-878.](https://www.sciencedirect.com/science/article/abs/pii/0041555363903823) + + [[3] J. Bolte, A. Daniilidis, and A. Lewis (2007). + The Lojasiewicz inequality for nonsmooth subanalytic functions with applications to subgradient dynamical systems. + SIAM Journal on Optimization 17, 1205-1223.](https://bolte.perso.math.cnrs.fr/Loja.pdf) + + [[4] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2023). + Conditions for linear convergence of the gradient method for non-convex optimization. + Optimization Letters.](https://arxiv.org/pdf/2204.00647) + + [[5] A. Rubbens, J.M. Hendrickx, A. Taylor (2025). + A constructive approach to strengthen algebraic descriptions of function and operator classes.](https://arxiv.org/pdf/2504.14377.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: parameter $\mu$ of the quadratic Lojasiewicz inequality satisfied by $f$. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_quadratic_lojasiewicz_expensive(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_descent_quadratic_lojasiewicz_expensive(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_intermediate.jl b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_intermediate.jl index c5d64ea..d124ffe 100644 --- a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_intermediate.jl +++ b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_intermediate.jl @@ -1,5 +1,90 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_quadratic_lojasiewicz_intermediate(L, mu, gamma, n, alpha; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_quadratic_lojasiewicz_intermediate`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and satisfies a quadratic Lojasiewicz inequality: + +```math +f(x)-f_\star \leqslant \frac{1}{2\mu}\|\nabla f(x) \|^2, +``` + +details can be found in [1,2,3]. The example here relies on the `SmoothQuadraticLojasiewiczFunctionCheap` +description of smooth Lojasiewicz functions (based on [5, Proposition 3.2]). + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \mu, \gamma)$ such that the guarantee + +```math +f(x_n)-f_\star \leqslant \tau(n, L, \mu, \gamma) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with the gradient method with fixed step-size.
+ +# Algorithm + +Gradient descent is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee +We compare with the guarantees from [4, Theorem 3]. + +# References + + [[1] S. Lojasiewicz (1963). + Une propriete topologique des sous-ensembles analytiques reels. + Les equations aux derivees partielles, 117 (1963), 87-89.](https://aif.centre-mersenne.org/item/10.5802/aif.1384.pdf) + + [[2] B. Polyak (1963). + Gradient methods for the minimisation of functionals. + USSR Computational Mathematics and Mathematical Physics 3(4), 864-878.](https://www.sciencedirect.com/science/article/abs/pii/0041555363903823) + + [[3] J. Bolte, A. Daniilidis, and A. Lewis (2007). + The Lojasiewicz inequality for nonsmooth subanalytic functions with applications to subgradient dynamical systems. + SIAM Journal on Optimization 17, 1205-1223.](https://bolte.perso.math.cnrs.fr/Loja.pdf) + + [[4] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2023). + Conditions for linear convergence of the gradient method for non-convex optimization. + Optimization Letters.](https://arxiv.org/pdf/2204.00647) + + [[5] A. Rubbens, J.M. Hendrickx, A. Taylor (2025). + A constructive approach to strengthen algebraic descriptions of function and operator classes.](https://arxiv.org/pdf/2504.14377.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: parameter $\mu$ of the quadratic Lojasiewicz inequality satisfied by $f$. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `alpha`: algorithm parameter used in the update rule. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_quadratic_lojasiewicz_intermediate(L, mu, gamma, n, alpha; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_descent_quadratic_lojasiewicz_intermediate(L, mu, gamma, n, alpha; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_naive.jl b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_naive.jl index d558d1c..75f7ac9 100644 --- a/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_naive.jl +++ b/examples/nonconvex_optimization/gradient_descent_quadratic_lojasiewicz_naive.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_quadratic_lojasiewicz_naive(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_quadratic_lojasiewicz_naive`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and satisfies a quadratic Lojasiewicz inequality: + +```math +f(x)-f_\star \leqslant \frac{1}{2\mu}\|\nabla f(x) \|^2, +``` + +details can be found in [1,2,3]. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \mu, \gamma)$ such that the guarantee + +```math +f(x_n)-f_\star \leqslant \tau(n, L, \mu, \gamma) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the $n$-th iterate obtained with the gradient method with fixed step-size. + +# Algorithm + +Gradient descent is described as follows, for $t \in \{ 0, \dots, n-1\}$, + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee +We compare with the guarantees from [4, Theorem 3]. + +# References + + [[1] S. Lojasiewicz (1963).
+ Une propriete topologique des sous-ensembles analytiques reels. + Les equations aux derivees partielles, 117 (1963), 87-89.](https://aif.centre-mersenne.org/item/10.5802/aif.1384.pdf) + + [[2] B. Polyak (1963). + Gradient methods for the minimisation of functionals. + USSR Computational Mathematics and Mathematical Physics 3(4), 864-878.](https://www.sciencedirect.com/science/article/abs/pii/0041555363903823) + + [[3] J. Bolte, A. Daniilidis, and A. Lewis (2007). + The Lojasiewicz inequality for nonsmooth subanalytic functions with applications to subgradient dynamical systems. + SIAM Journal on Optimization 17, 1205-1223.](https://bolte.perso.math.cnrs.fr/Loja.pdf) + + [[4] H. Abbaszadehpeivasti, E. de Klerk, M. Zamani (2023). + Conditions for linear convergence of the gradient method for non-convex optimization. + Optimization Letters.](https://arxiv.org/pdf/2204.00647) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: parameter $\mu$ of the quadratic Lojasiewicz inequality satisfied by $f$. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_quadratic_lojasiewicz_naive(1.0, 0.2, 1.0, 1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_descent_quadratic_lojasiewicz_naive(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/no_lips_1.jl b/examples/nonconvex_optimization/no_lips_1.jl index a66540f..4f2eaf2 100644 --- a/examples/nonconvex_optimization/no_lips_1.jl +++ b/examples/nonconvex_optimization/no_lips_1.jl @@ -1,5 +1,90 @@ using PEPit, OrderedCollections, Clarabel, OffsetArrays +@doc raw""" + wc_no_lips_1(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_no_lips_1`. + +Consider the constrained non-convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x)+f_2(x) \} +``` + +where $f_2$ is a closed convex indicator function and $f_1$ is possibly non-convex +and $L$-smooth relatively to $h$, +and where $h$ is closed proper and convex. + +# Performance metric + +This code computes a worst-case guarantee for the **NoLips** method. +That is, it computes the smallest possible $\tau(n, L, \gamma)$ such that the guarantee + +```math +\min_{0 \leqslant t \leqslant n-1} D_h(x_{t+1}; x_t) \leqslant \tau(n, L, \gamma) (F(x_0) - F(x_n)) +``` + +is valid, where $x_n$ is the output of the **NoLips** method, +and where $D_h$ is the Bregman distance generated by $h$: + +```math +D_h(x; y) \triangleq h(x) - h(y) - \nabla h (y)^T(x - y). +``` + +In short, for given values of $n$, $L$, and $\gamma$, $\tau(n, L, \gamma)$ is computed +as the worst-case value of $\min_{0 \leqslant t \leqslant n-1}D_h(x_{t+1}; x_t)$ when +$F(x_0) - F(x_n) \leqslant 1$. + +# Algorithm + +This method (also known as Bregman Gradient, or Mirror descent) can be found in, +e.g., [1, Section 3].
For $t \in \{0, \dots, n-1\}$, + +```math +x_{t+1} = \arg\min_{u \in R^d} \nabla f(x_t)^T(u - x_t) + \frac{1}{\gamma} D_h(u; x_t). +``` + +# Theoretical guarantee + +The **tight** theoretical upper bound is obtained in [1, Proposition 4.1] + +```math +\min_{0 \leqslant t \leqslant n-1} D_h(x_{t+1}; x_t) \leqslant \frac{\gamma}{n(1 - L\gamma)}(F(x_0) - F(x_n)) +``` + +# References +The detailed setup and results are available in [1]. The PEP approach for studying such settings +is presented in [2]. + +[[1] J. Bolte, S. Sabach, M. Teboulle, Y. Vaisbourd (2018). +First order methods beyond convexity and Lipschitz gradient continuity +with applications to quadratic inverse problems. +SIAM Journal on Optimization, 28(3), 2131-2151.](https://arxiv.org/pdf/1706.06461.pdf) + +[[2] R. Dragomir, A. Taylor, A. d'Aspremont, J. Bolte (2021). +Optimal complexity and certification of Bregman first-order methods. +Mathematical Programming, 1-43.](https://arxiv.org/pdf/1911.08510.pdf) + +DISCLAIMER: This example requires some experience with PEPit and PEPs (see Section 4 in [2]). + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value.
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_no_lips_1(L, gamma, 5; verbose=true) +``` +""" function wc_no_lips_1(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/nonconvex_optimization/no_lips_2.jl b/examples/nonconvex_optimization/no_lips_2.jl index 3ff5f74..b92bc59 100644 --- a/examples/nonconvex_optimization/no_lips_2.jl +++ b/examples/nonconvex_optimization/no_lips_2.jl @@ -1,5 +1,90 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_no_lips_2(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_no_lips_2`. + +Consider the constrained composite convex minimization problem + +```math +F_\star \triangleq \min_x \{F(x) \equiv f_1(x)+f_2(x) \} +``` + +where $f_2$ is a closed convex indicator function and $f_1$ is possibly non-convex, +$L$-smooth relatively to $h$, +and $h$ is closed proper and convex. + +# Performance metric + +This code computes a worst-case guarantee for the **NoLips** method. +That is, it computes the smallest possible $\tau(n,L,\gamma)$ such that the guarantee + +```math +\min_{0 \leqslant t \leqslant n-1} D_h(x_t;x_{t+1}) \leqslant \tau(n, L, \gamma) (F(x_0) - F(x_n)) +``` + +is valid, where $x_n$ is the output of the **NoLips** method, +and where $D_h$ is the Bregman distance generated by $h$: + +```math +D_h(x; y) \triangleq h(x) - h(y) - \nabla h (y)^T(x - y). +``` + +In short, for given values of $n$, $L$, and $\gamma$, $\tau(n, L, \gamma)$ is computed +as the worst-case value of $\min_{0 \leqslant t \leqslant n-1}D_h(x_t;x_{t+1})$ when +$F(x_0) - F(x_n) \leqslant 1$. + +# Algorithm + +This method (also known as Bregman Gradient, or Mirror descent) can be found in, +e.g., [1, Section 3]. For $t \in \{0, \dots, n-1\}$, + +```math +x_{t+1} = \arg\min_{u \in R^d} \nabla f(x_t)^T(u - x_t) + \frac{1}{\gamma} D_h(u; x_t).
+``` + +# Theoretical guarantee + +An empirically **tight** worst-case guarantee is + +```math +\min_{0 \leqslant t \leqslant n-1}D_h(x_t;x_{t+1}) \leqslant \frac{\gamma}{n}(F(x_0) - F(x_n)). +``` + +# References +The detailed setup is presented in [1]. The PEP approach for studying such settings +is presented in [2]. + +[[1] J. Bolte, S. Sabach, M. Teboulle, Y. Vaisbourd (2018). +First order methods beyond convexity and Lipschitz gradient continuity +with applications to quadratic inverse problems. +SIAM Journal on Optimization, 28(3), 2131-2151.](https://arxiv.org/pdf/1706.06461.pdf) + +[[2] R. Dragomir, A. Taylor, A. d'Aspremont, J. Bolte (2021). +Optimal complexity and certification of Bregman first-order methods. +Mathematical Programming, 1-43.](https://arxiv.org/pdf/1911.08510.pdf) + +DISCLAIMER: This example requires some experience with PEPit and PEPs (see Section 4 in [2]). + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_no_lips_2(L, gamma, 3; verbose=true) +``` +""" function wc_no_lips_2(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/online_learning/online_follow_leader.jl b/examples/online_learning/online_follow_leader.jl index 284bf74..d564e5b 100644 --- a/examples/online_learning/online_follow_leader.jl +++ b/examples/online_learning/online_follow_leader.jl @@ -1,5 +1,76 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_online_follow_leader(M::Real, D::Real, n::Int; solver = Clarabel.Optimizer, verbose = true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_online_follow_leader`. 
+ +Consider the online convex minimization problem, whose goal is to sequentially minimize the regret + +```math +R_n \triangleq \max_{x\in Q} \sum_{i=1}^n f_i(x_i)-f_i(x), +``` + +where the functions $f_i$ are $M$-Lipschitz and convex, and where $Q$ is a +bounded closed convex set with diameter upper bounded by $D$. We also denote by $x_\star\in Q$ +the solution to the minimization problem defining $R_n$ (i.e., $x_\star$ is a reference point). +Classical references on the topic include [1, 2]. + +# Performance metric + +This code computes a worst-case guarantee for **follow the leader** (FTL). +That is, it computes the smallest possible $\tau(n, M, D)$ such that the guarantee + +```math +R_n \leqslant \tau(n, M, D) +``` + +is valid for any such sequence of queries of FTL; that is, $x_t$ are the query points of FTL. + +In short, for given values of $n$, $M$, $D$: +$\tau(n, M, D)$ is computed as the worst-case value of $R_n$. + +# Algorithm + +Follow the leader is described by + +```math +x_{t+1} \in \text{argmin}_{x\in Q} \left( \sum_{i=1}^t f_i(x) \right). +``` + +# Theoretical guarantee +The follow the leader strategy is known to have a linear regret +(see, e.g., [1, Chapter 5]); we do not compare to any guarantee here. + + +# References + + +[[1] E. Hazan (2016). +Introduction to online convex optimization. +Foundations and Trends in Optimization, 2(3-4), 157-325.](https://arxiv.org/pdf/1909.05207) + +[[2] F. Orabona (2025). +A Modern Introduction to Online Learning.](https://arxiv.org/pdf/1912.13213) + +# Arguments +- `M`: the Lipschitz parameter. +- `D`: the diameter of the set. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_online_follow_leader(M, D, n; verbose=true) +``` +""" function wc_online_follow_leader(M::Real, D::Real, n::Int; solver = Clarabel.Optimizer, verbose = true) diff --git a/examples/online_learning/online_follow_regularized_leader.jl b/examples/online_learning/online_follow_regularized_leader.jl index dd1a0ef..5dee77f 100644 --- a/examples/online_learning/online_follow_regularized_leader.jl +++ b/examples/online_learning/online_follow_regularized_leader.jl @@ -1,5 +1,86 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_online_follow_regularized_leader(M::Real, D::Real, n::Int; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_online_follow_regularized_leader`. + +Consider the online convex minimization problem, whose goal is to sequentially minimize the regret + +```math +R_n \triangleq \max_{x\in Q} \sum_{i=1}^n f_i(x_i)-f_i(x), +``` + +where the functions $f_i$ are $M$-Lipschitz and convex, and where $Q$ is a +bounded closed convex set with diameter upper bounded by $D$. We also denote by $x_\star\in Q$ +the solution to the minimization problem defining $R_n$ (i.e., $x_\star$ is a reference point). +Classical references on the topic include [1, 2]; such algorithms were studied using the performance +estimation technique in [3]. + +# Performance metric + +This code computes a worst-case guarantee for **follow the regularized leader** (FTRL). +That is, it computes the smallest possible $\tau(n, M, D)$ such that the guarantee + +```math +R_n \leqslant \tau(n, M, D) +``` + +is valid for any such sequence of queries of FTRL; that is, $x_t$ are the query points of FTRL. + +In short, for given values of $n$, $M$, $D$: +$\tau(n, M, D)$ is computed as the worst-case value of $R_n$. 
+ +# Algorithm + +Follow the regularized leader is described by + +```math +x_{t+1} \in \text{argmin}_{x\in Q} \left( \sum_{i=1}^t f_i(x) + \tfrac{\eta}{2}\|x-x_1\|^2 \right). +``` + +# Theoretical guarantee +The follow the regularized leader strategy is known to enjoy sublinear regret +(see, e.g., [1, Theorem 5.2]); we compare with the bound: + +```math +R_n \leqslant MD\sqrt{n} +``` + +with a regularization strength $\eta=D/M/\sqrt{n}$. + + +# References + + +[[1] E. Hazan (2016). +Introduction to online convex optimization. +Foundations and Trends in Optimization, 2(3-4), 157-325.](https://arxiv.org/pdf/1909.05207) + +[[2] F. Orabona (2025). +A Modern Introduction to Online Learning.](https://arxiv.org/pdf/1912.13213) + +[[3] J. Weibel, P. Gaillard, W.M. Koolen, A. Taylor (2025). +Optimized projection-free algorithms for online learning: construction and worst-case analysis](https://arxiv.org/pdf/2506.05855) + +# Arguments +- `M`: the Lipschitz parameter. +- `D`: the diameter of the set. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_online_follow_regularized_leader(M, D, n; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_online_follow_regularized_leader(M::Real, D::Real, n::Int; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/online_learning/online_frank_wolfe.jl b/examples/online_learning/online_frank_wolfe.jl index bbbed12..31f589f 100644 --- a/examples/online_learning/online_frank_wolfe.jl +++ b/examples/online_learning/online_frank_wolfe.jl @@ -1,5 +1,89 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_online_frank_wolfe(M::Real, D::Real, n::Int; solver = Clarabel.Optimizer, verbose = true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_online_frank_wolfe`. + +Consider the online convex minimization problem, whose goal is to sequentially minimize the regret + +```math +R_n \triangleq \max_{x\in Q} \sum_{i=1}^n f_i(x_i)-f_i(x), +``` + +where the functions $f_i$ are $M$-Lipschitz and convex, and where $Q$ is a +bounded closed convex set with diameter upper bounded by $D$. We also denote by $x_\star\in Q$ +the solution to the minimization problem defining $R_n$ (i.e., $x_\star$ is a reference point). +Classical references on the topic include [1, 2]. + +# Performance metric + +This code computes a worst-case guarantee for **online Frank-Wolfe** (OFW), see [1, Algorithm 27]; +the code uses the choice [3, Section 2] here. That is, it computes the smallest possible +$\tau(n, M, D)$ such that the guarantee + +```math +R_n \leqslant \tau(n, M, D) +``` + +is valid for any such sequence of queries of OFW; that is, $x_t$ are the query points of OFW. + +In short, for given values of $n$, $M$, $D$: +$\tau(n, M, D)$ is computed as the worst-case value of $R_n$. 
+ +# Algorithm + +Online Frank-Wolfe is described by + +```math + \begin{aligned} + \text{dir}_t & = & x_t-x_1 + \eta \sum_{s=1}^t g_s \\ + v_{t} & = & \text{argmin}_{v\in Q} \langle \text{dir}_t;v\rangle\\ + x_{t+1} & = & (1-\sigma) x_t + \sigma v_t + \end{aligned} +``` +where $\eta=\tfrac{D}{2M}\left(\frac{3}{n} \right)^{3/4}$ and $\sigma=\min\{1,\sqrt{3/n}\}$. + +# Theoretical guarantee + +We compare the numerical results with those of [3, Theorem 2.1]: + +```math +R_n \leqslant \frac{4}{3^{3/4}} MDn^{3/4} +``` + + +# References + + +[[1] E. Hazan (2016). +Introduction to online convex optimization. +Foundations and Trends in Optimization, 2(3-4), 157-325.](https://arxiv.org/pdf/1909.05207) + +[[2] F. Orabona (2025). +A Modern Introduction to Online Learning.](https://arxiv.org/pdf/1912.13213) + +[[3] J. Weibel, P. Gaillard, W.M. Koolen, A. Taylor (2025). +Optimized projection-free algorithms for online learning: construction and worst-case analysis](https://arxiv.org/pdf/2506.05855) + +# Arguments +- `M`: the Lipschitz parameter. +- `D`: the diameter of the set. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_online_frank_wolfe(M, D, n; verbose=true) +``` +""" function wc_online_frank_wolfe(M::Real, D::Real, n::Int; solver = Clarabel.Optimizer, verbose = true) diff --git a/examples/online_learning/online_gradient_descent.jl b/examples/online_learning/online_gradient_descent.jl index 73ddf9e..09e4ef8 100644 --- a/examples/online_learning/online_gradient_descent.jl +++ b/examples/online_learning/online_gradient_descent.jl @@ -1,5 +1,88 @@ using PEPit, OrderedCollections +@doc raw""" + wc_online_gradient_descent(M::Real, D::Real, n::Int; verbose = true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_online_gradient_descent`. + +Consider the online convex minimization problem, whose goal is to sequentially minimize the regret + +```math +R_n \triangleq \max_{x\in Q} \sum_{i=1}^n f_i(x_i)-f_i(x), +``` + +where the functions $f_i$ are $M$-Lipschitz and convex, and where $Q$ is a +bounded closed convex set with diameter upper bounded by $D$. We also denote by $x_\star\in Q$ +the solution to the minimization problem defining $R_n$ (i.e., $x_\star$ is a reference point). +Classical references on the topic include [1, 2]; such algorithms were studied using the performance +estimation technique in [3] and using the related IQCs in [4]. + +# Performance metric + +This code computes a worst-case guarantee for **online gradient descent** (OGD) with a step-size $\gamma=D/M/\sqrt{n}$. +That is, it computes the smallest possible $\tau(n, M, D)$ such that the guarantee + +```math +R_n \leqslant \tau(n, M, D) +``` + +is valid for any such sequence of queries of OGD; that is, $x_t$ are the query points of OGD. + +In short, for given values of $n$, $M$, $D$: +$\tau(n, M, D)$ is computed as the worst-case value of $R_n$. 
+ +# Algorithm + +Online gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f_t(x_t), +``` + +where $\gamma=D/M/\sqrt{n}$ is a step-size. + +# Theoretical guarantee + +We compare the numerical results with those of [2, Section 2.1.2]: + +```math +R_n \leqslant MD\sqrt{n}. +``` + + +# References + + +[[1] E. Hazan (2016). +Introduction to online convex optimization. +Foundations and Trends in Optimization, 2(3-4), 157-325.](https://arxiv.org/pdf/1909.05207) + +[[2] F. Orabona (2025). +A Modern Introduction to Online Learning.](https://arxiv.org/pdf/1912.13213) + +[[3] J. Weibel, P. Gaillard, W.M. Koolen, A. Taylor (2025). +Optimized projection-free algorithms for online learning: construction and worst-case analysis](https://arxiv.org/pdf/2506.05855) + +[[4] F. Jakob, A. Iannelli (2025). +Online Convex Optimization and Integral Quadratic Constraints: A new approach to regret analysis](https://arxiv.org/pdf/2503.23600) + +# Arguments +- `M`: the Lipschitz parameter. +- `D`: the diameter of the set. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_online_gradient_descent(M, D, n; verbose=true) +``` +""" function wc_online_gradient_descent(M::Real, D::Real, n::Int; verbose = true) diff --git a/examples/potential_functions/accelerated_gradient_method.jl b/examples/potential_functions/accelerated_gradient_method.jl index 89a318c..b9aab46 100644 --- a/examples/potential_functions/accelerated_gradient_method.jl +++ b/examples/potential_functions/accelerated_gradient_method.jl @@ -1,6 +1,90 @@ using PEPit using OrderedCollections +@doc raw""" + wc_accelerated_gradient_method(L, gamma, lam; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_method`. 
+ +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code verifies a worst-case guarantee for an **accelerated gradient method**. That is, it verifies +that the Lyapunov (or potential/energy) function + +```math +V_n \triangleq \lambda_n^2 (f(x_n) - f_\star) + \frac{L}{2} \|z_n - x_\star\|^2 +``` + +is decreasing along all trajectories and all smooth convex function $f$ (i.e., in the worst-case): + +```math +V_{n+1} \leqslant V_n, +``` + +where $x_{n+1}$, $z_{n+1}$, and $\lambda_{n+1}$ are obtained from one iteration of +the accelerated gradient method below, from some arbitrary $x_{n}$, $z_{n}$, and $\lambda_{n}$. + +# Algorithm +One iteration of accelerated gradient method is described by + +```math +\begin{aligned} + \text{Set: }\lambda_{n+1} & = & \frac{1}{2} \left(1 + \sqrt{4\lambda_n^2 + 1}\right), \tau_n & = & \frac{1}{\lambda_{n+1}}, + \text{ and } \eta_n & = & \frac{\lambda_{n+1}^2 - \lambda_{n}^2}{L} \\ + y_n & = & (1 - \tau_n) x_n + \tau_n z_n,\\ + z_{n+1} & = & z_n - \eta_n \nabla f(y_n), \\ + x_{n+1} & = & y_n - \gamma \nabla f(y_n). +\end{aligned} +``` +# Theoretical guarantee +The following worst-case guarantee can be found in e.g., [2, Theorem 5.3]: + +```math +V_{n+1} - V_n \leqslant 0, +``` + +when $\gamma=\frac{1}{L}$. + +# References +The potential can be found in the historical [1]; and in more recent works, e.g., [2, 3]. + +[[1] Y. Nesterov (1983). +A method for solving the convex programming problem with convergence rate $O(1/k^2)$. +In Dokl. akad. nauk Sssr (Vol. 269, pp. 543-547).](http://www.mathnet.ru/links/9bcb158ed2df3d8db3532aafd551967d/dan46009.pdf) + +[[2] N. Bansal, A. Gupta (2019). +Potential-function proofs for gradient methods. +Theory of Computing, 15(1), 1-32.](https://arxiv.org/pdf/1712.04581.pdf) + +[[3] A. d'Aspremont, D. Scieur, A. Taylor (2021). +Acceleration Methods. +Foundations and Trends in Optimization: Vol. 
5, No. 1-2.](https://arxiv.org/pdf/2101.09545.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `lam`: the initial value for sequence $(\lambda_t)_t$. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_gradient_method(1.0, 1.0, 10.0; verbose=true) +``` +""" function wc_accelerated_gradient_method(L, gamma, lam; verbose=true) problem = PEP() diff --git a/examples/potential_functions/gradient_descent_lyapunov_1.jl b/examples/potential_functions/gradient_descent_lyapunov_1.jl index 16072c7..97eca31 100644 --- a/examples/potential_functions/gradient_descent_lyapunov_1.jl +++ b/examples/potential_functions/gradient_descent_lyapunov_1.jl @@ -1,5 +1,83 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_lyapunov_1(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_lyapunov_1`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code verifies a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it verifies that the Lyapunov (or potential/energy) function + +```math +V_n \triangleq n (f(x_n) - f_\star) + \frac{L}{2} \|x_n - x_\star\|^2 +``` + +is decreasing along all trajectories and all smooth convex functions $f$ (i.e., in the worst-case): + +```math +V_{n+1} \leqslant V_n, +``` + +where $x_{n+1}$ is obtained from a gradient step from $x_{n}$ +with fixed step-size $\gamma=\frac{1}{L}$. + +# Algorithm +One iteration of gradient descent is described by + +```math +x_{n+1} = x_n - \gamma \nabla f(x_n), +``` + +where $\gamma$ is a step-size. 
+ +# Theoretical guarantee +The theoretical guarantee can be found in e.g., [1, Theorem 3.3]: + +```math +V_{n+1} - V_n \leqslant 0, +``` + +when $\gamma=\frac{1}{L}$. + +# References +The detailed potential function can be found in [1] and the SDP approach can be found in [2]. + +[[1] N. Bansal, A. Gupta (2019). +Potential-function proofs for gradient methods. +Theory of Computing, 15(1), 1-32.](https://arxiv.org/pdf/1712.04581.pdf) + +[[2] A. Taylor, F. Bach (2019). +Stochastic first-order methods: non-asymptotic and computer-aided analyses via potential functions. +Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_lyapunov_1(1.0, 1.0, 10; verbose=true) +``` +""" function wc_gradient_descent_lyapunov_1(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/potential_functions/gradient_descent_lyapunov_2.jl b/examples/potential_functions/gradient_descent_lyapunov_2.jl index fa7f045..f8d72d3 100644 --- a/examples/potential_functions/gradient_descent_lyapunov_2.jl +++ b/examples/potential_functions/gradient_descent_lyapunov_2.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_lyapunov_2(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_lyapunov_2`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. 
+ +# Performance metric + +This code verifies a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it verifies that the Lyapunov (or potential/energy) function + +```math +V_n \triangleq (2n + 1) L \left(f(x_n) - f_\star\right) + n(n+2) \|\nabla f(x_n)\|^2 + L^2 \|x_n - x_\star\|^2 +``` + +is decreasing along all trajectories and all smooth convex functions $f$ (i.e., in the worst-case): + +```math +V_{n+1} \leqslant V_n, +``` + +where $x_{n+1}$ is obtained from a gradient step from $x_{n}$ with fixed step-size $\gamma=\frac{1}{L}$. + +# Algorithm +One iteration of gradient descent is described by + +```math +x_{n+1} = x_n - \gamma \nabla f(x_n), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + +The theoretical guarantee can be found in [1, Theorem 3]: + +```math +V_{n+1} - V_n \leqslant 0, +``` + +when $\gamma=\frac{1}{L}$. + +# References +The detailed potential function and SDP approach can be found in [1]. + +[[1] A. Taylor, F. Bach (2019). +Stochastic first-order methods: non-asymptotic and computer-aided analyses via potential functions. +Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value. +- `theoretical_tau`: theoretical value. 
+ +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_lyapunov_2(L, 1 / L, 10; verbose=true) +``` +""" function wc_gradient_descent_lyapunov_2(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/stochastic_and_randomized_convex_minimization/point_saga.jl b/examples/stochastic_and_randomized_convex_minimization/point_saga.jl index 46ae13e..4f360ec 100644 --- a/examples/stochastic_and_randomized_convex_minimization/point_saga.jl +++ b/examples/stochastic_and_randomized_convex_minimization/point_saga.jl @@ -1,5 +1,89 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_point_saga(L, mu, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_point_saga`. + +Consider the finite sum minimization problem + +```math +F^\star \triangleq \min_x \left\{F(x) \equiv \frac{1}{n} \sum_{i=1}^n f_i(x)\right\}, +``` + +where $f_1, \dots, f_n$ are $L$-smooth and $\mu$-strongly convex, and with proximal operator +readily available. In the sequel, we use the notation $\mathbb{E}$ for denoting the expectation over the +uniform distribution of the index $i \sim \mathcal{U}\left([|1, n|]\right)$, +e.g., $F(x)\equiv \mathbb{E}[f_i(x)]$. + +# Performance metric + +This code computes a tight (one-step) worst-case guarantee using a Lyapunov function for **Point SAGA** [1]. +The Lyapunov (or energy) function at a point $x$ is given in [1, Theorem 5]: + +```math +V(x) = \frac{1}{L \mu}\frac{1}{n} \sum_{i \leq n} \|\nabla f_i(x) - \nabla f_i(x_\star)\|^2 + \|x - x^\star\|^2, +``` + +where $x^\star$ denotes the minimizer of $F$. 
The code computes the smallest possible +$\tau(n, L, \mu)$ such that the guarantee (in expectation): + +```math +\mathbb{E}\left[V\left(x^{(1)}\right)\right] \leqslant \tau(n, L, \mu) V\left(x^{(0)}\right), +``` + +is valid (note that we use the notation $x^{(0)},x^{(1)}$ to denote two consecutive iterates for convenience; +as the bound is valid for all $x^{(0)}$, +it is also valid for any pair of consecutive iterates of the algorithm). + +In short, for given values of $n$, $L$, and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$\mathbb{E}\left[V\left(x^{(1)}\right)\right]$ when $V\left(x^{(0)}\right) \leqslant 1$. + +# Algorithm + +Point SAGA is described by + +```math +\begin{aligned} + \text{Set }\gamma & = & \frac{\sqrt{(n - 1)^2 + 4n\frac{L}{\mu}}}{2Ln} - \frac{\left(1 - \frac{1}{n}\right)}{2L} \\ + \text{Pick random }j & \sim & \mathcal{U}\left([|1, n|]\right) \\ + z^{(t)} & = & x^{(t)} + \gamma \left(g_j^{(t)} - \frac{1}{n} \sum_{i\leq n}g_i^{(t)} \right), \\ + x^{(t+1)} & = & \mathrm{prox}_{\gamma f_j}(z^{(t)})\triangleq \arg\min_x\left\{ \gamma f_j(x)+\frac{1}{2} \|x-z^{(t)}\|^2 \right\}, \\ + g_j^{(t+1)} & = & \frac{1}{\gamma}(z^{(t)} - x^{(t+1)}). +\end{aligned} +``` +# Theoretical guarantee +A theoretical **upper** bound is given in [1, Theorem 5]: + +```math +\mathbb{E}\left[V\left(x^{(t+1)}\right)\right] \leqslant \frac{1}{1 + \mu\gamma} V\left(x^{(t)}\right) +``` + +# References + + +[[1] A. Defazio (2016). A simple practical accelerated method for finite sums. +Advances in Neural Information Processing Systems (NIPS), 29, 676-684.](https://proceedings.neurips.cc/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Paper.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `n`: number of functions $f_i$ in the finite sum. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. 
+- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_point_saga(1.0, 0.01, 10; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_point_saga(L, mu, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_convex.jl b/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_convex.jl index 03c5c04..7cd14f5 100644 --- a/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_convex.jl +++ b/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_convex.jl @@ -1,5 +1,83 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_randomized_coordinate_descent_smooth_convex(L, gamma, d, t; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_randomized_coordinate_descent_smooth_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for **randomized block-coordinate descent** with $d$ blocks and +fixed step-size $\gamma$. +That is, it verifies that the Lyapunov function + +```math +\phi(t, x_t) = (t \gamma \frac{L}{d} + 1)(f(x_t) - f_\star) + \frac{L}{2} \|x_t - x_\star\|^2 +``` + +is decreasing in expectation over the **randomized block-coordinate descent** algorithm. We use the notation +$\mathbb{E}$ for denoting the expectation over the uniform distribution +of the index $i \sim \mathcal{U}\left([|1, d|]\right)$. + +In short, for given values of $L$, $d$, and $\gamma$, it computes the worst-case value +of $\mathbb{E}[\phi(t, x_t)]$ such that $\phi(t-1, x_{t-1}) \leqslant 1$. 
+ +# Algorithm + +Randomized block-coordinate descent is described by + +```math +\begin{aligned} + \text{Pick random }i & \sim & \mathcal{U}\left([|1, d|]\right), \\ + x_{t+1} & = & x_t - \gamma \nabla_i f(x_t), +\end{aligned} +``` +where $\gamma$ is a step-size and $\nabla_i f(x_t)$ is the $i^{\text{th}}$ partial gradient. + +# Theoretical guarantee + +When $\gamma \leqslant \frac{1}{L}$, +the **tight** theoretical guarantee can be found in [1, Appendix I, Theorem 16]: + +```math +\mathbb{E}[\phi(t, x_t)] \leqslant \phi(t-1, x_{t-1}), +``` + +where $\phi(t, x_t) = (t \gamma \frac{L}{d} + 1)(f(x_t) - f_\star) + \frac{L}{2} \|x_t - x_\star\|^2$. + +# References + + +[[1] A. Taylor, F. Bach (2019). Stochastic first-order methods: non-asymptotic and computer-aided +analyses via potential functions. In Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `d`: the dimension. +- `t`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_randomized_coordinate_descent_smooth_convex(L, 1 / L, 2, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_randomized_coordinate_descent_smooth_convex(L, gamma, d, t; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_strongly_convex.jl b/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_strongly_convex.jl index 0194357..05a530c 100644 --- a/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_strongly_convex.jl +++ b/examples/stochastic_and_randomized_convex_minimization/randomized_coordinate_descent_smooth_strongly_convex.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_randomized_coordinate_descent_smooth_strongly_convex(L, mu, gamma, d; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_randomized_coordinate_descent_smooth_strongly_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for **randomized block-coordinate descent** +with step-size $\gamma$. +That is, it computes the smallest possible $\tau(L, \mu, \gamma, d)$ such that the guarantee + +```math +\mathbb{E}[\|x_{t+1} - x_\star \|^2] \leqslant \tau(L, \mu, \gamma, d) \|x_t - x_\star\|^2 +``` + +holds for any fixed step-size $\gamma$ and any number of blocks $d$, +and where $x_\star$ denotes a minimizer of $f$. The notation $\mathbb{E}$ +denotes the expectation over the uniform distribution of the index +$i \sim \mathcal{U}\left([|1, d|]\right)$. 
+ +In short, for given values of $\mu$, $L$, $d$, and $\gamma$, +$\tau(L, \mu, \gamma, d)$ is computed as the worst-case value of +$\mathbb{E}[\|x_{t+1} - x_\star \|^2]$ when $\|x_t - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Randomized block-coordinate descent is described by + +```math +\begin{aligned} + \text{Pick random }i & \sim & \mathcal{U}\left([|1, d|]\right), \\ + x_{t+1} & = & x_t - \gamma \nabla_i f(x_t), +\end{aligned} +``` +where $\gamma$ is a step-size and $\nabla_i f(x_t)$ is the $i^{\text{th}}$ partial gradient. + +# Theoretical guarantee + +When $\gamma \leqslant \frac{1}{L}$, the **tight** theoretical guarantee +can be found in [1, Appendix I, Theorem 17]: + +```math +\mathbb{E}[\|x_{t+1} - x_\star \|^2] \leqslant \rho^2 \|x_t-x_\star\|^2, +``` + +where $\rho^2 = \max \left( \frac{(\gamma\mu - 1)^2 + d - 1}{d},\frac{(\gamma L - 1)^2 + d - 1}{d} \right)$. + +# References + + +[[1] A. Taylor, F. Bach (2019). Stochastic first-order methods: non-asymptotic and computer-aided +analyses via potential functions. In Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `d`: the dimension. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_randomized_coordinate_descent_smooth_strongly_convex(L, mu, gamma, 2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_randomized_coordinate_descent_smooth_strongly_convex(L, mu, gamma, d; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/stochastic_and_randomized_convex_minimization/saga.jl b/examples/stochastic_and_randomized_convex_minimization/saga.jl index 0e4c68c..c966433 100644 --- a/examples/stochastic_and_randomized_convex_minimization/saga.jl +++ b/examples/stochastic_and_randomized_convex_minimization/saga.jl @@ -1,5 +1,86 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_saga(L, mu, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_saga`. + +Consider the finite sum convex minimization problem + +```math +F_\star \triangleq \min_x \left\{F(x) \equiv h(x) + \frac{1}{n} \sum_{i=1}^{n} f_i(x)\right\}, +``` + +where the functions $f_i$ are assumed to be $L$-smooth $\mu$-strongly convex, and $h$ is +closed, proper, and convex with a proximal operator readily available. In the sequel, we use the notation +$\mathbb{E}$ for denoting the expectation over the uniform distribution of the index +$i \sim \mathcal{U}\left([|1, n|]\right)$, +e.g., $F(x)\equiv h(x)+\mathbb{E}[f_i(x)]$. + +# Performance metric + +This code computes the exact rate for a Lyapunov (or energy) function for **SAGA** [1]. 
+That is, it computes the smallest possible $\tau(n,L,\mu)$ such that this Lyapunov function decreases geometrically + +```math +\mathbb{E}[V^{(1)}] \leqslant \tau(n, L, \mu) V^{(0)}, +``` + +where the value of the Lyapunov function at iteration $t$ is denoted by $V^{(t)}$ and is defined as + +```math +V^{(t)} \triangleq \frac{1}{n} \sum_{i=1}^n \left(f_i(\phi_i^{(t)}) - f_i(x^\star) - \langle \nabla f_i(x^\star); \phi_i^{(t)} - x^\star\rangle\right) + \frac{1}{2 n \gamma (1-\mu \gamma)} \|x^{(t)} - x^\star\|^2, +``` + +with $\gamma = \frac{1}{2(\mu n+L)}$ (this Lyapunov function was proposed in [1, Theorem 1]). +We consider the case $t=0$ in the code below, without loss of generality. + +In short, for given values of $n$, $L$, and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of $\mathbb{E}[V^{(1)}]$ +when $V^{(0)} \leqslant 1$. + +# Algorithm +One iteration of SAGA [1] is described as follows: at iteration $t$, pick +$j\in\{1,\ldots,n\}$ uniformly at random and set: + +```math + \begin{aligned} + \phi_j^{(t+1)} & = & x^{(t)} \\ + w^{(t+1)} & = & x^{(t)} - \gamma \left[ \nabla f_j (\phi_j^{(t+1)}) - \nabla f_j(\phi_j^{(t)}) + \frac{1}{n} \sum_{i=1}^n(\nabla f_i(\phi_i^{(t)}))\right] \\ + x^{(t+1)} & = & \mathrm{prox}_{\gamma h} (w^{(t+1)})\triangleq \arg\min_x \left\{ \gamma h(x)+\frac{1}{2}\|x-w^{(t+1)}\|^2\right\} + \end{aligned} +``` +# Theoretical guarantee +The following **upper** bound (empirically tight) can be found in [1, Theorem 1]: + +```math +\mathbb{E}[V^{(t+1)}] \leqslant \left(1-\gamma\mu \right)V^{(t)} +``` + +# References + + +[[1] A. Defazio, F. Bach, S. Lacoste-Julien (2014). SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives. +In Advances in Neural Information Processing Systems (NIPS).](http://papers.nips.cc/paper/2014/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Paper.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. 
+- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `n`: number of functions $f_i$ in the finite sum. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_saga(1.0, 0.1, 5; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_saga(L, mu, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/stochastic_and_randomized_convex_minimization/sgd.jl b/examples/stochastic_and_randomized_convex_minimization/sgd.jl index 7bd28d3..d09a13e 100644 --- a/examples/stochastic_and_randomized_convex_minimization/sgd.jl +++ b/examples/stochastic_and_randomized_convex_minimization/sgd.jl @@ -1,6 +1,97 @@ using PEPit using OrderedCollections +@doc raw""" + wc_sgd(L, mu, gamma, v, R, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_sgd`. + +Consider the finite sum minimization problem + +```math +F_\star \triangleq \min_x \left\{F(x) \equiv \frac{1}{n} \sum_{i=1}^n f_i(x)\right\}, +``` + +where $f_1, ..., f_n$ are $L$-smooth and $\mu$-strongly convex. +In the sequel, we use the notation $\mathbb{E}$ for denoting the expectation over the uniform distribution +of the index $i \sim \mathcal{U}\left([|1, n|]\right)$, +e.g., $F(x)\equiv\mathbb{E}[f_i(x)]$. In addition, we assume a bounded variance at +the optimal point (which is denoted by $x_\star$): + +```math +\mathbb{E}\left[\|\nabla f_i(x_\star)\|^2\right] = \frac{1}{n} \sum_{i=1}^n\|\nabla f_i(x_\star)\|^2 \leqslant v^2. +``` + +# Performance metric + +This code computes a worst-case guarantee for one step of the **stochastic gradient descent** (SGD) in expectation, +for the distance to an optimal point. 
That is, it computes the smallest possible +$\tau(L, \mu, \gamma, v, R, n)$ such that + +```math +\mathbb{E}\left[\|x_1 - x_\star\|^2\right] \leqslant \tau(L, \mu, \gamma, v, R, n) +``` + +where $\|x_0 - x_\star\|^2 \leqslant R^2$, where $v$ is the variance at $x_\star$, and where +$x_1$ is the output of one step of SGD (note that we use the notation $x_0,x_1$ to denote two +consecutive iterates for convenience; as the bound is valid for all $x_0$, it is also valid for +any pair of consecutive iterates of the algorithm). + +# Algorithm +One iteration of SGD is described by: + +```math +\begin{aligned} + \text{Pick random }i & \sim & \mathcal{U}\left([|1, n|]\right), \\ + x_{t+1} & = & x_t - \gamma \nabla f_{i}(x_t), +\end{aligned} +``` +where $\gamma$ is a step-size. + +# Theoretical guarantee +An empirically tight one-iteration guarantee is provided in the code of PESTO [1]: + +```math +\mathbb{E}\left[\|x_1 - x_\star\|^2\right] \leqslant \left( \gamma\frac{L-\mu}{2}R + \sqrt{\left(1-\gamma\frac{L+\mu}{2}\right)^2 R^2 + \gamma^2 v^2} \right)^2. +``` + +Note that we observe the guarantee does not depend on the number $n$ of +functions for this particular setting, thereby implying that the guarantees are also valid for expectation +minimization settings (i.e., when $n$ goes to infinity). + +# References +Empirically tight guarantee provided in code of [1]. Using SDPs for analyzing SGD-type method was +proposed in [2, 3]. + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2017). Performance Estimation Toolbox (PESTO): automated worst-case +analysis of first-order optimization methods. In 56th IEEE Conference on Decision and Control (CDC).](https://github.com/AdrienTaylor/Performance-Estimation-Toolbox) + +[[2] B. Hu, P. Seiler, L. Lessard (2020). Analysis of biased stochastic gradient descent using sequential +semidefinite programs. Mathematical programming.](https://arxiv.org/pdf/1711.00987.pdf) + +[[3] A. Taylor, F. Bach (2019). 
Stochastic first-order methods: non-asymptotic and computer-aided analyses +via potential functions. Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `v`: the variance bound. +- `R`: the initial distance. +- `n`: number of functions $f_i$ in the finite sum. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_sgd(1.0, 0.1, 0.7, 1.0, 2.0, 5; verbose=true) +``` +""" function wc_sgd(L, mu, gamma, v, R, n; verbose=true) problem = PEP() diff --git a/examples/stochastic_and_randomized_convex_minimization/sgd_overparametrized.jl b/examples/stochastic_and_randomized_convex_minimization/sgd_overparametrized.jl index 588e631..70bfad8 100644 --- a/examples/stochastic_and_randomized_convex_minimization/sgd_overparametrized.jl +++ b/examples/stochastic_and_randomized_convex_minimization/sgd_overparametrized.jl @@ -1,5 +1,99 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_sgd_overparametrized(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_sgd_overparametrized`. + +Consider the finite sum minimization problem + +```math +F_\star \triangleq \min_x \left\{F(x) \equiv \frac{1}{n} \sum_{i=1}^n f_i(x)\right\}, +``` + +where $f_1, ..., f_n$ are $L$-smooth and $\mu$-strongly convex. In the sequel, we use the notation +$\mathbb{E}$ for denoting the expectation over the uniform distribution of the index +$i \sim \mathcal{U}\left([|1, n|]\right)$, e.g., $F(x)\equiv\mathbb{E}[f_i(x)]$. 
+In addition, we assume a zero variance at the optimal point (which is denoted by $x_\star$): + +```math +\mathbb{E}\left[\|\nabla f_i(x_\star)\|^2\right] = \frac{1}{n} \sum_{i=1}^n \|\nabla f_i(x_\star)\|^2 = 0, +``` + +where the expectation $\mathbb{E}$ is taken over the uniform distribution of the index +$i \sim \mathcal{U}\left([|1, n|]\right)$. + +This kind of situation happens for example in machine learning in the interpolation regime, +that is if there exists a model $x_\star$ +such that the loss $\mathcal{L}$ on any observation $(z_i)_{i \in [|1, n|]}$, +$\mathcal{L}(x_\star, z_i) = f_i(x_\star)$ is zero. + +# Performance metric + +This code computes a worst-case guarantee for one step of the **stochastic gradient descent** (SGD) in expectation, +for the distance to an optimal point. +That is, it computes the smallest possible $\tau(L, \mu, \gamma, n)$ such that + +```math +\mathbb{E}\left[\|x_1 - x_\star\|^2\right] \leqslant \tau(L, \mu, \gamma, n) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_1$ is the output of one step of SGD. + +# Algorithm +One iteration of SGD is described by: + +```math +\begin{aligned} + \text{Pick random }i & \sim & \mathcal{U}\left([|1, n|]\right), \\ + x_{t+1} & = & x_t - \gamma \nabla f_{i}(x_t), +\end{aligned} +``` +where $\gamma$ is a step-size. + +# Theoretical guarantee +An empirically tight one-iteration guarantee is provided in the code of PESTO [1]: + +```math +\mathbb{E}\left[\|x_1 - x_\star\|^2\right] \leqslant \left(\max\left(1 - \gamma\mu, L\gamma - 1\right)\right)^2 \|x_0-x_\star\|^2. +``` + +Note that we observe the guarantee does not depend on the number $n$ of +functions for this particular setting, thereby implying that the guarantees are also valid for expectation +minimization settings (i.e., when $n$ goes to infinity). + +# References +Empirically tight guarantee provided in code of [1]. Using SDPs for analyzing SGD-type methods was +proposed in [2, 3]. + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2017). 
Performance Estimation Toolbox (PESTO): automated worst-case +analysis of first-order optimization methods. In 56th IEEE Conference on Decision and Control (CDC).](https://github.com/AdrienTaylor/Performance-Estimation-Toolbox) + +[[2] B. Hu, P. Seiler, L. Lessard (2020). Analysis of biased stochastic gradient descent using sequential +semidefinite programs. Mathematical programming.](https://arxiv.org/pdf/1711.00987.pdf) + +[[3] A. Taylor, F. Bach (2019). Stochastic first-order methods: non-asymptotic and computer-aided analyses +via potential functions. Conference on Learning Theory (COLT).](https://arxiv.org/pdf/1902.00947.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of functions $f_i$ in the finite sum. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_sgd_overparametrized(1.0, 0.1, 2.3, 5; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_sgd_overparametrized(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/accelerated_gradient_convex.jl b/examples/unconstrained_convex_minimization/accelerated_gradient_convex.jl index f96d88a..3b2c96a 100644 --- a/examples/unconstrained_convex_minimization/accelerated_gradient_convex.jl +++ b/examples/unconstrained_convex_minimization/accelerated_gradient_convex.jl @@ -1,5 +1,80 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_gradient_convex(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_convex`. 
+ +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex ($\mu$ is possibly 0). + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient method**, a.k.a. **fast gradient method** [1]. +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the accelerated gradient method, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm +Initialize $\lambda_1=1$, $y_1=x_0$. +One iteration of accelerated gradient method is described by + +```math +\begin{align} + \text{Set: }\lambda_{t+1} & = & \frac{1 + \sqrt{4\lambda_t^2 + 1}}{2} \\ + x_{t} & = & y_t - \frac{1}{L} \nabla f(y_t),\\ + y_{t+1} & = & x_{t} + \frac{\lambda_t-1}{\lambda_{t+1}} (x_t-x_{t-1}). +\end{align} +``` +# Theoretical guarantee +The following worst-case guarantee can be found in e.g., [2, Theorem 4.4]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{2}\frac{\|x_0-x_\star\|^2}{\lambda_n^2}. +``` + +# References + + +[[1] Y. Nesterov (1983). +A method for solving the convex programming problem with convergence rate O(1/k^2). +In Dokl. akad. nauk Sssr (Vol. 269, pp. 543-547).](http://www.mathnet.ru/links/9bcb158ed2df3d8db3532aafd551967d/dan46009.pdf) + +[[2] A. Beck, M. Teboulle (2009). +A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse Problems. +SIAM journal on imaging sciences, 2009, vol. 2, no 1, p. 183-202.](https://www.ceremade.dauphine.fr/~carlier/FISTA) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. 
+- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_gradient_convex(0, 1, 1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_gradient_convex(mu, L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/accelerated_gradient_convex_simplified.jl b/examples/unconstrained_convex_minimization/accelerated_gradient_convex_simplified.jl index 5266a51..cb8994b 100644 --- a/examples/unconstrained_convex_minimization/accelerated_gradient_convex_simplified.jl +++ b/examples/unconstrained_convex_minimization/accelerated_gradient_convex_simplified.jl @@ -1,5 +1,83 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_gradient_convex_simplified(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_convex_simplified`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex ($\mu$ is possibly 0). + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient method**, a.k.a. **fast gradient method** +with a set of classical slightly simplified sets of coefficients compared to the original [1]. +That is, the code computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the accelerated gradient method below, +and where $x_\star$ is the minimizer of $f$. 
+In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +The accelerated gradient method of this example is provided by + +```math + \begin{aligned} + x_{t+1} & = & y_t - \frac{1}{L} \nabla f(y_t) \\ + y_{t+1} & = & x_{t+1} + \frac{t-1}{t+2} (x_{t+1} - x_t). + \end{aligned} +``` +# Theoretical guarantee + +When $\mu=0$, a tight **empirical** guarantee can be found in [2, Table 1]: + +```math +f(x_n)-f_\star \leqslant \frac{2L\|x_0-x_\star\|^2}{n^2 + 5 n + 6}, +``` + +where tightness is obtained on some Huber loss functions. + +# References + + +[[1] Y. Nesterov (1983). +A method for solving the convex programming problem with convergence rate O(1/k^2). +In Dokl. akad. nauk Sssr (Vol. 269, pp. 543-547).](http://www.mathnet.ru/links/9bcb158ed2df3d8db3532aafd551967d/dan46009.pdf) + +[[2] A. Taylor, J. Hendrickx, F. Glineur (2017). +Exact worst-case performance of first-order methods for composite convex optimization. +SIAM Journal on Optimization, 27(3):1283-1313.](https://arxiv.org/pdf/1512.07516.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_gradient_convex_simplified(0, 1, 1; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_gradient_convex_simplified(mu, L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/accelerated_gradient_strongly_convex.jl b/examples/unconstrained_convex_minimization/accelerated_gradient_strongly_convex.jl index b110b38..522f256 100644 --- a/examples/unconstrained_convex_minimization/accelerated_gradient_strongly_convex.jl +++ b/examples/unconstrained_convex_minimization/accelerated_gradient_strongly_convex.jl @@ -2,6 +2,78 @@ using PEPit using OrderedCollections using JuMP +@doc raw""" + wc_accelerated_gradient_strongly_convex(mu, L, n; verbose=false) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_gradient_strongly_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient** method, a.k.a **fast gradient** method. +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu) \left(f(x_0) - f(x_\star) + \frac{\mu}{2}\|x_0 - x_\star\|^2\right), +``` + +is valid, where $x_n$ is the output of the **accelerated gradient** method, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $f(x_0) - f(x_\star) + \frac{\mu}{2}\|x_0 - x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + +For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + y_t & = & x_t + \frac{\sqrt{L} - \sqrt{\mu}}{\sqrt{L} + \sqrt{\mu}}(x_t - x_{t-1}) \\ + x_{t+1} & = & y_t - \frac{1}{L} \nabla f(y_t) + \end{aligned} +``` +with $x_{-1}:= x_0$. + +# Theoretical guarantee + + + The following **upper** guarantee can be found in [1, Corollary 4.15]: + +```math +f(x_n)-f_\star \leqslant \left(1 - \sqrt{\frac{\mu}{L}}\right)^n \left(f(x_0) - f(x_\star) + \frac{\mu}{2}\|x_0 - x_\star\|^2\right). +``` + +# References + + +[[1] A. d'Aspremont, D. Scieur, A. Taylor (2021). Acceleration Methods. Foundations and Trends +in Optimization: Vol. 5, No. 1-2.](https://arxiv.org/pdf/2101.09545.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +PEPit_val, theoretical_val = wc_accelerated_gradient_strongly_convex(0.1, 1.0, 2, verbose=true) +``` +""" function wc_accelerated_gradient_strongly_convex(mu, L, n; verbose=false) problem = PEP() param = OrderedDict("mu" => mu, "L" => L) diff --git a/examples/unconstrained_convex_minimization/accelerated_proximal_point.jl b/examples/unconstrained_convex_minimization/accelerated_proximal_point.jl index 817f2b1..2e21575 100644 --- a/examples/unconstrained_convex_minimization/accelerated_proximal_point.jl +++ b/examples/unconstrained_convex_minimization/accelerated_proximal_point.jl @@ -1,5 +1,93 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_accelerated_proximal_point(A0, gammas, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_accelerated_proximal_point`. 
+ +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is convex and possibly non-smooth. + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated proximal point** method, +aka **fast proximal point** method (FPP). +That is, it computes the smallest possible $\tau(n, A_0,\vec{\gamma})$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, A_0, \vec{\gamma}) \left(f(x_0) - f_\star + \frac{A_0}{2} \|x_0 - x_\star\|^2\right) +``` + +is valid, where $x_n$ is the output of FPP (with step-size $\gamma_t$ at step +$t\in \{0, \dots, n-1\}$) and where $x_\star$ is a minimizer of $f$ +and $A_0$ is a positive number. + +In short, for given values of $n$, $A_0$ and $\vec{\gamma}$, $\tau(n, A_0, \vec{\gamma})$ +is computed as the worst-case value of $f(x_n)-f_\star$ +when $f(x_0) - f_\star + \frac{A_0}{2} \|x_0 - x_\star\|^2 \leqslant 1$, for the following method. + +# Algorithm + +For $t\in \{0, \dots, n-1\}$: + +```math + \begin{aligned} + y_{t+1} & = & (1-\alpha_{t} ) x_{t} + \alpha_{t} v_t \\ + x_{t+1} & = & \arg\min_x \left\{f(x)+\frac{1}{2\gamma_t}\|x-y_{t+1}\|^2 \right\}, \\ + v_{t+1} & = & v_t + \frac{1}{\alpha_{t}} (x_{t+1}-y_{t+1}) + \end{aligned} +``` +with + +```math + \begin{aligned} + \alpha_{t} & = & \frac{\sqrt{(A_t \gamma_t)^2 + 4 A_t \gamma_t} - A_t \gamma_t}{2} \\ + A_{t+1} & = & (1 - \alpha_{t}) A_t + \end{aligned} +``` +and $v_0=x_0$. + + + +# Theoretical guarantee + +A theoretical **upper** bound can be found in [1, Theorem 2.3.]: + +```math +f(x_n)-f_\star \leqslant \frac{4}{A_0 (\sum_{t=0}^{n-1} \sqrt{\gamma_t})^2}\left(f(x_0) - f_\star + \frac{A_0}{2} \|x_0 - x_\star\|^2 \right). +``` + +# References + +The accelerated proximal point was first obtained and analyzed in [1]. + +[[1] O. Guler (1992). +New proximal point algorithms for convex minimization. 
+SIAM Journal on Optimization, 2(4):649-664.](https://epubs.siam.org/doi/abs/10.1137/0802032?mobileUi=0) + +# Arguments +- `A0`: initial value for parameter A_0. +- `gammas`: sequence of step-sizes. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_accelerated_proximal_point(5, [(i + 1) / 1.1 for i in 0:2], 3; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_accelerated_proximal_point(A0, gammas, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/conjugate_gradient.jl b/examples/unconstrained_convex_minimization/conjugate_gradient.jl index 3b08a9e..594120c 100644 --- a/examples/unconstrained_convex_minimization/conjugate_gradient.jl +++ b/examples/unconstrained_convex_minimization/conjugate_gradient.jl @@ -1,5 +1,95 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_conjugate_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_conjugate_gradient`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for the **conjugate gradient (CG)** method (with exact span searches). +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0-x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **conjugate gradient** method, +and where $x_\star$ is a minimizer of $f$. +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $\|x_0-x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + + +```math +x_{t+1} = x_t - \sum_{i=0}^t \gamma_i \nabla f(x_i) +``` + + with + +```math +(\gamma_i)_{i \leqslant t} = \arg\min_{(\gamma_i)_{i \leqslant t}} f \left(x_t - \sum_{i=0}^t \gamma_i \nabla f(x_i) \right) +``` + +# Theoretical guarantee + + + The **tight** guarantee obtained in [1] is + +```math +f(x_n) - f_\star \leqslant\frac{L}{2 \theta_n^2}\|x_0-x_\star\|^2. +``` + + where + +```math + \begin{aligned} + \theta_0 & = & 1 \\ + \theta_t & = & \frac{1 + \sqrt{4 \theta_{t-1}^2 + 1}}{2}, \forall t \in [|1, n-1|] \\ + \theta_n & = & \frac{1 + \sqrt{8 \theta_{n-1}^2 + 1}}{2}, + \end{aligned} +``` + +and tightness follows from [2, Theorem 3]. +# References + +The detailed approach (based on convex relaxations) is available in [1, Corollary 6]. + +[[1] Y. Drori and A. Taylor (2020). +Efficient first-order methods for convex minimization: a constructive approach. +Mathematical Programming 184 (1), 183-220.](https://arxiv.org/pdf/1803.05676.pdf) + +[[2] Y. Drori (2017). +The exact information-based complexity of smooth convex minimization. +Journal of Complexity, 39, 1-16.](https://arxiv.org/pdf/1606.01424.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_conjugate_gradient(1.0, 2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_conjugate_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/conjugate_gradient_qg_convex.jl b/examples/unconstrained_convex_minimization/conjugate_gradient_qg_convex.jl index 90a71cf..965d522 100644 --- a/examples/unconstrained_convex_minimization/conjugate_gradient_qg_convex.jl +++ b/examples/unconstrained_convex_minimization/conjugate_gradient_qg_convex.jl @@ -1,5 +1,84 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_conjugate_gradient_qg_convex(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_conjugate_gradient_qg_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is quadratically upper bounded ($\text{QG}^+$ [2]), i.e. +$\forall x, f(x) - f_\star \leqslant \frac{L}{2} \|x-x_\star\|^2$, and convex. + +# Performance metric + +This code computes a worst-case guarantee for the **conjugate gradient (CG)** method (with exact span searches). +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0-x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **conjugate gradient** method, +and where $x_\star$ is a minimizer of $f$. +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $\|x_0-x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + + +```math +x_{t+1} = x_t - \sum_{i=0}^t \gamma_i \nabla f(x_i) +``` + + with + +```math +(\gamma_i)_{i \leqslant t} = \arg\min_{(\gamma_i)_{i \leqslant t}} f \left(x_t - \sum_{i=0}^t \gamma_i \nabla f(x_i) \right) +``` + +# Theoretical guarantee + + + The **tight** guarantee obtained in [2, Theorem 2.3] (lower) and [2, Theorem 2.4] (upper) is + +```math +f(x_n) - f_\star \leqslant \frac{L}{2 (n + 1)} \|x_0-x_\star\|^2. +``` + +# References + +The detailed approach (based on convex relaxations) is available in [1, Corollary 6], +and the result provided in [2, Theorem 2.4]. + +[[1] Y. Drori and A. Taylor (2020). Efficient first-order methods for convex minimization: a constructive approach. +Mathematical Programming 184 (1), 183-220.](https://arxiv.org/pdf/1803.05676.pdf) + +[[2] B. Goujaud, A. Taylor, A. Dieuleveut (2022). +Optimal first-order methods for convex functions with a quadratic upper bound.](https://arxiv.org/pdf/2205.15033.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_conjugate_gradient_qg_convex(1.0, 12; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_conjugate_gradient_qg_convex(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/cyclic_coordinate_descent.jl b/examples/unconstrained_convex_minimization/cyclic_coordinate_descent.jl index 4665c81..3fe850e 100644 --- a/examples/unconstrained_convex_minimization/cyclic_coordinate_descent.jl +++ b/examples/unconstrained_convex_minimization/cyclic_coordinate_descent.jl @@ -1,5 +1,68 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_cyclic_coordinate_descent(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_cyclic_coordinate_descent`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth by blocks (with $d$ blocks) and convex. + +# Performance metric + +This code computes a worst-case guarantee for **cyclic coordinate descent** with fixed step-sizes $1/L_i$. +That is, it computes the smallest possible $\tau(n, d, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, d, L) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of cyclic coordinate descent with fixed step-sizes $1/L_i$, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $L$, and $d$, $\tau(n, d, L)$ is computed as +the worst-case value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Cyclic coordinate descent is described by + +```math +x_{t+1} = x_t - \frac{1}{L_{i_t}} \nabla_{i_t} f(x_t), +``` + +where $L_{i_t}$ is the Lipschitz constant of the block $i_t$, +and where $i_t$ follows a prescribed ordering. + +# References + + +[[1] Z. Shi, R. Liu (2016). 
+Better worst-case complexity analysis of the block coordinate descent method for large scale machine learning. +In 2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA).](https://arxiv.org/pdf/1608.04826.pdf) + +# Arguments +- `L`: smoothness (Lipschitz) constants $L_i$ of the blocks, one per block. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: `nothing` (no theoretical guarantee is reported for this example) + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_cyclic_coordinate_descent(L, 9; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_cyclic_coordinate_descent(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/epsilon_subgradient_method.jl b/examples/unconstrained_convex_minimization/epsilon_subgradient_method.jl index 1e33c89..3126075 100644 --- a/examples/unconstrained_convex_minimization/epsilon_subgradient_method.jl +++ b/examples/unconstrained_convex_minimization/epsilon_subgradient_method.jl @@ -1,5 +1,81 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_epsilon_subgradient_method(M, n, gamma, eps, R; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_epsilon_subgradient_method`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is closed, convex, and proper. This problem is a (possibly non-smooth) minimization problem. + +# Performance metric + +This code computes a worst-case guarantee for the $\varepsilon$ **-subgradient method**. 
That is, it computes +the smallest possible $\tau(n, M, \gamma, \varepsilon, R)$ such that the guarantee + +```math +\min_{0 \leqslant t \leqslant n} f(x_t) - f_\star \leqslant \tau(n, M, \gamma, \varepsilon, R) +``` + +is valid, where $x_t$ are the iterates of the $\varepsilon$ **-subgradient method** +after $t\leqslant n$ steps, +where $x_\star$ is a minimizer of $f$, where $M$ is an upper bound on the norm of all +$\varepsilon$-subgradients encountered, and when $\|x_0-x_\star\|\leqslant R$. + +In short, for given values of $M$, of the accuracy $\varepsilon$, of the step-size $\gamma$, +of the initial distance $R$, and of the number of iterations $n$, +$\tau(n, M, \gamma, \varepsilon, R)$ is computed as the worst-case value of +$\min_{0 \leqslant t \leqslant n} f(x_t) - f_\star$. + +# Algorithm + +For $t\in \{0, \dots, n-1 \}$ + +```math + \begin{aligned} + g_{t} & \in & \partial_{\varepsilon} f(x_t) \\ + x_{t+1} & = & x_t - \gamma g_t + \end{aligned} +``` +# Theoretical guarantee +An upper bound is obtained in [1, Lemma 2]: + +```math +\min_{0 \leqslant t \leqslant n} f(x_t)- f(x_\star) \leqslant \frac{R^2+2(n+1)\gamma\varepsilon+(n+1) \gamma^2 M^2}{2(n+1) \gamma}. +``` + +# References + + +[[1] R.D. Millan, M.P. Machado (2019). +Inexact proximal epsilon-subgradient methods for composite convex optimization problems. +Journal of Global Optimization 75.4 (2019): 1029-1060.](https://arxiv.org/pdf/1805.10120.pdf) + +# Arguments +- `M`: the bound on norms of epsilon-subgradients. +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `eps`: the bound on the value of epsilon (inaccuracy). +- `R`: the bound on initial distance to an optimal solution. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_epsilon_subgradient_method(M, n, gamma, eps, R; verbose=true) +``` +""" function wc_epsilon_subgradient_method(M, n, gamma, eps, R; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent.jl b/examples/unconstrained_convex_minimization/gradient_descent.jl index 3da9343..a06f795 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \gamma)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \gamma) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent with fixed step-size $\gamma$, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $L$, and $\gamma$, +$\tau(n, L, \gamma)$ is computed as the worst-case +value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. 
+ +# Theoretical guarantee + +When $\gamma \leqslant \frac{1}{L}$, the **tight** theoretical guarantee can be found in [1, Theorem 3.1]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{4nL\gamma+2} \|x_0-x_\star\|^2, +``` + +which is tight on some Huber loss functions. + +# References + + +[[1] Y. Drori, M. Teboulle (2014). +Performance of first-order methods for smooth convex minimization: a novel approach. +Mathematical Programming 145(1-2), 451-482.](https://arxiv.org/pdf/1206.3209.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent(L, 1 / L, 4; verbose=true) +``` +""" function wc_gradient_descent(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_contraction.jl b/examples/unconstrained_convex_minimization/gradient_descent_contraction.jl index 0540777..d4ea405 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_contraction.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_contraction.jl @@ -1,5 +1,77 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_contraction(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_contraction`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. 
+That is, it computes the smallest possible $\tau(n, L, \mu, \gamma)$ such that the guarantee + +```math +\| x_n - y_n \|^2 \leqslant \tau(n, L, \mu, \gamma) \| x_0 - y_0 \|^2 +``` + +is valid, where $x_n$ and $y_n$ are the outputs of +the gradient descent method with fixed step-size $\gamma$, +starting respectively from $x_0$ and $y_0$. + +In short, for given values of $n$, $L$, $\mu$ and $\gamma$, +$\tau(n, L, \mu, \gamma)$ is computed as the worst-case value of $\| x_n - y_n \|^2$ +when $\| x_0 - y_0 \|^2 \leqslant 1$. + +# Algorithm + +For $t\in\{0,1,\ldots,n-1\}$, gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + +The **tight** theoretical guarantee is + +```math +\| x_n - y_n \|^2 \leqslant \max\{(1-L\gamma)^2,(1-\mu \gamma)^2\}^n\| x_0 - y_0 \|^2, +``` + +which is tight on simple quadratic functions. + +# References +No bibliographic reference was listed in the corresponding Python PEPit example docstring. + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_contraction(L, mu, gamma, n; verbose=true) +``` +""" function wc_gradient_descent_contraction(L, mu, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_lc.jl b/examples/unconstrained_convex_minimization/gradient_descent_lc.jl index 5ed40cd..7858c5d 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_lc.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_lc.jl @@ -15,6 +15,75 @@ function _fsolve_scalar(fun, x0; xtol=1e-10, maxiter=200) return x end +@doc raw""" + wc_gradient_descent_lc(mug, Lg, typeM, muM, LM, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_lc`. + +Consider the convex minimization problem + +```math +g_\star \triangleq \min_x g(Mx), +``` + +where $g$ is an $L_g$-smooth, $\mu_g$-strongly convex function and $M$ is a general, symmetric or +skew-symmetric matrix with $\mu_M \leqslant \|M\| \leqslant L_M$. + +# Performance metric + +This code computes a worst-case guarantee for the objective residual + +```math +g(Mx_n) - g_\star +``` + +under the normalization $\|x_0-x_\star\|^2 \leqslant 1$, where $x_\star$ is a +stationary point of the linearly composed objective. + +# Algorithm + +The method is gradient descent on the composition $x \mapsto g(Mx)$. The +symbolic step is + +```math +x_{t+1} = x_t - \gamma M^\ast \nabla g(Mx_t), +``` + +where the Julia implementation realizes $M^\ast$ as `M.T` for a general linear +operator, as `M` for a symmetric operator, and as `-M` for a skew-symmetric +operator.
+ +# Theoretical guarantee + +The example computes its reference value from the scalar equation and closed-form +post-processing implemented below, using the condition ratios +$\kappa_g=\mu_g/L_g$ and $\kappa_M=\mu_M/L_M$. + +# References +No bibliographic reference was listed in the corresponding Python PEPit example docstring. + +# Arguments +- `mug`: the strong convexity parameter of $g(y)$. +- `Lg`: the smoothness parameter of $g(y)$. +- `typeM`: type of matrix $M$ ("gen", "sym" or "skew"). +- `muM`: lower bound on $\|M\|$ (if typeM != "sym", then muM must be set to zero). +- `LM`: upper bound on $\|M\|$. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_lc(mug, Lg, typeM, muM, LM, 1 / (Lg * LM^2), 3; verbose=true) +``` +""" function wc_gradient_descent_lc(mug, Lg, typeM, muM, LM, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_qg_convex.jl b/examples/unconstrained_convex_minimization/gradient_descent_qg_convex.jl index df0c041..8f27a73 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_qg_convex.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_qg_convex.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_qg_convex(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_qg_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is quadratically upper bounded ($\text{QG}^+$ [1]), i.e. 
+$\forall x, f(x) - f_\star \leqslant \frac{L}{2} \|x-x_\star\|^2$, and convex. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, L, \gamma)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \gamma) \| x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent with fixed step-size $\gamma$, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $L$, +and $\gamma$, $\tau(n, L, \gamma)$ is computed as the worst-case +value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + +When $\gamma < \frac{1}{L}$, the **lower** theoretical guarantee can be found in [1, Theorem 2.2]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{2}\max\left(\frac{1}{2n L \gamma + 1}, L \gamma\right) \|x_0-x_\star\|^2. +``` + +# References + + +The detailed approach is available in [1, Theorem 2.2]. + +[[1] B. Goujaud, A. Taylor, A. Dieuleveut (2022). +Optimal first-order methods for convex functions with a quadratic upper bound.](https://arxiv.org/pdf/2205.15033.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_qg_convex(L, 0.2 / L, 4; verbose=true) +``` +""" function wc_gradient_descent_qg_convex(L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_qg_convex_decreasing.jl b/examples/unconstrained_convex_minimization/gradient_descent_qg_convex_decreasing.jl index 06e124d..a74c902 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_qg_convex_decreasing.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_qg_convex_decreasing.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_qg_convex_decreasing(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_qg_convex_decreasing`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is quadratically upper bounded ($\text{QG}^+$ [1]), i.e. +$\forall x, f(x) - f_\star \leqslant \frac{L}{2} \|x-x_\star\|^2$, and convex. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with decreasing step-sizes. +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \| x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent with decreasing step-sizes, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case +value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$.
+ +# Algorithm + +Gradient descent with decreasing step sizes is described by + +```math +x_{t+1} = x_t - \gamma_t \nabla f(x_t) +``` + +with + +```math +\gamma_t = \frac{1}{L u_{t+1}} +``` + +where the sequence $u$ is defined by + +```math +\begin{aligned} + u_0 & = & 1 \\ + u_{t} & = & \frac{u_{t-1}}{2} + \sqrt{\left(\frac{u_{t-1}}{2}\right)^2 + 2}, \quad \mathrm{for } t \geq 1 +\end{aligned} +``` +# Theoretical guarantee + +The **tight** theoretical guarantee is conjectured in [1, Conjecture A.3]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{2 u_n} \|x_0-x_\star\|^2. +``` + +# References +[[1] B. Goujaud, A. Taylor, A. Dieuleveut (2022). Optimal first-order methods for convex functions with a quadratic upper bound.](https://arxiv.org/pdf/2205.15033.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_qg_convex_decreasing(1.0, 6; verbose=true) +``` +""" function wc_gradient_descent_qg_convex_decreasing(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_quadratics.jl b/examples/unconstrained_convex_minimization/gradient_descent_quadratics.jl index 7338917..bf1dc02 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_quadratics.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_quadratics.jl @@ -1,5 +1,80 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_quadratics(mu, L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_quadratics`.
+ +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f=\frac{1}{2} x^T Q x$ is $L$-smooth and $\mu$-strongly convex (i.e. $\mu I \preceq Q \preceq LI$). + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, \mu, L, \gamma)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, \mu, L, \gamma) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent with fixed step-size $\gamma$, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $\mu$, $L$, and $\gamma$, $\tau(n, \mu, L, \gamma)$ is computed as the worst-case +value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. + +# Theoretical guarantee + + When $\gamma \leqslant \frac{2}{L}$ and $0 \leqslant \mu \leqslant L$, + the **tight** theoretical conjecture can be found in [1, Equation (4.17)]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{2} \max\left\{\alpha(1-\alpha L\gamma)^{2n}, (1-L\gamma)^{2n} \right\} \|x_0-x_\star\|^2, +``` + +where $\alpha = \mathrm{proj}_{[\frac{\mu}{L},1]} \left(\frac{1}{L\gamma (2n+1)}\right)$. + +# References + + + [[1] N. Bousselmi, J. Hendrickx, F. Glineur (2023). + Interpolation Conditions for Linear Operators and applications to Performance Estimation Problems. + arXiv preprint](https://arxiv.org/pdf/2302.08781.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_quadratics(mu, L, 1 / L, 4; verbose=true) +``` +""" function wc_gradient_descent_quadratics(mu, L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_convex.jl b/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_convex.jl index d848087..24d6e98 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_convex.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_convex.jl @@ -1,5 +1,78 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_silver_stepsize_convex(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_silver_stepsize_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for $n$ steps of the **gradient descent** method tuned +according to the silver stepsize schedule. +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent using the silver step-sizes, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, and $L$, $\tau(n, L)$ is computed as the worst-case +value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma_t \nabla f(x_t), +``` + +where $\gamma_t$ is a step-size of the $t^{th}$ step of the silver step-size schedule described in [1]. 
+ +# Theoretical guarantee + +The theoretical guarantee for the convergence rate of the silver stepsize can be found in [1, Theorem 1.1]: + +```math +f(x_n)-f_\star \leqslant \frac{L}{1 + \sqrt{4(1 + \sqrt{2})^{2k}-3}} \|x_0-x_\star\|^2, +``` + +where $k$ is such that $n = 2^k - 1$. + +# References + + +[[1] J. M. Altschuler, P. A. Parrilo (2023). +Acceleration by Stepsize Hedging II: Silver Stepsize Schedule for Smooth Convex Optimization. +arXiv preprint arXiv:2309.16530.](https://arxiv.org/abs/2309.16530) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_silver_stepsize_convex(10.0, 7; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_gradient_descent_silver_stepsize_convex(L, n; solver=Clarabel.Optimizer, verbose=true) k = log2(n + 1) diff --git a/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_strongly_convex.jl b/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_strongly_convex.jl index a169393..6ee4356 100644 --- a/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_strongly_convex.jl +++ b/examples/unconstrained_convex_minimization/gradient_descent_silver_stepsize_strongly_convex.jl @@ -1,5 +1,86 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_gradient_descent_silver_stepsize_strongly_convex(L, mu, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_descent_silver_stepsize_strongly_convex`. 
+ +Consider the strongly convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for $n$ steps of the **gradient descent** method tuned +according to the silver stepsize schedule. +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +\|x_n - x_\star\|^2 \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of gradient descent using the silver stepsizes, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $L$ and $\mu$, $\tau(n, L, \mu)$ is computed +as the worst-case value of $\|x_n - x_\star\|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Gradient descent is described by + +```math +x_{t+1} = x_t - \gamma_t \nabla f(x_t), +``` + +where $\gamma_t$ is a step-size of the $t^{th}$ step of the silver step-size schedule described in [1]. + +# Theoretical guarantee + +The theoretical guarantee for the convergence rate of the silver stepsize can be found in [1, Theorem 4.1]: +Let $\rho = 1 + \sqrt{2}$ denote the silver ratio and let $n^\star = 2^{\lfloor \log_\rho(L/(3\mu)) \rfloor}$. + +When $n \leq n^\star$, the guarantee is given by + +```math +\|x_n - x_\star\|^2 \leqslant e^{-\frac{n^{\log_2(\rho)}}{L/\mu}} \|x_0-x_\star\|^2. +``` + +When $n > n^\star$, the guarantee is given by + +```math +\|x_n - x_\star\|^2 \leqslant e^{-\frac{n}{n^\star} \frac{(n^\star)^{\log_2(\rho)}}{L/\mu}} \|x_0-x_\star\|^2. +``` + +# References + + +[[1] J. M. Altschuler, P. A. Parrilo (2023). +Acceleration by Stepsize Hedging I: Multi-Step Descent and the Silver Stepsize Schedule. +arXiv preprint arXiv:2309.07879.](https://arxiv.org/abs/2309.07879) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `n`: number of iterations.
+- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_descent_silver_stepsize_strongly_convex(3.2, 0.1, 8; verbose=true) +``` +""" function wc_gradient_descent_silver_stepsize_strongly_convex(L, mu, n; solver=Clarabel.Optimizer, verbose=true) diff --git a/examples/unconstrained_convex_minimization/gradient_exact_line_search.jl b/examples/unconstrained_convex_minimization/gradient_exact_line_search.jl index 7f03ec5..41ad78a 100644 --- a/examples/unconstrained_convex_minimization/gradient_exact_line_search.jl +++ b/examples/unconstrained_convex_minimization/gradient_exact_line_search.jl @@ -1,6 +1,76 @@ using PEPit using OrderedCollections +@doc raw""" + wc_gradient_exact_line_search(L, mu, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_gradient_exact_line_search`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for the **gradient descent** (GD) with **exact linesearch** (ELS). +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the output of the GD with ELS, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $f(x_0) - f_\star \leqslant 1$. + +# Algorithm + +GD with ELS can be written as + +```math +x_{t+1} = x_t - \gamma_t \nabla f(x_t) +``` + +with $\gamma_t = \arg\min_{\gamma} f \left( x_t - \gamma \nabla f(x_t) \right)$. 
+ +# Theoretical guarantee +The **tight** worst-case guarantee for GD with ELS, obtained in [1, Theorem 1.2], is + +```math +f(x_n) - f_\star \leqslant \left(\frac{L-\mu}{L+\mu}\right)^{2n} (f(x_0) - f_\star). +``` + +# References +The detailed approach (based on convex relaxations) is available in [1], +along with theoretical bound. + +[[1] E. De Klerk, F. Glineur, A. Taylor (2017). +On the worst-case complexity of the gradient method with exact line search for smooth strongly convex functions. +Optimization Letters, 11(7), 1185-1199.](https://link.springer.com/content/pdf/10.1007/s11590-016-1087-4.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_gradient_exact_line_search(1.0, 0.1, 2; verbose=true) +``` +""" function wc_gradient_exact_line_search(L, mu, n; verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/heavy_ball_momentum.jl b/examples/unconstrained_convex_minimization/heavy_ball_momentum.jl index 1fdab8b..d1329b1 100644 --- a/examples/unconstrained_convex_minimization/heavy_ball_momentum.jl +++ b/examples/unconstrained_convex_minimization/heavy_ball_momentum.jl @@ -1,5 +1,93 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_heavy_ball_momentum(mu, L, alpha, beta, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_heavy_ball_momentum`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. 
+ +# Performance metric + +This code computes a worst-case guarantee for the **Heavy-ball (HB)** method, aka **Polyak momentum** method. +That is, it computes the smallest possible $\tau(n, L, \mu, \alpha, \beta)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu, \alpha, \beta) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the output of the **Heavy-ball (HB)** method, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$, $\mu$, $\alpha$ and $\beta$, +$\tau(n, L, \mu, \alpha, \beta)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $f(x_0) - f_\star \leqslant 1$. + +# Algorithm + + +```math +x_{t+1} = x_t - \alpha \nabla f(x_t) + \beta (x_t-x_{t-1}) +``` + + with + +```math +\alpha \in (0, \frac{1}{L}] +``` + + and + +```math +\beta = \sqrt{(1 - \alpha \mu)(1 - L \alpha)} +``` + +# Theoretical guarantee + + +The **upper** guarantee obtained in [2, Theorem 4] is + +```math +f(x_n) - f_\star \leqslant (1 - \alpha \mu)^n (f(x_0) - f_\star). +``` + +# References +This method was first introduced in [1, Section 2], +and a convergence upper bound was proven in [2, Theorem 4]. + +[[1] B.T. Polyak (1964). +Some methods of speeding up the convergence of iteration methods. +USSR Computational Mathematics and Mathematical Physics.](https://www.sciencedirect.com/science/article/pii/0041555364901375) + +[[2] E. Ghadimi, H. R. Feyzmahdavian, M. Johansson (2015). +Global convergence of the Heavy-ball method for convex optimization. +European Control Conference (ECC).](https://arxiv.org/pdf/1412.7457.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `alpha`: step-size $\alpha$ applied to the gradient in the update rule. +- `beta`: momentum parameter $\beta$ applied to $x_t - x_{t-1}$ in the update rule. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP.
+- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_heavy_ball_momentum(mu, L, alpha, beta, 2; verbose=true) +``` +""" function wc_heavy_ball_momentum(mu, L, alpha, beta, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/heavy_ball_momentum_qg_convex.jl b/examples/unconstrained_convex_minimization/heavy_ball_momentum_qg_convex.jl index fedf845..dd5b312 100644 --- a/examples/unconstrained_convex_minimization/heavy_ball_momentum_qg_convex.jl +++ b/examples/unconstrained_convex_minimization/heavy_ball_momentum_qg_convex.jl @@ -1,5 +1,93 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_heavy_ball_momentum_qg_convex(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_heavy_ball_momentum_qg_convex`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is quadratically upper bounded ($\text{QG}^+$ [2]), i.e. +$\forall x, f(x) - f_\star \leqslant \frac{L}{2} \|x-x_\star\|^2$, and convex. + +# Performance metric + +This code computes a worst-case guarantee for the **Heavy-ball (HB)** method, aka **Polyak momentum** method. +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the **Heavy-ball (HB)** method, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$ and $L$, +$\tau(n, L)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + + +This method is described in [1]: + +```math +x_{t+1} = x_t - \alpha_t \nabla f(x_t) + \beta_t (x_t-x_{t-1}) +``` + + with + +```math +\alpha_t = \frac{1}{L} \frac{1}{t+2} +``` + + and + +```math +\beta_t = \frac{t}{t+2} +``` + +# Theoretical guarantee + + +The **tight** guarantee obtained in [2, Theorem 2.3] (lower) and [2, Theorem 2.4] (upper) is + +```math +f(x_n) - f_\star \leqslant \frac{L}{2}\frac{1}{n+1} \|x_0 - x_\star\|^2. +``` + +# References + +This method was first introduced in [1, Section 3], +and the **tight** convergence bound was proven in [2, Theorem 2.3] (lower) and [2, Theorem 2.4] (upper). + +[[1] E. Ghadimi, H. R. Feyzmahdavian, M. Johansson (2015). +Global convergence of the Heavy-ball method for convex optimization. +European Control Conference (ECC).](https://arxiv.org/pdf/1412.7457.pdf) + +[[2] B. Goujaud, A. Taylor, A. Dieuleveut (2022). +Optimal first-order methods for convex functions with a quadratic upper bound.](https://arxiv.org/pdf/2205.15033.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true.
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_heavy_ball_momentum_qg_convex(1, 5; verbose=true) +``` +""" function wc_heavy_ball_momentum_qg_convex(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/inexact_accelerated_gradient.jl b/examples/unconstrained_convex_minimization/inexact_accelerated_gradient.jl index b3684a0..748e639 100644 --- a/examples/unconstrained_convex_minimization/inexact_accelerated_gradient.jl +++ b/examples/unconstrained_convex_minimization/inexact_accelerated_gradient.jl @@ -1,5 +1,84 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_inexact_accelerated_gradient(L, epsilon, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_inexact_accelerated_gradient`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for an **accelerated gradient method** using **inexact first-order +information**. That is, it computes the smallest possible $\tau(n, L, \varepsilon)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \varepsilon) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of **inexact accelerated gradient descent** and where $x_\star$ +is a minimizer of $f$. + +The inexact descent direction is assumed to satisfy a relative inaccuracy described by +(with $0\leqslant \varepsilon \leqslant 1$) + +```math +\|\nabla f(y_t) - d_t\| \leqslant \varepsilon \|\nabla f(y_t)\|, +``` + +where $\nabla f(y_t)$ is the true gradient at $y_t$ and $d_t$ is +the approximate descent direction that is used. 
+ +# Algorithm + +The inexact accelerated gradient method of this example is provided by + +```math + \begin{aligned} + x_{t+1} & = & y_t - \frac{1}{L} d_t\\ + y_{t+1} & = & x_{t+1} + \frac{t-1}{t+2} (x_{t+1} - x_t). + \end{aligned} +``` +# Theoretical guarantee + +When $\varepsilon=0$, a **tight** empirical guarantee can be found in [1, Table 1]: + +```math +f(x_n)-f_\star \leqslant \frac{2L\|x_0-x_\star\|^2}{n^2 + 5 n + 6}, +``` + +which is achieved on some Huber loss functions (when $\varepsilon=0$). + +# References + + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2017). Exact worst-case performance of first-order methods for composite +convex optimization. SIAM Journal on Optimization, 27(3):1283-1313.](https://arxiv.org/pdf/1512.07516.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `epsilon`: level of inaccuracy. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_inexact_accelerated_gradient(1.0, 0.1, 5; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_inexact_accelerated_gradient(L, epsilon, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/inexact_gradient_descent.jl b/examples/unconstrained_convex_minimization/inexact_gradient_descent.jl index ec2bd8c..77fa132 100644 --- a/examples/unconstrained_convex_minimization/inexact_gradient_descent.jl +++ b/examples/unconstrained_convex_minimization/inexact_gradient_descent.jl @@ -1,5 +1,96 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_inexact_gradient_descent(L, mu, epsilon, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_inexact_gradient_descent`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for the **inexact gradient** method. +That is, it computes the smallest possible $\tau(n, L, \mu, \varepsilon)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu, \varepsilon) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the output of the **inexact gradient** method, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$, $\mu$ and $\varepsilon$, +$\tau(n, L, \mu, \varepsilon)$ is computed as the worst-case value of +$f(x_n)-f_\star$ when $f(x_0) - f_\star \leqslant 1$. 
+ +# Algorithm + + +```math +x_{t+1} = x_t - \gamma d_t +``` + + with + +```math +\|d_t - \nabla f(x_t)\| \leqslant \varepsilon \|\nabla f(x_t)\| +``` + + and + +```math +\gamma = \frac{2}{L_{\varepsilon} + \mu_{\varepsilon}} +``` + + where $L_{\varepsilon} = (1 + \varepsilon) L$ and $\mu_{\varepsilon} = (1 - \varepsilon) \mu$. + +# Theoretical guarantee + + +The **tight** worst-case guarantee obtained in [1, Theorem 5.3] or [2, Remark 1.6] is + +```math +f(x_n) - f_\star \leqslant \left(\frac{L_{\varepsilon}-\mu_{\varepsilon}}{L_{\varepsilon}+\mu_{\varepsilon}}\right)^{2n}(f(x_0) - f_\star), +``` + +where tightness is achieved on simple quadratic functions. + +# References +The detailed analyses can be found in [1, 2]. + +[[1] E. De Klerk, F. Glineur, A. Taylor (2020). +Worst-case convergence analysis of inexact gradient +and Newton methods through semidefinite programming performance estimation. +SIAM Journal on Optimization, 30(3), 2053-2082.](https://arxiv.org/pdf/1709.05191.pdf) + +[[2] O. Gannot (2021). +A frequency-domain analysis of inexact gradient methods. +Mathematical Programming.](https://arxiv.org/pdf/1912.13494.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `epsilon`: level of inaccuracy. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_inexact_gradient_descent(1.0, 0.1, 0.1, 2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_inexact_gradient_descent(L, mu, epsilon, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/inexact_gradient_exact_line_search.jl b/examples/unconstrained_convex_minimization/inexact_gradient_exact_line_search.jl index 261137b..cee75c5 100644 --- a/examples/unconstrained_convex_minimization/inexact_gradient_exact_line_search.jl +++ b/examples/unconstrained_convex_minimization/inexact_gradient_exact_line_search.jl @@ -1,6 +1,87 @@ using PEPit using OrderedCollections +@doc raw""" + wc_inexact_gradient_exact_line_search(L, mu, epsilon, n; verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_inexact_gradient_exact_line_search`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. + +# Performance metric + +This code computes a worst-case guarantee for an **inexact gradient method with exact linesearch (ELS)**. +That is, it computes the smallest possible $\tau(n, L, \mu, \varepsilon)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu, \varepsilon) ( f(x_0) - f_\star ) +``` + +is valid, where $x_n$ is the output of the **gradient descent with an inexact descent direction +and an exact linesearch**, and where $x_\star$ is the minimizer of $f$. + +The inexact descent direction $d$ is assumed to satisfy a relative inaccuracy described by +(with $0 \leqslant \varepsilon < 1$) + +```math +\|\nabla f(x_t) - d_t\| \leqslant \varepsilon \|\nabla f(x_t)\|, +``` + +where $\nabla f(x_t)$ is the true gradient, and $d_t$ +is the approximate descent direction that is used. 
+ +# Algorithm + + +For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + \gamma_t & = & \arg\min_{\gamma \in \mathbb{R}} f(x_t- \gamma d_t), \\ + x_{t+1} & = & x_t - \gamma_t d_t. + \end{aligned} +``` +# Theoretical guarantee + +The **tight** guarantee obtained in [1, Theorem 5.1] is + +```math +f(x_n) - f_\star\leqslant \left(\frac{L_{\varepsilon} - \mu_{\varepsilon}}{L_{\varepsilon} + \mu_{\varepsilon}}\right)^{2n}( f(x_0) - f_\star ), +``` + +with $L_{\varepsilon} = (1 + \varepsilon) L$ and $\mu_{\varepsilon} = (1 - \varepsilon) \mu$. +Tightness is achieved on simple quadratic functions. + +# References +The detailed approach (based on convex relaxations) is available in [1]. + +[[1] E. De Klerk, F. Glineur, A. Taylor (2017). On the worst-case complexity of the gradient method with exact +line search for smooth strongly convex functions. Optimization Letters, 11(7), 1185-1199.](https://link.springer.com/content/pdf/10.1007/s11590-016-1087-4.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `epsilon`: level of inaccuracy. +- `n`: number of iterations. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_inexact_gradient_exact_line_search(1.0, 0.1, 0.1, 2; verbose=true) +``` +""" function wc_inexact_gradient_exact_line_search(L, mu, epsilon, n; verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/information_theoretic_exact_method.jl b/examples/unconstrained_convex_minimization/information_theoretic_exact_method.jl index c4c0963..5493a22 100644 --- a/examples/unconstrained_convex_minimization/information_theoretic_exact_method.jl +++ b/examples/unconstrained_convex_minimization/information_theoretic_exact_method.jl @@ -1,5 +1,90 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_information_theoretic(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_information_theoretic`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex ($\mu$ is possibly 0). + +# Performance metric + +This code computes a worst-case guarantee for the **information theoretic exact method** (ITEM). +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +\|z_n - x_\star\|^2 \leqslant \tau(n, L, \mu) \|z_0 - x_\star\|^2 +``` + +is valid, where $z_n$ is the output of the ITEM, +and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$ and $\mu$, +$\tau(n, L, \mu)$ is computed as the worst-case value of +$\|z_n - x_\star\|^2$ when $\|z_0 - x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + +For $t\in\{0,1,\ldots,n-1\}$, the information theoretic exact method of this example is provided by + +```math + \begin{aligned} + y_{t} & = & (1-\beta_t) z_t + \beta_t x_t \\ + x_{t+1} & = & y_t - \frac{1}{L} \nabla f(y_t) \\ + z_{t+1} & = & \left(1-q\delta_t\right) z_t+q\delta_t y_t-\frac{\delta_t}{L}\nabla f(y_t), + \end{aligned} +``` +with $y_{-1}=x_0=z_0$, $q=\frac{\mu}{L}$ (inverse condition ratio), and the scalar sequences: + +```math + \begin{aligned} + A_{t+1} & = & \frac{(1+q)A_t+2\left(1+\sqrt{(1+A_t)(1+qA_t)}\right)}{(1-q)^2},\\ + \beta_{t+1} & = & \frac{A_t}{(1-q)A_{t+1}},\\ + \delta_{t+1} & = & \frac{1}{2}\frac{(1-q)^2A_{t+1}-(1+q)A_t}{1+q+q A_t}, + \end{aligned} +``` +with $A_0=0$. + +# Theoretical guarantee + +A tight worst-case guarantee can be found in [1, Theorem 3]: + +```math +\|z_n - x_\star\|^2 \leqslant \frac{1}{1+q A_n} \|z_0-x_\star\|^2, +``` + +where tightness is obtained on some quadratic loss functions (see [1, Lemma 2]). + +# References + + +[[1] A. Taylor, Y. Drori (2022). +An optimal gradient method for smooth strongly convex minimization. +Mathematical Programming.](https://arxiv.org/pdf/2101.09741.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_information_theoretic(0.001, 1.0, 15; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_information_theoretic(mu, L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/optimized_gradient.jl b/examples/unconstrained_convex_minimization/optimized_gradient.jl index 62ac94a..770a62b 100644 --- a/examples/unconstrained_convex_minimization/optimized_gradient.jl +++ b/examples/unconstrained_convex_minimization/optimized_gradient.jl @@ -1,5 +1,94 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_optimized_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimized_gradient`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for **optimized gradient method** (OGM). That is, it computes +the smallest possible $\tau(n, L)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of OGM and where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$ and $L$, $\tau(n, L)$ is computed as the worst-case value +of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. 
+ +# Algorithm + +The optimized gradient method is described by + +```math + \begin{aligned} + x_{t+1} & = & y_t - \frac{1}{L} \nabla f(y_t)\\ + y_{t+1} & = & x_{t+1} + \frac{\theta_{t}-1}{\theta_{t+1}}(x_{t+1}-x_t)+\frac{\theta_{t}}{\theta_{t+1}}(x_{t+1}-y_t), + \end{aligned} +``` +with + +```math + \begin{aligned} + \theta_0 & = & 1 \\ + \theta_t & = & \frac{1 + \sqrt{4 \theta_{t-1}^2 + 1}}{2}, \forall t \in [|1, n-1|] \\ + \theta_n & = & \frac{1 + \sqrt{8 \theta_{n-1}^2 + 1}}{2}. + \end{aligned} +``` +# Theoretical guarantee + +The **tight** theoretical guarantee can be found in [2, Theorem 2]: + +```math +f(x_n)-f_\star \leqslant \frac{L\|x_0-x_\star\|^2}{2\theta_n^2}, +``` + +where tightness follows from [3, Theorem 3]. + +# References + +The optimized gradient method was developed in [1, 2]; the corresponding lower bound was first obtained in [3]. + +[[1] Y. Drori, M. Teboulle (2014). +Performance of first-order methods for smooth convex minimization: a novel approach. +Mathematical Programming 145(1-2), 451-482.](https://arxiv.org/pdf/1206.3209.pdf) + +[[2] D. Kim, J. Fessler (2016). +Optimized first-order methods for smooth convex minimization. +Mathematical Programming 159.1-2: 81-107.](https://arxiv.org/pdf/1406.5468.pdf) + +[[3] Y. Drori (2017). +The exact information-based complexity of smooth convex minimization. +Journal of Complexity, 39, 1-16.](https://arxiv.org/pdf/1606.01424.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimized_gradient(3.0, 4; verbose=true) +``` +""" function wc_optimized_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/optimized_gradient_for_gradient.jl b/examples/unconstrained_convex_minimization/optimized_gradient_for_gradient.jl index d825e6e..149c3da 100644 --- a/examples/unconstrained_convex_minimization/optimized_gradient_for_gradient.jl +++ b/examples/unconstrained_convex_minimization/optimized_gradient_for_gradient.jl @@ -1,5 +1,86 @@ using PEPit, OrderedCollections, Clarabel, OffsetArrays +@doc raw""" + wc_optimized_gradient_for_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_optimized_gradient_for_gradient`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and convex. + +# Performance metric + +This code computes a worst-case guarantee for **optimized gradient method for gradient** (OGM-G). +That is, it computes the smallest possible $\tau(n, L)$ such that the guarantee + +```math +\|\nabla f(x_n)\|^2 \leqslant \tau(n, L) (f(x_0) - f_\star) +``` + +is valid, where $x_n$ is the output of OGM-G and where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$ and $L$, $\tau(n, L)$ is computed as the worst-case value +of $\|\nabla f(x_n)\|^2$ when $f(x_0)-f_\star \leqslant 1$. 
+ +# Algorithm + +For $t\in\{0,1,\ldots,n-1\}$, the optimized gradient method for gradient [1, Section 6.3] is described by + +```math + \begin{aligned} + y_{t+1} & = & x_t - \frac{1}{L} \nabla f(x_t),\\ + x_{t+1} & = & y_{t+1} + \frac{(\tilde{\theta}_t-1)(2\tilde{\theta}_{t+1}-1)}{\tilde{\theta}_t(2\tilde{\theta}_t-1)}(y_{t+1}-y_t)+\frac{2\tilde{\theta}_{t+1}-1}{2\tilde{\theta}_t-1}(y_{t+1}-x_t), + \end{aligned} +``` +with + +```math + \begin{aligned} + \tilde{\theta}_n & = & 1 \\ + \tilde{\theta}_t & = & \frac{1 + \sqrt{4 \tilde{\theta}_{t+1}^2 + 1}}{2}, \forall t \in [|1, n-1|] \\ + \tilde{\theta}_0 & = & \frac{1 + \sqrt{8 \tilde{\theta}_{1}^2 + 1}}{2}. + \end{aligned} +``` +# Theoretical guarantee + +The **tight** worst-case guarantee can be found in [1, Theorem 6.1]: + +```math +\|\nabla f(x_n)\|^2 \leqslant \frac{2L(f(x_0)-f_\star)}{\tilde{\theta}_0^2}, +``` + +where tightness is achieved on Huber losses, see [1, Section 6.4]. + +# References + +The optimized gradient method for gradient was developed in [1]. + +[[1] D. Kim, J. Fessler (2021). +Optimizing the efficiency of first-order methods for decreasing the gradient of smooth convex functions. +Journal of optimization theory and applications, 188(1), 192-219.](https://arxiv.org/pdf/1803.06600.pdf) + +# Arguments +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_optimized_gradient_for_gradient(3.0, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_optimized_gradient_for_gradient(L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/proximal_point.jl b/examples/unconstrained_convex_minimization/proximal_point.jl index 0df018c..d3d3fee 100644 --- a/examples/unconstrained_convex_minimization/proximal_point.jl +++ b/examples/unconstrained_convex_minimization/proximal_point.jl @@ -1,5 +1,80 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_proximal_point(gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_proximal_point`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is closed, proper, and convex (and potentially non-smooth). + +# Performance metric + +This code computes a worst-case guarantee for the **proximal point method** with step-size $\gamma$. +That is, it computes the smallest possible $\tau(n,\gamma)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, \gamma) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the proximal point method, and where $x_\star$ is a +minimizer of $f$. + +In short, for given values of $n$ and $\gamma$, +$\tau(n,\gamma)$ is computed as the worst-case value of $f(x_n)-f_\star$ +when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + + +The proximal point method is described by + +```math +x_{t+1} = \arg\min_x \left\{f(x)+\frac{1}{2\gamma}\|x-x_t\|^2 \right\}, +``` + +where $\gamma$ is a step-size. 
+ +# Theoretical guarantee + + +The **tight** theoretical guarantee can be found in [1, Theorem 4.1]: + +```math +f(x_n)-f_\star \leqslant \frac{\|x_0-x_\star\|^2}{4\gamma n}, +``` + +where tightness is obtained on, e.g., one-dimensional linear problems on the positive orthant. + +# References + + +[[1] A. Taylor, J. Hendrickx, F. Glineur (2017). +Exact worst-case performance of first-order methods for composite convex optimization. +SIAM Journal on Optimization, 27(3):1283-1313.](https://arxiv.org/pdf/1512.07516.pdf) + +# Arguments +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_proximal_point(3, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_proximal_point(gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/robust_momentum.jl b/examples/unconstrained_convex_minimization/robust_momentum.jl index 1f01ab1..3d6cd45 100644 --- a/examples/unconstrained_convex_minimization/robust_momentum.jl +++ b/examples/unconstrained_convex_minimization/robust_momentum.jl @@ -1,5 +1,88 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_robust_momentum(mu, L, lam; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_robust_momentum`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly-convex. + +# Performance metric + +This code computes a worst-case guarantee for the **robust momentum method** (RMM). 
+That is, it computes the smallest possible $\tau(n, \mu, L, \lambda)$ such that the guarantee + +```math +v(x_{n+1}) \leqslant \tau(n, \mu, L, \lambda) v(x_{n}), +``` + +is valid, where $x_n$ is the $n^{\mathrm{th}}$ iterate of the RMM, and $x_\star$ is a minimizer +of $f$. The function $v(.)$ is a well-chosen Lyapunov function defined as follows, + +```math + \begin{aligned} + v(x_t) & = & l\|z_t - x_\star\|^2 + q_t, \\ + q_t & = & (L - \mu) \left(f(x_t) - f_\star - \frac{\mu}{2}\|y_t - x_\star\|^2 - \frac{1}{2}\|\nabla f(y_t) - \mu (y_t - x_\star)\|^2 \right), + \end{aligned} +``` +with $\kappa = \frac{L}{\mu}$, $\rho = \lambda (1 - \frac{1}{\kappa}) + (1 - \lambda) \left(1 - \frac{1}{\sqrt{\kappa}}\right)$, and $l = \mu^2 \frac{\kappa - \kappa \rho^2 - 1}{2 \rho (1 - \rho)}$. + +# Algorithm + + +For $t \in \{0, \dots, n-1\}$, + +```math + \begin{aligned} + x_{t+1} & = & x_{t} + \beta (x_t - x_{t-1}) - \alpha \nabla f(y_t), \\ + y_{t+1} & = & y_{t} + \gamma (x_t - x_{t-1}), + \end{aligned} +``` +with $x_{-1}, x_0 \in \mathrm{R}^d$, +and with parameters $\alpha = \frac{\kappa (1 - \rho^2)(1 + \rho)}{L}$, $\beta = \frac{\kappa \rho^3}{\kappa - 1}$, $\gamma = \frac{\rho^2}{(\kappa - 1)(1 - \rho)^2(1 + \rho)}$. + +# Theoretical guarantee + + +A convergence guarantee (empirically tight) is obtained in [1, Theorem 1], + +```math +v(x_{n+1}) \leqslant \rho^2 v(x_n), +``` + +with $\rho = \lambda (1 - \frac{1}{\kappa}) + (1 - \lambda) \left(1 - \frac{1}{\sqrt{\kappa}}\right)$. + +# References + + +[[1] S. Cyrus, B. Hu, B. Van Scoy, L. Lessard (2018). +A robust accelerated optimization algorithm for strongly convex functions. +American Control Conference (ACC).](https://arxiv.org/pdf/1710.04753.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `lam`: interpolation parameter $\lambda$; $\lambda=1$ gives gradient descent and $\lambda=0$ gives the Triple Momentum Method. 
+- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_robust_momentum(0.1, 1.0, 0.2; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_robust_momentum(mu, L, lam; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/subgradient_method.jl b/examples/unconstrained_convex_minimization/subgradient_method.jl index 5e029d7..3b2c785 100644 --- a/examples/unconstrained_convex_minimization/subgradient_method.jl +++ b/examples/unconstrained_convex_minimization/subgradient_method.jl @@ -1,5 +1,85 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_subgradient_method(M, n, gamma; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_subgradient_method`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is convex and $M$-Lipschitz. This problem is a (possibly non-smooth) minimization problem. + +# Performance metric + +This code computes a worst-case guarantee for the **subgradient method**. That is, it computes +the smallest possible $\tau(n, M, \gamma)$ such that the guarantee + +```math +\min_{0 \leqslant t \leqslant n} f(x_t) - f_\star \leqslant \tau(n, M, \gamma) +``` + +is valid, where $x_t$ are the iterates of the **subgradient method** after $t\leqslant n$ steps, +where $x_\star$ is a minimizer of $f$, and when $\|x_0-x_\star\|\leqslant 1$. + +In short, for given values of $M$, the step-size $\gamma$ and the number of iterations $n$, +$\tau(n, M, \gamma)$ is computed as the worst-case value of +$\min_{0 \leqslant t \leqslant n} f(x_t) - f_\star$ when $\|x_0-x_\star\| \leqslant 1$. 
+ +# Algorithm + +For $t\in \{0, \dots, n-1 \}$ + +```math + \begin{aligned} + g_{t} & \in & \partial f(x_t) \\ + x_{t+1} & = & x_t - \gamma g_t + \end{aligned} +``` +# Theoretical guarantee +The **tight** bound is obtained in [1, Section 3.2.3] and [2, Eq (2)] + +```math +\min_{0 \leqslant t \leqslant n} f(x_t)- f(x_\star) \leqslant \frac{M}{\sqrt{n+1}}\|x_0-x_\star\|, +``` + +and tightness follows from the lower complexity bound for this class of problems, e.g., [3, Appendix A]. + +# References +Classical references on this topic include [1, 2]. + +[[1] Y. Nesterov (2003). +Introductory lectures on convex optimization: A basic course. +Springer Science & Business Media.](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.693.855&rep=rep1&type=pdf) + +[[2] S. Boyd, L. Xiao, A. Mutapcic (2003). +Subgradient Methods (lecture notes).](https://web.stanford.edu/class/ee392o/subgrad_method.pdf) + +[[3] Y. Drori, M. Teboulle (2016). +An optimal variant of Kelley's cutting-plane method. +Mathematical Programming, 160(1), 321-351.](https://arxiv.org/pdf/1409.2636.pdf) + +# Arguments +- `M`: the Lipschitz parameter. +- `n`: number of iterations. +- `gamma`: step-size parameter. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. 
+ +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_subgradient_method(M, n, gamma; verbose=true) +``` +""" function wc_subgradient_method(M, n, gamma; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/subgradient_method_rsi_eb.jl b/examples/unconstrained_convex_minimization/subgradient_method_rsi_eb.jl index 0410b75..0396728 100644 --- a/examples/unconstrained_convex_minimization/subgradient_method_rsi_eb.jl +++ b/examples/unconstrained_convex_minimization/subgradient_method_rsi_eb.jl @@ -1,5 +1,79 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_subgradient_method_rsi_eb(mu, L, gamma, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_subgradient_method_rsi_eb`. + +Consider the convex minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ verifies the "lower" restricted secant inequality ($\mu-\text{RSI}^-$) +and the "upper" error bound ($L-\text{EB}^+$) [1]. + +# Performance metric + +This code computes a worst-case guarantee for **gradient descent** with fixed step-size $\gamma$. +That is, it computes the smallest possible $\tau(n, \mu, L, \gamma)$ such that the guarantee + +```math +\| x_n - x_\star \|^2 \leqslant \tau(n, \mu, L, \gamma) \| x_0 - x_\star \|^2 +``` + +is valid, where $x_n$ is the output of gradient descent with fixed step-size $\gamma$, and +where $x_\star$ is a minimizer of $f$. + +In short, for given values of $n$, $L$, and $\gamma$, +$\tau(n, \mu, L, \gamma)$ is computed as the worst-case value of +$\| x_n - x_\star \|^2$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + +# Algorithm + +Sub-gradient descent is described by + +```math +x_{t+1} = x_t - \gamma \nabla f(x_t), +``` + +where $\gamma$ is a step-size. 
+ +# Theoretical guarantee + +The **tight** theoretical guarantee can be found in [1, Prop 1] (upper bound) and [1, Theorem 2] (lower bound): + +```math +\| x_n - x_\star \|^2 \leqslant (1 - 2\gamma\mu + L^2 \gamma^2)^n \|x_0-x_\star\|^2. +``` + +# References + +Definition and convergence guarantees can be found in [1]. + +[[1] C. Guille-Escuret, B. Goujaud, A. Ibrahim, I. Mitliagkas (2022). +Gradient Descent Is Optimal Under Lower Restricted Secant Inequality And Upper Error Bound.](https://arxiv.org/pdf/2203.00342.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `gamma`: step-size parameter. +- `n`: number of iterations. +- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_subgradient_method_rsi_eb(mu, L, mu / L^2, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_subgradient_method_rsi_eb(mu, L, gamma, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/examples/unconstrained_convex_minimization/triple_momentum.jl b/examples/unconstrained_convex_minimization/triple_momentum.jl index d0bd68b..7992f49 100644 --- a/examples/unconstrained_convex_minimization/triple_momentum.jl +++ b/examples/unconstrained_convex_minimization/triple_momentum.jl @@ -1,5 +1,96 @@ using PEPit, OrderedCollections, Clarabel +@doc raw""" + wc_triple_momentum(mu, L, n; solver=Clarabel.Optimizer, verbose=true) + +# Problem statement + +Compute a PEPit worst-case guarantee for `wc_triple_momentum`. + +Consider the minimization problem + +```math +f_\star \triangleq \min_x f(x), +``` + +where $f$ is $L$-smooth and $\mu$-strongly convex. 
+ +# Performance metric + +This code computes a worst-case guarantee for **triple momentum method** (TMM). +That is, it computes the smallest possible $\tau(n, L, \mu)$ such that the guarantee + +```math +f(x_n) - f_\star \leqslant \tau(n, L, \mu) \|x_0 - x_\star\|^2 +``` + +is valid, where $x_n$ is the output of the TMM, and where $x_\star$ is the minimizer of $f$. +In short, for given values of $n$, $L$ and $\mu$, $\tau(n, L, \mu)$ is computed +as the worst-case value of $f(x_n)-f_\star$ when $\|x_0 - x_\star\|^2 \leqslant 1$. + + +# Algorithm + + +For $t \in \{ 1, \dots, n\}$ + +```math + \begin{aligned} + \xi_{t+1} &&= (1 + \beta) \xi_{t} - \beta \xi_{t-1} - \alpha \nabla f(y_t) \\ + y_{t} &&= (1+\gamma ) \xi_{t} -\gamma \xi_{t-1} \\ + x_{t} && = (1 + \delta) \xi_{t} - \delta \xi_{t-1} + \end{aligned} +``` +with + +```math + \begin{aligned} + \kappa &&= \frac{L}{\mu} , \quad \rho = 1- \frac{1}{\sqrt{\kappa}}\\ + (\alpha, \beta, \gamma,\delta) && = \left(\frac{1+\rho}{L}, \frac{\rho^2}{2-\rho}, + \frac{\rho^2}{(1+\rho)(2-\rho)}, \frac{\rho^2}{1-\rho^2}\right) + \end{aligned} +``` +and + +```math + \begin{aligned} + \xi_{0} = x_0 \\ + \xi_{1} = x_0 \\ + y = x_0 + \end{aligned} +``` +# Theoretical guarantee + +A theoretical **upper** (empirically tight) bound can be found in [1, Theorem 1, eq. 4]: + +```math +f(x_n)-f_\star \leqslant \frac{\rho^{2(n+1)} L \kappa}{2}\|x_0 - x_\star\|^2. +``` + +# References + +The triple momentum method was discovered and analyzed in [1]. + +[[1] Van Scoy, B., Freeman, R. A., Lynch, K. M. (2018). +The fastest known globally convergent first-order method for minimizing strongly convex functions. +IEEE Control Systems Letters, 2(1), 49-54.](http://www.optimization-online.org/DB_FILE/2017/03/5908.pdf) + +# Arguments +- `mu`: strong convexity or monotonicity parameter, as used by the modeled class. +- `L`: smoothness or Lipschitz parameter, as used by the modeled class. +- `n`: number of iterations. 
+- `solver`: JuMP optimizer constructor used to solve the generated SDP. +- `verbose`: print example and solver progress information when true. + +# Returns +- `pepit_tau`: worst-case value +- `theoretical_tau`: theoretical value + +# Julia usage +```julia +pepit_tau, theoretical_tau = wc_triple_momentum(0.1, 1.0, 4; solver=Clarabel.Optimizer, verbose=true) +``` +""" function wc_triple_momentum(mu, L, n; solver=Clarabel.Optimizer, verbose=true) problem = PEP() diff --git a/src/PEPit.jl b/src/PEPit.jl index a822993..55018c6 100644 --- a/src/PEPit.jl +++ b/src/PEPit.jl @@ -13,6 +13,24 @@ import Base: +, -, *, /, ==, <=, >=, ^, hash, getindex abstract type AbstractPoint end abstract type AbstractExpression end abstract type AbstractConstraint end +""" + AbstractFunction + +Abstract supertype for function and operator classes that can participate in a +[`PEP`](@ref). + +Concrete subtypes represent interpolation models for classes of scalar +functions or operators. They wrap a [`PEPFunction`](@ref), expose oracle methods +such as [`gradient!`](@ref), [`value!`](@ref), [`stationary_point!`](@ref), or +[`fixed_point!`](@ref), and implement `add_class_constraints!` to add the +class-specific interpolation constraints before the SDP is solved. + +# Implementation +New function or operator classes should subtype `AbstractFunction`, store an +internal `PEPFunction`, forward oracle calls to it, and implement +`add_class_constraints!`. The concrete class is then passed to +[`declare_function!`](@ref). 
+""" abstract type AbstractFunction end @@ -118,13 +136,8 @@ export merge_dicts, multiply_dicts, prune_dict, - Point_counter, Expression_counter, Function_counter, - Global_Constraint_counter, NEXT_ID, PSDMatrix_counter, - get_is_leaf, - _is_already_evaluated_on_point, _separate_leaf_functions_regarding_their_need_on_point, - _get_nb_eigs_and_corrected, - eval_dual + evaluate, eval_dual end diff --git a/src/core/block_partition.jl b/src/core/block_partition.jl index 23f2dc3..42d1e16 100644 --- a/src/core/block_partition.jl +++ b/src/core/block_partition.jl @@ -1,3 +1,23 @@ +""" + BlockPartition(d) + +Represent a symbolic partition of points into `d` mutually orthogonal blocks. + +Block partitions support block-coordinate interpolation constraints. Calling +[`get_block`](@ref) on a point creates symbolic block components +`x^{(1)}, ..., x^{(d)}` whose sum is the original point. The corresponding +orthogonality constraints are generated later when the PEP is compiled. + +# Fields +- `d`: number of blocks. +- `list_of_constraints`: orthogonality constraints generated for this + partition. +- `blocks_dict`: map from original points to their symbolic block components. +- `counter`: global partition index. + +See also [`declare_block_partition!`](@ref), [`get_block`](@ref), and +[`get_nb_blocks`](@ref). +""" mutable struct BlockPartition d::Int list_of_constraints::Vector{Constraint} @@ -14,9 +34,25 @@ mutable struct BlockPartition end +""" + get_nb_blocks(bp::BlockPartition) + +Return the number of blocks in a block partition. +""" get_nb_blocks(bp::BlockPartition) = bp.d +""" + get_block(bp::BlockPartition, point::Point, block_number::Int) + +Return the `block_number`-th symbolic block of `point`, creating the block +decomposition and its orthogonality constraints if needed. + +The first `d - 1` blocks are new leaf points; the final block is defined as the +residual required to make the block sum equal to `point`. 
Orthogonality +constraints are registered by `add_partition_constraints!` during model +construction. +""" function get_block(bp::BlockPartition, point::Point, block_number::Int) @assert 1 <= block_number <= bp.d "block_number must be an integer in 1..$(bp.d)." if !haskey(bp.blocks_dict, point) diff --git a/src/core/constraint.jl b/src/core/constraint.jl index df1a658..1a7023e 100644 --- a/src/core/constraint.jl +++ b/src/core/constraint.jl @@ -1,3 +1,28 @@ +""" + Constraint(expression, equality_or_inequality) + +Represent a scalar equality or inequality constraint in a PEP. + +The stored `expression` is the canonical left-hand side. Inequalities are stored +in the form `expression <= 0`, while equalities are stored as `expression == 0`. +Users normally create constraints through overloaded comparisons such as +`expr <= 1`, `expr1 >= expr2`, or `expr1 == expr2`. + +# Fields +- `expression`: symbolic scalar residual. +- `equality_or_inequality`: either `"equality"` or `"inequality"`. +- `counter`: global scalar-constraint index. +- `_dual_variable_value`: dual multiplier populated after solving. +- `_value`: numerical residual populated by [`evaluate`](@ref). + +# Mathematical model +Scalar constraints encode initial conditions, performance metric epigraph +constraints, function/operator interpolation inequalities, and auxiliary +relations introduced by primitive steps. + +See also [`Expression`](@ref), [`add_constraint!`](@ref), +[`set_initial_condition!`](@ref), and [`eval_dual`](@ref). +""" mutable struct Constraint <: AbstractConstraint expression::Expression equality_or_inequality::String @@ -31,6 +56,15 @@ Base.:(==)(e1::Expression, e2::Real) = Constraint(e1 - e2, "equality") Base.:(==)(e1::Real, e2::Expression) = Constraint(Expression(e1) - e2, "equality") +""" + evaluate(c::Constraint) + +Return the numerical residual of `c.expression` after the PEP has been solved. 
+ +For inequalities, feasibility corresponds to a nonpositive residual because the +canonical form is `expression <= 0`. For equalities, feasibility corresponds to +a residual close to zero. +""" function evaluate(c::Constraint) if isnothing(c._value) try @@ -44,4 +78,14 @@ function evaluate(c::Constraint) end +""" + eval_dual(obj) + +Return the dual multiplier associated with a scalar or PSD constraint after +[`solve!`](@ref) or [`solve_dual!`](@ref) has populated dual values. + +For a scalar [`Constraint`](@ref), the return value is a number. For a +[`PSDMatrix`](@ref), the return value is the corresponding symmetric dual +matrix. +""" eval_dual(c::Constraint) = isnothing(c._dual_variable_value) ? error("PEP must be solved") : c._dual_variable_value diff --git a/src/core/expression.jl b/src/core/expression.jl index ab5fb98..ee26b44 100644 --- a/src/core/expression.jl +++ b/src/core/expression.jl @@ -1,3 +1,31 @@ +""" + Expression(; is_leaf=true, decomposition_dict=nothing) + +Represent a symbolic scalar expression in a PEP model. + +An `Expression` is an affine combination of three kinds of scalar atoms: +independent leaf function values, inner products of leaf [`Point`](@ref) +objects, and constants. These expressions are used for function values, +initial conditions, performance metrics, interpolation inequalities, and PSD +matrix entries. + +# Fields +- `_id`: unique object identifier used for hashing and equality. +- `_is_leaf`: whether the expression is an independent scalar function value. +- `decomposition_dict`: coefficients keyed by leaf expressions, point pairs, or + the constant key `1`. +- `counter`: scalar-function-value index for leaf expressions, or `nothing`. +- `_value`: numerical scalar recovered after the PEP is solved. + +# Mathematical model +Products of points create expression atoms: if `x` and `g` are points, `x * g` +represents `\\langle x, g \\rangle`. 
Scalar algebra on expressions builds affine +combinations, and comparisons such as `expr <= 0` create [`Constraint`](@ref) +objects. + +See also [`Point`](@ref), [`Constraint`](@ref), [`value!`](@ref), and +[`evaluate`](@ref). +""" mutable struct Expression <: AbstractExpression _id::Int _is_leaf::Bool diff --git a/src/core/function.jl b/src/core/function.jl index 0d87dea..61e69b6 100644 --- a/src/core/function.jl +++ b/src/core/function.jl @@ -1,3 +1,35 @@ +""" + PEPFunction(; is_leaf=true, decomposition_dict=nothing, reuse_gradient=false) + +Represent the internal symbolic function object shared by all concrete classes. + +`PEPFunction` stores the data needed to model a leaf function, a leaf operator, +or an affine combination of leaf objects. Concrete classes such as +[`SmoothConvexFunction`](@ref) and [`MonotoneOperator`](@ref) wrap a +`PEPFunction` and define their interpolation constraints by inspecting its +registered oracle evaluations. + +# Fields +- `_id`: unique object identifier used for hashing and equality. +- `_is_leaf`: whether this object is a leaf function/operator. +- `decomposition_dict`: affine decomposition over leaf `PEPFunction` objects. +- `reuse_gradient`: whether repeated oracle calls at the same point reuse the + same gradient/subgradient. +- `list_of_points`: registered triples `(x, g, f)` representing oracle calls. +- `list_of_stationary_points`: registered triples with zero gradient. +- `list_of_constraints`: scalar constraints attached to this function object. +- `list_of_psd`: user-added PSD constraints attached to this object. +- `list_of_class_psd`: class-generated PSD constraints. +- `counter`: leaf function index, or `nothing` for affine combinations. + +# Mathematical model +For a registered triple `(x, g, f)`, the symbolic objects represent +`g \\in \\partial f(x)` and the scalar value `f(x)` for the modeled class. During +`solve!`, class-specific interpolation constraints relate all such triples. 
+ +See also [`declare_function!`](@ref), [`oracle!`](@ref), [`gradient!`](@ref), +[`value!`](@ref), and [`add_constraint!`](@ref). +""" mutable struct PEPFunction <: AbstractFunction _id::Int _is_leaf::Bool @@ -152,6 +184,25 @@ function add_point!(func::PEPFunction, triplet::Tuple{Point,Point,Expression}) end +""" + oracle!(func, point) + +Evaluate the symbolic oracle of `func` at `point`, returning a pair +`(gradient, value)` and registering the evaluation for interpolation +constraints. + +If `func.reuse_gradient` is true and the point has already been evaluated, the +previously registered pair is returned. Otherwise, a new symbolic subgradient is +created while reusing the previous function value when available. + +For affine combinations of leaf functions, the oracle either aggregates known +leaf oracle values or creates the missing symbolic objects needed to make the +aggregate gradient and value consistent. + +# Returns +`(g, f)`, where `g` is a [`Point`](@ref) representing a gradient/subgradient and +`f` is an [`Expression`](@ref) representing the function value. +""" function oracle!(func::PEPFunction, point::Point) evaluation = _is_already_evaluated_on_point(func, point) if evaluation !== nothing && func.reuse_gradient @@ -188,17 +239,50 @@ function oracle!(func::PEPFunction, point::Point) end +""" + value!(func, point) + +Return the symbolic function value of `func` at `point`, creating an oracle +evaluation if one has not already been registered. + +This is the Julia analogue of evaluating a PEPit function symbolically. It does +not compute a numerical value before the PEP is solved; it creates or retrieves +the scalar variable that will later be constrained by the class interpolation +conditions. +""" value!(func::PEPFunction, point::Point) = ( _is_already_evaluated_on_point(func, point) !== nothing ? 
_is_already_evaluated_on_point(func, point)[2] : oracle!(func, point)[2] ) +""" + gradient!(func, point) + +Return the symbolic gradient or subgradient of `func` at `point`, creating an +oracle evaluation if needed. + +For differentiable classes the returned point is interpreted as the gradient. +For nonsmooth classes it is an arbitrary selected subgradient. The +`reuse_gradient` flag controls whether repeated calls at the same point must +return the same symbolic subgradient. +""" gradient!(func::PEPFunction, point::Point) = oracle!(func, point)[1] subgradient!(func::PEPFunction, point::Point) = gradient!(func, point) +""" + stationary_point!(func; return_gradient_and_function_value=false) + +Create a symbolic stationary point of `func`. When +`return_gradient_and_function_value=true`, also return the zero gradient and +associated function value. + +The registered oracle triple is `(xs, 0, fs)`, so class interpolation +constraints can use `xs` as an optimizer or zero point when the class supports +such an interpretation. +""" function stationary_point!(func::PEPFunction; return_gradient_and_function_value=false) point, g, f = @@ -214,6 +298,17 @@ function stationary_point!(func::PEPFunction; return_gradient_and_function_value end +""" + fixed_point!(func) + +Create a symbolic fixed point for an operator-like object and return the point, +its image, and the associated scalar value. + +The registered triple is `(x, x, f)`, which encodes an operator evaluation whose +output equals its input. This is used by fixed-point and monotone-operator +examples where the oracle value is an operator image rather than a gradient of a +scalar objective. 
+""" function fixed_point!(func::PEPFunction) x = Point() fx = Expression() diff --git a/src/core/pep.jl b/src/core/pep.jl index 3676de2..beb9520 100644 --- a/src/core/pep.jl +++ b/src/core/pep.jl @@ -1,3 +1,28 @@ +""" + PEP() + +Create an empty performance estimation problem and reset the global symbolic +registries used to index points, expressions, functions, constraints, and PSD +blocks. + +A `PEP` stores the symbolic description of a worst-case analysis: function or +operator classes, initial points, initial conditions, performance metrics, and +optional PSD constraints. Calling [`solve!`](@ref) converts this symbolic +description into a semidefinite program in JuMP. + +# Fields +- `list_of_functions`: function/operator classes declared in the problem. +- `list_of_points`: initial leaf points registered by [`set_initial_point!`](@ref). +- `list_of_conditions`: scalar initial or normalization constraints. +- `list_of_performance_metrics`: expressions whose minimum is maximized. +- `list_of_psd`: global PSD constraints. +- `residual`: dual residual matrix associated with the Gram PSD constraint. + +# Mathematical model +PEPit computes the smallest valid worst-case constant by maximizing a +performance expression over all symbolic iterates and oracle values satisfying +the selected interpolation constraints and initial conditions. +""" mutable struct PEP list_of_functions::Vector{AbstractFunction} list_of_points::Vector{Point} @@ -24,6 +49,26 @@ mutable struct PEP end +""" + declare_function!(pep, func_class, param; reuse_gradient=nothing) + +Declare a leaf function or operator class in `pep` and return the created +object. `param` is typically an `OrderedDict` containing class parameters such +as `"L"`, `"mu"`, or `"beta"`. + +# Arguments +- `pep::PEP`: problem to which the class is added. +- `func_class`: concrete subtype of [`AbstractFunction`](@ref). +- `param`: parameter dictionary consumed by `func_class`. 
+- `reuse_gradient`: optional override for repeated oracle evaluations at the + same point. + +# Examples +```julia +problem = PEP() +f = declare_function!(problem, SmoothConvexFunction, OrderedDict("L" => 1.0)) +``` +""" function declare_function!(pep::PEP, func_class, param; reuse_gradient=nothing) f = reuse_gradient === nothing ? func_class(param; is_leaf=true) : @@ -32,6 +77,15 @@ function declare_function!(pep::PEP, func_class, param; reuse_gradient=nothing) return f end +""" + declare_block_partition!(pep, d) + +Create a [`BlockPartition`](@ref) with `d` blocks and register it for inclusion +in the PEP model. + +The returned partition is global to the current `PEP` construction context. +Block orthogonality constraints are materialized during model construction. +""" function declare_block_partition!(pep::PEP, d::Int) @@ -39,15 +93,61 @@ function declare_block_partition!(pep::PEP, d::Int) end +""" + add_constraint!(target, constraint) + +Add a scalar [`Constraint`](@ref) to a PEP, function class, operator class, or +block partition, depending on `target`. + +For a `PEP`, the constraint is treated as a global initial/general condition. +For a function-like object, it is treated as an additional class-specific +constraint and is included with that object's interpolation constraints. +""" add_constraint!(pep::PEP, constraint::Constraint) = push!(pep.list_of_conditions, constraint) +""" + set_initial_point!(pep) + +Create and register a new leaf [`Point`](@ref) as an initial point of `pep`. + +Initial points are independent vectors in the Gram matrix. They are typically +used in initial conditions such as `(x0 - xs)^2 <= R^2`. +""" set_initial_point!(pep::PEP) = (x = Point(); push!(pep.list_of_points, x); x) +""" + set_initial_condition!(pep, condition) + +Add an initial condition or normalization constraint to `pep`. 
+ +Initial conditions define the admissible set of problem instances, for example +`\\|x_0 - x_\\star\\|^2 \\leq 1` or a bound on an initial function gap. +""" set_initial_condition!(pep::PEP, condition::Constraint) = add_constraint!(pep, condition) +""" + set_performance_metric!(pep, expression) + +Register a scalar performance metric. `solve!` maximizes the minimum over all +registered metrics. + +The expression is usually the quantity whose worst-case value is sought, such +as `f(x_n) - f_\\star`, `\\|x_n - x_\\star\\|^2`, or a residual norm. Multiple +metrics model the pointwise minimum of several quantities. +""" set_performance_metric!(pep::PEP, expression::Expression) = push!(pep.list_of_performance_metrics, expression) +""" + add_psd_matrix!(target, matrix_of_expressions) + +Add a positive-semidefinite matrix constraint to a PEP or function-like object. +Entries may be [`Expression`](@ref) objects or real constants. + +Global PSD constraints are attached to the problem. Function-local PSD +constraints are attached to the internal [`PEPFunction`](@ref) and compiled with +that class's interpolation constraints. +""" function add_psd_matrix!(pep::PEP, matrix_of_expressions) push!(pep.list_of_psd, PSDMatrix(matrix_of_expressions)) return pep.list_of_psd[end] @@ -182,6 +282,25 @@ struct _PEPModelBuild class_constraints::Vector{Constraint} end +""" + DualPEPCertificate + +Store the solution of the explicit conic dual generated by [`solve_dual!`](@ref), +including scalar multipliers, PSD multipliers, model handles, and solver +statuses. + +# Fields +- `dual_value`: objective value of the explicit dual model. +- `α`: multipliers for performance-metric epigraph constraints. +- `λ`: multipliers for inequality initial/general conditions. +- `ν`: multipliers for equality initial/general conditions. +- `θ`: multipliers for scalar interpolation constraints. +- `S`: dual residual matrix for the main Gram PSD constraint. 
+- `Y`: PSD multipliers for global and class-generated PSD blocks. +- `primal_model`, `dual_model`: JuMP model handles. +- `mappings`: symbolic objects associated with the multiplier arrays. +- `termination_status`, `primal_status`, `dual_status`: solver statuses. +""" struct DualPEPCertificate dual_value::Float64 α::Vector{Float64} @@ -415,6 +534,22 @@ function _apply_psd_duals_from_dual_model!(dual_model::Model, packs) return Y_values end +""" + solve_dual!(pep; solver=Clarabel.Optimizer, verbose=true) + +Build the primal SDP for `pep`, dualize it with `Dualization.jl`, solve the +explicit dual model, and return a [`DualPEPCertificate`](@ref). + +This routine is useful when the dual multipliers themselves are part of the +output, for example when reconstructing a proof certificate for a worst-case +bound. It also writes scalar and PSD dual values back to the symbolic +constraints so that [`eval_dual`](@ref) can be used afterwards. + +# Arguments +- `pep::PEP`: symbolic performance estimation problem. +- `solver`: JuMP optimizer constructor used for the dual model. +- `verbose`: print model-building and solver progress when true. +""" function solve_dual!(pep::PEP; solver=Clarabel.Optimizer, verbose::Bool=true) @@ -523,6 +658,39 @@ function _logdet_dimension_reduction!(model::JuMP.Model, G, objective, wc_value: end +""" + solve!(pep; solver=Clarabel.Optimizer, verbose=true, tracetrick=false, + logdetiters=0, eig_regularization=1e-3, + tol_dimension_reduction=1e-5, return_full_model=false) + +Build and solve the primal SDP associated with `pep`. Return the worst-case +value unless `return_full_model=true`, in which case solver variables, +constraints, and residual data are returned as a named tuple. + +The constructed SDP uses a Gram matrix for all leaf [`Point`](@ref) objects and +one scalar variable for each leaf [`Expression`](@ref). 
Initial conditions, +performance metrics, interpolation constraints, and PSD blocks are translated +into JuMP constraints before the model is optimized. + +# Arguments +- `pep::PEP`: symbolic problem to solve. +- `solver`: JuMP optimizer constructor, for example `Clarabel.Optimizer`. +- `verbose`: print model-building and solver progress when true. +- `tracetrick`: run a trace-minimization heuristic after the first solve. +- `logdetiters`: number of log-det heuristic iterations for dimension + reduction. +- `eig_regularization`: regularization used in the log-det heuristic. +- `tol_dimension_reduction`: allowed objective degradation during dimension + reduction. +- `return_full_model`: return JuMP model data instead of only the worst-case + value. + +# Returns +The worst-case value as a `Float64`, or a named tuple with model internals when +`return_full_model=true`. + +See also [`solve_dual!`](@ref), [`evaluate`](@ref), and [`eval_dual`](@ref). +""" function solve!(pep::PEP; solver=Clarabel.Optimizer, verbose::Bool=true, diff --git a/src/core/point.jl b/src/core/point.jl index 82b4efe..8765214 100644 --- a/src/core/point.jl +++ b/src/core/point.jl @@ -1,3 +1,38 @@ +""" + Point(; is_leaf=true, decomposition_dict=nothing) + +Represent an element of the ambient Hilbert space used by a performance +estimation problem. + +`Point` objects encode both iterates and gradients/subgradients. A leaf point +is an independent symbolic vector and receives its own row/column in the Gram +matrix. A non-leaf point stores a linear combination of leaf points through +`decomposition_dict`; no new Gram coordinate is introduced. + +# Fields +- `_id`: unique object identifier used for hashing and equality. +- `_is_leaf`: whether the point is an independent symbolic vector. +- `decomposition_dict`: coefficients of the point in the leaf-point basis. +- `counter`: Gram-matrix index for leaf points, or `nothing` for non-leaves. 
+- `_value`: numerical vector recovered from the solved Gram matrix. + +# Mathematical model +If `p` and `q` are `Point`s, then `p + q`, `p - q`, and scalar multiples are +again symbolic points. The product `p * q` creates an [`Expression`](@ref) +representing the inner product `\\langle p, q \\rangle`, and `p^2` represents +`\\|p\\|^2`. + +# Examples +```julia +x0 = Point() +x1 = Point() +direction = (x1 - x0) / 2 +squared_norm = direction^2 +``` + +See also [`Expression`](@ref), [`PEP`](@ref), [`set_initial_point!`](@ref), and +[`evaluate`](@ref). +""" mutable struct Point <: AbstractPoint _id::Int _is_leaf::Bool @@ -31,6 +66,15 @@ Base.:(==)(p1::Point, p2::Point) = p1._id == p2._id Base.isequal(p1::Point, p2::Point) = p1._id == p2._id +""" + get_is_leaf(obj) + +Return whether `obj` is a leaf symbolic object with its own SDP variable index. + +For points, leaf objects are the independent vectors that generate the Gram +matrix. For expressions, leaf objects are independent scalar function values. +Non-leaf objects are affine or linear combinations of existing leaves. +""" get_is_leaf(p::Point) = p._is_leaf @@ -61,6 +105,17 @@ get_is_leaf(p::Point) = p._is_leaf const null_point = Point(is_leaf=false, decomposition_dict=OrderedDict{Point,Float64}()) +""" + evaluate(obj) + +Return the numerical value assigned to a symbolic object after the PEP has been +solved. For a [`Point`](@ref), the value is recovered from a Gram matrix +factorization. + +For a non-leaf point, the value is reconstructed from its leaf decomposition. +Calling this before [`solve!`](@ref) or [`solve_dual!`](@ref) has populated the +numerical realization raises an error. 
+""" function evaluate(p::Point) isnothing(p._value) || return p._value if get_is_leaf(p) diff --git a/src/core/psd_matrix.jl b/src/core/psd_matrix.jl index 9989835..5e56c39 100644 --- a/src/core/psd_matrix.jl +++ b/src/core/psd_matrix.jl @@ -1,3 +1,24 @@ +""" + PSDMatrix(matrix_of_expressions) + +Represent a positive-semidefinite matrix constraint with symbolic entries. + +A `PSDMatrix` stores a square matrix whose entries are [`Expression`](@ref) +objects or real constants. It is used for interpolation conditions and +additional matrix inequalities that are more naturally expressed as PSD blocks +than as scalar inequalities. + +# Fields +- `matrix_of_expressions`: square matrix of symbolic scalar entries. +- `shape`: matrix dimensions. +- `_value`: numerical matrix value after solving. +- `_dual_variable_value`: PSD dual matrix after solving. +- `entries_dual_variable_value`: duals associated with entry-linking + equalities in the JuMP model. +- `counter`: global PSD-constraint index. + +See also [`add_psd_matrix!`](@ref), [`evaluate`](@ref), and [`eval_dual`](@ref). +""" mutable struct PSDMatrix matrix_of_expressions::Matrix{Expression} shape::Tuple{Int,Int} @@ -74,6 +95,15 @@ end Base.getindex(psd::PSDMatrix, i::Int, j::Int) = psd.matrix_of_expressions[i, j] +""" + evaluate(psd::PSDMatrix) + +Return the numerical matrix value of a PSD constraint after the PEP has been +solved. + +Each symbolic entry is evaluated independently. Calling this before the PEP has +been solved raises an error. 
+""" function evaluate(psd::PSDMatrix) if psd._value === nothing try diff --git a/src/functions/block_smooth_convex_function_cheap.jl b/src/functions/block_smooth_convex_function_cheap.jl index aa9dc03..52bb45e 100644 --- a/src/functions/block_smooth_convex_function_cheap.jl +++ b/src/functions/block_smooth_convex_function_cheap.jl @@ -1,3 +1,38 @@ +@doc raw""" + BlockSmoothConvexFunctionCheap(param; ) + +Represent the `BlockSmoothConvexFunctionCheap` interpolation class in PEPit.jl. + +Implement necessary constraints for interpolation of the class of smooth convex functions by blocks. + +# Warning + + Functions that are smooth by blocks and convex generally do not enjoy known interpolation conditions. + The conditions implemented in this class are necessary but a priori not sufficient for interpolation. + Hence, the numerical results obtained when using this class might be non-tight upper bounds. + +# Class parameters +- `partition`: partitioning of the variables (in blocks). +- `L`: smoothness parameters (one per block). + +Smooth convex functions by blocks are characterized by a list of parameters $L_i$ (one per block), +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, BlockSmoothConvexFunctionCheap, param) +``` + +# Fields +- `partition`: class parameter or auxiliary state stored as `BlockPartition`. +- `L`: class parameter or auxiliary state stored as `Vector{Float64}`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct BlockSmoothConvexFunctionCheap <: AbstractFunction partition::BlockPartition L::Vector{Float64} diff --git a/src/functions/block_smooth_convex_function_expensive.jl b/src/functions/block_smooth_convex_function_expensive.jl index dd934e0..e2198ff 100644 --- a/src/functions/block_smooth_convex_function_expensive.jl +++ b/src/functions/block_smooth_convex_function_expensive.jl @@ -1,3 +1,40 @@ +@doc raw""" + BlockSmoothConvexFunctionExpensive(param; ) + +Represent the `BlockSmoothConvexFunctionExpensive` interpolation class in PEPit.jl. + +The `BlockSmoothConvexFunctionExpensive` class implements the `add_class_constraints!` method +on top of its internal [`PEPFunction`](@ref), adding necessary constraints for interpolation of the class of +smooth convex functions by blocks. The implemented constraint is that of [2, Section 3.1]. + +# Warning + + Functions that are smooth by blocks and convex generally do not enjoy known interpolation conditions. + The conditions implemented in this class are necessary but a priori not sufficient for interpolation. + Hence, the numerical results obtained when using this class might be non-tight upper bounds. + +# Class parameters +- `partition`: partitioning of the variables (in blocks). +- `L`: smoothness parameters (one per block). + +Smooth convex functions by blocks are characterized by a list of parameters $L_i$ (one per block), +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, BlockSmoothConvexFunctionExpensive, param) +``` + +# Fields +- `partition`: class parameter or auxiliary state stored as `BlockPartition`. +- `L`: class parameter or auxiliary state stored as `Vector{Float64}`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints.
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct BlockSmoothConvexFunctionExpensive <: AbstractFunction partition::BlockPartition L::Vector{Float64} diff --git a/src/functions/convex_function.jl b/src/functions/convex_function.jl index 99cd9a5..4a4246b 100644 --- a/src/functions/convex_function.jl +++ b/src/functions/convex_function.jl @@ -1,3 +1,26 @@ +@doc raw""" + ConvexFunction(param; ) + +Represent the `ConvexFunction` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of convex, closed and proper (CCP) functions (i.e., convex +functions whose epigraphs are non-empty closed sets). + +General CCP functions are not characterized by any parameter, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict() # no class parameter is needed, so the dictionary is empty +f = declare_function!(problem, ConvexFunction, param) +``` + +# Fields +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct ConvexFunction <: AbstractFunction _PEPit_func::PEPFunction diff --git a/src/functions/convex_indicator.jl b/src/functions/convex_indicator.jl index bb6b04e..a14c5de 100644 --- a/src/functions/convex_indicator.jl +++ b/src/functions/convex_indicator.jl @@ -1,5 +1,35 @@ using OrderedCollections +@doc raw""" + ConvexIndicatorFunction(param; ) + +Represent the `ConvexIndicatorFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints for the class of closed convex indicator functions.
+ +# Class parameters +- `D`: upper bound on the diameter of the feasible set, possibly set to `Inf` +- `R`: upper bound on the radius of the feasible set, possibly set to `Inf` +- `center`: Center of the feasible set spanned by the radius constraint. If set to `nothing`, there exists such a point but it remains undefined. + +Convex indicator functions are characterized by a parameter `D` and/or `R`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("D" => 1.0) # diameter bound D (key assumed to match the class parameter name) +f = declare_function!(problem, ConvexIndicatorFunction, param) +``` + +# Fields +- `D`: class parameter or auxiliary state stored as `Float64`. +- `R`: class parameter or auxiliary state stored as `Float64`. +- `center`: class parameter or auxiliary state stored as `Union{Point,Nothing}`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct ConvexIndicatorFunction <: AbstractFunction D::Float64 R::Float64 diff --git a/src/functions/convex_lipschitz_function.jl b/src/functions/convex_lipschitz_function.jl index fd8407c..e080e03 100644 --- a/src/functions/convex_lipschitz_function.jl +++ b/src/functions/convex_lipschitz_function.jl @@ -1,3 +1,30 @@ +@doc raw""" + ConvexLipschitzFunction(param; ) + +Represent the `ConvexLipschitzFunction` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of convex closed proper (CCP) +Lipschitz continuous functions.
+ +# Class parameters +- `M`: Lipschitz parameter + +CCP Lipschitz continuous functions are characterized by a parameter `M`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("M" => 1.0) # M is the Lipschitz parameter +f = declare_function!(problem, ConvexLipschitzFunction, param) +``` + +# Fields +- `M`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct ConvexLipschitzFunction <: AbstractFunction M::Float64 _PEPit_func::PEPFunction diff --git a/src/functions/convex_qg_function.jl b/src/functions/convex_qg_function.jl index 6b894da..6031aa2 100644 --- a/src/functions/convex_qg_function.jl +++ b/src/functions/convex_qg_function.jl @@ -1,3 +1,31 @@ +@doc raw""" + ConvexQGFunction(param; reuse_gradient=false) + +Represent the `ConvexQGFunction` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of quadratically upper bounded ($\text{QG}^+$ [1]), +i.e. $\forall x, f(x) - f_\star \leqslant \frac{L}{2} \|x-x_\star\|^2$, and convex functions. + +# Class parameters +- `L`: The quadratic upper bound parameter + +General quadratically upper bounded ($\text{QG}^+$) convex functions are characterized +by the quadratic growth parameter `L`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, ConvexQGFunction, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints.
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct ConvexQGFunction <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/functions/convex_support_function.jl b/src/functions/convex_support_function.jl index 65247be..67f7b68 100644 --- a/src/functions/convex_support_function.jl +++ b/src/functions/convex_support_function.jl @@ -1,3 +1,29 @@ +@doc raw""" + ConvexSupportFunction(param; reuse_gradient=false) + +Represent the `ConvexSupportFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints for the class of closed convex support functions. + +# Class parameters +- `M`: upper bound on the Lipschitz constant + +Convex support functions are characterized by a parameter `M`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, ConvexSupportFunction, param) +``` + +# Fields +- `M`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct ConvexSupportFunction <: AbstractFunction M::Float64 _PEPit_func::PEPFunction diff --git a/src/functions/rsi_eb_function.jl b/src/functions/rsi_eb_function.jl index 30525eb..413d685 100644 --- a/src/functions/rsi_eb_function.jl +++ b/src/functions/rsi_eb_function.jl @@ -1,3 +1,32 @@ +@doc raw""" + RsiEbFunction(param; reuse_gradient=false) + +Represent the `RsiEbFunction` interpolation class in PEPit.jl. 
+ +Implement the interpolation constraints of the class of functions verifying +the "lower" restricted secant inequality ($\text{RSI}^-$) and the "upper" error bound ($\text{EB}^+$). + +# Class parameters +- `mu`: Restricted secant inequality parameter +- `L`: Error bound parameter +$\text{RSI}^-$ and $\text{EB}^+$ functions are characterized by parameters $\mu$ and `L`, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, RsiEbFunction, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct RsiEbFunction <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/functions/smooth_convex_function.jl b/src/functions/smooth_convex_function.jl index e52cd45..4479718 100644 --- a/src/functions/smooth_convex_function.jl +++ b/src/functions/smooth_convex_function.jl @@ -1,3 +1,29 @@ +@doc raw""" + SmoothConvexFunction(param; reuse_gradient=true) + +Represent the `SmoothConvexFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints of the class of smooth convex functions. + +# Class parameters +- `L`: smoothness parameter + +Smooth convex functions are characterized by the smoothness parameter `L`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothConvexFunction, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints.
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct SmoothConvexFunction <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/functions/smooth_convex_lipschitz_function.jl b/src/functions/smooth_convex_lipschitz_function.jl index b09080f..80953e1 100644 --- a/src/functions/smooth_convex_lipschitz_function.jl +++ b/src/functions/smooth_convex_lipschitz_function.jl @@ -1,3 +1,32 @@ +@doc raw""" + SmoothConvexLipschitzFunction(param; reuse_gradient=true) + +Represent the `SmoothConvexLipschitzFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints of the class of smooth convex Lipschitz continuous functions. + +# Class parameters +- `L`: smoothness parameter +- `M`: Lipschitz continuity parameter + +Smooth convex Lipschitz continuous functions are characterized by the smoothness parameters `L` +and Lipschitz continuity parameter `M`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothConvexLipschitzFunction, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `M`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct SmoothConvexLipschitzFunction <: AbstractFunction L::Float64 M::Float64 diff --git a/src/functions/smooth_function.jl b/src/functions/smooth_function.jl index 99ef4b0..e3a6d98 100644 --- a/src/functions/smooth_function.jl +++ b/src/functions/smooth_function.jl @@ -1,3 +1,29 @@ +@doc raw""" + SmoothFunction(param; reuse_gradient=true) + +Represent the `SmoothFunction` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of smooth (not necessarily convex) functions. + +# Class parameters +- `L`: smoothness parameter + +Smooth functions are characterized by the smoothness parameter `L`, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothFunction, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct SmoothFunction <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/functions/smooth_quadratic_lojasiewicz_function_cheap.jl b/src/functions/smooth_quadratic_lojasiewicz_function_cheap.jl index d8a4610..77bad84 100644 --- a/src/functions/smooth_quadratic_lojasiewicz_function_cheap.jl +++ b/src/functions/smooth_quadratic_lojasiewicz_function_cheap.jl @@ -1,3 +1,43 @@ +@doc raw""" + SmoothQuadraticLojasiewiczFunctionCheap(param; reuse_gradient=true) + +Represent the `SmoothQuadraticLojasiewiczFunctionCheap` interpolation class in PEPit.jl. 
+ +Implement some constraints (which are not necessary and sufficient for interpolation) +for the class of smooth (not necessarily convex) functions that also satisfy a quadratic Lojasiewicz inequality +(sometimes also referred to as a Polyak-Lojasiewicz inequality). Extensive descriptions of such classes of +functions can be found in [1, 2]. + +The conditions implemented here are presented in [4, Proposition 3.2] (for alpha to be chosen) +and [4, Proposition 3.4] with smoothness conditions from [3]. + +# Warning + + Smooth functions satisfying a Lojasiewicz property do not enjoy known interpolation conditions. + The conditions implemented in this class are necessary but a priori not sufficient for interpolation. + Hence, the numerical results obtained when using this class might be non-tight upper bounds. + +# Class parameters +- `L`: smoothness parameter +- `mu`: quadratic Lojasiewicz parameter +- `alpha`: relaxation parameter (in [0,2*mu/(2*L+mu)]) + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothQuadraticLojasiewiczFunctionCheap, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `alpha`: class parameter or auxiliary state stored as `Union{Float64,Nothing}`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct SmoothQuadraticLojasiewiczFunctionCheap <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/functions/smooth_quadratic_lojasiewicz_function_expensive.jl b/src/functions/smooth_quadratic_lojasiewicz_function_expensive.jl index 68873b3..b8e75b2 100644 --- a/src/functions/smooth_quadratic_lojasiewicz_function_expensive.jl +++ b/src/functions/smooth_quadratic_lojasiewicz_function_expensive.jl @@ -1,3 +1,40 @@ +@doc raw""" + SmoothQuadraticLojasiewiczFunctionExpensive(param; reuse_gradient=true) + +Represent the `SmoothQuadraticLojasiewiczFunctionExpensive` interpolation class in PEPit.jl. + +Implement some constraints (which are not necessary and sufficient for interpolation) +for the class of smooth (not necessarily convex) functions that also satisfy a quadratic Lojasiewicz inequality +(sometimes also referred to as a Polyak-Lojasiewicz inequality). Extensive descriptions of such classes of +functions can be found in [1, 2]. + +The conditions implemented here are presented in [3, Proposition 3.4]. + +# Warning + + Smooth functions satisfying a Lojasiewicz property do not enjoy known interpolation conditions. + The conditions implemented in this class are necessary but a priori not sufficient for interpolation. + Hence, the numerical results obtained when using this class might be non-tight upper bounds. + +# Class parameters +- `L`: smoothness parameter +- `mu`: quadratic Lojasiewicz parameter + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothQuadraticLojasiewiczFunctionExpensive, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. 
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct SmoothQuadraticLojasiewiczFunctionExpensive <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/functions/smooth_strongly_convex_function.jl b/src/functions/smooth_strongly_convex_function.jl index f90f10a..19ddc98 100644 --- a/src/functions/smooth_strongly_convex_function.jl +++ b/src/functions/smooth_strongly_convex_function.jl @@ -1,3 +1,32 @@ +@doc raw""" + SmoothStronglyConvexFunction(param; reuse_gradient=true) + +Represent the `SmoothStronglyConvexFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints of the class of smooth strongly convex functions. + +# Class parameters +- `mu`: strong convexity parameter +- `L`: smoothness parameter + +Smooth strongly convex functions are characterized by parameters $\mu$ and $L$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothStronglyConvexFunction, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct SmoothStronglyConvexFunction <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/functions/smooth_strongly_convex_quadratic_function.jl b/src/functions/smooth_strongly_convex_quadratic_function.jl index 2a23838..45a3a71 100644 --- a/src/functions/smooth_strongly_convex_quadratic_function.jl +++ b/src/functions/smooth_strongly_convex_quadratic_function.jl @@ -1,3 +1,32 @@ +@doc raw""" + SmoothStronglyConvexQuadraticFunction(param; reuse_gradient=true) + +Represent the `SmoothStronglyConvexQuadraticFunction` interpolation class in PEPit.jl. + +Implement interpolation constraints of the class of smooth strongly convex quadratic functions. + +# Class parameters +- `mu`: strong convexity parameter +- `L`: smoothness parameter + +Smooth strongly convex quadratic functions are characterized by parameters $\mu$ and `L`, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SmoothStronglyConvexQuadraticFunction, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct SmoothStronglyConvexQuadraticFunction <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/functions/strongly_convex_function.jl b/src/functions/strongly_convex_function.jl index cac50d7..f0841bc 100644 --- a/src/functions/strongly_convex_function.jl +++ b/src/functions/strongly_convex_function.jl @@ -1,3 +1,31 @@ +@doc raw""" + StronglyConvexFunction(param; reuse_gradient=false) + +Represent the `StronglyConvexFunction` interpolation class in PEPit.jl. 
+ +Implement the interpolation constraints of the class of strongly convex closed proper functions (strongly convex +functions whose epigraphs are non-empty closed sets). + +# Class parameters +- `mu`: strong convexity parameter + +Strongly convex functions are characterized by the strong convexity parameter $\mu$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, StronglyConvexFunction, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct StronglyConvexFunction <: AbstractFunction mu::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/cocoercive.jl b/src/operators/cocoercive.jl index e0f189c..793eab0 100644 --- a/src/operators/cocoercive.jl +++ b/src/operators/cocoercive.jl @@ -1,3 +1,33 @@ +@doc raw""" + CocoerciveOperator(param; reuse_gradient=true) + +Represent the `CocoerciveOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of cocoercive (and maximally monotone) operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `beta`: cocoercivity parameter + +Cocoercive operators are characterized by the parameter $\beta$, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, CocoerciveOperator, param) +``` + +# Fields +- `beta`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. 
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct CocoerciveOperator <: AbstractFunction beta::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/cocoercive_strongly_monotone_cheap.jl b/src/operators/cocoercive_strongly_monotone_cheap.jl index 1ccc06a..5a28180 100644 --- a/src/operators/cocoercive_strongly_monotone_cheap.jl +++ b/src/operators/cocoercive_strongly_monotone_cheap.jl @@ -1,3 +1,37 @@ +@doc raw""" + CocoerciveStronglyMonotoneOperatorCheap(param; reuse_gradient=true) + +Represent the `CocoerciveStronglyMonotoneOperatorCheap` interpolation class in PEPit.jl. + +Implement some necessary constraints verified by the class of cocoercive +and strongly monotone (maximally) operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `mu`: strong monotonicity parameter +- `beta`: cocoercivity parameter + +Cocoercive operators are characterized by the parameters $\mu$ and $\beta$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, CocoerciveStronglyMonotoneOperatorCheap, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `beta`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct CocoerciveStronglyMonotoneOperatorCheap <: AbstractFunction mu::Float64 beta::Float64 diff --git a/src/operators/cocoercive_strongly_monotone_expensive.jl b/src/operators/cocoercive_strongly_monotone_expensive.jl index c851507..4f8d294 100644 --- a/src/operators/cocoercive_strongly_monotone_expensive.jl +++ b/src/operators/cocoercive_strongly_monotone_expensive.jl @@ -1,3 +1,38 @@ +@doc raw""" + CocoerciveStronglyMonotoneOperatorExpensive(param; reuse_gradient=true) + +Represent the `CocoerciveStronglyMonotoneOperatorExpensive` interpolation class in PEPit.jl. + +Implement some necessary constraints verified by the class of cocoercive +and strongly monotone (maximally) operators. Those conditions are presented in [1, Appendix F] and are +stronger than those used in [2]. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `mu`: strong monotonicity parameter +- `beta`: cocoercivity parameter + +Cocoercive operators are characterized by the parameters $\mu$ and $\beta$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, CocoerciveStronglyMonotoneOperatorExpensive, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `beta`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct CocoerciveStronglyMonotoneOperatorExpensive <: AbstractFunction mu::Float64 beta::Float64 diff --git a/src/operators/linear.jl b/src/operators/linear.jl index 65799c8..4dababb 100644 --- a/src/operators/linear.jl +++ b/src/operators/linear.jl @@ -1,3 +1,35 @@ +@doc raw""" + LinearOperator(param; reuse_gradient=true) + +Represent the `LinearOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of linear operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `L`: singular values upper bound +- `T`: the adjoint linear operator + +Linear operators are characterized by the parameter $L$, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, LinearOperator, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. +- `T`: class parameter or auxiliary state stored as `PEPFunction`. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct LinearOperator <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/lipschitz.jl b/src/operators/lipschitz.jl index a8e3839..5042fdd 100644 --- a/src/operators/lipschitz.jl +++ b/src/operators/lipschitz.jl @@ -1,3 +1,33 @@ +@doc raw""" + LipschitzOperator(param; reuse_gradient=true) + +Represent the `LipschitzOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of Lipschitz continuous operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used.
+ +# Class parameters +- `L`: Lipschitz parameter + +Lipschitz continuous operators are characterized by the parameter $L$, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, LipschitzOperator, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct LipschitzOperator <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/lipschitz_strongly_monotone_cheap.jl b/src/operators/lipschitz_strongly_monotone_cheap.jl index ea0c9a3..7cf4374 100644 --- a/src/operators/lipschitz_strongly_monotone_cheap.jl +++ b/src/operators/lipschitz_strongly_monotone_cheap.jl @@ -1,3 +1,43 @@ +@doc raw""" + LipschitzStronglyMonotoneOperatorCheap(param; reuse_gradient=true) + +Represent the `LipschitzStronglyMonotoneOperatorCheap` interpolation class in PEPit.jl. + +Implement some constraints (which are not necessary and sufficient for interpolation) +for the class of Lipschitz continuous strongly monotone (and maximally monotone) operators. + +# Warning + + Lipschitz strongly monotone operators do not enjoy known interpolation conditions. The conditions implemented + in this class are necessary but a priori not sufficient for interpolation. Hence, the numerical results + obtained when using this class might be non-tight upper bounds (see Discussions in [1, Section 2]).
+ +# Class parameters +- `mu`: strong monotonicity parameter +- `L`: Lipschitz parameter + +Lipschitz continuous strongly monotone operators are characterized by parameters $\mu$ and `L`, +hence can be instantiated as + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, LipschitzStronglyMonotoneOperatorCheap, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct LipschitzStronglyMonotoneOperatorCheap <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/operators/lipschitz_strongly_monotone_expensive.jl b/src/operators/lipschitz_strongly_monotone_expensive.jl index bd6b737..12b3999 100644 --- a/src/operators/lipschitz_strongly_monotone_expensive.jl +++ b/src/operators/lipschitz_strongly_monotone_expensive.jl @@ -1,3 +1,45 @@ +@doc raw""" + LipschitzStronglyMonotoneOperatorExpensive(param; reuse_gradient=true) + +Represent the `LipschitzStronglyMonotoneOperatorExpensive` interpolation class in PEPit.jl. + +Implement some constraints (which are not necessary and sufficient for interpolation) +for the class of Lipschitz continuous strongly monotone (and maximally monotone) operators. +Those conditions are presented in [1, Proposition 3.15] (details in [1, Appendix E]) and are stronger than +those used in [2]. + +# Warning + + Lipschitz strongly monotone operators do not enjoy known interpolation conditions. 
The conditions implemented + in this class are necessary but a priori not sufficient for interpolation. Hence, the numerical results + obtained when using this class might be non-tight upper bounds (see Discussions in [1, Section 2]). + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `mu`: strong monotonicity parameter +- `L`: Lipschitz parameter + +Lipschitz continuous strongly monotone operators are characterized by parameters $\mu$ and `L`, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, LipschitzStronglyMonotoneOperatorExpensive, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct LipschitzStronglyMonotoneOperatorExpensive <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/operators/monotone.jl b/src/operators/monotone.jl index c7fd26d..944ed54 100644 --- a/src/operators/monotone.jl +++ b/src/operators/monotone.jl @@ -1,3 +1,29 @@ +@doc raw""" + MonotoneOperator(param=OrderedDict(); reuse_gradient=false) + +Represent the `MonotoneOperator` interpolation class in PEPit.jl. + +Implement interpolation constraints for the class of maximally monotone operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. 
+ +General maximally monotone operators are not characterized by any parameter, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict() # this class has no parameter, so no key is needed +f = declare_function!(problem, MonotoneOperator, param) +``` + +# Fields +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct MonotoneOperator <: AbstractFunction _PEPit_func::PEPFunction diff --git a/src/operators/negatively_comonotone.jl b/src/operators/negatively_comonotone.jl index 95b152f..b56d4f9 100644 --- a/src/operators/negatively_comonotone.jl +++ b/src/operators/negatively_comonotone.jl @@ -1,3 +1,33 @@ +@doc raw""" + NegativelyComonotoneOperator(param; reuse_gradient=true) + +Represent the `NegativelyComonotoneOperator` interpolation class in PEPit.jl. + +Implement some necessary constraints of the class of negatively comonotone operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `rho`: comonotonicity parameter (>0) + +Negatively comonotone operators are characterized by the parameter $\rho$, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("rho" => 1.0) # rho is the comonotonicity parameter (> 0) +f = declare_function!(problem, NegativelyComonotoneOperator, param) +``` + +# Fields +- `rho`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP.
+""" mutable struct NegativelyComonotoneOperator <: AbstractFunction rho::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/nonexpansive.jl b/src/operators/nonexpansive.jl index 5a7ce5d..e7e8b1f 100644 --- a/src/operators/nonexpansive.jl +++ b/src/operators/nonexpansive.jl @@ -1,5 +1,35 @@ using OrderedCollections +@doc raw""" + NonexpansiveOperator(param; ) + +Represent the `NonexpansiveOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints of the class of (possibly inconsistent) nonexpansive operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `v`: infimal displacement vector. + +Nonexpansive operators are not characterized by any parameter, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, NonexpansiveOperator, param) +``` + +# Fields +- `v`: class parameter or auxiliary state stored as `Union{Point,Nothing}`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct NonexpansiveOperator <: AbstractFunction v::Union{Point,Nothing} _PEPit_func::PEPFunction diff --git a/src/operators/skew_symmetric_linear.jl b/src/operators/skew_symmetric_linear.jl index 9e2d45f..c19111d 100644 --- a/src/operators/skew_symmetric_linear.jl +++ b/src/operators/skew_symmetric_linear.jl @@ -1,3 +1,33 @@ +@doc raw""" + SkewSymmetricLinearOperator(param; reuse_gradient=true) + +Represent the `SkewSymmetricLinearOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints for the class of skew-symmetric linear operators.
+ +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `L`: singular values upper bound + +Skew-symmetric linear operators are characterized by the parameter $L$, hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("L" => 1.0) # adapt keys to the class +f = declare_function!(problem, SkewSymmetricLinearOperator, param) +``` + +# Fields +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct SkewSymmetricLinearOperator <: AbstractFunction L::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/strongly_monotone.jl b/src/operators/strongly_monotone.jl index 09615b8..23610eb 100644 --- a/src/operators/strongly_monotone.jl +++ b/src/operators/strongly_monotone.jl @@ -1,3 +1,35 @@ +@doc raw""" + StronglyMonotoneOperator(param; reuse_gradient=false) + +Represent the `StronglyMonotoneOperator` interpolation class in PEPit.jl. + +Implement interpolation constraints of the class of strongly monotone +(maximally monotone) operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `mu`: strong monotonicity parameter + +Strongly monotone (and maximally monotone) operators are characterized by the parameter $\mu$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("mu" => 0.1) # strong monotonicity parameter +f = declare_function!(problem, StronglyMonotoneOperator, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. 
+ +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. +""" mutable struct StronglyMonotoneOperator <: AbstractFunction mu::Float64 _PEPit_func::PEPFunction diff --git a/src/operators/symmetric_linear.jl b/src/operators/symmetric_linear.jl index ad04d8e..ceb9c06 100644 --- a/src/operators/symmetric_linear.jl +++ b/src/operators/symmetric_linear.jl @@ -1,3 +1,36 @@ +@doc raw""" + SymmetricLinearOperator(param; reuse_gradient=true) + +Represent the `SymmetricLinearOperator` interpolation class in PEPit.jl. + +Implement the interpolation constraints for the class of symmetric linear operators. + +# Note + + Operator values can be requested through `gradient`, and `function values` should not be used. + +# Class parameters +- `mu`: eigenvalues lower bound +- `L`: eigenvalues upper bound + +Symmetric linear operators are characterized by the parameters $\mu$ and $L$, +hence can be instantiated as + +# Julia usage +```julia +problem = PEP() +param = OrderedDict("mu" => 0.1, "L" => 1.0) # eigenvalue lower and upper bounds +f = declare_function!(problem, SymmetricLinearOperator, param) +``` + +# Fields +- `mu`: class parameter or auxiliary state stored as `Float64`. +- `L`: class parameter or auxiliary state stored as `Float64`. +- `_PEPit_func`: internal [`PEPFunction`](@ref) storing oracle calls and constraints. + +# Implementation +The constructor receives parameters through an `OrderedDict`; `add_class_constraints!` adds the interpolation model when [`solve!`](@ref) builds the SDP. 
+""" mutable struct SymmetricLinearOperator <: AbstractFunction mu::Float64 L::Float64 diff --git a/src/primitive_steps/bregman_gradient_step.jl b/src/primitive_steps/bregman_gradient_step.jl index 2921cc6..3a71c91 100644 --- a/src/primitive_steps/bregman_gradient_step.jl +++ b/src/primitive_steps/bregman_gradient_step.jl @@ -1,3 +1,36 @@ +@doc raw""" + bregman_gradient_step!(gx0::AbstractPoint, sx0::AbstractPoint, mirror_map::AbstractFunction, gamma::Real) + +Create the symbolic primitive step `bregman_gradient_step!`. + +This routine outputs $x$ by performing a mirror step of step-size $\gamma$. +That is, denoting $f$ the function to be minimized +and $h$ the **mirror map**, it performs + +```math +x = \arg\min_x \left[ f(x_0) + \left< \nabla f(x_0);\, x - x_0 \right> + + \frac{1}{\gamma} D_h(x; x_0) \right], +``` + +where $D_h(x; x_0)$ denotes the Bregman divergence of $h$ on $x$ with respect to $x_0$. + +```math +D_h(x; x_0) \triangleq h(x) - h(x_0) - \left< \nabla h(x_0);\, x - x_0 \right>. +``` + +# Arguments +- `gx0`: descent direction $\textbf{gx0} \triangleq \nabla f(x_0)$. +- `sx0`: starting gradient $\textbf{sx0} \triangleq \nabla h(x_0)$. +- `mirror_map`: the reference function $h$ whose Bregman divergence is computed. +- `gamma`: step-size parameter. + +# Returns +- `x`: new iterate $\textbf{x} \triangleq x$. +- `sx`: $h$'s gradient on new iterate $x$ $\textbf{sx} \triangleq \nabla h(x)$. +- `hx`: $h$'s value on new iterate $\textbf{hx} \triangleq h(x)$. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). 
+""" function bregman_gradient_step!(gx0::AbstractPoint, sx0::AbstractPoint, mirror_map::AbstractFunction, gamma::Real) x = Point() diff --git a/src/primitive_steps/bregman_proximal_step.jl b/src/primitive_steps/bregman_proximal_step.jl index 555fb6f..5a839cb 100644 --- a/src/primitive_steps/bregman_proximal_step.jl +++ b/src/primitive_steps/bregman_proximal_step.jl @@ -1,3 +1,37 @@ +@doc raw""" + bregman_proximal_step!(sx0::AbstractPoint, mirror_map::AbstractFunction, min_function::AbstractFunction, gamma::Real) + +Create the symbolic primitive step `bregman_proximal_step!`. + +This routine outputs $x$ by performing a proximal mirror step of step-size $\gamma$. +That is, denoting $f$ the function to be minimized +and $h$ the **mirror map**, it performs + +```math +x = \arg\min_x \left[ f(x) + \frac{1}{\gamma} D_h(x; x_0) \right], +``` + +where $D_h(x; x_0)$ denotes the Bregman divergence of $h$ on $x$ with respect to $x_0$. + +```math +D_h(x; x_0) \triangleq h(x) - h(x_0) - \left< \nabla h(x_0);\, x - x_0 \right>. +``` + +# Arguments +- `sx0`: starting gradient $\textbf{sx0} \triangleq \nabla h(x_0)$. +- `mirror_map`: the reference function $h$ we computed Bregman divergence of. +- `min_function`: function we aim to minimize. +- `gamma`: step-size parameter. + +# Returns +- `x`: new iterate $\textbf{x} \triangleq x$. +- `sx`: $h$'s gradient on new iterate $x$ $\textbf{sx} \triangleq \nabla h(x)$. +- `hx`: $h$'s value on new iterate $\textbf{hx} \triangleq h(x)$. +- `gx`: $f$'s gradient on new iterate $x$ $\textbf{gx} \triangleq \nabla f(x)$. +- `fx`: $f$'s value on new iterate $\textbf{fx} \triangleq f(x)$. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). 
+""" function bregman_proximal_step!(sx0::AbstractPoint, mirror_map::AbstractFunction, min_function::AbstractFunction, gamma::Real) x = Point() diff --git a/src/primitive_steps/epsilon_subgradient_step.jl b/src/primitive_steps/epsilon_subgradient_step.jl index e4ca78e..26f35e8 100644 --- a/src/primitive_steps/epsilon_subgradient_step.jl +++ b/src/primitive_steps/epsilon_subgradient_step.jl @@ -1,3 +1,41 @@ +@doc raw""" + epsilon_subgradient_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real) + +Create the symbolic primitive step `epsilon_subgradient_step!`. + +This routine performs a step $x \leftarrow x_0 - \gamma g_0$ +where $g_0 \in\partial_{\varepsilon} f(x_0)$. That is, $g_0$ is an +$\varepsilon$-subgradient of $f$ at $x_0$. The set $\partial_{\varepsilon} f(x_0)$ +(referred to as the $\varepsilon$-subdifferential) is defined as (see [1, Section 3]) + +```math +\partial_{\varepsilon} f(x_0)=\left\{g_0:\,\forall z,\, f(z)\geqslant f(x_0)+\left< g_0;\, z-x_0 \right>-\varepsilon \right\}. +``` + +An alternative characterization of $g_0 \in\partial_{\varepsilon} f(x_0)$ consists in writing + +```math +f(x_0)+f^*(g_0)-\left< g_0;x_0\right>\leqslant \varepsilon. +``` + +References: + [[1] A. Brøndsted, R.T. Rockafellar. + On the subdifferentiability of convex functions. + Proceedings of the American Mathematical Society 16(4), 605-611 (1965)](https://www.jstor.org/stable/2033889) + +# Arguments +- `x0`: starting point x0. +- `f`: a function. +- `gamma`: step-size parameter. + +# Returns +- `x`: the output point. +- `g0`: an $\varepsilon$-subgradient of f at x0. +- `f0`: the value of the function f at x0. +- `epsilon`: the value of epsilon. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). 
+""" function epsilon_subgradient_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real) g0 = Point() diff --git a/src/primitive_steps/exact_linesearch_step.jl b/src/primitive_steps/exact_linesearch_step.jl index c4d5390..f90a16b 100644 --- a/src/primitive_steps/exact_linesearch_step.jl +++ b/src/primitive_steps/exact_linesearch_step.jl @@ -1,3 +1,52 @@ +@doc raw""" + exact_linesearch_step!(x0::AbstractPoint, f::AbstractFunction, directions) + +Create the symbolic primitive step `exact_linesearch_step!`. + +This routine outputs some $x$ by *mimicking* an exact line/span search in specified directions. +It is used for instance by the Julia examples +`examples/unconstrained_convex_minimization/gradient_exact_line_search.jl` and +`examples/unconstrained_convex_minimization/conjugate_gradient.jl`. + +The routine aims at mimicking the operation: + +```math +\begin{aligned} + x & = & x_0 - \sum_{i=1}^{T} \gamma_i d_i,\\ + \text{with } \overrightarrow{\gamma} & = & \arg\min_{\overrightarrow{\gamma}} f\left(x_0 - \sum_{i=1}^{T} \gamma_i d_i\right), +\end{aligned} +``` +where $T$ denotes the number of directions $d_i$. This operation can equivalently be described +in terms of the following conditions: + +```math +\begin{aligned} + x - x_0 & \in & \text{span}\left\{d_1,\ldots,d_T\right\}, \\ + \nabla f(x) & \perp & \text{span}\left\{d_1,\ldots,d_T\right\}. +\end{aligned} +``` +In this routine, we instead constrain $x$ and $\nabla f(x)$ to satisfy + +```math +\begin{aligned} + \forall i=1,\ldots,T: & \left< \nabla f(x);\, d_i \right> & = & 0,\\ + \text{and } & \left< \nabla f(x);\, x - x_0 \right> & = & 0, +\end{aligned} +``` +which is a relaxation of the true line/span search conditions. + +# Arguments +- `x0`: the starting point. +- `f`: the function on which the (sub)gradient will be evaluated. +- `directions`: the list of all directions required to be orthogonal to the (sub)gradient of f at x. 
+ +# Returns +- `x`: such that all vectors in directions are orthogonal to the (sub)gradient of f at x. +- `gx`: a (sub)gradient of f at x. +- `fx`: the function f evaluated at x. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). +""" function exact_linesearch_step!(x0::AbstractPoint, f::AbstractFunction, directions) x = Point() diff --git a/src/primitive_steps/inexact_gradient_step.jl b/src/primitive_steps/inexact_gradient_step.jl index a0f6ae4..b4a5930 100644 --- a/src/primitive_steps/inexact_gradient_step.jl +++ b/src/primitive_steps/inexact_gradient_step.jl @@ -1,3 +1,48 @@ +@doc raw""" + inexact_gradient_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real, epsilon::Real; notion::String="absolute") + +Create the symbolic primitive step `inexact_gradient_step!`. + +This routine performs a step $x \leftarrow x_0 - \gamma d_{x_0}$ +where $d_{x_0}$ is close to the gradient of $f$ in $x_0$ +in the following sense: + +```math +\|d_{x_0} - \nabla f(x_0)\|^2 \leqslant \left\{ + \begin{aligned} + & \varepsilon^2 & \text{if notion is set to 'absolute'}, \\ + & \varepsilon^2 \|\nabla f(x_0)\|^2 & \text{if notion is set to 'relative'}. + \end{aligned} + \right. +``` + +This relative approximation is used at least in 3 PEPit examples, +in particular in 2 unconstrained convex minimizations: +an inexact gradient descent, and an inexact accelerated gradient. + +References: + [[1] E. De Klerk, F. Glineur, A. Taylor (2020). + Worst-case convergence analysis of inexact gradient and Newton methods + through semidefinite programming performance estimation. + SIAM Journal on Optimization, 30(3), 2053-2082.](https://arxiv.org/pdf/1709.05191.pdf) + +# Arguments +- `x0`: starting point x0. +- `f`: a function. +- `gamma`: step-size parameter. +- `epsilon`: the required accuracy. +- `notion`: defines the mode (absolute or relative inaccuracy). By default, `notion="absolute"`. + +# Returns +- `x`: the output point. 
+- `dx0`: the approximate (sub)gradient of f at x0. +- `fx0`: the value of the function f at x0. + +# Throws +- `ErrorException` (via `error`): if `notion` is not one of `"absolute"` or `"relative"`. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). +""" function inexact_gradient_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real, epsilon::Real; notion::String="absolute") gx0, fx0 = oracle!(f, x0) diff --git a/src/primitive_steps/inexact_proximal_step.jl b/src/primitive_steps/inexact_proximal_step.jl index fe231dc..4b2c116 100644 --- a/src/primitive_steps/inexact_proximal_step.jl +++ b/src/primitive_steps/inexact_proximal_step.jl @@ -1,3 +1,138 @@ +@doc raw""" + inexact_proximal_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real; opt::String="PD_gapII") + +Create the symbolic primitive step `inexact_proximal_step!`. + +This routine encodes an inexact proximal operation with step size $\gamma$. That is, it outputs a tuple +$(x, g\in \partial f(x), f(x), w, v\in\partial f(w), f(w), \varepsilon)$ which are described as follows. + +First, $x$ is an approximation to the proximal point of $x_0$ on function $f$: + +```math +x \approx \mathrm{prox}_{\gamma f}(x_0)\triangleq\arg\min_x \left\{ \gamma f(x) + \frac{1}{2}\|x-x_0\|^2\right\}, +``` + +where the meaning of $\approx$ depends on the option "opt" and is explained below. The notions of inaccuracy +implemented within this routine are specified using primal and dual proximal problems, denoted by + +```math + \begin{aligned} + &\Phi^{(p)}_{\gamma f}(x; x_0) \triangleq \gamma f(x) + \frac{1}{2}\|x-x_0\|^2,\\ + &\Phi^{(d)}_{\gamma f}(v; x_0) \triangleq -\gamma f^*(v)-\frac{1}{2}\|x_0-\gamma v\|^2 + \frac{1}{2}\|x_0\|^2,\\ + \end{aligned} +``` +where $\Phi^{(p)}_{\gamma f}(x;x_0)$ and $\Phi^{(d)}_{\gamma f}(v;x_0)$ respectively denote the primal +and the dual proximal problems, and where $f^*$ is the Fenchel conjugate of $f$. 
The options below +encode different meanings of "$\approx$" by specifying accuracy requirements on primal-dual pairs: + +```math +(x,v) \approx_{\varepsilon} \left(\mathrm{prox}_{\gamma f}(x_0),\,\mathrm{prox}_{f^*/\gamma}(x_0/\gamma)\right), +``` + +where $\approx_{\varepsilon}$ corresponds to requiring the primal-dual pair $(x,v)$ to satisfy some +primal-dual accuracy requirement: + +```math +\Phi^{(p)}_{\gamma f}(x;x_0)-\Phi^{(d)}_{\gamma f}(v;x_0) \leqslant \varepsilon, +``` + +where $\varepsilon\geqslant 0$ is the error magnitude, which is returned to the user so that one can +constrain it to be bounded by some other values. + +**Relation to the exact proximal operation:** In the exact case (no error in the computation, +$\varepsilon=0$), $v$ corresponds to the solution of the dual proximal problem and one can write + +```math +x = x_0-\gamma g, +``` + +with $g=v=\mathrm{prox}_{f^*/\gamma}(x_0/\gamma)\in\partial f(x)$, and $x=w$. + +**Reformulation of the primal-dual gap:** With regard to the exact proximal computation, the inexact case under +consideration here can be described as performing + +```math +x = x_0-\gamma v + e, +``` + +where $v$ is an $\epsilon$-subgradient of $f$ at $x$ +(notation $v\in\partial_{\epsilon} f(x)$) +and $e$ is some additional computation error. Those elements allow for a common convenient reformulation of +the primal-dual gap, written in terms of the magnitudes of $\epsilon$ and of $e$: + +```math +\Phi^{(p)}_{\gamma f}(x;x_0)-\Phi^{(d)}_{\gamma f}(v;x_0) = \frac{1}{2} \|e\|^2 + \gamma \epsilon. +``` + +**Options:** The following options are available (a list of such choices is presented in [4]; we provide a reference +for each of those choices below). + + - `"PD_gapI"` : the constraint imposed on the output is the vanilla primal-dual gap (see, e.g., [2]) + +```math +\Phi^{(p)}_{\gamma f}(x;x_0)-\Phi^{(d)}_{\gamma f}(v;x_0) \leqslant \varepsilon. 
+``` + + This approximation requirement is used in one PEPit example: an accelerated inexact forward backward. + + - `"PD_gapII"` : the constraint is stronger than the vanilla primal-dual gap, as more structure is imposed + (see, e.g., [1,5]): + +```math +\Phi^{(p)}_{\gamma f}(x;x_0)-\Phi^{(d)}_{\gamma f}(g;x_0) \leqslant \varepsilon, +``` + + where we imposed that $v\triangleq g\in\partial f(x)$ and $w\triangleq x$. This approximation + requirement is used in two PEPit examples: in a relatively inexact proximal point algorithm and in a partially + inexact Douglas-Rachford splitting. + + - `"PD_gapIII"` : the constraint is stronger than the vanilla primal-dual gap, as more structure is imposed + (see, e.g., [3]): + +```math +\Phi^{(p)}_{\gamma f}(x;x_0)-\Phi^{(d)}_{\gamma f}(\tfrac{x_0 - x}{\gamma};x_0) \leqslant \varepsilon, +``` + + where we imposed that $v \triangleq \frac{x_0 - x}{\gamma}$. + +References: + + [[1] R.T. Rockafellar (1976). + Monotone operators and the proximal point algorithm. SIAM journal on control and optimization, 14(5), 877-898.](https://epubs.siam.org/doi/pdf/10.1137/0314056) + + [[2] R.D. Monteiro, B.F. Svaiter (2013). + An accelerated hybrid proximal extragradient method for convex optimization + and its implications to second-order methods. + SIAM Journal on Optimization, 23(2), 1092-1125.](https://epubs.siam.org/doi/abs/10.1137/110833786) + + [[3] S. Salzo, S. Villa (2012). + Inexact and accelerated proximal point algorithms. + Journal of Convex analysis, 19(4), 1167-1192.](http://www.optimization-online.org/DB_FILE/2011/08/3128.pdf) + + [[4] M. Barre, A. Taylor, F. Bach (2020). + Principled analyses and design of first-order methods with inexact proximal operators.](https://arxiv.org/pdf/2006.06041v3.pdf) + + [[5] A. d'Aspremont, D. Scieur, A. Taylor (2021). + Acceleration Methods. + Foundations and Trends in Optimization: Vol. 5, No. 
1-2.](https://arxiv.org/pdf/2101.09545.pdf) + +# Arguments +- `x0`: point for which we aim to compute an approximate proximal step. +- `f`: function whose proximal operator is approximated. +- `gamma`: step-size parameter. +- `opt`: option (type of error requirement) among `"PD_gapI"`, `"PD_gapII"`, and `"PD_gapIII"`. + +# Returns +- `x`: the approximated proximal point. +- `gx`: a (sub)gradient of f at x (subgradient used in evaluating the accuracy criterion). +- `fx`: f evaluated at x. +- `w`: a point w such that v (see next output) is a subgradient of f at w. +- `v`: the approximated proximal point of the dual problem, (sub)gradient of f evaluated at w. +- `fw`: f evaluated at w. +- `eps_var`: value of the primal-dual gap (which can be further bounded by the user). + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). +""" function inexact_proximal_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real; opt::String="PD_gapII") if opt == "PD_gapI" diff --git a/src/primitive_steps/linear_optimization_step.jl b/src/primitive_steps/linear_optimization_step.jl index c6f110c..936c1b9 100644 --- a/src/primitive_steps/linear_optimization_step.jl +++ b/src/primitive_steps/linear_optimization_step.jl @@ -1,3 +1,40 @@ +@doc raw""" + linear_optimization_step!(dir::AbstractPoint, ind::AbstractFunction) + +Create the symbolic primitive step `linear_optimization_step!`. + +This routine outputs the result of a minimization problem with linear objective (whose direction +is provided by `dir`) on the domain of the (closed convex) indicator function `ind`. +That is, it outputs a solution to + +```math +\arg\min_{\text{ind}(x)=0} \left< \text{dir};\, x \right>, +``` + +One can notice that $x$ is a solution of this problem if and only if + +```math +- \text{dir} \in \partial \text{ind}(x). +``` + +Linear optimization oracles are classically used in conditional gradient-type algorithms (a.k.a. Frank-Wolfe) [1]. + +References: + [[1] M. Frank, P. Wolfe (1956). 
+ An algorithm for quadratic programming. + Naval research logistics quarterly, 3(1-2), 95-110.](https://arxiv.org/pdf/1608.04826.pdf) + +# Arguments +- `dir`: direction of optimization +- `ind`: convex indicator function + +# Returns +- `x`: oracle output. +- `gx`: the (sub)gradient of ind on x. +- `fx`: the function value of ind on x. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). +""" function linear_optimization_step!(dir::AbstractPoint, ind::AbstractFunction) x = Point() diff --git a/src/primitive_steps/proximal_step.jl b/src/primitive_steps/proximal_step.jl index 0d1118b..1186853 100644 --- a/src/primitive_steps/proximal_step.jl +++ b/src/primitive_steps/proximal_step.jl @@ -1,3 +1,33 @@ +@doc raw""" + proximal_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real) + +Create the symbolic primitive step `proximal_step!`. + +This routine performs a proximal step of step-size **gamma**, starting from **x0**, and on function **f**. +That is, it performs: + +```math +\begin{aligned} + x \triangleq \text{prox}_{\gamma f}(x_0) & \triangleq & \arg\min_x \left\{ \gamma f(x) + \frac{1}{2} \|x - x_0\|^2 \right\}, \\ + & \Updownarrow & \\ + 0 & = & \gamma g_x + x - x_0 \text{ for some } g_x\in\partial f(x),\\ + & \Updownarrow & \\ + x & = & x_0 - \gamma g_x \text{ for some } g_x\in\partial f(x). +\end{aligned} +``` + +# Arguments +- `x0`: starting point x0. +- `f`: function on which the proximal step is computed. +- `gamma`: step-size parameter. + +# Returns +- `x`: proximal point. +- `gx`: the (sub)gradient of f at x. +- `fx`: the function value of f on x. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). 
+""" function proximal_step!(x0::AbstractPoint, f::AbstractFunction, gamma::Real) gx = Point() diff --git a/src/primitive_steps/shifted_optimization_step.jl b/src/primitive_steps/shifted_optimization_step.jl index 9500ddb..b8a34f0 100644 --- a/src/primitive_steps/shifted_optimization_step.jl +++ b/src/primitive_steps/shifted_optimization_step.jl @@ -1,3 +1,39 @@ +@doc raw""" + shifted_optimization_step!(dir::AbstractPoint, f::AbstractFunction) + +Create the symbolic primitive step `shifted_optimization_step!`. + +This routine outputs a stationary point of a minimization problem: + +```math +\arg\min_{x} f(x)-\left< \text{dir};\, x \right>. +``` + +That is, it outputs $x$ such that + +```math +\text{dir} \in \partial f(x). +``` + +Shifted optimization oracles are classically used in difference-of-convex algorithms +(a.k.a., convex-concave procedure), see, e.g., [1]. + +References: + [[1] H.A. Le Thi, T. Pham Dinh (2018). + DC programming and DCA: thirty years of developments. + Mathematical Programming, 169(1), 5-68.](https://link.springer.com/article/10.1007/s10107-018-1235-y) + +# Arguments +- `dir`: direction/linear shift in the objective of the optimization problem +- `f`: function + +# Returns +- `x`: oracle output. +- `gx`: the (sub)gradient of f at x. +- `fx`: the function value of f at x. + +See also [`Point`](@ref), [`Expression`](@ref), and [`add_constraint!`](@ref). +""" function shifted_optimization_step!(dir::AbstractPoint, f::AbstractFunction) x = Point() diff --git a/src/tools/dict_operations.jl b/src/tools/dict_operations.jl index 8e838bf..ff6a070 100644 --- a/src/tools/dict_operations.jl +++ b/src/tools/dict_operations.jl @@ -1,3 +1,20 @@ +@doc raw""" + merge_dicts(dict1::OrderedDict, dict2::OrderedDict) + +Merge two coefficient dictionaries by adding coefficients with matching keys. + +This helper is used when symbolic points, functions, and expressions are added +together. 
The keys encode symbolic atoms and the values encode their scalar +coefficients. + +# Arguments +- `dict1`: first coefficient dictionary. +- `dict2`: second coefficient dictionary. + +# Returns +An `OrderedDict` containing the union of both key sets, with coefficients added +for keys that appear in both inputs. +""" function merge_dicts(dict1::OrderedDict, dict2::OrderedDict) merged_dict = copy(dict1) for (key, value) in dict2 @@ -7,9 +24,44 @@ function merge_dicts(dict1::OrderedDict, dict2::OrderedDict) end +@doc raw""" + prune_dict(d::OrderedDict) + +Remove all zero coefficients from a symbolic decomposition dictionary. + +The symbolic algebra stores many affine and linear combinations as dictionaries. +After additions or scalar multiplications, exact zero coefficients can appear +and should be removed so that equality checks on decompositions remain stable. + +# Arguments +- `d`: coefficient dictionary to prune. + +# Returns +A new `OrderedDict` with all entries whose value is exactly zero removed. +""" prune_dict(d::OrderedDict{K,V}) where {K,V} = OrderedDict{K,V}(k => v for (k, v) in d if v != 0) +@doc raw""" + multiply_dicts(dict1::OrderedDict, dict2::OrderedDict) + +Expand the product of two symbolic linear combinations. + +If `dict1` represents $\sum_i a_i p_i$ and `dict2` represents +$\sum_j b_j q_j$, then the returned dictionary represents the bilinear +expansion with keys $(p_i, q_j)$ and coefficients $a_i b_j$. This is the +dictionary-level operation behind inner products of symbolic [`Point`](@ref) +objects. + +# Arguments +- `dict1`: first coefficient dictionary. +- `dict2`: second coefficient dictionary. + +# Returns +An `OrderedDict{Any,Float64}` whose keys are ordered pairs of input keys and +whose values are products of the corresponding coefficients, added when the same +pair appears more than once. 
+""" function multiply_dicts(dict1::OrderedDict, dict2::OrderedDict) product_dict = OrderedDict{Any,Float64}() for (key1, value1) in dict1 diff --git a/test/test_constraint.jl b/test/test_constraint.jl index 3713055..2863d14 100644 --- a/test/test_constraint.jl +++ b/test/test_constraint.jl @@ -2,11 +2,11 @@ using Test @testset "Constraints" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Function_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 L = 1.0 @@ -113,11 +113,11 @@ end @testset "Constraints: logdet heuristic" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Function_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 L = 1.0 mu = 0.1 diff --git a/test/test_expression.jl b/test/test_expression.jl index ee757cd..0f0facd 100644 --- a/test/test_expression.jl +++ b/test/test_expression.jl @@ -2,10 +2,10 @@ using Test @testset "Expression" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 pep = PEP() @@ -23,11 +23,11 @@ using Test @test composite_expression.counter === nothing @test inner_product.counter === nothing @test function_value.counter == 0 - @test Expression_counter[] == 1 + @test PEPit.Expression_counter[] == 1 new_expression = Expression() @test new_expression.counter == 1 - @test Expression_counter[] == 2 + @test PEPit.Expression_counter[] == 2 new_expression2 = 1 + 2 * (4 - (-(inner_product) * 3) - 5 + 2 * function_value - function_value / 5 + 2) diff --git a/test/test_function.jl b/test/test_function.jl index 5573056..725d010 
100644 --- a/test/test_function.jl +++ b/test/test_function.jl @@ -2,11 +2,11 @@ using Test function reset_counters!() - Point_counter[] = 0 - Expression_counter[] = 0 - Function_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 end @testset "PEPFunction Tests" begin @@ -36,11 +36,11 @@ end @test func1.counter == 0 @test func2._PEPit_func.counter == 1 @test composite_function.counter === nothing - @test Function_counter[] == 2 + @test PEPit.Function_counter[] == 2 new_function = PEPFunction(is_leaf=true, decomposition_dict=nothing) @test new_function.counter == 2 - @test Function_counter[] == 3 + @test PEPit.Function_counter[] == 3 end function compute_linear_combination(func1, func2) @@ -249,17 +249,17 @@ end new_function = compute_linear_combination(func1, func2) - @test _is_already_evaluated_on_point(new_function, point) === nothing - @test _is_already_evaluated_on_point(func1, point) === nothing - @test _is_already_evaluated_on_point(func2._PEPit_func, point) === nothing + @test PEPit._is_already_evaluated_on_point(new_function, point) === nothing + @test PEPit._is_already_evaluated_on_point(func1, point) === nothing + @test PEPit._is_already_evaluated_on_point(func2._PEPit_func, point) === nothing oracle!(new_function, point) - @test _is_already_evaluated_on_point(new_function, point) == new_function.list_of_points[1][2:3] - @test _is_already_evaluated_on_point(func1, point) == func1.list_of_points[1][2:3] - @test _is_already_evaluated_on_point(func2._PEPit_func, point) == func2._PEPit_func.list_of_points[1][2:3] + @test PEPit._is_already_evaluated_on_point(new_function, point) == new_function.list_of_points[1][2:3] + @test PEPit._is_already_evaluated_on_point(func1, point) == func1.list_of_points[1][2:3] + @test PEPit._is_already_evaluated_on_point(func2._PEPit_func, point) == 
func2._PEPit_func.list_of_points[1][2:3] end @testset "Separate Leaf Functions - Non-differentiable" begin @@ -271,12 +271,12 @@ end point2 = Point(is_leaf=true, decomposition_dict=nothing) oracle!(new_function, point1) - list_nothing, list_grad_only, list_grad_val = _separate_leaf_functions_regarding_their_need_on_point(new_function, point1) + list_nothing, list_grad_only, list_grad_val = PEPit._separate_leaf_functions_regarding_their_need_on_point(new_function, point1) @test length(list_nothing) == 0 @test length(list_grad_only) == 2 @test length(list_grad_val) == 0 - list_nothing, list_grad_only, list_grad_val = _separate_leaf_functions_regarding_their_need_on_point(new_function, point2) + list_nothing, list_grad_only, list_grad_val = PEPit._separate_leaf_functions_regarding_their_need_on_point(new_function, point2) @test length(list_nothing) == 0 @test length(list_grad_only) == 0 @test length(list_grad_val) == 2 @@ -291,12 +291,12 @@ end point2 = Point(is_leaf=true, decomposition_dict=nothing) oracle!(new_function, point1) - list_nothing, list_grad_only, list_grad_val = _separate_leaf_functions_regarding_their_need_on_point(new_function, point1) + list_nothing, list_grad_only, list_grad_val = PEPit._separate_leaf_functions_regarding_their_need_on_point(new_function, point1) @test length(list_nothing) == 1 @test length(list_grad_only) == 0 @test length(list_grad_val) == 0 - list_nothing, list_grad_only, list_grad_val = _separate_leaf_functions_regarding_their_need_on_point(new_function, point2) + list_nothing, list_grad_only, list_grad_val = PEPit._separate_leaf_functions_regarding_their_need_on_point(new_function, point2) @test length(list_nothing) == 0 @test length(list_grad_only) == 0 @test length(list_grad_val) == 1 diff --git a/test/test_pep.jl b/test/test_pep.jl index e9312f7..fbb1429 100644 --- a/test/test_pep.jl +++ b/test/test_pep.jl @@ -3,11 +3,11 @@ using JuMP: value @testset "PEP Tests" begin - Point_counter[] = 0 - Expression_counter[] = 0 - 
Function_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 L = 1.0 @@ -49,9 +49,9 @@ using JuMP: value PEPit_tau = solve!(problem; verbose=false) @test length(func._PEPit_func.list_of_constraints) == 2 - @test Point_counter[] == 3 - @test Expression_counter[] == 2 - @test Function_counter[] == 1 + @test PEPit.Point_counter[] == 3 + @test PEPit.Expression_counter[] == 2 + @test PEPit.Function_counter[] == 1 end @testset "Eval Points and Function Values" begin @@ -83,12 +83,12 @@ end @testset "PEP Duals and LMIs" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Function_counter[] = 0 - Global_Constraint_counter[] = 0 - PSDMatrix_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.PSDMatrix_counter[] = 0 + PEPit.NEXT_ID[] = 0 L = 1.0 @@ -198,11 +198,11 @@ end @testset "PEP: dimension reduction with logdet" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Function_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Function_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 L = 1.0 mu = 0.1 @@ -231,7 +231,7 @@ end res_base = solve!(build_problem(); verbose=false, return_full_model=true) res_logd = solve!(build_problem(); verbose=false, return_full_model=true, logdetiters=2) - nb_base, _, _ = _get_nb_eigs_and_corrected(value.(res_base.variables.G)) - nb_logd, _, _ = _get_nb_eigs_and_corrected(value.(res_logd.variables.G)) + nb_base, _, _ = PEPit._get_nb_eigs_and_corrected(value.(res_base.variables.G)) + nb_logd, _, _ = PEPit._get_nb_eigs_and_corrected(value.(res_logd.variables.G)) @test nb_logd <= nb_base end diff --git a/test/test_point.jl b/test/test_point.jl index 
1bd3357..9a2eb22 100644 --- a/test/test_point.jl +++ b/test/test_point.jl @@ -2,10 +2,10 @@ using Test @testset "Point" begin - Point_counter[] = 0 - Expression_counter[] = 0 - Global_Constraint_counter[] = 0 - NEXT_ID[] = 0 + PEPit.Point_counter[] = 0 + PEPit.Expression_counter[] = 0 + PEPit.Global_Constraint_counter[] = 0 + PEPit.NEXT_ID[] = 0 pep = PEP() A = Point() @@ -20,11 +20,11 @@ using Test @test A.counter == 0 @test B.counter == 1 @test C.counter === nothing - @test Point_counter[] == 2 + @test PEPit.Point_counter[] == 2 D = Point() @test D.counter == 2 - @test Point_counter[] == 3 + @test PEPit.Point_counter[] == 3 new_point = (-A) * 1.0 + 2 * B - (B / 5)