|
| 1 | +# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors #src |
| 2 | +# This Source Code Form is subject to the terms of the Mozilla Public License #src |
| 3 | +# v.2.0. If a copy of the MPL was not distributed with this file, You can #src |
| 4 | +# obtain one at https://mozilla.org/MPL/2.0/. #src |
| 5 | + |
| 6 | +# # Progressive Hedging |
| 7 | + |
| 8 | +# The purpose of this tutorial is to demonstrate the Progressive Hedging |
| 9 | +# algorithm. It may be helpful to read [Two-stage stochastic programs](@ref) |
| 10 | +# first. |
| 11 | + |
| 12 | +# ## Required packages |
| 13 | + |
| 14 | +# This tutorial requires the following packages: |
| 15 | + |
| 16 | +using JuMP |
| 17 | +import Distributions |
| 18 | +import Ipopt |
| 19 | +import Printf |
| 20 | + |
| 21 | +# ## Background |
| 22 | + |
| 23 | +# Progressive Hedging (PH) is a popular decomposition algorithm for stochastic |
| 24 | +# programming. It decomposes a stochastic problem into scenario subproblems that |
| 25 | +# are solved iteratively, with penalty terms driving solutions toward consensus. |
| 26 | + |
| 27 | +# In Progressive Hedging, each scenario subproblem includes a quadratic penalty |
| 28 | +# term: |
| 29 | +# ```math |
| 30 | +# \min\limits_{x_s}: f_s(x_s) + \frac{\rho}{2} ||x_s - \bar{x}||^2 + w_s^\top x_s |
| 31 | +# ``` |
| 32 | +# where: |
| 33 | +# - ``x_s`` is the primal variable in scenario ``s`` |
| 34 | +# - ``f_s(x)`` is the original scenario objective |
| 35 | +# - ``\rho`` is the penalty parameter |
| 36 | +# - ``\bar{x}`` is the current consensus (average) solution |
| 37 | +# - ``w_s`` is the dual price (Lagrangian multiplier) in scenario ``s`` |
| 38 | + |
| 39 | +# Progressive Hedging is an iterative algorithm. In each iteration, it solves |
| 40 | +# all the penalized scenario subproblems, then it applies two updates: |
| 41 | +# |
| 42 | +# 1. ``\bar{x} = \mathbb{E}_s[x_s]`` |
| 43 | +# 2. ``w_s = w_s + \rho (x_s - \bar{x})`` |
| 44 | +# |
| 45 | +# The algorithm terminates if $|\bar{x} - x_s| \le \varepsilon$ for all |
| 46 | +# scenarios (the primal residual), and $\bar{x}$ has not changed by much between |
| 47 | +# iterations (the dual residual). |
| 48 | + |
# ``\rho`` can be optionally updated between iterations. How to do so is an open
# question. There is a large literature on different update strategies.
| 51 | + |
| 52 | +# In this tutorial we use parameters for $\rho$, $w$, and $\bar{x}$ to |
| 53 | +# efficiently modify each scenario's subproblem between PH iterations. |
| 54 | + |
| 55 | +# ## Building a single scenario |
| 56 | + |
| 57 | +# The building block of Progressive Hedging is a separate JuMP model for each |
| 58 | +# scenario. Here's an example, using the problem from |
| 59 | +# [Two-stage stochastic programs](@ref): |
| 60 | + |
function build_subproblem(; demand::Float64)
    model = Model(Ipopt.Optimizer)
    set_silent(model)
    ## First-stage decision: units produced before demand is known.
    @variable(model, x >= 0)
    ## Second-stage decision: units sold, capped by the realized demand.
    @variable(model, 0 <= y <= demand)
    ## We cannot sell more than we produced.
    @constraint(model, y <= x)
    ## Progressive Hedging data, stored as parameters so that each iteration
    ## can update them in-place without rebuilding the model.
    @variable(model, ρ in Parameter(1))
    @variable(model, x̄ in Parameter(0))
    @variable(model, w in Parameter(0))
    ## The original (unpenalized) scenario objective.
    @expression(model, f_s, 2x - 5y + 0.1 * (x - y))
    ## Penalized objective: f_s + (ρ/2)‖x - x̄‖² + wᵀx.
    @objective(model, Min, f_s + 0.5 * ρ * (x - x̄)^2 + w * x)
    return model
end
| 74 | + |
| 75 | +# Using the `build_subproblem` function, we can create one JuMP model for each |
| 76 | +# scenario: |
| 77 | + |
N = 10
## Sample N demand scenarios from a triangular distribution on [150, 250]
## with mode 200.
demands = rand(Distributions.TriangularDist(150.0, 250.0, 200.0), N);
## One named tuple per scenario: the JuMP model plus its (uniform) probability.
subproblems = [
    (; model = build_subproblem(; demand), probability = 1 / N) for
    demand in demands
];
| 83 | + |
| 84 | +# ## The Progressive Hedging loop |
| 85 | + |
| 86 | +# We're almost ready for our optimization loop, but first, here's a helpful |
| 87 | +# function for logging: |
| 88 | + |
## Log one row of the iteration table, printing only every 10th iteration to
## keep the output short. Returns `nothing`.
function print_iteration(iter, args...)
    if iter % 10 == 0
        cells = [Printf.@sprintf("%15.4e", arg) for arg in args]
        println(lpad(iter, 9), " ", join(cells, " "))
    end
    return
end
| 96 | + |
| 97 | +# Now we can implement our algorithm: |
| 98 | + |
## Run Progressive Hedging with a fixed penalty parameter `ρ`, returning the
## consensus first-stage decision `x̄`. Terminates when both the primal
## residual max_s |x_s - x̄| and the dual residual ρ|x̄ - x̄_old| fall below
## `atol`, or after `iteration_limit` iterations.
function solve_progressive_hedging(
    subproblems;
    iteration_limit::Int = 400,
    atol::Float64 = 1e-4,
    ρ::Float64 = 1.0,
)
    S = length(subproblems)
    x = zeros(S)
    w = zeros(S)
    x̄, x̄_old = 0.0, 0.0
    println("iteration primal_residual dual_residual")
    for iteration in 1:iteration_limit
        for (s, subproblem) in enumerate(subproblems)
            ## Push the current ρ, consensus value, and dual price into the
            ## scenario model via its parameters.
            set_parameter_value(subproblem.model[:ρ], ρ)
            set_parameter_value(subproblem.model[:x̄], x̄)
            set_parameter_value(subproblem.model[:w], w[s])
            optimize!(subproblem.model)
            assert_is_solved_and_feasible(subproblem.model)
            ## Record this scenario's first-stage solution.
            x[s] = value(subproblem.model[:x])
        end
        ## Consensus step: probability-weighted average of the scenario
        ## solutions.
        x̄ = sum(sp.probability * x_s for (sp, x_s) in zip(subproblems, x))
        primal_residual = maximum(abs(x_s - x̄) for x_s in x)
        dual_residual = ρ * abs(x̄ - x̄_old)
        print_iteration(iteration, primal_residual, dual_residual)
        if primal_residual < atol && dual_residual < atol
            break
        end
        ## Dual update: w_s ← w_s + ρ(x_s - x̄).
        x̄_old = x̄
        for s in eachindex(w)
            w[s] += ρ * (x[s] - x̄)
        end
    end
    return x̄
end
| 138 | + |
## Run the fixed-ρ algorithm; the trailing `;` suppresses output here.
x̄ = solve_progressive_hedging(subproblems);

# The consensus first-stage decision is:

x̄
| 144 | + |
| 145 | +# ## Progressive Hedging with an adaptive penalty parameter |
| 146 | + |
# You can also make the penalty parameter ``\rho`` adaptive. How to do so is an
# open question. There is a large literature on different update strategies.
# One approach is to increase ``\rho`` if the primal residual is much larger
# than the dual residual, and to decrease ``\rho`` if the dual residual is much
# larger than the primal residual.
| 152 | + |
## Progressive Hedging with residual balancing: ρ is multiplied by `τ` when
## the primal residual exceeds `μ` times the dual residual, and divided by `τ`
## in the symmetric case. Returns the consensus first-stage decision `x̄`.
function solve_adaptive_progressive_hedging(
    subproblems;
    iteration_limit::Int = 400,
    atol::Float64 = 1e-4,
    ρ::Float64 = 1.0,
    τ::Float64 = 1.3,
    μ::Float64 = 15.0,
)
    S = length(subproblems)
    x, w = zeros(S), zeros(S)
    x̄, x̄_old = 0.0, 0.0
    println("iteration primal_residual dual_residual")
    for iteration in 1:iteration_limit
        for (s, subproblem) in enumerate(subproblems)
            ## Push the current ρ, consensus value, and dual price into the
            ## scenario model via its parameters.
            set_parameter_value(subproblem.model[:ρ], ρ)
            set_parameter_value(subproblem.model[:x̄], x̄)
            set_parameter_value(subproblem.model[:w], w[s])
            optimize!(subproblem.model)
            assert_is_solved_and_feasible(subproblem.model)
            x[s] = value(subproblem.model[:x])
        end
        ## Consensus and residuals.
        x̄ = sum(sp.probability * x_s for (sp, x_s) in zip(subproblems, x))
        primal_residual = maximum(abs(x_s - x̄) for x_s in x)
        dual_residual = ρ * abs(x̄ - x̄_old)
        print_iteration(iteration, primal_residual, dual_residual)
        if primal_residual < atol && dual_residual < atol
            break
        end
        ## Dual update: w_s ← w_s + ρ(x_s - x̄).
        for s in eachindex(w)
            w[s] += ρ * (x[s] - x̄)
        end
        x̄_old = x̄
        ## Residual balancing: grow ρ when the primal residual dominates,
        ## shrink ρ when the dual residual dominates.
        if primal_residual > μ * dual_residual
            ρ *= τ
        elseif dual_residual > μ * primal_residual
            ρ /= τ
        end
    end
    return x̄
end
| 191 | + |
## Run the adaptive-ρ variant; the trailing `;` suppresses output here.
x̄ = solve_adaptive_progressive_hedging(subproblems);

# The consensus first-stage decision is:

x̄
| 197 | + |
| 198 | +# Try tuning the values of `τ` and `μ`. Can you get the algorithm to converge |
| 199 | +# in fewer iterations? |
0 commit comments