diff --git a/src/lbfgs.jl b/src/lbfgs.jl index e933b7b9..c4708d88 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -8,12 +8,14 @@ mutable struct LBFGSData{T, I <: Integer} damped::Bool σ₂::T σ₃::T + opnorm_upper_bound::T # Upper bound for the operator norm ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂² s::Vector{Vector{T}} y::Vector{Vector{T}} ys::Vector{T} α::Vector{T} a::Vector{Vector{T}} b::Vector{Vector{T}} + norm_b::Vector{T} insert::I Ax::Vector{T} shifted_p::Matrix{T} # Temporary matrix used in the computation solve_shifted_system! @@ -38,12 +40,14 @@ function LBFGSData( damped, convert(T, σ₂), convert(T, σ₃), + convert(T, 1), [zeros(T, n) for _ = 1:mem], [zeros(T, n) for _ = 1:mem], zeros(T, mem), inverse ? zeros(T, mem) : zeros(T, 0), inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem], inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem], + inverse ? Vector{T}(undef, 0) : zeros(T, mem), 1, Vector{T}(undef, n), Array{T}(undef, (n, 2 * mem)), @@ -217,11 +221,18 @@ function push_common!( data.s[insert] .= s data.y[insert] .= y data.ys[insert] = ys - op.data.scaling && (op.data.scaling_factor = ys / dot(y, y)) + if op.data.scaling + !iszero(data.scaling_factor) && (data.opnorm_upper_bound -= 1 / op.data.scaling_factor) + op.data.scaling_factor = ys / dot(y, y) + !iszero(data.scaling_factor) && (data.opnorm_upper_bound += 1 / op.data.scaling_factor) + end # Update arrays a and b used in forward products. if !op.inverse + data.opnorm_upper_bound -= data.norm_b[insert] data.b[insert] .= y ./ sqrt(ys) + data.norm_b[insert] = norm(data.b[insert]) + data.opnorm_upper_bound += data.norm_b[insert] @inbounds for i = 1:(data.mem) k = mod(insert + i - 1, data.mem) + 1 diff --git a/src/lsr1.jl b/src/lsr1.jl index 7319fb29..87960893 100644 --- a/src/lsr1.jl +++ b/src/lsr1.jl @@ -5,6 +5,7 @@ mutable struct LSR1Data{T, I <: Integer} mem::I scaling::Bool scaling_factor::T + opnorm_upper_bound::T # Upper bound for the operator norm ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂². s::Vector{Vector{T}} y::Vector{Vector{T}} ys::Vector{T} @@ -20,6 +21,7 @@ function LSR1Data(T::Type, n::I; mem::I = 5, scaling::Bool = true) where {I <: I max(mem, 1), scaling, convert(T, 1), + convert(T, 1), [zeros(T, n) for _ = 1:mem], [zeros(T, n) for _ = 1:mem], zeros(T, mem), @@ -152,7 +154,11 @@ function push!(op::LSR1Operator, s::AbstractVector, y::AbstractVector) data.ys[data.insert] = ys # update scaling factor - data.scaling && (data.scaling_factor = ys / yy) + data.opnorm_upper_bound = convert(typeof(data.opnorm_upper_bound), 1) + if data.scaling + (data.scaling_factor = ys / yy) + !iszero(data.scaling_factor) && (data.opnorm_upper_bound = 1 / abs(op.data.scaling_factor)) + end # update next insertion position data.insert = mod(data.insert, data.mem) + 1 @@ -170,6 +176,8 @@ function push!(op::LSR1Operator, s::AbstractVector, y::AbstractVector) end end data.as[k] = dot(data.a[k], data.s[k]) + + !iszero(data.as[k]) && (data.opnorm_upper_bound += norm(data.a[k])^2/abs(data.as[k])) end end diff --git a/test/test_lbfgs.jl b/test/test_lbfgs.jl index 250cec9a..4c2d4bd3 100644 --- a/test/test_lbfgs.jl +++ b/test/test_lbfgs.jl @@ -65,6 +65,9 @@ function test_lbfgs() @test H.data.scaling_factor == 1.0 @test norm(B * v - v) < rtol @test norm(H * v - v) < rtol + + # test upper bound + @test opnorm(Matrix(B)) ≤ B.data.opnorm_upper_bound end # test against full BFGS without scaling @@ -95,6 +98,9 @@ function test_lbfgs() @test norm(diag(LB) - diag(B)) < rtol * norm(diag(B)) end + # Test upper bound + @test opnorm(B) ≤ LB.data.opnorm_upper_bound + # test damped L-BFGS B = LBFGSOperator(n, mem = mem, damped = true, scaling = false, σ₂ = 0.8, σ₃ = Inf) H = InverseLBFGSOperator(n, mem = mem, damped = true, scaling = false, σ₂ = 0.8, σ₃ = Inf) @@ -129,6 +135,10 @@ function test_lbfgs() @test norm(Matrix(H * B) - Matrix(1.0I, n, n)) <= rtol + # Test upper bound + @test opnorm(Matrix(B)) ≤ B.data.opnorm_upper_bound + + # test against full BFGS without scaling mem = n LB = LBFGSOperator(n, mem = mem, damped = true, scaling = false) @@ -145,6 +155,10 @@ function test_lbfgs() @test norm(Matrix(LB) - B) < rtol * norm(B) @test norm(diag(LB) - diag(B)) < rtol * norm(diag(B)) end + + # Test upper bound + @test opnorm(Matrix(B)) ≤ LB.data.opnorm_upper_bound + end @testset ExtendedTestSet "Different precision" begin diff --git a/test/test_lsr1.jl b/test/test_lsr1.jl index 1698d542..85512d30 100644 --- a/test/test_lsr1.jl +++ b/test/test_lsr1.jl @@ -35,6 +35,9 @@ function test_lsr1() reset!(B) @test B.data.scaling_factor == 1.0 @test norm(B * v - v) < rtol + + # Test upper bound + @test opnorm(Matrix(B)) ≤ B.data.opnorm_upper_bound end # test against full SR1 without scaling @@ -63,6 +66,9 @@ function test_lsr1() @test norm(Matrix(LB) - B) < rtol * norm(B) @test norm(diag(LB) - diag(B)) < rtol * norm(diag(B)) end + + # Test upper bound + @test opnorm(B) ≤ LB.data.opnorm_upper_bound end @testset ExtendedTestSet "Different precision" begin