Skip to content

Commit d9cd737

Browse files
Add an upper bound of the operator norm for limited-memory qn (#395)
1 parent 022913e commit d9cd737

File tree

4 files changed

+41
-2
lines changed

4 files changed

+41
-2
lines changed

src/lbfgs.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@ mutable struct LBFGSData{T, I <: Integer}
88
damped::Bool
99
σ₂::T
1010
σ₃::T
11+
opnorm_upper_bound::T # Upper bound for the operator norm ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ ‖bᵢ‖₂²
1112
s::Vector{Vector{T}}
1213
y::Vector{Vector{T}}
1314
ys::Vector{T}
1415
α::Vector{T}
1516
a::Vector{Vector{T}}
1617
b::Vector{Vector{T}}
18+
norm_b::Vector{T}
1719
insert::I
1820
Ax::Vector{T}
1921
shifted_p::Matrix{T} # Temporary matrix used in the computation solve_shifted_system!
@@ -38,12 +40,14 @@ function LBFGSData(
3840
damped,
3941
convert(T, σ₂),
4042
convert(T, σ₃),
43+
convert(T, 1),
4144
[zeros(T, n) for _ = 1:mem],
4245
[zeros(T, n) for _ = 1:mem],
4346
zeros(T, mem),
4447
inverse ? zeros(T, mem) : zeros(T, 0),
4548
inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem],
4649
inverse ? Vector{T}(undef, 0) : [zeros(T, n) for _ = 1:mem],
50+
inverse ? Vector{T}(undef, 0) : zeros(T, mem),
4751
1,
4852
Vector{T}(undef, n),
4953
Array{T}(undef, (n, 2 * mem)),
@@ -217,11 +221,18 @@ function push_common!(
217221
data.s[insert] .= s
218222
data.y[insert] .= y
219223
data.ys[insert] = ys
220-
op.data.scaling && (op.data.scaling_factor = ys / dot(y, y))
224+
if op.data.scaling
225+
!iszero(data.scaling_factor) && (data.opnorm_upper_bound -= 1 / op.data.scaling_factor)
226+
op.data.scaling_factor = ys / dot(y, y)
227+
!iszero(data.scaling_factor) && (data.opnorm_upper_bound += 1 / op.data.scaling_factor)
228+
end
221229

222230
# Update arrays a and b used in forward products.
223231
if !op.inverse
232+
data.opnorm_upper_bound -= data.norm_b[insert]
224233
data.b[insert] .= y ./ sqrt(ys)
234+
data.norm_b[insert] = norm(data.b[insert])
235+
data.opnorm_upper_bound += data.norm_b[insert]
225236

226237
@inbounds for i = 1:(data.mem)
227238
k = mod(insert + i - 1, data.mem) + 1

src/lsr1.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mutable struct LSR1Data{T, I <: Integer}
55
mem::I
66
scaling::Bool
77
scaling_factor::T
8+
opnorm_upper_bound::T # Upper bound for the operator norm ‖Bₖ‖₂ ≤ ‖B₀‖₂ + ∑ᵢ |σᵢ|‖aᵢ‖₂².
89
s::Vector{Vector{T}}
910
y::Vector{Vector{T}}
1011
ys::Vector{T}
@@ -20,6 +21,7 @@ function LSR1Data(T::Type, n::I; mem::I = 5, scaling::Bool = true) where {I <: I
2021
max(mem, 1),
2122
scaling,
2223
convert(T, 1),
24+
convert(T, 1),
2325
[zeros(T, n) for _ = 1:mem],
2426
[zeros(T, n) for _ = 1:mem],
2527
zeros(T, mem),
@@ -152,7 +154,11 @@ function push!(op::LSR1Operator, s::AbstractVector, y::AbstractVector)
152154
data.ys[data.insert] = ys
153155

154156
# update scaling factor
155-
data.scaling && (data.scaling_factor = ys / yy)
157+
data.opnorm_upper_bound = convert(typeof(data.opnorm_upper_bound), 1)
158+
if data.scaling
159+
(data.scaling_factor = ys / yy)
160+
!iszero(data.scaling_factor) && (data.opnorm_upper_bound = 1 / abs(op.data.scaling_factor))
161+
end
156162

157163
# update next insertion position
158164
data.insert = mod(data.insert, data.mem) + 1
@@ -170,6 +176,8 @@ function push!(op::LSR1Operator, s::AbstractVector, y::AbstractVector)
170176
end
171177
end
172178
data.as[k] = dot(data.a[k], data.s[k])
179+
180+
!iszero(data.as[k]) && (data.opnorm_upper_bound += norm(data.a[k])^2/abs(data.as[k]))
173181
end
174182
end
175183

test/test_lbfgs.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ function test_lbfgs()
6565
@test H.data.scaling_factor == 1.0
6666
@test norm(B * v - v) < rtol
6767
@test norm(H * v - v) < rtol
68+
69+
# test upper bound
70+
@test opnorm(Matrix(B)) B.data.opnorm_upper_bound
6871
end
6972

7073
# test against full BFGS without scaling
@@ -95,6 +98,9 @@ function test_lbfgs()
9598
@test norm(diag(LB) - diag(B)) < rtol * norm(diag(B))
9699
end
97100

101+
# Test upper bound
102+
@test opnorm(B) LB.data.opnorm_upper_bound
103+
98104
# test damped L-BFGS
99105
B = LBFGSOperator(n, mem = mem, damped = true, scaling = false, σ₂ = 0.8, σ₃ = Inf)
100106
H = InverseLBFGSOperator(n, mem = mem, damped = true, scaling = false, σ₂ = 0.8, σ₃ = Inf)
@@ -129,6 +135,10 @@ function test_lbfgs()
129135

130136
@test norm(Matrix(H * B) - Matrix(1.0I, n, n)) <= rtol
131137

138+
# Test upper bound
139+
@test opnorm(Matrix(B)) B.data.opnorm_upper_bound
140+
141+
132142
# test against full BFGS without scaling
133143
mem = n
134144
LB = LBFGSOperator(n, mem = mem, damped = true, scaling = false)
@@ -145,6 +155,10 @@ function test_lbfgs()
145155
@test norm(Matrix(LB) - B) < rtol * norm(B)
146156
@test norm(diag(LB) - diag(B)) < rtol * norm(diag(B))
147157
end
158+
159+
# Test upper bound
160+
@test opnorm(Matrix(B)) LB.data.opnorm_upper_bound
161+
148162
end
149163

150164
@testset ExtendedTestSet "Different precision" begin

test/test_lsr1.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ function test_lsr1()
3535
reset!(B)
3636
@test B.data.scaling_factor == 1.0
3737
@test norm(B * v - v) < rtol
38+
39+
# Test upper bound
40+
@test opnorm(Matrix(B)) B.data.opnorm_upper_bound
3841
end
3942

4043
# test against full SR1 without scaling
@@ -63,6 +66,9 @@ function test_lsr1()
6366
@test norm(Matrix(LB) - B) < rtol * norm(B)
6467
@test norm(diag(LB) - diag(B)) < rtol * norm(diag(B))
6568
end
69+
70+
# Test upper bound
71+
@test opnorm(B) LB.data.opnorm_upper_bound
6672
end
6773

6874
@testset ExtendedTestSet "Different precision" begin

0 commit comments

Comments
 (0)