1+ #=
2+ Compressed LBFGS implementation from:
3+ REPRESENTATIONS OF QUASI-NEWTON MATRICES AND THEIR USE IN LIMITED MEMORY METHODS
4+ Richard H. Byrd, Jorge Nocedal and Robert B. Schnabel (1994)
5+ DOI: 10.1007/BF01582063
6+
7+ Implemented by Paul Raynaud (supervised by Dominique Orban)
8+ =#
9+
10+ using LinearAlgebra, LinearAlgebra. BLAS
11+ using Requires
12+
13+ export CompressedLBFGSOperator, CompressedLBFGSData
14+ # export default_matrix_type, default_vector_type
15+
16+ default_matrix_type (; T:: DataType = Float64) = Matrix{T}
17+ default_vector_type (; T:: DataType = Float64) = Vector{T}
18+
19+ @init begin
20+ @require CUDA = " 052768ef-5323-5732-b1bb-66c8b64840ba" begin
21+ default_matrix_type (; T:: DataType = Float64) = CUDA. functional () ? CUDA. CuMatrix{T, CUDA. Mem. DeviceBuffer} : Matrix{T}
22+ default_vector_type (; T:: DataType = Float64) = CUDA. functional () ? CUDA. CuVector{T, CUDA. Mem. DeviceBuffer} : Vector{T}
23+ end
24+ # this scheme may be extended to other GPU backend modules
25+ end
26+
27+ function columnshift! (A:: AbstractMatrix{T} ; direction:: Int = - 1 , indicemax:: Int = size (A)[1 ]) where T
28+ map (i-> view (A,:,i+ direction) .= view (A,:,i), 1 - direction: indicemax)
29+ return A
30+ end
31+
32+ function vectorshift! (v:: AbstractVector{T} ; direction:: Int = - 1 , indicemax:: Int = length (v)) where T
33+ view (v, 1 : indicemax+ direction) .= view (v,1 - direction: indicemax)
34+ return v
35+ end
36+
37+ """
38+ CompressedLBFGSData{T, M<:AbstractMatrix{T}, V<:AbstractVector{T}}
39+
40+ A LBFGS limited-memory operator.
41+ It represents a linear application Rⁿˣⁿ, considering at most `mem` BFGS updates.
42+ This implementation considers the bloc matrices reoresentation of the BFGS (forward) update.
43+ It follows the algorithm described in [REPRESENTATIONS OF QUASI-NEWTON MATRICES AND THEIR USE IN LIMITED MEMORY METHODS](https://link.springer.com/article/10.1007/BF01582063) from Richard H. Byrd, Jorge Nocedal and Robert B. Schnabel (1994).
44+ This operator considers several fields directly related to the bloc representation of the operator:
45+ - `mem`: the maximal memory of the operator;
46+ - `n`: the dimension of the linear application;
47+ - `k`: the current memory's size of the operator;
48+ - `α`: scalar for `B₀ = α I`;
49+ - `Sₖ`: retain the `k`-th last vectors `s` from the updates parametrized by `(s,y)`;
50+ - `Yₖ`: retain the `k`-th last vectors `y` from the updates parametrized by `(s,y)`;;
51+ - `Dₖ`: a diagonal matrix mandatory to perform the linear application and to form the matrix;
52+ - `Lₖ`: a lower diagonal mandatory to perform the linear application and to form the matrix.
53+ In addition to this structures which are circurlarly update when `k` reaches `mem`, we consider other intermediate data structures renew at each update:
54+ - `chol_matrix`: a matrix required to store a Cholesky factorization of a Rᵏˣᵏ matrix;
55+ - `intermediate_1`: a R²ᵏˣ²ᵏ matrix;
56+ - `intermediate_2`: a R²ᵏˣ²ᵏ matrix;
57+ - `inverse_intermediate_1`: a R²ᵏˣ²ᵏ matrix;
58+ - `inverse_intermediate_2`: a R²ᵏˣ²ᵏ matrix;
59+ - `intermediary_vector`: a vector ∈ Rᵏ to store intermediate solutions;
60+ - `sol`: a vector ∈ Rᵏ to store intermediate solutions;
61+ This implementation is designed to work either on CPU or GPU.
62+ """
63+ mutable struct CompressedLBFGSData{T, M<: AbstractMatrix{T} , V<: AbstractVector{T} , I <: Integer }
64+ mem:: Int # memory of the operator
65+ n:: I # vector size
66+ k:: I # k ≤ mem, active memory of the operator
67+ α:: T # B₀ = αI
68+ Sₖ:: M # gather all sₖ₋ₘ : n * mem
69+ Yₖ:: M # gather all yₖ₋ₘ : n * mem
70+ Dₖ:: Diagonal{T,V} # mem * mem
71+ Lₖ:: LowerTriangular{T,M} # mem * mem
72+
73+ chol_matrix:: M # 2mem * 2mem
74+ intermediate_diagonal:: Diagonal{T,V} # mem * mem
75+ intermediate_1:: UpperTriangular{T,M} # 2mem * 2mem
76+ intermediate_2:: LowerTriangular{T,M} # 2mem * 2mem
77+ inverse_intermediate_1:: UpperTriangular{T,M} # 2mem * 2mem
78+ inverse_intermediate_2:: LowerTriangular{T,M} # 2mem * 2mem
79+ intermediary_vector:: V # 2mem
80+ sol:: V # 2mem
81+
82+ nprod:: I
83+ end
84+
85+ """
86+ CompressedLBFGSData(n::Int; [T=Float64, mem=5], gpu:Bool)
87+
88+ A implementation of a LBFGS operator (forward), representing a `nxn` linear application.
89+ It considers at most `k` BFGS iterates, and fit the architecture depending if it is launched on a CPU or a GPU.
90+ """
91+ function CompressedLBFGSData (n:: I ; mem:: I = 5 , T= Float64, M= default_matrix_type (; T), V= default_vector_type (; T)) where {I<: Integer }
92+ α = (T)(1 )
93+ k = 0
94+ Sₖ = M (undef, n, mem)
95+ Yₖ = M (undef, n, mem)
96+ Dₖ = Diagonal (V (undef, mem))
97+ Lₖ = LowerTriangular (M (undef, mem, mem))
98+ Lₖ. data .= zero (T)
99+
100+ chol_matrix = M (undef, mem, mem)
101+ intermediate_diagonal = Diagonal (V (undef, mem))
102+ intermediate_1 = UpperTriangular (M (undef, 2 * mem, 2 * mem))
103+ intermediate_2 = LowerTriangular (M (undef, 2 * mem, 2 * mem))
104+ inverse_intermediate_1 = UpperTriangular (M (undef, 2 * mem, 2 * mem))
105+ inverse_intermediate_2 = LowerTriangular (M (undef, 2 * mem, 2 * mem))
106+ intermediary_vector = V (undef, 2 * mem)
107+ sol = V (undef, 2 * mem)
108+
109+ nprod = 0
110+
111+ return CompressedLBFGSData {T,M,V,I} (mem, n, k, α, Sₖ, Yₖ, Dₖ, Lₖ, chol_matrix, intermediate_diagonal, intermediate_1, intermediate_2, inverse_intermediate_1, inverse_intermediate_2, intermediary_vector, sol, nprod)
112+ end
113+
114+ mutable struct CompressedLBFGSOperator{T, M<: AbstractMatrix{T} , V<: AbstractVector{T} , F, I <: Integer } <: AbstractQuasiNewtonOperator{T}
115+ nrow:: I
116+ ncol:: I
117+ symmetric:: Bool
118+ hermitian:: Bool
119+ Bv:: V
120+ data:: CompressedLBFGSData{T,M,V}
121+ prod!:: F # apply the operator to a vector
122+ tprod!:: F # apply the transpose operator to a vector
123+ ctprod!:: F # apply the transpose conjugate operator to a vector
124+ end
125+
126+ function CompressedLBFGSOperator (n:: I ; mem:: I = 5 , T= Float64, M= default_matrix_type (; T), V= default_vector_type (; T)) where {I <: Integer }
127+ nrow = n
128+ ncol = n
129+ symmetric = true
130+ hermitian = true
131+ Bv = V (undef, n)
132+ data = CompressedLBFGSData (n; mem, T, M, V)
133+
134+ prod! = @closure (res, v, α, β) -> begin
135+ mul! (Bv, data, v)
136+ if β == zero (T)
137+ res .= α .* Bv
138+ else
139+ res .= α .* Bv .+ β .* res
140+ end
141+ end
142+
143+ F = typeof (prod!)
144+
145+ return CompressedLBFGSOperator {T,M,V,F,I} (nrow, ncol, symmetric, hermitian, Bv, data, prod!, prod!, prod!)
146+ end
147+
148+ has_args5 (op:: CompressedLBFGSOperator ) = true
149+ use_prod5! (op:: CompressedLBFGSOperator ) = true
150+
151+ Base. push! (op:: CompressedLBFGSOperator{T,M,V} , s:: V , y:: V ) where {T,M,V<: AbstractVector{T} } = Base. push! (op. data, s, y)
152+ function Base. push! (data:: CompressedLBFGSData{T,M,V} , s:: V , y:: V ) where {T,M,V<: AbstractVector{T} }
153+ if data. k < data. mem # still some place in the structures
154+ data. k += 1
155+ view (data. Sₖ, :, data. k) .= s
156+ view (data. Yₖ, :, data. k) .= y
157+ view (data. Dₖ. diag, data. k) .= dot (s, y)
158+ mul! (view (data. Lₖ. data, data. k, 1 : data. k- 1 ), transpose (view (data. Yₖ, :, 1 : data. k- 1 )), view (data. Sₖ, :, data. k) )
159+ else # k == mem update circurlarly the intermediary structures
160+ columnshift! (data. Sₖ; indicemax= data. k)
161+ columnshift! (data. Yₖ; indicemax= data. k)
162+ # data.Dₖ .= circshift(data.Dₖ, (-1, -1))
163+ vectorshift! (data. Dₖ. diag; indicemax= data. k)
164+ view (data. Sₖ, :, data. k) .= s
165+ view (data. Yₖ, :, data. k) .= y
166+ view (data. Dₖ. diag, data. k) .= dot (s, y)
167+
168+ map (i-> view (data. Lₖ, i: data. mem- 1 , i- 1 ) .= view (data. Lₖ, i+ 1 : data. mem, i), 2 : data. mem)
169+ mul! (view (data. Lₖ. data, data. k, 1 : data. k- 1 ), transpose (view (data. Yₖ, :, 1 : data. k- 1 )), view (data. Sₖ, :, data. k) )
170+ end
171+
172+ # step 4 and 6
173+ precompile_iterated_structure! (data)
174+
175+ # secant equation fails if uncommented
176+ # data.α = dot(y,s)/dot(s,s)
177+ return data
178+ end
179+
180+ # Algorithm 3.2 (p15)
181+ # Theorem 2.3 (p6)
182+ Base. Matrix (op:: CompressedLBFGSOperator{T,M,V} ) where {T,M,V} = Base. Matrix (op. data)
183+ function Base. Matrix (data:: CompressedLBFGSData{T,M,V} ) where {T,M,V}
184+ B₀ = M (undef, data. n, data. n)
185+ map (i -> B₀[i, i] = data. α, 1 : data. n)
186+
187+ BSY = M (undef, data. n, 2 * data. k)
188+ (data. k > 0 ) && (BSY[:, 1 : data. k] = B₀ * data. Sₖ[:, 1 : data. k])
189+ (data. k > 0 ) && (BSY[:, data. k+ 1 : 2 * data. k] = data. Yₖ[:, 1 : data. k])
190+ _C = M (undef, 2 * data. k, 2 * data. k)
191+ (data. k > 0 ) && (_C[1 : data. k, 1 : data. k] .= transpose (data. Sₖ[:, 1 : data. k]) * data. Sₖ[:, 1 : data. k])
192+ (data. k > 0 ) && (_C[1 : data. k, data. k+ 1 : 2 * data. k] .= data. Lₖ[1 : data. k, 1 : data. k])
193+ (data. k > 0 ) && (_C[data. k+ 1 : 2 * data. k, 1 : data. k] .= transpose (data. Lₖ[1 : data. k, 1 : data. k]))
194+ (data. k > 0 ) && (_C[data. k+ 1 : 2 * data. k, data. k+ 1 : 2 * data. k] .- = data. Dₖ[1 : data. k, 1 : data. k])
195+ C = inv (_C)
196+
197+ Bₖ = B₀ .- BSY * C * transpose (BSY)
198+ return Bₖ
199+ end
200+
201+ # Algorithm 3.2 (p15)
202+ # step 4, Jₖ is computed only if needed
203+ function inverse_cholesky (data:: CompressedLBFGSData{T,M,V} ) where {T,M,V}
204+ view (data. intermediate_diagonal. diag, 1 : data. k) .= inv .(view (data. Dₖ. diag, 1 : data. k))
205+
206+ mul! (view (data. inverse_intermediate_1, 1 : data. k, 1 : data. k), view (data. intermediate_diagonal, 1 : data. k, 1 : data. k), transpose (view (data. Lₖ, 1 : data. k, 1 : data. k)))
207+ mul! (view (data. chol_matrix, 1 : data. k, 1 : data. k), view (data. Lₖ, 1 : data. k, 1 : data. k), view (data. inverse_intermediate_1, 1 : data. k, 1 : data. k))
208+
209+ mul! (view (data. chol_matrix, 1 : data. k, 1 : data. k), transpose (view (data. Sₖ, :, 1 : data. k)), view (data. Sₖ, :, 1 : data. k), data. α, (T)(1 ))
210+
211+ cholesky! (Symmetric (view (data. chol_matrix, 1 : data. k, 1 : data. k)))
212+ Jₖ = transpose (UpperTriangular (view (data. chol_matrix, 1 : data. k, 1 : data. k)))
213+ return Jₖ
214+ end
215+
216+ # step 6, must be improve
217+ function precompile_iterated_structure! (data:: CompressedLBFGSData )
218+ Jₖ = inverse_cholesky (data)
219+
220+ # constant update
221+ view (data. intermediate_1, data. k+ 1 : 2 * data. k, 1 : data. k) .= 0
222+ view (data. intermediate_2, 1 : data. k, data. k+ 1 : 2 * data. k) .= 0
223+ view (data. intermediate_1, data. k+ 1 : 2 * data. k, data. k+ 1 : 2 * data. k) .= transpose (Jₖ)
224+ view (data. intermediate_2, data. k+ 1 : 2 * data. k, data. k+ 1 : 2 * data. k) .= Jₖ
225+
226+ # updates related to D^(1/2)
227+ view (data. intermediate_diagonal. diag, 1 : data. k) .= sqrt .(view (data. Dₖ. diag, 1 : data. k))
228+ view (data. intermediate_1, 1 : data. k,1 : data. k) .= .- view (data. intermediate_diagonal, 1 : data. k, 1 : data. k)
229+ view (data. intermediate_2, 1 : data. k, 1 : data. k) .= view (data. intermediate_diagonal, 1 : data. k, 1 : data. k)
230+
231+ # updates related to D^(-1/2)
232+ view (data. intermediate_diagonal. diag, 1 : data. k) .= (x -> 1 / sqrt (x)). (view (data. Dₖ. diag, 1 : data. k))
233+ mul! (view (data. intermediate_1, 1 : data. k,data. k+ 1 : 2 * data. k), view (data. intermediate_diagonal, 1 : data. k, 1 : data. k), transpose (view (data. Lₖ, 1 : data. k, 1 : data. k)))
234+ mul! (view (data. intermediate_2, data. k+ 1 : 2 * data. k, 1 : data. k), view (data. Lₖ, 1 : data. k, 1 : data. k), view (data. intermediate_diagonal, 1 : data. k, 1 : data. k))
235+ view (data. intermediate_2, data. k+ 1 : 2 * data. k, 1 : data. k) .= view (data. intermediate_2, data. k+ 1 : 2 * data. k, 1 : data. k) .* - 1
236+
237+ view (data. inverse_intermediate_1, 1 : 2 * data. k, 1 : 2 * data. k) .= inv (data. intermediate_1[1 : 2 * data. k, 1 : 2 * data. k])
238+ view (data. inverse_intermediate_2, 1 : 2 * data. k, 1 : 2 * data. k) .= inv (data. intermediate_2[1 : 2 * data. k, 1 : 2 * data. k])
239+ end
240+
241+ # Algorithm 3.2 (p15)
242+ LinearAlgebra. mul! (Bv:: V , op:: CompressedLBFGSOperator{T,M,V} , v:: V ) where {T,M,V<: AbstractVector{T} } = LinearAlgebra. mul! (Bv, op. data, v)
243+ function LinearAlgebra. mul! (Bv:: V , data:: CompressedLBFGSData{T,M,V} , v:: V ) where {T,M,V<: AbstractVector{T} }
244+ data. nprod += 1
245+ # step 1-4 and 6 mainly done by Base.push!
246+ # step 5
247+ mul! (view (data. sol, 1 : data. k), transpose (view (data. Yₖ, :, 1 : data. k)), v)
248+ mul! (view (data. sol, data. k+ 1 : 2 * data. k), transpose (view (data. Sₖ, :, 1 : data. k)), v)
249+ # scal!(data.α, view(data.sol, data.k+1:2*data.k)) # more allocation, slower
250+ view (data. sol, data. k+ 1 : 2 * data. k) .*= data. α
251+
252+ mul! (view (data. intermediary_vector, 1 : 2 * data. k), view (data. inverse_intermediate_2, 1 : 2 * data. k, 1 : 2 * data. k), view (data. sol, 1 : 2 * data. k))
253+ mul! (view (data. sol, 1 : 2 * data. k), view (data. inverse_intermediate_1, 1 : 2 * data. k, 1 : 2 * data. k), view (data. intermediary_vector, 1 : 2 * data. k))
254+
255+ # step 7
256+ mul! (Bv, view (data. Yₖ, :, 1 : data. k), view (data. sol, 1 : data. k))
257+ mul! (Bv, view (data. Sₖ, :, 1 : data. k), view (data. sol, data. k+ 1 : 2 * data. k), - data. α, (T)(- 1 ))
258+ Bv .+ = data. α .* v
259+ return Bv
260+ end
261+
262+ """
263+ reset!(op)
264+
265+ Resets the CompressedLBFGS data of the given operator.
266+ """
267+ function reset! (op:: CompressedLBFGSOperator )
268+ op. data. nprod = 0
269+ return op
270+ end
0 commit comments