@@ -140,102 +140,35 @@ end
140140function LinearAlgebra. mul! (y:: AbstractVector , B:: BlockFactorization , x:: AbstractVector , α:: Real = 1 , β:: Real = 0 )
141141 xx = [@view x[B. mindices[i] : B. mindices[i+ 1 ]- 1 ] for i in 1 : length (B. mindices)- 1 ]
142142 yy = [@view y[B. nindices[i] : B. nindices[i+ 1 ]- 1 ] for i in 1 : length (B. nindices)- 1 ]
143- strided = Val (false )
144- blockmul! (yy, B. A, xx, strided, α, β)
143+ blockmul! (yy, B. A, xx, α, β)
145144 return y
146145end
147146
148147function LinearAlgebra. mul! (Y:: AbstractMatrix , B:: BlockFactorization , X:: AbstractMatrix , α:: Real = 1 , β:: Real = 0 )
149148 XX = [@view X[B. mindices[i] : B. mindices[i+ 1 ]- 1 , :] for i in 1 : length (B. mindices)- 1 ]
150149 YY = [@view Y[B. nindices[i] : B. nindices[i+ 1 ]- 1 , :] for i in 1 : length (B. nindices)- 1 ]
151- strided = Val (false )
152- blockmul! (YY, B. A, XX, strided, α, β)
150+ blockmul! (YY, B. A, XX, α, β)
153151 return Y
154152end
155153
156154# carries out multiplication for general BlockFactorization
157- function blockmul! (y:: AbstractVecOfVecOrMat , G:: AbstractMatrix ,
158- x:: AbstractVecOfVecOrMat , strided:: Val{false} = Val (false ), α:: Real = 1 , β:: Real = 0 )
155+ function blockmul! (y:: AbstractVecOfVecOrMat , G:: AbstractMatrix , x:: AbstractVecOfVecOrMat , α:: Real = 1 , β:: Real = 0 )
159156 @threads for i in eachindex (y)
160157 @. y[i] = β * y[i]
161158 for j in eachindex (x)
162- Gij = G[i, j] # if it is not strided, we can't pre-allocate memory for blocks
163- mul! (y[i], Gij, x[j], α, 1 ) # woodbury still allocates here because of Diagonal
159+ mul! (y[i], G[i, j], x[j], α, 1 )
164160 end
165161 end
166162 return y
167163end
168164
169- # carries out block multiplication for strided block factorization
170- function LinearAlgebra. mul! (y:: AbstractVector , B:: StridedBlockFactorization , x:: AbstractVector , α:: Real = 1 , β:: Real = 0 )
171- length (x) == size (B, 2 ) || throw (DimensionMismatch (" length(x) = $(length (x)) ≠ $(size (B, 2 )) = size(B, 2)" ))
172- length (y) == size (B, 1 ) || throw (DimensionMismatch (" length(y) = $(length (y)) ≠ $(size (B, 1 )) = size(B, 1)" ))
173- X, Y = reshape (x, B. mindices. step, :), reshape (y, B. nindices. step, :)
174- xx, yy = [c for c in eachcol (X)], [c for c in eachcol (Y)]
175- strided = Val (true )
176- blockmul! (yy, B. A, xx, strided, α, β)
177- return y
178- end
179-
180- function LinearAlgebra. mul! (Y:: AbstractMatrix , B:: StridedBlockFactorization , X:: AbstractMatrix , α:: Real = 1 , β:: Real = 0 )
181- size (X, 1 ) == size (B, 2 ) || throw (DimensionMismatch (" size(X, 1) = $(size (X, 1 )) ≠ $(size (B, 2 )) = size(B, 2)" ))
182- size (Y, 1 ) == size (B, 1 ) || throw (DimensionMismatch (" size(Y, 1) = $(size (Y, 1 )) ≠ $(size (B, 1 )) = size(B, 1)" ))
183- k = size (Y, 2 )
184- size (Y, 2 ) == size (X, 2 ) || throw (DimensionMismatch (" size(Y, 2) = $(size (Y, 2 )) ≠ $(size (X, 1 )) = size(X, 1)" ))
185- XR, YR = reshape (X, B. mindices. step, :, k), reshape (Y, B. nindices. step, :, k)
186- n, m = size (XR, 2 ), size (YR, 2 )
187- XX, YY = @views [XR[:, i, :] for i in 1 : n], [YR[:, i, :] for i in 1 : m]
188- strided = Val (true )
189- blockmul! (YY, B. A, XX, strided, α, β)
190- return Y
191- end
192-
193- # recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
194- function blockmul! (y:: AbstractVecOfVecOrMat , G:: AbstractMatrix ,
195- x:: AbstractVecOfVecOrMat , strided:: Val{true} , α:: Real = 1 , β:: Real = 0 )
196- # pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
197- Gijs = [G[1 , 1 ] for _ in 1 : nthreads ()] # IDEA could be nothing if G is a AbstractMatrix{<:Matrix}
198- @threads for i in eachindex (y)
199- @. y[i] = β * y[i]
200- Gij = Gijs[threadid ()]
201- for j in eachindex (x)
202- Gij = evaluate_block! (Gij, G, i, j) # evaluating G[i, j] but can be more efficient if block has special structure (e.g. Woodbury)
203- mul! (y[i], Gij, x[j], α, 1 ) # woodbury still allocates here because of Diagonal
204- end
205- end
206- return y
207- end
208-
209- # fallback for generic matrices or factorizations
210- # does not overwrite Gij in this case, only for more advanced data structures,
211- # that are not already fully allocated
212- function evaluate_block! (Gij, G:: AbstractMatrix , i:: Int , j:: Int )
213- G[i, j]
214- end
215-
216165# ################# specialization for diagonal block factorizations ############
217166# const DiagonalBlockFactorization{T} = BlockFactorization{T, <:Diagonal}
218167# carries out multiplication for Diagonal BlockFactorization
219168function blockmul! (y:: AbstractVecOfVecOrMat , G:: Diagonal ,
220- x:: AbstractVecOfVecOrMat , strided :: Val{false} , α:: Real = 1 , β:: Real = 0 )
169+ x:: AbstractVecOfVecOrMat , α:: Real = 1 , β:: Real = 0 )
221170 @threads for i in eachindex (y)
222- @. y[i] = β * y[i] # IDEA: y[i] .*= β ?
223- Gii = G[i, i] # if it is not strided, we can't pre-allocate memory for blocks
224- mul! (y[i], Gii, x[i], α, 1 ) # woodbury still allocates here because of Diagonal
225- end
226- return y
227- end
228-
229- # recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
230- function blockmul! (y:: AbstractVecOfVecOrMat , G:: Diagonal ,
231- x:: AbstractVecOfVecOrMat , strided:: Val{true} , α:: Real = 1 , β:: Real = 0 )
232- # pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
233- Giis = [G[1 , 1 ] for _ in 1 : nthreads ()]
234- @threads for i in eachindex (y)
235- @. y[i] = β * y[i]
236- Gii = Giis[threadid ()]
237- Gii = evaluate_block! (Gii, G, i, i) # evaluating G[i, j] but can be more efficient if block has special structure (e.g. Woodbury)
238- mul! (y[i], Gii, x[i], α, 1 ) # woodbury still allocates here because of Diagonal
171+ mul! (y[i], G[i, i], x[i], α, β)
239172 end
240173 return y
241174end
0 commit comments