Skip to content

Commit 711c27a

Browse files
simplified blockmul by moving specialization to CovarianceFunctions
1 parent 924cfe2 commit 711c27a

2 files changed

Lines changed: 7 additions & 74 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "BlockFactorizations"
22
uuid = "5c499583-5bfe-4591-9b59-c1e192d48697"
33
authors = ["Sebastian Ament <sebastianeament@gmail.com>"]
4-
version = "1.2.0"
4+
version = "1.2.1"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

src/block.jl

Lines changed: 6 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -140,102 +140,35 @@ end
140140
function LinearAlgebra.mul!(y::AbstractVector, B::BlockFactorization, x::AbstractVector, α::Real = 1, β::Real = 0)
141141
xx = [@view x[B.mindices[i] : B.mindices[i+1]-1] for i in 1:length(B.mindices)-1]
142142
yy = [@view y[B.nindices[i] : B.nindices[i+1]-1] for i in 1:length(B.nindices)-1]
143-
strided = Val(false)
144-
blockmul!(yy, B.A, xx, strided, α, β)
143+
blockmul!(yy, B.A, xx, α, β)
145144
return y
146145
end
147146

148147
function LinearAlgebra.mul!(Y::AbstractMatrix, B::BlockFactorization, X::AbstractMatrix, α::Real = 1, β::Real = 0)
149148
XX = [@view X[B.mindices[i] : B.mindices[i+1]-1, :] for i in 1:length(B.mindices)-1]
150149
YY = [@view Y[B.nindices[i] : B.nindices[i+1]-1, :] for i in 1:length(B.nindices)-1]
151-
strided = Val(false)
152-
blockmul!(YY, B.A, XX, strided, α, β)
150+
blockmul!(YY, B.A, XX, α, β)
153151
return Y
154152
end
155153

156154
# carries out multiplication for general BlockFactorization
157-
function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
158-
x::AbstractVecOfVecOrMat, strided::Val{false} = Val(false), α::Real = 1, β::Real = 0)
155+
function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix, x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0)
159156
@threads for i in eachindex(y)
160157
@. y[i] = β * y[i]
161158
for j in eachindex(x)
162-
Gij = G[i, j] # if it is not strided, we can't pre-allocate memory for blocks
163-
mul!(y[i], Gij, x[j], α, 1) # woodbury still allocates here because of Diagonal
159+
mul!(y[i], G[i, j], x[j], α, 1)
164160
end
165161
end
166162
return y
167163
end
168164

169-
# carries out block multiplication for strided block factorization
170-
function LinearAlgebra.mul!(y::AbstractVector, B::StridedBlockFactorization, x::AbstractVector, α::Real = 1, β::Real = 0)
171-
length(x) == size(B, 2) || throw(DimensionMismatch("length(x) = $(length(x))$(size(B, 2)) = size(B, 2)"))
172-
length(y) == size(B, 1) || throw(DimensionMismatch("length(y) = $(length(y))$(size(B, 1)) = size(B, 1)"))
173-
X, Y = reshape(x, B.mindices.step, :), reshape(y, B.nindices.step, :)
174-
xx, yy = [c for c in eachcol(X)], [c for c in eachcol(Y)]
175-
strided = Val(true)
176-
blockmul!(yy, B.A, xx, strided, α, β)
177-
return y
178-
end
179-
180-
function LinearAlgebra.mul!(Y::AbstractMatrix, B::StridedBlockFactorization, X::AbstractMatrix, α::Real = 1, β::Real = 0)
181-
size(X, 1) == size(B, 2) || throw(DimensionMismatch("size(X, 1) = $(size(X, 1))$(size(B, 2)) = size(B, 2)"))
182-
size(Y, 1) == size(B, 1) || throw(DimensionMismatch("size(Y, 1) = $(size(Y, 1))$(size(B, 1)) = size(B, 1)"))
183-
k = size(Y, 2)
184-
size(Y, 2) == size(X, 2) || throw(DimensionMismatch("size(Y, 2) = $(size(Y, 2))$(size(X, 1)) = size(X, 1)"))
185-
XR, YR = reshape(X, B.mindices.step, :, k), reshape(Y, B.nindices.step, :, k)
186-
n, m = size(XR, 2), size(YR, 2)
187-
XX, YY = @views [XR[:, i, :] for i in 1:n], [YR[:, i, :] for i in 1:m]
188-
strided = Val(true)
189-
blockmul!(YY, B.A, XX, strided, α, β)
190-
return Y
191-
end
192-
193-
# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
194-
function blockmul!(y::AbstractVecOfVecOrMat, G::AbstractMatrix,
195-
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
196-
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
197-
Gijs = [G[1, 1] for _ in 1:nthreads()] # IDEA could be nothing if G is a AbstractMatrix{<:Matrix}
198-
@threads for i in eachindex(y)
199-
@. y[i] = β * y[i]
200-
Gij = Gijs[threadid()]
201-
for j in eachindex(x)
202-
Gij = evaluate_block!(Gij, G, i, j) # evaluating G[i, j] but can be more efficient if block has special structure (e.g. Woodbury)
203-
mul!(y[i], Gij, x[j], α, 1) # woodbury still allocates here because of Diagonal
204-
end
205-
end
206-
return y
207-
end
208-
209-
# fallback for generic matrices or factorizations
210-
# does not overwrite Gij in this case, only for more advanced data structures,
211-
# that are not already fully allocated
212-
function evaluate_block!(Gij, G::AbstractMatrix, i::Int, j::Int)
213-
G[i, j]
214-
end
215-
216165
################## specialization for diagonal block factorizations ############
217166
# const DiagonalBlockFactorization{T} = BlockFactorization{T, <:Diagonal}
218167
# carries out multiplication for Diagonal BlockFactorization
219168
function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
220-
x::AbstractVecOfVecOrMat, strided::Val{false}, α::Real = 1, β::Real = 0)
169+
x::AbstractVecOfVecOrMat, α::Real = 1, β::Real = 0)
221170
@threads for i in eachindex(y)
222-
@. y[i] = β * y[i] # IDEA: y[i] .*= β ?
223-
Gii = G[i, i] # if it is not strided, we can't pre-allocate memory for blocks
224-
mul!(y[i], Gii, x[i], α, 1) # woodbury still allocates here because of Diagonal
225-
end
226-
return y
227-
end
228-
229-
# recursively calls mul!, thereby avoiding memory allocation of block-matrix multiplication
230-
function blockmul!(y::AbstractVecOfVecOrMat, G::Diagonal,
231-
x::AbstractVecOfVecOrMat, strided::Val{true}, α::Real = 1, β::Real = 0)
232-
# pre-allocate temporary storage for matrix elements (needs to be done better, "similar"?)
233-
Giis = [G[1, 1] for _ in 1:nthreads()]
234-
@threads for i in eachindex(y)
235-
@. y[i] = β * y[i]
236-
Gii = Giis[threadid()]
237-
Gii = evaluate_block!(Gii, G, i, i) # evaluating G[i, j] but can be more efficient if block has special structure (e.g. Woodbury)
238-
mul!(y[i], Gii, x[i], α, 1) # woodbury still allocates here because of Diagonal
171+
mul!(y[i], G[i, i], x[i], α, β)
239172
end
240173
return y
241174
end

0 commit comments

Comments
 (0)