Skip to content

Commit 0618e9d

Browse files
committed
Remove BlockArrays dep and handle blocked arrays manually
This and the transpose of the R factor are a huge slow-down for very large systems. In the end, we copy the data by hand anyway, so do not use BlockArrays at all.
1 parent 668c385 commit 0618e9d

3 files changed

Lines changed: 16 additions & 46 deletions

File tree

Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ authors = ["Christian Merdon <merdon@wias-berlin.de>", "Patrick Jaap <patrick.ja
55

66
[deps]
77
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
8-
BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
98
ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
109
CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
1110
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
@@ -30,7 +29,6 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
3029
[compat]
3130
ADTypes = "1.16.0"
3231
Aqua = "0.8"
33-
BlockArrays = "1.7.0"
3432
ChunkSplitters = "3.1.2"
3533
CommonSolve = "0.2"
3634
DiffResults = "1"

src/ExtendableFEM.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ module ExtendableFEM
77

88
using ADTypes: ADTypes, KnownJacobianSparsityDetector
99
using ChunkSplitters: chunks
10-
using BlockArrays: BlockMatrix, BlockVector, Block, blocks, axes, mortar
1110
using CommonSolve: CommonSolve
1211
using DifferentiationInterface: DifferentiationInterface,
1312
AutoSparse, AutoForwardDiff, prepare_jacobian

src/solvers.jl

Lines changed: 16 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ function solve_linear_system!(A, b, sol, soltemp, residual, unknowns, freedofs,
271271
end
272272

273273
## restrictions only involve the blocks coressponding to the unknowns and not necessarily the full sol.entries
274-
sol_freedofs = mortar([view(sol[u]) for u in unknowns])
274+
sol_freedofs = vcat([view(sol[u]) for u in unknowns]...)
275275

276276
if length(PD.restrictions) == 0
277277
if linsolve_needs_matrix
@@ -285,7 +285,7 @@ function solve_linear_system!(A, b, sol, soltemp, residual, unknowns, freedofs,
285285

286286
# block sizes for the block matrix
287287
block_sizes = [length(b_unrestricted), [ length(b) for b in restriction_rhss ]...]
288-
total_size = sum(block_sizes)
288+
block_ends = cumsum(block_sizes)
289289

290290
# total number of additional LM dofs
291291
nLMs = @views sum(block_sizes[2:end])
@@ -315,14 +315,15 @@ function solve_linear_system!(A, b, sol, soltemp, residual, unknowns, freedofs,
315315
# we need the inverse column permutation
316316
ipcol = invperm(pcol)
317317

318-
# the new combined restriction block (add another transpose)
319-
combined_restriction_matrix = R[:, ipcol]'
318+
# the new combined restriction block (add another transpose and convert into CSC matrix)
319+
combined_restriction_matrix = sparse(R[:, ipcol]')
320320

321321
# the new combined restriction rhs
322322
combined_restriction_rhs = Q'combined_restriction_rhs[prow]
323323

324324
# compress the column space
325325
qr_rank = rank(qr_result)
326+
SC.parameters[:verbosity] > 1 && @info "QR decomposition has rank $qr_rank"
326327
@assert norm(combined_restriction_rhs[(qr_rank + 1):end]) 1.0e-12 * norm(combined_restriction_rhs) "the rhs of the restriction is not in the image"
327328

328329
combined_restriction_matrix = combined_restriction_matrix[:, 1:qr_rank]
@@ -337,63 +338,36 @@ function solve_linear_system!(A, b, sol, soltemp, residual, unknowns, freedofs,
337338

338339
# update sizes
339340
block_sizes = [length(b_unrestricted), length(combined_restriction_rhs)]
340-
total_size = sum(block_sizes)
341+
block_ends = cumsum(block_sizes)
341342

342343
SC.parameters[:verbosity] > 1 && @info "Created new CompressedRestriction"
343344
end
344345

345-
A_block = BlockMatrix(spzeros(Tv, total_size, total_size), block_sizes, block_sizes)
346-
A_block[Block(1, 1)] = A_unrestricted
346+
linsolve_A = spzeros(Tv, block_ends[end], block_ends[end])
347+
linsolve_A[1:block_ends[1], 1:block_ends[1]] = A_unrestricted
347348
end
348349

349350
## we need to add the (initial) solution to the rhs, since we work with the residual equation
350351
for (B, rhs) in zip(restriction_matrices, restriction_rhss)
351-
rhs .-= B'sol_freedofs
352+
# rhs -= B'sol_freedofs
353+
mul!(rhs, B', sol_freedofs, -1.0, 0.0)
352354
end
353355

354-
b_block = BlockVector(zeros(Tv, total_size), block_sizes)
355-
b_block[Block(1)] = b_unrestricted
356+
linsolve_b = zeros(Tv, block_ends[end])
357+
linsolve_b[1:block_ends[1]] = b_unrestricted
356358

357359
for i in eachindex(restriction_matrices)
358360
if linsolve_needs_matrix
359-
A_block[Block(1, i + 1)] = restriction_matrices[i]
360-
A_block[Block(i + 1, 1)] = transpose(restriction_matrices[i])
361+
linsolve_A[1:block_ends[1], (block_ends[i] + 1):block_ends[i + 1]] = restriction_matrices[i]
362+
linsolve_A[(block_ends[i] + 1):block_ends[i + 1], 1:block_ends[1]] = restriction_matrices[i]'
361363
end
362-
b_block[Block(i + 1)] = restriction_rhss[i]
364+
linsolve_b[(block_ends[i] + 1):block_ends[i + 1]] = restriction_rhss[i]
363365

364366
end
365-
366-
# convert to dense vectors
367-
linsolve_b = Vector(b_block)
368-
369-
if linsolve_needs_matrix
370-
371-
# linsolve.A = sparse(A_block) # convert to CSC Matrix is very slow https://github.com/JuliaArrays/BlockArrays.jl/issues/78
372-
# do it manually:
373-
A_flat = spzeros(size(A_block))
374-
375-
lasts_row = [0, axes(A_block)[1].lasts...] # add leading zero
376-
lasts_col = [0, axes(A_block)[2].lasts...] # add leading zero
377-
378-
(n_row, n_col) = size(blocks(A_block))
379-
380-
381-
SC.parameters[:verbosity] > 1 && @info "Assemble flat matrix out of blocks..."
382-
for i in 1:n_row, j in 1:n_col
383-
range_row = (lasts_row[i] + 1):lasts_row[i + 1]
384-
range_col = (lasts_col[j] + 1):lasts_col[j + 1]
385-
386-
# write each block directly in the resulting matrix
387-
A_flat[range_row, range_col] = A_block[Block(i, j)]
388-
end
389-
SC.parameters[:verbosity] > 1 && @info "... matrix assembly complete!"
390-
391-
linsolve_A = A_flat
392-
end
367+
SC.parameters[:verbosity] > 1 && @info "done setting blocked matrix/vector"
393368
end
394369
end
395370

396-
397371
if SC.parameters[:initialized]
398372
# set/update the linear system
399373
SC.linsolver.b = linsolve_b
@@ -442,7 +416,6 @@ function solve_linear_system!(A, b, sol, soltemp, residual, unknowns, freedofs,
442416

443417
if length(PD.restrictions) > 0
444418
# extract all residuals for the restriction blocks
445-
block_ends = cumsum(block_sizes)
446419
restriction_residuals = [norm(residual_flat[(block_ends[i] + 1):block_ends[i + 1]]) for i in 1:(length(block_sizes) - 1) ]
447420
push!(stats[:restriction_residuals], restriction_residuals)
448421

0 commit comments

Comments
 (0)