Skip to content

Commit d91fcd7

Browse files
committed
WIP
1 parent e3a8e21 commit d91fcd7

3 files changed

Lines changed: 159 additions & 1 deletion

File tree

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version = "2.1.0"
44
authors = ["Juergen Fuhrmann <juergen.fuhrmann@wias-berlin.de>", "Daniel Runge"]
55

66
[deps]
7+
ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
78
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
89
ILUZero = "88f59080-6952-5380-9ea5-54057fb9a43f"
910
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

src/ExtendableSparse.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ $(read(joinpath(@__DIR__, "..", "README.md"), String))
55
"""
66
module ExtendableSparse
77

8+
using ChunkSplitters: chunks
89
using DocStringExtensions: DocStringExtensions, SIGNATURES, TYPEDEF, TYPEDFIELDS, TYPEDSIGNATURES
910
using ILUZero: ILUZero
1011
using LinearAlgebra: LinearAlgebra, Diagonal, Hermitian, Symmetric, Tridiagonal, convert, mul!, ldiv!, lu
@@ -42,7 +43,7 @@ export eliminate_dirichlet, eliminate_dirichlet!, mark_dirichlet
4243

4344

4445
include("preconbuilders.jl")
45-
export LinearSolvePreconBuilder, BlockPreconBuilder, JacobiPreconBuilder, SchurComplementPreconBuilder
46+
export LinearSolvePreconBuilder, BlockPreconBuilder, JacobiPreconBuilder, SchurComplementPreconBuilder, FullSchurComplementPreconBuilder
4647
@public ILUZeroPreconBuilder, ILUTPreconBuilder
4748

4849

src/preconbuilders.jl

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,159 @@ function LinearAlgebra.ldiv!(p::SchurComplementPreconditioner, v)
386386

387387
return v
388388
end
389+
390+
391+
struct FullSchurComplementPreconditioner{AT, ST, BT}
392+
partitions
393+
A_fac::AT
394+
S_fac::ST
395+
B::BT
396+
Av1_cache::Vector{Float64}
397+
BSv2_cache::Vector{Float64}
398+
BSBAv1_cache::Vector{Float64}
399+
ABSBAv1_cache::Vector{Float64}
400+
ABSv2_cache::Vector{Float64}
401+
Sv2_cache::Vector{Float64}
402+
BAv1_cache::Vector{Float64}
403+
SBAv1_cache::Vector{Float64}
404+
end
405+
406+
407+
function FullSchurComplementPreconBuilder(dofs_first_block, A_factorization, S_factorization = lu; verbosity = 0)
408+
409+
# this is the resulting preconditioner
410+
function prec(M)
411+
412+
# We have
413+
# M = [ A B ] → n1 dofs
414+
# [ Bᵀ 0 ] → n2 dofs
415+
416+
n1 = dofs_first_block
417+
n2 = size(M, 1) - n1
418+
419+
# I see no other way than creating this expensive copy
420+
A = M[1:n1, 1:n1]
421+
B = M[1:n1, (n1 + 1):end]
422+
423+
# first factorization
424+
A_fac = A_factorization(A)
425+
426+
verbosity > 0 && @info "SchurComplementPreconBuilder: A ($n1×$n1) is factorized"
427+
428+
# compute dense (!) the Schur Matrix
429+
# S ≈ -BᵀA⁻¹B
430+
# S = zeros(n2, n2)
431+
# for col in 1:n2
432+
# @show col
433+
# @views S[:, col] = B' * (A_fac \ Vector(B[:, col]))
434+
# end
435+
# S .*= -1.0
436+
437+
# S = - B' * (Diagonal(A) \ B)
438+
439+
S = zeros(n2, n2)
440+
441+
function col_loop(col_chunk)
442+
# sigh, some factorizations (like ilu0) store internal buffers. Hence, every thread needs a copy.
443+
local A_fac_copy = deepcopy(A_fac)
444+
445+
# local buffers and result
446+
local ldiv_buffer = zeros(n1)
447+
local Bcol_buffer = zeros(n1)
448+
449+
for col in col_chunk
450+
@info "col $col of $(length(col_chunk))"
451+
Bcol = B[:, col] # B is very sparse: this copy is fine?
452+
Bcol_buffer .= 0.0
453+
Bcol_buffer[Bcol.nzind] = Bcol.nzval
454+
@views ldiv!(ldiv_buffer, A_fac_copy, Bcol_buffer)
455+
@views mul!(S[:, col], B', ldiv_buffer, -1.0, 0.0)
456+
end
457+
return nothing
458+
end
459+
460+
# split columns into chunks and assemble S
461+
col_chunks = chunks(1:n2, n = Threads.nthreads())
462+
tasks = map(col_chunks) do col_chunk
463+
@info "start thread"
464+
Threads.@spawn col_loop(col_chunk)
465+
end
466+
467+
# then S is ready
468+
fetch.(tasks)
469+
470+
verbosity > 0 && @info "SchurComplementPreconBuilder: S ($n2×$n2) is computed"
471+
472+
# factorize S
473+
S_fac = S_factorization(S)
474+
475+
verbosity > 0 && @info "SchurComplementPreconBuilder: S ($n2×$n2) is factorized"
476+
477+
return FullSchurComplementPreconditioner(
478+
[1:n1, (n1 + 1):(n1 + n2)],
479+
A_fac,
480+
S_fac,
481+
B,
482+
zeros(n1),
483+
zeros(n1),
484+
zeros(n1),
485+
zeros(n1),
486+
zeros(n1),
487+
zeros(n2),
488+
zeros(n2),
489+
zeros(n2),
490+
)
491+
end
492+
493+
return prec
494+
end
495+
496+
497+
function LinearAlgebra.ldiv!(u, p::FullSchurComplementPreconditioner, v)
498+
499+
# M⁻¹ =[ A⁻¹ + A⁻¹BS⁻¹BᵀA⁻¹ -A⁻¹BS⁻¹ ]
500+
# [ -S⁻¹BᵀA⁻¹ S⁻¹ ]
501+
502+
(part1, part2) = p.partitions
503+
@views v1 = v[part1]
504+
@views v2 = v[part2]
505+
506+
(; A_fac, S_fac, B, Av1_cache, Sv2_cache, BAv1_cache, BSv2_cache, BSBAv1_cache, ABSBAv1_cache, ABSBAv1_cache, ABSv2_cache, SBAv1_cache) = p
507+
508+
# A_fac \ v1
509+
@showtime ldiv!(Av1_cache, A_fac, v1)
510+
511+
# S_fac \ v2
512+
@showtime ldiv!(Sv2_cache, S_fac, v2)
513+
514+
# B' * (Av1_cache)
515+
@showtime mul!(BAv1_cache, B', Av1_cache)
516+
517+
# S_fac \ BAv1_cache
518+
@showtime ldiv!(SBAv1_cache, S_fac, BAv1_cache)
519+
520+
# B * Sv2_cache)
521+
@showtime mul!(BSv2_cache, B, Sv2_cache)
522+
523+
# B * SBAv1_cache
524+
@showtime mul!(BSBAv1_cache, B, SBAv1_cache)
525+
526+
# A_fac \ BSBAv1_cache
527+
@showtime ldiv!(ABSBAv1_cache, A_fac, BSBAv1_cache)
528+
529+
# A_fac \ BSv2_cache
530+
@showtime ldiv!(ABSv2_cache, A_fac, BSv2_cache)
531+
532+
@showtime @views u[part1] .= Av1_cache .+ ABSBAv1_cache .- ABSv2_cache
533+
@showtime @views u[part2] .= Sv2_cache .- SBAv1_cache
534+
return u
535+
end
536+
537+
# function LinearAlgebra.ldiv!(p::FullSchurComplementPreconditioner, v)
538+
539+
# (part1, part2) = p.partitions
540+
# @views ldiv!(p.A_fac, v[part1])
541+
# @views ldiv!(p.S_fac, v[part2])
542+
543+
# return v
544+
# end

0 commit comments

Comments
 (0)