@@ -386,3 +386,159 @@ function LinearAlgebra.ldiv!(p::SchurComplementPreconditioner, v)
386386
387387 return v
388388end
389+
390+
391+ struct FullSchurComplementPreconditioner{AT, ST, BT}
392+ partitions
393+ A_fac:: AT
394+ S_fac:: ST
395+ B:: BT
396+ Av1_cache:: Vector{Float64}
397+ BSv2_cache:: Vector{Float64}
398+ BSBAv1_cache:: Vector{Float64}
399+ ABSBAv1_cache:: Vector{Float64}
400+ ABSv2_cache:: Vector{Float64}
401+ Sv2_cache:: Vector{Float64}
402+ BAv1_cache:: Vector{Float64}
403+ SBAv1_cache:: Vector{Float64}
404+ end
405+
406+
407+ function FullSchurComplementPreconBuilder (dofs_first_block, A_factorization, S_factorization = lu; verbosity = 0 )
408+
409+ # this is the resulting preconditioner
410+ function prec (M)
411+
412+ # We have
413+ # M = [ A B ] → n1 dofs
414+ # [ Bᵀ 0 ] → n2 dofs
415+
416+ n1 = dofs_first_block
417+ n2 = size (M, 1 ) - n1
418+
419+ # I see no other way than creating this expensive copy
420+ A = M[1 : n1, 1 : n1]
421+ B = M[1 : n1, (n1 + 1 ): end ]
422+
423+ # first factorization
424+ A_fac = A_factorization (A)
425+
426+ verbosity > 0 && @info " SchurComplementPreconBuilder: A ($n1 ×$n1 ) is factorized"
427+
428+ # compute dense (!) the Schur Matrix
429+ # S ≈ -BᵀA⁻¹B
430+ # S = zeros(n2, n2)
431+ # for col in 1:n2
432+ # @show col
433+ # @views S[:, col] = B' * (A_fac \ Vector(B[:, col]))
434+ # end
435+ # S .*= -1.0
436+
437+ # S = - B' * (Diagonal(A) \ B)
438+
439+ S = zeros (n2, n2)
440+
441+ function col_loop (col_chunk)
442+ # sigh, some factorizations (like ilu0) store internal buffers. Hence, every thread needs a copy.
443+ local A_fac_copy = deepcopy (A_fac)
444+
445+ # local buffers and result
446+ local ldiv_buffer = zeros (n1)
447+ local Bcol_buffer = zeros (n1)
448+
449+ for col in col_chunk
450+ @info " col $col of $(length (col_chunk)) "
451+ Bcol = B[:, col] # B is very sparse: this copy is fine?
452+ Bcol_buffer .= 0.0
453+ Bcol_buffer[Bcol. nzind] = Bcol. nzval
454+ @views ldiv! (ldiv_buffer, A_fac_copy, Bcol_buffer)
455+ @views mul! (S[:, col], B' , ldiv_buffer, - 1.0 , 0.0 )
456+ end
457+ return nothing
458+ end
459+
460+ # split columns into chunks and assemble S
461+ col_chunks = chunks (1 : n2, n = Threads. nthreads ())
462+ tasks = map (col_chunks) do col_chunk
463+ @info " start thread"
464+ Threads. @spawn col_loop (col_chunk)
465+ end
466+
467+ # then S is ready
468+ fetch .(tasks)
469+
470+ verbosity > 0 && @info " SchurComplementPreconBuilder: S ($n2 ×$n2 ) is computed"
471+
472+ # factorize S
473+ S_fac = S_factorization (S)
474+
475+ verbosity > 0 && @info " SchurComplementPreconBuilder: S ($n2 ×$n2 ) is factorized"
476+
477+ return FullSchurComplementPreconditioner (
478+ [1 : n1, (n1 + 1 ): (n1 + n2)],
479+ A_fac,
480+ S_fac,
481+ B,
482+ zeros (n1),
483+ zeros (n1),
484+ zeros (n1),
485+ zeros (n1),
486+ zeros (n1),
487+ zeros (n2),
488+ zeros (n2),
489+ zeros (n2),
490+ )
491+ end
492+
493+ return prec
494+ end
495+
496+
497+ function LinearAlgebra. ldiv! (u, p:: FullSchurComplementPreconditioner , v)
498+
499+ # M⁻¹ =[ A⁻¹ + A⁻¹BS⁻¹BᵀA⁻¹ -A⁻¹BS⁻¹ ]
500+ # [ -S⁻¹BᵀA⁻¹ S⁻¹ ]
501+
502+ (part1, part2) = p. partitions
503+ @views v1 = v[part1]
504+ @views v2 = v[part2]
505+
506+ (; A_fac, S_fac, B, Av1_cache, Sv2_cache, BAv1_cache, BSv2_cache, BSBAv1_cache, ABSBAv1_cache, ABSBAv1_cache, ABSv2_cache, SBAv1_cache) = p
507+
508+ # A_fac \ v1
509+ @showtime ldiv! (Av1_cache, A_fac, v1)
510+
511+ # S_fac \ v2
512+ @showtime ldiv! (Sv2_cache, S_fac, v2)
513+
514+ # B' * (Av1_cache)
515+ @showtime mul! (BAv1_cache, B' , Av1_cache)
516+
517+ # S_fac \ BAv1_cache
518+ @showtime ldiv! (SBAv1_cache, S_fac, BAv1_cache)
519+
520+ # B * Sv2_cache)
521+ @showtime mul! (BSv2_cache, B, Sv2_cache)
522+
523+ # B * SBAv1_cache
524+ @showtime mul! (BSBAv1_cache, B, SBAv1_cache)
525+
526+ # A_fac \ BSBAv1_cache
527+ @showtime ldiv! (ABSBAv1_cache, A_fac, BSBAv1_cache)
528+
529+ # A_fac \ BSv2_cache
530+ @showtime ldiv! (ABSv2_cache, A_fac, BSv2_cache)
531+
532+ @showtime @views u[part1] .= Av1_cache .+ ABSBAv1_cache .- ABSv2_cache
533+ @showtime @views u[part2] .= Sv2_cache .- SBAv1_cache
534+ return u
535+ end
536+
537+ # function LinearAlgebra.ldiv!(p::FullSchurComplementPreconditioner, v)
538+
539+ # (part1, part2) = p.partitions
540+ # @views ldiv!(p.A_fac, v[part1])
541+ # @views ldiv!(p.S_fac, v[part2])
542+
543+ # return v
544+ # end
0 commit comments