Skip to content

Commit f453cd9

Browse files
author
Sebastien Loisel
committed
Phase 1 on the fine quadrature; drop hierarchical quadrature machinery
The old phase 1 centred via mgb_phase1 on coarse, level-restricted quadrature rules, whose coarse-grid points were unreliable. Replace the leading mgb_phase1 call in mgb_core with a fine-quadrature mgb_step at the initial t. The feasibility subproblem already runs through mgb_core with an early_stop on strict feasibility, so it now path-follows on the fine rule and stops once feasible — no variable quadrature anywhere. Delete the now-dead mgb_phase1 and mgb_coarsen_levels, and the AMG fields they alone consumed: R_coarse, D_levels, refine_u, coarsen_u, refine_z, coarsen_z (and their type params), plus the orphaned _galerkin overloads (CPU, BlockMatrices, CUDA) and the coarsen_fine/ncanon build steps.
1 parent 02984e8 commit f453cd9

4 files changed

Lines changed: 32 additions & 189 deletions

File tree

ext/MultiGridBarrierCUDAExt/block_ops.jl

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,10 @@ using SparseArrays
1616
import MultiGridBarrier: mgb_diag, mgb_zeros, mgb_blockdiag, apply_D, mgb_cleanup,
1717
block_batched_gemm!, block_fused_triple!, block_segmented_sum!,
1818
block_batched_gemm_broadcast_B!, block_batched_gemm_broadcast_A!,
19-
block_alloc, _galerkin,
19+
block_alloc,
2020
BlockDiag, BlockColumn, BlockHessian, SubBlockDiag,
2121
VBlockDiag, HBlockDiag, ScaledAdjBlockCol, LazyBlockHessianProduct
2222

23-
# Galerkin overload for GPU sparse AMG transfers around a GPU BlockDiag op:
24-
# convert the op to CuSparseMatrixCSR first, mirroring the CPU specialization.
25-
_galerkin(coar::CuSparseMatrixCSR{T}, op::BlockDiag{T, <:CuArray}, ref::CuSparseMatrixCSR{T}) where T =
26-
coar * _to_cusparse(op) * ref
27-
2823
# ============================================================================
2924
# CUDA Kernels (definitions)
3025
# ============================================================================

src/AlgebraicMultiGridBarrier.jl

Lines changed: 30 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ end
369369
#
370370
# Here we collapse the broken-basis intermediate for `:uniform`:
371371
# • At the fine level L, keep `subspaces[:uniform][L] = ones(n_doubled, 1)`
372-
# so R_coarse[L] still lifts the scalar :uniform coefficient to the
372+
# so R_fine[L] still lifts the scalar :uniform coefficient to the
373373
# broken-basis fine iterate (length n_doubled). refines/coarsens at L are
374374
# the identity at fine.
375375
# • At every coarser level l < L, `subspaces[:uniform][l] = [1]` (1×1
@@ -381,11 +381,10 @@ end
381381
#
382382
# The composed `refine_fine_per[:uniform][l]` then collapses (via the chain in
383383
# amg_helper) to `(n_doubled × 1) ones` at every l < L — a sparse column, no
384-
# outer product. `R_coarse[l]`/`D_levels[l,k]`/`refine_z[l]`/`coarsen_z[l]` all
385-
# acquire heterogeneous block sizes for the :uniform variable, but each
386-
# downstream consumer indexes through the actual matrix shapes (e.g.
387-
# `size(R_coarse[J], 2)`) rather than a hardcoded `n_l`, so the math threads
388-
# through.
384+
# outer product. `R_fine[l]` acquires heterogeneous block sizes for the
385+
# :uniform variable, but each downstream consumer indexes through the actual
386+
# matrix shapes (e.g. `size(R_fine[J], 2)`) rather than a hardcoded `n_l`, so
387+
# the math threads through.
389388
function _normalize_uniform_subspace(::Type{T},
390389
subspaces::Dict{Symbol,Vector{M_sub}},
391390
refine::Dict{Symbol,Vector{M_ref}},
@@ -454,25 +453,16 @@ end
454453
Base.propertynames(mg::MultiGrid, private::Bool=false) =
455454
(:geometry, :subspaces, :refine, :coarsen, :x, :w, :operators, :discretization)
456455

457-
@kwdef struct AMG{X,W,M_sub,M_D_lev,M_D_fine,M_ref,M_coar,M_refz,M_coarz,G}
456+
@kwdef struct AMG{X,W,M_sub,M_D_fine,G}
458457
geometry::G
459458
x::X
460459
w::W
461460
R_fine::Vector{M_sub}
462-
R_coarse::Vector{M_sub}
463-
# D_levels[l, k]: discrete operator k at multigrid level l, kept sparse so the
464-
# Galerkin triple product `coarsen_fine[l] * op * refine_fine[l]` is well-typed
465-
# at every level. Used by `mgb_phase1` only.
466-
D_levels::Matrix{M_D_lev}
467461
# D_fine[k]: discrete operator k at the finest level, preserving whatever block
468462
# structure `geometry.operators[k]` has. Used by `mgb_step` / `mgb_core` /
469-
# `mgb_driver` — i.e. the main phase, where every Newton step in the V-cycle
470-
# benefits from batched-gemm Hessian assembly when this is a `BlockDiag`.
463+
# `mgb_driver` — every Newton step in the V-cycle benefits from batched-gemm
464+
# Hessian assembly when this is a `BlockDiag`.
471465
D_fine::Vector{M_D_fine}
472-
refine_u::Vector{M_ref}
473-
coarsen_u::Vector{M_coar}
474-
refine_z::Vector{M_refz}
475-
coarsen_z::Vector{M_coarz}
476466
end
477467

478468
"""
@@ -550,15 +540,6 @@ assembly at the finest level via the D_fine path.
550540
"""
551541
subdivide(geom::Geometry, L::Int) = geometric_mg(geom, L; structured=true).geometry
552542

553-
# Galerkin triple product coarsen * op * refine, used by amg_helper to build the
554-
# per-level D_levels matrix. Default: just do the natural matmul. The specialization
555-
# in BlockMatrices.jl handles the awkward case of *sparse* AMG transfers wrapped
556-
# around a *structured* (BlockDiag) operator by converting the operator to sparse
557-
# first (otherwise the generic sparse-times-BlockDiag fallback scalar-indexes the
558-
# BlockDiag and crashes). When both transfers and op are structured (geometric MG),
559-
# the natural matmul kicks in and the structured form is preserved.
560-
_galerkin(coar, op, ref) = coar * op * ref
561-
562543
function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
563544
state_variables::Matrix{Symbol},
564545
D::Matrix{Symbol}) where {T,M_sub,M_ref,M_coar,G}
@@ -572,15 +553,11 @@ function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
572553
# `mg.refine[X]` / `mg.coarsen[X]` / `mg.subspaces[X]` already has the
573554
# same length here.
574555
#
575-
# The CANONICAL hierarchy below (used for the level-l integration grid via
576-
# `refine_fine` / `coarsen_fine` / `ncanon`, and for quadrature-weight
577-
# restriction via `refine_u`) is `:full`'s all-corners Neumann chain — so
578-
# the level-l grid includes boundary corners and the level-l integration
579-
# covers the whole domain. Cross-hierarchy interpolation from each
580-
# per-variable level-l basis to this canonical grid happens automatically
581-
# in the Galerkin triple product `coarsen_fine[l] · op · refine_fine_per[X][l]`
582-
# via the fine basis as a bridge. Per-variable iterate transfers
583-
# (`refine_z`/`coarsen_z`) still consult each variable's own subspace.
556+
# The CANONICAL hierarchy below (used to build `refine_fine`, the level-l →
557+
# fine broken-basis prolongation) is `:full`'s all-corners Neumann chain — so
558+
# the level-l grid includes boundary corners and covers the whole domain.
559+
# Each per-variable level-l basis is lifted to the fine broken basis through
560+
# its own `refine_fine_per[X]` chain, and those lifts assemble into `R_fine`.
584561
refines_s = mg.refine
585562
coarsens_s = mg.coarsen
586563
subspaces_s = mg.subspaces
@@ -595,11 +572,8 @@ function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
595572
end
596573
refine_fine = Vector{M_ref}(undef,(L,))
597574
refine_fine[L] = refine[L]
598-
coarsen_fine = Vector{M_coar}(undef,(L,))
599-
coarsen_fine[L] = coarsen[L]
600575
for l=L-1:-1:1
601576
refine_fine[l] = refine_fine[l+1]*refine[l]
602-
coarsen_fine[l] = coarsen[l]*coarsen_fine[l+1]
603577
end
604578
nu = size(state_variables)[1]
605579
@assert size(state_variables)[2] == 2
@@ -630,10 +604,7 @@ function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
630604
# the AMG-tuned interior-corner P1 chain (preserves zero trace at
631605
# every level). For :uniform with the heterogeneous (intrinsic dim
632606
# 1) representation, the chain collapses to a (n_doubled × 1)
633-
# column ones(n_doubled) at l<L plus identity at l=L. Cross-mapping
634-
# to the canonical :full level-l grid happens implicitly in the
635-
# Galerkin product `coarsen_fine[l] · op · refine_fine_per[X][l]`
636-
# via the fine basis bridge.
607+
# column ones(n_doubled) at l<L plus identity at l=L.
637608
rfp = Vector{eltype(refines_s[X])}(undef, L)
638609
rfp[L] = refines_s[X][L]
639610
for l = L-1:-1:1
@@ -642,48 +613,18 @@ function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
642613
refine_fine_per[X] = rfp
643614
end
644615
end
645-
R_coarse = [mgb_blockdiag((subspaces[state_variables[k,2]][l] for k=1:nu)...) for l=1:L]
646616
R_fine = [mgb_blockdiag((refine_fine_per[state_variables[k,2]][l]*subspaces[state_variables[k,2]][l] for k=1:nu)...) for l=1:L]
647617
nD = size(D)[1]
648618
@assert size(D)[2]==2
649619
bar = Dict{Symbol,Int}()
650620
for k=1:nu
651621
bar[state_variables[k,1]] = k
652622
end
653-
# D_levels[l, k]: Galerkin projection of operator k to multigrid level l, for
654-
# the coarser levels only (l = 1 .. L-1). Stays structured if transfers are
655-
# structured (geometric MG); falls back to sparse if transfers are sparse (AMG)
656-
# via the specialized `_galerkin` overload that converts a BlockDiag op to
657-
# sparse before multiplying. The fine-level (l = L) operator is *not* stored
658-
# here — phase 1 picks it up from `D_fine` so its level-L Newton solves get
659-
# the structure-preserving batched-gemm path. With L = 1, D_levels is empty
660-
# (0 × nD) and phase 1 never indexes it.
661-
#
662-
# Per-variable block widths: at level l, variable v's column block has
663-
# width `size(subspaces[X_v][l], 1)` (rows of X_v's level-l broken-basis
664-
# embedding). When every X shares :dirichlet's hierarchy this equals
665-
# `sizes[l]` for all v, giving the legacy homogeneous nu*sizes[l] cols;
666-
# once a subspace exposes a different level-l-broken-basis row count
667-
# (e.g. a depth-1 :uniform with rows = n_doubled), the block widths
668-
# become heterogeneous, but the canonical output dim (`sizes[l]` rows)
669-
# is preserved.
670-
ncanon = [size(coarsen_fine[l], 1) for l in 1:L]
671-
D_levels = [let
672-
foo = [v == bar[D[k,1]] ?
673-
_galerkin(coarsen_fine[l], operators[D[k,2]],
674-
refine_fine_per[state_variables[v,2]][l]) :
675-
mgb_zeros(coarsen_fine[l],
676-
ncanon[l],
677-
size(subspaces[state_variables[v,2]][l], 1))
678-
for v in 1:nu]
679-
hcat(foo...)
680-
end for l=1:L-1, k=1:nD]
681623
# D_fine[k]: finest-level operator k with its original structure preserved.
682-
# At l = L, `coarsen_fine[L]` and `refine_fine[L]` are identity, so we skip
683-
# the triple product entirely and slot `operators[k]` straight into the
684-
# nu-state-variable hcat. `mgb_zeros` returns BlockDiag zeros when given a
685-
# BlockDiag, so `hcat` returns a BlockColumn — exactly the structured form
686-
# the f2 barrier closure exploits for batched-gemm Hessian assembly.
624+
# `operators[k]` is slotted straight into the nu-state-variable hcat.
625+
# `mgb_zeros` returns BlockDiag zeros when given a BlockDiag, so `hcat`
626+
# returns a BlockColumn — exactly the structured form the f2 barrier closure
627+
# exploits for batched-gemm Hessian assembly.
687628
D_fine = [let
688629
op = operators[D[k,2]]
689630
n = size(op, 1)
@@ -692,19 +633,7 @@ function amg_helper(mg::MultiGrid{T,M_sub,M_ref,M_coar,G},
692633
foo[bar[D[k,1]]] = op
693634
hcat(foo...)
694635
end for k=1:nD]
695-
# Per-variable iterate transfers: each state variable's iterate at level l
696-
# gets prolonged using *its own* subspace hierarchy. Today, every subspace
697-
# symbol points at the same shared `Vector` (the canonical :dirichlet
698-
# hierarchy), so the blockdiag is equivalent to repeating one matrix. Once
699-
# per-subspace hierarchies land (:uniform with depth-1, :full with continuous
700-
# all-corners AMG, etc.), each block here will differ in shape.
701-
# Per-variable iterate transfers: blockdiag of each state variable's
702-
# *stretched* per-subspace hierarchy step at level l.
703-
refine_z = [mgb_blockdiag([refines_s[state_variables[k,2]][l] for k=1:nu]...) for l=1:L]
704-
coarsen_z = [mgb_blockdiag([coarsens_s[state_variables[k,2]][l] for k=1:nu]...) for l=1:L]
705-
AMG(geometry=geometry,x=x,w=w,R_fine=R_fine,R_coarse=R_coarse,
706-
D_levels=D_levels,D_fine=D_fine,
707-
refine_u=refine,coarsen_u=coarsen,refine_z=refine_z,coarsen_z=coarsen_z)
636+
AMG(geometry=geometry,x=x,w=w,R_fine=R_fine,D_fine=D_fine)
708637
end
709638

710639
# Internal: build the (M1, M2) AMG pair from a MultiGrid.
@@ -1993,87 +1922,8 @@ function divide_and_conquer(eta,j,J)
19931922
if jmid==j || jmid==J return false end
19941923
return divide_and_conquer(eta,j,jmid) && divide_and_conquer(eta,jmid,J)
19951924
end
1996-
function mgb_coarsen_levels(M::AMG, z, c)
1997-
L = length(M.R_fine)
1998-
zm = Vector{typeof(z)}(undef,L); zm[L] = z
1999-
cm = Vector{typeof(c)}(undef,L); cm[L] = c
2000-
wm = Vector{typeof(M.w)}(undef,L); wm[L] = M.w
2001-
for l=L-1:-1:1
2002-
cm[l] = M.coarsen_u[l]*cm[l+1]
2003-
zm[l] = M.coarsen_z[l]*zm[l+1]
2004-
wm[l] = M.refine_u[l]'*wm[l+1]
2005-
end
2006-
(zm,cm,wm)
2007-
end
2008-
function mgb_phase1(Q::Vector{<:Convex{T}},
2009-
M::AMG{X,W,M_sub,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any},
2010-
z::W,
2011-
c::X;
2012-
maxit,
2013-
max_newton,
2014-
stopping_criterion,
2015-
line_search,
2016-
printlog,
2017-
args...
2018-
) where {T,X,W,M_sub}
2019-
@debug("start")
2020-
L = length(M.R_fine)
2021-
zm, cm, wm = mgb_coarsen_levels(M, z, c)
2022-
passed = falses((L,))
2023-
its = zeros(Int,(L,))
2024-
function zeta(j,J)
2025-
@debug("j=",j," J=",J)
2026-
# Create barrier for level J from Q[J]
2027-
B_J = barrier(Q[J])
2028-
(f0,f1,f2) = (B_J.f0, B_J.f1, B_J.f2)
2029-
w = wm[J]
2030-
R = M.R_coarse[J]
2031-
D = J == L ? M.D_fine : M.D_levels[J,:]
2032-
z0 = zm[J]
2033-
c0 = cm[J]
2034-
s0 = mgb_zeros(W, size(R, 2))
2035-
# AMG coarsening does not preserve feasibility; if z0 is infeasible
2036-
# at level J, skip the level (phase 1 is best-effort).
2037-
y_check = try f0(s0,w,c0,R,D,z0)::T catch; T(NaN) end
2038-
isfinite(y_check) || return false
2039-
mi = J-j==1 ? maxit : max_newton
2040-
SOL = newton(M_sub,T,
2041-
s->f0(s,w,c0,R,D,z0),
2042-
s->f1(s,w,c0,R,D,z0),
2043-
s->f2(s,w,c0,R,D,z0),
2044-
s0,
2045-
maxit=mi,
2046-
stopping_criterion=stopping_criterion,
2047-
;line_search,printlog)
2048-
SOL.converged || return false
2049-
znext = copy(zm)
2050-
s = R*SOL.x
2051-
znext[J] = zm[J]+s
2052-
try
2053-
for k=J+1:L
2054-
s = M.refine_z[k-1]*s
2055-
znext[k] = zm[k]+s
2056-
# Create barrier for level k
2057-
B_k = barrier(Q[k])
2058-
s0 = mgb_zeros(W,size(M.R_coarse[k])[2])
2059-
D_k = k == L ? M.D_fine : M.D_levels[k,:]
2060-
y0 = B_k.f0(s0,wm[k],cm[k],M.R_coarse[k],D_k,znext[k])::T
2061-
y1 = B_k.f1(s0,wm[k],cm[k],M.R_coarse[k],D_k,znext[k])
2062-
@assert isfinite(y0) && mgb_all_isfinite(y1)
2063-
end
2064-
zm = znext
2065-
passed[J] = true
2066-
catch
2067-
end
2068-
return true
2069-
end
2070-
divide_and_conquer(zeta,0,L)
2071-
# Best-effort: zm[L] is at worst the input z (still fine-grid feasible);
2072-
# the main phase will absorb any lack of centring.
2073-
(;z=zm[L],its,passed)
2074-
end
20751925
function mgb_step(Q::Vector{<:Convex{T}},
2076-
M::AMG{X,W,M_sub,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any},
1926+
M::AMG{X,W,M_sub,<:Any,<:Any},
20771927
z::W,
20781928
c::X;
20791929
early_stop,
@@ -2403,7 +2253,7 @@ function newton(::Type{Mat}, ::Type{T},
24032253
end
24042254

24052255
function mgb_core(Q::Vector{<:Convex{T}},
2406-
M::AMG{X,W,M_sub,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any},
2256+
M::AMG{X,W,M_sub,<:Any,<:Any},
24072257
z::W,
24082258
c::X;
24092259
tol=sqrt(eps(T)),
@@ -2428,9 +2278,13 @@ function mgb_core(Q::Vector{<:Convex{T}},
24282278
c_dot_Dz = zeros(T,(maxit,))
24292279
k = 1
24302280
times[k] = time()
2431-
SOL = mgb_phase1(Q,M,z,t*c;maxit,max_newton,printlog,args...)
2432-
@debug("phase 1 success")
2433-
passed = SOL.passed
2281+
# Initial return-to-center at t, on the fine quadrature rule. The main
2282+
# MGBstep multigrid sweep handles centring from the (feasible) starting
2283+
# point; no coarse/hierarchical quadrature is used. For the feasibility
2284+
# subproblem `early_stop` halts the t-ramp as soon as strict feasibility
2285+
# is reached.
2286+
SOL = mgb_step(Q,M,z,t*c;max_newton,early_stop,maxit,printlog,finalize=false,args...)
2287+
@debug("initial centering done")
24342288
its[:,k] = SOL.its
24352289
kappas[k] = kappa
24362290
ts[k] = t
@@ -2480,10 +2334,10 @@ function mgb_core(Q::Vector{<:Convex{T}},
24802334
@debug("success. t=",t," tol=",tol)
24812335
return (;z,z_unfinalized,c,its=its[:,1:k],ts=ts[1:k],kappas=kappas[1:k],
24822336
t_begin,t_end,t_elapsed,times=times[1:k],
2483-
passed,c_dot_Dz=c_dot_Dz[1:k])
2337+
c_dot_Dz=c_dot_Dz[1:k])
24842338
end
24852339

2486-
function mgb_driver(M::Tuple{AMG{X,W,M_sub,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any},AMG{X,W,M_sub,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}},
2340+
function mgb_driver(M::Tuple{AMG{X,W,M_sub,<:Any,<:Any},AMG{X,W,M_sub,<:Any,<:Any}},
24872341
f::X,
24882342
g::X,
24892343
Q::Vector{<:Convex{T}};

src/BlockMatrices.jl

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -957,12 +957,6 @@ LinearAlgebra.norm(A::BlockDiag) = norm(A.data)
957957
# Conversion to SparseMatrixCSC
958958
# ============================================================================
959959

960-
# Galerkin overload: sparse transfers wrapped around a BlockDiag op (the AMG +
961-
# structured-fine-mesh combination) — convert the op to sparse to avoid the
962-
# generic `SparseMatrixCSC * BlockDiag` fallback (which scalar-indexes BlockDiag).
963-
_galerkin(coar::SparseMatrixCSC{T,Int}, op::BlockDiag{T}, ref::SparseMatrixCSC{T,Int}) where T =
964-
coar * to_sparse(op) * ref
965-
966960
function to_sparse(B::BlockDiag{T}) where T
967961
p, q, N = B.p, B.q, B.N
968962
m = p * N

test/test_algebraic_coverage.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ using LinearAlgebra
55
using StaticArrays
66

77
# Import internal functions for testing
8-
import MultiGridBarrier: mgb_phase1, mgb_core, illinois, newton, linesearch_illinois
8+
import MultiGridBarrier: mgb_core, illinois, newton, linesearch_illinois
99

1010
@testset "AlgebraicMultiGridBarrier Coverage Tests" begin
1111

0 commit comments

Comments
 (0)