@@ -3,6 +3,11 @@ MUMPS-based distributed sparse factorization.
33
44Uses MUMPS with distributed matrix input (ICNTL(18)=3) for efficient
55parallel direct solve of sparse linear systems.
6+
7+ Analysis caching: The symbolic analysis phase (ordering, symbolic factorization)
8+ depends only on sparsity structure, not numerical values. We cache the analyzed
9+ MUMPS object by structural hash, so subsequent factorizations with the same
10+ structure skip the expensive analysis phase.
611"""
712
813using MPI
@@ -40,6 +45,56 @@ const _destroy_list = Int[]
4045# Lock for thread-safe access to _destroy_list (finalizers may run from GC thread)
4146const _destroy_list_lock = ReentrantLock ()
4247
48+ # ============================================================================
49+ # MUMPS Analysis Cache
50+ # ============================================================================
51+ #
52+ # The symbolic analysis phase (job=1) depends only on sparsity structure, not
53+ # numerical values. We cache the analyzed MUMPS object by structural hash,
54+ # allowing subsequent factorizations to skip analysis and only do numeric
55+ # factorization (job=2).
56+ #
57+ # The cache stores MUMPSAnalysisPlan objects, which contain:
58+ # - A pre-analyzed MUMPS object (ready for job=2)
59+ # - The COO index arrays (structure is fixed)
60+ # - Metadata for validation
61+
62+ """
63+ MUMPSAnalysisPlan{T}
64+
65+ Cached MUMPS symbolic analysis for a given sparsity structure.
66+ Stores a pre-analyzed MUMPS object that can be reused for numeric
67+ factorization with different values but the same structure.
68+ """
69+ mutable struct MUMPSAnalysisPlan{T}
70+ mumps:: Any # Mumps{T,R} after analysis (job=1)
71+ irn_loc:: Vector{MUMPS_INT} # Row indices (structure, immutable)
72+ jcn_loc:: Vector{MUMPS_INT} # Column indices (structure, immutable)
73+ a_loc:: Vector{T} # Value array (updated for each factorization)
74+ n:: Int
75+ symmetric:: Bool
76+ row_partition:: Vector{Int}
77+ structural_hash:: NTuple{32,UInt8}
78+ end
79+
80+ # Cache mapping (structural_hash, symmetric, element_type) -> MUMPSAnalysisPlan
81+ const _mumps_analysis_cache = Dict {Tuple{NTuple{32,UInt8}, Bool, DataType}, Any} ()
82+
83+ """
84+ clear_mumps_analysis_cache!()
85+
86+ Clear the MUMPS analysis cache. This is a collective operation that must
87+ be called on all MPI ranks together.
88+ """
89+ function clear_mumps_analysis_cache! ()
90+ for (key, plan) in _mumps_analysis_cache
91+ # Finalize the cached MUMPS objects
92+ plan. mumps. _finalized = false
93+ MUMPS. finalize! (plan. mumps)
94+ end
95+ empty! (_mumps_analysis_cache)
96+ end
97+
4398# ============================================================================
4499# MUMPS Factorization Type
45100# ============================================================================
@@ -48,17 +103,21 @@ const _destroy_list_lock = ReentrantLock()
48103 MUMPSFactorizationMPI{T}
49104
50105Distributed MUMPS factorization result. Can be reused for multiple solves.
106+
107+ Note: The MUMPS object is shared with the analysis cache. The factorization
108+ does not own the MUMPS object and should not finalize it directly.
51109"""
52110mutable struct MUMPSFactorizationMPI{T}
53111 id:: Int # Unique ID for finalization tracking
54- mumps:: Any # Mumps{T,R} where R is the real type (Float64 for both real and complex)
112+ mumps:: Any # Mumps{T,R} - shared with cache, do not finalize
55113 irn_loc:: Vector{MUMPS_INT}
56114 jcn_loc:: Vector{MUMPS_INT}
57115 a_loc:: Vector{T}
58116 n:: Int
59117 symmetric:: Bool
60118 row_partition:: Vector{Int}
61119 rhs_buffer:: Vector{T}
120+ owns_mumps:: Bool # Whether this factorization owns the MUMPS object
62121end
63122
64123Base. size (F:: MUMPSFactorizationMPI ) = (F. n, F. n)
@@ -123,9 +182,11 @@ function _process_finalizers()
123182 if haskey (_mumps_registry, id)
124183 F = _mumps_registry[id]
125184 delete! (_mumps_registry, id)
126- # Actually finalize the MUMPS object
127- F. mumps. _finalized = false
128- MUMPS. finalize! (F. mumps)
185+ # Only finalize if we own the MUMPS object (not shared with cache)
186+ if F. owns_mumps
187+ F. mumps. _finalized = false
188+ MUMPS. finalize! (F. mumps)
189+ end
129190 end
130191 end
131192end
@@ -182,21 +243,29 @@ end
182243# ============================================================================
183244
184245"""
185- _create_mumps_factorization (A::SparseMatrixMPI{T}, symmetric::Bool) where T
246+ _get_or_create_analysis_plan (A::SparseMatrixMPI{T}, symmetric::Bool) where T
186247
187- Create and compute a MUMPS factorization of the distributed matrix A.
248+ Get a cached analysis plan or create a new one. Returns the plan with
249+ values updated from matrix A.
188250"""
189- function _create_mumps_factorization (A:: SparseMatrixMPI{T} , symmetric:: Bool ) where T
251+ function _get_or_create_analysis_plan (A:: SparseMatrixMPI{T} , symmetric:: Bool ) where T
190252 comm = MPI. COMM_WORLD
191- rank = MPI. Comm_rank (comm)
192253
193- # Process any pending finalizations first (collective operation)
194- _process_finalizers ()
254+ # Ensure structural hash is computed
255+ structural_hash = _ensure_hash (A)
256+ cache_key = (structural_hash, symmetric, T)
195257
196- # Assign unique ID for this factorization
197- id = _mumps_count[]
198- _mumps_count[] += 1
258+ if haskey (_mumps_analysis_cache, cache_key)
259+ # Cache hit: reuse existing analysis
260+ plan = _mumps_analysis_cache[cache_key] :: MUMPSAnalysisPlan{T}
199261
262+ # Update values from matrix A (structure is already correct)
263+ _update_values! (plan, A, symmetric)
264+
265+ return plan, true # true = cache hit
266+ end
267+
268+ # Cache miss: create new analysis
200269 m, n = size (A)
201270 @assert m == n " Matrix must be square for factorization"
202271
@@ -219,6 +288,12 @@ function _create_mumps_factorization(A::SparseMatrixMPI{T}, symmetric::Bool) whe
219288 set_icntl! (mumps, 18 , 3 ; displaylevel= 0 ) # Distributed matrix input
220289 set_icntl! (mumps, 20 , 0 ; displaylevel= 0 ) # Dense RHS
221290 set_icntl! (mumps, 21 , 0 ; displaylevel= 0 ) # Centralized solution on host
291+ set_icntl! (mumps, 7 , 5 ; displaylevel= 0 ) # METIS ordering (better fill-in)
292+
293+ # Set OpenMP threads for MUMPS to match Julia's thread count if OMP_NUM_THREADS not set
294+ if ! haskey (ENV , " OMP_NUM_THREADS" )
295+ set_icntl! (mumps, 16 , Threads. nthreads (); displaylevel= 0 )
296+ end
222297
223298 # Set matrix dimension
224299 mumps. n = MUMPS_INT (n)
@@ -235,18 +310,81 @@ function _create_mumps_factorization(A::SparseMatrixMPI{T}, symmetric::Bool) whe
235310 invoke_mumps_unsafe! (mumps)
236311 _check_mumps_error (mumps, " analysis" )
237312
313+ # Create and cache the analysis plan
314+ plan = MUMPSAnalysisPlan {T} (
315+ mumps, irn_loc, jcn_loc, a_loc,
316+ n, symmetric, copy (A. row_partition), structural_hash
317+ )
318+ _mumps_analysis_cache[cache_key] = plan
319+
320+ return plan, false # false = cache miss
321+ end
322+
323+ """
324+ _update_values!(plan::MUMPSAnalysisPlan{T}, A::SparseMatrixMPI{T}, symmetric::Bool) where T
325+
326+ Update the values in a cached analysis plan from a new matrix with the same structure.
327+ """
328+ function _update_values! (plan:: MUMPSAnalysisPlan{T} , A:: SparseMatrixMPI{T} , symmetric:: Bool ) where T
329+ comm = MPI. COMM_WORLD
330+ rank = MPI. Comm_rank (comm)
331+
332+ row_start = A. row_partition[rank + 1 ]
333+ AT = A. A. parent
334+
335+ # Update values in-place (structure must match exactly)
336+ idx = 1
337+ for local_row in 1 : AT. n
338+ global_row = row_start + local_row - 1
339+ for ptr in AT. colptr[local_row]: (AT. colptr[local_row + 1 ] - 1 )
340+ local_col_idx = AT. rowval[ptr]
341+ global_col = A. col_indices[local_col_idx]
342+
343+ if ! symmetric || global_row >= global_col
344+ plan. a_loc[idx] = AT. nzval[ptr]
345+ idx += 1
346+ end
347+ end
348+ end
349+ end
350+
351+ """
352+ _create_mumps_factorization(A::SparseMatrixMPI{T}, symmetric::Bool) where T
353+
354+ Create and compute a MUMPS factorization of the distributed matrix A.
355+ Uses cached symbolic analysis when available for the same sparsity structure.
356+ """
357+ function _create_mumps_factorization (A:: SparseMatrixMPI{T} , symmetric:: Bool ) where T
358+ comm = MPI. COMM_WORLD
359+ rank = MPI. Comm_rank (comm)
360+
361+ # Process any pending finalizations first (collective operation)
362+ _process_finalizers ()
363+
364+ # Assign unique ID for this factorization
365+ id = _mumps_count[]
366+ _mumps_count[] += 1
367+
368+ # Get or create analysis plan (may be cached)
369+ plan, cache_hit = _get_or_create_analysis_plan (A, symmetric)
370+
371+ # Update value pointer (values may have been updated)
372+ plan. mumps. a_loc = pointer (plan. a_loc)
373+
238374 # Factorization phase (job = 2)
239- mumps. job = MUMPS_INT (2 )
240- invoke_mumps_unsafe! (mumps)
241- _check_mumps_error (mumps, " factorization" )
375+ plan . mumps. job = MUMPS_INT (2 )
376+ invoke_mumps_unsafe! (plan . mumps)
377+ _check_mumps_error (plan . mumps, " factorization" )
242378
243379 # Pre-allocate RHS buffer on rank 0
244- rhs_buffer = rank == 0 ? zeros (T, n) : T[]
380+ rhs_buffer = rank == 0 ? zeros (T, plan . n) : T[]
245381
246382 # Create factorization object with ID
383+ # Note: We copy the value array since the plan's array is reused.
384+ # The MUMPS object is shared with the cache (owns_mumps=false).
247385 F = MUMPSFactorizationMPI {T} (
248- id, mumps, irn_loc, jcn_loc, a_loc,
249- n, symmetric, copy (A . row_partition), rhs_buffer
386+ id, plan . mumps, plan . irn_loc, plan . jcn_loc, copy (plan . a_loc) ,
387+ plan . n, symmetric, copy (plan . row_partition), rhs_buffer, false
250388 )
251389
252390 # Register in global registry (prevents GC until removed)
366504 finalize!(F::MUMPSFactorizationMPI)
367505
368506Release MUMPS resources. Must be called on all ranks together.
507+
508+ Note: If the MUMPS object is shared with the analysis cache (owns_mumps=false),
509+ this only removes the factorization from the registry. The MUMPS object itself
510+ is finalized when `clear_mumps_analysis_cache!()` is called.
369511"""
370512function finalize! (F:: MUMPSFactorizationMPI )
371513 # Check if already finalized (removed from registry)
@@ -376,9 +518,12 @@ function finalize!(F::MUMPSFactorizationMPI)
376518 # Remove from registry
377519 delete! (_mumps_registry, F. id)
378520
379- # Actually finalize the MUMPS object
380- F. mumps. _finalized = false # Re-enable MUMPS finalization
381- MUMPS. finalize! (F. mumps)
521+ # Only finalize the MUMPS object if we own it (not shared with cache)
522+ if F. owns_mumps
523+ F. mumps. _finalized = false # Re-enable MUMPS finalization
524+ MUMPS. finalize! (F. mumps)
525+ end
526+
382527 return F
383528end
384529
0 commit comments