Skip to content

Commit cb0191e

Browse files
Sébastien LoiselSébastien Loisel
authored andcommitted
Refactor precompilation and transpose materialization
- Replace PrecompileTools with SnoopCompile-generated static precompile.jl - Rename materialize_transpose to SparseMatrixMPI(transpose(A)) constructor - Refactor SparseMatrixMPI(SparseMatrixCSC) to delegate to SparseMatrixMPI_local - Add scripts/generate_precompile.jl for regenerating precompile directives - Update documentation for new transpose materialization API
1 parent 148cb4e commit cb0191e

7 files changed

Lines changed: 251 additions & 161 deletions

File tree

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Note: Uses `transpose()` (not adjoint `'`) to correctly handle complex values wi
137137
`transpose(A)` returns `Transpose{T, SparseMatrixMPI{T}}` (lazy wrapper). Materialization happens automatically when needed:
138138
- `transpose(A) * transpose(B)``transpose(B * A)` (stays lazy)
139139
- `transpose(A) * B` or `A * transpose(B)` → materializes via `TransposePlan`
140+
- `SparseMatrixMPI(transpose(A))` → explicitly materialize the transpose (cached bidirectionally)
140141

141142
### Indexing Operations
142143

Project.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,18 @@ Blake3Hash = "8f478455-a32d-4928-b0e4-72b19a7d5574"
88
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
99
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
1010
MUMPS = "55d2b088-9f4e-11e9-26c0-150b02ea6a46"
11-
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
1211
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1312

1413
[compat]
1514
Blake3Hash = "0.3"
1615
MPI = "0.20"
1716
MUMPS = "1.5"
18-
PrecompileTools = "1"
1917
julia = "1.10"
2018

2119
[extras]
2220
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
2321
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
22+
SnoopCompile = "aa65fe97-06da-5843-b5b1-d5d13cad87d2"
2423
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2524

2625
[targets]

docs/src/api.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,10 @@ C = A ⊛ B # Parallel multiplication using available threads
5252
### Transpose and Adjoint
5353

5454
```julia
55-
transpose(A) # Lazy transpose
56-
conj(A) # Conjugate (new matrix)
57-
A' # Adjoint (conjugate transpose, lazy)
55+
transpose(A) # Lazy transpose
56+
conj(A) # Conjugate (new matrix)
57+
A' # Adjoint (conjugate transpose, lazy)
58+
SparseMatrixMPI(transpose(A)) # Materialize lazy transpose (cached)
5859
```
5960

6061
### Matrix-Vector Multiplication

scripts/generate_precompile.jl

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# Generate precompile directives using SnoopCompile
2+
#
3+
# Run this script under MPI to generate src/precompile.jl:
4+
# mpiexec -n 1 julia --project=. scripts/generate_precompile.jl
5+
#
6+
# Only run with a single MPI rank to avoid file conflicts.
7+
8+
using SnoopCompile
9+
using SnoopCompile.SnoopCompileCore
10+
using SparseArrays
11+
using LinearAlgebra
12+
using MPI
13+
14+
# Initialize MPI first
15+
MPI.Init()
16+
17+
# Load the package outside of @snoop_inference
18+
using LinearAlgebraMPI
19+
20+
# Now snoop on the actual workload
21+
tinf = @snoop_inference begin
22+
# Small test data for precompilation
23+
n = 8
24+
25+
# Sparse matrix (tridiagonal) - Float64
26+
I_sp = Int[]; J_sp = Int[]; V_sp = Float64[]
27+
for i in 1:n
28+
push!(I_sp, i); push!(J_sp, i); push!(V_sp, 4.0)
29+
if i > 1
30+
push!(I_sp, i); push!(J_sp, i-1); push!(V_sp, -1.0)
31+
push!(I_sp, i-1); push!(J_sp, i); push!(V_sp, -1.0)
32+
end
33+
end
34+
A_sparse_f64 = sparse(I_sp, J_sp, V_sp, n, n)
35+
36+
# Sparse matrix - ComplexF64
37+
A_sparse_c64 = sparse(I_sp, J_sp, ComplexF64.(V_sp), n, n)
38+
39+
# Dense matrix - Float64
40+
A_dense_f64 = Float64[i == j ? 4.0 : (abs(i-j) == 1 ? -1.0 : 0.0) for i in 1:n, j in 1:n]
41+
42+
# Dense matrix - ComplexF64
43+
A_dense_c64 = ComplexF64.(A_dense_f64)
44+
45+
# Vectors
46+
v_f64 = ones(Float64, n)
47+
v_c64 = ones(ComplexF64, n)
48+
49+
# Identity for SPD construction
50+
I_sparse = sparse(1.0 * LinearAlgebra.I, n, n)
51+
52+
# === VectorMPI operations (Float64) ===
53+
v = VectorMPI(v_f64)
54+
w = VectorMPI(2.0 .* v_f64)
55+
_ = v + w
56+
_ = v - w
57+
_ = 2.0 * v
58+
_ = v * 2.0
59+
_ = norm(v)
60+
_ = dot(v, w)
61+
_ = conj(v)
62+
_ = length(v)
63+
_ = size(v)
64+
65+
# VectorMPI (ComplexF64)
66+
vc = VectorMPI(v_c64)
67+
_ = conj(vc)
68+
_ = norm(vc)
69+
70+
# === SparseMatrixMPI operations (Float64) ===
71+
A = SparseMatrixMPI{Float64}(A_sparse_f64)
72+
B = SparseMatrixMPI{Float64}(A_sparse_f64)
73+
_ = A + B
74+
_ = A - B
75+
_ = 2.0 * A
76+
_ = A * v
77+
_ = A * B
78+
_ = transpose(A)
79+
At = SparseMatrixMPI(transpose(A))
80+
_ = size(A)
81+
_ = nnz(A)
82+
_ = norm(A)
83+
84+
# SparseMatrixMPI (ComplexF64)
85+
Ac = SparseMatrixMPI{ComplexF64}(A_sparse_c64)
86+
_ = Ac * vc
87+
88+
# === MatrixMPI operations (Float64) ===
89+
D = MatrixMPI(A_dense_f64)
90+
_ = 2.0 * D
91+
_ = D * v
92+
_ = transpose(D)
93+
Dt = copy(transpose(D)) # Materialize dense transpose
94+
_ = size(D)
95+
_ = norm(D)
96+
97+
# MatrixMPI (ComplexF64)
98+
Dc = MatrixMPI(A_dense_c64)
99+
_ = Dc * vc
100+
101+
# === Mixed operations ===
102+
_ = A * D # Sparse * Dense
103+
104+
# === Indexing ===
105+
_ = v[1]
106+
_ = A[1, 1]
107+
_ = D[1, 1]
108+
109+
# === Factorization (MUMPS) ===
110+
# Make symmetric positive definite: A + A^T + 10I
111+
At_mat = SparseMatrixMPI(transpose(A))
112+
I_dist = SparseMatrixMPI{Float64}(I_sparse)
113+
A_spd = A + At_mat + I_dist * 10.0
114+
F = LinearAlgebra.ldlt(A_spd)
115+
x = F \ v
116+
finalize!(F)
117+
118+
# LU factorization
119+
F_lu = LinearAlgebra.lu(A)
120+
x = F_lu \ v
121+
finalize!(F_lu)
122+
123+
# === Block operations ===
124+
_ = cat(v, w; dims=1)
125+
_ = blockdiag(A, B)
126+
127+
# === Conversions ===
128+
_ = Vector(v)
129+
_ = Matrix(D)
130+
_ = SparseMatrixCSC(A)
131+
132+
# Clear caches
133+
clear_plan_cache!()
134+
end
135+
136+
# Generate precompile directives
137+
# parcel returns (total_time, Vector{Pair{Module, (time, MethodInstances)}})
138+
_, pc = SnoopCompile.parcel(tinf)
139+
140+
# Filter for LinearAlgebraMPI only
141+
pc_filtered = filter(p -> p.first == LinearAlgebraMPI, pc)
142+
143+
# Extract method instances for our module
144+
if !isempty(pc_filtered)
145+
_, method_instances = pc_filtered[1].second
146+
147+
# Write the precompile file
148+
outfile = joinpath(@__DIR__, "..", "src", "precompile.jl")
149+
open(outfile, "w") do io
150+
println(io, "# Precompile directives generated by SnoopCompile")
151+
println(io, "# Regenerate with: mpiexec -n 1 julia --project=. scripts/generate_precompile.jl")
152+
println(io, "#")
153+
println(io, "# $(length(method_instances)) method instances")
154+
println(io)
155+
println(io, "function _precompile_()")
156+
println(io, " ccall(:jl_generating_output, Cint, ()) == 1 || return nothing")
157+
SnoopCompile.write(io, method_instances)
158+
println(io, "end")
159+
println(io)
160+
println(io, "_precompile_()")
161+
end
162+
163+
println("Generated precompile directives: $outfile")
164+
println("Found $(length(method_instances)) method instances")
165+
else
166+
println("No method instances found for LinearAlgebraMPI")
167+
end

src/LinearAlgebraMPI.jl

Lines changed: 5 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ function LinearAlgebra.issymmetric(A::SparseMatrixMPI{T}) where T
336336
return false
337337
end
338338

339-
At = materialize_transpose(A)
339+
At = SparseMatrixMPI(transpose(A))
340340
return _compare_rows_distributed(A, At)
341341
end
342342

@@ -363,7 +363,7 @@ end
363363
Solve transpose(A)*x = b using LDLT if transpose(A) is symmetric, otherwise LU.
364364
"""
365365
function Base.:\(At::Transpose{T,SparseMatrixMPI{T}}, b::VectorMPI{T}) where T
366-
A_t = materialize_transpose(At.parent)
366+
A_t = SparseMatrixMPI(At)
367367
F = issymmetric(A_t) ? LinearAlgebra.ldlt(A_t) : LinearAlgebra.lu(A_t)
368368
x = F \ b
369369
finalize!(F)
@@ -662,133 +662,8 @@ end
662662
# Precompilation Workload
663663
# ============================================================================
664664

665-
using PrecompileTools
666-
667-
@setup_workload begin
668-
# Small test data for precompilation (runs with single MPI rank)
669-
n = 8
670-
671-
# Sparse matrix (tridiagonal) - Float64
672-
I_sp = Int[]; J_sp = Int[]; V_sp = Float64[]
673-
for i in 1:n
674-
push!(I_sp, i); push!(J_sp, i); push!(V_sp, 4.0)
675-
if i > 1
676-
push!(I_sp, i); push!(J_sp, i-1); push!(V_sp, -1.0)
677-
push!(I_sp, i-1); push!(J_sp, i); push!(V_sp, -1.0)
678-
end
679-
end
680-
A_sparse_f64 = sparse(I_sp, J_sp, V_sp, n, n)
681-
682-
# Sparse matrix - ComplexF64
683-
A_sparse_c64 = sparse(I_sp, J_sp, ComplexF64.(V_sp), n, n)
684-
685-
# Dense matrix - Float64
686-
A_dense_f64 = Float64[i == j ? 4.0 : (abs(i-j) == 1 ? -1.0 : 0.0) for i in 1:n, j in 1:n]
687-
688-
# Dense matrix - ComplexF64
689-
A_dense_c64 = ComplexF64.(A_dense_f64)
690-
691-
# Vectors
692-
v_f64 = ones(Float64, n)
693-
v_c64 = ones(ComplexF64, n)
694-
695-
# Identity for SPD construction
696-
I_sparse = sparse(1.0 * LinearAlgebra.I, n, n)
697-
698-
@compile_workload begin
699-
# Try to initialize MPI - may fail during precompilation under mpiexec
700-
mpi_ok = try
701-
MPI.Init()
702-
true
703-
catch
704-
false
705-
end
706-
707-
if mpi_ok
708-
# === VectorMPI operations (Float64) ===
709-
v = VectorMPI(v_f64)
710-
w = VectorMPI(2.0 .* v_f64)
711-
_ = v + w
712-
_ = v - w
713-
_ = 2.0 * v
714-
_ = v * 2.0
715-
_ = norm(v)
716-
_ = dot(v, w)
717-
_ = conj(v)
718-
_ = length(v)
719-
_ = size(v)
720-
721-
# VectorMPI (ComplexF64)
722-
vc = VectorMPI(v_c64)
723-
_ = conj(vc)
724-
_ = norm(vc)
725-
726-
# === SparseMatrixMPI operations (Float64) ===
727-
A = SparseMatrixMPI{Float64}(A_sparse_f64)
728-
B = SparseMatrixMPI{Float64}(A_sparse_f64)
729-
_ = A + B
730-
_ = A - B
731-
_ = 2.0 * A
732-
_ = A * v
733-
_ = A * B
734-
_ = transpose(A)
735-
At = materialize_transpose(A)
736-
_ = size(A)
737-
_ = nnz(A)
738-
_ = norm(A)
739-
740-
# SparseMatrixMPI (ComplexF64)
741-
Ac = SparseMatrixMPI{ComplexF64}(A_sparse_c64)
742-
_ = Ac * vc
743-
744-
# === MatrixMPI operations (Float64) ===
745-
D = MatrixMPI(A_dense_f64)
746-
_ = 2.0 * D
747-
_ = D * v
748-
_ = transpose(D)
749-
Dt = copy(transpose(D)) # Materialize dense transpose
750-
_ = size(D)
751-
_ = norm(D)
752-
753-
# MatrixMPI (ComplexF64)
754-
Dc = MatrixMPI(A_dense_c64)
755-
_ = Dc * vc
756-
757-
# === Mixed operations ===
758-
_ = A * D # Sparse * Dense
759-
760-
# === Indexing ===
761-
_ = v[1]
762-
_ = A[1, 1]
763-
_ = D[1, 1]
764-
765-
# === Factorization (MUMPS) ===
766-
# Make symmetric positive definite: A + A^T + 10I
767-
At_mat = materialize_transpose(A)
768-
I_dist = SparseMatrixMPI{Float64}(I_sparse)
769-
A_spd = A + At_mat + I_dist * 10.0
770-
F = LinearAlgebra.ldlt(A_spd)
771-
x = F \ v
772-
finalize!(F)
773-
774-
# LU factorization
775-
F_lu = LinearAlgebra.lu(A)
776-
x = F_lu \ v
777-
finalize!(F_lu)
778-
779-
# === Block operations ===
780-
_ = cat(v, w; dims=1)
781-
_ = blockdiag(A, B)
782-
783-
# === Conversions ===
784-
_ = Vector(v)
785-
_ = Matrix(D)
786-
_ = SparseMatrixCSC(A)
787-
788-
# Clear caches
789-
clear_plan_cache!()
790-
end # if mpi_ok
791-
end
792-
end
665+
# Precompile directives generated by SnoopCompile
666+
# Regenerate with: mpiexec -n 1 julia --project=. scripts/generate_precompile.jl
667+
include("precompile.jl")
793668

794669
end # module LinearAlgebraMPI

0 commit comments

Comments
 (0)