# Generate precompile directives using SnoopCompile
#
# Run this script under MPI to generate src/precompile.jl:
#   mpiexec -n 1 julia --project=. scripts/generate_precompile.jl
#
# A single MPI rank is the intended configuration. If more ranks are launched,
# every rank still executes the workload below, but only rank 0 writes the
# output file, so ranks cannot clobber each other's writes.

using SnoopCompile
using SnoopCompile.SnoopCompileCore
using SparseArrays
using LinearAlgebra
using MPI

# Initialize MPI first
MPI.Init()

# Load the package outside of @snoop_inference
using LinearAlgebraMPI

"""
    write_precompile_file(outfile, method_instances)

Write a `_precompile_()` function containing the directives for
`method_instances` (as produced by `SnoopCompile.parcel`) to `outfile`,
followed by a call to it. Any existing file at `outfile` is overwritten.
"""
function write_precompile_file(outfile, method_instances)
    open(outfile, "w") do io
        println(io, "# Precompile directives generated by SnoopCompile")
        println(io, "# Regenerate with: mpiexec -n 1 julia --project=. scripts/generate_precompile.jl")
        println(io, "#")
        println(io, "# $(length(method_instances)) method instances")
        println(io)
        println(io, "function _precompile_()")
        # Guard emitted into the generated file: do nothing unless Julia is
        # currently generating output (i.e. precompiling).
        println(io, "    ccall(:jl_generating_output, Cint, ()) == 1 || return nothing")
        SnoopCompile.write(io, method_instances)
        println(io, "end")
        println(io)
        println(io, "_precompile_()")
    end
    return nothing
end

# Now snoop on the actual workload.
# The block is intentionally kept at top level (not wrapped in a function) so
# that each call below is its own inference entry point.
tinf = @snoop_inference begin
    # Small test data for precompilation
    n = 8

    # Sparse matrix (tridiagonal) - Float64
    I_sp = Int[]; J_sp = Int[]; V_sp = Float64[]
    for i in 1:n
        push!(I_sp, i); push!(J_sp, i); push!(V_sp, 4.0)
        if i > 1
            push!(I_sp, i); push!(J_sp, i-1); push!(V_sp, -1.0)
            push!(I_sp, i-1); push!(J_sp, i); push!(V_sp, -1.0)
        end
    end
    A_sparse_f64 = sparse(I_sp, J_sp, V_sp, n, n)

    # Sparse matrix - ComplexF64
    A_sparse_c64 = sparse(I_sp, J_sp, ComplexF64.(V_sp), n, n)

    # Dense matrix - Float64 (same tridiagonal pattern as the sparse one)
    A_dense_f64 = Float64[
        i == j ? 4.0 : (abs(i-j) == 1 ? -1.0 : 0.0) for i in 1:n, j in 1:n
    ]

    # Dense matrix - ComplexF64
    A_dense_c64 = ComplexF64.(A_dense_f64)

    # Vectors
    v_f64 = ones(Float64, n)
    v_c64 = ones(ComplexF64, n)

    # Identity for SPD construction
    I_sparse = sparse(1.0 * LinearAlgebra.I, n, n)

    # === VectorMPI operations (Float64) ===
    v = VectorMPI(v_f64)
    w = VectorMPI(2.0 .* v_f64)
    _ = v + w
    _ = v - w
    _ = 2.0 * v
    _ = v * 2.0
    _ = norm(v)
    _ = dot(v, w)
    _ = conj(v)
    _ = length(v)
    _ = size(v)

    # VectorMPI (ComplexF64)
    vc = VectorMPI(v_c64)
    _ = conj(vc)
    _ = norm(vc)

    # === SparseMatrixMPI operations (Float64) ===
    A = SparseMatrixMPI{Float64}(A_sparse_f64)
    B = SparseMatrixMPI{Float64}(A_sparse_f64)
    _ = A + B
    _ = A - B
    _ = 2.0 * A
    _ = A * v
    _ = A * B
    _ = transpose(A)
    At = SparseMatrixMPI(transpose(A))
    _ = size(A)
    _ = nnz(A)
    _ = norm(A)

    # SparseMatrixMPI (ComplexF64)
    Ac = SparseMatrixMPI{ComplexF64}(A_sparse_c64)
    _ = Ac * vc

    # === MatrixMPI operations (Float64) ===
    D = MatrixMPI(A_dense_f64)
    _ = 2.0 * D
    _ = D * v
    _ = transpose(D)
    Dt = copy(transpose(D))  # Materialize dense transpose
    _ = size(D)
    _ = norm(D)

    # MatrixMPI (ComplexF64)
    Dc = MatrixMPI(A_dense_c64)
    _ = Dc * vc

    # === Mixed operations ===
    _ = A * D  # Sparse * Dense

    # === Indexing ===
    _ = v[1]
    _ = A[1, 1]
    _ = D[1, 1]

    # === Factorization (MUMPS) ===
    # Make symmetric positive definite: A + A^T + 10I
    At_mat = SparseMatrixMPI(transpose(A))
    I_dist = SparseMatrixMPI{Float64}(I_sparse)
    A_spd = A + At_mat + I_dist * 10.0
    F = LinearAlgebra.ldlt(A_spd)
    x = F \ v
    finalize!(F)

    # LU factorization
    F_lu = LinearAlgebra.lu(A)
    x = F_lu \ v
    finalize!(F_lu)

    # === Block operations ===
    _ = cat(v, w; dims=1)
    _ = blockdiag(A, B)

    # === Conversions ===
    _ = Vector(v)
    _ = Matrix(D)
    _ = SparseMatrixCSC(A)

    # Clear caches
    clear_plan_cache!()
end

# Generate precompile directives
# parcel returns (total_time, Vector{Pair{Module, (time, MethodInstances)}})
_, pc = SnoopCompile.parcel(tinf)

# Only rank 0 touches the filesystem or reports; with `-n 1` this is always true.
is_writer = MPI.Comm_rank(MPI.COMM_WORLD) == 0
if is_writer && MPI.Comm_size(MPI.COMM_WORLD) > 1
    @warn "Running with more than one MPI rank; only rank 0 writes precompile.jl"
end

# Locate the entry for LinearAlgebraMPI only
idx = findfirst(p -> p.first == LinearAlgebraMPI, pc)

if is_writer
    if idx === nothing
        println("No method instances found for LinearAlgebraMPI")
    else
        # Extract method instances for our module
        _, method_instances = pc[idx].second

        # Write the precompile file
        outfile = joinpath(@__DIR__, "..", "src", "precompile.jl")
        write_precompile_file(outfile, method_instances)

        println("Generated precompile directives: $outfile")
        println("Found $(length(method_instances)) method instances")
    end
end