Add Base.zeros for distributed types with GPU support

Sébastien Loisel · Sébastien Loisel · commit 6b20302ea3f8 · 2025-12-26T01:42:39.000+01:00
- Add zeros(VectorMPI{T,AV}, n) for creating zero distributed vectors
- Add zeros(MatrixMPI{T,AM}, m, n) for creating zero distributed matrices
- Add zeros(SparseMatrixMPI{T,Ti,AV}, m, n) for creating zero sparse matrices
- Export VectorMPI_CPU, MatrixMPI_CPU, SparseMatrixMPI_CPU type aliases
- Add _zeros_like helper with Metal extension support for GPU arrays
- Update CLAUDE.md with zeros usage examples

Bump version to 0.1.5
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -40,6 +40,27 @@ GPU acceleration is supported via Metal.jl (macOS) as a package extension.
 - `SparseMatrixMPI{T,Ti,AV}` where `AV` is `Vector{T}` (CPU) or `MtlVector{T}` (GPU) for the `nzval` array
 - Type aliases: `VectorMPI_CPU{T}`, `MatrixMPI_CPU{T}`, `SparseMatrixMPI_CPU{T,Ti}` for CPU-backed types
 
+### Creating Zero Arrays
+
+Use `Base.zeros` with the full parametric type or type alias:
+
+```julia
+# CPU zero arrays
+v = zeros(VectorMPI{Float64,Vector{Float64}}, 100)
+v = zeros(VectorMPI_CPU{Float64}, 100)  # Equivalent using type alias
+
+A = zeros(MatrixMPI{Float64,Matrix{Float64}}, 50, 30)
+A = zeros(MatrixMPI_CPU{Float64}, 50, 30)
+
+S = zeros(SparseMatrixMPI{Float64,Int,Vector{Float64}}, 100, 100)
+S = zeros(SparseMatrixMPI_CPU{Float64,Int}, 100, 100)
+
+# GPU zero arrays (requires Metal.jl loaded)
+using Metal
+v_gpu = zeros(VectorMPI{Float32,MtlVector{Float32}}, 100)
+A_gpu = zeros(MatrixMPI{Float32,MtlMatrix{Float32}}, 50, 30)
+```
+
 ### CPU Staging
 
 MPI communication always uses CPU buffers (no Metal-aware MPI exists). GPU data is staged through CPU:
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "LinearAlgebraMPI"
 uuid = "5bdd2be4-ae34-42ef-8b36-f4c85d48f377"
-version = "0.1.4"
+version = "0.1.5"
 authors = ["S. Loisel"]
 
 [deps]
diff --git a/ext/LinearAlgebraMPIMetalExt.jl b/ext/LinearAlgebraMPIMetalExt.jl
@@ -149,6 +149,26 @@ function LinearAlgebraMPI._convert_vector_to_backend(v::LinearAlgebraMPI.VectorM
     return LinearAlgebraMPI.mtl(v)
 end
 
+# ============================================================================
+# Base.zeros Support
+# ============================================================================
+
+"""
+    _zeros_like(::Type{MtlVector{T}}, dims...) where T
+
+Return the zero-filled Metal array produced by `Metal.zeros(T, dims...)` (element type `T`).
+Used by `Base.zeros(VectorMPI{T,MtlVector{T}}, n)`, which passes a single length.
+"""
+LinearAlgebraMPI._zeros_like(::Type{MtlVector{T}}, dims...) where T = Metal.zeros(T, dims...)
+
+"""
+    _zeros_like(::Type{MtlMatrix{T}}, dims...) where T
+
+Return the zero-filled Metal array produced by `Metal.zeros(T, dims...)` (element type `T`).
+Used by `Base.zeros(MatrixMPI{T,MtlMatrix{T}}, m, n)`, which passes the local row count and `n`.
+"""
+LinearAlgebraMPI._zeros_like(::Type{MtlMatrix{T}}, dims...) where T = Metal.zeros(T, dims...)
+
 # ============================================================================
 # MatrixPlan Index Array Support
 # ============================================================================
diff --git a/src/LinearAlgebraMPI.jl b/src/LinearAlgebraMPI.jl
@@ -10,6 +10,7 @@ import LinearAlgebra
 import LinearAlgebra: tr, diag, triu, tril, Transpose, Adjoint, norm, opnorm, mul!, ldlt, BLAS, issymmetric, UniformScaling, dot, Symmetric
 
 export SparseMatrixMPI, MatrixMPI, VectorMPI, clear_plan_cache!, uniform_partition, repartition
+export VectorMPI_CPU, MatrixMPI_CPU, SparseMatrixMPI_CPU  # Type aliases for CPU-backed types
 export SparseMatrixCSR  # Type alias for Transpose{SparseMatrixCSC} (CSR storage format)
 export map_rows  # Row-wise map over distributed vectors/matrices
 export VectorMPI_local, MatrixMPI_local, SparseMatrixMPI_local  # Local constructors
@@ -959,6 +960,141 @@ function map_rows(f, A...)
 end
 
 
+# ============================================================================
+# Base.zeros for Distributed Types
+# ============================================================================
+
+# `_zeros_like(A, dims...)`: allocate a zero-filled array of backend type `A` with size `dims`.
+# Base case: CPU arrays (`Vector{T}` / `Matrix{T}`), created with `Base.zeros`.
+_zeros_like(::Type{Vector{T}}, dims...) where T = zeros(T, dims...)
+_zeros_like(::Type{Matrix{T}}, dims...) where T = zeros(T, dims...)
+
+# For GPU arrays, package extensions (e.g. the Metal extension) define additional methods
+
+"""
+    Base.zeros(::Type{VectorMPI{T,AV}}, n::Integer; comm=MPI.COMM_WORLD) where {T,AV}
+
+Return a distributed vector of global length `n` whose entries are all zero. `T` is the
+element type and `AV <: AbstractVector{T}` is the storage type of each rank's local block.
+The entries are split across the ranks of `comm` with `uniform_partition`.
+
+# Examples
+```julia
+# CPU zero vector
+v = zeros(VectorMPI{Float64,Vector{Float64}}, 100)
+
+# Using type alias
+v = zeros(VectorMPI_CPU{Float64}, 100)
+
+# GPU zero vector (requires Metal.jl loaded)
+using Metal
+v = zeros(VectorMPI{Float32,MtlVector{Float32}}, 100)
+```
+"""
+function Base.zeros(::Type{VectorMPI{T,AV}}, n::Integer;
+                    comm::MPI.Comm=MPI.COMM_WORLD) where {T,AV<:AbstractVector{T}}
+    partition = uniform_partition(n, MPI.Comm_size(comm))
+    me = MPI.Comm_rank(comm)
+
+    # `partition` is indexed from 1 while `me` counts from 0, hence the `+ 1` / `+ 2`.
+    block_start, block_stop = partition[me + 1], partition[me + 2]
+    block = _zeros_like(AV, block_stop - block_start)
+
+    # Vectors get their partition hash up front.
+    partition_hash = compute_partition_hash(partition)
+    return VectorMPI{T,AV}(partition_hash, partition, block)
+end
+
+"""
+    Base.zeros(::Type{MatrixMPI{T,AM}}, m::Integer, n::Integer; comm=MPI.COMM_WORLD) where {T,AM}
+
+Return a distributed `m × n` matrix of zeros with element type `T` and local storage type `AM`.
+
+Rows are split across the ranks of `comm`; each rank holds its rows at full width `n`.
+
+# Examples
+```julia
+# CPU zero matrix
+A = zeros(MatrixMPI{Float64,Matrix{Float64}}, 100, 50)
+
+# Using type alias
+A = zeros(MatrixMPI_CPU{Float64}, 100, 50)
+
+# GPU zero matrix (requires Metal.jl loaded)
+using Metal
+A = zeros(MatrixMPI{Float32,MtlMatrix{Float32}}, 100, 50)
+```
+"""
+function Base.zeros(::Type{MatrixMPI{T,AM}}, m::Integer, n::Integer;
+                    comm::MPI.Comm=MPI.COMM_WORLD) where {T,AM<:AbstractMatrix{T}}
+    nprocs = MPI.Comm_size(comm)
+    me = MPI.Comm_rank(comm)
+
+    # Rows decide ownership; the column partition is kept for transpose operations.
+    row_bounds = uniform_partition(m, nprocs)
+    col_bounds = uniform_partition(n, nprocs)
+
+    # Local block: this rank's share of the rows, all `n` columns.
+    block = _zeros_like(AM, row_bounds[me + 2] - row_bounds[me + 1], n)
+    # Structural hash is computed lazily, so `nothing` is stored for now.
+    return MatrixMPI{T,AM}(nothing, row_bounds, col_bounds, block)
+end
+
+"""
+    Base.zeros(::Type{SparseMatrixMPI{T,Ti,AV}}, m::Integer, n::Integer; comm=MPI.COMM_WORLD) where {T,Ti,AV}
+
+Create a distributed zero sparse matrix of size `m × n`, row-partitioned over the ranks of `comm`.
+
+A zero sparse matrix stores no entries, so on every rank the resulting matrix has:
+- `rowptr` of length `local_nrows + 1` filled with ones (every local row is empty)
+- Empty `colval`, `nzval` and `col_indices`; the symmetry cache is `true` only when `m == n`
+
+# Examples
+```julia
+# CPU zero sparse matrix
+A = zeros(SparseMatrixMPI{Float64,Int,Vector{Float64}}, 100, 100)
+
+# Using type alias
+A = zeros(SparseMatrixMPI_CPU{Float64,Int}, 100, 100)
+```
+"""
+function Base.zeros(::Type{SparseMatrixMPI{T,Ti,AV}}, m::Integer, n::Integer;
+                    comm::MPI.Comm=MPI.COMM_WORLD) where {T,Ti<:Integer,AV<:AbstractVector{T}}
+    nranks = MPI.Comm_size(comm)
+    rank = MPI.Comm_rank(comm)
+
+    row_partition = uniform_partition(m, nranks)
+    col_partition = uniform_partition(n, nranks)
+    local_nrows = row_partition[rank + 2] - row_partition[rank + 1]
+
+    # Empty sparse structure
+    rowptr = ones(Ti, local_nrows + 1)  # rowptr[i] == rowptr[i+1]: every row has 0 entries
+    colval = Ti[]
+    nzval = _zeros_like(AV, 0)  # Empty but correct type
+    col_indices = Int[]  # No columns referenced
+
+    # For CPU, rowptr_target/colval_target are the same as rowptr/colval
+    # For GPU, they would be GPU copies (but empty arrays don't matter)
+    rowptr_target = rowptr
+    colval_target = colval
+
+    return SparseMatrixMPI{T,Ti,AV}(
+        nothing,  # Hash computed lazily
+        row_partition,
+        col_partition,
+        col_indices,
+        rowptr,
+        colval,
+        nzval,
+        local_nrows,
+        0,  # ncols_compressed = 0 (no columns referenced)
+        nothing,  # cached_transpose
+        m == n,  # cached_symmetric (a zero matrix is symmetric only when it is square)
+        rowptr_target,
+        colval_target
+    )
+end
+
 # ============================================================================
 # Precompilation Workload
 # ============================================================================