diff --git a/src/sparse/array.jl b/src/sparse/array.jl index 48856195c..26a8b5fb1 100644 --- a/src/sparse/array.jl +++ b/src/sparse/array.jl @@ -410,6 +410,10 @@ ROCSparseMatrixCSR{T}(Mat::SparseMatrixCSC) where {T} = ROCSparseMatrixCSR(ROCSp ROCSparseMatrixBSR{T}(Mat::SparseMatrixCSC, blockdim) where {T} = ROCSparseMatrixBSR(ROCSparseMatrixCSR{T}(Mat), blockdim) ROCSparseMatrixCOO{T}(Mat::SparseMatrixCSC) where {T} = ROCSparseMatrixCOO(ROCSparseMatrixCSR{T}(Mat)) +# CPU sparse transpose/adjoint → ROC (Transpose/Adjoint of a CPU CSC) +ROCSparseMatrixCOO{T}(Mat::Transpose{Tv, <:SparseMatrixCSC}) where {T, Tv} = ROCSparseMatrixCOO{T}(ROCSparseMatrixCSR{T}(Mat)) +ROCSparseMatrixCOO{T}(Mat::Adjoint{Tv, <:SparseMatrixCSC}) where {T, Tv} = ROCSparseMatrixCOO{T}(ROCSparseMatrixCSR{T}(Mat)) + # untyped variants ROCSparseVector(x::AbstractSparseArray{T}) where {T} = ROCSparseVector{T}(x) ROCSparseMatrixCSC(x::AbstractSparseArray{T}) where {T} = ROCSparseMatrixCSC{T}(x) @@ -420,8 +424,23 @@ ROCSparseMatrixCSR(x::Transpose{T}) where {T} = ROCSparseMatrixCSR{T}(x) ROCSparseMatrixCSR(x::Adjoint{T}) where {T} = ROCSparseMatrixCSR{T}(x) ROCSparseMatrixCSC(x::Transpose{T}) where {T} = ROCSparseMatrixCSC{T}(x) ROCSparseMatrixCSC(x::Adjoint{T}) where {T} = ROCSparseMatrixCSC{T}(x) - -# TODO adjoint / transpose: GPUArrays._sptranspose +ROCSparseMatrixCOO(x::Transpose{T}) where {T} = ROCSparseMatrixCOO{T}(x) +ROCSparseMatrixCOO(x::Adjoint{T}) where {T} = ROCSparseMatrixCOO{T}(x) + +# GPU-to-GPU transpose/adjoint constructors: +# materialize the transposed/conjugate-transposed matrix using GPUArrays._sptranspose / _spadjoint (implemented in interfaces.jl). +ROCSparseMatrixCSR(x::Transpose{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCSR(GPUArrays._sptranspose(parent(x))) +ROCSparseMatrixCSC(x::Transpose{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCSC(GPUArrays._sptranspose(parent(x))) +ROCSparseMatrixCOO(x::Transpose{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCOO(GPUArrays._sptranspose(parent(x))) +ROCSparseMatrixCSR(x::Adjoint{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCSR(GPUArrays._spadjoint(parent(x))) +ROCSparseMatrixCSC(x::Adjoint{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCSC(GPUArrays._spadjoint(parent(x))) +ROCSparseMatrixCOO(x::Adjoint{T,<:Union{ROCSparseMatrixCSC, ROCSparseMatrixCSR, ROCSparseMatrixCOO}}) where {T} = + ROCSparseMatrixCOO(GPUArrays._spadjoint(parent(x))) # gpu to cpu SparseVector(x::ROCSparseVector) = SparseVector(length(x), Array(SparseArrays.nonzeroinds(x)), Array(SparseArrays.nonzeros(x))) diff --git a/src/sparse/conversions.jl b/src/sparse/conversions.jl index 569e95255..f91ddc9fa 100644 --- a/src/sparse/conversions.jl +++ b/src/sparse/conversions.jl @@ -134,6 +134,14 @@ for SparseMatrixType in [:ROCSparseMatrixCSC, :ROCSparseMatrixCSR] end end +ROCSparseMatrixCOO(S::Diagonal) = ROCSparseMatrixCOO(roc(S)) +ROCSparseMatrixCOO(S::Diagonal{T, <:ROCArray}) where {T} = ROCSparseMatrixCOO{T}(S) +ROCSparseMatrixCOO{Tv}(S::Diagonal{T, <:ROCArray}) where {Tv, T} = ROCSparseMatrixCOO{Tv, Cint}(S) +function ROCSparseMatrixCOO{Tv, Ti}(S::Diagonal{T, <:ROCArray}) where {Tv, Ti, T} + m = size(S, 1) + return ROCSparseMatrixCOO{Tv, Ti}(ROCVector(1:m), ROCVector(1:m), Tv.(S.diag), (m, m), m) +end + # by flipping rows and columns, we can use that to get CSC to CSR too for (elty, fname) in ((:Float32, :rocsparse_scsr2csc), (:Float64, :rocsparse_dcsr2csc), (:ComplexF32, :rocsparse_ccsr2csc), (:ComplexF64, :rocsparse_zcsr2csc)) @@ -323,6 +331,8 @@ function ROCSparseMatrixCSR(coo::ROCSparseMatrixCOO{Tv}, ind::SparseChar='O') wh rocsparse_coo2csr(handle(), coo.rowInd, nnz(coo), m, csrRowPtr, ind) ROCSparseMatrixCSR{Tv}(csrRowPtr, coo.colInd, nonzeros(coo), size(coo)) end +# Typed forwarding constructors: allow ROCSparseMatrixCSR{Tv,Ti}(coo) as called by GPUArrays generics +ROCSparseMatrixCSR{Tv,Ti}(coo::ROCSparseMatrixCOO{Tv,Ti}) where {Tv,Ti} = ROCSparseMatrixCSR(coo) function ROCSparseMatrixCOO(csr::ROCSparseMatrixCSR{Tv}, ind::SparseChar='O') where Tv m,n = size(csr) @@ -334,6 +344,7 @@ end ### CSC/BSR to COO and viceversa ROCSparseMatrixCSC(coo::ROCSparseMatrixCOO) = ROCSparseMatrixCSC(ROCSparseMatrixCSR(coo)) # no direct conversion +ROCSparseMatrixCSC{Tv,Ti}(coo::ROCSparseMatrixCOO{Tv,Ti}) where {Tv,Ti} = ROCSparseMatrixCSC(coo) ROCSparseMatrixCOO(csc::ROCSparseMatrixCSC) = ROCSparseMatrixCOO(ROCSparseMatrixCSR(csc)) # no direct conversion ROCSparseMatrixBSR(coo::ROCSparseMatrixCOO, blockdim) = ROCSparseMatrixBSR(ROCSparseMatrixCSR(coo), blockdim) # no direct conversion ROCSparseMatrixCOO(bsr::ROCSparseMatrixBSR) = ROCSparseMatrixCOO(ROCSparseMatrixCSR(bsr)) # no direct conversion diff --git a/src/sparse/interfaces.jl b/src/sparse/interfaces.jl index fe77ab835..5a22854a5 100644 --- a/src/sparse/interfaces.jl +++ b/src/sparse/interfaces.jl @@ -1,5 +1,35 @@ # interfacing with other packages +# Materialize transpose/adjoint of a sparse ROC matrix. +function GPUArrays._sptranspose(A::ROCSparseMatrixCSR) + ROCSparseMatrixCSR(ROCSparseMatrixCSC(A.rowPtr, A.colVal, A.nzVal, reverse(size(A)))) +end +function GPUArrays._spadjoint(A::ROCSparseMatrixCSR) + ROCSparseMatrixCSR(ROCSparseMatrixCSC(A.rowPtr, A.colVal, conj(A.nzVal), reverse(size(A)))) +end + +function GPUArrays._sptranspose(A::ROCSparseMatrixCSC) + ROCSparseMatrixCSC(ROCSparseMatrixCSR(A.colPtr, A.rowVal, A.nzVal, reverse(size(A)))) +end +function GPUArrays._spadjoint(A::ROCSparseMatrixCSC) + ROCSparseMatrixCSC(ROCSparseMatrixCSR(A.colPtr, A.rowVal, conj(A.nzVal), reverse(size(A)))) +end + +function GPUArrays._sptranspose(A::ROCSparseMatrixCOO) + # swap row/col indices and re-sort so the result is sorted by row + sparse(A.colInd, A.rowInd, A.nzVal, reverse(size(A))..., fmt=:coo) +end +function GPUArrays._spadjoint(A::ROCSparseMatrixCOO) + sparse(A.colInd, A.rowInd, conj(A.nzVal), reverse(size(A))..., fmt=:coo) +end + +# (type_wrapper, value_unwrapper) pairs for generating methods over plain, transposed, and adjoint arguments. +const adjtrans_wrappers = ( + (identity, identity), + (M -> :(Transpose{T, <:$M}), M -> :(_sptranspose(parent($M)))), + (M -> :(Adjoint{T, <:$M}), M -> :(_spadjoint(parent($M)))), +) + function mv_wrapper( transa::SparseChar, alpha::Number, A::ROCSparseMatrix, X::DenseROCVector{T}, beta::Number, Y::ROCVector{T}, @@ -76,73 +106,41 @@ function LinearAlgebra.generic_matmatmul!(C::ROCMatrix{T}, tA, tB, A::DenseROCMa mm!(tA, tB, alpha, A, B, beta, C, 'O') end -Base.:(+)(A::ROCSparseMatrixCSR, B::ROCSparseMatrixCSR) = geam(one(eltype(A)), A, one(eltype(A)), B, 'O') -Base.:(-)(A::ROCSparseMatrixCSR, B::ROCSparseMatrixCSR) = geam(one(eltype(A)), A, -one(eltype(A)), B, 'O') - -Base.:(+)(A::ROCSparseMatrixCSR, B::Adjoint{T,<:ROCSparseMatrixCSR}) where T = A + Transpose(conj(B.parent)) -Base.:(-)(A::ROCSparseMatrixCSR, B::Adjoint{T,<:ROCSparseMatrixCSR}) where T = A - Transpose(conj(B.parent)) -Base.:(+)(A::Adjoint{T,<:ROCSparseMatrixCSR}, B::ROCSparseMatrixCSR) where T = Transpose(conj(A.parent)) + B -Base.:(-)(A::Adjoint{T,<:ROCSparseMatrixCSR}, B::ROCSparseMatrixCSR) where T = Transpose(conj(A.parent)) - B -Base.:(+)(A::Adjoint{T,<:ROCSparseMatrixCSR}, B::Adjoint{T,<:ROCSparseMatrixCSR}) where T = - Transpose(conj(A.parent)) + B -Base.:(-)(A::Adjoint{T,<:ROCSparseMatrixCSR}, B::Adjoint{T,<:ROCSparseMatrixCSR}) where T = - Transpose(conj(A.parent)) - B - -function Base.:(+)(A::ROCSparseMatrixCSR, B::Transpose{T,<:ROCSparseMatrixCSR}) where T - cscB = ROCSparseMatrixCSC(B.parent) - transB = ROCSparseMatrixCSR(cscB.colPtr, cscB.rowVal, cscB.nzVal, size(cscB)) - return geam(one(T), A, one(T), transB, 'O') -end - -function Base.:(-)(A::ROCSparseMatrixCSR, B::Transpose{T,<:ROCSparseMatrixCSR}) where T - cscB = ROCSparseMatrixCSC(B.parent) - transB = ROCSparseMatrixCSR(cscB.colPtr, cscB.rowVal, cscB.nzVal, size(cscB)) - return geam(one(T), A, -one(T), transB, 'O') -end - -function Base.:(+)(A::Transpose{T,<:ROCSparseMatrixCSR}, B::ROCSparseMatrixCSR) where T - cscA = ROCSparseMatrixCSC(A.parent) - transA = ROCSparseMatrixCSR(cscA.colPtr, cscA.rowVal, cscA.nzVal, size(cscA)) - geam(one(T), transA, one(T), B, 'O') -end - -function Base.:(-)(A::Transpose{T,<:ROCSparseMatrixCSR}, B::ROCSparseMatrixCSR) where T - cscA = ROCSparseMatrixCSC(A.parent) - transA = ROCSparseMatrixCSR(cscA.colPtr, cscA.rowVal, cscA.nzVal, size(cscA)) - geam(one(T), transA, -one(T), B, 'O') -end - -function Base.:(+)(A::Transpose{T,<:ROCSparseMatrixCSR}, B::Transpose{T,<:ROCSparseMatrixCSR}) where T - C = geam(one(T), A.parent, one(T), B.parent, 'O') - cscC = ROCSparseMatrixCSC(C) - return ROCSparseMatrixCSR(cscC.colPtr, cscC.rowVal, cscC.nzVal, size(cscC)) -end - -function Base.:(-)(A::Transpose{T,<:ROCSparseMatrixCSR}, B::Transpose{T,<:ROCSparseMatrixCSR}) where T - C = geam(one(T), A.parent, -one(T), B.parent, 'O') - cscC = ROCSparseMatrixCSC(C) - return ROCSparseMatrixCSR(cscC.colPtr, cscC.rowVal, cscC.nzVal, size(cscC)) -end - -function Base.:(+)(A::ROCSparseMatrixCSR, B::ROCSparseMatrix) - csrB = ROCSparseMatrixCSR(B) - return geam(one(eltype(A)), A, one(eltype(A)), csrB, 'O') -end - -function Base.:(-)(A::ROCSparseMatrixCSR, B::ROCSparseMatrix) - csrB = ROCSparseMatrixCSR(B) - return geam(one(eltype(A)), A, -one(eltype(A)), csrB, 'O') +# +/- for all combinations of plain/transposed/adjoint CSR and CSC, generated via adjtrans_wrappers. +for op in (:(+), :(-)) + for (wrapa, unwrapa) in adjtrans_wrappers, (wrapb, unwrapb) in adjtrans_wrappers + for SparseMatrixType in (:(ROCSparseMatrixCSC{T}), :(ROCSparseMatrixCSR{T})) + TypeA = wrapa(SparseMatrixType) + TypeB = wrapb(SparseMatrixType) + @eval Base.$op(A::$TypeA, B::$TypeB) where {T <: BlasFloat} = + geam(one(T), $(unwrapa(:A)), $(op)(one(T)), $(unwrapb(:B)), 'O') + end + end + # COO: materialise both sides as CSR, run geam, convert back + @eval Base.$op( + A::Union{ROCSparseMatrixCOO{T}, Transpose{T,<:ROCSparseMatrixCOO}, Adjoint{T,<:ROCSparseMatrixCOO}}, + B::Union{ROCSparseMatrixCOO{T}, Transpose{T,<:ROCSparseMatrixCOO}, Adjoint{T,<:ROCSparseMatrixCOO}}, + ) where {T <: BlasFloat} = + ROCSparseMatrixCOO(Base.$op(ROCSparseMatrixCSR(A), ROCSparseMatrixCSR(B))) end -function Base.:(+)(A::ROCSparseMatrix, B::ROCSparseMatrixCSR) - csrA = ROCSparseMatrixCSR(A) - return geam(one(eltype(A)), csrA, one(eltype(A)), B, 'O') +# Cross-format +/- for CSR/CSC/BSR mixtures: normalise both operands to CSR, then geam +for op in (:(+), :(-)) + @eval begin + Base.$op(A::ROCSparseMatrixCSR{T}, B::ROCSparseMatrixCSC{T}) where {T <: BlasFloat} = + geam(one(T), A, $(op)(one(T)), ROCSparseMatrixCSR(B), 'O') + Base.$op(A::ROCSparseMatrixCSC{T}, B::ROCSparseMatrixCSR{T}) where {T <: BlasFloat} = + geam(one(T), ROCSparseMatrixCSR(A), $(op)(one(T)), B, 'O') + Base.$op(A::ROCSparseMatrixCSR{T}, B::ROCSparseMatrixBSR{T}) where {T <: BlasFloat} = + geam(one(T), A, $(op)(one(T)), ROCSparseMatrixCSR(B), 'O') + Base.$op(A::ROCSparseMatrixBSR{T}, B::ROCSparseMatrixCSR{T}) where {T <: BlasFloat} = + geam(one(T), ROCSparseMatrixCSR(A), $(op)(one(T)), B, 'O') + end end -function Base.:(-)(A::ROCSparseMatrix, B::ROCSparseMatrixCSR) - csrA = ROCSparseMatrixCSR(A) - return geam(one(eltype(A)), csrA, -one(eltype(A)), B, 'O') -end +# vector +/- +Base.:(+)(A::ROCSparseVector{T}, B::ROCSparseVector{T}) where {T <: BlasFloat} = axpby(one(T), A, one(T), B, 'O') +Base.:(-)(A::ROCSparseVector{T}, B::ROCSparseVector{T}) where {T <: BlasFloat} = axpby(one(T), A, -one(T), B, 'O') # triangular @@ -199,10 +197,7 @@ end ## uniform scaling -# these operations materialize the identity matrix and re-use broadcast -# TODO: can we do without this, and just use the broadcast implementation -# with a singleton argument it knows how to index? - +# TODO: use a broadcast singleton for I instead of materialising the full sparse identity. function _sparse_identity( ::Type{<:ROCSparseMatrixCSR{<:Any,Ti}}, I::UniformScaling{Tv}, dims::Dims, ) where {Tv, Ti} @@ -223,24 +218,112 @@ function _sparse_identity( ROCSparseMatrixCSC{Tv,Ti}(colPtr, rowVal, nzVal, dims) end -# TODO COO - -Base.:(+)(A::Union{ROCSparseMatrixCSR,ROCSparseMatrixCSC}, J::UniformScaling) = - A .+ _sparse_identity(typeof(A), J, size(A)) +function _sparse_identity( + ::Type{<:ROCSparseMatrixCOO{<:Any,Ti}}, I::UniformScaling{Tv}, dims::Dims, +) where {Tv, Ti} + len = min(dims[1], dims[2]) + rowInd = ROCVector{Ti}(1:len) + colInd = ROCVector{Ti}(1:len) + nzVal = AMDGPU.fill(I.λ, len) + ROCSparseMatrixCOO{Tv,Ti}(rowInd, colInd, nzVal, dims) +end -Base.:(-)(J::UniformScaling, A::Union{ROCSparseMatrixCSR,ROCSparseMatrixCSC}) = - _sparse_identity(typeof(A), J, size(A)) .- A +# Scale all nzVals of a COO matrix by scalar λ. +_coo_scale(A::ROCSparseMatrixCOO{T}, λ) where {T} = + ROCSparseMatrixCOO(A.rowInd, A.colInd, A.nzVal .* λ, size(A), nnz(A)) + +# UniformScaling +/-/* for all formats/wrappers; typeof(A′) passes a concrete type to _sparse_identity +# (SparseMatrixType is a UnionAll with Ti unbound at runtime and won't match its signatures). +for (wrapa, unwrapa) in adjtrans_wrappers + for SparseMatrixType in (:(ROCSparseMatrixCSC{T}), :(ROCSparseMatrixCSR{T}), :(ROCSparseMatrixCOO{T})) + TypeA = wrapa(SparseMatrixType) + if SparseMatrixType != :(ROCSparseMatrixCOO{T}) + # CSR/CSC: identity is the same format; broadcasting works for .* + @eval begin + Base.:(+)(A::$TypeA, J::UniformScaling) where {T} = + let A′ = $(unwrapa(:A)); A′ + _sparse_identity(typeof(A′), J, size(A)) end + Base.:(+)(J::UniformScaling, A::$TypeA) where {T} = + let A′ = $(unwrapa(:A)); _sparse_identity(typeof(A′), J, size(A)) + A′ end + Base.:(-)(A::$TypeA, J::UniformScaling) where {T} = + let A′ = $(unwrapa(:A)); A′ - _sparse_identity(typeof(A′), J, size(A)) end + Base.:(-)(J::UniformScaling, A::$TypeA) where {T} = + let A′ = $(unwrapa(:A)); _sparse_identity(typeof(A′), J, size(A)) - A′ end + Base.:(*)(A::$TypeA, J::UniformScaling) where {T} = $(unwrapa(:A)) .* J.λ + Base.:(*)(J::UniformScaling, A::$TypeA) where {T} = J.λ .* $(unwrapa(:A)) + end + else + # COO: broadcast not supported → route +/- through CSR, scale nzVal for * + @eval begin + Base.:(+)(A::$TypeA, J::UniformScaling) where {T} = + let A′ = $(unwrapa(:A)); csr = ROCSparseMatrixCSR(A′) + ROCSparseMatrixCOO(csr + _sparse_identity(typeof(csr), J, size(A))) end + Base.:(+)(J::UniformScaling, A::$TypeA) where {T} = + let A′ = $(unwrapa(:A)); csr = ROCSparseMatrixCSR(A′) + ROCSparseMatrixCOO(_sparse_identity(typeof(csr), J, size(A)) + csr) end + Base.:(-)(A::$TypeA, J::UniformScaling) where {T} = + let A′ = $(unwrapa(:A)); csr = ROCSparseMatrixCSR(A′) + ROCSparseMatrixCOO(csr - _sparse_identity(typeof(csr), J, size(A))) end + Base.:(-)(J::UniformScaling, A::$TypeA) where {T} = + let A′ = $(unwrapa(:A)); csr = ROCSparseMatrixCSR(A′) + ROCSparseMatrixCOO(_sparse_identity(typeof(csr), J, size(A)) - csr) end + Base.:(*)(A::$TypeA, J::UniformScaling) where {T} = _coo_scale($(unwrapa(:A)), J.λ) + Base.:(*)(J::UniformScaling, A::$TypeA) where {T} = _coo_scale($(unwrapa(:A)), J.λ) + end + end + end +end # TODO: let Broadcast handle this automatically (a la SparseArrays.PromoteToSparse) -for SparseMatrixType in [:ROCSparseMatrixCSC, :ROCSparseMatrixCSR], op in [:(+), :(-)] - @eval begin - function Base.$op(lhs::Diagonal{T,<:ROCArray}, rhs::$SparseMatrixType{T}) where T - return $op($SparseMatrixType(lhs), rhs) +# +/- with Diagonal: convert it to the same sparse format, then geam. +for (wrapa, unwrapa) in adjtrans_wrappers, op in (:(+), :(-)) + for SparseMatrixType in (:(ROCSparseMatrixCSC{T}), :(ROCSparseMatrixCSR{T}), :(ROCSparseMatrixCOO{T})) + TypeA = wrapa(SparseMatrixType) + @eval begin + function Base.$op(lhs::Diagonal, rhs::$TypeA) where {T} + return $op($SparseMatrixType(lhs), $(unwrapa(:rhs))) + end + function Base.$op(lhs::$TypeA, rhs::Diagonal) where {T} + return $op($(unwrapa(:lhs)), $SparseMatrixType(rhs)) + end end - function Base.$op(lhs::$SparseMatrixType{T}, rhs::Diagonal{T,<:ROCArray}) where T - return $op(lhs, $SparseMatrixType(rhs)) + end +end + +# * with Diagonal for CSR/CSC: convert to COO, scale nzVal by d[colInd] or d[rowInd], convert back. +for (wrapa, unwrapa) in adjtrans_wrappers + for SparseMatrixType in (:(ROCSparseMatrixCSC{T}), :(ROCSparseMatrixCSR{T})) + FmtCtor = SparseMatrixType == :(ROCSparseMatrixCSR{T}) ? :ROCSparseMatrixCSR : :ROCSparseMatrixCSC + TypeA = wrapa(SparseMatrixType) + @eval begin + function Base.:(*)(lhs::$TypeA, rhs::Diagonal) where {T} + A = $(unwrapa(:lhs)) + d = rhs isa Diagonal{<:Any, <:ROCArray} ? T.(rhs.diag) : ROCArray(T.(rhs.diag)) + coo = ROCSparseMatrixCOO(A) + $FmtCtor(ROCSparseMatrixCOO(coo.rowInd, coo.colInd, coo.nzVal .* d[coo.colInd], size(coo), nnz(coo))) + end + function Base.:(*)(lhs::Diagonal, rhs::$TypeA) where {T} + A = $(unwrapa(:rhs)) + d = lhs isa Diagonal{<:Any, <:ROCArray} ? T.(lhs.diag) : ROCArray(T.(lhs.diag)) + coo = ROCSparseMatrixCOO(A) + $FmtCtor(ROCSparseMatrixCOO(coo.rowInd, coo.colInd, d[coo.rowInd] .* coo.nzVal, size(coo), nnz(coo))) + end end end end -# TODO _sptranspose / _spadjoint +# * with Diagonal for COO: scale nzVal by d[colInd] or d[rowInd]. +for (wrapa, unwrapa) in adjtrans_wrappers + TypeA = wrapa(:(ROCSparseMatrixCOO{T})) + @eval begin + function Base.:(*)(lhs::$TypeA, rhs::Diagonal) where {T} + A = $(unwrapa(:lhs)) + d = rhs isa Diagonal{<:Any, <:ROCArray} ? T.(rhs.diag) : ROCArray(T.(rhs.diag)) + ROCSparseMatrixCOO(A.rowInd, A.colInd, A.nzVal .* d[A.colInd], size(A), nnz(A)) + end + function Base.:(*)(lhs::Diagonal, rhs::$TypeA) where {T} + A = $(unwrapa(:rhs)) + d = lhs isa Diagonal{<:Any, <:ROCArray} ? T.(lhs.diag) : ROCArray(T.(lhs.diag)) + ROCSparseMatrixCOO(A.rowInd, A.colInd, d[A.rowInd] .* A.nzVal, size(A), nnz(A)) + end + end +end diff --git a/src/sparse/linalg.jl b/src/sparse/linalg.jl index 83374ef3e..12a0e57e0 100644 --- a/src/sparse/linalg.jl +++ b/src/sparse/linalg.jl @@ -1 +1,73 @@ -# TODO triu, kron +using LinearAlgebra + +# triu/tril for COO: mask entries by position; GPUArrays' generic CSR/CSC implementations route through coo_type, so this covers all formats. +function LinearAlgebra.triu(A::ROCSparseMatrixCOO, k::Integer=0) + mask = A.rowInd .+ k .<= A.colInd + sparse(A.rowInd[mask], A.colInd[mask], A.nzVal[mask], size(A)..., fmt=:coo) +end + +function LinearAlgebra.tril(A::ROCSparseMatrixCOO, k::Integer=0) + mask = A.rowInd .+ k .>= A.colInd + sparse(A.rowInd[mask], A.colInd[mask], A.nzVal[mask], size(A)..., fmt=:coo) +end + +# kron for COO: Kronecker product via repeat/broadcast on the index and value arrays. +function LinearAlgebra.kron(A::ROCSparseMatrixCOO{T,Ti}, B::ROCSparseMatrixCOO{T,Ti}) where {T,Ti} + mA, nA = size(A) + mB, nB = size(B) + out_shape = (mA * mB, nA * nB) + Annz = Int64(A.nnz) + Bnnz = Int64(B.nnz) + if Annz == 0 || Bnnz == 0 + return ROCSparseMatrixCOO( + ROCVector{Ti}(undef, 0), ROCVector{Ti}(undef, 0), + ROCVector{T}(undef, 0), out_shape) + end + row = repeat((A.rowInd .- 1) .* mB, inner=Bnnz) + col = repeat((A.colInd .- 1) .* nB, inner=Bnnz) + data = repeat(A.nzVal, inner=Bnnz) + row .+= repeat(B.rowInd .- 1, outer=Annz) .+ 1 + col .+= repeat(B.colInd .- 1, outer=Annz) .+ 1 + data .*= repeat(B.nzVal, outer=Annz) + sparse(row, col, data, out_shape..., fmt=:coo) +end + +function LinearAlgebra.kron(A::ROCSparseMatrixCOO{T,Ti}, B::Diagonal) where {T,Ti} + mA, nA = size(A) + mB, nB = size(B) + out_shape = (mA * mB, nA * nB) + Annz = Int64(A.nnz) + Bnnz = nB + if Annz == 0 || Bnnz == 0 + return ROCSparseMatrixCOO( + ROCVector{Ti}(undef, 0), ROCVector{Ti}(undef, 0), + ROCVector{T}(undef, 0), out_shape) + end + row = repeat((A.rowInd .- 1) .* mB, inner=Bnnz) + col = repeat((A.colInd .- 1) .* nB, inner=Bnnz) + data = repeat(A.nzVal, inner=Bnnz) + row .+= ROCVector(repeat(0:nB-1, outer=Annz)) .+ 1 + col .+= ROCVector(repeat(0:nB-1, outer=Annz)) .+ 1 + data .*= repeat(ROCArray(B.diag), outer=Annz) + sparse(row, col, data, out_shape..., fmt=:coo) +end + +function LinearAlgebra.kron(A::Diagonal, B::ROCSparseMatrixCOO{T,Ti}) where {T,Ti} + mA, nA = size(A) + mB, nB = size(B) + out_shape = (mA * mB, nA * nB) + Annz = nA + Bnnz = Int64(B.nnz) + if Annz == 0 || Bnnz == 0 + return ROCSparseMatrixCOO( + ROCVector{Ti}(undef, 0), ROCVector{Ti}(undef, 0), + ROCVector{T}(undef, 0), out_shape) + end + row = ROCVector(repeat(collect(Ti, (0:nA-1) .* mB), inner=Bnnz)) + col = ROCVector(repeat(collect(Ti, (0:nA-1) .* nB), inner=Bnnz)) + data = repeat(ROCArray(A.diag), inner=Bnnz) + row .+= repeat(B.rowInd .- 1, outer=Annz) .+ 1 + col .+= repeat(B.colInd .- 1, outer=Annz) .+ 1 + data .*= repeat(B.nzVal, outer=Annz) + sparse(row, col, data, out_shape..., fmt=:coo) +end diff --git a/src/sparse/rocSPARSE.jl b/src/sparse/rocSPARSE.jl index 1ca043101..1c5cbd4f7 100644 --- a/src/sparse/rocSPARSE.jl +++ b/src/sparse/rocSPARSE.jl @@ -12,6 +12,7 @@ using ..AMDGPU using ..AMDGPU: @allowscalar using ..AMDGPU: ROCArrayStyle, threadIdx, blockIdx, blockDim +import GPUArrays: _sptranspose, _spadjoint import SparseArrays: SparseVector, SparseMatrixCSC import AMDGPU: librocsparse, HandleCache, HIP, library_state, ROCVector import AMDGPU.Device: ROCDeviceVector diff --git a/test/hip_rocsparse/interfaces.jl b/test/hip_rocsparse/interfaces.jl index 7bc573760..0e93e26f0 100644 --- a/test/hip_rocsparse/interfaces.jl +++ b/test/hip_rocsparse/interfaces.jl @@ -12,11 +12,10 @@ using Adapt @testset "$f(A)±$h(B) $elty" for elty in ( Float32, Float64, ComplexF32, ComplexF64, ), f in ( - identity, transpose, #= adjoint, =# + identity, transpose, adjoint, ), h in ( - identity, transpose, #= , adjoint, =# + identity, transpose, adjoint, ) - # adjoint need the support of broadcast for `conj()` to work with `ROCSparseMatrix`. n = 10 alpha = rand() beta = rand() @@ -182,6 +181,22 @@ using Adapt @test SparseMatrixCSC(ROCSparseMatrixCSR(T)) ≈ f(S) end + @testset "GPU adjoint/transpose round-trips ($fmt, $wrap)" for + fmt in (ROCSparseMatrixCSR, ROCSparseMatrixCSC, ROCSparseMatrixCOO), + wrap in (transpose, adjoint), + elty in (Float32, ComplexF32) + + n = 8 + S = sprand(elty, n, n, 0.5) + dA = fmt(S) + + # materialize the wrap into all three output formats + dAt = wrap(dA) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt)) ≈ wrap(S) + @test SparseMatrixCSC(ROCSparseMatrixCSR(dAt)) ≈ wrap(S) + @test SparseMatrixCSC(ROCSparseMatrixCOO(dAt)) ≈ wrap(S) + end + @testset "UniformScaling with $typ($dims)" for typ in ( ROCSparseMatrixCSR, ROCSparseMatrixCSC, ), dims in ((10, 10), (5, 10), (10, 5)) @@ -192,6 +207,35 @@ using Adapt @test Array(I + dA) ≈ (I + S) @test Array(dA - I) ≈ (S - I) @test Array(I - dA) ≈ (I - S) + @test Array(dA * I) ≈ Array(S) + @test Array(I * dA) ≈ Array(S) + end + + @testset "UniformScaling with ROCSparseMatrixCOO($dims)" for dims in ((10, 10), (5, 10), (10, 5)) + S = sprand(Float32, dims..., 0.1) + dA = ROCSparseMatrixCOO(S) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA + I)) ≈ S + I + @test SparseMatrixCSC(ROCSparseMatrixCSC(I + dA)) ≈ I + S + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA - I)) ≈ S - I + @test SparseMatrixCSC(ROCSparseMatrixCSC(I - dA)) ≈ I - S + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA * I)) ≈ S + @test SparseMatrixCSC(ROCSparseMatrixCSC(I * dA)) ≈ S + end + + @testset "UniformScaling with wrapped sparse ($wrap, $typ)" for + wrap in (transpose, adjoint), + typ in (ROCSparseMatrixCSR, ROCSparseMatrixCSC, ROCSparseMatrixCOO) + + S = sprand(Float32, 10, 10, 0.3) + dA = typ(S) + dAt = wrap(dA) + Sw = wrap(S) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt + I)) ≈ Sw + I + @test SparseMatrixCSC(ROCSparseMatrixCSC(I + dAt)) ≈ I + Sw + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt - I)) ≈ Sw - I + @test SparseMatrixCSC(ROCSparseMatrixCSC(I - dAt)) ≈ I - Sw + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt * I)) ≈ Array(Sw) + @test SparseMatrixCSC(ROCSparseMatrixCSC(I * dAt)) ≈ Array(Sw) end @testset "Diagonal with $typ(10, 10)" for typ in ( @@ -206,11 +250,56 @@ using Adapt @test Array(dD + dA) ≈ D + S @test Array(dA - dD) ≈ S - D @test Array(dD - dA) ≈ D - S + @test Array(dA * dD) ≈ S * D + @test Array(dD * dA) ≈ D * S @test (dA + dD) isa typ @test (dD + dA) isa typ @test (dA - dD) isa typ @test (dD - dA) isa typ + @test (dA * dD) isa typ + @test (dD * dA) isa typ + end + + @testset "Diagonal with ROCSparseMatrixCOO(10, 10)" begin + S = sprand(Float32, 10, 10, 0.8) + D = Diagonal(rand(Float32, 10)) + dA = ROCSparseMatrixCOO(S) + dD = adapt(ROCArray, D) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA + dD)) ≈ S + D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD + dA)) ≈ D + S + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA - dD)) ≈ S - D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD - dA)) ≈ D - S + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA * dD)) ≈ S * D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD * dA)) ≈ D * S + end + + @testset "Diagonal with wrapped sparse ($wrap, $typ)" for + wrap in (transpose, adjoint), + typ in (ROCSparseMatrixCSR, ROCSparseMatrixCSC, ROCSparseMatrixCOO) + + S = sprand(Float32, 10, 10, 0.8) + D = Diagonal(rand(Float32, 10)) + dA = typ(S) + dD = adapt(ROCArray, D) + dAt = wrap(dA) + Sw = wrap(S) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt + dD)) ≈ Sw + D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD + dAt)) ≈ D + Sw + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt - dD)) ≈ Sw - D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD - dAt)) ≈ D - Sw + @test SparseMatrixCSC(ROCSparseMatrixCSC(dAt * dD)) ≈ Sw * D + @test SparseMatrixCSC(ROCSparseMatrixCSC(dD * dAt)) ≈ D * Sw + end + + @testset "COO ± COO $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) + n = 10 + A = sprand(elty, n, n, 0.5) + B = sprand(elty, n, n, 0.5) + dA = ROCSparseMatrixCOO(A) + dB = ROCSparseMatrixCOO(B) + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA + dB)) ≈ A + B + @test SparseMatrixCSC(ROCSparseMatrixCSC(dA - dB)) ≈ A - B end end diff --git a/test/hip_rocsparse/linalg.jl b/test/hip_rocsparse/linalg.jl index 1a9174a04..ad564ac4e 100644 --- a/test/hip_rocsparse/linalg.jl +++ b/test/hip_rocsparse/linalg.jl @@ -4,6 +4,7 @@ using AMDGPU: ROCVector, ROCMatrix, ROCArray using AMDGPU.rocSPARSE using SparseArrays using LinearAlgebra +using Adapt @assert AMDGPU.functional(:rocsparse) @@ -14,3 +15,66 @@ using LinearAlgebra @test opnorm(S, Inf) ≈ opnorm(dS_csc, Inf) @test opnorm(S, Inf) ≈ opnorm(dS_csr, Inf) end + +@testset "triu/tril ($typ)" for typ in ( + ROCSparseMatrixCOO, ROCSparseMatrixCSR, ROCSparseMatrixCSC, +) + for T in (Float32, Float64, ComplexF32, ComplexF64) + n = 10 + A = sprand(T, n, n, 0.5) + dA = typ(A) + + for k in (-2, -1, 0, 1, 2) + @test SparseMatrixCSC(typ(triu(dA, k))) ≈ triu(A, k) + @test SparseMatrixCSC(typ(tril(dA, k))) ≈ tril(A, k) + end + # default k=0 + @test SparseMatrixCSC(typ(triu(dA))) ≈ triu(A) + @test SparseMatrixCSC(typ(tril(dA))) ≈ tril(A) + end +end + +@testset "triu/tril with transpose/adjoint" begin + for T in (Float32, ComplexF32) + n = 8 + A = sprand(T, n, n, 0.5) + dA = ROCSparseMatrixCSR(A) + for wrap in (transpose, adjoint) + dAt = wrap(dA) + At = wrap(A) + @test SparseMatrixCSC(ROCSparseMatrixCSC(triu(dAt))) ≈ triu(At) + @test SparseMatrixCSC(ROCSparseMatrixCSC(tril(dAt))) ≈ tril(At) + end + end +end + +@testset "kron (COO×COO)" begin + for T in (Float32, Float64) + A = sprand(T, 4, 5, 0.5) + B = sprand(T, 3, 2, 0.5) + dA = ROCSparseMatrixCOO(A) + dB = ROCSparseMatrixCOO(B) + @test SparseMatrixCSC(ROCSparseMatrixCSC(kron(dA, dB))) ≈ kron(A, B) + end +end + +@testset "kron (COO×Diagonal)" begin + for T in (Float32, Float64) + A = sprand(T, 4, 4, 0.5) + D = Diagonal(rand(T, 3)) + dA = ROCSparseMatrixCOO(A) + dD = adapt(ROCArray, D) + @test SparseMatrixCSC(ROCSparseMatrixCSC(kron(dA, dD))) ≈ kron(A, D) + @test SparseMatrixCSC(ROCSparseMatrixCSC(kron(dD, dA))) ≈ kron(D, A) + end +end + +@testset "kron (CSR/CSC via COO)" begin + for T in (Float32,), typ in (ROCSparseMatrixCSR, ROCSparseMatrixCSC) + A = sprand(T, 3, 3, 0.5) + B = sprand(T, 2, 2, 0.5) + dA = typ(A) + dB = typ(B) + @test SparseMatrixCSC(typ(kron(dA, dB))) ≈ kron(A, B) + end +end