Skip to content
1 change: 1 addition & 0 deletions src/GPUArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ using KernelAbstractions
# device functionality
include("device/abstractarray.jl")
include("device/sparse.jl")
include("device/indexing.jl")

# host abstractions
include("host/abstractarray.jl")
Expand Down
164 changes: 164 additions & 0 deletions src/device/indexing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# device-level indexing
using SparseArrays: nonzeroinds, nonzeros, nnz, getcolptr
using Base: @propagate_inbounds

Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear()

# Implementing only scalar indexing. Non-scalar indexing would allocate in device code

## Adapted from SparseArrays.AbstractSparseVector
## Adapted from SparseArrays.AbstractSparseVector
@propagate_inbounds function Base.getindex(
    v::GPUSparseDeviceVector{Tv,Ti},
    i::Integer,
) where {Tv,Ti}
    @boundscheck checkbounds(v, i)
    nzinds = nonzeroinds(v)
    nzvals = nonzeros(v)
    nstored = nnz(v)

    # Binary search for the stored position of index `i`; `nzinds` is sorted
    # for sparse vectors. Convert `i` to the index eltype so the comparison is
    # homogeneous (in-bounds `i` always fits in `Ti`).
    pos = searchsortedfirst(nzinds, convert(Ti, i))
    if pos <= nstored && nzinds[pos] == i
        return nzvals[pos]
    else
        # Index not stored: structural zero.
        return zero(Tv)
    end
end

# Logical getindex: device code cannot allocate a result array, so a Bool mask
# is only supported when it selects exactly one element (a scalar).
@propagate_inbounds function Base.getindex(
    v::GPUSparseDeviceVector,
    I::AbstractVector{Bool},
)
    # `findfirst(I)` is only safe to use as an index when exactly one entry is
    # true; zero true values (findfirst -> nothing) or several true values
    # cannot yield a scalar. The previous message claimed "more than one true
    # value" even for an all-false mask.
    isone(count(I)) ? v[findfirst(I)] :
    error(
        "Logical index must contain exactly one true value. Device arrays only support scalar indexing.",
    )
end

# Mixed scalar/logical indexing A[i, J]: the Bool mask `J` must select exactly
# one column so the result is a scalar (device code cannot allocate).
@propagate_inbounds function Base.getindex(
    A::AbstractGPUSparseDeviceMatrix,
    i::Integer,
    J::AbstractVector{Bool},
)
    # Zero true values would make `findfirst(J)` return `nothing`; the previous
    # message ("more than one true value") was wrong for that case.
    isone(count(J)) ? A[i, findfirst(J)] :
    error(
        "Logical index must contain exactly one true value. Device arrays only support scalar indexing.",
    )
end

# Mixed logical/scalar indexing A[I, j]: the Bool mask `I` must select exactly
# one row so the result is a scalar (device code cannot allocate).
@propagate_inbounds function Base.getindex(
    A::AbstractGPUSparseDeviceMatrix,
    I::AbstractVector{Bool},
    j::Integer,
)
    # Zero true values would make `findfirst(I)` return `nothing`; the previous
    # message ("more than one true value") was wrong for that case.
    isone(count(I)) ? A[findfirst(I), j] :
    error(
        "Logical index must contain exactly one true value. Device arrays only support scalar indexing.",
    )
end

# Doubly-logical indexing A[I, J]: both masks must select exactly one
# element each so the result is a scalar (device code cannot allocate).
@propagate_inbounds function Base.getindex(
    A::AbstractGPUSparseDeviceMatrix,
    I::AbstractVector{Bool},
    J::AbstractVector{Bool},
)
    nI, nJ = count(I), count(J)
    if isone(nI) && isone(nJ)
        A[findfirst(I), findfirst(J)]
    else
        # Blame the mask that actually violates the contract. The previous
        # `findmax` logic reported the mask with the *larger* count, which
        # wrongly blamed a valid mask when the other one had zero trues.
        failing_idx = isone(nI) ? "J" : "I"
        failing_len = isone(nI) ? nJ : nI
        error(
            "Logical index $(failing_idx) contains $(failing_len) true " *
            "values; exactly one is required. Device arrays only support scalar indexing.",
        )
    end
end

# Allow indexing with a 2-tuple of indices by forwarding to the (i, j) method.
@propagate_inbounds function Base.getindex(
    A::AbstractGPUSparseDeviceMatrix,
    I::Tuple{Integer,Integer},
)
    i, j = I
    return A[i, j]
end

# Scalar getindex methods linear-scan the minor axis rather than binary-searching
# and sum across matching entries. cuSPARSE formats don't guarantee sorted indices
# within a major-axis slice (e.g. SpGEMM output may leave CSR columns unsorted
# within a row, and COO is only guaranteed row-sorted), nor uniqueness — duplicate
# (i, j) entries are permitted and their values sum, matching the convention of
# Julia's `sparse()` constructor and SciPy/CuPy. For Bool we OR instead of sum,
# also matching `sparse()`, since Bool + Bool doesn't stay Bool.

# Combine two stored values that share the same (i, j) coordinate.
sum_duplicate(a, b) = a + b
# Bool specialization: logical OR, because Bool + Bool promotes to Int.
sum_duplicate(a::Bool, b::Bool) = a | b

## Adapted logic from SparseArrays.AbstractSparseMatrixCSC
@propagate_inbounds function Base.getindex(
    A::GPUSparseDeviceMatrixCSC{Tv,Ti},
    i::Integer,
    j::Integer,
) where {Tv,Ti}
    @boundscheck checkbounds(A, i, j)
    colPtr, rowVal, nzVal = getcolptr(A), rowvals(A), nonzeros(A)

    # Stored entries of column j live in rowVal/nzVal[colPtr[j] : colPtr[j+1]-1].
    rl = convert(Ti, @inbounds colPtr[j])
    rr = convert(Ti, @inbounds colPtr[j+1] - 1)

    # Row indices within a column are not guaranteed sorted or unique by
    # cuSPARSE (see the sum_duplicate note above), so a binary search is
    # invalid here; linear-scan the column and sum duplicate entries. This also
    # fixes the previous `ii <= nnz(A)` check, which could accept a search
    # result that pointed into the *next* column's entries.
    result = zero(Tv)
    for k in rl:rr          # empty when the column has no stored entries
        rowVal[k] == i && (result = sum_duplicate(result, nzVal[k]))
    end
    return result
end

# Scalar getindex for CSR: linear-scan the row's column indices.
@propagate_inbounds function Base.getindex(
    A::GPUSparseDeviceMatrixCSR{Tv,Ti},
    i::Integer,
    j::Integer,
) where {Tv,Ti}
    @boundscheck checkbounds(A, i, j)
    rowPtr, colVal, nzVal = A.rowPtr, A.colVal, A.nzVal

    # Stored entries of row i live in colVal/nzVal[rowPtr[i] : rowPtr[i+1]-1].
    rt = convert(Ti, @inbounds rowPtr[i])
    rb = convert(Ti, @inbounds rowPtr[i+1] - 1)

    # Column indices within a row are not guaranteed sorted or unique by
    # cuSPARSE (e.g. SpGEMM output — see the sum_duplicate note above), so a
    # binary search is invalid; linear-scan the row and sum duplicate entries.
    # This also fixes the previous `jj <= nnz(A)` check, which could accept a
    # search result that pointed into the *next* row's entries.
    result = zero(Tv)
    for k in rt:rb          # empty when the row has no stored entries
        colVal[k] == j && (result = sum_duplicate(result, nzVal[k]))
    end
    return result
end

## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L490
@propagate_inbounds function Base.getindex(
    A::GPUSparseDeviceMatrixCOO{Tv,Ti},
    i::Integer,
    j::Integer,
) where {Tv,Ti}
    # COO in cuSPARSE is assumed sorted by row (not by column):
    # https://docs.nvidia.com/cuda/cusparse/storage-formats.html#coordinate-coo
    @boundscheck checkbounds(A, i, j)
    rowInd, colInd, nzVal = A.rowInd, A.colInd, A.nzVal
    m = nnz(A)

    # Rows are sorted, so binary-search for the first stored entry of row i.
    rl = searchsortedfirst(rowInd, i, Base.Order.Forward)
    (rl > m || rowInd[rl] > i) && return zero(Tv)

    # Columns within a row are not guaranteed sorted or unique, so linear-scan
    # the row and sum duplicate (i, j) entries, stopping once we leave row i.
    # Fixes two bugs in the previous version: the loop ran `rl:rl` (scanned a
    # single entry) and compared `colInd[k]` against the *row* index `i`
    # instead of the column index `j`.
    result = zero(Tv)
    for k in rl:m
        rowInd[k] > i && break
        colInd[k] == j && (result = sum_duplicate(result, nzVal[k]))
    end
    return result
end

## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L500
@propagate_inbounds function Base.getindex(
    A::GPUSparseDeviceMatrixBSR{Tv,Ti},
    i::Integer,
    j::Integer,
) where {Tv,Ti}
    @boundscheck checkbounds(A, i, j)

    # Locate the block containing (i, j) and the position inside that block.
    i_block, i_idx = fldmod1(i, A.blockDim)
    j_block, j_idx = fldmod1(j, A.blockDim)
    # Zero-based offset of (i_idx, j_idx) within one dense block.
    # NOTE(review): assumes blocks are stored row-major — confirm against the
    # cuSPARSE direction flag used when the matrix was constructed.
    block_idx = (i_idx - 1) * A.blockDim + j_idx - 1

    # Stored blocks of block-row i_block live in colVal[rowPtr[i_block] : rowPtr[i_block+1]-1].
    c1 = convert(Ti, A.rowPtr[i_block])
    c2 = convert(Ti, A.rowPtr[i_block+1] - 1)

    # Linear-scan the block row; duplicate blocks for the same column sum.
    # Fixes in this version: `zero(T)` used an undefined `T` (now `Tv`);
    # `result == ...` compared instead of assigning; `nonzeros(a)` referenced a
    # nonexistent lowercase `a`; the block column was compared against
    # `i_block` instead of `j_block`; and the nzVal offset ignored that each
    # block occupies `blockDim^2` consecutive entries.
    result = zero(Tv)
    for k in c1:c2
        if A.colVal[k] == j_block
            result = sum_duplicate(result, A.nzVal[(k - 1) * A.blockDim^2 + block_idx + 1])
        end
    end
    return result
end
1 change: 1 addition & 0 deletions src/device/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ Base.length(g::AbstractGPUSparseDeviceMatrix) = prod(g.dims)
Base.size(g::AbstractGPUSparseDeviceMatrix) = g.dims
SparseArrays.nnz(g::AbstractGPUSparseDeviceMatrix) = g.nnz
SparseArrays.getnzval(g::AbstractGPUSparseDeviceMatrix) = g.nzVal
# FIXME: Implement `rowvals` to explicitly say why it's not available for CSR format?

struct GPUSparseDeviceArrayCSR{Tv, Ti, Vi, Vv, N, M, A} <: AbstractSparseArray{Tv, Ti, N}
rowPtr::Vi
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const init_worker_code = quote
include("testsuite.jl")

TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR)
TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC)
TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) # TODO: Add CSR, COO, etc formats

# Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved
if isdefined(JLArrays.KernelAbstractions, :POCL)
Expand Down
63 changes: 62 additions & 1 deletion test/testsuite/sparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,48 @@ end
using SparseArrays
using SparseArrays: nonzeroinds, nonzeros, rowvals

# Copy `src` into `dst` element-by-element via linear indexing; exercises
# device-side scalar `getindex` on the (sparse) source array.
@kernel function linearize_kernel!(dst, @Const(src))
    I = @index(Global, Linear)
    @inbounds dst[I] = src[I]
end

# Launch `linearize_kernel!` to copy every element of `src` into the
# same-length output `dst` on-device, then wait for completion so callers can
# immediately read `dst`.
function linearize!(dst, src)
    backend = get_backend(src)
    @assert length(dst) == length(src)

    kernel = linearize_kernel!(backend)
    kernel(dst, src, ndrange=length(dst))
    # Kernel launches are asynchronous on GPU backends; synchronize before
    # returning so the caller's subsequent reads of `dst` see the results.
    KernelAbstractions.synchronize(backend)
    return
end

# Read the single element `src[i]` on-device and store it in `dst[1]`;
# exercises device-side scalar `getindex` on a sparse vector. Launched with
# ndrange=1, so only one work-item runs.
@kernel function singleindex_kernel!(dst, @Const(src), i)
    I = @index(Global, Linear)
    @inbounds dst[1] = src[i]
end

# Launch `singleindex_kernel!` to read `src[i]` on-device into the
# one-element output `dst`, then wait for completion.
function singleindex!(dst, src::AbstractSparseVector, i::Integer)
    backend = get_backend(src)
    @assert length(dst) == 1
    checkbounds(src, i)
    kernel = singleindex_kernel!(backend)
    kernel(dst, src, i, ndrange=1)
    # Kernel launches are asynchronous on GPU backends; synchronize before
    # returning so the caller's subsequent read of `dst` sees the result.
    KernelAbstractions.synchronize(backend)
    return
end

# Read the single element `src[i, j]` on-device and store it in `dst[1]`;
# exercises device-side scalar `getindex` on a sparse matrix. Launched with
# ndrange=1, so only one work-item runs.
@kernel function singleindex_kernel!(dst, @Const(src), i, j)
    I = @index(Global, Linear)
    @inbounds dst[1] = src[i, j]
end

# Launch `singleindex_kernel!` to read `src[i, j]` on-device into the
# one-element output `dst`, then wait for completion.
function singleindex!(dst, src::AbstractSparseMatrix, i::Integer, j::Integer)
    backend = get_backend(src)
    @assert length(dst) == 1
    # Bug fix: the previous version only checked `checkbounds(src, i)`,
    # leaving the column index `j` unvalidated.
    checkbounds(src, i, j)
    kernel = singleindex_kernel!(backend)
    kernel(dst, src, i, j, ndrange=1)
    # Kernel launches are asynchronous on GPU backends; synchronize before
    # returning so the caller's subsequent read of `dst` sees the result.
    KernelAbstractions.synchronize(backend)
    return
end

function vector(AT, eltypes)
dense_AT = GPUArrays.dense_array_type(AT)
for ET in eltypes
Expand All @@ -38,6 +80,7 @@ function vector(AT, eltypes)
@test ndims(d_x) == 1
dense_d_x = dense_AT(x)
dense_d_x2 = dense_AT(d_x)
# Indexing
@allowscalar begin
@test Array(d_x[:]) == x[:]
@test d_x[firstindex(d_x)] == x[firstindex(x)]
Expand All @@ -54,6 +97,13 @@ function vector(AT, eltypes)
@test Array(nonzeroinds(d_x)) == nonzeroinds(x)
@test Array(rowvals(d_x)) == nonzeroinds(x)
@test nnz(d_x) == length(nonzeros(d_x))
# Device-side (scalar) indexing
dst = zeros(ET, size(d_x))
linearize!(dst, d_x)
@test x == dst
dst_single = zeros(ET, 1)
singleindex!(dst_single, d_x, 17)
@test only(dst_single) == x[17]
end
end
end
Expand All @@ -77,7 +127,7 @@ end
function matrix(AT, eltypes)
dense_AT = GPUArrays.dense_array_type(AT)
for ET in eltypes
@testset "Sparse matrix properties($ET)" begin
@testset "$(AT) properties($ET)" begin
m = 25
n = 35
k = 10
Expand All @@ -98,6 +148,7 @@ function matrix(AT, eltypes)
@test size(d_x, 2) == n
@test size(d_x, 3) == 1
@test ndims(d_x) == 2
# Indexing
@allowscalar begin
@test d_x[:, :] == x[:, :]
@test d_tx[:, :] == transpose(x)[:, :]
Expand Down Expand Up @@ -127,6 +178,16 @@ function matrix(AT, eltypes)
@test nnz(d_x) == length(nonzeros(d_x))
@test !issymmetric(d_x)
@test !ishermitian(d_x)
# Device-side (scalar) indexing
x = sprand(ET, m, n, 0.2)
d_x = AT(x)
dst = zeros(ET, length(d_x))
linearize!(dst, d_x)
@test Array(x[:]) == dst
dst_single = zeros(ET, 1)
i, j = floor(Int, m * 0.5), floor(Int, n * 0.5)
singleindex!(dst_single, d_x, i, j)
@test only(dst_single) == x[i, j]
end
end
end
Expand Down