Skip to content

Commit 1f74b4b

Browse files
committed
Add proper docstrings.
1 parent 2084ef9 commit 1f74b4b

1 file changed

Lines changed: 34 additions & 16 deletions

File tree

src/ThreadedDenseSparseMul.jl

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,62 @@ import SparseArrays
44
import SparseArrays: SparseMatrixCSC, SparseVector, nonzeroinds, nonzeros, findnz
55
import Polyester: @batch
66

7+
export fastdensesparse!, fastdensesparse_threaded!
8+
export fastdensesparse_outer!, fastdensesparse_outer_threaded!
9+
10+
const VecOrView{T} = Union{Vector{T}, SubArray{T, 1, Matrix{T}}}
11+
const MatOrView{T} = Union{Matrix{T}, SubArray{T, 2, Matrix{T}}}
12+
713
"""
814
fastdensesparse!(C, A, B, α, β)
915
10-
BLAS like interface, computing `C .= β*C + α*A*B`, but accelerated with multi-threading using `Polyester.jl`.
16+
BLAS like interface, computing `C .= β*C + α*A*B`, but way faster than Base would.
17+
18+
Also see `fastdensesparse_threaded!` for a multi-threaded version using `Polyester.jl`.
1119
"""
12-
function fastdensesparse!(C::Matrix{T}, A::Matrix{T}, B::SparseMatrixCSC{T}, α::Number, β::Number) where T
20+
function fastdensesparse!(C::MatOrView{T}, A::MatOrView{T}, B::SparseMatrixCSC{T}, α::Number, β::Number) where T
1321
for j in axes(B, 2)
1422
C[:, j] .*= β
1523
C[:, j] .+= A *.*B[:, j])
1624
end
1725
return C
1826
end
19-
function fastdensesparse_threaded!(C::Matrix{T}, A::Matrix{T}, B::SparseMatrixCSC{T}, α::Number, β::Number) where T
27+
28+
"""
29+
fastdensesparse!(C, A, B, α, β)
30+
31+
Threaded, BLAS like interface, computing `C .= β*C + α*A*B`, but way faster than Base would.
32+
Also see `fastdensesparse!` for a single-threaded version.
33+
"""
34+
function fastdensesparse_threaded!(C::MatOrView{T}, A::MatOrView{T}, B::SparseMatrixCSC{T}, α::Number, β::Number) where T
2035
@batch for j in axes(B, 2)
2136
C[:, j] .*= β
2237
C[:, j] .+= A *.*B[:, j])
2338
end
2439
return C
2540
end
2641

27-
const VecOrView{T} = Union{Vector{T}, SubArray{T, 1, Matrix{T}}}
28-
# this one is slightly slower than `fastdensesparse_outer!`, probably because of extra allocations.
29-
function _fastdensesparse_outer!(C::Matrix{T}, A::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
30-
for (j, X_val) in zip(findnz(b)...)
31-
C[:, j] .*= β
32-
C[:, j] .+=*X_val) .* A # this compiles to something similar to axpy!, i.e. no allocations. Notice we need the dot also for the scalar mul.
33-
end
34-
return C
35-
end
42+
"""
43+
fastdensesparse_outer!(C, a, b, α, β)
3644
37-
function fastdensesparse_outer!(C::Matrix{T}, a::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
45+
Fast outer product when computing `C .= β*C + α * a*b'`, but way faster than Base would.
46+
- `a` is a dense vector (or view), `b` is a sparse vector, `C` is a dense matrix (or view).
47+
Also see `fastdensesparse_outer_threaded!` for a multi-threaded version using `Polyester.jl`.
48+
"""
49+
function fastdensesparse_outer!(C::MatOrView{T}, a::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
3850
C[:, nonzeroinds(b)] .+= a * nonzeros(b)'
3951
return C
4052
end
4153

42-
function fastdensesparse_outer_threaded!(C::Matrix{T}, a::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
54+
"""
55+
fastdensesparse_outer_threaded!(C, a, b, α, β)
56+
57+
Threaded, fast outer product when computing `C .= β*C + α * a*b'`, but way faster than Base would, using `Polyester.jl`.
58+
- `a` is a dense vector (or view), `b` is a sparse vector, `C` is a dense matrix (or view).
59+
60+
Also see `fastdensesparse_outer!` for a single-threaded version.
61+
"""
62+
function fastdensesparse_outer_threaded!(C::MatOrView{T}, a::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
4363
inds = nonzeroinds(b)
4464
nzs = nonzeros(b)
4565
@batch for i in axes(nzs, 1)
@@ -49,6 +69,4 @@ function fastdensesparse_outer_threaded!(C::Matrix{T}, a::VecOrView{T}, b::Spars
4969
return C
5070
end
5171

52-
53-
5472
end # module ThreadedDenseSparseMul

0 commit comments

Comments
 (0)