Skip to content

Commit 76e4944

Browse files
committed
Write tests and fix bug.
1 parent 1f74b4b commit 76e4944

2 files changed

Lines changed: 46 additions & 32 deletions

File tree

src/ThreadedDenseSparseMul.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ Fast outer product when computing `C .= β*C + α * a*b'`, but way faster than B
4747
Also see `fastdensesparse_outer_threaded!` for a multi-threaded version using `Polyester.jl`.
4848
"""
4949
function fastdensesparse_outer!(C::MatOrView{T}, a::VecOrView{T}, b::SparseVector{T}, α::Number, β::Number) where T
50-
C[:, nonzeroinds(b)] .+= a * nonzeros(b)'
50+
C[:, nonzeroinds(b)] .*= β
51+
C[:, nonzeroinds(b)] .+= a * (α.*nonzeros(b))'
5152
return C
5253
end
5354

test/runtests.jl

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,59 @@ using Test
33
import SparseArrays: mul!, sprand
44
import Profile
55

6-
∈ₛ(a,b) = occursin(a,b)
7-
# We use a hack to find out if Polyester is actually used, namely
8-
# profiling the call and checking the used modules for Polyester.
9-
# This only works if the problem size is large enough (and then is still a statistical property).
10-
# I wasn't able to find a clear way, but probably there is one.
11-
function check_module_used(mod_string::String, prof_data::Vector)
12-
cache = Dict{Symbol, String}();
13-
sframes = Profile.getdict(prof_data) |> values |> Iterators.flatten
14-
used_paths = [Profile.short_path(frame.file, cache) for frame in sframes]
15-
any(path->mod_string ∈ₛ path, used_paths)
16-
end
176

187
@testset "Compare with equivalent dense mul" begin
19-
@testset for trial in 1:10
20-
lhs = rand(500, 1000);
21-
rhs = sprand(1000, 10_000, 0.1);
8+
@testset "fastdensesparse" begin
9+
@testset for trial in 1:10
10+
lhs = rand(500, 1000);
11+
rhs = sprand(1000, 10_000, 0.1);
12+
13+
baseline = lhs * Matrix(rhs);
14+
15+
buf = similar(baseline)
16+
17+
fastdensesparse!(buf, lhs, rhs, 1, 0)
18+
@test buf ≈ baseline
19+
fastdensesparse!(buf, lhs, rhs, -1, 0)
20+
fastdensesparse_threaded!(buf, lhs, rhs, 1, 0)
21+
@test buf ≈ baseline
2222

23-
baseline = lhs * Matrix(rhs);
24-
@test lhs * rhs ≈ baseline
2523

26-
buf = similar(baseline)
24+
# test @view interface and \beta \neq 0
25+
inds = collect(3:5:100)
26+
baseline[inds, :] .+= 2.5 * @view(lhs[inds, :]) * Matrix(rhs);
2727

28-
@testset "Check actually using Polyester" begin
29-
# make sure I've overwritten the regular mul correctly and actually use Polyester...
30-
Profile.clear(); Profile.@profile (buf .= lhs * rhs)
31-
@test check_module_used("@Polyester", Profile.fetch())
28+
fastdensesparse!(@view(buf[inds, :]), @view(lhs[inds, :]), rhs, 2.5, 1)
29+
@test buf ≈ baseline rtol=sqrt(eps(eltype(baseline)))
30+
fastdensesparse!(@view(buf[inds, :]), @view(lhs[inds, :]), rhs, -2.5, 1)
31+
fastdensesparse_threaded!(@view(buf[inds, :]), @view(lhs[inds, :]), rhs, 2.5, 1)
32+
@test buf ≈ baseline rtol=sqrt(eps(eltype(baseline)))
3233
end
34+
end
35+
36+
@testset "fastdensesparse_outer" begin
37+
@testset for trial in 1:10
38+
lhs = rand(500, 1000);
39+
rhs = sprand(1000, 10_000, 0.1);
40+
k = rand(1:size(rhs, 1))
3341

34-
buf .= lhs * rhs
35-
@test buf ≈ baseline
42+
baseline = lhs[:, k:k] * Matrix(rhs)[k:k, :];
3643

37-
buf .= 0.
38-
buf .+= lhs * rhs
39-
@test buf ≈ baseline
44+
buf = similar(baseline)
4045

41-
buf .= 0.
42-
buf .+= 2.0.*lhs * rhs
43-
@test buf/2 ≈ baseline
46+
fastdensesparse_outer!(buf, @view(lhs[:, k]), rhs[k, :], 1, 0)
47+
@test buf ≈ baseline
48+
fastdensesparse_outer!(buf, @view(lhs[:, k]), rhs[k, :], -1, 0)
49+
fastdensesparse_outer_threaded!(buf, @view(lhs[:, k]), rhs[k, :], 1, 0)
50+
@test buf ≈ baseline
4451

45-
mul!(buf, lhs, rhs, 1, 0)
46-
@test buf ≈ baseline
52+
baseline .+= 2.5 * lhs[:, (k+1):(k+1)] * Matrix(rhs)[(k+1):(k+1), :];
53+
54+
fastdensesparse_outer!(buf, lhs[:, k+1], rhs[k+1, :], 2.5, 1)
55+
@test buf ≈ baseline
56+
fastdensesparse_outer!(buf, lhs[:, k+1], rhs[k+1, :], -2.5, 1)
57+
fastdensesparse_outer_threaded!(buf, lhs[:, k+1], rhs[k+1, :], 2.5, 1)
58+
@test buf ≈ baseline
59+
end
4760
end
4861
end

0 commit comments

Comments
 (0)