Skip to content

Commit b8af140

Browse files
authored
Merge pull request #40 from kshyatt/ksh/cuda
Extensions to support `copy!` for `CuStridedView` (and friends!)
1 parent f490921 commit b8af140

9 files changed

Lines changed: 192 additions & 21 deletions

File tree

.buildkite/pipeline.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
env:
2+
SECRET_CODECOV_TOKEN: "EEXB5DS9rR3VXck1NzJougBwxy+3bGKAX9sq1hTwe+rvftmQzdnpy3MlJXLUXQXnBvjezhHZpt07nlG1p9Pi39bnUIddPJHJVVbtqjiGbVuAjVno2tcm8cvi/mYDPoJw7hs8G36IVDb3wklO9wAiO7vwO2br8LQOHMNZBTCUfkb30aT3e/yBnb2QiwNspKCvcd7XYpsmMy78Egdg219sfZ783fG/H7VHv0YzZThj+IAUhm8ftsPURHRmHk28wSdFGzwI2CX8nEx4LgtDhqa+JH84YajIiwWaFymfkw6phpSF3KQNlR53qRWUDD6hClhOizmYyQuZZ8TO5gnNDsrGLg==;U2FsdGVkX1/pfvZY/FJSU7D+DE+6I18s5BSfa63C+31RoDKiHqENegG4whXuxZ5a6YE0XegF8jOretp+E7FiyQ=="
3+
4+
steps:
5+
- label: "Julia v1 -- CUDA"
6+
plugins:
7+
- JuliaCI/julia#v1:
8+
version: "1"
9+
- JuliaCI/julia-test#v1: ~
10+
- JuliaCI/julia-coverage#v1:
11+
dirs:
12+
- src
13+
- ext
14+
agents:
15+
queue: "juliagpu"
16+
cuda: "*"
17+
if: build.message !~ /\[skip tests\]/
18+
timeout_in_minutes: 30
19+
20+
- label: "Julia LTS -- CUDA"
21+
plugins:
22+
- JuliaCI/julia#v1:
23+
version: "1.10" # "lts" isn't valid
24+
- JuliaCI/julia-test#v1: ~
25+
- JuliaCI/julia-coverage#v1:
26+
dirs:
27+
- src
28+
- ext
29+
agents:
30+
queue: "juliagpu"
31+
cuda: "*"
32+
if: build.message !~ /\[skip tests\]/
33+
timeout_in_minutes: 30
34+
35+
- label: "Julia v1 -- AMDGPU"
36+
plugins:
37+
- JuliaCI/julia#v1:
38+
version: "1"
39+
- JuliaCI/julia-test#v1: ~
40+
- JuliaCI/julia-coverage#v1:
41+
dirs:
42+
- src
43+
- ext
44+
agents:
45+
queue: "juliagpu"
46+
rocm: "*"
47+
rocmgpu: "*"
48+
if: build.message !~ /\[skip tests\]/
49+
timeout_in_minutes: 30
50+
51+
- label: "Julia LTS -- AMDGPU"
52+
plugins:
53+
- JuliaCI/julia#v1:
54+
version: "1.10" # "lts" isn't valid
55+
- JuliaCI/julia-test#v1: ~
56+
- JuliaCI/julia-coverage#v1:
57+
dirs:
58+
- src
59+
- ext
60+
agents:
61+
queue: "juliagpu"
62+
rocm: "*"
63+
rocmgpu: "*"
64+
if: build.message !~ /\[skip tests\]/
65+
timeout_in_minutes: 30

.github/workflows/ci.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: CI
22
on:
33
push:
44
branches:
5-
- 'master'
65
- 'main'
76
- 'release-'
87
tags: '*'
@@ -21,7 +20,6 @@ jobs:
2120
fail-fast: false
2221
matrix:
2322
version:
24-
- '1.6' # previous LTS release
2523
- 'lts' # current LTS release
2624
- '1' # current stable release
2725
os:
@@ -45,4 +43,4 @@ jobs:
4543
- uses: codecov/codecov-action@v5
4644
with:
4745
file: lcov.info
48-
46+

Project.toml

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,42 @@
11
name = "Strided"
22
uuid = "5e0ebb24-38b0-5f93-81fe-25c709ecae67"
3-
authors = ["Lukas Devos <lukas.devos@ugent.be>", "Maarten Van Damme <maartenvd1994@gmail.com>", "Jutho Haegeman <jutho.haegeman@ugent.be>"]
43
version = "2.3.2"
4+
authors = ["Lukas Devos <lukas.devos@ugent.be>", "Maarten Van Damme <maartenvd1994@gmail.com>", "Jutho Haegeman <jutho.haegeman@ugent.be>"]
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
88
StridedViews = "4db3bf67-4bd7-4b4e-b153-31dc3fb37143"
99
TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1010

11+
[weakdeps]
12+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
13+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
14+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
15+
16+
[extensions]
17+
StridedAMDGPUExt = "AMDGPU"
18+
StridedGPUArraysExt = "GPUArrays"
19+
StridedCUDAExt = "CUDA"
20+
1121
[compat]
22+
AMDGPU = "2"
1223
Aqua = "0.8"
24+
CUDA = "5"
25+
GPUArrays = "11.4.1"
1326
LinearAlgebra = "1.6"
1427
Random = "1.6"
15-
StridedViews = "0.3.2,0.4"
28+
StridedViews = "0.4.5"
1629
Test = "1.6"
1730
TupleTools = "1.6"
1831
julia = "1.6"
1932

2033
[extras]
34+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
2135
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
36+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
37+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
2238
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
2339
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2440

2541
[targets]
26-
test = ["Test", "Random", "Aqua"]
42+
test = ["Test", "Random", "Aqua", "AMDGPU", "CUDA", "GPUArrays"]

ext/StridedAMDGPUExt.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
module StridedAMDGPUExt
2+
3+
using Strided, StridedViews, AMDGPU
4+
using AMDGPU: Adapt
5+
using AMDGPU: GPUArrays
6+
7+
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}
8+
9+
# Copy `src` into `dst` and return `dst`, for `StridedView`s whose array type
# parameter is a `ROCArray` and whose element types are `Number`s.
#
# Both views may carry any of the operations in `ALL_FS` (adjoint, conj,
# identity, transpose) as their function parameter. The copy is expressed as an
# identity broadcast of `src` over `axes(dst)` and executed by GPUArrays.
#
# NOTE(review): no explicit check here that `axes(src) == axes(dst)`; presumably
# callers or the broadcast kernel are expected to guarantee it — confirm.
function Base.copy!(dst::StridedView{TD, ND, TAD, FD}, src::StridedView{TS, NS, TAS, FS}) where {TD <: Number, ND, TAD <: ROCArray{TD}, FD <: ALL_FS, TS <: Number, NS, TAS <: ROCArray{TS}, FS <: ALL_FS}
10+
# Broadcast style is taken from the source's array type `TAS`, not from the view itself.
bc_style = Base.Broadcast.BroadcastStyle(TAS)
11+
# Hand-built broadcast object: `identity` applied to `src`, shaped by `dst`'s axes.
bc = Base.Broadcast.Broadcasted(bc_style, identity, (src,), axes(dst))
12+
# NOTE(review): `_copyto!` is underscore-prefixed, so presumably internal to
# GPUArrays — confirm it is stable across the GPUArrays versions allowed in [compat].
GPUArrays._copyto!(dst, bc)
13+
return dst
14+
end
15+
16+
end

ext/StridedCUDAExt.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
module StridedCUDAExt
2+
3+
using Strided, StridedViews, CUDA
4+
using CUDA: Adapt, KernelAdaptor
5+
using CUDA: GPUArrays
6+
7+
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}
8+
9+
# Copy `src` into `dst` and return `dst`, for `StridedView`s whose array type
# parameter is a `CuArray` and whose element types are `Number`s.
#
# Both views may carry any of the operations in `ALL_FS` (adjoint, conj,
# identity, transpose) as their function parameter. The copy is expressed as an
# identity broadcast of `src` over `axes(dst)` and executed by GPUArrays.
#
# NOTE(review): no explicit check here that `axes(src) == axes(dst)`; presumably
# callers or the broadcast kernel are expected to guarantee it — confirm.
function Base.copy!(dst::StridedView{TD, ND, TAD, FD}, src::StridedView{TS, NS, TAS, FS}) where {TD <: Number, ND, TAD <: CuArray{TD}, FD <: ALL_FS, TS <: Number, NS, TAS <: CuArray{TS}, FS <: ALL_FS}
10+
# Broadcast style is taken from the source's array type `TAS`, not from the view itself.
bc_style = Base.Broadcast.BroadcastStyle(TAS)
11+
# Hand-built broadcast object: `identity` applied to `src`, shaped by `dst`'s axes.
bc = Base.Broadcast.Broadcasted(bc_style, identity, (src,), axes(dst))
12+
# NOTE(review): `_copyto!` is underscore-prefixed, so presumably internal to
# GPUArrays — confirm it is stable across the GPUArrays versions allowed in [compat].
GPUArrays._copyto!(dst, bc)
13+
return dst
14+
end
15+
16+
end

ext/StridedGPUArraysExt.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
module StridedGPUArraysExt
2+
3+
using Strided, GPUArrays
4+
using GPUArrays: Adapt, KernelAbstractions
5+
6+
# Union of the types of `adjoint`, `conj`, `identity` and `transpose`.
# Declared `const` so the module-level binding has a fixed type and cannot be
# reassigned; a non-const global is untyped from the compiler's point of view.
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}
7+
8+
# Backend of a `StridedView` over a GPU array: forwarded to the backend of the
# array returned by `parent(sv)`. Applies only when the view's array type
# parameter `TA` is an `AnyGPUArray{T}`.
KernelAbstractions.get_backend(sv::StridedView{T, N, TA}) where {T, N, TA <: AnyGPUArray{T}} = KernelAbstractions.get_backend(parent(sv))
9+
10+
# Broadcast style for a `StridedView` over a GPU array: the style of the
# underlying array type `TA`, rebuilt with the view's own dimensionality `N`.
#
# NOTE(review): this method takes a view *instance*. Base's broadcasting
# machinery queries `BroadcastStyle` with a *type* (`BroadcastStyle(typeof(x))`),
# so it would not reach this method on its own — confirm whether dispatch on
# `::Type{<:StridedView{T, N, TA}}` was intended, or whether this is only meant
# to be called explicitly.
function Base.Broadcast.BroadcastStyle(gpu_sv::StridedView{T, N, TA}) where {T, N, TA <: AnyGPUArray{T}}
11+
# Style object of the wrapped array type; its dimensionality is that of `TA`.
raw_style = Base.Broadcast.BroadcastStyle(TA)
12+
return typeof(raw_style)(Val(N)) # sets the dimensionality correctly
13+
end
14+
15+
end

test/amd.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Checks `copy!` between `StridedView`s backed by ROC arrays against `copy!` on
# the correspondingly wrapped plain ROC matrices, for every element type `T`,
# every pair of operations `(f1, f2)`, and every size pair including empty ones.
for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
2+
@testset "Copy with ROCStridedView: $T, $f1, $f2" for f2 in (identity, conj, adjoint, transpose), f1 in (identity, conj, transpose, adjoint)
3+
for m1 in (0, 16, 32), m2 in (0, 16, 32)
4+
# Matrices with a zero-length dimension are built with `undef`; all others
# are uploaded from host `randn` data.
if iszero(m1 * m2)
5+
A1 = AMDGPU.ROCMatrix{T}(undef, (m1, m2))
6+
else
7+
A1 = ROCMatrix(randn(T, (m1, m2)))
8+
end
9+
# Destination has the same shape as A1; its contents are not initialised here.
A2 = similar(A1)
10+
# Separate copies so the StridedView path and the reference path do not share storage.
A1c = copy(A1)
11+
A2c = copy(A2)
12+
B1 = f1(StridedView(A1c))
13+
B2 = f2(StridedView(A2c))
14+
# Skip combinations whose source and destination axes differ
# (e.g. a transposed non-square source against an untransposed destination).
axes(f1(A1)) == axes(f2(A2)) || continue
15+
# Left: reference `copy!` on the wrapped ROC matrices, collected.
# Right: `copy!` on the StridedViews, passed through `Adapt.adapt(Vector{T}, ...)`
# (presumably moving the result to host memory — confirm).
@test collect(ROCMatrix(copy!(f2(A2), f1(A1)))) == AMDGPU.Adapt.adapt(Vector{T}, copy!(B2, B1))
16+
end
17+
end
18+
end

test/cuda.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Checks `copy!` between `StridedView`s backed by CUDA arrays against `copy!` on
# the correspondingly wrapped plain CUDA matrices, for every element type `T`,
# every pair of operations `(f1, f2)`, and every size pair including empty ones.
for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
2+
@testset "Copy with CuStridedView: $T, $f1, $f2" for f2 in (identity, conj, adjoint, transpose), f1 in (identity, conj, transpose, adjoint)
3+
for m1 in (0, 16, 32), m2 in (0, 16, 32)
4+
A1 = CUDA.randn(T, (m1, m2))
5+
# Destination has the same shape as A1; its contents are not initialised here.
A2 = similar(A1)
6+
# Separate copies so the StridedView path and the reference path do not share storage.
A1c = copy(A1)
7+
A2c = copy(A2)
8+
B1 = f1(StridedView(A1c))
9+
B2 = f2(StridedView(A2c))
10+
# Skip combinations whose source and destination axes differ
# (e.g. a transposed non-square source against an untransposed destination).
axes(f1(A1)) == axes(f2(A2)) || continue
11+
# Left: reference `copy!` on the wrapped CUDA matrices, collected.
# Right: `copy!` on the StridedViews, passed through `Adapt.adapt(Vector{T}, ...)`
# (presumably moving the result to host memory — confirm).
@test collect(CuMatrix(copy!(f2(A2), f1(A1)))) == CUDA.Adapt.adapt(Vector{T}, copy!(B2, B1))
12+
end
13+
end
14+
end

test/runtests.jl

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,38 @@ using LinearAlgebra
33
using Random
44
using Strided
55
using Strided: StridedView
6+
using Aqua
7+
using AMDGPU, CUDA, GPUArrays
68

79
Random.seed!(1234)
810

9-
println("Base.Threads.nthreads() = $(Base.Threads.nthreads())")
11+
is_buildkite = get(ENV, "BUILDKITE", "false") == "true"
1012

11-
println("Running tests single-threaded:")
12-
Strided.disable_threads()
13-
include("othertests.jl")
14-
include("blasmultests.jl")
13+
if !is_buildkite
14+
println("Base.Threads.nthreads() = $(Base.Threads.nthreads())")
1515

16-
println("Running tests multi-threaded:")
17-
Strided.enable_threads()
18-
Strided.set_num_threads(Base.Threads.nthreads() + 1)
19-
include("othertests.jl")
20-
include("blasmultests.jl")
16+
println("Running tests single-threaded:")
17+
Strided.disable_threads()
18+
include("othertests.jl")
19+
include("blasmultests.jl")
2120

22-
Strided.enable_threaded_mul()
23-
include("blasmultests.jl")
24-
Strided.disable_threaded_mul()
21+
println("Running tests multi-threaded:")
22+
Strided.enable_threads()
23+
Strided.set_num_threads(Base.Threads.nthreads() + 1)
24+
include("othertests.jl")
25+
include("blasmultests.jl")
2526

26-
using Aqua
27-
Aqua.test_all(Strided; piracies = false)
27+
Strided.enable_threaded_mul()
28+
include("blasmultests.jl")
29+
Strided.disable_threaded_mul()
30+
31+
Aqua.test_all(Strided; piracies = false)
32+
end
33+
34+
if CUDA.functional()
35+
include("cuda.jl")
36+
end
37+
38+
if AMDGPU.functional()
39+
include("amd.jl")
40+
end

0 commit comments

Comments
 (0)