Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ env:
SECRET_CODECOV_TOKEN: "NsHKj2ZxqUDfErNc+zlH6erC00pk0XRZeNAaU+hyRg6oHlIuSUVL53Z0/MW6Xeq8mBsYsfdG3rmE+h0hoGXj6swpmtjCnLI0CAHUSVOTKNHQ4R6VmKuNnLkNQX7+GO6PEcnV+sCMDSt/nhci0lUl/9qo+6uT/VA+9E6XiKOsKV8nL+kb/GDNJqrG8u2JJzd9EcrFG9Vf4p7tLgsafhQq+yQeVdeYxPWKPx2x6+K2w2WrGel0RlVfyYFLEGHo4TW4+OPPoMOJBCA+kkE2I8OlqzzMUMkULhwhWujHyOrWBZ74EFY2zbwYD/iiYTlGJW8UWaOn561uJp3J7+nab4nEYA==;U2FsdGVkX1/EACeMbht8x2ar6VrhBrcGZUtM4/B4viOz590nUZNIUkWPkjpmdriAAP3t1KEj2LlRg+z/FK+CSQ=="

steps:
- label: "Julia v1"
- label: "Julia v1 -- CUDA"
plugins:
- JuliaCI/julia#v1:
version: "1"
Expand All @@ -16,7 +16,7 @@ steps:
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30

- label: "Julia LTS"
- label: "Julia LTS -- CUDA"
plugins:
- JuliaCI/julia#v1:
version: "1.10" # "lts" isn't valid
Expand All @@ -29,3 +29,31 @@ steps:
queue: "cuda"
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30

# AMDGPU job on Julia version "1": runs the julia-test plugin and collects
# coverage for the `src` and `ext` directories on an agent from the "rocm" queue.
- label: "Julia v1 -- AMDGPU"
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
dirs:
- src
- ext
agents:
queue: "rocm"
# Step is skipped when the build message contains "[skip tests]".
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30

# AMDGPU job on the LTS release (pinned explicitly as "1.10"): runs the julia-test
# plugin and collects coverage for the `src` and `ext` directories on an agent from
# the "rocm" queue.
- label: "Julia LTS -- AMDGPU"
plugins:
- JuliaCI/julia#v1:
version: "1.10" # "lts" isn't valid
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
dirs:
- src
- ext
agents:
queue: "rocm"
# Step is skipped when the build message contains "[skip tests]".
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 30
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2"
Expand All @@ -26,6 +27,7 @@ JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"

[extensions]
TensorOperationsAMDGPUExt = "AMDGPU"
TensorOperationsBumperExt = "Bumper"
TensorOperationsChainRulesCoreExt = "ChainRulesCore"
TensorOperationsMooncakeExt = "Mooncake"
Expand All @@ -35,6 +37,7 @@ TensorOperationscuTENSORExt = "cuTENSOR"
TensorOperationsJLArraysExt = "JLArrays"

[compat]
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
Adapt = "4"
Bumper = "0.6, 0.7"
Expand All @@ -54,7 +57,7 @@ PrecompileTools = "1.1"
Preferences = "1.4"
PtrArrays = "1.2"
Random = "1"
Strided = "2.5"
Strided = "2.6"
StridedViews = "0.5"
Test = "1"
TupleTools = "1.6"
Expand Down Expand Up @@ -82,4 +85,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays"]
test = ["Test", "Random", "DynamicPolynomials", "ChainRulesTestUtils", "ChainRulesCore", "cuRAND", "CUDACore", "cuTENSOR", "Aqua", "Logging", "Bumper", "Mooncake", "Enzyme", "EnzymeTestUtils", "Adapt", "JLArrays", "AMDGPU"]
49 changes: 49 additions & 0 deletions ext/TensorOperationsAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
module TensorOperationsAMDGPUExt

using AMDGPU
using TensorOperations
using TensorOperations: TensorOperations as TO

#-------------------------------------------------------------------------------------------
# Allocator
#-------------------------------------------------------------------------------------------

# Output array type of a tensor addition whose input `A` is an `AnyROCArray`: a
# `ROCArray` with scalar type `TC` and `TO.numind(pA)` dimensions. Neither the
# contents of `A` nor `conjA` influence the result.
function TO.tensoradd_type(TC, A::AnyROCArray, pA::Index2Tuple, conjA::Bool)
    return ROCArray{TC, TO.numind(pA)}
end

# Allocate the destination of a tensor addition when an `AMDAllocator` is requested.
# The destination is always a `ROCArray{TC}` with `TO.numind(pA)` dimensions, whatever
# the array type of `A` is; its structure is taken from `TO.tensoradd_structure`.
function TO.tensoralloc_add(
        TC, A::AbstractArray, pA::Index2Tuple, conjA::Bool,
        istemp::Val, allocator::TO.AMDAllocator
    )
    T = ROCArray{TC, TO.numind(pA)}
    dims = TO.tensoradd_structure(A, pA, conjA)
    C = TO.tensoralloc(T, dims, istemp, allocator)
    # assert the concrete type so that callers get an inferable return type
    return C::T
end

# Allocate the destination of a tensor contraction when an `AMDAllocator` is requested.
# The destination is always a `ROCArray{TC}` with `TO.numind(pAB)` dimensions, whatever
# the array types of `A` and `B` are; its structure is taken from
# `TO.tensorcontract_structure`.
function TO.tensoralloc_contract(
        TC,
        A::AbstractArray, pA::Index2Tuple, conjA::Bool,
        B::AbstractArray, pB::Index2Tuple, conjB::Bool,
        pAB::Index2Tuple,
        istemp::Val, allocator::TO.AMDAllocator
    )
    T = ROCArray{TC, TO.numind(pAB)}
    dims = TO.tensorcontract_structure(A, pA, conjA, B, pB, conjB, pAB)
    C = TO.tensoralloc(T, dims, istemp, allocator)
    # assert the concrete type so that callers get an inferable return type
    return C::T
end

# Low-level allocation for the `AMDAllocator`: construct an uninitialised
# `ROCArray{T, N}` with the requested `structure`. The temporary flag (`Val` argument)
# is accepted for interface compatibility but does not change how memory is obtained.
# NOTE: with the `DefaultAllocator` the generic implementation already works fine,
# without selecting an explicit memory model, so no method is needed for that case.
function TO.tensoralloc(
        ::Type{<:ROCArray{T, N}}, structure, ::Val, ::TO.AMDAllocator
    ) where {T, N}
    return ROCArray{T, N}(undef, structure)
end

# Release a `ROCArray` that was handed out for an `AMDAllocator` by passing it to
# `AMDGPU.unsafe_free!`. Always returns `nothing`.
TO.tensorfree!(C::ROCArray, ::TO.AMDAllocator) = (AMDGPU.unsafe_free!(C); nothing)

end
7 changes: 7 additions & 0 deletions src/implementation/allocator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ parameters `Min`, `Mout`, `Mtemp` can be any of the CUDA.jl memory types, i.e.
"""
struct CUDAAllocator{Mout, Min, Mtemp} end

"""
    AMDAllocator()

Allocator that uses the AMD memory manager and will thus allocate `ROCArray` instances.

The allocation and deallocation methods for this allocator are defined in the
`TensorOperationsAMDGPUExt` package extension, which has AMDGPU as its trigger package.
"""
struct AMDAllocator end

"""
ManualAllocator()
Expand Down
4 changes: 2 additions & 2 deletions src/implementation/strided.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function tensoradd!(
)
# resolve conj flags and absorb into StridedView constructor to avoid type instabilities later on
if conjA
stridedtensoradd!(SV(C), conj(SV(A)), pA, α, β, backend, allocator)
stridedtensoradd!(SV(C), conj!(SV(A)), pA, α, β, backend, allocator)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this conj! there? It looks a bit suspicious to me, since I'm not sure we are really allowed to modify A, so we are really depending on the fact that conj(SV(A)) produces a view without modifying A.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This "resolved" the AMD tests but caused the CUDA ones to fail (I was testing on an AMD machine). I thought `conj!` on a `StridedView` would also simply return a view, but I guess not.

else
stridedtensoradd!(SV(C), SV(A), pA, α, β, backend, allocator)
end
Expand All @@ -35,7 +35,7 @@ function tensortrace!(
)
# resolve conj flags and absorb into StridedView constructor to avoid type instabilities later on
if conjA
stridedtensortrace!(SV(C), conj(SV(A)), p, q, α, β, backend, allocator)
stridedtensortrace!(SV(C), conj!(SV(A)), p, q, α, β, backend, allocator)
else
stridedtensortrace!(SV(C), SV(A), p, q, α, β, backend, allocator)
end
Expand Down
3 changes: 2 additions & 1 deletion test/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using Adapt
using TupleTools
using JLArrays
using VectorInterface
using CUDACore
using CUDACore, AMDGPU

test_result(a::AbstractArray, b::AbstractArray; kwargs...) =
isapprox(collect(a), collect(b); kwargs...)
Expand All @@ -24,6 +24,7 @@ end
ATs = []
!is_buildkite && push!(ATs, JLArray)
CUDACore.functional() && push!(ATs, CuArray)
AMDGPU.functional() && push!(ATs, ROCArray)

backends = [StridedBLAS(), StridedNative()]

Expand Down
Loading