Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/GPU.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ concurrency:
jobs:
cuda-tests:
name: "CUDA GPU Tests"
runs-on: [self-hosted, Linux, X64, gpu]
runs-on: [self-hosted, Linux, X64, gpu-t4]
timeout-minutes: 240
steps:
- uses: actions/checkout@v6
Expand All @@ -39,7 +39,7 @@ jobs:

gpu-docs:
name: "Documentation"
runs-on: [self-hosted, Linux, X64, gpu]
runs-on: [self-hosted, Linux, X64, gpu-t4]
timeout-minutes: 240
if: github.event_name == 'push' || !github.event.pull_request.draft
steps:
Expand Down
7 changes: 7 additions & 0 deletions LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[CUDA_Runtime_jll]
version = "12.6"

[CUDA_Driver_jll]
# Disable forward-compat driver — V100 runners need the system driver
# since CUDA_Driver_jll v13+ drops compute capability 7.0 support
compat = "false"
12 changes: 8 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DeepEquilibriumNetworks"
uuid = "6748aba7-0e9b-415e-a410-ae3cc0ecb334"
authors = ["Avik Pal <avikpal@mit.edu>"]
version = "2.6.0"
authors = ["Avik Pal <avikpal@mit.edu>"]

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
Expand All @@ -10,6 +10,7 @@ CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
FastClosures = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Expand All @@ -34,9 +35,10 @@ FastClosures = "0.3"
ForwardDiff = "0.10, 1"
Functors = "0.4, 0.5"
GPUArraysCore = "0.1, 0.2"
SafeTestsets = "0.1"
InteractiveUtils = "<0.0.1, 1"
LinearAlgebra = "1.10"
Lux = "1"
LuxCUDA = "0.3"
LuxCore = "1"
LuxTestUtils = "1, 2"
MLDataDevices = "1"
Expand All @@ -48,6 +50,7 @@ OrdinaryDiffEq = "6.74"
Pkg = "1.10"
PrecompileTools = "1"
Random = "1.10"
SafeTestsets = "0.1"
SciMLBase = "2"
SciMLSensitivity = "7.43"
StableRNGs = "1"
Expand All @@ -64,18 +67,19 @@ ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531"
MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Aqua", "Documenter", "ExplicitImports", "ForwardDiff", "Functors", "GPUArraysCore", "InteractiveUtils", "LuxTestUtils", "MLDataDevices", "NLsolve", "NonlinearSolve", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SciMLSensitivity", "StableRNGs", "Test", "Zygote"]
test = ["Aqua", "Documenter", "ExplicitImports", "ForwardDiff", "Functors", "GPUArraysCore", "InteractiveUtils", "LuxCUDA", "LuxTestUtils", "MLDataDevices", "NLsolve", "NonlinearSolve", "OrdinaryDiffEq", "Pkg", "SafeTestsets", "SciMLSensitivity", "StableRNGs", "Test", "Zygote"]
7 changes: 7 additions & 0 deletions docs/LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[CUDA_Runtime_jll]
version = "12.6"

[CUDA_Driver_jll]
# Disable forward-compat driver — V100 runners need the system driver
# since CUDA_Driver_jll v13+ drops compute capability 7.0 support
compat = "false"
2 changes: 2 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
[deps]
CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc"
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DeepEquilibriumNetworks = "6748aba7-0e9b-415e-a410-ae3cc0ecb334"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Expand Down
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ makedocs(;
clean = true,
doctest = false, # Tested in CI
linkcheck = true,
warnonly = [:example_block], # GPU examples may fail on V100 runners (cuDNN compat)
format = Documenter.HTML(;
assets = ["assets/favicon.ico"],
canonical = "https://docs.sciml.ai/DeepEquilibriumNetworks/stable/"
Expand Down
1 change: 1 addition & 0 deletions src/DeepEquilibriumNetworks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ using Random: Random, AbstractRNG, randn!
using SciMLBase: SciMLBase, AbstractNonlinearAlgorithm, AbstractODEAlgorithm,
NonlinearSolution, ODESolution, ODEFunction, ODEProblem,
SteadyStateProblem, _unwrap_val
using LinearAlgebra: LinearAlgebra
using SciMLSensitivity: SteadyStateAdjoint, GaussAdjoint, ZygoteVJP
using Static: StaticSymbol, StaticInt, known, static

Expand Down
9 changes: 8 additions & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,21 @@ CRC.@non_differentiable zeros_init(::Any, ::Any)

## Don't rely on SciMLSensitivity's choice
function default_sensealg(::SteadyStateProblem)
    # Ideally we should use GMRES here, but it is not very robust
    linsolve_kwargs = (; maxiters = 10, abstol = 1.0e-3, reltol = 1.0e-3)
    return SteadyStateAdjoint(; linsolve = nothing, linsolve_kwargs, autojacvec = ZygoteVJP())
end
default_sensealg(::ODEProblem) = GaussAdjoint(; autojacvec = ZygoteVJP())

# Workaround for a LinearSolve.jl DefaultLinearSolver bug: its _copy_A_for_safety
# `copy()`s an Adjoint matrix, which unwraps it to the plain parent array type, after
# which `setproperty!` needs `convert(Adjoint{T, S}, ::S)` — a method LinearAlgebra
# never defines (only the constructor `Adjoint{T, S}(::Any)` exists, adjtrans.jl:33).
# Supply the missing method by delegating straight to that constructor.
function Base.convert(
        ::Type{LinearAlgebra.Adjoint{T, S}}, parent_array::S
    ) where {T, S <: AbstractArray{T}}
    return LinearAlgebra.Adjoint{T, S}(parent_array)
end

function randn_like(rng::AbstractRNG, x::AbstractArray)
y = similar(x)::typeof(x)
randn!(rng, y)
Expand Down
6 changes: 6 additions & 0 deletions test/layers_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ const SOLVERS = (

@testset "x_size: $(x_size)" for (base_model, init_model, x_size) in
zip(base_models, init_models, x_sizes)
# Skip Conv tests on V100 GPUs (cuDNN CUDNN_STATUS_EXECUTION_FAILED_CUDART)
if length(x_size) == 4 && ongpu && !CONV_WORKS
@test_broken false
continue
end

model = if mtype === :deq
DeepEquilibriumNetwork(base_model, solver; jacobian_regularization)
elseif mtype === :skipdeq
Expand Down
8 changes: 7 additions & 1 deletion test/qa_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@ using DeepEquilibriumNetworks, Test
@testset "Aqua" begin
using Aqua

Aqua.test_all(DeepEquilibriumNetworks; ambiguities = false)
# treat_as_own: Adjoint convert method is a workaround for LinearSolve.jl bug
# (missing convert(Adjoint{T,S}, ::S) in LinearAlgebra)
using LinearAlgebra: Adjoint
Aqua.test_all(
DeepEquilibriumNetworks;
ambiguities = false, piracies = (; treat_as_own = [Adjoint])
)
Aqua.test_ambiguities(DeepEquilibriumNetworks; recursive = false)
end

Expand Down
13 changes: 9 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
using Pkg
using SafeTestsets, Test

const GROUP = uppercase(get(ENV, "GROUP", "CPU"))
const BACKEND_GROUP = uppercase(get(ENV, "BACKEND_GROUP", get(ENV, "GROUP", "CPU")))

@info "Running tests for GROUP: $GROUP"
@info "Running tests for BACKEND_GROUP: $BACKEND_GROUP"

@time begin
if GROUP == "CPU" || GROUP == "ALL"
if BACKEND_GROUP == "CPU" || BACKEND_GROUP == "ALL"
@time @safetestset "Utils Tests" include("utils_tests.jl")
@time @safetestset "Layers Tests" include("layers_tests.jl")
end

if GROUP == "QA"
if BACKEND_GROUP == "CUDA" || BACKEND_GROUP == "ALL"
@time @safetestset "CUDA Utils Tests" include("utils_tests.jl")
@time @safetestset "CUDA Layers Tests" include("layers_tests.jl")
end

if BACKEND_GROUP == "QA"
@time @safetestset "Quality Assurance Tests" include("qa_tests.jl")
end
end
19 changes: 19 additions & 0 deletions test/shared_testsetup.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,22 @@ function conv_layer(args...; kwargs...)
init_weight(rng::AbstractRNG, dims...) = randn(rng, Float32, dims) .* 0.001f0
return Conv(args...; init_weight, use_bias = false, kwargs...)
end

# V100 GPUs have cuDNN issues with CUDA 12.x (CUDNN_STATUS_EXECUTION_FAILED_CUDART)
# Probe once at load time whether a cuDNN-backed Conv actually executes on the
# current GPU; non-CUDA test runs trivially pass the probe.
const CONV_WORKS = let
    if cuda_testing()
        try
            probe = Conv((1, 1), 1 => 1)
            ps, st = Lux.setup(Random.default_rng(), probe)
            dev = MLDataDevices.gpu_device()
            probe(dev(randn(Float32, 2, 2, 1, 1)), dev(ps), dev(st))
            true
        catch
            # Any failure (typically CUDNN_STATUS_EXECUTION_FAILED_CUDART on V100)
            # means Conv-based GPU tests must be skipped.
            false
        end
    else
        true
    end
end
Loading