
Commit 19426bd

AJ0070, andreasnoack, and claude authored
Replace Travis CI with GitHub Actions (#85)
* Replace Travis CI with GitHub Actions

  Remove .travis.yml and add .github/workflows/ci.yml. Tests Julia 1 on Linux (x64) and macOS (Apple Silicon), and Julia pre on Linux.

* Keep Grid reference in DistSparseMatrix and DistMultiVec

  Prevent the default Grid from being garbage collected while distributed matrices or vectors are still alive, matching the pattern already used by DistMatrix.

* Constrain MPICH_jll to v3/v4 to avoid ABI break in v5

  MPICH 5.0 adopted the standard MPI ABI, where MPI_Comm changed from an int to a pointer type. The pre-compiled Elemental binary is incompatible with this new ABI, causing segfaults in Grid::VCSize().

* Constrain MPICH_jll to v3 to match the libmpi.so.12 ABI

  The Elemental binary was linked against libmpi.so.12 (MPICH 3.x). MPICH 4.x may have internal ABI differences despite the same soname.

* Add Julia 1.10 to the CI matrix to test an older runtime

  The Elemental binary segfaults on Julia 1.12 in Grid::VCSize() regardless of MPICH version. Adding 1.10 to determine whether this is a Julia version compatibility issue.

* Fix ccall signatures: pass a Grid instead of an MPI communicator

  The C interface functions ElDistSparseMatrixCreate and ElDistMultiVecCreate take an ElConstGrid (Grid pointer), not an MPI_Comm. The Julia wrappers were incorrectly passing an ElComm integer, which got reinterpret_cast'd as a Grid pointer, causing a null pointer dereference in Grid::VCSize(). Also revert the MPICH_jll version constraint and the Julia 1.10 CI entry, since those were not the root cause.

* Remove comm() for DistSparseMatrix/DistMultiVec, use the grid instead

  ElDistSparseMatrixComm and ElDistMultiVecComm don't exist in the C API. Replace comm(A) calls with A.grid and remove the dead comm functions.

* Increase timeout for CI jobs and handle test process timeouts in runtests

* temp fix

* fix

* implemented suggestion

* Remove DistributedArrays import from runtests.jl

* Reduce matrix dimensions in lav.jl test to 12x12 for improved performance

* fix

---------

Co-authored-by: Andreas Noack <andreas@noack.dk>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a3478e6 commit 19426bd

9 files changed

Lines changed: 74 additions & 64 deletions
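
Taken together, the Grid-related changes below boil down to the following usage pattern (an illustrative sketch, not code from this commit; it assumes Elemental is loaded and aliased as El, as in the test suite):

    using Elemental
    const El = Elemental

    # A distributed sparse matrix on the default Grid. The new `grid` field stores
    # a reference to that Grid, so it cannot be finalized while A is still alive.
    A = El.DistSparseMatrix(Float64, 12, 12)

    # Objects that must live on the same process grid are now created from A.grid
    # instead of the removed comm(A).
    x = El.DistMultiVec(Float64, A.grid)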


.github/workflows/ci.yml

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+name: CI
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1'
+        os:
+          - ubuntu-latest
+          - macos-latest
+        arch:
+          - x64
+        exclude:
+          - os: macos-latest
+            arch: x64
+        include:
+          - os: macos-latest
+            arch: aarch64
+            version: '1'
+          - os: ubuntu-latest
+            arch: x64
+            version: 'pre'
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+      - uses: julia-actions/cache@v2
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
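
With the exclude/include rules above, the effective job matrix matches the commit message: Julia 1 on ubuntu-latest (x64), Julia 1 on macos-latest (aarch64, Apple Silicon), and Julia pre on ubuntu-latest (x64).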

.travis.yml

Lines changed: 0 additions & 13 deletions
This file was deleted.

Project.toml

Lines changed: 2 additions & 1 deletion
@@ -15,11 +15,12 @@ Elemental_jll = "0.87"
 julia = "1.3"
 
 [extras]
+MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 MPIClusterManagers = "e7922434-ae4b-11e9-05c5-9780451d2c66"
 Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "MPIClusterManagers", "Primes", "TSVD", "Random"]
+test = ["Test", "MPI", "MPIClusterManagers", "Primes", "TSVD", "Random"]

src/core/distmultivec.jl

Lines changed: 7 additions & 14 deletions
@@ -1,5 +1,6 @@
 mutable struct DistMultiVec{T} <: ElementalMatrix{T}
     obj::Ptr{Cvoid}
+    grid::Grid # keep the grid around to avoid that it's freed before the matrix
 end
 
 for (elty, ext) in ((:ElInt, :i),
@@ -16,24 +17,16 @@ for (elty, ext) in ((:ElInt, :i),
             return nothing
         end
 
-        function DistMultiVec(::Type{$elty}, cm::ElComm = MPI.CommWorld[])
+        function DistMultiVec(::Type{$elty}, grid::Grid = DefaultGrid[])
             obj = Ref{Ptr{Cvoid}}(C_NULL)
             ElError(ccall(($(string("ElDistMultiVecCreate_", ext)), libEl), Cuint,
-                (Ref{Ptr{Cvoid}}, ElComm),
-                obj, cm))
-            A = DistMultiVec{$elty}(obj[])
+                (Ref{Ptr{Cvoid}}, Ptr{Cvoid}),
+                obj, grid.obj))
+            A = DistMultiVec{$elty}(obj[], grid)
             finalizer(destroy, A)
             return A
         end
 
-        function comm(A::DistMultiVec{$elty})
-            cm = Ref{ElComm}()
-            ElError(ccall(($(string("ElDistMultiVecComm_", ext)), libEl), Cuint,
-                (Ptr{Cvoid}, Ref{ElComm}),
-                A.obj, cm))
-            return cm[]
-        end
-
         function get(x::DistMultiVec{$elty}, i::Integer = size(x, 1), j::Integer = 1)
             v = Ref{$elty}()
             ElError(ccall(($(string("ElDistMultiVecGet_", ext)), libEl), Cuint,
@@ -117,8 +110,8 @@ end
 
 getindex(x::DistMultiVec, i, j) = get(x, i, j)
 
-function similar(::DistMultiVec, ::Type{T}, sz::Dims, cm::ElComm = MPI.CommWorld[]) where {T}
-    A = DistMultiVec(T, cm)
+function similar(::DistMultiVec, ::Type{T}, sz::Dims, grid::Grid = DefaultGrid[]) where {T}
+    A = DistMultiVec(T, grid)
     resize!(A, sz...)
     return A
 end

src/core/distsparsematrix.jl

Lines changed: 7 additions & 14 deletions
@@ -1,5 +1,6 @@
 mutable struct DistSparseMatrix{T} <: ElementalMatrix{T}
     obj::Ptr{Cvoid}
+    grid::Grid # keep the grid around to avoid that it's freed before the matrix
 end
 
 for (elty, ext) in ((:ElInt, :i),
@@ -16,24 +17,16 @@ for (elty, ext) in ((:ElInt, :i),
             return nothing
         end
 
-        function DistSparseMatrix(::Type{$elty}, comm::ElComm = MPI.CommWorld[])
+        function DistSparseMatrix(::Type{$elty}, grid::Grid = DefaultGrid[])
            obj = Ref{Ptr{Cvoid}}(C_NULL)
            ElError(ccall(($(string("ElDistSparseMatrixCreate_", ext)), libEl), Cuint,
-                (Ref{Ptr{Cvoid}}, ElComm),
-                obj, comm))
-            A = DistSparseMatrix{$elty}(obj[])
+                (Ref{Ptr{Cvoid}}, Ptr{Cvoid}),
+                obj, grid.obj))
+            A = DistSparseMatrix{$elty}(obj[], grid)
             finalizer(destroy, A)
             return A
         end
 
-        function comm(A::DistSparseMatrix{$elty})
-            cm = Ref{ElComm}()
-            ElError(ccall(($(string("ElDistSparseMatrixComm_", ext)), libEl), Cuint,
-                (Ptr{Cvoid}, Ref{ElComm}),
-                A.obj, cm))
-            return cm[]
-        end
-
         function globalRow(A::DistSparseMatrix{$elty}, iLoc::Integer)
             i = Ref{ElInt}(0)
             ElError(ccall(($(string("ElDistSparseMatrixGlobalRow_", ext)), libEl), Cuint,
@@ -112,8 +105,8 @@ for (elty, ext) in ((:ElInt, :i),
 end
 
 # The other constructors don't have a version with dimensions. Should they, or should this one go?
-function DistSparseMatrix(::Type{T}, m::Integer, n::Integer, comm::ElComm = MPI.CommWorld[]) where {T}
-    A = DistSparseMatrix(T, comm)
+function DistSparseMatrix(::Type{T}, m::Integer, n::Integer, grid::Grid = DefaultGrid[]) where {T}
+    A = DistSparseMatrix(T, grid)
     resize!(A, m, n)
     return A
 end

src/lapack_like/euclidean_min.jl

Lines changed: 3 additions & 3 deletions
@@ -23,7 +23,7 @@ function leastSquares(A::DistMatrix{T}, B::DistMatrix{T};
 end
 function leastSquares(A::DistSparseMatrix{T}, B::DistMultiVec{T};
                       orientation::Orientation = NORMAL) where {T}
-    X = DistMultiVec(T, comm(A))
+    X = DistMultiVec(T, A.grid)
     return leastSquares!(A, B, X, orientation = orientation)
 end
 
@@ -49,7 +49,7 @@ function bpdn(A::DistMatrix{T}, B::DistMatrix{T}, lambda::T) where {T}
     return bpdn!(A, B, lambda, X)
 end
 function bpdn(A::DistSparseMatrix{T}, B::DistMultiVec{T}, lambda) where {T}
-    X = DistMultiVec(T, comm(A))
+    X = DistMultiVec(T, A.grid)
     return bpdn!(A, B, lambda, X)
 end
 
@@ -78,6 +78,6 @@ function ridge(A::DistMatrix{T}, B::DistMatrix{T}, gamma::T; ka...) where {T}
     return ridge!(A, B, gamma, X; ka...)
 end
 function ridge(A::DistSparseMatrix{T}, B::DistMultiVec{T}, gamma::T; ka...) where {T}
-    X = DistMultiVec(T, comm(A))
+    X = DistMultiVec(T, A.grid)
     return ridge!(A, B, gamma, X; ka...)
 end

src/optimization/models.jl

Lines changed: 2 additions & 2 deletions
@@ -33,11 +33,11 @@ function lav(A::DistMatrix{T}, b::DistMatrix{T}) where {T<:Union{Float32,Float64
     return lav!(A, b, x)
 end
 function lav(A::DistSparseMatrix{T}, b::DistMultiVec{T}) where {T<:Union{Float32,Float64}}
-    x = DistMultiVec(T, comm(A))
+    x = DistMultiVec(T, A.grid)
     return lav!(A, b, x)
 end
 
 function lav(A::DistSparseMatrix{T}, b::DistMultiVec{T}, ctrl::LPAffineCtrl{T}) where {T<:Union{Float32,Float64}}
-    x = DistMultiVec(T, comm(A))
+    x = DistMultiVec(T, A.grid)
     return lav!(A, b, x, ctrl)
 end

test/lav.jl

Lines changed: 2 additions & 12 deletions
@@ -94,17 +94,7 @@ if display
     # El.print(b, "b")
 end
 
-ctrl = El.LPAffineCtrl(Float64,
-    mehrotraCtrl=El.MehrotraCtrl(Float64,
-        solveCtrl=El.RegSolveCtrl(Float64,
-            progress=true),
-        print=true,
-        outerEquil=true,
-        time=true)
-    )
-
-# elapsedLAV = @elapsed x = El.lav(A, b);#Elemental.print(A, "Matrix A")
-elapsedLAV = @elapsed x = El.lav(A, b, ctrl)
+elapsedLAV = @elapsed x = El.lav(A, b)
 
 if El.MPI.commRank(El.MPI.CommWorld[]) == 0
     println("LAV time: $elapsedLAV seconds")
@@ -165,4 +155,4 @@ end
 
 # Require the user to press a button before the figures are closed
 # commSize = El.mpi.Size( El.mpi.COMM_WORLD() )
-# El.Finalize()
+El.Finalize()
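
If the verbose solver output is still wanted when running this test by hand, the deleted control object can be reconstructed from the removed lines above and passed to the three-argument lav method kept in src/optimization/models.jl (sketch, not part of the commit):

    ctrl = El.LPAffineCtrl(Float64,
        mehrotraCtrl = El.MehrotraCtrl(Float64,
            solveCtrl = El.RegSolveCtrl(Float64, progress = true),
            print = true,
            outerEquil = true,
            time = true))
    elapsedLAV = @elapsed x = El.lav(A, b, ctrl)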

test/runtests.jl

Lines changed: 12 additions & 5 deletions
@@ -1,8 +1,7 @@
 using Test
-using Elemental_jll.MPICH_jll: mpiexec
+using MPI: mpiexec
 
 # Import all of our external dependencies to make sure they're compiled serially.
-using DistributedArrays
 using TSVD
 using Primes
 using MPIClusterManagers
@@ -15,9 +14,12 @@ function runtests_mpirun()
     @info "Running Elemental.jl tests"
     for f in testfiles
         try
-            mpiexec() do exec
-                run(`$exec -np $nprocs $(Base.julia_cmd()) $(joinpath(@__DIR__, f))`)
+            proc = run(`$(mpiexec()) -np $nprocs $(Base.julia_cmd()) $(joinpath(@__DIR__, f))`, wait=false)
+            if timedwait(() -> !process_running(proc), 300.0) === :timed_out
+                kill(proc)
+                error("Test $f timed out after 5 minutes")
             end
+            wait(proc)
             Base.with_output_color(:green,stdout) do io
                 println(io,"\tSUCCESS: $f")
             end
@@ -43,7 +45,12 @@ function runtests_repl()
         # FixMe! We temporarily run Finalize() explictly on the workers because the atexit hook
         # doesn't seem to be correctly triggered on workers as of 31 October 2018.
         cmdstr = "using Distributed, MPIClusterManagers; man = MPIManager(np = $nprocs); addprocs(man); include(\"$(joinpath(@__DIR__, f))\"); asyncmap(p -> remotecall_fetch(() -> Elemental.Finalize(), p), workers())"
-        run(`$exename -e $cmdstr`)
+        proc = run(`$exename -e $cmdstr`, wait=false)
+        if timedwait(() -> !process_running(proc), 300.0) === :timed_out
+            kill(proc)
+            error("Test $f timed out after 5 minutes")
+        end
+        wait(proc)
         Base.with_output_color(:green,stdout) do io
             println(io,"\tSUCCESS: $f")
         end
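
Both test drivers now wrap the spawned process in the same wait-with-timeout pattern. A self-contained sketch of that pattern as a reusable helper (the name run_with_timeout is hypothetical and not part of the package; only Base functions are used):

    # Run `cmd`, but kill it and raise an error if it does not finish within `limit` seconds.
    function run_with_timeout(cmd::Cmd, limit::Real = 300.0)
        # Start the command without blocking so it can be polled.
        proc = run(cmd, wait = false)
        # Poll until the process exits or the time limit is reached.
        if timedwait(() -> !process_running(proc), limit) === :timed_out
            kill(proc)
            error("$cmd timed out after $limit seconds")
        end
        wait(proc)   # make sure the process object is fully reaped before returning
        return proc
    end

    # Usage, mirroring runtests.jl:
    # run_with_timeout(`$(Base.julia_cmd()) test/lav.jl`, 300.0)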
