From f6137dfb671964e996aacbae657913001b14aba4 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 11:08:48 +0200 Subject: [PATCH 01/15] Switch to C++20 --- .github/workflows/cuda.yml | 6 +++--- .github/workflows/hip.yml | 2 +- .github/workflows/linux.yml | 10 +++++----- .github/workflows/setup/{nvcc11.sh => nvcc.sh} | 18 +++++++++--------- CMakeLists.txt | 10 +++++----- cmake/HiPACEFunctions.cmake | 10 +++++----- docs/source/building/building.rst | 2 +- src/utils/GPUUtil.H | 5 +++-- 8 files changed, 32 insertions(+), 31 deletions(-) rename .github/workflows/setup/{nvcc11.sh => nvcc.sh} (84%) mode change 100755 => 100644 diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 5f621e798d..7f8802bc30 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -19,7 +19,7 @@ jobs: # https://github.com/ComputationalRadiationPhysics/picongpu/blob/0.5.0/share/picongpu/dockerfiles/ubuntu-1604/Dockerfile # https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ build_nvcc: - name: NVCC 11.8 SP + name: NVCC 12.2 SP runs-on: ubuntu-22.04 if: github.event.pull_request.draft == false env: @@ -29,7 +29,7 @@ jobs: - name: install dependencies run: | - .github/workflows/setup/nvcc11.sh + .github/workflows/setup/nvcc.sh - name: CCache Cache uses: actions/cache@v4 @@ -52,7 +52,7 @@ jobs: cmake -S . -B build \ -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DAMReX_CUDA_ARCH=6.0 \ + -DAMReX_CUDA_ARCH=8.0 \ -DHiPACE_COMPUTE=CUDA \ -DHiPACE_PRECISION=SINGLE \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index eddb1587fa..4e6fb418b7 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -52,7 +52,7 @@ jobs: cmake -S . -B build_sp \ -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DAMReX_AMD_ARCH=gfx900 \ + -DAMReX_AMD_ARCH=gfx90a \ -DHiPACE_COMPUTE=HIP \ -DHiPACE_MPI=ON \ -DHiPACE_PRECISION=SINGLE diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 236f2d615e..54cd32fef4 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -16,8 +16,8 @@ on: - development jobs: - linux_gcc_cxx17_omp_ompi: - name: GNU@7.5 C++17 OMP OMPI + linux_gcc_cxx20_omp_ompi: + name: GNU C++20 OMP OMPI runs-on: ubuntu-latest env: {OMP_NUM_THREADS: 2} steps: @@ -41,15 +41,15 @@ jobs: pip install -U -e ./tools cmake -S . -B build \ -DHiPACE_COMPUTE=OMP \ - -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_CXX_STANDARD=20 \ -DCMAKE_INSTALL_PREFIX=/tmp/my-hipace \ -DCMAKE_VERBOSE_MAKEFILE=ON cmake --build build -j 2 - name: Run Tests run: ctest --test-dir build --output-on-failure - linux_gcc_cxx17: - name: GNU@7.5 C++17 Serial + linux_gcc_cxx20: + name: GNU C++20 Serial runs-on: ubuntu-latest env: {OMP_NUM_THREADS: 2} steps: diff --git a/.github/workflows/setup/nvcc11.sh b/.github/workflows/setup/nvcc.sh old mode 100755 new mode 100644 similarity index 84% rename from .github/workflows/setup/nvcc11.sh rename to .github/workflows/setup/nvcc.sh index a269cac5e4..7d14cd520b --- a/.github/workflows/setup/nvcc11.sh +++ b/.github/workflows/setup/nvcc.sh @@ -41,15 +41,15 @@ echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8 sudo apt-get update sudo apt-get install -y \ - cuda-command-line-tools-11-8 \ - cuda-compiler-11-8 \ - cuda-cupti-dev-11-8 \ - cuda-minimal-build-11-8 \ - cuda-nvml-dev-11-8 \ - cuda-nvtx-11-8 \ - libcufft-dev-11-8 \ - libcurand-dev-11-8 -sudo ln -s cuda-11.8 /usr/local/cuda + cuda-command-line-tools-12-2 \ + cuda-compiler-12-2 \ + cuda-cupti-dev-12-2 \ + cuda-minimal-build-12-2 \ + cuda-nvml-dev-12-2 \ + cuda-nvtx-12-2 \ + libcufft-dev-12-2 \ + libcurand-dev-12-2 +sudo ln -s cuda-12.2 /usr/local/cuda # cmake-easyinstall # diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d8e4c8277..c8f3cf799c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,9 +41,9 @@ endif() # C++ Standard in Superbuilds ################################################# # -# This is the easiest way to push up a C++17 requirement for AMReX, PICSAR and +# This is the easiest way to push up a C++20 requirement for AMReX and # openPMD-api until they increase their requirement. -set_cxx17_superbuild() +set_cxx20_superbuild() # CCache Support ############################################################## @@ -143,8 +143,8 @@ target_include_directories(HiPACE PRIVATE # add sources add_subdirectory(src) -# C++ properties: at least a C++17 capable compiler is needed -target_compile_features(HiPACE PUBLIC cxx_std_17) +# C++ properties: at least a C++20 capable compiler is needed +target_compile_features(HiPACE PUBLIC cxx_std_20) set_target_properties(HiPACE PROPERTIES CXX_EXTENSIONS OFF CXX_STANDARD_REQUIRED ON @@ -158,7 +158,7 @@ target_link_libraries(HiPACE PUBLIC HiPACE::thirdparty::FFT) # AMReX helper function: propagate CUDA specific target & source properties if(HiPACE_COMPUTE STREQUAL CUDA) setup_target_for_cuda_compilation(HiPACE) - target_compile_features(HiPACE PUBLIC cuda_std_17) + target_compile_features(HiPACE PUBLIC cuda_std_20) set_target_properties(HiPACE PROPERTIES CUDA_EXTENSIONS OFF CUDA_STANDARD_REQUIRED ON diff --git a/cmake/HiPACEFunctions.cmake b/cmake/HiPACEFunctions.cmake index 837f2867ba..e4f07dc998 100644 --- a/cmake/HiPACEFunctions.cmake +++ b/cmake/HiPACEFunctions.cmake @@ -1,11 +1,11 @@ -# Set C++17 for the whole build if not otherwise requested +# Set C++20 for the whole build if not otherwise requested # -# This is the easiest way to push up a C++17 requirement for AMReX, PICSAR and +# This is the easiest way to push up a C++20 requirement for AMReX and # openPMD-api until they increase their requirement. # -macro(set_cxx17_superbuild) +macro(set_cxx20_superbuild) if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD 20) endif() if(NOT DEFINED CMAKE_CXX_EXTENSIONS) set(CMAKE_CXX_EXTENSIONS OFF) @@ -15,7 +15,7 @@ macro(set_cxx17_superbuild) endif() if(NOT DEFINED CMAKE_CUDA_STANDARD) - set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_STANDARD 20) endif() if(NOT DEFINED CMAKE_CUDA_EXTENSIONS) set(CMAKE_CUDA_EXTENSIONS OFF) diff --git a/docs/source/building/building.rst b/docs/source/building/building.rst index df523f6e1f..87246aafac 100644 --- a/docs/source/building/building.rst +++ b/docs/source/building/building.rst @@ -30,7 +30,7 @@ Dependencies HiPACE++ depends on the following popular third party software. Please see installation instructions below in the Developers section. -- a mature `C++17 `__ compiler: e.g. GCC 7, Clang 7, NVCC 11.0, MSVC 19.15 or newer +- a mature `C++20 `__ compiler: e.g. GCC 11, Clang 14, NVCC 12.2, MSVC 19.34 or newer - `CMake 3.24.0+ `__ - `AMReX development `__: we automatically download and compile a copy of AMReX - `openPMD-api 0.17.0+ `__: we automatically download and compile a copy of openPMD-api diff --git a/src/utils/GPUUtil.H b/src/utils/GPUUtil.H index 593d7f9177..e3fb74eb5d 100644 --- a/src/utils/GPUUtil.H +++ b/src/utils/GPUUtil.H @@ -108,9 +108,10 @@ struct Array3 { int ncomp=0; #endif - template ,int> = 0> + template + requires std::is_convertible_v AMREX_GPU_HOST_DEVICE - Array3 (const Array3>& rhs) noexcept + Array3 (const Array3& rhs) noexcept : p(rhs.p), jstride(rhs.jstride), nstride(rhs.nstride), From 1060b0a16e9d47cc419e4964f16ebc487b3fdee2 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 11:19:12 +0200 Subject: [PATCH 02/15] fix chmod --- .github/workflows/setup/nvcc.sh | 0 docs/source/building/building.rst | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 .github/workflows/setup/nvcc.sh diff --git a/.github/workflows/setup/nvcc.sh b/.github/workflows/setup/nvcc.sh old mode 100644 new mode 100755 diff --git a/docs/source/building/building.rst b/docs/source/building/building.rst index 87246aafac..2b6856829a 100644 --- a/docs/source/building/building.rst +++ b/docs/source/building/building.rst @@ -40,8 +40,8 @@ Please see installation instructions below in the Developers section. Platform-dependent, at least one of the following: -- `CUDA Toolkit 11.0+ `__: for NVIDIA GPU support (see `matching host-compilers `__) -- `ROCm 5.2+ `__: for AMD GPU support +- `CUDA Toolkit 12.2+ `__: for NVIDIA GPU support (see `matching host-compilers `__) +- `ROCm 6.0+ `__: for AMD GPU support - `FFTW3 `__: for CPUs (only used serially, but multi-threading supported; *not* needed for GPUs) Optional dependencies include: From 5f23319a1957fa11d2d566271522afc11ff9a6ce Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 11:32:25 +0200 Subject: [PATCH 03/15] update cuda download link --- .github/workflows/setup/nvcc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/setup/nvcc.sh b/.github/workflows/setup/nvcc.sh index 7d14cd520b..8d8652f13e 100755 --- a/.github/workflows/setup/nvcc.sh +++ b/.github/workflows/setup/nvcc.sh @@ -35,8 +35,8 @@ sudo apt-get install -y \ pkg-config \ wget -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" \ +sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub +echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 /" \ | sudo tee /etc/apt/sources.list.d/cuda.list sudo apt-get update From 5a0327485d4390d36e77e750b3f211b2e1883d8c Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 12:53:58 +0200 Subject: [PATCH 04/15] Run Tests on MacOS CI --- .github/workflows/macos.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 74ced37fa6..55182d02be 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -98,15 +98,15 @@ jobs: export CCACHE_SLOPPINESS=time_macros ccache -z - cmake -S . -B build_sp \ + cmake -S . -B build \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DHiPACE_openpmd_internal=OFF \ - -DHiPACE_PRECISION=SINGLE - cmake --build build_sp -j 2 + -DHiPACE_PRECISION=DOUBLE + cmake --build build -j 2 du -hs ~/Library/Caches/ccache ccache -s -# - name: test HiPACE++ -# run: | -# ctest --test-dir build_sp --output-on-failure + - name: test HiPACE++ + run: | + ctest --test-dir build --output-on-failure From 9233d6386cacb58b8a730da9beedcf60e1915ec8 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 13:10:26 +0200 Subject: [PATCH 05/15] python packages --- .github/workflows/macos.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 55182d02be..7799430a6a 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -90,6 +90,10 @@ jobs: brew install openpmd-api brew link openpmd-api + python -m pip install --upgrade pip + python -m pip install --upgrade matplotlib numpy scipy openpmd-viewer openpmd-api + python -m pip install git+https://github.com/LASY-org/lasy.git@development + - name: build HiPACE++ run: | export CCACHE_COMPRESS=1 From bf6497f15764a3c2f66922145041e16f17bca127 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 13:19:19 +0200 Subject: [PATCH 06/15] fix --- .github/workflows/macos.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 7799430a6a..40878472bb 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -90,7 +90,7 @@ jobs: brew install openpmd-api brew link openpmd-api - python -m pip install --upgrade pip + python -m pip install --upgrade pip setuptools wheel python -m pip install --upgrade matplotlib numpy scipy openpmd-viewer openpmd-api python -m pip install git+https://github.com/LASY-org/lasy.git@development From 48e00aa82602c994c75278d1525618481d1473ec Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 13:25:24 +0200 Subject: [PATCH 07/15] fix --- .github/workflows/macos.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 40878472bb..7bf533cabd 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -58,10 +58,10 @@ jobs: brew --cache set +e - rm -rf /usr/local/bin/2to3* - rm -rf /usr/local/bin/idle3* - rm -rf /usr/local/bin/pydoc3* - rm -rf /usr/local/bin/python3* + # rm -rf /usr/local/bin/2to3* + # rm -rf /usr/local/bin/idle3* + # rm -rf /usr/local/bin/pydoc3* + # rm -rf /usr/local/bin/python3* brew install fftw --only-dependencies --force brew install fftw From 5f665363aa98a9c899b7b72213f3a4a6ab840ca5 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 13:33:59 +0200 Subject: [PATCH 08/15] fix --- .github/workflows/macos.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 7bf533cabd..07fa8142f5 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -53,6 +53,10 @@ jobs: restore-keys: | ccache-macos-appleclang-${{ hashFiles('.github/workflows/macos.yml') }}- ccache-macos-appleclang- + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' - name: install dependencies run: | brew --cache From 8adb7bbe564563b3f7193a69f79f14bfe1dcc5cd Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 13:56:38 +0200 Subject: [PATCH 09/15] fix bool conversion --- .github/workflows/macos.yml | 1 + src/fields/Fields.cpp | 16 +++++++++------- src/laser/MultiLaser.cpp | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 07fa8142f5..49fdf7aad9 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -97,6 +97,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel python -m pip install --upgrade matplotlib numpy scipy openpmd-viewer openpmd-api python -m pip install git+https://github.com/LASY-org/lasy.git@development + python -m pip install -U -e ./tools - name: build HiPACE++ run: | diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp index 789975361d..6a2ca459fe 100644 --- a/src/fields/Fields.cpp +++ b/src/fields/Fields.cpp @@ -779,13 +779,15 @@ SetDirichletBoundaries (Array2 RHS, const amrex::Box& solver_size, [=] AMREX_GPU_DEVICE (int i, int j) noexcept { const bool i_is_changing = (i < box_len0); - const bool i_lo_edge = (!i_is_changing) && (!j); - const bool i_hi_edge = (!i_is_changing) && j; - const bool j_lo_edge = i_is_changing && (!j); - const bool j_hi_edge = i_is_changing && j; + const int i_is_changing_i = static_cast(i_is_changing); + const int i_not_changing_i = static_cast(!i_is_changing); + const int i_lo_edge = static_cast(!i_is_changing && (j == 0)); + const int i_hi_edge = static_cast(!i_is_changing && (j != 0)); + const int j_lo_edge = static_cast(i_is_changing && (j == 0)); + const int j_hi_edge = static_cast(i_is_changing && (j != 0)); - const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing*i; - const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + (!i_is_changing)*(i-box_len0); + const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing_i*i; + const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + i_not_changing_i*(i-box_len0); const amrex::Real i_idx_offset = i_idx + (- i_lo_edge + i_hi_edge) * offset; const amrex::Real j_idx_offset = j_idx + (- j_lo_edge + j_hi_edge) * offset; @@ -793,7 +795,7 @@ SetDirichletBoundaries (Array2 RHS, const amrex::Box& solver_size, const amrex::Real x = i_idx_offset * dx + offset0; const amrex::Real y = j_idx_offset * dy + offset1; - const amrex::Real dxdx = dx*dx*(!i_is_changing) + dy*dy*i_is_changing; + const amrex::Real dxdx = i_is_changing ? dy*dy : dx*dx; // atomic add because the corners of RHS get two values amrex::Gpu::Atomic::AddNoRet(&(RHS(i_idx, j_idx)), diff --git a/src/laser/MultiLaser.cpp b/src/laser/MultiLaser.cpp index f408a0138e..dbae789516 100644 --- a/src/laser/MultiLaser.cpp +++ b/src/laser/MultiLaser.cpp @@ -44,9 +44,9 @@ MultiLaser::ReadParameters () AMREX_ALWAYS_ASSERT(m_interp_order <= 3 && m_interp_order >= 0); bool mg_param_given = queryWithParser(pp, "MG_tolerance_rel", m_MG_tolerance_rel); - mg_param_given += queryWithParser(pp, "MG_tolerance_abs", m_MG_tolerance_abs); - mg_param_given += queryWithParser(pp, "MG_verbose", m_MG_verbose); - mg_param_given += queryWithParser(pp, "MG_average_rhs", m_MG_average_rhs); + mg_param_given = queryWithParser(pp, "MG_tolerance_abs", m_MG_tolerance_abs) || mg_param_given; + mg_param_given = queryWithParser(pp, "MG_verbose", m_MG_verbose) || mg_param_given; + mg_param_given = queryWithParser(pp, "MG_average_rhs", m_MG_average_rhs) || mg_param_given; // Raise warning if user specifies MG parameters without using the MG solver if (mg_param_given && (m_solver_type != "multigrid")) { From 09aad8d8a8892f8bc2da2ba0243882a2274de02a Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 14:09:58 +0200 Subject: [PATCH 10/15] add sanitizers --- .github/workflows/macos.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 49fdf7aad9..0680bd0e30 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -106,10 +106,13 @@ jobs: export CCACHE_MAXSIZE=100M export CCACHE_SLOPPINESS=time_macros ccache -z + export CXXFLAGS="-g -fsanitize=undefined,address -fno-sanitize-recover=all" cmake -S . -B build \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DHiPACE_openpmd_internal=OFF \ + -DAMReX_BOUND_CHECK=YES \ + -DAMReX_ASSERTIONS=YES \ -DHiPACE_PRECISION=DOUBLE cmake --build build -j 2 @@ -118,4 +121,6 @@ jobs: - name: test HiPACE++ run: | + export "ASAN_OPTIONS=detect_leaks=0" + export UBSAN_OPTIONS=print_stacktrace=1 ctest --test-dir build --output-on-failure From 0358756adf07cd8eb2f7ff9e60c5999ed9f3db42 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 14:30:43 +0200 Subject: [PATCH 11/15] Remove SIMD from deposition --- .github/workflows/macos.yml | 5 ----- src/particles/deposition/DepositionUtil.H | 8 +++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 0680bd0e30..49fdf7aad9 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -106,13 +106,10 @@ jobs: export CCACHE_MAXSIZE=100M export CCACHE_SLOPPINESS=time_macros ccache -z - export CXXFLAGS="-g -fsanitize=undefined,address -fno-sanitize-recover=all" cmake -S . -B build \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DHiPACE_openpmd_internal=OFF \ - -DAMReX_BOUND_CHECK=YES \ - -DAMReX_ASSERTIONS=YES \ -DHiPACE_PRECISION=DOUBLE cmake --build build -j 2 @@ -121,6 +118,4 @@ jobs: - name: test HiPACE++ run: | - export "ASAN_OPTIONS=detect_leaks=0" - export UBSAN_OPTIONS=print_stacktrace=1 ctest --test-dir build --output-on-failure diff --git a/src/particles/deposition/DepositionUtil.H b/src/particles/deposition/DepositionUtil.H index bda9ae156f..4aac1861f4 100644 --- a/src/particles/deposition/DepositionUtil.H +++ b/src/particles/deposition/DepositionUtil.H @@ -240,10 +240,8 @@ SharedMemoryDeposition (int num_particles, const int tile_id = (itile_x * ntile_y + itile_y); -#ifdef AMREX_USE_OMP -#pragma omp simd -#endif // deposit charge / current of all particles in this tile + // cannot use SIMD here because particles might deposit into the same cell for (int ip = a_offsets[tile_id]; ip < a_offsets[tile_id+1]; ++ip) { do_deposit(a_indices[ip], ptd, field, idx_cache, idx_depos); } @@ -254,8 +252,8 @@ SharedMemoryDeposition (int num_particles, } #endif else { - // simple loop over particles, on CPU this only uses one thread - amrex::ParallelFor(num_particles, + // simple loop over particles, on CPU this only uses one thread and no SIMD + amrex::For(num_particles, [=] AMREX_GPU_DEVICE (int ip) { if (is_valid(ip, ptd)) { do_deposit(ip, ptd, field, idx_cache, idx_depos); From baf4f70a9062c520beb5401f5459d3a8f394f0ca Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 15:12:21 +0200 Subject: [PATCH 12/15] set OMP_NUM_THREADS to 2 and install hdf5 --- .github/workflows/macos.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 49fdf7aad9..eea10e5f03 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -17,6 +17,7 @@ jobs: # For macOS, Ninja is slower than the default: #CMAKE_GENERATOR: Ninja SETUPTOOLS_USE_DISTUTILS: stdlib + OMP_NUM_THREADS: 2 steps: - uses: actions/checkout@v4 - name: Brew Cache @@ -83,6 +84,8 @@ jobs: brew install open-mpi brew link open-mpi + brew install hdf5-mpi + brew install pkg-config set -e From 1bb71137b7f56c02e0e746c98ad29139b11978f9 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 15:48:13 +0200 Subject: [PATCH 13/15] time laser STC test --- tests/laser_STC.1Rank.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/laser_STC.1Rank.sh b/tests/laser_STC.1Rank.sh index 5ef19c822e..3e06bd8e32 100644 --- a/tests/laser_STC.1Rank.sh +++ b/tests/laser_STC.1Rank.sh @@ -22,7 +22,11 @@ TEST_NAME="${FILE_NAME%.*}" rm -rf $TEST_NAME # Run the simulation +time \ mpiexec -n 1 $HIPACE_EXECUTABLE $HIPACE_EXAMPLE_DIR/inputs_STC \ hipace.file_prefix = $TEST_NAME # Compare the result with theory +time \ $HIPACE_EXAMPLE_DIR/analysis_laser_STC.py --output-dir=$TEST_NAME + +exit 1; From 785e0df10663ee2d2cc286b49b15134643ef1b75 Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 16:17:26 +0200 Subject: [PATCH 14/15] revert changes to CI --- .github/workflows/macos.yml | 48 ++++++++----------------------------- tests/laser_STC.1Rank.sh | 4 ---- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index eea10e5f03..93c90ea3e8 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -1,11 +1,8 @@ name: 🍏 macOS - on: [push, pull_request] - concurrency: group: ${{ github.ref }}-${{ github.head_ref }}-macos cancel-in-progress: true - jobs: build_appleclang: name: AppleClang @@ -17,7 +14,6 @@ jobs: # For macOS, Ninja is slower than the default: #CMAKE_GENERATOR: Ninja SETUPTOOLS_USE_DISTUTILS: stdlib - OMP_NUM_THREADS: 2 steps: - uses: actions/checkout@v4 - name: Brew Cache @@ -54,54 +50,33 @@ jobs: restore-keys: | ccache-macos-appleclang-${{ hashFiles('.github/workflows/macos.yml') }}- ccache-macos-appleclang- - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - name: install dependencies run: | brew --cache set +e - - # rm -rf /usr/local/bin/2to3* - # rm -rf /usr/local/bin/idle3* - # rm -rf /usr/local/bin/pydoc3* - # rm -rf /usr/local/bin/python3* - + rm -rf /usr/local/bin/2to3* + rm -rf /usr/local/bin/idle3* + rm -rf /usr/local/bin/pydoc3* + rm -rf /usr/local/bin/python3* brew install fftw --only-dependencies --force brew install fftw brew link fftw - brew install libomp --only-dependencies --force brew install libomp brew link --force libomp - brew install ninja --only-dependencies --force brew install ninja brew link ninja - brew install open-mpi --only-dependencies --force brew install open-mpi brew link open-mpi - - brew install hdf5-mpi - brew install pkg-config - set -e - brew uninstall cmake - brew tap openpmd/openpmd brew install openpmd-api --only-dependencies --force brew install openpmd-api brew link openpmd-api - - python -m pip install --upgrade pip setuptools wheel - python -m pip install --upgrade matplotlib numpy scipy openpmd-viewer openpmd-api - python -m pip install git+https://github.com/LASY-org/lasy.git@development - python -m pip install -U -e ./tools - - name: build HiPACE++ run: | export CCACHE_COMPRESS=1 @@ -109,16 +84,13 @@ jobs: export CCACHE_MAXSIZE=100M export CCACHE_SLOPPINESS=time_macros ccache -z - - cmake -S . -B build \ + cmake -S . -B build_sp \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DHiPACE_openpmd_internal=OFF \ - -DHiPACE_PRECISION=DOUBLE - cmake --build build -j 2 - + -DHiPACE_PRECISION=SINGLE + cmake --build build_sp -j 2 du -hs ~/Library/Caches/ccache ccache -s - - - name: test HiPACE++ - run: | - ctest --test-dir build --output-on-failure +# - name: test HiPACE++ +# run: | +# ctest --test-dir build_sp --output-on-failure diff --git a/tests/laser_STC.1Rank.sh b/tests/laser_STC.1Rank.sh index 3e06bd8e32..5ef19c822e 100644 --- a/tests/laser_STC.1Rank.sh +++ b/tests/laser_STC.1Rank.sh @@ -22,11 +22,7 @@ TEST_NAME="${FILE_NAME%.*}" rm -rf $TEST_NAME # Run the simulation -time \ mpiexec -n 1 $HIPACE_EXECUTABLE $HIPACE_EXAMPLE_DIR/inputs_STC \ hipace.file_prefix = $TEST_NAME # Compare the result with theory -time \ $HIPACE_EXAMPLE_DIR/analysis_laser_STC.py --output-dir=$TEST_NAME - -exit 1; From 2384fa1d46ca5a43bbd06e5574d571ce3c27332d Mon Sep 17 00:00:00 2001 From: Alexander Sinn Date: Wed, 22 Apr 2026 16:20:11 +0200 Subject: [PATCH 15/15] add whitespace --- .github/workflows/macos.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 93c90ea3e8..74ced37fa6 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -1,8 +1,11 @@ name: 🍏 macOS + on: [push, pull_request] + concurrency: group: ${{ github.ref }}-${{ github.head_ref }}-macos cancel-in-progress: true + jobs: build_appleclang: name: AppleClang @@ -54,29 +57,39 @@ jobs: run: | brew --cache set +e + rm -rf /usr/local/bin/2to3* rm -rf /usr/local/bin/idle3* rm -rf /usr/local/bin/pydoc3* rm -rf /usr/local/bin/python3* + brew install fftw --only-dependencies --force brew install fftw brew link fftw + brew install libomp --only-dependencies --force brew install libomp brew link --force libomp + brew install ninja --only-dependencies --force brew install ninja brew link ninja + brew install open-mpi --only-dependencies --force brew install open-mpi brew link open-mpi + brew install pkg-config + set -e + brew uninstall cmake + brew tap openpmd/openpmd brew install openpmd-api --only-dependencies --force brew install openpmd-api brew link openpmd-api + - name: build HiPACE++ run: | export CCACHE_COMPRESS=1 @@ -84,13 +97,16 @@ jobs: export CCACHE_MAXSIZE=100M export CCACHE_SLOPPINESS=time_macros ccache -z + cmake -S . -B build_sp \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DHiPACE_openpmd_internal=OFF \ -DHiPACE_PRECISION=SINGLE cmake --build build_sp -j 2 + du -hs ~/Library/Caches/ccache ccache -s + # - name: test HiPACE++ # run: | # ctest --test-dir build_sp --output-on-failure