Skip to content

Commit c5e3ea3

Browse files
Don't use pragma omp simd in Deposition (#1381)
1 parent e9c19d7 commit c5e3ea3

3 files changed

Lines changed: 15 additions & 15 deletions

File tree

src/fields/Fields.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -779,21 +779,23 @@ SetDirichletBoundaries (Array2<amrex::Real> RHS, const amrex::Box& solver_size,
779779
[=] AMREX_GPU_DEVICE (int i, int j) noexcept
780780
{
781781
const bool i_is_changing = (i < box_len0);
782-
const bool i_lo_edge = (!i_is_changing) && (!j);
783-
const bool i_hi_edge = (!i_is_changing) && j;
784-
const bool j_lo_edge = i_is_changing && (!j);
785-
const bool j_hi_edge = i_is_changing && j;
782+
const int i_is_changing_i = static_cast<int>(i_is_changing);
783+
const int i_not_changing_i = static_cast<int>(!i_is_changing);
784+
const int i_lo_edge = static_cast<int>(!i_is_changing && (j == 0));
785+
const int i_hi_edge = static_cast<int>(!i_is_changing && (j != 0));
786+
const int j_lo_edge = static_cast<int>(i_is_changing && (j == 0));
787+
const int j_hi_edge = static_cast<int>(i_is_changing && (j != 0));
786788

787-
const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing*i;
788-
const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + (!i_is_changing)*(i-box_len0);
789+
const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing_i*i;
790+
const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + i_not_changing_i*(i-box_len0);
789791

790792
const amrex::Real i_idx_offset = i_idx + (- i_lo_edge + i_hi_edge) * offset;
791793
const amrex::Real j_idx_offset = j_idx + (- j_lo_edge + j_hi_edge) * offset;
792794

793795
const amrex::Real x = i_idx_offset * dx + offset0;
794796
const amrex::Real y = j_idx_offset * dy + offset1;
795797

796-
const amrex::Real dxdx = dx*dx*(!i_is_changing) + dy*dy*i_is_changing;
798+
const amrex::Real dxdx = i_is_changing ? dy*dy : dx*dx;
797799

798800
// atomic add because the corners of RHS get two values
799801
amrex::Gpu::Atomic::AddNoRet(&(RHS(i_idx, j_idx)),

src/laser/MultiLaser.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,9 @@ MultiLaser::ReadParameters ()
4444
AMREX_ALWAYS_ASSERT(m_interp_order <= 3 && m_interp_order >= 0);
4545

4646
bool mg_param_given = queryWithParser(pp, "MG_tolerance_rel", m_MG_tolerance_rel);
47-
mg_param_given += queryWithParser(pp, "MG_tolerance_abs", m_MG_tolerance_abs);
48-
mg_param_given += queryWithParser(pp, "MG_verbose", m_MG_verbose);
49-
mg_param_given += queryWithParser(pp, "MG_average_rhs", m_MG_average_rhs);
47+
mg_param_given = queryWithParser(pp, "MG_tolerance_abs", m_MG_tolerance_abs) || mg_param_given;
48+
mg_param_given = queryWithParser(pp, "MG_verbose", m_MG_verbose) || mg_param_given;
49+
mg_param_given = queryWithParser(pp, "MG_average_rhs", m_MG_average_rhs) || mg_param_given;
5050

5151
// Raise warning if user specifies MG parameters without using the MG solver
5252
if (mg_param_given && (m_solver_type != "multigrid")) {

src/particles/deposition/DepositionUtil.H

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -240,10 +240,8 @@ SharedMemoryDeposition (int num_particles,
240240

241241
const int tile_id = (itile_x * ntile_y + itile_y);
242242

243-
#ifdef AMREX_USE_OMP
244-
#pragma omp simd
245-
#endif
246243
// deposit charge / current of all particles in this tile
244+
// cannot use SIMD here because particles might deposit into the same cell
247245
for (int ip = a_offsets[tile_id]; ip < a_offsets[tile_id+1]; ++ip) {
248246
do_deposit(a_indices[ip], ptd, field, idx_cache, idx_depos);
249247
}
@@ -254,8 +252,8 @@ SharedMemoryDeposition (int num_particles,
254252
}
255253
#endif
256254
else {
257-
// simple loop over particles, on CPU this only uses one thread
258-
amrex::ParallelFor(num_particles,
255+
// simple loop over particles, on CPU this only uses one thread and no SIMD
256+
amrex::For(num_particles,
259257
[=] AMREX_GPU_DEVICE (int ip) {
260258
if (is_valid(ip, ptd)) {
261259
do_deposit(ip, ptd, field, idx_cache, idx_depos);

0 commit comments

Comments
 (0)