@@ -54,22 +54,60 @@ DepositTemperature (PlasmaParticleContainer& plasma,
5454 // extract laser properties and boolean for the presence of the laser and for ionization
5555 const PhysConst pc = get_phys_const ();
5656 const int aabs = Hipace::m_use_laser ? Comps[WhichSlice::This][" aabs" ] : -1 ;
57- const bool can_ionize = plasma.m_can_ionize ;
58- const bool use_laser = Hipace::m_use_laser;
5957 const amrex::Real laser_norm = (plasma.m_charge /pc.q_e ) * (pc.m_e /plasma.m_mass )
6058 * (plasma.m_charge /pc.q_e ) * (pc.m_e /plasma.m_mass );
6159
6260 // Loop over particles
63- SharedMemoryDeposition<3 , 3 , true >(
64- int (pti.numParticles ()),
61+ amrex::AnyCTO (
62+ // use compile-time options
63+ amrex::TypeList<
64+ amrex::CompileTimeOptions<0 , 1 , 2 , 3 >, // depos_order
65+ amrex::CompileTimeOptions<false , true >, // can_ionize
66+ amrex::CompileTimeOptions<false , true > // use_laser
67+ >{}, {
68+ Hipace::m_temperature_depos_order,
69+ plasma.m_can_ionize ,
70+ Hipace::m_use_laser
71+ },
72+ // call deposition function
73+ // The three functions passed as arguments to this lambda
74+ // are defined below as the next arguments.
75+ [&](auto is_valid, auto get_start_cell, auto deposit){
76+ constexpr auto ctos = deposit.GetOptions ();
77+ constexpr int depos_order = ctos[0 ];
78+ constexpr int use_laser = ctos[2 ];
79+ constexpr int stencil_size = depos_order + 1 ;
80+
81+ if constexpr (use_laser) {
82+ SharedMemoryDeposition<stencil_size, stencil_size, true >(
83+ int (pti.numParticles ()), is_valid, get_start_cell, deposit, isl_fab.array (),
84+ isl_fab.box (), pti.GetParticleTile ().getParticleTileData (),
85+ amrex::GpuArray<int , 1 >{aabs},
86+ amrex::GpuArray<int , 7 >{w, ux, uy, uz, uxsq, uysq, uzsq});
87+ } else {
88+ SharedMemoryDeposition<stencil_size, stencil_size, true >(
89+ int (pti.numParticles ()), is_valid, get_start_cell, deposit, isl_fab.array (),
90+ isl_fab.box (), pti.GetParticleTile ().getParticleTileData (),
91+ amrex::GpuArray<int , 0 >{},
92+ amrex::GpuArray<int , 7 >{w, ux, uy, uz, uxsq, uysq, uzsq});
93+ }
94+ },
6595 // is_valid
6696 // return whether the particle is valid and should deposit
67- [=] AMREX_GPU_DEVICE (int ip, auto ptd)
97+ [=] AMREX_GPU_DEVICE (int ip, auto ptd,
98+ auto /* depos_order*/ ,
99+ auto /* can_ionize*/ ,
100+ auto /* use_laser*/ )
68101 {
69102 // only deposit on or below their according MR level
70103 return ptd.id (ip).is_valid () && (lev == 0 || ptd.cpu (ip) >= lev);
71104 },
72- [=] AMREX_GPU_DEVICE (int ip, auto ptd) -> amrex::IntVectND<2 >
105+ // get_start_cell
106+ // return the lowest cell index that the particle deposits into
107+ [=] AMREX_GPU_DEVICE (int ip, auto ptd,
108+ auto depos_order,
109+ auto /* can_ionize*/ ,
110+ auto /* use_laser*/ ) -> amrex::IntVectND<2 >
73111 {
74112 const amrex::Real xp = ptd.pos (0 , ip);
75113 const amrex::Real yp = ptd.pos (1 , ip);
@@ -78,17 +116,21 @@ DepositTemperature (PlasmaParticleContainer& plasma,
78116 const amrex::Real ymid = (yp - y_pos_offset) * dy_inv;
79117
80118 auto [shape_x, i] =
81- compute_single_shape_factor<false , 0 >(xmid, 0 );
119+ compute_single_shape_factor<false , depos_order >(xmid, 0 );
82120
83121 auto [shape_y, j] =
84- compute_single_shape_factor<false , 0 >(ymid, 0 );
122+ compute_single_shape_factor<false , depos_order >(ymid, 0 );
85123
86124 return {i-1 , j-1 };
87125 },
126+ // deposit
88127 // deposit of weight, momentum (ux, uy, uz) and their squares (uxsq, uysq, uzsq)
89128 [=] AMREX_GPU_DEVICE (int ip, auto ptd,
90- Array3<amrex::Real> arr,
91- auto cache_idx, auto depos_idx) noexcept
129+ Array3<amrex::Real> arr,
130+ auto cache_idx, auto depos_idx,
131+ auto depos_order,
132+ auto can_ionize,
133+ auto use_laser) noexcept
92134 {
93135 const amrex::Real xp = ptd.pos (0 , ip);
94136 const amrex::Real yp = ptd.pos (1 , ip);
@@ -101,7 +143,7 @@ DepositTemperature (PlasmaParticleContainer& plasma,
101143 ptd.idata (PlasmaIdx::ion_lev)[ip] * ptd.idata (PlasmaIdx::ion_lev)[ip];
102144 }
103145 doLaserGatherShapeN<2 >(xp, yp, Aabssqp, arr, cache_idx[0 ],
104- dx_inv, dy_inv, x_pos_offset, y_pos_offset);
146+ dx_inv, dy_inv, x_pos_offset, y_pos_offset);
105147 Aabssqp *= laser_norm_ion;
106148 }
107149
@@ -116,25 +158,24 @@ DepositTemperature (PlasmaParticleContainer& plasma,
116158 const amrex::Real xmid = (xp - x_pos_offset) * dx_inv;
117159 const amrex::Real ymid = (yp - y_pos_offset) * dy_inv;
118160
119- // --- Compute shape factors
120- // x direction
121- auto [shape_x, i] = compute_single_shape_factor<false , 0 >(xmid, 0 );
122- // y direction
123- auto [shape_y, j] = compute_single_shape_factor<false , 0 >(ymid, 0 );
124-
125- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[0 ]), wp);
126- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[1 ]), wp*uxp);
127- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[2 ]), wp*uyp);
128- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[3 ]), wp*uzp);
129- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[4 ]), wp*uxp*uxp);
130- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[5 ]), wp*uyp*uyp);
131- amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[6 ]), wp*uzp*uzp);
132- },
133- isl_fab.array (),
134- isl_fab.box (), pti.GetParticleTile ().getParticleTileData (),
135- amrex::GpuArray<int , 1 >{aabs},
136- amrex::GpuArray<int , 7 >{w, ux, uy, uz, uxsq, uysq, uzsq}
137- );
161+ for (int iy=0 ; iy <= depos_order; ++iy) {
162+ for (int ix=0 ; ix <= depos_order; ++ix) {
163+ // --- Compute shape factors
164+ // x direction
165+ auto [shape_x, i] = compute_single_shape_factor<false , depos_order>(xmid, ix);
166+ // y direction
167+ auto [shape_y, j] = compute_single_shape_factor<false , depos_order>(ymid, iy);
168+
169+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[0 ]), shape_x*shape_y*wp);
170+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[1 ]), shape_x*shape_y*wp*uxp);
171+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[2 ]), shape_x*shape_y*wp*uyp);
172+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[3 ]), shape_x*shape_y*wp*uzp);
173+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[4 ]), shape_x*shape_y*wp*uxp*uxp);
174+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[5 ]), shape_x*shape_y*wp*uyp*uyp);
175+ amrex::Gpu::Atomic::Add (arr.ptr (i, j, depos_idx[6 ]), shape_x*shape_y*wp*uzp*uzp);
176+ }
177+ }
178+ });
138179 Array3<amrex::Real> field_arr = isl_fab.array ();
139180
140181 // Normalize the components of momentum (ux, uy, uz) and their squares (uxsq, uysq, uzsq)
0 commit comments