Skip to content

Commit b969e02

Browse files
committed
Fix E2PFromCSR
1 parent 3cb9c51 commit b969e02

1 file changed

Lines changed: 10 additions & 4 deletions

File tree

sumpy/e2p.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -330,10 +330,15 @@ def get_kernel(self, max_ntargets_in_one_box):
330 330
if=run_itgt}
331 331
end
332 332
end
333-
result[iknl, itgt] = result[iknl, itgt] + result_temp[ \
333+
for itgt_offset
334+
<> itgt2 = itgt_start + itgt_offset {id=init_itgt_for_write}
335+
<> run_itgt2 = itgt_start + itgt_offset < itgt_end \
336+
{id=init_cond_for_write}
337+
result[iknl, itgt2] = result[iknl, itgt2] + result_temp[ \
334 338
itgt_offset, iknl] * kernel_scaling \
335-
{dep=update_result:init_result,id=write_result, \
336-
dup=iknl,if=run_itgt}
339+
{dep=update_result:init_result,id=write_result, \
340+
dup=iknl,if=run_itgt2}
341+
end
337 342
end
338 343
"""],
339 344
[
@@ -386,7 +391,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
386 391
knl = lp.privatize_temporaries_with_inames(knl,
387 392
"itgt_offset_outer", "result_temp")
388 393
knl = lp.duplicate_inames(knl, "itgt_offset_outer", "id:init_result")
389-
knl = lp.duplicate_inames(knl, "itgt_offset_outer", "id:write_result")
394+
knl = lp.duplicate_inames(knl, "itgt_offset_outer",
395+
"id:write_result or id:init_itgt_for_write or id:init_cond_for_write")
390 396
knl = lp.add_inames_to_insn(knl, "dummy",
391 397
"id:init_box* or id:fetch_src_box or id:fetch_center "
392 398
"or id:kernel_scaling")

0 commit comments

Comments
 (0)