@@ -211,7 +211,15 @@ def twice(x):
211211 allocator = alloc , use_memory_pool = use_memory_pool )
212212
213213 from pyopencl .tools import ImmediateAllocator , MemoryPool
214- assert isinstance (actx .allocator ,
214+
215+ from arraycontext .impl .pytato import _PaddedAllocator
216+ alloc_to_check = actx .allocator
217+ if isinstance (alloc_to_check , _PaddedAllocator ):
218+ # On the Intel CPU runtime the actx wraps its allocator to pad
219+ # buffers (working around an out-of-bounds runtime store); check
220+ # the wrapped allocator's type.
221+ alloc_to_check = alloc_to_check ._allocator
222+ assert isinstance (alloc_to_check ,
215223 MemoryPool if use_memory_pool else ImmediateAllocator )
216224
217225 f = actx .compile (twice )
@@ -398,6 +406,26 @@ def twice(x):
398406 actx2 ._enable_profiling (True )
399407
400408
def _auto_test_vs_ref(ref_t_unit, cl_ctx, t_unit):
    """Call ``lp.auto_test_vs_ref`` with an explicitly constructed allocator.

    A command queue is created on *cl_ctx* and wrapped in an
    ``ImmediateAllocator``. If the first device of *cl_ctx* is a CPU device
    whose platform name contains "intel" (case-insensitive), that allocator
    is additionally wrapped in ``_PaddedAllocator`` before being handed to
    ``lp.auto_test_vs_ref``.

    :arg ref_t_unit: reference translation unit, forwarded unchanged.
    :arg cl_ctx: OpenCL context used for the queue, the device check and the
        comparison run.
    :arg t_unit: translation unit under test, forwarded unchanged.
    :returns: *None*; the result of ``lp.auto_test_vs_ref`` is not returned.
    """
    import loopy as lp
    import pyopencl as cl
    from pyopencl.tools import ImmediateAllocator

    cq = cl.CommandQueue(cl_ctx)
    buf_allocator = ImmediateAllocator(cq)

    # Workaround for the Intel CPU OpenCL runtime: when it executes partial
    # work-groups it stores past the end of kernel output buffers, which
    # corrupts the host heap. Since auto_test_vs_ref allocates its own
    # buffers, hand it a padding allocator on that runtime so the stray
    # stores end up in valid memory.
    first_device = cl_ctx.devices[0]
    on_cpu = first_device.type & cl.device_type.CPU
    if on_cpu and "intel" in first_device.platform.name.lower():
        from arraycontext.impl.pytato import _PaddedAllocator
        buf_allocator = _PaddedAllocator(buf_allocator)

    lp.auto_test_vs_ref(ref_t_unit, cl_ctx, t_unit, allocator=buf_allocator)
427+
428+
401429def test_parallelize_disjoint_loop_sets_scalar ():
402430 import loopy as lp
403431 from loopy .kernel .data import GroupInameTag , LocalInameTag
@@ -489,7 +517,7 @@ def test_parallelize_disjoint_loop_sets_single_non_redn_iname():
489517 == {GroupInameTag (0 )}
490518 assert knl .iname_tags_of_type ("k" , (GroupInameTag , LocalInameTag )) == set ()
491519
492- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
520+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
493521
494522
495523def test_parallelize_disjoint_loop_sets_multiple_non_redn_inames ():
@@ -531,7 +559,7 @@ def test_parallelize_disjoint_loop_sets_multiple_non_redn_inames():
531559 == {LocalInameTag (0 )}
532560 assert knl .iname_tags_of_type ("k" , (GroupInameTag , LocalInameTag )) == set ()
533561
534- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
562+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
535563
536564
537565def test_parallelize_disjoint_loop_sets_only_redn_iname ():
@@ -572,7 +600,7 @@ def test_parallelize_disjoint_loop_sets_only_redn_iname():
572600 == {GroupInameTag (0 )}
573601 assert knl .iname_tags_of_type ("k" , (GroupInameTag , LocalInameTag )) == set ()
574602
575- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
603+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
576604
577605
578606def test_parallelize_disjoint_loop_sets_mixed ():
@@ -612,7 +640,7 @@ def test_parallelize_disjoint_loop_sets_mixed():
612640 == {LocalInameTag (0 )}
613641 assert knl .iname_tags_of_type ("k" , (GroupInameTag , LocalInameTag )) == set ()
614642
615- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
643+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
616644
617645
618646def test_parallelize_disjoint_loop_sets_multiple_independent_loop_sets ():
@@ -678,7 +706,7 @@ def test_parallelize_disjoint_loop_sets_multiple_independent_loop_sets():
678706 and insn .synchronization_kind == "global" ]
679707 assert len (gbarriers ) == 1
680708
681- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
709+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
682710
683711
684712def test_parallelize_disjoint_loop_sets_multiple_dependent_loop_sets ():
@@ -748,7 +776,7 @@ def test_parallelize_disjoint_loop_sets_multiple_dependent_loop_sets():
748776 assert gbarrier .id in knl .id_to_insn ["loopset2insn1" ].depends_on
749777 assert gbarrier .id in knl .id_to_insn ["loopset2insn2" ].depends_on
750778
751- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
779+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
752780
753781
754782def test_alias_global_temporaries ():
@@ -807,7 +835,7 @@ def global_temp(name: str):
807835 assert base_storages ["tmp2" ] != base_storages ["tmp1" ]
808836 assert len (set (base_storages .values ())) == 2
809837
810- lp . auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
838+ _auto_test_vs_ref (ref_t_unit , cl_ctx , t_unit )
811839
812840
813841if __name__ == "__main__" :
0 commit comments