File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -143,6 +143,16 @@ jobs:
143143 sudo apt-get update && sudo apt-get install -y valgrind
144144 export PYTHONMALLOC=malloc
145145 export NO_COLOR=1
146+
147+ # Test whether disabling the Intel CPU runtime's SIMD vectorizer
148+ # avoids the out-of-bounds tail-lane store at its source. Stores
149+ # from over-provisioned work-items that the bounds guard masks off
150+ # leak only through the vectorized code path; running
151+ # scalar should honor the guard. If valgrind reports no "Invalid
152+ # write" here (with no padding), this is a clean global fix.
153+ # (CL_CONFIG_CPU_VECTORIZER_MODE=1 had no effect, so use the
154+ # primary CL_CONFIG_USE_VECTORIZER switch instead.)
155+ export CL_CONFIG_USE_VECTORIZER=False
146156 valgrind \
147157 --smc-check=all-non-file \
148158 --leak-check=no --errors-for-leak-kinds=none \
Original file line number Diff line number Diff line change 4141print ()
4242print (lp .generate_code_v2 (knl ).device_code ())
4343
44- # Execute the kernel. Allocate the output through the array context's padding
45- # allocator, which over-allocates buffers to absorb the Intel CPU runtime's
46- # out-of-bounds tail-lane stores. Under valgrind this should turn the previous
47- # "Invalid write ... 0 bytes after a block" into a write that lands inside the
48- # (padded) block.
49- from pyopencl .tools import ImmediateAllocator
50-
51- from arraycontext .impl .pytato import _PaddedAllocator
52-
53-
44+ # Execute the kernel.
5445ctx = cl .create_some_context (interactive = False )
5546queue = cl .CommandQueue (ctx )
56- allocator = _PaddedAllocator (ImmediateAllocator (queue ))
5747
58- _evt , (out ,) = knl (queue , allocator = allocator )
48+ _evt , (out ,) = knl (queue )
5949print (out .get ())
You can’t perform that action at this time.
0 commit comments