Skip to content

Commit e7479ff

Browse files
isuruf authored and inducer committed
fp_contract(fast) for pocl CUDA
1 parent 88f619d commit e7479ff

2 files changed

Lines changed: 20 additions & 0 deletions

File tree

sumpy/codegen.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,20 @@ def register_bessel_callables(loopy_knl):
208208
Hankel1_01("hank1_01"))
209209
return loopy_knl
210210

211+
212+
def _fp_contract_fast_preamble(preamble_info):
213+
yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)")
214+
215+
216+
def register_optimization_preambles(loopy_knl, device):
    """Register optimization preambles on *loopy_knl* where applicable.

    The fp-contract preamble generator is registered only when all of the
    following hold:

    * the kernel's target is a ``lp.PyOpenCLTarget``,
    * the platform name of *device* is ``"Portable Computing Language"``,
    * the type bitfield of *device* has the GPU bit set.

    In every other case the kernel is returned untouched.

    :arg loopy_knl: the kernel to which preambles may be added.
    :arg device: the device whose ``platform.name`` and ``type`` are
        inspected.
    :returns: the kernel, with the preamble generator registered if the
        conditions above were met.
    """
    if not isinstance(loopy_knl.target, lp.PyOpenCLTarget):
        return loopy_knl

    # Imported lazily so that pyopencl is only needed for PyOpenCL targets.
    import pyopencl as cl

    # Check the platform first; the device type is only looked at for pocl.
    if device.platform.name != "Portable Computing Language":
        return loopy_knl
    if not (device.type & cl.device_type.GPU):
        return loopy_knl

    return lp.register_preamble_generators(
        loopy_knl, [_fp_contract_fast_preamble])
224+
211225
# }}}
212226

213227

sumpy/p2p.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,9 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array):
190190
knl = lp.set_options(knl,
191191
enforce_variable_access_ordered="no_check")
192192

193+
from sumpy.codegen import register_optimization_preambles
194+
knl = register_optimization_preambles(knl, self.device)
195+
193196
return knl
194197

195198

@@ -714,6 +717,9 @@ def get_optimized_kernel(self, max_nsources_in_one_box,
714717
knl = lp.set_options(knl,
715718
enforce_variable_access_ordered="no_check")
716719

720+
from sumpy.codegen import register_optimization_preambles
721+
knl = register_optimization_preambles(knl, self.device)
722+
717723
return knl
718724

719725
def __call__(self, queue, **kwargs):

0 commit comments

Comments
 (0)