@@ -127,7 +127,7 @@ def default_name(self):
127127 def get_kernel (self , max_ntargets_in_one_box ):
128128 ncoeffs = len (self .expansion )
129129 loopy_args = self .get_loopy_args ()
130- max_work_items = min (32 , max (ncoeffs , max_ntargets_in_one_box ))
130+ max_work_items = min (256 , max (ncoeffs , max_ntargets_in_one_box ))
131131
132132 loopy_knl = lp .make_kernel (
133133 [
@@ -212,8 +212,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
212212 inner_knl , optimizations = self .get_cached_loopy_knl_and_optimizations ()
213213 knl = self .get_kernel (max_ntargets_in_one_box = max_ntargets_in_one_box )
214214 knl = lp .tag_inames (knl , {"itgt_box" : "g.0" })
215- knl = lp .split_iname (knl , "itgt_offset" , 32 , inner_tag = "l.0" )
216- knl = lp .split_iname (knl , "icoeff" , 32 , inner_tag = "l.0" )
215+ knl = lp .split_iname (knl , "itgt_offset" , 256 , inner_tag = "l.0" )
216+ knl = lp .split_iname (knl , "icoeff" , 256 , inner_tag = "l.0" )
217217 knl = lp .add_inames_to_insn (knl , "dummy" ,
218218 "id:fetch_init* or id:fetch_center or id:kernel_scaling" )
219219 knl = lp .add_inames_to_insn (knl , "itgt_box" , "id:kernel_scaling" )
@@ -278,7 +278,7 @@ def default_name(self):
278278 def get_kernel (self , max_ntargets_in_one_box ):
279279 ncoeffs = len (self .expansion )
280280 loopy_args = self .get_loopy_args ()
281- max_work_items = min (32 , max (ncoeffs , max_ntargets_in_one_box ))
281+ max_work_items = min (256 , max (ncoeffs , max_ntargets_in_one_box ))
282282
283283 loopy_knl = lp .make_kernel (
284284 [
@@ -385,8 +385,8 @@ def get_optimized_kernel(self, max_ntargets_in_one_box):
385385 knl = lp .tag_inames (knl , {"itgt_box" : "g.0" , "dummy" : "l.0" })
386386 knl = lp .unprivatize_temporaries_with_inames (knl ,
387387 "itgt_offset" , "result_temp" )
388- knl = lp .split_iname (knl , "itgt_offset" , 32 , inner_tag = "l.0" )
389- knl = lp .split_iname (knl , "icoeff" , 32 , inner_tag = "l.0" )
388+ knl = lp .split_iname (knl , "itgt_offset" , 256 , inner_tag = "l.0" )
389+ knl = lp .split_iname (knl , "icoeff" , 256 , inner_tag = "l.0" )
390390 knl = lp .privatize_temporaries_with_inames (knl ,
391391 "itgt_offset_outer" , "result_temp" )
392392 knl = lp .duplicate_inames (knl , "itgt_offset_outer" , "id:init_result" )
0 commit comments