@@ -257,9 +257,14 @@ def l2p_loopy_volume_taylor(expansion, kernels):
257257 c = max_mi [axis_permutation [0 ]]
258258 v = [pymbolic .var (f"x{ i } " ) for i in range (dim )]
259259 v [axis_permutation [0 ]], v [0 ] = v [0 ], v [axis_permutation [0 ]]
260+ x0 = v [0 ]
260261
261262 def get_domains (v , iorder ):
262- domains = [f"{{ [{ v [0 ]} ]: 0<={ v [0 ]} <=order }}" ]
263+ domains = [f"{{ [{ x0 } _outer]: 0<={ x0 } _outer<={ order // c } }}" ]
264+ expr = f"{ v [0 ]} _inner + { c } *{ x0 } _outer"
265+ domains += [f"{{ [{ v [0 ]} _inner]: 0<={ expr } <=order "
266+ f"and 0<={ v [0 ]} _inner<{ c } }}" ]
267+ domains += [f"{{ [{ v [0 ]} ]: { expr } <={ v [0 ]} <={ expr } }}" ]
263268 domains += [f"{{ [{ iorder } ]: { v [0 ]} <={ iorder } <=order }}" ]
264269 upper_bound = f"{ iorder } -{ v [0 ]} "
265270 for i in range (dim - 1 , 1 , - 1 ):
@@ -286,25 +291,20 @@ def get_idx(v):
286291 continue
287292 mi_sym = [max_mi_sym [i ] + deriv_id .mi [i ] for i in range (dim )]
288293 mi_sym [0 ] = mi_sym [0 ] % c
289- expr += (coeffs_copy [(v [0 ]// c + 1 ) % 2 , wrangler . get_storage_index ( mi_sym )]
290- * (pde_coeff * scale ))
294+ expr += (coeffs_copy [(v [0 ]// c + 1 ) % 2 ,
295+ wrangler . get_storage_index ( mi_sym )] * (pde_coeff * scale ))
291296
292- domains += ["{[dummy]: 0<=dummy<1}" ]
293- insns2 = []
294297 insns .append (lp .Assignment (
295- assignee = coeffs_copy [(v [0 ]// c )% 2 , idx ],
298+ assignee = coeffs_copy [(v [0 ]// c ) % 2 , idx ],
296299 expression = expr ,
297300 id = "update_coeffs" ,
298301 depends_on = frozenset (["copy_coeffs" ]),
299302 depends_on_is_final = True ,
300- #within_inames=frozenset(["dummy"]),
301303 predicates = frozenset ([prim .Comparison (v [0 ], ">=" , c )]),
302304 ))
303305
304- x0 = v [0 ]
305306 v = [pymbolic .var (f"y{ i } " ) for i in range (dim )]
306307 v [axis_permutation [0 ]], v [0 ] = v [0 ], v [axis_permutation [0 ]]
307- v [0 ] = x0
308308 iorder = pymbolic .var ("iorder2" )
309309 idx = get_idx (v )
310310 domains += get_domains (v , iorder )[1 :]
@@ -318,7 +318,7 @@ def get_idx(v):
318318 insn = lp .Assignment (
319319 assignee = result [ikernel ],
320320 expression = (result [ikernel ]
321- + coeffs_copy [(v [0 ]// c )% 2 , idx ] * expr ),
321+ + coeffs_copy [(v [0 ]// c ) % 2 , idx ] * expr ),
322322 id = f"write_{ ikernel } " ,
323323 depends_on = frozenset (["update_monomials" , "update_coeffs" ]),
324324 depends_on_is_final = True ,
@@ -328,8 +328,10 @@ def get_idx(v):
328328 lambda knl : lp .tag_inames (knl , {
329329 #"e2p_dummy": "l.0",
330330 "e2p_iorder1" : "l.0" ,
331- f"e2p_{ v [0 ]} " : "unr" ,
332- #"e2p_iorder2": "unr",
331+ f"e2p_{ x0 } _outer" : "unr" ,
332+ f"e2p_{ x0 } _inner" : "unr" ,
333+ f"e2p_{ v [0 ]} _inner" : "unr" ,
334+ "e2p_iorder2" : "unr" ,
333335 }),
334336 lambda knl : lp .set_temporary_address_space (knl , "e2p_coeffs_copy" ,
335337 lp .AddressSpace .LOCAL ),
0 commit comments