#!/usr/bin/env python
"""Generate device code for the ``frozen_result`` kernel.

The kernel below is reconstructed inline from its loopy listing: a (10, 4)
float64 array fill (``_ary = 0.0 + 1.0``) whose two axes have been split into
group/local/chunk inames and tagged for an OpenCL-style execution.

Running the script prints the kernel, prints the generated device code, and
then executes the kernel on an OpenCL context and prints the resulting array.
"""

import numpy as np

import loopy as lp
import pyopencl as cl


knl = lp.make_kernel(
    # Iteration domain.  Both chunk inames are pinned to 0.  The row index
    # (_ary_dim0_local_one + 4*_ary_dim0_group) is kept within 0..9 and the
    # column index (_ary_dim1_local_zero) within 0..3, matching the (10, 4)
    # shape declared for ``_ary`` below.
    "{ [_ary_dim0_chunk, _ary_dim0_group, _ary_dim0_local_one,"
    " _ary_dim1_chunk, _ary_dim1_local_zero] :"
    " _ary_dim0_chunk = 0 and _ary_dim1_chunk = 0 and _ary_dim0_group >= 0"
    " and 0 <= _ary_dim0_local_one <= 9 - 4*_ary_dim0_group"
    " and _ary_dim0_local_one <= 3"
    " and 0 <= _ary_dim1_local_zero <= 3 }",
    # Loopy reads instruction text one line at a time, so the single store
    # is assembled from adjacent literals into one line instead of being
    # wrapped across lines inside a triple-quoted block.
    "_ary[_ary_dim0_local_one + _ary_dim0_group*4 + _ary_dim0_chunk*64,"
    " _ary_dim1_local_zero + _ary_dim1_chunk*16]"
    " = 0.0 + 1.0 {id=_ary_store}",
    [
        lp.GlobalArg("_ary", dtype=np.float64, shape=(10, 4), dim_tags="N1,N0"),
    ],
    name="frozen_result",
    target=lp.PyOpenCLTarget(),
    lang_version=(2018, 2),
)

# Map the group iname onto group axis 0 and the two local inames onto local
# axes 1 and 0; the chunk inames are left untagged.
knl = lp.tag_inames(knl, {
    "_ary_dim0_group": "g.0",
    "_ary_dim0_local_one": "l.1",
    "_ary_dim1_local_zero": "l.0",
})

# Show the kernel description, a blank separator line, then the generated
# device code.
print(knl)
print()
print(lp.generate_code_v2(knl).device_code())

# Execute the kernel.
# interactive=False picks a context without prompting on the terminal.
ctx = cl.create_some_context(interactive=False)
queue = cl.CommandQueue(ctx)

# The call yields an event and the kernel outputs; ``_ary`` is the only output.
_evt, (out,) = knl(queue)
print(out.get())