|
1 | 1 | // NOTE: Work in progress, will be refactored |
2 | 2 |
|
3 | | -extern crate ocl; |
4 | | -use ocl::{Buffer, MemFlags, ProQue}; |
| 3 | +use opencl3::command_queue::{CommandQueue, CL_QUEUE_PROFILING_ENABLE}; |
| 4 | +use opencl3::context::Context; |
| 5 | +use opencl3::device::{get_all_devices, Device, CL_DEVICE_TYPE_GPU}; |
| 6 | +use opencl3::kernel::{ExecuteKernel, Kernel}; |
| 7 | +use opencl3::memory::{ |
| 8 | + Buffer, CL_MAP_WRITE, CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE, |
| 9 | + CL_MEM_WRITE_ONLY, |
| 10 | +}; |
| 11 | +use opencl3::program::{Program, CL_STD_2_0}; |
| 12 | +use opencl3::types::{ |
| 13 | + cl_double, cl_event, cl_float, cl_int, cl_long, CL_BLOCKING, CL_NON_BLOCKING, |
| 14 | +}; |
| 15 | + |
| 16 | +use std::ptr; |
5 | 17 |
|
6 | 18 | const KERNEL_SRC: &'static str = include_str!("kernel.cl"); |
7 | 19 |
|
8 | | -pub fn sum_two_ints32(arr_1: &[i32], arr_2: &[i32], result_vec: &mut Vec<i64>) { |
9 | | - let pro_que = ProQue::builder() |
10 | | - .src(KERNEL_SRC) |
11 | | - .dims(arr_1.len()) |
12 | | - .build() |
13 | | - .unwrap(); |
14 | | - |
15 | | - let buffer_1 = Buffer::builder() |
16 | | - .queue(pro_que.queue().clone()) |
17 | | - .flags(MemFlags::new().read_write()) |
18 | | - .len(arr_1.len()) |
19 | | - .copy_host_slice(&arr_1) |
20 | | - .build() |
21 | | - .unwrap(); |
22 | | - |
23 | | - let buffer_2 = Buffer::builder() |
24 | | - .queue(pro_que.queue().clone()) |
25 | | - .flags(MemFlags::new().read_write()) |
26 | | - .len(arr_1.len()) |
27 | | - .copy_host_slice(&arr_2) |
28 | | - .build() |
29 | | - .unwrap(); |
30 | | - |
31 | | - let result = pro_que.create_buffer::<i64>().unwrap(); |
32 | | - |
33 | | - let kernel = pro_que |
34 | | - .kernel_builder("add_i") |
35 | | - .arg(&buffer_1) |
36 | | - .arg(&buffer_2) |
37 | | - .arg(&result) |
38 | | - .build() |
39 | | - .unwrap(); |
40 | | - |
41 | | - unsafe { |
42 | | - kernel.enq().unwrap(); |
43 | | - } |
44 | | - |
45 | | - result.read(result_vec).enq().unwrap(); |
| 20 | +pub struct GPUKernelsDispatcher { |
| 21 | + context: Context, |
| 22 | + program: Program, |
| 23 | + queue: CommandQueue, |
46 | 24 | } |
47 | 25 |
|
48 | | -pub fn dot_float(arr_1: &[f32], arr_2: &[f32], result_vec: &mut Vec<f32>) { |
49 | | - let pro_que = ProQue::builder() |
50 | | - .src(KERNEL_SRC) |
51 | | - .dims(arr_1.len()) |
52 | | - .build() |
53 | | - .unwrap(); |
54 | | - |
55 | | - let buffer_1 = Buffer::builder() |
56 | | - .queue(pro_que.queue().clone()) |
57 | | - .flags(MemFlags::new().read_write()) |
58 | | - .len(arr_1.len()) |
59 | | - .copy_host_slice(&arr_1) |
60 | | - .build() |
61 | | - .unwrap(); |
62 | | - |
63 | | - let buffer_2 = Buffer::builder() |
64 | | - .queue(pro_que.queue().clone()) |
65 | | - .flags(MemFlags::new().read_write()) |
66 | | - .len(arr_1.len()) |
67 | | - .copy_host_slice(&arr_2) |
68 | | - .build() |
69 | | - .unwrap(); |
70 | | - |
71 | | - let result = pro_que.create_buffer::<f32>().unwrap(); |
72 | | - |
73 | | - let kernel = pro_que |
74 | | - .kernel_builder("dot_f") |
75 | | - .arg(&buffer_1) |
76 | | - .arg(&buffer_2) |
77 | | - .arg(&result) |
78 | | - .build() |
79 | | - .unwrap(); |
80 | | - |
81 | | - unsafe { |
82 | | - kernel.enq().unwrap(); |
| 26 | +impl GPUKernelsDispatcher { |
| 27 | + pub fn new() -> Self { |
| 28 | + let device_id: *mut std::ffi::c_void = *get_all_devices(CL_DEVICE_TYPE_GPU) |
| 29 | + .unwrap() |
| 30 | + .first() |
| 31 | + .expect("no device found in platform"); |
| 32 | + |
| 33 | + let device = Device::new(device_id); |
| 34 | + let context = Context::from_device(&device).expect("Context::from_device failed"); |
| 35 | + |
| 36 | + let program = Program::create_and_build_from_source(&context, KERNEL_SRC, CL_STD_2_0) |
| 37 | + .expect("Program::create_and_build_from_source failed"); |
| 38 | + |
| 39 | + let queue = |
| 40 | + CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0) |
| 41 | + .expect("CommandQueue::create_default_with_properties failed"); |
| 42 | + |
| 43 | + Self { |
| 44 | + context, |
| 45 | + program, |
| 46 | + queue, |
| 47 | + } |
| 48 | + } |
| 49 | + |
| 50 | + pub fn sum_two_ints32(&self, arr_1: &[i32], arr_2: &[i32], result_vec: &mut Vec<i64>) { |
| 51 | + let kernel = Kernel::create(&self.program, "add_i").expect("Kernel::create failed"); |
| 52 | + |
| 53 | + let mut arr_1_buf = unsafe { |
| 54 | + Buffer::<cl_int>::create( |
| 55 | + &self.context, |
| 56 | + CL_MEM_READ_ONLY, |
| 57 | + arr_1.len(), |
| 58 | + ptr::null_mut(), |
| 59 | + ) |
| 60 | + .expect("allocation error") |
| 61 | + }; |
| 62 | + let mut arr_2_buf = unsafe { |
| 63 | + Buffer::<cl_int>::create( |
| 64 | + &self.context, |
| 65 | + CL_MEM_READ_ONLY, |
| 66 | + arr_2.len(), |
| 67 | + ptr::null_mut(), |
| 68 | + ) |
| 69 | + .expect("allocation error") |
| 70 | + }; |
| 71 | + let result_buf = unsafe { |
| 72 | + Buffer::<cl_long>::create( |
| 73 | + &self.context, |
| 74 | + CL_MEM_WRITE_ONLY, |
| 75 | + result_vec.len(), |
| 76 | + ptr::null_mut(), |
| 77 | + ) |
| 78 | + .expect("allocation error") |
| 79 | + }; |
| 80 | + |
| 81 | + let _arr_1_buf_write_event = unsafe { |
| 82 | + self.queue |
| 83 | + .enqueue_write_buffer(&mut arr_1_buf, CL_NON_BLOCKING, 0, &arr_1, &[]) |
| 84 | + .unwrap() |
| 85 | + }; |
| 86 | + let _arr_2_buf_write_event = unsafe { |
| 87 | + self.queue |
| 88 | + .enqueue_write_buffer(&mut arr_2_buf, CL_NON_BLOCKING, 0, &arr_2, &[]) |
| 89 | + .unwrap() |
| 90 | + }; |
| 91 | + |
| 92 | + let kernel_event = unsafe { |
| 93 | + ExecuteKernel::new(&kernel) |
| 94 | + .set_arg(&arr_1_buf) |
| 95 | + .set_arg(&arr_2_buf) |
| 96 | + .set_arg(&result_buf) |
| 97 | + .set_global_work_size(arr_1.len()) |
| 98 | + .set_wait_event(&_arr_1_buf_write_event) |
| 99 | + .set_wait_event(&_arr_2_buf_write_event) |
| 100 | + .enqueue_nd_range(&self.queue) |
| 101 | + .unwrap() |
| 102 | + }; |
| 103 | + |
| 104 | + let mut events: Vec<cl_event> = Vec::default(); |
| 105 | + events.push(kernel_event.get()); |
| 106 | + |
| 107 | + let read_event = unsafe { |
| 108 | + self.queue |
| 109 | + .enqueue_read_buffer(&result_buf, CL_NON_BLOCKING, 0, result_vec, &events) |
| 110 | + .unwrap() |
| 111 | + }; |
| 112 | + |
| 113 | + read_event.wait().unwrap(); |
83 | 114 | } |
84 | 115 |
|
85 | | - result.read(result_vec).enq().unwrap(); |
| 116 | + pub fn dot_floats32(&self, arr_1: &[f32], arr_2: &[f32]) -> f32 { |
| 117 | + let kernel = Kernel::create(&self.program, "dot_f").expect("Kernel::create failed"); |
| 118 | + |
| 119 | + let mut arr_1_buf = unsafe { |
| 120 | + Buffer::<cl_float>::create( |
| 121 | + &self.context, |
| 122 | + CL_MEM_READ_ONLY, |
| 123 | + arr_1.len(), |
| 124 | + ptr::null_mut(), |
| 125 | + ) |
| 126 | + .expect("opencl: allocation error") |
| 127 | + }; |
| 128 | + |
| 129 | + let mut arr_2_buf = unsafe { |
| 130 | + Buffer::<cl_float>::create( |
| 131 | + &self.context, |
| 132 | + CL_MEM_READ_ONLY, |
| 133 | + arr_2.len(), |
| 134 | + ptr::null_mut(), |
| 135 | + ) |
| 136 | + .expect("opencl: allocation error") |
| 137 | + }; |
| 138 | + |
| 139 | + let local_size = 64; |
| 140 | + let group_count = (arr_1.len() + local_size - 1) / local_size; |
| 141 | + |
| 142 | + let partial_buf = unsafe { |
| 143 | + Buffer::<cl_float>::create( |
| 144 | + &self.context, |
| 145 | + CL_MEM_WRITE_ONLY, |
| 146 | + group_count, |
| 147 | + ptr::null_mut(), |
| 148 | + ) |
| 149 | + .unwrap() |
| 150 | + }; |
| 151 | + |
| 152 | + let _arr_1_buf_write_event = unsafe { |
| 153 | + self.queue |
| 154 | + .enqueue_write_buffer(&mut arr_1_buf, CL_NON_BLOCKING, 0, &arr_1, &[]) |
| 155 | + .unwrap() |
| 156 | + }; |
| 157 | + let _arr_2_buf_write_event = unsafe { |
| 158 | + self.queue |
| 159 | + .enqueue_write_buffer(&mut arr_2_buf, CL_NON_BLOCKING, 0, &arr_2, &[]) |
| 160 | + .unwrap() |
| 161 | + }; |
| 162 | + |
| 163 | + let kernel_event = unsafe { |
| 164 | + ExecuteKernel::new(&kernel) |
| 165 | + .set_arg(&arr_1_buf) |
| 166 | + .set_arg(&arr_2_buf) |
| 167 | + .set_arg(&partial_buf) |
| 168 | + .set_global_work_size(arr_1.len()) |
| 169 | + .set_local_work_size(local_size) |
| 170 | + .set_wait_event(&_arr_1_buf_write_event) |
| 171 | + .set_wait_event(&_arr_2_buf_write_event) |
| 172 | + .enqueue_nd_range(&self.queue) |
| 173 | + .unwrap() |
| 174 | + }; |
| 175 | + |
| 176 | + let mut events: Vec<cl_event> = Vec::default(); |
| 177 | + events.push(kernel_event.get()); |
| 178 | + |
| 179 | + let mut partial_results = vec![0.0f32; group_count]; |
| 180 | + let read_event = unsafe { |
| 181 | + self.queue |
| 182 | + .enqueue_read_buffer(&partial_buf, CL_BLOCKING, 0, &mut partial_results, &[]) |
| 183 | + .unwrap() |
| 184 | + }; |
| 185 | + |
| 186 | + let result: f32 = partial_results.iter().sum(); |
| 187 | + read_event.wait().unwrap(); |
| 188 | + |
| 189 | + result |
| 190 | + } |
86 | 191 | } |
0 commit comments