Skip to content

Commit 0489266

Browse files
committed
fix: opencl.lib not found issue, by switching to a library that works dynamically
1 parent 85fe407 commit 0489266

11 files changed

Lines changed: 313 additions & 104 deletions

File tree

.github/workflows/release.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
- name: Installing OpenCL
3131
run: |
3232
sudo apt update
33-
sudo apt install ocl-icd-opencl-dev
33+
sudo apt install ocl-icd-opencl-dev -y
3434
3535
- uses: actions/checkout@v4
3636
- name: Testing Rust modules
@@ -150,7 +150,7 @@ jobs:
150150
name: Release
151151
runs-on: ubuntu-latest
152152
if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
153-
needs: [sdist]
153+
needs: [test, sdist]
154154
permissions:
155155
id-token: write
156156
contents: write
@@ -159,7 +159,7 @@ jobs:
159159
- name: Installing OpenCL
160160
run: |
161161
sudo apt update
162-
sudo apt install ocl-icd-opencl-dev
162+
sudo apt install ocl-icd-opencl-dev -y
163163
164164
- uses: actions/download-artifact@v4
165165
- name: Generate artifact attestation

Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rem_math"
3-
version = "0.2.7"
3+
version = "0.2.8"
44
edition = "2021"
55

66
[lib]
@@ -19,7 +19,10 @@ crate-type = ["cdylib", "rlib"]
1919
numpy = "0.25.0"
2020
pyo3 = { version = "0.25.1", features = ["extension-module"] }
2121
rayon = "1.10.0"
22-
ocl = "0.19"
22+
23+
[dependencies.opencl3]
24+
version = "0.11"
25+
features = ["CL_VERSION_2_1", "CL_VERSION_2_2", "CL_VERSION_3_0"]
2326

2427
[dev-dependencies]
2528
criterion = "0.3"

build.rs

Lines changed: 0 additions & 14 deletions
This file was deleted.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "rem_math"
3-
version = "0.2.7"
3+
version = "0.2.8"
44
description = ""
55
authors = [
66
{name = "WrldEngine",email = "kamran_pulatov@outlook.com"}

python.Dockerfile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# syntax=docker/dockerfile:1
2+
3+
FROM python:3.12
4+
5+
WORKDIR /sandbox
6+
7+
# Install the OpenCL library
8+
RUN apt update
9+
RUN apt install ocl-icd-opencl-dev -y
10+
11+
RUN --mount=type=cache,target=/root/.cache/pip \
12+
pip install rem-math
13+
14+
ARG UID=10001
15+
RUN adduser \
16+
--disabled-password \
17+
--gecos "" \
18+
--home "/nonexistent" \
19+
--shell "/sbin/nologin" \
20+
--no-create-home \
21+
--uid "${UID}" \
22+
appuser
23+
USER appuser
24+
25+
ENTRYPOINT ["/bin/bash", "-c"]

rem_math/_rem_math.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ def multiply_two_nparr_ints32(
1515
arr_1: List[int] | NDArray, arr_2: List[int] | NDArray, method: str
1616
) -> List: ...
1717
def multiply_two_ints32(arr_1: List[int], arr_2: List[int], method: str) -> List: ...
18+
def dot_two_nparr_floats32(
19+
arr_1: List[float] | NDArray, arr_2: List[float] | NDArray, method: str
20+
) -> float: ...
21+
def dot_two_floats32(arr_1: List[float], arr_2: List[float], method: str) -> float: ...

Dockerfile renamed to rust.Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@ RUN apt-get update -y && \
1515
apt-get install -y pkg-config make g++ libssl-dev && \
1616
rustup target add x86_64-unknown-linux-gnu
1717

18+
# Install the OpenCL library
19+
RUN apt update
20+
RUN apt install ocl-icd-opencl-dev -y
21+
1822
RUN --mount=type=bind,source=src,target=src \
1923
--mount=type=bind,source=Cargo.toml,target=Cargo.toml \
2024
--mount=type=bind,source=Cargo.lock,target=Cargo.lock \
2125
--mount=type=cache,target=/$APP_WORKDIR/target/ \
2226
--mount=type=cache,target=/usr/local/cargo/registry/ \
2327
RUSTFLAGS="-Z threads=8" cargo +nightly build --release --locked
2428

25-
WORKDIR ${APP_WORKDIR}
26-
2729
ARG UID=10001
2830
RUN adduser \
2931
--disabled-password \
@@ -35,5 +37,4 @@ RUN adduser \
3537
appuser
3638
USER appuser
3739

38-
COPY --from=build ${APP_WORKDIR} ${APP_WORKDIR}
3940
ENTRYPOINT ["/bin/bash", "-c"]

src/gpu.rs

Lines changed: 181 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,191 @@
11
// NOTE: Work in progress, will be refactored
22

3-
extern crate ocl;
4-
use ocl::{Buffer, MemFlags, ProQue};
3+
use opencl3::command_queue::{CommandQueue, CL_QUEUE_PROFILING_ENABLE};
4+
use opencl3::context::Context;
5+
use opencl3::device::{get_all_devices, Device, CL_DEVICE_TYPE_GPU};
6+
use opencl3::kernel::{ExecuteKernel, Kernel};
7+
use opencl3::memory::{
8+
Buffer, CL_MAP_WRITE, CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE,
9+
CL_MEM_WRITE_ONLY,
10+
};
11+
use opencl3::program::{Program, CL_STD_2_0};
12+
use opencl3::types::{
13+
cl_double, cl_event, cl_float, cl_int, cl_long, CL_BLOCKING, CL_NON_BLOCKING,
14+
};
15+
16+
use std::ptr;
517

618
const KERNEL_SRC: &'static str = include_str!("kernel.cl");
719

8-
pub fn sum_two_ints32(arr_1: &[i32], arr_2: &[i32], result_vec: &mut Vec<i64>) {
9-
let pro_que = ProQue::builder()
10-
.src(KERNEL_SRC)
11-
.dims(arr_1.len())
12-
.build()
13-
.unwrap();
14-
15-
let buffer_1 = Buffer::builder()
16-
.queue(pro_que.queue().clone())
17-
.flags(MemFlags::new().read_write())
18-
.len(arr_1.len())
19-
.copy_host_slice(&arr_1)
20-
.build()
21-
.unwrap();
22-
23-
let buffer_2 = Buffer::builder()
24-
.queue(pro_que.queue().clone())
25-
.flags(MemFlags::new().read_write())
26-
.len(arr_1.len())
27-
.copy_host_slice(&arr_2)
28-
.build()
29-
.unwrap();
30-
31-
let result = pro_que.create_buffer::<i64>().unwrap();
32-
33-
let kernel = pro_que
34-
.kernel_builder("add_i")
35-
.arg(&buffer_1)
36-
.arg(&buffer_2)
37-
.arg(&result)
38-
.build()
39-
.unwrap();
40-
41-
unsafe {
42-
kernel.enq().unwrap();
43-
}
44-
45-
result.read(result_vec).enq().unwrap();
20+
pub struct GPUKernelsDispatcher {
21+
context: Context,
22+
program: Program,
23+
queue: CommandQueue,
4624
}
4725

48-
pub fn dot_float(arr_1: &[f32], arr_2: &[f32], result_vec: &mut Vec<f32>) {
49-
let pro_que = ProQue::builder()
50-
.src(KERNEL_SRC)
51-
.dims(arr_1.len())
52-
.build()
53-
.unwrap();
54-
55-
let buffer_1 = Buffer::builder()
56-
.queue(pro_que.queue().clone())
57-
.flags(MemFlags::new().read_write())
58-
.len(arr_1.len())
59-
.copy_host_slice(&arr_1)
60-
.build()
61-
.unwrap();
62-
63-
let buffer_2 = Buffer::builder()
64-
.queue(pro_que.queue().clone())
65-
.flags(MemFlags::new().read_write())
66-
.len(arr_1.len())
67-
.copy_host_slice(&arr_2)
68-
.build()
69-
.unwrap();
70-
71-
let result = pro_que.create_buffer::<f32>().unwrap();
72-
73-
let kernel = pro_que
74-
.kernel_builder("dot_f")
75-
.arg(&buffer_1)
76-
.arg(&buffer_2)
77-
.arg(&result)
78-
.build()
79-
.unwrap();
80-
81-
unsafe {
82-
kernel.enq().unwrap();
26+
impl GPUKernelsDispatcher {
27+
pub fn new() -> Self {
28+
let device_id: *mut std::ffi::c_void = *get_all_devices(CL_DEVICE_TYPE_GPU)
29+
.unwrap()
30+
.first()
31+
.expect("no device found in platform");
32+
33+
let device = Device::new(device_id);
34+
let context = Context::from_device(&device).expect("Context::from_device failed");
35+
36+
let program = Program::create_and_build_from_source(&context, KERNEL_SRC, CL_STD_2_0)
37+
.expect("Program::create_and_build_from_source failed");
38+
39+
let queue =
40+
CommandQueue::create_default_with_properties(&context, CL_QUEUE_PROFILING_ENABLE, 0)
41+
.expect("CommandQueue::create_default_with_properties failed");
42+
43+
Self {
44+
context,
45+
program,
46+
queue,
47+
}
48+
}
49+
50+
pub fn sum_two_ints32(&self, arr_1: &[i32], arr_2: &[i32], result_vec: &mut Vec<i64>) {
51+
let kernel = Kernel::create(&self.program, "add_i").expect("Kernel::create failed");
52+
53+
let mut arr_1_buf = unsafe {
54+
Buffer::<cl_int>::create(
55+
&self.context,
56+
CL_MEM_READ_ONLY,
57+
arr_1.len(),
58+
ptr::null_mut(),
59+
)
60+
.expect("allocation error")
61+
};
62+
let mut arr_2_buf = unsafe {
63+
Buffer::<cl_int>::create(
64+
&self.context,
65+
CL_MEM_READ_ONLY,
66+
arr_2.len(),
67+
ptr::null_mut(),
68+
)
69+
.expect("allocation error")
70+
};
71+
let result_buf = unsafe {
72+
Buffer::<cl_long>::create(
73+
&self.context,
74+
CL_MEM_WRITE_ONLY,
75+
result_vec.len(),
76+
ptr::null_mut(),
77+
)
78+
.expect("allocation error")
79+
};
80+
81+
let _arr_1_buf_write_event = unsafe {
82+
self.queue
83+
.enqueue_write_buffer(&mut arr_1_buf, CL_NON_BLOCKING, 0, &arr_1, &[])
84+
.unwrap()
85+
};
86+
let _arr_2_buf_write_event = unsafe {
87+
self.queue
88+
.enqueue_write_buffer(&mut arr_2_buf, CL_NON_BLOCKING, 0, &arr_2, &[])
89+
.unwrap()
90+
};
91+
92+
let kernel_event = unsafe {
93+
ExecuteKernel::new(&kernel)
94+
.set_arg(&arr_1_buf)
95+
.set_arg(&arr_2_buf)
96+
.set_arg(&result_buf)
97+
.set_global_work_size(arr_1.len())
98+
.set_wait_event(&_arr_1_buf_write_event)
99+
.set_wait_event(&_arr_2_buf_write_event)
100+
.enqueue_nd_range(&self.queue)
101+
.unwrap()
102+
};
103+
104+
let mut events: Vec<cl_event> = Vec::default();
105+
events.push(kernel_event.get());
106+
107+
let read_event = unsafe {
108+
self.queue
109+
.enqueue_read_buffer(&result_buf, CL_NON_BLOCKING, 0, result_vec, &events)
110+
.unwrap()
111+
};
112+
113+
read_event.wait().unwrap();
83114
}
84115

85-
result.read(result_vec).enq().unwrap();
116+
pub fn dot_floats32(&self, arr_1: &[f32], arr_2: &[f32]) -> f32 {
117+
let kernel = Kernel::create(&self.program, "dot_f").expect("Kernel::create failed");
118+
119+
let mut arr_1_buf = unsafe {
120+
Buffer::<cl_float>::create(
121+
&self.context,
122+
CL_MEM_READ_ONLY,
123+
arr_1.len(),
124+
ptr::null_mut(),
125+
)
126+
.expect("opencl: allocation error")
127+
};
128+
129+
let mut arr_2_buf = unsafe {
130+
Buffer::<cl_float>::create(
131+
&self.context,
132+
CL_MEM_READ_ONLY,
133+
arr_2.len(),
134+
ptr::null_mut(),
135+
)
136+
.expect("opencl: allocation error")
137+
};
138+
139+
let local_size = 64;
140+
let group_count = (arr_1.len() + local_size - 1) / local_size;
141+
142+
let partial_buf = unsafe {
143+
Buffer::<cl_float>::create(
144+
&self.context,
145+
CL_MEM_WRITE_ONLY,
146+
group_count,
147+
ptr::null_mut(),
148+
)
149+
.unwrap()
150+
};
151+
152+
let _arr_1_buf_write_event = unsafe {
153+
self.queue
154+
.enqueue_write_buffer(&mut arr_1_buf, CL_NON_BLOCKING, 0, &arr_1, &[])
155+
.unwrap()
156+
};
157+
let _arr_2_buf_write_event = unsafe {
158+
self.queue
159+
.enqueue_write_buffer(&mut arr_2_buf, CL_NON_BLOCKING, 0, &arr_2, &[])
160+
.unwrap()
161+
};
162+
163+
let kernel_event = unsafe {
164+
ExecuteKernel::new(&kernel)
165+
.set_arg(&arr_1_buf)
166+
.set_arg(&arr_2_buf)
167+
.set_arg(&partial_buf)
168+
.set_global_work_size(arr_1.len())
169+
.set_local_work_size(local_size)
170+
.set_wait_event(&_arr_1_buf_write_event)
171+
.set_wait_event(&_arr_2_buf_write_event)
172+
.enqueue_nd_range(&self.queue)
173+
.unwrap()
174+
};
175+
176+
let mut events: Vec<cl_event> = Vec::default();
177+
events.push(kernel_event.get());
178+
179+
let mut partial_results = vec![0.0f32; group_count];
180+
let read_event = unsafe {
181+
self.queue
182+
.enqueue_read_buffer(&partial_buf, CL_BLOCKING, 0, &mut partial_results, &[])
183+
.unwrap()
184+
};
185+
186+
let result: f32 = partial_results.iter().sum();
187+
read_event.wait().unwrap();
188+
189+
result
190+
}
86191
}

0 commit comments

Comments
 (0)