Skip to content
This repository was archived by the owner on Mar 11, 2025. It is now read-only.

Commit 1ea0c0a

Browse files
fix: undo accidental crippling (#27)
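A previous attempt to fix a bug on AMD GPUs accidentally crippled all miners. The main problem was that the kernel tried to initialize the output buffer to 0. This did not change the hashrate, but it resulted in valid nonces being missed. This commit removes the in-kernel initialization and instead has the host create the output buffer already zeroed.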
1 parent 59d074a commit 1ea0c0a

File tree

3 files changed

+163
-167
lines changed

3 files changed

+163
-167
lines changed

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ fn run_thread<T: EngineImpl>(
514514
debug!(target: LOG_TARGET, "Elapsed {:?} > {:?}", elapsed.elapsed().as_secs(), config.template_refresh_secs );
515515
break;
516516
}
517-
let num_iterations = 16;
517+
let num_iterations = 1;
518518
let result = gpu_engine.mine(
519519
&gpu_function,
520520
&context,

src/opencl_engine.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use core::ffi::c_void;
12
use std::{
23
io::Read,
34
ptr,
@@ -11,7 +12,7 @@ use opencl3::{
1112
context::Context,
1213
device::{Device, CL_DEVICE_TYPE_GPU},
1314
kernel::{ExecuteKernel, Kernel},
14-
memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY},
15+
memory::{Buffer, CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY},
1516
platform::{get_platforms, Platform},
1617
program::Program,
1718
types::{cl_ulong, CL_TRUE},
@@ -169,8 +170,8 @@ impl EngineImpl for OpenClEngine {
169170

170171
debug!(target: LOG_TARGET, "OpenClEngine: created queue");
171172

172-
let batch_size = 1 << 19; // According to tests, but we can try work this out
173-
let global_dimensions = [batch_size as usize];
173+
// let batch_size = 1 << 19; // According to tests, but we can try work this out
174+
// let global_dimensions = [batch_size as usize];
174175
// let max_workgroups = Device::new(context.context.devices()[0]).max_work_group_size().unwrap();
175176
// dbg!(max_compute);
176177
// let max_work_items = queue.max_work_item_dimensions();
@@ -195,14 +196,19 @@ impl EngineImpl for OpenClEngine {
195196
};
196197

197198
debug!(target: LOG_TARGET, "OpenClEngine: buffer created",);
198-
let output_buffer =
199-
match Buffer::<cl_ulong>::create(&context.context, CL_MEM_WRITE_ONLY, 2, ptr::null_mut()) {
200-
Ok(buffer) => buffer,
201-
Err(e) => {
202-
error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e);
203-
return Err(e.into());
204-
},
205-
};
199+
let initial_output = vec![0u64, 0u64];
200+
let output_buffer = match Buffer::<cl_ulong>::create(
201+
&context.context,
202+
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
203+
2,
204+
initial_output.as_ptr() as *mut c_void,
205+
) {
206+
Ok(buffer) => buffer,
207+
Err(e) => {
208+
error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e);
209+
return Err(e.into());
210+
},
211+
};
206212
// dbg!(block_size);
207213
// dbg!(grid_size);
208214
debug!(target: LOG_TARGET, "OpenClEngine: output buffer created",);
@@ -243,9 +249,9 @@ impl EngineImpl for OpenClEngine {
243249
queue.finish()?;
244250

245251
let mut output = vec![0u64, 0u64];
246-
debug!(target: LOG_TARGET, "OpenClEngine: mine output {:?}", output[0] > 0);
247252
queue.enqueue_read_buffer(&output_buffer, CL_TRUE, 0, output.as_mut_slice(), &[])?;
248253
if output[0] > 0 {
254+
println!("output and diff {:?} {:?}", output[0], u64::MAX / output[1]);
249255
return Ok((
250256
Some(output[0]),
251257
grid_size * block_size * num_iterations,

src/opencl_sha3.cl

Lines changed: 144 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -17,167 +17,156 @@ constant static const ulong RC[] = {
1717
0x8000000000008080ul, 0x0000000080000001ul, 0x8000000080008008ul,
1818
};
1919

20-
2120
ulong swap_endian_64(ulong value) {
22-
return ((value & 0x00000000000000FFULL) << 56) |
23-
((value & 0x000000000000FF00ULL) << 40) |
24-
((value & 0x0000000000FF0000ULL) << 24) |
25-
((value & 0x00000000FF000000ULL) << 8) |
26-
((value & 0x000000FF00000000ULL) >> 8) |
27-
((value & 0x0000FF0000000000ULL) >> 24) |
28-
((value & 0x00FF000000000000ULL) >> 40) |
29-
((value & 0xFF00000000000000ULL) >> 56);
21+
return ((value & 0x00000000000000FFULL) << 56) |
22+
((value & 0x000000000000FF00ULL) << 40) |
23+
((value & 0x0000000000FF0000ULL) << 24) |
24+
((value & 0x00000000FF000000ULL) << 8) |
25+
((value & 0x000000FF00000000ULL) >> 8) |
26+
((value & 0x0000FF0000000000ULL) >> 24) |
27+
((value & 0x00FF000000000000ULL) >> 40) |
28+
((value & 0xFF00000000000000ULL) >> 56);
3029
}
3130

31+
kernel void sha3(global ulong *buffer, ulong nonce_start, ulong difficulty,
32+
uint num_rounds, global ulong *output_1) {
33+
34+
// output_1[0] = 0;
35+
// output_1[1] = 0;
36+
ulong state[25];
37+
for (uint i = 0; i < num_rounds; i++) {
38+
39+
for (uint j = 0; j < 25; j++) {
40+
state[j] = 0;
41+
}
42+
state[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
43+
state[1] = buffer[1];
44+
state[2] = buffer[2];
45+
state[3] = buffer[3];
46+
47+
state[4] = buffer[4];
48+
state[5] = buffer[5];
49+
50+
state[16] ^= 0x8000000000000000ul;
51+
52+
uint r, x, y, t;
53+
ulong tmp, current, C[5];
54+
for (r = 0; r < 24; ++r) {
55+
for (x = 0; x < 5; ++x) {
56+
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
57+
state[x + 20];
58+
}
59+
for (x = 0; x < 5; ++x) {
60+
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
61+
for (y = 0; y < 5; ++y) {
62+
state[x + y * 5] ^= tmp;
63+
}
64+
}
65+
current = state[1];
66+
for (t = 0; t < 24; ++t) {
67+
tmp = state[pos[t]];
68+
state[pos[t]] = rotate(current, rot[t]);
69+
current = tmp;
70+
}
71+
for (y = 0; y < 25; y += 5) {
72+
for (x = 0; x < 5; ++x)
73+
C[x] = state[y + x];
74+
for (x = 0; x < 5; ++x) {
75+
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
76+
}
77+
}
78+
state[0] ^= RC[r];
79+
}
80+
81+
for (uint j = 4; j < 25; j++) {
82+
state[j] = 0;
83+
}
84+
state[4] = 0x06;
85+
state[16] = 0x8000000000000000ul;
86+
87+
for (r = 0; r < 24; ++r) {
88+
for (x = 0; x < 5; ++x) {
89+
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
90+
state[x + 20];
91+
}
92+
for (x = 0; x < 5; ++x) {
93+
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
94+
for (y = 0; y < 5; ++y) {
95+
state[x + y * 5] ^= tmp;
96+
}
97+
}
98+
current = state[1];
99+
for (t = 0; t < 24; ++t) {
100+
tmp = state[pos[t]];
101+
state[pos[t]] = rotate(current, rot[t]);
102+
current = tmp;
103+
}
104+
for (y = 0; y < 25; y += 5) {
105+
for (x = 0; x < 5; ++x)
106+
C[x] = state[y + x];
107+
for (x = 0; x < 5; ++x) {
108+
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
109+
}
110+
}
111+
state[0] ^= RC[r];
112+
}
113+
114+
for (uint j = 4; j < 25; j++) {
115+
state[j] = 0;
116+
}
117+
state[4] = 0x06;
118+
state[16] = 0x8000000000000000ul;
119+
120+
// round 3
121+
for (r = 0; r < 24; ++r) {
122+
for (x = 0; x < 5; ++x) {
123+
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
124+
state[x + 20];
125+
}
126+
for (x = 0; x < 5; ++x) {
127+
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
128+
for (y = 0; y < 5; ++y) {
129+
state[x + y * 5] ^= tmp;
130+
}
131+
}
132+
current = state[1];
133+
for (t = 0; t < 24; ++t) {
134+
tmp = state[pos[t]];
135+
state[pos[t]] = rotate(current, rot[t]);
136+
current = tmp;
137+
}
138+
for (y = 0; y < 25; y += 5) {
139+
for (x = 0; x < 5; ++x)
140+
C[x] = state[y + x];
141+
for (x = 0; x < 5; ++x) {
142+
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
143+
}
144+
}
145+
state[0] ^= RC[r];
146+
}
32147

33-
kernel void sha3(global ulong *buffer,
34-
ulong nonce_start, ulong difficulty,
35-
uint num_rounds, global ulong *output_1
36-
) {
37-
38-
output_1[0] = 0;
39-
output_1[1] = 0;
40-
ulong state[25];
41-
for (uint i = 0;i< num_rounds; i++) {
42-
43-
for (uint j = 0; j < 25; j++) {
44-
state[j] = 0;
45-
}
46-
state[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
47-
state[1] = buffer[1];
48-
state[2] = buffer[2];
49-
state[3] = buffer[3];
50-
51-
state[4] = buffer[4];
52-
state[5] = buffer[5];
53-
54-
state[16] ^= 0x8000000000000000ul;
55-
56-
57-
58-
59-
60-
uint r, x, y, t;
61-
ulong tmp, current, C[5];
62-
for (r = 0; r < 24; ++r) {
63-
for (x = 0; x < 5; ++x) {
64-
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
65-
state[x + 20];
66-
}
67-
for (x = 0; x < 5; ++x) {
68-
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
69-
for (y = 0; y < 5; ++y) {
70-
state[x + y * 5] ^= tmp;
71-
}
72-
}
73-
current = state[1];
74-
for (t = 0; t < 24; ++t) {
75-
tmp = state[pos[t]];
76-
state[pos[t]] = rotate(current, rot[t]);
77-
current = tmp;
78-
}
79-
for (y = 0; y < 25; y += 5) {
80-
for (x = 0; x < 5; ++x)
81-
C[x] = state[y + x];
82-
for (x = 0; x < 5; ++x) {
83-
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
84-
}
85-
}
86-
state[0] ^= RC[r];
87-
}
88-
89-
90-
for (uint j = 4; j < 25; j++) {
91-
state[j] = 0;
92-
}
93-
state[4] = 0x06;
94-
state[16] = 0x8000000000000000ul;
95-
96-
for (r = 0; r < 24; ++r) {
97-
for (x = 0; x < 5; ++x) {
98-
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
99-
state[x + 20];
100-
}
101-
for (x = 0; x < 5; ++x) {
102-
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
103-
for (y = 0; y < 5; ++y) {
104-
state[x + y * 5] ^= tmp;
105-
}
106-
}
107-
current = state[1];
108-
for (t = 0; t < 24; ++t) {
109-
tmp = state[pos[t]];
110-
state[pos[t]] = rotate(current, rot[t]);
111-
current = tmp;
112-
}
113-
for (y = 0; y < 25; y += 5) {
114-
for (x = 0; x < 5; ++x)
115-
C[x] = state[y + x];
116-
for (x = 0; x < 5; ++x) {
117-
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
118-
}
119-
}
120-
state[0] ^= RC[r];
121-
}
122-
123-
124-
for (uint j = 4; j < 25; j++) {
125-
state[j] = 0;
126-
}
127-
state[4] = 0x06;
128-
state[16] = 0x8000000000000000ul;
129-
130-
// round 3
131-
for (r = 0; r < 24; ++r) {
132-
for (x = 0; x < 5; ++x) {
133-
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
134-
state[x + 20];
135-
}
136-
for (x = 0; x < 5; ++x) {
137-
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
138-
for (y = 0; y < 5; ++y) {
139-
state[x + y * 5] ^= tmp;
140-
}
141-
}
142-
current = state[1];
143-
for (t = 0; t < 24; ++t) {
144-
tmp = state[pos[t]];
145-
state[pos[t]] = rotate(current, rot[t]);
146-
current = tmp;
147-
}
148-
for (y = 0; y < 25; y += 5) {
149-
for (x = 0; x < 5; ++x)
150-
C[x] = state[y + x];
151-
for (x = 0; x < 5; ++x) {
152-
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
153-
}
154-
}
155-
state[0] ^= RC[r];
156-
}
157-
158-
159-
// check difficulty
160-
ulong swap = swap_endian_64(state[0]);
161-
if (swap < difficulty) {
162-
output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
163-
output_1[1] = swap;
164-
}
165-
else {
148+
// check difficulty
149+
ulong swap = swap_endian_64(state[0]);
150+
if (swap < difficulty) {
166151
if (output_1[1] == 0 || output_1[1] > swap) {
167-
output_1[1] = swap;
152+
output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
153+
output_1[1] = swap;
168154
}
169-
// if (output_1[1] < nonce_start+ get_global_id(0)) {
170-
// output_1[1] = nonce_start + get_global_id(0);
171-
// }
172-
}
155+
} else {
156+
if (output_1[1] == 0 || output_1[1] > swap) {
157+
// output_1[0] = nonce_start + get_global_id(0) + i *
158+
// get_global_size(0);
159+
output_1[1] = swap;
160+
}
161+
// if (output_1[1] < nonce_start+ get_global_id(0)) {
162+
// output_1[1] = nonce_start + get_global_id(0);
163+
// }
164+
}
173165

174-
//output_1[0] = difficulty;
175-
// output_1[0] = nonce_start + get_global_id(0) ;
166+
// output_1[0] = difficulty;
167+
// output_1[0] = nonce_start + get_global_id(0) ;
176168
// output_1[0] = 1;
177-
}
178-
179-
180-
169+
}
181170

182171
// // Compare difficulty
183172
// bool le = true;
@@ -225,7 +214,8 @@ for (uint i = 0;i< num_rounds; i++) {
225214
// // n -= d
226215
// int r = 0;
227216
// for (int j = 31; j >= 0; --j) {
228-
// // There is no temporary overflow, because in OpenCL uchar + uchar is
217+
// // There is no temporary overflow, because in OpenCL uchar + uchar
218+
// is
229219
// // ulong (not really sure, but it's bigger than uchar)
230220
// if (n[j] < output_buffer[j] + r) {
231221
// n[j] = n[j] - r - output_buffer[j];

0 commit comments

Comments
 (0)