Skip to content
This repository was archived by the owner on Mar 11, 2025. It is now read-only.

Commit d81697e

Browse files
fix: add barrier for better opencl memory fencing (#30)
1 parent c537428 commit d81697e

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

src/config_file.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ pub(crate) struct ConfigFile {
1919
pub single_grid_size: u32,
2020
pub per_device_grid_sizes: Vec<u32>,
2121
pub template_timeout_secs: u64,
22+
#[serde(default = "default_max_template_failures")]
23+
pub max_template_failures: u64,
2224
}
2325

2426
impl Default for ConfigFile {
@@ -36,6 +38,7 @@ impl Default for ConfigFile {
3638
single_grid_size: 1024,
3739
per_device_grid_sizes: vec![],
3840
template_timeout_secs: 1,
41+
max_template_failures: 10,
3942
}
4043
}
4144
}
@@ -54,3 +57,7 @@ impl ConfigFile {
5457
Ok(())
5558
}
5659
}
60+
61+
/// Fallback for `ConfigFile::max_template_failures`, referenced by that field's
/// `#[serde(default = "default_max_template_failures")]` attribute so that a
/// config without the key still deserializes.
///
/// Returns 10, the same value `impl Default for ConfigFile` assigns to the field.
fn default_max_template_failures() -> u64 {
62+
10
63+
}

src/main.rs

Lines changed: 127 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,9 @@ struct Cli {
192192

193193
#[arg(short, long)]
194194
template_timeout_secs: Option<u64>,
195+
196+
#[arg(long)]
197+
max_template_failures: Option<usize>,
195198
}
196199

197200
async fn main_inner() -> Result<(), anyhow::Error> {
@@ -446,6 +449,122 @@ async fn main_inner() -> Result<(), anyhow::Error> {
446449
return Ok(());
447450
}
448451

452+
if let Some(max_template_failures) = cli.max_template_failures {
453+
config.max_template_failures = max_template_failures as u64;
454+
}
455+
// create a list of devices (by index) to use
456+
let devices_to_use: Vec<u32> = (0..num_devices)
457+
.filter(|x| {
458+
if let Some(use_devices) = &cli.use_devices {
459+
use_devices.contains(x)
460+
} else {
461+
true
462+
}
463+
})
464+
.filter(|x| {
465+
if let Some(excluded_devices) = &cli.exclude_devices {
466+
!excluded_devices.contains(x)
467+
} else {
468+
true
469+
}
470+
})
471+
.collect();
472+
473+
info!(target: LOG_TARGET, "Device indexes to use: {:?} from the total number of devices: {:?}", devices_to_use, num_devices);
474+
475+
println!(
476+
"Device indexes to use: {:?} from the total number of devices: {:?}",
477+
devices_to_use, num_devices
478+
);
479+
480+
if cli.find_optimal {
481+
let mut best_hashrate = 0;
482+
let mut best_grid_size = 1;
483+
let mut current_grid_size = 32;
484+
let mut is_doubling_stage = true;
485+
let mut last_grid_size_increase = 0;
486+
let mut prev_hashrate = 0;
487+
488+
while true {
489+
dbg!("here");
490+
let mut config = config.clone();
491+
config.single_grid_size = current_grid_size;
492+
// config.block_size = ;
493+
let mut threads = vec![];
494+
let (tx, rx) = tokio::sync::broadcast::channel(100);
495+
for i in 0..num_devices {
496+
if !devices_to_use.contains(&i) {
497+
continue;
498+
}
499+
let c = config.clone();
500+
let gpu = gpu_engine.clone();
501+
let x = tx.clone();
502+
threads.push(thread::spawn(move || {
503+
run_thread(gpu, num_devices as u64, i as u32, c, true, x)
504+
}));
505+
}
506+
let thread_len = threads.len();
507+
let mut thread_hashrate = Vec::with_capacity(thread_len);
508+
for t in threads {
509+
match t.join() {
510+
Ok(res) => match res {
511+
Ok(hashrate) => {
512+
info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en));
513+
thread_hashrate.push(hashrate);
514+
},
515+
Err(err) => {
516+
eprintln!("Thread join succeeded but result failed: {:?}", err);
517+
error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err);
518+
},
519+
},
520+
Err(err) => {
521+
eprintln!("Thread join failed: {:?}", err);
522+
error!(target: LOG_TARGET, "Thread join failed: {:?}", err);
523+
},
524+
}
525+
}
526+
let total_hashrate: u64 = thread_hashrate.iter().sum();
527+
if total_hashrate > best_hashrate {
528+
best_hashrate = total_hashrate;
529+
best_grid_size = current_grid_size;
530+
// best_grid_size = config.single_grid_size;
531+
// best_block_size = config.block_size;
532+
println!(
533+
"Best hashrate: {} grid_size: {}, current_grid: {} block_size: {} Prev Hash {}",
534+
best_hashrate, best_grid_size, current_grid_size, config.block_size, prev_hashrate
535+
);
536+
}
537+
// if total_hashrate < prev_hashrate {
538+
// println!("total decreased, breaking");
539+
// break;
540+
// }
541+
if is_doubling_stage {
542+
if total_hashrate > prev_hashrate {
543+
last_grid_size_increase = current_grid_size;
544+
current_grid_size = current_grid_size * 2;
545+
} else {
546+
is_doubling_stage = false;
547+
last_grid_size_increase = last_grid_size_increase / 2;
548+
current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase);
549+
}
550+
} else {
551+
// Bisecting stage
552+
if last_grid_size_increase < 2 {
553+
break;
554+
}
555+
if total_hashrate > prev_hashrate {
556+
last_grid_size_increase = last_grid_size_increase / 2;
557+
current_grid_size += last_grid_size_increase;
558+
} else {
559+
last_grid_size_increase = last_grid_size_increase / 2;
560+
current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase);
561+
}
562+
}
563+
prev_hashrate = total_hashrate;
564+
}
565+
return Ok(());
566+
}
567+
449568
let (stats_tx, stats_rx) = tokio::sync::broadcast::channel(100);
450569
if config.http_server_enabled {
451570
let mut stats_collector = stats_collector::StatsCollector::new(shutdown.to_signal(), stats_rx);
@@ -536,6 +655,7 @@ fn run_thread<T: EngineImpl>(
536655
} else {
537656
ClientType::BaseNode
538657
};
658+
let mut template_fetch_failures = 0;
539659
let coinbase_extra = config.coinbase_extra.clone();
540660
let node_client = Arc::new(RwLock::new(runtime.block_on(async move {
541661
node_client::create_client(client_type, &tari_node_url, coinbase_extra).await
@@ -586,6 +706,7 @@ fn run_thread<T: EngineImpl>(
586706
let mining_hash: FixedHash;
587707
match runtime.block_on(async move { get_template(clone_config, clone_node_client, rounds, benchmark).await }) {
588708
Ok((res_target_difficulty, res_block, res_header, res_mining_hash)) => {
709+
template_fetch_failures = 0;
589710
info!(target: LOG_TARGET, "Getting next block...");
590711
println!("Getting next block...{}", res_header.height);
591712
target_difficulty = res_target_difficulty;
@@ -595,6 +716,12 @@ fn run_thread<T: EngineImpl>(
595716
previous_template = Some((target_difficulty, block.clone(), header.clone(), mining_hash.clone()));
596717
},
597718
Err(error) => {
719+
template_fetch_failures += 1;
720+
if template_fetch_failures > config.max_template_failures {
721+
eprintln!("Too many template fetch failures, exiting");
722+
error!(target: LOG_TARGET, "Too many template fetch failures, exiting");
723+
return Err(error);
724+
}
598725
println!("Error during getting next block: {error:?}");
599726
error!(target: LOG_TARGET, "Error during getting next block: {:?}", error);
600727
if previous_template.is_none() {

src/opencl_sha3.cl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,7 @@ kernel void sha3(global ulong *buffer, ulong nonce_start, ulong difficulty,
147147

148148
// check difficulty
149149
ulong swap = swap_endian_64(state[0]);
150+
barrier(CLK_GLOBAL_MEM_FENCE);
150151
if (swap < difficulty) {
151152
if (output_1[1] == 0 || output_1[1] > swap) {
152153
output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0);

0 commit comments

Comments
 (0)