diff --git a/Cargo.lock b/Cargo.lock index c337c95..50664d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -425,6 +425,7 @@ dependencies = [ "image", "image-compare", "rand", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f4172be..f4ccf6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ path = "src/lib.rs" rand = "0.8" image = "0.25" image-compare = "0.5.0" +rayon = "1.11.0" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/README.md b/README.md index 8e9c5be..496d8ee 100644 --- a/README.md +++ b/README.md @@ -34,3 +34,6 @@ cargo codspeed run -m walltime ``` Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time. + + + diff --git a/src/bfs.rs b/src/bfs.rs index 487fddc..b0f1d6d 100644 --- a/src/bfs.rs +++ b/src/bfs.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashSet, VecDeque}; /// A simple graph represented as an adjacency list #[derive(Debug, Clone)] @@ -25,23 +25,22 @@ impl Graph { /// Naive BFS implementation using Vec as a queue (intentionally slow) /// Returns the order in which nodes were visited -pub fn bfs_naive(graph: &Graph, start: usize) -> Vec { - let mut visited = HashSet::new(); - let mut queue = Vec::new(); // Using Vec instead of VecDeque - intentionally inefficient! - let mut result = Vec::new(); +pub fn bfs_naive(graph: &Graph, start: usize) -> VecDeque { + let mut visited = [false; 10000]; + let mut queue = VecDeque::with_capacity(256); + let mut result = VecDeque::with_capacity(256); - queue.push(start); - visited.insert(start); + queue.push_back(start); + visited[start] = true; - while !queue.is_empty() { - // remove(0) is O(n) - this makes BFS slow! 
- let node = queue.remove(0); - result.push(node); + while let Some(node) = queue.pop_front() { + result.push_back(node); if let Some(neighbors) = graph.adjacency.get(node) { for &neighbor in neighbors { - if visited.insert(neighbor) { - queue.push(neighbor); + if !visited[neighbor] { + visited[neighbor] = true; + queue.push_back(neighbor); } } } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index d99c90e..b9325d7 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,9 +1,11 @@ +use rayon::prelude::*; + /// Naive approach: Read the entire file as a string and filter lines pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec { genome - .lines() + .par_lines() // Use parallel lines iterator .filter(|line| !line.starts_with('>')) // Skip headers - .filter(|line| line.contains(pattern)) + .filter(|line| line.contains(pattern)) .map(|s| s.to_string()) .collect() } diff --git a/src/lut_filters.rs b/src/lut_filters.rs index a73068c..df4d1e1 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -42,24 +42,28 @@ mod naive { pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); + let mut c_table: [u8; 256] = [0; 256]; + + + for i in 0..256 { + let f = i as f32; + c_table[i] = (((f - 128.0) * (1.0 + contrast)) + 128.0 + (brightness as f32)).clamp(0.0, 255.0) as u8; + } + + for (x, y, pixel) in img.enumerate_pixels() { let r = pixel[0] as f32; let g = pixel[1] as f32; let b = pixel[2] as f32; - // Apply contrast and brightness (5 FP ops per channel!) 
- let r = ((r - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let g = ((g - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let b = ((b - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - output.put_pixel( x, y, Rgb([ - r.clamp(0.0, 255.0) as u8, - g.clamp(0.0, 255.0) as u8, - b.clamp(0.0, 255.0) as u8, + c_table[r as usize], + c_table[g as usize], + c_table[b as usize], ]), ); } @@ -73,11 +77,17 @@ mod naive { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); + let mut gamma_table = [0u8; 256]; + let pow_gamma = 1.0 / gamma; + + for i in 0..256 { + gamma_table[i] = ((i as f32 / 255.0).powf(pow_gamma) * 255.0) as u8; + } + for (x, y, pixel) in img.enumerate_pixels() { - // powf() is VERY expensive - this is why we need a LUT! - let r = (pixel[0] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let g = (pixel[1] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let b = (pixel[2] as f32 / 255.0).powf(1.0 / gamma) * 255.0; + let r = gamma_table[pixel[0] as usize]; + let g = gamma_table[pixel[1] as usize]; + let b = gamma_table[pixel[2] as usize]; output.put_pixel(x, y, Rgb([r as u8, g as u8, b as u8])); }