diff --git a/Cargo.lock b/Cargo.lock index c337c95..564a8b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -424,7 +424,10 @@ dependencies = [ "codspeed-divan-compat", "image", "image-compare", + "jetscii", + "memchr", "rand", + "rayon", ] [[package]] @@ -654,6 +657,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jetscii" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e" + [[package]] name = "jobserver" version = "0.1.34" diff --git a/Cargo.toml b/Cargo.toml index f4172be..705da70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,9 @@ path = "src/lib.rs" rand = "0.8" image = "0.25" image-compare = "0.5.0" +rayon = "1.11.0" +jetscii = { version = "0.5.3" } +memchr = "2.7.6" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/README.md b/README.md index 8e9c5be..9e4aa1e 100644 --- a/README.md +++ b/README.md @@ -34,3 +34,5 @@ cargo codspeed run -m walltime ``` Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time. 
+ + diff --git a/src/bfs.rs b/src/bfs.rs index 487fddc..3f9e173 100644 --- a/src/bfs.rs +++ b/src/bfs.rs @@ -1,4 +1,5 @@ -use std::collections::HashSet; +use core::hash::{BuildHasherDefault, Hasher}; +use std::collections::{HashSet, VecDeque}; /// A simple graph represented as an adjacency list #[derive(Debug, Clone)] @@ -23,25 +24,67 @@ impl Graph { } } +pub(crate) type BuildNoHashHasher = BuildHasherDefault<NoHashHasher>; + +#[derive(Default)] +pub(crate) struct NoHashHasher(u64); + +impl Hasher for NoHashHasher { + fn finish(&self) -> u64 { + self.0 + } + fn write(&mut self, _: &[u8]) { + unreachable!("Should not be used") + } + fn write_u8(&mut self, _: u8) { + unreachable!("Should not be used") + } + fn write_u16(&mut self, _: u16) { + unreachable!("Should not be used") + } + fn write_u32(&mut self, _: u32) { + unreachable!("Should not be used") + } + fn write_u64(&mut self, _: u64) { + unreachable!("Should not be used") + } + fn write_usize(&mut self, n: usize) { + self.0 = n as u64; + } + fn write_i8(&mut self, _: i8) { + unreachable!("Should not be used") + } + fn write_i16(&mut self, _: i16) { + unreachable!("Should not be used") + } + fn write_i32(&mut self, _: i32) { + unreachable!("Should not be used") + } + fn write_i64(&mut self, _: i64) { + unreachable!("Should not be used") + } + fn write_isize(&mut self, _: isize) { + unreachable!("Should not be used") + } +} + /// Naive BFS implementation using Vec as a queue (intentionally slow) /// Returns the order in which nodes were visited pub fn bfs_naive(graph: &Graph, start: usize) -> Vec<usize> { - let mut visited = HashSet::new(); - let mut queue = Vec::new(); // Using Vec instead of VecDeque - intentionally inefficient! + let mut visited = HashSet::with_capacity_and_hasher(1024, BuildNoHashHasher::new()); + let mut queue = VecDeque::new(); // VecDeque gives O(1) pop_front, unlike Vec::remove(0)
let mut result = Vec::new(); - queue.push(start); + queue.push_back(start); visited.insert(start); - while !queue.is_empty() { - // remove(0) is O(n) - this makes BFS slow! - let node = queue.remove(0); + while let Some(node) = queue.pop_front() { result.push(node); if let Some(neighbors) = graph.adjacency.get(node) { for &neighbor in neighbors { if visited.insert(neighbor) { - queue.push(neighbor); + queue.push_back(neighbor); } } } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index d99c90e..032b3dc 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,10 +1,15 @@ +use jetscii::ByteSubstring; +use rayon::prelude::*; + /// Naive approach: Read the entire file as a string and filter lines -pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> { +pub fn naive_dna_matcher<'a>(genome: &'a str, pattern: &str) -> Vec<&'a str> { + let searcher = ByteSubstring::new(pattern.as_bytes()); genome - .lines() - .filter(|line| !line.starts_with('>')) // Skip headers - .filter(|line| line.contains(pattern)) - .map(|s| s.to_string()) + .as_bytes() + .par_split(|c| *c == b'\n') + .filter(|line| !line.starts_with(b">")) // Skip headers + .filter(|line| searcher.find(line).is_some()) + .map(|s| unsafe { std::str::from_utf8_unchecked(s) }) .collect() } diff --git a/src/lut_filters.rs b/src/lut_filters.rs index a73068c..24bef78 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -38,48 +38,67 @@ pub fn apply_brightness_contrast_gamma( mod naive { use super::*; + const LUT1: [f32; 256] = { + let mut data = [0.0; 256]; + let mut i = 0; + while i < 256 { + data[i] = i as f32 - 128.0; + i += 1; + } + data + }; + /// Apply brightness and contrast with floating-point math per pixel pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); - for (x, y, pixel) in img.enumerate_pixels() { - let r = pixel[0] as f32; - let g = pixel[1] as 
f32; - let b = pixel[2] as f32; + let mut lut: [u8; 256] = [0; 256]; + for i in 0..256 { + lut[i] = + ((LUT1[i] * (1.0 + contrast)) + 128.0 + brightness as f32).clamp(0.0, 255.0) as u8 + } + for (x, y, pixel) in img.enumerate_pixels() { // Apply contrast and brightness (5 FP ops per channel!) - let r = ((r - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let g = ((g - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let b = ((b - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - - output.put_pixel( - x, - y, - Rgb([ - r.clamp(0.0, 255.0) as u8, - g.clamp(0.0, 255.0) as u8, - b.clamp(0.0, 255.0) as u8, - ]), - ); + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); } output } + const LUT2: [f32; 256] = { + let mut data = [0.0; 256]; + let mut i = 0; + while i < 256 { + data[i] = i as f32 / 255.0; + i += 1; + } + data + }; + /// Naive implementation: Apply gamma correction /// This is VERY slow because powf() is expensive! pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); + let mut lut: [u8; 256] = [0; 256]; + for i in 0..256 { + lut[i] = (LUT2[i].powf(1.0 / gamma) * 255.0) as u8; + } + for (x, y, pixel) in img.enumerate_pixels() { // powf() is VERY expensive - this is why we need a LUT! - let r = (pixel[0] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let g = (pixel[1] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let b = (pixel[2] as f32 / 255.0).powf(1.0 / gamma) * 255.0; + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; - output.put_pixel(x, y, Rgb([r as u8, g as u8, b as u8])); + output.put_pixel(x, y, Rgb([r, g, b])); } output