From 3d0c7e224f21426e073da4c4c41a16944291a621 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 08:29:09 +0000 Subject: [PATCH 01/13] Add my name. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8e9c5be..e5a3313 100644 --- a/README.md +++ b/README.md @@ -34,3 +34,5 @@ cargo codspeed run -m walltime ``` Note: You can also set the `CODSPEED_RUNNER_MODE` environment variable to `walltime` to avoid passing `-m walltime` every time. + +Participant: gendx From dfdfecabf4fc7137d4d47f7dc0a3922baebf223b Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 09:22:08 +0000 Subject: [PATCH 02/13] Cargo fmt. --- benches/blob_corruption_checker.rs | 17 ++++++++++++++--- benches/lut_grayscale_bench.rs | 2 +- src/blob_corruption_checker.rs | 5 +---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/benches/blob_corruption_checker.rs b/benches/blob_corruption_checker.rs index 3ad54d6..45084c0 100644 --- a/benches/blob_corruption_checker.rs +++ b/benches/blob_corruption_checker.rs @@ -18,14 +18,25 @@ fn corruption_check(bencher: Bencher) { // All corruptions should be 1KB aligned for corruption in &corruptions { - assert_eq!(corruption.offset % 1024, 0, "Corruption offset should be 1KB aligned"); - assert_eq!(corruption.length % 1024, 0, "Corruption length should be multiple of 1KB"); + assert_eq!( + corruption.offset % 1024, + 0, + "Corruption offset should be 1KB aligned" + ); + assert_eq!( + corruption.length % 1024, + 0, + "Corruption length should be multiple of 1KB" + ); } // Check specific corruptions assert_eq!(corruptions[0].offset, 14801920, "First corruption offset"); assert_eq!(corruptions[0].length, 2048, "First corruption length"); - assert_eq!(corruptions[25].offset, 243891200, "Middle corruption offset"); + assert_eq!( + corruptions[25].offset, 243891200, + "Middle corruption offset" + ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); assert_eq!(corruptions[49].offset, 
507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); diff --git a/benches/lut_grayscale_bench.rs b/benches/lut_grayscale_bench.rs index 5816569..64add8b 100644 --- a/benches/lut_grayscale_bench.rs +++ b/benches/lut_grayscale_bench.rs @@ -1,5 +1,5 @@ use eurorust_2025_workshop::lut_grayscale::*; -use image::{RgbImage}; +use image::RgbImage; fn main() { divan::main(); diff --git a/src/blob_corruption_checker.rs b/src/blob_corruption_checker.rs index 2515c20..3df243c 100644 --- a/src/blob_corruption_checker.rs +++ b/src/blob_corruption_checker.rs @@ -92,10 +92,7 @@ mod tests { "Middle corruption offset" ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); - assert_eq!( - corruptions[49].offset, 507871232, - "Last corruption offset" - ); + assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); } } From f3f4e47fbb349926bd3b99c01b099c13beaa38ed Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 09:30:42 +0000 Subject: [PATCH 03/13] Use VecDeque for BFS. 
--- benches/bfs.rs | 8 ++++---- src/bfs.rs | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/benches/bfs.rs b/benches/bfs.rs index eaadd7a..d796914 100644 --- a/benches/bfs.rs +++ b/benches/bfs.rs @@ -1,5 +1,5 @@ use divan::Bencher; -use eurorust_2025_workshop::bfs::{bfs_naive, generate_graph}; +use eurorust_2025_workshop::bfs::{bfs_optimized, generate_graph}; fn main() { divan::main(); @@ -10,7 +10,7 @@ fn bfs_small_graph(bencher: Bencher) { let graph = generate_graph(100); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( @@ -28,7 +28,7 @@ fn bfs_medium_graph(bencher: Bencher) { let graph = generate_graph(1000); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( @@ -46,7 +46,7 @@ fn bfs_large_graph(bencher: Bencher) { let graph = generate_graph(10000); bencher.bench_local(|| { - let result = divan::black_box(bfs_naive(divan::black_box(&graph), divan::black_box(0))); + let result = divan::black_box(bfs_optimized(divan::black_box(&graph), divan::black_box(0))); assert!(!result.is_empty(), "BFS result should not be empty"); assert!( diff --git a/src/bfs.rs b/src/bfs.rs index 487fddc..44c251c 100644 --- a/src/bfs.rs +++ b/src/bfs.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashSet, VecDeque}; /// A simple graph represented as an adjacency list #[derive(Debug, Clone)] @@ -50,6 +50,29 @@ pub fn bfs_naive(graph: &Graph, start: usize) -> Vec { result } +pub fn bfs_optimized(graph: &Graph, start: usize) -> Vec { + let mut visited = 
HashSet::new(); + let mut queue = VecDeque::new(); + let mut result = Vec::new(); + + queue.push_back(start); + visited.insert(start); + + while let Some(node) = queue.pop_front() { + result.push(node); + + if let Some(neighbors) = graph.adjacency.get(node) { + for &neighbor in neighbors { + if visited.insert(neighbor) { + queue.push_back(neighbor); + } + } + } + } + + result +} + /// Helper function to generate a random graph for benchmarking pub fn generate_graph(nodes: usize) -> Graph { use rand::{Rng, SeedableRng}; From 5afb56e710916420bba0636377718aa299052e30 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 09:39:11 +0000 Subject: [PATCH 04/13] Use bit-set crate in BFS. --- Cargo.lock | 16 ++++++++++++++++ Cargo.toml | 1 + src/bfs.rs | 3 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index c337c95..9d0b430 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,21 @@ dependencies = [ "syn", ] +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bit_field" version = "0.10.3" @@ -421,6 +436,7 @@ dependencies = [ name = "eurorust-2025-workshop" version = "0.1.0" dependencies = [ + "bit-set", "codspeed-divan-compat", "image", "image-compare", diff --git a/Cargo.toml b/Cargo.toml index f4172be..5eb089c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ path = "src/lib.rs" rand = "0.8" image = "0.25" image-compare = "0.5.0" +bit-set = "0.8.0" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/src/bfs.rs b/src/bfs.rs index 44c251c..4c98a75 100644 --- a/src/bfs.rs +++ 
b/src/bfs.rs @@ -1,3 +1,4 @@ +use bit_set::BitSet; use std::collections::{HashSet, VecDeque}; /// A simple graph represented as an adjacency list @@ -51,7 +52,7 @@ pub fn bfs_naive(graph: &Graph, start: usize) -> Vec { } pub fn bfs_optimized(graph: &Graph, start: usize) -> Vec { - let mut visited = HashSet::new(); + let mut visited = BitSet::new(); let mut queue = VecDeque::new(); let mut result = Vec::new(); From 9f49fe5bf5d21844b4031b5e09148bc2fd393405 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 10:04:09 +0000 Subject: [PATCH 05/13] Ignore vim swap files. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 8d1533f..f459d1b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ genome.fasta *.jpg *.png + +*.swp From 22ebeedd411cd2ea10e943f04c43980c97eabaed Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 10:11:51 +0000 Subject: [PATCH 06/13] Implement look-up-tabled filters. --- src/lut_filters.rs | 52 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/src/lut_filters.rs b/src/lut_filters.rs index a73068c..28a135b 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -18,11 +18,11 @@ use image::{ImageBuffer, Rgb, RgbImage}; pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { - naive::apply_brightness_contrast(img, brightness, contrast) + optimized::apply_brightness_contrast(img, brightness, contrast) } pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { - naive::apply_gamma(img, gamma) + optimized::apply_gamma(img, gamma) } pub fn apply_brightness_contrast_gamma( @@ -32,9 +32,10 @@ pub fn apply_brightness_contrast_gamma( gamma: f32, ) -> RgbImage { let temp_img = apply_brightness_contrast(img, brightness, contrast); - naive::apply_gamma(&temp_img, gamma) + optimized::apply_gamma(&temp_img, gamma) } +#[allow(dead_code)] mod naive { use super::*; @@ -86,6 +87,51 @@ mod naive { 
 } } +mod optimized { + use super::*; + + /// Apply brightness and contrast through a 256-entry look-up table built once per call + pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = std::array::from_fn(|x| { + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32).clamp(0.0, 255.0) + as u8 + }); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } + + /// Apply gamma correction through a 256-entry look-up table + /// powf() is expensive, so it runs once per table entry instead of once per channel + pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = + std::array::from_fn(|x| ((x as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } +} + #[cfg(test)] mod tests { use super::*; From 2192fdcd3813ce6f4be1e5358c8237f88f2a6faa Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 10:26:14 +0000 Subject: [PATCH 07/13] Compute brightness_contrast_gamma in a single pass. 
--- src/lut_filters.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/lut_filters.rs b/src/lut_filters.rs index 28a135b..b8b472c 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -31,8 +31,7 @@ pub fn apply_brightness_contrast_gamma( contrast: f32, gamma: f32, ) -> RgbImage { - let temp_img = apply_brightness_contrast(img, brightness, contrast); - optimized::apply_gamma(&temp_img, gamma) + optimized::apply_brightness_contrast_gamma(img, brightness, contrast, gamma) } #[allow(dead_code)] @@ -130,6 +129,33 @@ mod optimized { output } + + pub fn apply_brightness_contrast_gamma( + img: &RgbImage, + brightness: i16, + contrast: f32, + gamma: f32, + ) -> RgbImage { + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + let lut: [u8; 256] = std::array::from_fn(|x| { + let brightness_contrast = + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32) + .clamp(0.0, 255.0) as u8; + ((brightness_contrast as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8 + }); + + for (x, y, pixel) in img.enumerate_pixels() { + let r = lut[pixel[0] as usize]; + let g = lut[pixel[1] as usize]; + let b = lut[pixel[2] as usize]; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output + } } #[cfg(test)] From 01eeb865b25f936912a116e57c05f8ab76764374 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 12:33:23 +0000 Subject: [PATCH 08/13] Filter with SIMD look-up-tables (Simd::gather_or_default). 
--- src/simd_filters.rs | 72 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/src/simd_filters.rs b/src/simd_filters.rs index 6675943..5a52edd 100644 --- a/src/simd_filters.rs +++ b/src/simd_filters.rs @@ -1,11 +1,11 @@ use image::{ImageBuffer, Rgb, RgbImage}; pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { - naive::apply_brightness_contrast(img, brightness, contrast) + optimized::apply_brightness_contrast(img, brightness, contrast) } pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { - naive::apply_gamma(img, gamma) + optimized::apply_gamma(img, gamma) } pub fn apply_brightness_contrast_gamma( @@ -14,10 +14,10 @@ pub fn apply_brightness_contrast_gamma( contrast: f32, gamma: f32, ) -> RgbImage { - let temp_img = apply_brightness_contrast(img, brightness, contrast); - naive::apply_gamma(&temp_img, gamma) + optimized::apply_brightness_contrast_gamma(img, brightness, contrast, gamma) } +#[allow(dead_code)] mod naive { use super::*; @@ -69,6 +69,70 @@ mod naive { } } +mod optimized { + use super::*; + + use std::simd::num::SimdUint; + use std::simd::{Simd, u8x16, usizex16}; + + pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { + let lut: [u8; 256] = std::array::from_fn(|x| { + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32).clamp(0.0, 255.0) + as u8 + }); + + apply_lut(img, &lut) + } + + pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { + let lut: [u8; 256] = + std::array::from_fn(|x| ((x as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8); + + apply_lut(img, &lut) + } + + pub fn apply_brightness_contrast_gamma( + img: &RgbImage, + brightness: i16, + contrast: f32, + gamma: f32, + ) -> RgbImage { + let lut: [u8; 256] = std::array::from_fn(|x| { + let brightness_contrast = + (((x as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32) + .clamp(0.0, 255.0) as u8; + 
((brightness_contrast as f32 / 255.0).powf(1.0 / gamma) * 255.0) as u8 + }); + + apply_lut(img, &lut) + } + + fn apply_lut(img: &RgbImage, lut: &[u8; 256]) -> RgbImage { + let (width, height) = img.dimensions(); + + let input = img.as_raw(); + let mut output = vec![0u8; input.len()]; + + // Process 16 bytes at a time + let chunks = input.chunks_exact(16); + let remainder = chunks.remainder(); + + for (i, chunk) in chunks.enumerate() { + let pixels = u8x16::from_slice(chunk); + let indices: usizex16 = pixels.cast(); + let result: u8x16 = Simd::gather_or_default(lut, indices); + result.copy_to_slice(&mut output[i * 16..(i + 1) * 16]); + } + + // Handle remaining bytes + for (i, &byte) in remainder.iter().enumerate() { + output[input.len() - remainder.len() + i] = lut[byte as usize]; + } + + ImageBuffer::from_raw(width, height, output).unwrap() + } +} + #[cfg(test)] mod tests { use super::*; From 75a238acb92e0e45388c532d4f063d3b637c059e Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 12:53:32 +0000 Subject: [PATCH 09/13] SIMD version of apply_brightness_contrast. That's actually slower than look-up tables. 
--- src/simd_filters.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/simd_filters.rs b/src/simd_filters.rs index 5a52edd..56f3887 100644 --- a/src/simd_filters.rs +++ b/src/simd_filters.rs @@ -72,7 +72,7 @@ mod naive { mod optimized { use super::*; - use std::simd::num::SimdUint; + use std::simd::num::{SimdFloat, SimdUint}; use std::simd::{Simd, u8x16, usizex16}; pub fn apply_brightness_contrast(img: &RgbImage, brightness: i16, contrast: f32) -> RgbImage { @@ -81,7 +81,33 @@ mod optimized { as u8 }); - apply_lut(img, &lut) + let (width, height) = img.dimensions(); + + let input = img.as_raw(); + let mut output = vec![0u8; input.len()]; + + // Process LANES bytes at a time + const LANES: usize = 8; + + let chunks = input.chunks_exact(LANES); + let remainder = chunks.remainder(); + + for (i, chunk) in chunks.enumerate() { + let pixels: Simd = Simd::from_slice(chunk); + let pixels: Simd = pixels.cast(); + let adjusted = (pixels - Simd::splat(128.0)) * Simd::splat(1.0 + contrast) + + Simd::splat(128.0 + brightness as f32); + let clamped = adjusted.simd_clamp(Simd::splat(0.0), Simd::splat(255.0)); + let result: Simd = clamped.cast(); + result.copy_to_slice(&mut output[i * LANES..(i + 1) * LANES]); + } + + // Handle remaining bytes + for (i, &byte) in remainder.iter().enumerate() { + output[input.len() - remainder.len() + i] = lut[byte as usize]; + } + + ImageBuffer::from_raw(width, height, output).unwrap() } pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { From 954f02a75e2a69f5528ffa766d791129e1f7edca Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 13:20:19 +0000 Subject: [PATCH 10/13] DNA matcher: mess with itertools. 
--- Cargo.lock | 1 + Cargo.toml | 1 + benches/dna_matcher.rs | 2 +- src/dna_matcher.rs | 39 ++++++++++++++++++++++++++++++++++----- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9d0b430..534d93c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -440,6 +440,7 @@ dependencies = [ "codspeed-divan-compat", "image", "image-compare", + "itertools 0.14.0", "rand", ] diff --git a/Cargo.toml b/Cargo.toml index 5eb089c..57ca5ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ rand = "0.8" image = "0.25" image-compare = "0.5.0" bit-set = "0.8.0" +itertools = "0.14.0" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs index c955168..38c8037 100644 --- a/benches/dna_matcher.rs +++ b/benches/dna_matcher.rs @@ -11,7 +11,7 @@ fn dna_matcher() { ); let pattern = "AGTCCGTA"; - let matches = divan::black_box(naive_dna_matcher( + let matches = divan::black_box(dna_matcher_api( divan::black_box(&genome), divan::black_box(pattern), )); diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index d99c90e..85937ff 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,5 +1,12 @@ +use itertools::*; + +pub fn dna_matcher_api(genome: &str, pattern: &str) -> Vec { + optimized_dna_matcher_impl(genome, pattern) +} + /// Naive approach: Read the entire file as a string and filter lines -pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec { +#[allow(dead_code)] +fn naive_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { genome .lines() .filter(|line| !line.starts_with('>')) // Skip headers @@ -8,27 +15,49 @@ pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec { .collect() } +fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { + std::iter::once(usize::MAX) + .chain(genome.as_bytes().iter().positions(|&c| c == b'\n')) + .chain(std::iter::once(genome.len())) + .tuple_windows() + .filter_map(|(start, end)| { + let 
line = if start == usize::MAX { + &genome[..end] + } else { + &genome[start + 1..end] + }; + if line.len() == 0 || line.as_bytes()[0] == b'>' { + None + } else { + Some(line) + } + }) + .filter(|line| line.contains(pattern)) + .map(|s| s.to_string()) + .collect() +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_naive_matcher() { + fn test_matcher() { let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(test_genome, pattern); + let matches = dna_matcher_api(test_genome, pattern); assert_eq!(matches.len(), 1); assert_eq!(matches[0], "AGTCCGTAAA"); } #[test] - fn test_naive_matcher_on_genome_file() { + fn test_matcher_on_genome_file() { // Read the actual genome.fasta file let genome = std::fs::read_to_string("genome.fasta") .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'"); let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(&genome, pattern); + let matches = dna_matcher_api(&genome, pattern); // With fixed seed (42), we should always get exactly 4927 matches assert_eq!( From 9acd252589c6d33c21feffdb2a7ccdbb265bb7b5 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 13:34:06 +0000 Subject: [PATCH 11/13] DNA matcher: use Rayon. 
--- Cargo.lock | 1 + Cargo.toml | 1 + src/dna_matcher.rs | 18 +++++++++++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 534d93c..f049cc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,6 +442,7 @@ dependencies = [ "image-compare", "itertools 0.14.0", "rand", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 57ca5ed..ae46fe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ image = "0.25" image-compare = "0.5.0" bit-set = "0.8.0" itertools = "0.14.0" +rayon = "1.11.0" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index 85937ff..a5b5320 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,5 +1,3 @@ -use itertools::*; - pub fn dna_matcher_api(genome: &str, pattern: &str) -> Vec { optimized_dna_matcher_impl(genome, pattern) } @@ -15,7 +13,10 @@ fn naive_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { .collect() } -fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { +#[allow(dead_code)] +fn itertools_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { + use itertools::*; + std::iter::once(usize::MAX) .chain(genome.as_bytes().iter().positions(|&c| c == b'\n')) .chain(std::iter::once(genome.len())) @@ -37,6 +38,17 @@ fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { .collect() } +fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { + use rayon::prelude::*; + + genome + .par_lines() + .filter(|line| !line.starts_with('>')) // Skip headers + .filter(|line| line.contains(pattern)) + .map(|s| s.to_string()) + .collect() +} + #[cfg(test)] mod tests { use super::*; From 3bff0a747529c897790ef4f22ddfd519977d7541 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 13:53:05 +0000 Subject: [PATCH 12/13] Use memchr crate. 
--- Cargo.lock | 1 + Cargo.toml | 1 + src/dna_matcher.rs | 19 +++++++++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f049cc9..9f4f8c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -441,6 +441,7 @@ dependencies = [ "image", "image-compare", "itertools 0.14.0", + "memchr", "rand", "rayon", ] diff --git a/Cargo.toml b/Cargo.toml index ae46fe9..adcd193 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ image-compare = "0.5.0" bit-set = "0.8.0" itertools = "0.14.0" rayon = "1.11.0" +memchr = "2.7.6" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index a5b5320..53f04c1 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,5 +1,5 @@ pub fn dna_matcher_api(genome: &str, pattern: &str) -> Vec { - optimized_dna_matcher_impl(genome, pattern) + optimized_dna_matcher_impl(genome.as_bytes(), pattern.as_bytes()) } /// Naive approach: Read the entire file as a string and filter lines @@ -38,7 +38,8 @@ fn itertools_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { .collect() } -fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { +#[allow(dead_code)] +fn rayon_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { use rayon::prelude::*; genome @@ -49,6 +50,20 @@ fn optimized_dna_matcher_impl(genome: &str, pattern: &str) -> Vec { .collect() } +fn optimized_dna_matcher_impl(genome: &[u8], pattern: &[u8]) -> Vec { + use memchr::memmem; + use rayon::prelude::*; + + let finder = memmem::Finder::new(pattern); + + genome + .par_split(|&c| c == b'\n') + .filter(|line| line.first().map_or(false, |&c| c != b'>')) // Skip headers and empty lines + .filter(|line| finder.find(line).is_some()) + .map(|s| std::str::from_utf8(s).unwrap().to_string()) + .collect() +} + #[cfg(test)] mod tests { use super::*; From c4dfef85e286c0d5757d91530f992d07b319f0f2 Mon Sep 17 00:00:00 2001 From: G Endx Date: Wed, 8 Oct 2025 
14:02:13 +0000 Subject: [PATCH 13/13] Use memmap2 crate. --- Cargo.lock | 17 +++++++++++++++++ Cargo.toml | 2 ++ benches/dna_matcher.rs | 11 +++++++++-- src/dna_matcher.rs | 8 ++++---- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9f4f8c9..b543d22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,6 +176,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + [[package]] name = "cc" version = "1.2.40" @@ -437,11 +443,13 @@ name = "eurorust-2025-workshop" version = "0.1.0" dependencies = [ "bit-set", + "bytes", "codspeed-divan-compat", "image", "image-compare", "itertools 0.14.0", "memchr", + "memmap2", "rand", "rayon", ] @@ -768,6 +776,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +dependencies = [ + "libc", +] + [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index adcd193..9e9ca79 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ bit-set = "0.8.0" itertools = "0.14.0" rayon = "1.11.0" memchr = "2.7.6" +memmap2 = "0.9.8" +bytes = "1.10.1" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs index 38c8037..4d5a302 100644 --- a/benches/dna_matcher.rs +++ b/benches/dna_matcher.rs @@ -6,13 +6,20 @@ fn main() { #[divan::bench(sample_count = 2, sample_size = 3)] fn 
dna_matcher() { - let genome = std::fs::read_to_string("genome.fasta").expect( + use bytes::Bytes; + use memmap2::Mmap; + use std::fs::File; + use std::ops::Deref; + + let file = File::open("genome.fasta").expect( "Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'", ); + let mmap = unsafe { Mmap::map(&file).unwrap() }; + let genome = Bytes::from_owner(mmap); let pattern = "AGTCCGTA"; let matches = divan::black_box(dna_matcher_api( - divan::black_box(&genome), + divan::black_box(genome.deref()), divan::black_box(pattern), )); diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index 53f04c1..307d270 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,5 +1,5 @@ -pub fn dna_matcher_api(genome: &str, pattern: &str) -> Vec { - optimized_dna_matcher_impl(genome.as_bytes(), pattern.as_bytes()) +pub fn dna_matcher_api(genome: &[u8], pattern: &str) -> Vec { + optimized_dna_matcher_impl(genome, pattern.as_bytes()) } /// Naive approach: Read the entire file as a string and filter lines @@ -70,7 +70,7 @@ mod tests { #[test] fn test_matcher() { - let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; + let test_genome = b">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; let pattern = "AGTCCGTA"; let matches = dna_matcher_api(test_genome, pattern); assert_eq!(matches.len(), 1); @@ -84,7 +84,7 @@ mod tests { .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'"); let pattern = "AGTCCGTA"; - let matches = dna_matcher_api(&genome, pattern); + let matches = dna_matcher_api(genome.as_bytes(), pattern); // With fixed seed (42), we should always get exactly 4927 matches assert_eq!(