diff --git a/Cargo.lock b/Cargo.lock index c337c95..7054777 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -424,7 +424,10 @@ dependencies = [ "codspeed-divan-compat", "image", "image-compare", + "memchr", + "memmap2", "rand", + "rayon", ] [[package]] @@ -749,6 +752,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memmap2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +dependencies = [ + "libc", +] + [[package]] name = "minimal-lexical" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index f4172be..8ff3f4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,9 @@ path = "src/lib.rs" rand = "0.8" image = "0.25" image-compare = "0.5.0" +memmap2 = "0.9" +rayon = "1.10" +memchr = "2.7" [dev-dependencies] divan = { version = "4.0.2", package = "codspeed-divan-compat" } diff --git a/benches/blob_corruption_checker.rs b/benches/blob_corruption_checker.rs index 3ad54d6..9e74386 100644 --- a/benches/blob_corruption_checker.rs +++ b/benches/blob_corruption_checker.rs @@ -1,5 +1,5 @@ use divan::Bencher; -use eurorust_2025_workshop::blob_corruption_checker::find_corruptions_sequential; +use eurorust_2025_workshop::blob_corruption_checker::find_corruptions_parallel; fn main() { divan::main(); @@ -8,7 +8,7 @@ fn main() { #[divan::bench(sample_count = 3, sample_size = 5)] fn corruption_check(bencher: Bencher) { bencher.bench_local(|| { - let corruptions = divan::black_box(find_corruptions_sequential( + let corruptions = divan::black_box(find_corruptions_parallel( "reference.bin", "corrupted.bin", 1024, // 1KB chunks @@ -18,14 +18,25 @@ fn corruption_check(bencher: Bencher) { // All corruptions should be 1KB aligned for corruption in &corruptions { - assert_eq!(corruption.offset % 1024, 0, "Corruption offset should be 1KB 
aligned"); - assert_eq!(corruption.length % 1024, 0, "Corruption length should be multiple of 1KB"); + assert_eq!( + corruption.offset % 1024, + 0, + "Corruption offset should be 1KB aligned" + ); + assert_eq!( + corruption.length % 1024, + 0, + "Corruption length should be multiple of 1KB" + ); } // Check specific corruptions assert_eq!(corruptions[0].offset, 14801920, "First corruption offset"); assert_eq!(corruptions[0].length, 2048, "First corruption length"); - assert_eq!(corruptions[25].offset, 243891200, "Middle corruption offset"); + assert_eq!( + corruptions[25].offset, 243891200, + "Middle corruption offset" + ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); diff --git a/benches/dna_matcher.rs b/benches/dna_matcher.rs index c955168..6011a37 100644 --- a/benches/dna_matcher.rs +++ b/benches/dna_matcher.rs @@ -11,7 +11,7 @@ fn dna_matcher() { ); let pattern = "AGTCCGTA"; - let matches = divan::black_box(naive_dna_matcher( + let matches = divan::black_box(exported_dna_matcher( divan::black_box(&genome), divan::black_box(pattern), )); diff --git a/benches/lut_grayscale_bench.rs b/benches/lut_grayscale_bench.rs index 5816569..64add8b 100644 --- a/benches/lut_grayscale_bench.rs +++ b/benches/lut_grayscale_bench.rs @@ -1,5 +1,5 @@ use eurorust_2025_workshop::lut_grayscale::*; -use image::{RgbImage}; +use image::RgbImage; fn main() { divan::main(); diff --git a/src/bfs.rs b/src/bfs.rs index 487fddc..938102d 100644 --- a/src/bfs.rs +++ b/src/bfs.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::VecDeque; /// A simple graph represented as an adjacency list #[derive(Debug, Clone)] @@ -26,22 +26,23 @@ impl Graph { /// Naive BFS implementation using Vec as a queue (intentionally slow) /// Returns the order in which nodes were visited pub fn bfs_naive(graph: &Graph, start: usize) -> 
Vec<usize> { - let mut visited = HashSet::new(); - let mut queue = Vec::new(); // Using Vec instead of VecDeque - intentionally inefficient! - let mut result = Vec::new(); + let mut visited = vec![false; graph.num_nodes()]; + let mut queue = VecDeque::new(); // Using VecDeque for efficient FIFO queue + let mut result = Vec::with_capacity(graph.num_nodes()); - queue.push(start); - visited.insert(start); + queue.push_back(start); + visited[start] = true; while !queue.is_empty() { - // remove(0) is O(n) - this makes BFS slow! - let node = queue.remove(0); + // pop_front() is O(1) - this makes BFS efficient! + let node = queue.pop_front().unwrap(); result.push(node); if let Some(neighbors) = graph.adjacency.get(node) { for &neighbor in neighbors { - if visited.insert(neighbor) { - queue.push(neighbor); + if !visited[neighbor] { + visited[neighbor] = true; + queue.push_back(neighbor); } } } diff --git a/src/blob_corruption_checker.rs b/src/blob_corruption_checker.rs index 2515c20..863e750 100644 --- a/src/blob_corruption_checker.rs +++ b/src/blob_corruption_checker.rs @@ -1,3 +1,5 @@ +use memmap2::Mmap; +use rayon::prelude::*; use std::fs::File; use std::io::{BufReader, Read}; @@ -60,6 +62,85 @@ pub fn find_corruptions_sequential( corruptions } +pub fn find_corruptions_parallel( + reference_path: &str, + corrupted_path: &str, + chunk_size: usize, +) -> Vec<Corruption> { + // Memory map both files + let ref_file = File::open(reference_path).unwrap(); + let corrupt_file = File::open(corrupted_path).unwrap(); + + // it is fine to use unsafe here since the files are not modified while mapped + let ref_mmap = unsafe { Mmap::map(&ref_file).unwrap() }; + let corrupt_mmap = unsafe { Mmap::map(&corrupt_file).unwrap() }; + + let file_size = ref_mmap.len(); + + // Divide the file into chunks and process in parallel + let num_chunks = (file_size + chunk_size - 1) / chunk_size; + + // Use fold/reduce pattern to stream and merge results + let corruptions = (0..num_chunks) + .into_par_iter() + .fold( + 
Vec::new, + |mut acc: Vec<Corruption>, chunk_idx| { + let offset = chunk_idx * chunk_size; + let end = std::cmp::min(offset + chunk_size, file_size); + let len = end - offset; + + let ref_chunk = &ref_mmap[offset..end]; + let corrupt_chunk = &corrupt_mmap[offset..end]; + + if ref_chunk != corrupt_chunk { + let corruption = Corruption { + offset: offset as u64, + length: len as u64, + }; + + // Try to merge with the last corruption in this thread's accumulator + if let Some(last) = acc.last_mut() { + if last.offset + last.length == corruption.offset { + last.length += corruption.length; + } else { + acc.push(corruption); + } + } else { + acc.push(corruption); + } + } + + acc + }, + ) + .reduce(Vec::new, |mut a, b| { + // Merge two vectors of corruptions + if a.is_empty() { + return b; + } + if b.is_empty() { + return a; + } + + // Check if we can merge the last of 'a' with the first of 'b' + let last_a = a.last_mut().unwrap(); + let mut b_iter = b.into_iter(); + let first_b = b_iter.next().unwrap(); + + if last_a.offset + last_a.length == first_b.offset { + last_a.length += first_b.length; + } else { + a.push(first_b); + } + + a.extend(b_iter); + a + }); + + corruptions +} + #[cfg(test)] mod tests { use super::*; @@ -92,10 +173,39 @@ mod tests { "Middle corruption offset" ); assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); + assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); + assert_eq!(corruptions[49].length, 5120, "Last corruption length"); + } + + #[test] + fn test_find_corruptions_parallel() { + let corruptions = find_corruptions_parallel("reference.bin", "corrupted.bin", 1024); + + assert_eq!(corruptions.len(), 50, "Should find 50 corruptions"); + + // All corruptions should be 1KB aligned + for corruption in &corruptions { + assert_eq!( + corruption.offset % 1024, + 0, + "Corruption offset should be 1KB aligned" + ); + assert_eq!( + corruption.length % 1024, + 0, + "Corruption length should be multiple of 1KB" + ); + } + + // 
Check specific corruptions + assert_eq!(corruptions[0].offset, 14801920, "First corruption offset"); + assert_eq!(corruptions[0].length, 2048, "First corruption length"); assert_eq!( - corruptions[49].offset, 507871232, - "Last corruption offset" + corruptions[25].offset, 243891200, + "Middle corruption offset" ); + assert_eq!(corruptions[25].length, 4096, "Middle corruption length"); + assert_eq!(corruptions[49].offset, 507871232, "Last corruption offset"); assert_eq!(corruptions[49].length, 5120, "Last corruption length"); } } diff --git a/src/dna_matcher.rs b/src/dna_matcher.rs index d99c90e..582bd62 100644 --- a/src/dna_matcher.rs +++ b/src/dna_matcher.rs @@ -1,13 +1,97 @@ +use rayon::prelude::*; +use memchr::memmem; + +pub fn exported_dna_matcher(genome: &str, pattern: &str) -> Vec<String> { + chunked_dna_matcher(genome, pattern) +} + /// Naive approach: Read the entire file as a string and filter lines -pub fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> { +fn naive_dna_matcher(genome: &str, pattern: &str) -> Vec<String> { genome - .lines() + .par_lines() .filter(|line| !line.starts_with('>')) // Skip headers .filter(|line| line.contains(pattern)) .map(|s| s.to_string()) .collect() } +/// Chunked approach: Process genome in parallel byte chunks +fn chunked_dna_matcher(genome: &str, pattern: &str) -> Vec<String> { + let pattern_bytes = pattern.as_bytes(); + let genome_bytes = genome.as_bytes(); + let finder = memmem::Finder::new(pattern_bytes); + + // Chunk size: balance between parallelism and overhead + // Aim for ~1000 lines per chunk, with typical DNA line length of 60-80 chars + let chunk_size = 64 * 1024; // 64KB per chunk + let total_len = genome_bytes.len(); + + // Find chunk boundaries that align with line boundaries + let mut chunk_starts = vec![0]; + let mut pos = chunk_size; + + while pos < total_len { + // Find the next newline after pos + let search_start = pos; + let search_end = std::cmp::min(pos + 1024, total_len); // Look ahead up to 1KB for newline + + if 
let Some(newline_offset) = memchr::memchr(b'\n', &genome_bytes[search_start..search_end]) { + chunk_starts.push(search_start + newline_offset + 1); + pos = search_start + newline_offset + 1 + chunk_size; + } else { + // No newline within the lookahead window: stop splitting here so the + // final push(total_len) closes the last chunk without cutting a line in half + break; + } + } + chunk_starts.push(total_len); + + // Process chunks in parallel + let matches: Vec<String> = (0..chunk_starts.len() - 1) + .into_par_iter() + .flat_map(|i| { + let chunk_start = chunk_starts[i]; + let chunk_end = chunk_starts[i + 1]; + let chunk = &genome_bytes[chunk_start..chunk_end]; + + let mut local_matches = Vec::new(); + let mut line_start = 0; + + // Use memchr_iter for faster newline finding + for newline_pos in memchr::memchr_iter(b'\n', chunk) { + let line = &chunk[line_start..newline_pos]; + line_start = newline_pos + 1; + + // Skip headers and empty lines + if !line.is_empty() && line[0] != b'>' { + // Use memmem for fast substring search + if finder.find(line).is_some() { + // SAFETY: DNA sequences are ASCII-only, so we can skip UTF-8 validation + let line_str = unsafe { std::str::from_utf8_unchecked(line) }; + local_matches.push(line_str.to_string()); + } + } + } + + // Handle last line if chunk doesn't end with newline + if line_start < chunk.len() { + let line = &chunk[line_start..]; + if !line.is_empty() && line[0] != b'>' { + if finder.find(line).is_some() { + // SAFETY: DNA sequences are ASCII-only, so we can skip UTF-8 validation + let line_str = unsafe { std::str::from_utf8_unchecked(line) }; + local_matches.push(line_str.to_string()); + } + } + } + + local_matches + }) + .collect(); + + matches +} + #[cfg(test)] mod tests { use super::*; @@ -16,7 +100,7 @@ fn test_naive_matcher() { let test_genome = ">seq1\nACGTACGT\n>seq2\nAGTCCGTAAA\n>seq3\nGGGGGG"; let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(test_genome, pattern); + let matches = exported_dna_matcher(test_genome, pattern); assert_eq!(matches.len(), 1); 
assert_eq!(matches[0], "AGTCCGTAAA"); } @@ -28,7 +112,7 @@ mod tests { .expect("Failed to read genome.fasta\n\n Make sure to run 'cargo run --release --bin generate_fasta'"); let pattern = "AGTCCGTA"; - let matches = naive_dna_matcher(&genome, pattern); + let matches = exported_dna_matcher(&genome, pattern); // With fixed seed (42), we should always get exactly 4927 matches assert_eq!( diff --git a/src/lut_filters.rs b/src/lut_filters.rs index a73068c..974a12e 100644 --- a/src/lut_filters.rs +++ b/src/lut_filters.rs @@ -31,8 +31,29 @@ pub fn apply_brightness_contrast_gamma( contrast: f32, gamma: f32, ) -> RgbImage { - let temp_img = apply_brightness_contrast(img, brightness, contrast); - naive::apply_gamma(&temp_img, gamma) + let (width, height) = img.dimensions(); + let mut output = ImageBuffer::new(width, height); + + // precompute two lookup tables at once + let mut brightness_table = [0u8; 256]; // pixels are u8 (0-255) + let mut gamma_table = [0u8; 256]; // pixels are u8 (0-255) + + for i in 0..256 { + brightness_table[i] = (((i as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32) + .clamp(0.0, 255.0) as u8; + gamma_table[i] = ((i as f32 / 255.0).powf(1.0 / gamma) * 255.0).clamp(0.0, 255.0) as u8; + } + + // apply first the brightness/contrast, then gamma + for (x, y, pixel) in img.enumerate_pixels() { + let r = gamma_table[brightness_table[pixel[0] as usize] as usize] as u8; + let g = gamma_table[brightness_table[pixel[1] as usize] as usize] as u8; + let b = gamma_table[brightness_table[pixel[2] as usize] as usize] as u8; + + output.put_pixel(x, y, Rgb([r, g, b])); + } + + output } mod naive { @@ -43,23 +64,26 @@ mod naive { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); - for (x, y, pixel) in img.enumerate_pixels() { - let r = pixel[0] as f32; - let g = pixel[1] as f32; - let b = pixel[2] as f32; + let mut brightness_table = [0u8; 256]; // pixels are u8 (0-255) + + // Precompute brightness table + for i 
in 0..256 { + let adjusted = ((i as f32 - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; + brightness_table[i] = adjusted.clamp(0.0, 255.0) as u8; + } - // Apply contrast and brightness (5 FP ops per channel!) - let r = ((r - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let g = ((g - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; - let b = ((b - 128.0) * (1.0 + contrast)) + 128.0 + brightness as f32; + for (x, y, pixel) in img.enumerate_pixels() { + let r = pixel[0]; + let g = pixel[1]; + let b = pixel[2]; output.put_pixel( x, y, Rgb([ - r.clamp(0.0, 255.0) as u8, - g.clamp(0.0, 255.0) as u8, - b.clamp(0.0, 255.0) as u8, + brightness_table[r as usize], + brightness_table[g as usize], + brightness_table[b as usize], ]), ); } @@ -67,17 +91,20 @@ mod naive { output } - /// Naive implementation: Apply gamma correction - /// This is VERY slow because powf() is expensive! pub fn apply_gamma(img: &RgbImage, gamma: f32) -> RgbImage { let (width, height) = img.dimensions(); let mut output = ImageBuffer::new(width, height); + let mut gamma_table = [0u8; 256]; // pixels are u8 (0-255) + // Precompute gamma table + for i in 0..256 { + gamma_table[i] = ((i as f32 / 255.0).powf(1.0 / gamma) * 255.0).clamp(0.0, 255.0) as u8; + } + for (x, y, pixel) in img.enumerate_pixels() { - // powf() is VERY expensive - this is why we need a LUT! - let r = (pixel[0] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let g = (pixel[1] as f32 / 255.0).powf(1.0 / gamma) * 255.0; - let b = (pixel[2] as f32 / 255.0).powf(1.0 / gamma) * 255.0; + let r = gamma_table[pixel[0] as usize]; + let g = gamma_table[pixel[1] as usize]; + let b = gamma_table[pixel[2] as usize]; output.put_pixel(x, y, Rgb([r as u8, g as u8, b as u8])); }