From e1f196459c31128177eb2c0e4619578f19523659 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Sun, 18 Apr 2021 02:18:02 +0900 Subject: [PATCH 01/11] Make defilter() iterate over all bytes in a scanline instead of calling it every byte --- src/lib.rs | 93 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 072d254..8853954 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -641,46 +641,56 @@ fn read_chunk(bytes: &[u8]) -> Result { fn defilter( filter_type: FilterType, bytes_per_pixel: usize, - x: usize, - current_scanline: &[u8], + bytes_per_scanline: usize, + current_scanline: &mut [u8], last_scanline: &[u8], -) -> u8 { +) { match filter_type { - FilterType::None => current_scanline[x], + FilterType::None => {}, FilterType::Sub => { - if let Some(idx) = x.checked_sub(bytes_per_pixel) { - current_scanline[x].wrapping_add(current_scanline[idx]) - } else { - current_scanline[x] + for x in 0..(bytes_per_scanline) { + if let Some(idx) = x.checked_sub(bytes_per_pixel) { + current_scanline[x] = current_scanline[x].wrapping_add(current_scanline[idx]); + } + } + }, + FilterType::Up => { + for x in 0..(bytes_per_scanline) { + current_scanline[x] = current_scanline[x].wrapping_add(last_scanline[x]); } }, - FilterType::Up => current_scanline[x].wrapping_add(last_scanline[x]), FilterType::Average => { - let raw_val = if let Some(idx) = x.checked_sub(bytes_per_pixel) { - current_scanline[idx] - } else { - 0 - }; + for x in 0..(bytes_per_scanline) { + let raw_val = if let Some(idx) = x.checked_sub(bytes_per_pixel) { + current_scanline[idx] + } else { + 0 + }; - (current_scanline[x] as u16 + ((raw_val as u16 + last_scanline[x] as u16) / 2)) as u8 + current_scanline[x] = (current_scanline[x] as u16 + + ((raw_val as u16 + last_scanline[x] as u16) / 2)) + as u8; + } }, FilterType::Paeth => { - if let Some(idx) = x.checked_sub(bytes_per_pixel) { - let left = current_scanline[idx]; - let above = last_scanline[x]; - let upper_left = last_scanline[idx]; + for x in 0..(bytes_per_scanline) { + if let Some(idx) = x.checked_sub(bytes_per_pixel) { + let left = current_scanline[idx]; + let above = last_scanline[x]; + let upper_left = last_scanline[idx]; - let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); + let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); - current_scanline[x].wrapping_add(predictor) - } else { - let left = 0; - let above = last_scanline[x]; - let upper_left = 0; + current_scanline[x] = current_scanline[x].wrapping_add(predictor); + } else { + let left = 0; + let above = last_scanline[x]; + let upper_left = 0; - let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); + let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); - current_scanline[x].wrapping_add(predictor) + current_scanline[x] = current_scanline[x].wrapping_add(predictor); + } } }, } @@ -717,11 +727,13 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; - for x in 0..(bytes_per_scanline) { - let unfiltered_byte = - defilter(filter_type, bytes_per_pixel, x, current_scanline, &last_scanline); - current_scanline[x] = unfiltered_byte; - } + defilter( + filter_type, + bytes_per_pixel, + bytes_per_scanline, + current_scanline, + &last_scanline, + ); let scanline_iter = ScanlineIterator::new( header.width, @@ -828,16 +840,13 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; - for x in 0..(bytes_per_scanline) { - let unfiltered_byte = defilter( - filter_type, - bytes_per_pixel, - x, - current_scanline, - &last_scanline, - ); - current_scanline[x] = unfiltered_byte; - } + defilter( + filter_type, + bytes_per_pixel, + bytes_per_scanline, + current_scanline, + &last_scanline, + ); let scanline_iter = ScanlineIterator::new( pass_width, From 01c37e3f0b149790ceb168b522472e14340c5146 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Tue, 20 Apr 2021 09:39:49 +0900 Subject: [PATCH 02/11] Remove conditional in Sub filter --- src/lib.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8853954..be068a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -648,10 +648,9 @@ fn defilter( match filter_type { FilterType::None => {}, FilterType::Sub => { - for x in 0..(bytes_per_scanline) { - if let Some(idx) = x.checked_sub(bytes_per_pixel) { - current_scanline[x] = current_scanline[x].wrapping_add(current_scanline[idx]); - } + for x in bytes_per_pixel..(bytes_per_scanline) { + let idx = x - bytes_per_pixel; + current_scanline[x] = current_scanline[x].wrapping_add(current_scanline[idx]); } }, FilterType::Up => { From ea0908da21d69e29ad3166de024b8889a051af0a Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Tue, 20 Apr 2021 09:55:19 +0900 Subject: [PATCH 03/11] Add some performance printing code --- src/lib.rs | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index be068a3..1952835 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -659,12 +659,12 @@ fn defilter( } }, FilterType::Average => { - for x in 0..(bytes_per_scanline) { - let raw_val = if let Some(idx) = x.checked_sub(bytes_per_pixel) { - current_scanline[idx] - } else { - 0 - }; + for x in 0..(bytes_per_pixel) { + current_scanline[x] = + (current_scanline[x] as u16 + ((last_scanline[x] as u16) / 2)) as u8; + } + for x in bytes_per_pixel..(bytes_per_scanline) { + let raw_val = current_scanline[x - bytes_per_pixel]; current_scanline[x] = (current_scanline[x] as u16 + ((raw_val as u16 + last_scanline[x] as u16) / 2)) @@ -719,6 +719,9 @@ fn process_scanlines( let mut last_scanline = vec![0u8; bytes_per_scanline]; + let mut total_defilter = std::time::Duration::from_secs(0); + let mut total_scanline = std::time::Duration::from_secs(0); + for y in 0..header.height { let filter_type = FilterType::try_from(scanline_data[cursor]) .map_err(|_| DecodeError::InvalidFilterType)?; @@ -726,6 +729,7 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; + let now = std::time::Instant::now(); defilter( filter_type, bytes_per_pixel, @@ -734,6 +738,9 @@ fn process_scanlines( &last_scanline, ); + total_defilter += now.elapsed(); + + let now = std::time::Instant::now(); let scanline_iter = ScanlineIterator::new( header.width, pixel_type, @@ -755,9 +762,14 @@ fn process_scanlines( output_rgba[output_idx + 3] = a; } + total_scanline += now.elapsed(); + last_scanline.copy_from_slice(current_scanline); cursor += bytes_per_scanline; } + + println!("total_defilter took {:?}", total_defilter); + println!("total_scanline took {:?}", total_scanline); }, InterlaceMethod::Adam7 => { let max_bytes_per_scanline = header.width as usize * bytes_per_pixel; @@ -930,6 +942,7 @@ pub fn decode(bytes: &[u8]) -> Result<(PngHeader, Vec), DecodeError> { let pixel_type = PixelType::new(header.color_type, header.bit_depth)?; let mut ancillary_chunks = AncillaryChunks::default(); + let now = std::time::Instant::now(); while !bytes.is_empty() { let chunk = read_chunk(bytes)?; @@ -946,12 +959,18 @@ pub fn decode(bytes: &[u8]) -> Result<(PngHeader, Vec), DecodeError> { bytes = &bytes[chunk.byte_size()..]; } + println!("Chunk reading took {:?}", now.elapsed()); + + let now = std::time::Instant::now(); let mut scanline_data = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed_data) .map_err(DecodeError::Decompress)?; + println!("Decompress took {:?}", now.elapsed()); + // For now, output data is always RGBA, 1 byte per channel. let mut output_rgba = vec![0u8; header.width as usize * header.height as usize * 4]; + let now = std::time::Instant::now(); process_scanlines( &header, &mut scanline_data, @@ -959,6 +978,7 @@ pub fn decode(bytes: &[u8]) -> Result<(PngHeader, Vec), DecodeError> { &ancillary_chunks, pixel_type, )?; + println!("process_scanlines took {:?}", now.elapsed()); Ok((header, output_rgba)) } @@ -1021,4 +1041,12 @@ mod tests { } } } + + #[test] + fn hd_decode_test() { + let png_bytes = include_bytes!("../test_pngs/skyline.png"); + let now = std::time::Instant::now(); + let (_header, decoded) = decode(png_bytes).unwrap(); + println!("Took {:?}", now.elapsed()); + } } From 1ca251f9d1c0f4bda4515bdfdbd8c3ae59749168 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Wed, 21 Apr 2021 12:03:33 +0900 Subject: [PATCH 04/11] Try using iterators in the defilter function --- src/lib.rs | 62 ++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1952835..e4b772a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -641,21 +641,28 @@ fn read_chunk(bytes: &[u8]) -> Result { fn defilter( filter_type: FilterType, bytes_per_pixel: usize, - bytes_per_scanline: usize, current_scanline: &mut [u8], last_scanline: &[u8], ) { + let bytes_per_scanline = current_scanline.len(); + match filter_type { FilterType::None => {}, FilterType::Sub => { - for x in bytes_per_pixel..(bytes_per_scanline) { - let idx = x - bytes_per_pixel; - current_scanline[x] = current_scanline[x].wrapping_add(current_scanline[idx]); + let mut chunk_iter = current_scanline.chunks_exact_mut(bytes_per_pixel); + let mut prev_chunk = chunk_iter.next().unwrap(); + + for current_chunk in &mut chunk_iter { + for (current_byte, prev_byte) in current_chunk.iter_mut().zip(prev_chunk.iter()) { + *current_byte = current_byte.wrapping_add(*prev_byte); + } + + prev_chunk = current_chunk; } }, FilterType::Up => { - for x in 0..(bytes_per_scanline) { - current_scanline[x] = current_scanline[x].wrapping_add(last_scanline[x]); + for (current, above) in (current_scanline.iter_mut()).zip(last_scanline) { + *current = current.wrapping_add(*above); } }, FilterType::Average => { @@ -663,6 +670,7 @@ fn defilter( current_scanline[x] = (current_scanline[x] as u16 + ((last_scanline[x] as u16) / 2)) as u8; } + for x in bytes_per_pixel..(bytes_per_scanline) { let raw_val = current_scanline[x - bytes_per_pixel]; @@ -672,24 +680,20 @@ fn defilter( } }, FilterType::Paeth => { - for x in 0..(bytes_per_scanline) { - if let Some(idx) = x.checked_sub(bytes_per_pixel) { - let left = current_scanline[idx]; - let above = last_scanline[x]; - let upper_left = last_scanline[idx]; - - let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); + for x in 0..(bytes_per_pixel) { + let predictor = paeth_predictor(0, last_scanline[x] as i16, 0); + current_scanline[x] = current_scanline[x].wrapping_add(predictor); + } - current_scanline[x] = current_scanline[x].wrapping_add(predictor); - } else { - let left = 0; - let above = last_scanline[x]; - let upper_left = 0; + for x in bytes_per_pixel..(bytes_per_scanline) { + let idx = x - bytes_per_pixel; + let left = current_scanline[idx]; + let above = last_scanline[x]; + let upper_left = last_scanline[idx]; - let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); + let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); - current_scanline[x] = current_scanline[x].wrapping_add(predictor); - } + current_scanline[x] = current_scanline[x].wrapping_add(predictor); } }, } @@ -730,13 +734,7 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; let now = std::time::Instant::now(); - defilter( - filter_type, - bytes_per_pixel, - bytes_per_scanline, - current_scanline, - &last_scanline, - ); + defilter(filter_type, bytes_per_pixel, current_scanline, &last_scanline); total_defilter += now.elapsed(); @@ -851,13 +849,7 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; - defilter( - filter_type, - bytes_per_pixel, - bytes_per_scanline, - current_scanline, - &last_scanline, - ); + defilter(filter_type, bytes_per_pixel, current_scanline, &last_scanline); let scanline_iter = ScanlineIterator::new( pass_width, From 0cac040624e47ef58fac169bcbe4b58d2c09b44c Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Wed, 21 Apr 2021 12:18:02 +0900 Subject: [PATCH 05/11] Try const generics --- src/lib.rs | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e4b772a..9e8ff69 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -638,9 +638,9 @@ fn read_chunk(bytes: &[u8]) -> Result { Ok(Chunk { chunk_type, data: &data_for_crc[4..], crc }) } -fn defilter( +// BPP = Bytes Per Pixel +fn defilter( filter_type: FilterType, - bytes_per_pixel: usize, current_scanline: &mut [u8], last_scanline: &[u8], ) { @@ -649,7 +649,7 @@ fn defilter( match filter_type { FilterType::None => {}, FilterType::Sub => { - let mut chunk_iter = current_scanline.chunks_exact_mut(bytes_per_pixel); + let mut chunk_iter = current_scanline.chunks_exact_mut(BPP); let mut prev_chunk = chunk_iter.next().unwrap(); for current_chunk in &mut chunk_iter { @@ -666,13 +666,13 @@ fn defilter( } }, FilterType::Average => { - for x in 0..(bytes_per_pixel) { + for x in 0..(BPP) { current_scanline[x] = (current_scanline[x] as u16 + ((last_scanline[x] as u16) / 2)) as u8; } - for x in bytes_per_pixel..(bytes_per_scanline) { - let raw_val = current_scanline[x - bytes_per_pixel]; + for x in BPP..(bytes_per_scanline) { + let raw_val = current_scanline[x - BPP]; current_scanline[x] = (current_scanline[x] as u16 + ((raw_val as u16 + last_scanline[x] as u16) / 2)) @@ -680,13 +680,13 @@ fn defilter( } }, FilterType::Paeth => { - for x in 0..(bytes_per_pixel) { + for x in 0..(BPP) { let predictor = paeth_predictor(0, last_scanline[x] as i16, 0); current_scanline[x] = current_scanline[x].wrapping_add(predictor); } - for x in bytes_per_pixel..(bytes_per_scanline) { - let idx = x - bytes_per_pixel; + for x in BPP..(bytes_per_scanline) { + let idx = x - BPP; let left = current_scanline[idx]; let above = last_scanline[x]; let upper_left = last_scanline[idx]; @@ -734,7 +734,16 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; let now = std::time::Instant::now(); - defilter(filter_type, bytes_per_pixel, current_scanline, &last_scanline); + + match bytes_per_pixel { + 1 => defilter::<1>(filter_type, current_scanline, &last_scanline), + 2 => defilter::<2>(filter_type, current_scanline, &last_scanline), + 3 => defilter::<3>(filter_type, current_scanline, &last_scanline), + 4 => defilter::<4>(filter_type, current_scanline, &last_scanline), + 6 => defilter::<6>(filter_type, current_scanline, &last_scanline), + 8 => defilter::<8>(filter_type, current_scanline, &last_scanline), + _ => {}, + } total_defilter += now.elapsed(); @@ -849,7 +858,15 @@ fn process_scanlines( let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; - defilter(filter_type, bytes_per_pixel, current_scanline, &last_scanline); + match bytes_per_pixel { + 1 => defilter::<1>(filter_type, current_scanline, &last_scanline), + 2 => defilter::<2>(filter_type, current_scanline, &last_scanline), + 3 => defilter::<3>(filter_type, current_scanline, &last_scanline), + 4 => defilter::<4>(filter_type, current_scanline, &last_scanline), + 6 => defilter::<6>(filter_type, current_scanline, &last_scanline), + 8 => defilter::<8>(filter_type, current_scanline, &last_scanline), + _ => {}, + } let scanline_iter = ScanlineIterator::new( pass_width, From aabadaa7a786e1ca10e181854c90fdf58abb97d2 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Wed, 21 Apr 2021 22:14:51 +0900 Subject: [PATCH 06/11] Use iterators for all parts of the defilter function --- src/lib.rs | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9e8ff69..f918f75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -644,8 +644,6 @@ fn defilter( current_scanline: &mut [u8], last_scanline: &[u8], ) { - let bytes_per_scanline = current_scanline.len(); - match filter_type { FilterType::None => {}, FilterType::Sub => { @@ -653,7 +651,7 @@ fn defilter( let mut prev_chunk = chunk_iter.next().unwrap(); for current_chunk in &mut chunk_iter { - for (current_byte, prev_byte) in current_chunk.iter_mut().zip(prev_chunk.iter()) { + for (current_byte, prev_byte) in current_chunk.iter_mut().zip(prev_chunk) { *current_byte = current_byte.wrapping_add(*prev_byte); } @@ -671,12 +669,21 @@ fn defilter( (current_scanline[x] as u16 + ((last_scanline[x] as u16) / 2)) as u8; } - for x in BPP..(bytes_per_scanline) { - let raw_val = current_scanline[x - BPP]; + let mut chunk_iter = current_scanline.chunks_exact_mut(BPP); + let mut left_chunk = chunk_iter.next().unwrap(); + + let upper_iter = last_scanline[BPP..].chunks_exact(BPP); + + for (current_chunk, upper_chunk) in (&mut chunk_iter).zip(upper_iter) { + for ((current_byte, left_byte), upper_byte) in + current_chunk.iter_mut().zip(left_chunk).zip(upper_chunk) + { + *current_byte = (*current_byte as u16 + + ((*left_byte as u16 + *upper_byte as u16) / 2)) + as u8; + } - current_scanline[x] = (current_scanline[x] as u16 - + ((raw_val as u16 + last_scanline[x] as u16) / 2)) - as u8; + left_chunk = current_chunk; } }, FilterType::Paeth => { @@ -685,15 +692,27 @@ fn defilter( current_scanline[x] = current_scanline[x].wrapping_add(predictor); } - for x in BPP..(bytes_per_scanline) { - let idx = x - BPP; - let left = current_scanline[idx]; - let above = last_scanline[x]; - let upper_left = last_scanline[idx]; - - let predictor = paeth_predictor(left as i16, above as i16, upper_left as i16); + let mut chunk_iter = current_scanline.chunks_exact_mut(BPP); + let mut left_chunk = chunk_iter.next().unwrap(); + + let upper_left_iter = last_scanline.chunks_exact(BPP); + let upper_iter = last_scanline[BPP..].chunks_exact(BPP); + + for ((current_chunk, upper_left_chunk), upper_chunk) in + (&mut chunk_iter).zip(upper_left_iter).zip(upper_iter) + { + for (((current_byte, left_byte), upper_left_byte), upper_byte) in + current_chunk.iter_mut().zip(left_chunk).zip(upper_left_chunk).zip(upper_chunk) + { + let predictor = paeth_predictor( + *left_byte as i16, + *upper_byte as i16, + *upper_left_byte as i16, + ); + *current_byte = current_byte.wrapping_add(predictor); + } - current_scanline[x] = current_scanline[x].wrapping_add(predictor); + left_chunk = current_chunk; } }, } From 7802174fb573efccdd66fe03c9fe1ee24be90f34 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Thu, 22 Apr 2021 11:02:01 +0900 Subject: [PATCH 07/11] Fix clippy warning --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f918f75..7e1de66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1074,7 +1074,7 @@ mod tests { fn hd_decode_test() { let png_bytes = include_bytes!("../test_pngs/skyline.png"); let now = std::time::Instant::now(); - let (_header, decoded) = decode(png_bytes).unwrap(); + let (_header, _decoded) = decode(png_bytes).unwrap(); println!("Took {:?}", now.elapsed()); } } From 5af527ae09642c4cb13ea2599f1d597bd35d9ccd Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Thu, 22 Apr 2021 16:33:02 +0900 Subject: [PATCH 08/11] Make a branchless paeth predictor function --- src/lib.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7e1de66..21c59cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -939,13 +939,16 @@ fn paeth_predictor(a: i16, b: i16, c: i16) -> u8 { let pb = (p - b).abs(); let pc = (p - c).abs(); - if pa <= pb && pa <= pc { - a as u8 - } else if pb <= pc { - b as u8 - } else { - c as u8 - } + let first = pa <= pb && pa <= pc; + let first_bitmask = first as u8 * 255u8; + + let second = !first && pb <= pc; + let second_bitmask = second as u8 * 255u8; + + let third = !first && !second; + let third_bitmask = third as u8 * 255u8; + + (first_bitmask & a as u8) | (second_bitmask & b as u8) | (third_bitmask & c as u8) } pub fn decode(bytes: &[u8]) -> Result<(PngHeader, Vec), DecodeError> { From 9ae6583e86db84c02d819cd99a3a00590b39d4a8 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Thu, 22 Apr 2021 16:46:11 +0900 Subject: [PATCH 09/11] Remove unnecessary u16 casting --- src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 21c59cf..274325a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -665,8 +665,7 @@ fn defilter( }, FilterType::Average => { for x in 0..(BPP) { - current_scanline[x] = - (current_scanline[x] as u16 + ((last_scanline[x] as u16) / 2)) as u8; + current_scanline[x] = current_scanline[x].wrapping_add((last_scanline[x]) / 2); } let mut chunk_iter = current_scanline.chunks_exact_mut(BPP); @@ -678,9 +677,8 @@ fn defilter( for ((current_byte, left_byte), upper_byte) in current_chunk.iter_mut().zip(left_chunk).zip(upper_chunk) { - *current_byte = (*current_byte as u16 - + ((*left_byte as u16 + *upper_byte as u16) / 2)) - as u8; + *current_byte = current_byte + .wrapping_add(((*left_byte as u16 + *upper_byte as u16) / 2) as u8); } left_chunk = current_chunk; From 39466065d7e6776c1eb057057f7ee2ec369dc102 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Fri, 23 Apr 2021 00:07:32 +0900 Subject: [PATCH 10/11] Remove some slice copying and cursor code --- src/lib.rs | 81 ++++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 274325a..c377adb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -716,14 +716,36 @@ fn defilter( } } +#[inline(always)] +fn paeth_predictor(a: i16, b: i16, c: i16) -> u8 { + // TODO(bschwind) - Accept i16 or convert once and store in a temp. + // a = left pixel + // b = above pixel + // c = upper left + let p = a + b - c; + let pa = (p - a).abs(); + let pb = (p - b).abs(); + let pc = (p - c).abs(); + + let first = pa <= pb && pa <= pc; + let first_bitmask = first as u8 * 255u8; + + let second = !first && pb <= pc; + let second_bitmask = second as u8 * 255u8; + + let third = !first && !second; + let third_bitmask = third as u8 * 255u8; + + (first_bitmask & a as u8) | (second_bitmask & b as u8) | (third_bitmask & c as u8) +} + fn process_scanlines( header: &PngHeader, - scanline_data: &mut [u8], + mut scanline_data: &mut [u8], output_rgba: &mut [u8], ancillary_chunks: &AncillaryChunks, pixel_type: PixelType, ) -> Result<(), DecodeError> { - let mut cursor = 0; let bytes_per_pixel: usize = ((header.bit_depth as usize * header.color_type.sample_multiplier()) + 7) / 8; @@ -738,17 +760,18 @@ fn process_scanlines( let bytes_per_scanline: usize = bytes_per_scanline.try_into().map_err(|_| DecodeError::IntegerOverflow)?; - let mut last_scanline = vec![0u8; bytes_per_scanline]; + let zero_scanline = vec![0u8; bytes_per_scanline]; + let mut last_scanline: &[u8] = &zero_scanline; let mut total_defilter = std::time::Duration::from_secs(0); let mut total_scanline = std::time::Duration::from_secs(0); for y in 0..header.height { - let filter_type = FilterType::try_from(scanline_data[cursor]) + let filter_type = FilterType::try_from(scanline_data[0]) .map_err(|_| DecodeError::InvalidFilterType)?; - cursor += 1; - let current_scanline = &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; + let (current_scanline, scanline_data_tail) = + scanline_data[1..].split_at_mut(bytes_per_scanline); let now = std::time::Instant::now(); @@ -788,8 +811,8 @@ fn process_scanlines( total_scanline += now.elapsed(); - last_scanline.copy_from_slice(current_scanline); - cursor += bytes_per_scanline; + last_scanline = current_scanline; + scanline_data = scanline_data_tail; } println!("total_defilter took {:?}", total_defilter); @@ -797,7 +820,8 @@ fn process_scanlines( }, InterlaceMethod::Adam7 => { let max_bytes_per_scanline = header.width as usize * bytes_per_pixel; - let mut last_scanline = vec![0u8; max_bytes_per_scanline]; + + let zero_scanline = vec![0u8; max_bytes_per_scanline]; // Adam7 Interlacing Pattern // 1 6 4 6 2 6 4 6 @@ -862,18 +886,14 @@ fn process_scanlines( let bytes_per_scanline: usize = bytes_per_scanline.try_into().expect("bytes_per_scanline overflowed a usize"); - let last_scanline = &mut last_scanline[..(bytes_per_scanline)]; - for byte in last_scanline.iter_mut() { - *byte = 0; - } + let mut last_scanline = &zero_scanline[..(bytes_per_scanline)]; for y in 0..pass_height { - let filter_type = FilterType::try_from(scanline_data[cursor]) + let filter_type = FilterType::try_from(scanline_data[0]) .map_err(|_| DecodeError::InvalidFilterType)?; - cursor += 1; - let current_scanline = - &mut scanline_data[cursor..(cursor + bytes_per_scanline)]; + let (current_scanline, scanline_data_tail) = + scanline_data[1..].split_at_mut(bytes_per_scanline); match bytes_per_pixel { 1 => defilter::<1>(filter_type, current_scanline, &last_scanline), @@ -916,9 +936,8 @@ fn process_scanlines( output_rgba[output_idx + 3] = a; } - last_scanline.copy_from_slice(current_scanline); - - cursor += bytes_per_scanline; + last_scanline = current_scanline; + scanline_data = scanline_data_tail; } } }, @@ -927,28 +946,6 @@ fn process_scanlines( Ok(()) } -fn paeth_predictor(a: i16, b: i16, c: i16) -> u8 { - // TODO(bschwind) - Accept i16 or convert once and store in a temp. - // a = left pixel - // b = above pixel - // c = upper left - let p = a + b - c; - let pa = (p - a).abs(); - let pb = (p - b).abs(); - let pc = (p - c).abs(); - - let first = pa <= pb && pa <= pc; - let first_bitmask = first as u8 * 255u8; - - let second = !first && pb <= pc; - let second_bitmask = second as u8 * 255u8; - - let third = !first && !second; - let third_bitmask = third as u8 * 255u8; - - (first_bitmask & a as u8) | (second_bitmask & b as u8) | (third_bitmask & c as u8) -} - pub fn decode(bytes: &[u8]) -> Result<(PngHeader, Vec), DecodeError> { if bytes.len() < PNG_MAGIC_BYTES.len() { return Err(DecodeError::MissingBytes); From 221951c73990347278f94c08a8ff5d462ef84c91 Mon Sep 17 00:00:00 2001 From: Brian Schwind Date: Fri, 23 Apr 2021 00:58:03 +0900 Subject: [PATCH 11/11] Iterator zipping is not always faster --- src/lib.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c377adb..036eeb5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -674,11 +674,12 @@ fn defilter( let upper_iter = last_scanline[BPP..].chunks_exact(BPP); for (current_chunk, upper_chunk) in (&mut chunk_iter).zip(upper_iter) { - for ((current_byte, left_byte), upper_byte) in - current_chunk.iter_mut().zip(left_chunk).zip(upper_chunk) - { - *current_byte = current_byte - .wrapping_add(((*left_byte as u16 + *upper_byte as u16) / 2) as u8); + for i in 0..BPP { + let left_byte = left_chunk[i]; + let upper_byte = upper_chunk[i]; + + current_chunk[i] = current_chunk[i] + .wrapping_add(((left_byte as u16 + upper_byte as u16) / 2) as u8); } left_chunk = current_chunk; @@ -699,15 +700,18 @@ fn defilter( for ((current_chunk, upper_left_chunk), upper_chunk) in (&mut chunk_iter).zip(upper_left_iter).zip(upper_iter) { - for (((current_byte, left_byte), upper_left_byte), upper_byte) in - current_chunk.iter_mut().zip(left_chunk).zip(upper_left_chunk).zip(upper_chunk) - { + for i in 0..BPP { + let left_byte = left_chunk[i]; + let upper_left_byte = upper_left_chunk[i]; + let upper_byte = upper_chunk[i]; + let predictor = paeth_predictor( - *left_byte as i16, - *upper_byte as i16, - *upper_left_byte as i16, + left_byte as i16, + upper_byte as i16, + upper_left_byte as i16, ); - *current_byte = current_byte.wrapping_add(predictor); + + current_chunk[i] = current_chunk[i].wrapping_add(predictor); } left_chunk = current_chunk;