Skip to content

Commit c7a5216

Browse files
committed
fix: downsample large images during thumbnail rendering instead of skipping them (250k pixel budget)
1 parent e47b3d7 commit c7a5216

3 files changed

Lines changed: 69 additions & 37 deletions

File tree

open-pdf-render/src/interpreter.rs

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -111,27 +111,29 @@ impl TextState {
111111
pub struct Interpreter;
112112

113113
impl Interpreter {
114-
/// Execute content stream, rendering all content including images.
114+
/// Execute content stream, rendering all content including full-resolution images.
115115
pub fn execute(
116116
content_bytes: &[u8],
117117
renderer: &mut SkiaRenderer,
118118
state: &mut GraphicsStateStack,
119119
doc: &Document,
120120
resources: &Dictionary,
121121
) -> Result<(), RenderError> {
122-
Self::execute_internal(content_bytes, renderer, state, doc, resources, false)
122+
Self::execute_internal(content_bytes, renderer, state, doc, resources, 0)
123123
}
124124

125-
/// Execute content stream but skip image XObjects. Used for fast
126-
/// thumbnail rendering where image decoding would take seconds.
127-
pub fn execute_skip_images(
125+
/// Execute content stream with a pixel budget for images. Images larger
126+
/// than `max_pixels` are downsampled after decode. Use for thumbnails
127+
/// to keep rendering fast without skipping images entirely.
128+
pub fn execute_with_image_limit(
128129
content_bytes: &[u8],
129130
renderer: &mut SkiaRenderer,
130131
state: &mut GraphicsStateStack,
131132
doc: &Document,
132133
resources: &Dictionary,
134+
max_pixels: u32,
133135
) -> Result<(), RenderError> {
134-
Self::execute_internal(content_bytes, renderer, state, doc, resources, true)
136+
Self::execute_internal(content_bytes, renderer, state, doc, resources, max_pixels)
135137
}
136138

137139
fn execute_internal(
@@ -140,7 +142,7 @@ impl Interpreter {
140142
state: &mut GraphicsStateStack,
141143
doc: &Document,
142144
resources: &Dictionary,
143-
skip_images: bool,
145+
max_image_pixels: u32,
144146
) -> Result<(), RenderError> {
145147
let content = Content::decode(content_bytes)
146148
.map_err(|e| RenderError::ParseError(format!("Content decode: {}", e)))?;
@@ -222,7 +224,7 @@ impl Interpreter {
222224
"W" | "W*" => {}
223225
"BT" | "ET" | "Tf" | "Td" | "TD" | "Tm" | "Tj" | "TJ" | "T*" | "'" | "\"" | "Tc" | "Tw" | "Tz" | "TL" | "Ts" | "Tr" => {}
224226
"Do" => {
225-
Self::handle_do_execute(&op.operands, renderer, state, doc, resources, skip_images);
227+
Self::handle_do_execute(&op.operands, renderer, state, doc, resources, max_image_pixels);
226228
}
227229
"gs" | "ri" | "i" => {}
228230
_ => {}
@@ -237,7 +239,7 @@ impl Interpreter {
237239
state: &mut GraphicsStateStack,
238240
doc: &Document,
239241
resources: &Dictionary,
240-
skip_images: bool,
242+
max_image_pixels: u32,
241243
) {
242244
let name = match operands.first() {
243245
Some(Object::Name(n)) => n,
@@ -265,9 +267,7 @@ impl Interpreter {
265267
};
266268
let subtype = stream.dict.get(b"Subtype").ok().and_then(|s| s.as_name().ok());
267269
if subtype == Some(b"Image" as &[u8]) {
268-
if !skip_images {
269-
Self::handle_image_execute(stream, renderer, state, doc);
270-
}
270+
Self::handle_image_execute(stream, renderer, state, doc, max_image_pixels);
271271
return;
272272
}
273273
if subtype != Some(b"Form" as &[u8]) {
@@ -288,16 +288,20 @@ impl Interpreter {
288288
let form_resources = Self::extract_form_resources(&stream.dict, doc);
289289
let res = form_resources.as_ref().unwrap_or(resources);
290290
if let Ok(content_bytes) = stream.decompressed_content() {
291-
let _ = Self::execute_internal(&content_bytes, renderer, state, doc, res, skip_images);
291+
let _ = Self::execute_internal(&content_bytes, renderer, state, doc, res, max_image_pixels);
292292
}
293293
state.restore();
294294
}
295295

296+
/// Decode and draw an image XObject. When `max_decode_pixels` is non-zero,
297+
/// images with more pixels than that limit are downsampled after the full
298+
/// decode so less data is drawn (used for thumbnails); 0 disables the limit.
296299
fn handle_image_execute(
297300
stream: &lopdf::Stream,
298301
renderer: &mut SkiaRenderer,
299302
state: &mut GraphicsStateStack,
300303
doc: &Document,
304+
max_decode_pixels: u32,
301305
) {
302306
let dict = &stream.dict;
303307
let width = dict.get(b"Width").ok()
@@ -332,15 +336,15 @@ impl Interpreter {
332336
});
333337
let filter_name = filter.as_deref().unwrap_or(b"");
334338

335-
let rgba = if filter_name == b"DCTDecode" {
339+
// Decode image to RGBA
340+
let (mut img_w, mut img_h, mut rgba) = if filter_name == b"DCTDecode" {
336341
let raw = &stream.content;
337342
match image::load_from_memory_with_format(raw, image::ImageFormat::Jpeg) {
338343
Ok(img) => {
339344
let img = img.to_rgba8();
340-
if img.width() != width || img.height() != height {
341-
return;
342-
}
343-
img.into_raw()
345+
let w = img.width();
346+
let h = img.height();
347+
(w, h, img.into_raw())
344348
}
345349
Err(_) => return,
346350
}
@@ -374,41 +378,67 @@ impl Interpreter {
374378
let expected = width as usize * height as usize * components;
375379
if raw_pixels.len() < expected { return; }
376380

377-
let mut rgba = Vec::with_capacity(width as usize * height as usize * 4);
381+
let mut out = Vec::with_capacity(width as usize * height as usize * 4);
378382
let mut idx = 0;
379383
for _ in 0..(width as usize * height as usize) {
380384
match components {
381385
1 => {
382386
let g = raw_pixels[idx];
383-
rgba.extend_from_slice(&[g, g, g, 255]);
387+
out.extend_from_slice(&[g, g, g, 255]);
384388
idx += 1;
385389
}
386390
3 => {
387-
rgba.extend_from_slice(&[raw_pixels[idx], raw_pixels[idx+1], raw_pixels[idx+2], 255]);
391+
out.extend_from_slice(&[raw_pixels[idx], raw_pixels[idx+1], raw_pixels[idx+2], 255]);
388392
idx += 3;
389393
}
390394
4 => {
391395
let c = raw_pixels[idx] as f32 / 255.0;
392396
let m = raw_pixels[idx+1] as f32 / 255.0;
393397
let y = raw_pixels[idx+2] as f32 / 255.0;
394398
let k = raw_pixels[idx+3] as f32 / 255.0;
395-
rgba.extend_from_slice(&[
399+
out.extend_from_slice(&[
396400
(255.0 * (1.0 - c) * (1.0 - k)) as u8,
397401
(255.0 * (1.0 - m) * (1.0 - k)) as u8,
398402
(255.0 * (1.0 - y) * (1.0 - k)) as u8,
399403
255,
400404
]);
401405
idx += 4;
402406
}
403-
_ => { rgba.extend_from_slice(&[0, 0, 0, 255]); idx += components; }
407+
_ => { out.extend_from_slice(&[0, 0, 0, 255]); idx += components; }
404408
}
405409
}
406-
rgba
410+
(width, height, out)
407411
};
408412

413+
// Downsample if image exceeds the pixel budget (fast nearest-neighbour sampling).
414+
// With the 250k thumbnail budget a 5000×5000 image is drawn at 500×500; the full decode still runs first.
415+
if max_decode_pixels > 0 && img_w * img_h > max_decode_pixels {
416+
let ratio = (max_decode_pixels as f64 / (img_w as f64 * img_h as f64)).sqrt();
417+
let new_w = ((img_w as f64 * ratio).ceil() as u32).max(1);
418+
let new_h = ((img_h as f64 * ratio).ceil() as u32).max(1);
419+
let sx = img_w as f64 / new_w as f64;
420+
let sy = img_h as f64 / new_h as f64;
421+
let mut small = Vec::with_capacity((new_w * new_h * 4) as usize);
422+
for dy in 0..new_h {
423+
for dx in 0..new_w {
424+
let src_x = (dx as f64 * sx) as usize;
425+
let src_y = (dy as f64 * sy) as usize;
426+
let src_idx = (src_y * img_w as usize + src_x) * 4;
427+
if src_idx + 3 < rgba.len() {
428+
small.extend_from_slice(&rgba[src_idx..src_idx + 4]);
429+
} else {
430+
small.extend_from_slice(&[0, 0, 0, 255]);
431+
}
432+
}
433+
}
434+
img_w = new_w;
435+
img_h = new_h;
436+
rgba = small;
437+
}
438+
409439
state.save();
410440
state.concat_matrix(1.0, 0.0, 0.0, -1.0, 0.0, 1.0);
411-
renderer.draw_image(width, height, &rgba, &state.current);
441+
renderer.draw_image(img_w, img_h, &rgba, &state.current);
412442
state.restore();
413443
}
414444

open-pdf-render/src/parser.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,18 @@ impl DocumentHandle {
4141
/// app (e.g. user-applied rotation via the rotate-left/right buttons).
4242
/// Both rotations are clockwise-when-displayed, in degrees.
4343
pub fn render_page(&self, page: usize, scale: f32, extra_rotation: i32) -> Result<RenderedPage, RenderError> {
44-
self.render_page_internal(page, scale, extra_rotation, false)
44+
self.render_page_internal(page, scale, extra_rotation, 0)
4545
}
4646

47-
/// Render a page without decoding embedded images. Produces vector-only
48-
/// output suitable for thumbnails — runs in milliseconds instead of
49-
/// seconds for image-heavy pages.
50-
pub fn render_page_no_images(&self, page: usize, scale: f32, extra_rotation: i32) -> Result<RenderedPage, RenderError> {
51-
self.render_page_internal(page, scale, extra_rotation, true)
47+
/// Render a page with a pixel budget for embedded images. Images larger
48+
/// than `max_image_pixels` total pixels are downsampled after decode.
49+
/// Use for thumbnails: e.g. `max_image_pixels = 250_000` (500×500)
50+
/// keeps images visible but limits how much image data is drawn; 0 disables the limit.
51+
pub fn render_page_with_image_limit(&self, page: usize, scale: f32, extra_rotation: i32, max_image_pixels: u32) -> Result<RenderedPage, RenderError> {
52+
self.render_page_internal(page, scale, extra_rotation, max_image_pixels)
5253
}
5354

54-
fn render_page_internal(&self, page: usize, scale: f32, extra_rotation: i32, skip_images: bool) -> Result<RenderedPage, RenderError> {
55+
fn render_page_internal(&self, page: usize, scale: f32, extra_rotation: i32, max_image_pixels: u32) -> Result<RenderedPage, RenderError> {
5556
let page_id = self.get_page_id(page)?;
5657
let (x0, y0, w_pt, h_pt) = self.extract_media_box_full(page_id)?;
5758

@@ -89,8 +90,8 @@ impl DocumentHandle {
8990

9091
let content_bytes = self.get_content_stream(page_id)?;
9192
let resources = self.get_page_resources(page_id)?;
92-
if skip_images {
93-
crate::interpreter::Interpreter::execute_skip_images(&content_bytes, &mut renderer, &mut state, &self.doc, &resources)?;
93+
if max_image_pixels > 0 {
94+
crate::interpreter::Interpreter::execute_with_image_limit(&content_bytes, &mut renderer, &mut state, &self.doc, &resources, max_image_pixels)?;
9495
} else {
9596
crate::interpreter::Interpreter::execute(&content_bytes, &mut renderer, &mut state, &self.doc, &resources)?;
9697
}

open-pdf-studio/src-tauri/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -926,10 +926,11 @@ fn render_thumbnail(
926926
// Scale so the longest side fits within max_width pixels
927927
let scale = max_width as f32 / w_pt.max(h_pt);
928928

929-
// Render at thumbnail scale — skip_images=true skips heavy image
930-
// decoding so thumbnails render in milliseconds instead of seconds.
929+
// Render at thumbnail scale. When skipImages is set, cap embedded images
930+
// at 250k pixels (≈500×500): larger images are downsampled after decode
931+
// rather than rendered at full resolution (which can take 17+ seconds).
931932
let page = if skip_img {
932-
doc.render_page_no_images(page_index as usize, scale, extra_rot)
933+
doc.render_page_with_image_limit(page_index as usize, scale, extra_rot, 250_000)
933934
} else {
934935
doc.render_page(page_index as usize, scale, extra_rot)
935936
}.map_err(|e| format!("{}", e))?;

0 commit comments

Comments
 (0)