Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 1.6.2

### Enhancement
- Make `dpi` an explicit parameter of `convert_pdf_to_image` (default 200) instead of reading it internally from config (the `PDF_RENDER_DPI` setting, which defaulted to 350, is removed), so that `unstructured` can use this function as the single source of truth for PDF rendering

## 1.6.1

### Enhancement
Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.6.1" # pragma: no cover
__version__ = "1.6.2" # pragma: no cover
5 changes: 0 additions & 5 deletions unstructured_inference/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,5 @@ def IMG_PROCESSOR_SHORTEST_EDGE(self) -> int:
"""configuration for DetrImageProcessor to scale images"""
return self._get_int("IMG_PROCESSOR_SHORTEST_EDGE", 800)

@property
def PDF_RENDER_DPI(self) -> int:
"""DPI to render PDF pages to images"""
return self._get_int("PDF_RENDER_DPI", 350)


inference_config = InferenceConfig()
11 changes: 5 additions & 6 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import pypdfium2 as pdfium
from PIL import Image, ImageSequence

from unstructured_inference.config import inference_config
from unstructured_inference.inference.elements import (
TextRegion,
)
Expand Down Expand Up @@ -412,15 +411,17 @@ def process_file_with_model(
def convert_pdf_to_image(
filename: Optional[str] = None,
file: Optional[Union[bytes, BinaryIO]] = None,
dpi: Optional[int] = None,
dpi: int = 200,
output_folder: Optional[Union[str, PurePath]] = None,
path_only: bool = False,
first_page: Optional[int] = None,
last_page: Optional[int] = None,
password: Optional[str] = None,
) -> Union[List[Image.Image], List[str]]:
"""
Centralized function to render PDF pages using pypdfium.
"""Render PDF pages to PIL images or saved PNGs using pypdfium2.

This is the single source of truth for PDF→image rendering across unstructured
and unstructured-inference. Callers should pass their own DPI value explicitly.
"""
if path_only and not output_folder:
raise ValueError("output_folder must be specified if path_only is true")
Expand All @@ -430,8 +431,6 @@ def convert_pdf_to_image(
assert Path(output_folder).exists()
assert Path(output_folder).is_dir()

if dpi is None:
dpi = inference_config.PDF_RENDER_DPI
scale = dpi / 72.0

with _pdfium_lock:
Expand Down
Loading