diff --git a/CHANGELOG.md b/CHANGELOG.md index a81e4f50..1963a4e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 1.6.2 + +### Enhancement +- Make `dpi` an explicit parameter on `convert_pdf_to_image` (default 200) instead of reading it from config internally, so that unstructured can use this function as the single source of truth for PDF rendering. The `PDF_RENDER_DPI` config property (previous default 350) is removed; callers that relied on it must now pass `dpi` explicitly. + ## 1.6.1 ### Enhancement diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 14b8f278..4e4d3296 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.6.1" # pragma: no cover +__version__ = "1.6.2" # pragma: no cover diff --git a/unstructured_inference/config.py b/unstructured_inference/config.py index ff2ce660..f9364bd4 100644 --- a/unstructured_inference/config.py +++ b/unstructured_inference/config.py @@ -116,10 +116,5 @@ def IMG_PROCESSOR_SHORTEST_EDGE(self) -> int: """configuration for DetrImageProcessor to scale images""" return self._get_int("IMG_PROCESSOR_SHORTEST_EDGE", 800) - @property - def PDF_RENDER_DPI(self) -> int: - """DPI to render PDF pages to images""" - return self._get_int("PDF_RENDER_DPI", 350) - inference_config = InferenceConfig() diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index c8090282..1b5795e9 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -11,7 +11,6 @@ import pypdfium2 as pdfium from PIL import Image, ImageSequence -from unstructured_inference.config import inference_config from unstructured_inference.inference.elements import ( TextRegion, ) @@ -412,15 +411,17 @@ def process_file_with_model( def convert_pdf_to_image( filename: Optional[str] = None, file: Optional[Union[bytes, BinaryIO]] = None, - dpi: Optional[int] = None, + dpi: int = 200, output_folder: Optional[Union[str, PurePath]] = None, path_only: bool = False, first_page: Optional[int] = None, 
last_page: Optional[int] = None, password: Optional[str] = None, ) -> Union[List[Image.Image], List[str]]: - """ - Centralized function to render PDF pages using pypdfium. + """Render PDF pages to PIL images or saved PNGs using pypdfium2. + + This is the single source of truth for PDF→image rendering across unstructured + and unstructured-inference. Callers should pass their own DPI value explicitly. """ if path_only and not output_folder: raise ValueError("output_folder must be specified if path_only is true") @@ -430,8 +431,6 @@ def convert_pdf_to_image( assert Path(output_folder).exists() assert Path(output_folder).is_dir() - if dpi is None: - dpi = inference_config.PDF_RENDER_DPI scale = dpi / 72.0 with _pdfium_lock: