diff --git a/CHANGELOG.md b/CHANGELOG.md index c71091ce..67d638cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 1.1.9 + +- Fix: Remove manual pypdfium2 close() calls to prevent finalization AssertionError during concurrent PDF processing. Let pypdfium2 finalizers handle resource cleanup. + +## 1.1.8 + +- Fix: Remove `device_map` parameter from TableTransformerForObjectDetection to prevent meta tensor errors during multi-threaded table detection. Added device normalization and explicit dtype specification for robustness. + ## 1.1.7 - Update OpenCV-Python to 4.13.0.90 to squash ffmpeg vulnerability CVE-2023-6605 diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index efc8b021..b11bd438 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -426,24 +426,21 @@ def convert_pdf_to_image( dpi = inference_config.PDF_RENDER_DPI scale = dpi / 72.0 for i, page in enumerate(pdf, start=1): - try: - if first_page is not None and i < first_page: - continue - if last_page is not None and i > last_page: - break - bitmap = page.render( - scale=scale, - no_smoothtext=False, - no_smoothimage=False, - no_smoothpath=False, - optimize_mode="print", - ) - try: - images[i] = bitmap.to_pil() - finally: - bitmap.close() - finally: - page.close() + if first_page is not None and i < first_page: + continue + if last_page is not None and i > last_page: + break + bitmap = page.render( + scale=scale, + no_smoothtext=False, + no_smoothimage=False, + no_smoothpath=False, + optimize_mode="print", + ) + # Convert to PIL immediately, then let pypdfium2 finalizers handle cleanup + images[i] = bitmap.to_pil() + # No manual close - prevents finalization race with parent PDF close + if not output_folder: return list(images.values()) else: diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index 3dc3ec1d..57716ef5 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -72,7 +72,7 @@ def initialize( ): """Loads the donut model using the specified parameters""" self.device = device - self.feature_extractor = DetrImageProcessor.from_pretrained(model, device_map=self.device) + self.feature_extractor = DetrImageProcessor.from_pretrained(model) # value not set in the configuration and needed for newer models # https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all/discussions/1 self.feature_extractor.size["shortest_edge"] = inference_config.IMG_PROCESSOR_SHORTEST_EDGE @@ -82,10 +82,8 @@ def initialize( logger.info("Loading the table structure model ...") cached_current_verbosity = logging.get_verbosity() logging.set_verbosity_error() - self.model = TableTransformerForObjectDetection.from_pretrained( - model, - device_map=self.device, - ) + self.model = TableTransformerForObjectDetection.from_pretrained(model) + self.model.to(self.device) logging.set_verbosity(cached_current_verbosity) self.model.eval()