diff --git a/CHANGELOG.md b/CHANGELOG.md index 1963a4e7..66bf5656 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 1.6.3 + +### Enhancement +- Make ONNX Runtime memory arena configurable via `ONNX_DISABLE_MEMORY_ARENA` env var (default: enabled). Set to `1` to trade ~15% inference latency for ~209 MB idle memory savings per session. + ## 1.6.2 ### Enhancement @@ -17,6 +22,7 @@ ### Enhancement - Lazy page rendering in `convert_pdf_to_image` to reduce peak memory from O(N pages) to O(1 page) +>>>>>>> upstream/main ## 1.5.4 diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 4e4d3296..f005954b 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "1.6.2" # pragma: no cover +__version__ = "1.6.3" # pragma: no cover diff --git a/unstructured_inference/models/detectron2onnx.py b/unstructured_inference/models/detectron2onnx.py index 79cd0a1a..e47dccc1 100644 --- a/unstructured_inference/models/detectron2onnx.py +++ b/unstructured_inference/models/detectron2onnx.py @@ -21,6 +21,11 @@ download_if_needed_and_get_local_path, ) +_ONNX_DISABLE_MEMORY_ARENA = os.environ.get("ONNX_DISABLE_MEMORY_ARENA", "").strip().lower() in ( + "1", + "true", +) + onnxruntime.set_default_logger_severity(logger_onnx.getEffectiveLevel()) DEFAULT_LABEL_MAP: Final[Dict[int, str]] = { @@ -115,8 +120,14 @@ def initialize( ] providers = [provider for provider in ordered_providers if provider in available_providers] + sess_options = onnxruntime.SessionOptions() + if _ONNX_DISABLE_MEMORY_ARENA: + sess_options.enable_mem_pattern = False + sess_options.enable_cpu_mem_arena = False + self.model = onnxruntime.InferenceSession( model_path, + sess_options=sess_options, providers=providers, ) self.model_path = model_path diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py index cdf1f405..c3b466d3 100644 --- a/unstructured_inference/models/yolox.py +++ b/unstructured_inference/models/yolox.py @@ -3,6 +3,8 @@ # https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py # https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py +import os + import cv2 import numpy as np import onnxruntime @@ -20,6 +22,11 @@ download_if_needed_and_get_local_path, ) +_ONNX_DISABLE_MEMORY_ARENA = os.environ.get("ONNX_DISABLE_MEMORY_ARENA", "").strip().lower() in ( + "1", + "true", +) + YOLOX_LABEL_MAP = { 0: ElementType.CAPTION, 1: ElementType.FOOTNOTE, @@ -80,8 +87,14 @@ def initialize(self, model_path: str, label_map: dict): ] providers = [provider for provider in ordered_providers if provider in available_providers] + sess_options = onnxruntime.SessionOptions() + if _ONNX_DISABLE_MEMORY_ARENA: + sess_options.enable_mem_pattern = False + sess_options.enable_cpu_mem_arena = False + self.model = onnxruntime.InferenceSession( model_path, + sess_options=sess_options, providers=providers, )