Skip to content

Commit 040a460

Browse files
committed
mem: make ONNX memory arena configurable via env var
Replace unconditional disable of enable_mem_pattern and enable_cpu_mem_arena with opt-in via the ONNX_DISABLE_MEMORY_ARENA env var. Default behavior is unchanged (arena enabled, ~15% faster inference). Setting the env var to "1" or "true" (case-insensitive, whitespace-trimmed) disables both options, saving ~209 MB idle RSS per session at a ~15% latency cost. Env var is read once at module init time.
2 parents 9678c7b + 6a085c6 commit 040a460

File tree

4 files changed

+29
-10
lines changed

4 files changed

+29
-10
lines changed

CHANGELOG.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
## 1.5.3
1+
## 1.5.6
22

3-
- Store routing in LayoutElement
3+
### Enhancement
4+
- Make ONNX Runtime memory arena configurable via `ONNX_DISABLE_MEMORY_ARENA` env var (default: enabled). Set to `1` or `true` to trade ~15% inference latency for ~209 MB idle memory savings per session.
5+
6+
## 1.5.4
47

58
### Enhancement
6-
- Disable ONNX Runtime memory pattern and CPU memory arena on YoloX and Detectron2 sessions to reduce idle memory after inference
9+
- Use `np.full()` instead of `np.ones() * scalar` in YoloX preprocessing to avoid a redundant temporary array
10+
11+
## 1.5.3
12+
13+
- Store routing in LayoutElement
714

815
## 1.5.2
916

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.5.3" # pragma: no cover
1+
__version__ = "1.5.6" # pragma: no cover

unstructured_inference/models/detectron2onnx.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
download_if_needed_and_get_local_path,
2222
)
2323

24+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
25+
"ONNX_DISABLE_MEMORY_ARENA", ""
26+
).strip().lower() in ("1", "true")
27+
2428
onnxruntime.set_default_logger_severity(logger_onnx.getEffectiveLevel())
2529

2630
DEFAULT_LABEL_MAP: Final[Dict[int, str]] = {
@@ -116,8 +120,9 @@ def initialize(
116120
providers = [provider for provider in ordered_providers if provider in available_providers]
117121

118122
sess_options = onnxruntime.SessionOptions()
119-
sess_options.enable_mem_pattern = False
120-
sess_options.enable_cpu_mem_arena = False
123+
if _ONNX_DISABLE_MEMORY_ARENA:
124+
sess_options.enable_mem_pattern = False
125+
sess_options.enable_cpu_mem_arena = False
121126

122127
self.model = onnxruntime.InferenceSession(
123128
model_path,

unstructured_inference/models/yolox.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py
44
# https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py
55

6+
import os
7+
68
import cv2
79
import numpy as np
810
import onnxruntime
@@ -20,6 +22,10 @@
2022
download_if_needed_and_get_local_path,
2123
)
2224

25+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
26+
"ONNX_DISABLE_MEMORY_ARENA", ""
27+
).strip().lower() in ("1", "true")
28+
2329
YOLOX_LABEL_MAP = {
2430
0: ElementType.CAPTION,
2531
1: ElementType.FOOTNOTE,
@@ -81,8 +87,9 @@ def initialize(self, model_path: str, label_map: dict):
8187
providers = [provider for provider in ordered_providers if provider in available_providers]
8288

8389
sess_options = onnxruntime.SessionOptions()
84-
sess_options.enable_mem_pattern = False
85-
sess_options.enable_cpu_mem_arena = False
90+
if _ONNX_DISABLE_MEMORY_ARENA:
91+
sess_options.enable_mem_pattern = False
92+
sess_options.enable_cpu_mem_arena = False
8693

8794
self.model = onnxruntime.InferenceSession(
8895
model_path,
@@ -155,9 +162,9 @@ def image_processing(
155162
def preprocess(img, input_size, swap=(2, 0, 1)):
156163
"""Preprocess image data before YoloX inference."""
157164
if len(img.shape) == 3:
158-
padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
165+
padded_img = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
159166
else:
160-
padded_img = np.ones(input_size, dtype=np.uint8) * 114
167+
padded_img = np.full(input_size, 114, dtype=np.uint8)
161168

162169
r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
163170
resized_img = cv2.resize(

0 commit comments

Comments
 (0)