Skip to content

Commit 040a460

Browse files
committed
mem: make ONNX memory arena configurable via env var
Replace unconditional disable of enable_mem_pattern and enable_cpu_mem_arena with opt-in via the ONNX_DISABLE_MEMORY_ARENA env var. Default behavior is unchanged (arena enabled, ~15% faster inference). Setting the env var to "1" or "true" (case-insensitive, whitespace-trimmed) disables both options, saving ~209 MB idle RSS per session at a ~15% latency cost. Env var is read once at module init time.
2 parents 9678c7b + 6a085c6 commit 040a460

File tree

4 files changed

+29
-10
lines changed

4 files changed

+29
-10
lines changed

CHANGELOG.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
## 1.5.3
1+
## 1.5.6
22

3-
- Store routing in LayoutElement
3+
### Enhancement
4+
- Make ONNX Runtime memory arena configurable via `ONNX_DISABLE_MEMORY_ARENA` env var (default: enabled). Set to `1` or `true` to trade ~15% inference latency for ~209 MB idle memory savings per session.
5+
6+
## 1.5.4
47

58
### Enhancement
6-
- Disable ONNX Runtime memory pattern and CPU memory arena on YoloX and Detectron2 sessions to reduce idle memory after inference
9+
- Use `np.full()` instead of `np.ones() * scalar` in YoloX preprocessing to avoid a redundant temporary array
10+
11+
## 1.5.3
12+
13+
- Store routing in LayoutElement
714

815
## 1.5.2
916

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.5.3" # pragma: no cover
1+
__version__ = "1.5.6" # pragma: no cover

unstructured_inference/models/detectron2onnx.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
download_if_needed_and_get_local_path,
2222
)
2323

24+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
25+
"ONNX_DISABLE_MEMORY_ARENA", ""
26+
).strip().lower() in ("1", "true")
27+
2428
onnxruntime.set_default_logger_severity(logger_onnx.getEffectiveLevel())
2529

2630
DEFAULT_LABEL_MAP: Final[Dict[int, str]] = {
@@ -116,8 +120,9 @@ def initialize(
116120
providers = [provider for provider in ordered_providers if provider in available_providers]
117121

118122
sess_options = onnxruntime.SessionOptions()
119-
sess_options.enable_mem_pattern = False
120-
sess_options.enable_cpu_mem_arena = False
123+
if _ONNX_DISABLE_MEMORY_ARENA:
124+
sess_options.enable_mem_pattern = False
125+
sess_options.enable_cpu_mem_arena = False
121126

122127
self.model = onnxruntime.InferenceSession(
123128
model_path,

unstructured_inference/models/yolox.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py
44
# https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py
55

6+
import os
7+
68
import cv2
79
import numpy as np
810
import onnxruntime
@@ -20,6 +22,10 @@
2022
download_if_needed_and_get_local_path,
2123
)
2224

25+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
26+
"ONNX_DISABLE_MEMORY_ARENA", ""
27+
).strip().lower() in ("1", "true")
28+
2329
YOLOX_LABEL_MAP = {
2430
0: ElementType.CAPTION,
2531
1: ElementType.FOOTNOTE,
@@ -81,8 +87,9 @@ def initialize(self, model_path: str, label_map: dict):
8187
providers = [provider for provider in ordered_providers if provider in available_providers]
8288

8389
sess_options = onnxruntime.SessionOptions()
84-
sess_options.enable_mem_pattern = False
85-
sess_options.enable_cpu_mem_arena = False
90+
if _ONNX_DISABLE_MEMORY_ARENA:
91+
sess_options.enable_mem_pattern = False
92+
sess_options.enable_cpu_mem_arena = False
8693

8794
self.model = onnxruntime.InferenceSession(
8895
model_path,
@@ -155,9 +162,9 @@ def image_processing(
155162
def preprocess(img, input_size, swap=(2, 0, 1)):
156163
"""Preprocess image data before YoloX inference."""
157164
if len(img.shape) == 3:
158-
padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
165+
padded_img = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
159166
else:
160-
padded_img = np.ones(input_size, dtype=np.uint8) * 114
167+
padded_img = np.full(input_size, 114, dtype=np.uint8)
161168

162169
r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
163170
resized_img = cv2.resize(

0 commit comments

Comments
 (0)