Skip to content

Commit 128436c

Browse files
committed
mem: make ONNX memory arena configurable via env var
Replace unconditional disable of enable_mem_pattern and enable_cpu_mem_arena with opt-in via ONNX_DISABLE_MEMORY_ARENA=1. Default behavior is unchanged (arena enabled, ~15% faster inference). Setting the env var disables both options, saving ~209 MB idle RSS per session at a ~15% latency cost. Env var is read once at module init time.
2 parents 9678c7b + 6a085c6 commit 128436c

4 files changed

Lines changed: 29 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
## 1.5.3
1+
## 1.5.6
22

3-
- Store routing in LayoutElement
3+
### Enhancement
4+
- Make ONNX Runtime memory arena configurable via `ONNX_DISABLE_MEMORY_ARENA` env var (default: enabled). Set to `1` to trade ~15% inference latency for ~209 MB idle memory savings per session.
5+
6+
## 1.5.4
47

58
### Enhancement
6-
- Disable ONNX Runtime memory pattern and CPU memory arena on YoloX and Detectron2 sessions to reduce idle memory after inference
9+
- Use `np.full()` instead of `np.ones() * scalar` in YoloX preprocessing to avoid a redundant temporary array
10+
11+
## 1.5.3
12+
13+
- Store routing in LayoutElement
714

815
## 1.5.2
916

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.5.3" # pragma: no cover
1+
__version__ = "1.5.6" # pragma: no cover

unstructured_inference/models/detectron2onnx.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
from onnxruntime.quantization import QuantType, quantize_dynamic
1010
from PIL import Image
1111

12+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
13+
"ONNX_DISABLE_MEMORY_ARENA", ""
14+
).strip().lower() in ("1", "true")
15+
1216
from unstructured_inference.constants import Source
1317
from unstructured_inference.inference.layoutelement import LayoutElement
1418
from unstructured_inference.logger import logger, logger_onnx
@@ -116,8 +120,9 @@ def initialize(
116120
providers = [provider for provider in ordered_providers if provider in available_providers]
117121

118122
sess_options = onnxruntime.SessionOptions()
119-
sess_options.enable_mem_pattern = False
120-
sess_options.enable_cpu_mem_arena = False
123+
if _ONNX_DISABLE_MEMORY_ARENA:
124+
sess_options.enable_mem_pattern = False
125+
sess_options.enable_cpu_mem_arena = False
121126

122127
self.model = onnxruntime.InferenceSession(
123128
model_path,

unstructured_inference/models/yolox.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33
# https://github.com/Megvii-BaseDetection/YOLOX/blob/237e943ac64aa32eb32f875faa93ebb18512d41d/yolox/data/data_augment.py
44
# https://github.com/Megvii-BaseDetection/YOLOX/blob/ac379df3c97d1835ebd319afad0c031c36d03f36/yolox/utils/demo_utils.py
55

6+
import os
7+
68
import cv2
79
import numpy as np
810
import onnxruntime
911
from onnxruntime.capi import _pybind_state as C
1012
from PIL import Image as PILImage
1113

14+
_ONNX_DISABLE_MEMORY_ARENA = os.environ.get(
15+
"ONNX_DISABLE_MEMORY_ARENA", ""
16+
).strip().lower() in ("1", "true")
17+
1218
from unstructured_inference.constants import ElementType, Source
1319
from unstructured_inference.inference.layoutelement import LayoutElements
1420
from unstructured_inference.models.unstructuredmodel import (
@@ -81,8 +87,9 @@ def initialize(self, model_path: str, label_map: dict):
8187
providers = [provider for provider in ordered_providers if provider in available_providers]
8288

8389
sess_options = onnxruntime.SessionOptions()
84-
sess_options.enable_mem_pattern = False
85-
sess_options.enable_cpu_mem_arena = False
90+
if _ONNX_DISABLE_MEMORY_ARENA:
91+
sess_options.enable_mem_pattern = False
92+
sess_options.enable_cpu_mem_arena = False
8693

8794
self.model = onnxruntime.InferenceSession(
8895
model_path,
@@ -155,9 +162,9 @@ def image_processing(
155162
def preprocess(img, input_size, swap=(2, 0, 1)):
156163
"""Preprocess image data before YoloX inference."""
157164
if len(img.shape) == 3:
158-
padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
165+
padded_img = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
159166
else:
160-
padded_img = np.ones(input_size, dtype=np.uint8) * 114
167+
padded_img = np.full(input_size, 114, dtype=np.uint8)
161168

162169
r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
163170
resized_img = cv2.resize(

0 commit comments

Comments
 (0)