Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions onnxruntime/python/onnxruntime_inference_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,7 @@ def ortvalue_from_shape_and_type(
device_type: str = "cpu",
device_id: int = 0,
vendor_id: int | OrtDeviceVendorId = -1,
memory_info: C.OrtMemoryInfo | None = None,
) -> OrtValue:
"""
Factory method to construct an OrtValue (which holds a Tensor) from given shape and element_type
Expand All @@ -1092,8 +1093,31 @@ def ortvalue_from_shape_and_type(
:param device_type: e.g. cpu, cuda, cann, cpu by default
:param device_id: device id, e.g. 0
:param vendor_id: The device's PCI vendor id as an int or OrtDeviceVendorId. If provided, the device type should be "gpu" or "npu".
:param memory_info: An OrtMemoryInfo from an OrtEpDevice (e.g. via ep_device.memory_info(OrtDeviceMemoryType.HOST_ACCESSIBLE)). When provided, the allocator matching this memory info is used directly, which allows allocating HOST_ACCESSIBLE memory for zero-copy numpy interop. The device_type, device_id, and vendor_id parameters are ignored when memory_info is provided.
"""

if memory_info is not None:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if memory_info is not None:

When memory_info is not None, the other device parameters are silently ignored. The docstring documents this. This is acceptable, but a warnings.warn() or a check that the caller didn't set both memory_info and non-default device params would be more user-friendly.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a warning.

if device_type != "cpu" or device_id != 0 or vendor_id != -1:
warnings.warn(
"device_type, device_id, and vendor_id are ignored when memory_info is provided.",
stacklevel=2,
)
if isinstance(element_type, int):
return cls(
C.OrtValue.ortvalue_from_shape_and_onnx_type_for_memory_info(
shape,
element_type,
memory_info,
)
)
return cls(
C.OrtValue.ortvalue_from_shape_and_type_for_memory_info(
shape,
element_type,
memory_info,
)
)
Comment thread
ericcraw marked this conversation as resolved.

device = OrtDevice.make(device_type, device_id, vendor_id)._get_c_device()

# Integer for onnx element type (see https://onnx.ai/onnx/api/mapping.html).
Expand Down
49 changes: 49 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_ortvalue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,29 @@ std::unique_ptr<OrtValue> OrtValueFromShapeAndType(const std::vector<int64_t>& s
Tensor::InitOrtValue(element_type, gsl::make_span(shape), std::move(allocator), *ml_value);
return ml_value;
}

// Allocate an OrtValue using the shared allocator matching the given OrtMemoryInfo.
// This allows callers to specify the exact memory type (e.g. HOST_ACCESSIBLE) rather than
// relying on OrtDevice.make() which always uses DEFAULT.
//
// Uses the full OrtMemoryInfo for the lookup (including mem_type) rather than just the OrtDevice,
// because the registered allocator's OrtMemoryInfo has a specific mem_type (e.g. OrtMemTypeCPU
// for HOST_ACCESSIBLE) that must match for FindExistingAllocator to succeed.
std::unique_ptr<OrtValue> OrtValueFromShapeAndTypeWithMemoryInfo(const std::vector<int64_t>& shape,
MLDataType element_type,
const OrtMemoryInfo& memory_info) {
auto& env = GetOrtEnv()->GetEnvironment();
AllocatorPtr allocator = env.GetRegisteredSharedAllocator(memory_info);

if (!allocator) {
throw std::runtime_error("No shared allocator found for: " + memory_info.ToString());
}
Comment thread
ericcraw marked this conversation as resolved.

auto ml_value = std::make_unique<OrtValue>();
Tensor::InitOrtValue(element_type, gsl::make_span(shape), std::move(allocator), *ml_value);
return ml_value;
}

} // namespace

void addOrtValueMethods(pybind11::module& m) {
Expand Down Expand Up @@ -289,6 +312,32 @@ void addOrtValueMethods(pybind11::module& m) {
auto element_type = OnnxTypeToOnnxRuntimeTensorType(onnx_element_type);
return OrtValueFromShapeAndType(shape, element_type, device);
})
// Factory methods to create an OrtValue using an OrtMemoryInfo to select the allocator.
// This enables allocation with a specific memory type (e.g. HOST_ACCESSIBLE) from plugin EPs.
.def_static("ortvalue_from_shape_and_type_for_memory_info", [](const std::vector<int64_t>& shape, py::object& numpy_element_type, const OrtMemoryInfo& memory_info) -> std::unique_ptr<OrtValue> {
PyArray_Descr* dtype;
if (!PyArray_DescrConverter(numpy_element_type.ptr(), &dtype)) {
throw std::runtime_error("Not a valid numpy type");
}

int type_num = dtype->type_num;
Py_DECREF(dtype);

if (!IsNumericNumpyType(type_num)) {
throw std::runtime_error("Creation of OrtValues is currently only supported from non-string numpy arrays");
}

auto element_type = NumpyTypeToOnnxRuntimeTensorType(type_num);
return OrtValueFromShapeAndTypeWithMemoryInfo(shape, element_type, memory_info);
})
.def_static("ortvalue_from_shape_and_onnx_type_for_memory_info", [](const std::vector<int64_t>& shape, int32_t onnx_element_type, const OrtMemoryInfo& memory_info) -> std::unique_ptr<OrtValue> {
if (onnx_element_type == onnx::TensorProto_DataType::TensorProto_DataType_STRING) {
throw std::runtime_error("Creation of OrtValues is currently only supported from non-string numpy arrays");
}

auto element_type = OnnxTypeToOnnxRuntimeTensorType(onnx_element_type);
return OrtValueFromShapeAndTypeWithMemoryInfo(shape, element_type, memory_info);
})

#if !defined(DISABLE_SPARSE_TENSORS)
.def_static("ort_value_from_sparse_tensor", [](const PySparseTensor* py_sparse_tensor) -> std::unique_ptr<OrtValue> {
Expand Down
3 changes: 1 addition & 2 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -278,13 +278,12 @@ py::object GetPyObjFromTensor(const OrtValue& ort_value,
return py::cast<py::object>(result);
}

const auto device_type = device.Type();
// Create a numpy array on top of the OrtValue memory, no copy,
// but only when the tensor owns the buffer. When the tensor wraps external
// memory (e.g. a numpy input array passed through as output), the buffer
// lifetime is not tied to the OrtValue and zero-copy would create a
// dangling pointer. See https://github.com/microsoft/onnxruntime/issues/21922
if (device_type == OrtDevice::CPU) {
if (device.UsesCpuMemory()) {
if (tensor.OwnsBuffer() || zero_copy_non_owning) {
py::array result = PrimitiveTensorToNumpyOverOrtValue(ort_value);
return py::cast<py::object>(result);
Expand Down
28 changes: 16 additions & 12 deletions onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ ExampleEpFactory::ExampleEpFactory(const char* ep_name, ApiPtrs apis, const OrtL
default_logger_{default_logger},
ep_name_{ep_name},
default_memory_info_{nullptr},
readonly_memory_info_{nullptr} {
readonly_memory_info_{nullptr},
host_accessible_memory_info_{nullptr} {
ort_version_supported = ORT_API_VERSION; // set to the ORT version we were compiled with.
GetName = GetNameImpl;
GetVendor = GetVendorImpl;
Expand Down Expand Up @@ -71,12 +72,12 @@ ExampleEpFactory::ExampleEpFactory(const char* ep_name, ApiPtrs apis, const OrtL

// HOST_ACCESSIBLE memory example. use the non-CPU device type so it's clear which device the memory is also
// accessible from. we infer from the type of HOST_ACCESSIBLE that it's CPU accessible.
auto host_accessible_memory_info = Ort::MemoryInfo{"ExampleEP GPU pinned",
OrtMemoryInfoDeviceType_GPU,
/*vendor*/ 0xBE57, /* device_id */ 0,
OrtDeviceMemoryType_HOST_ACCESSIBLE,
/*alignment*/ 0,
OrtAllocatorType::OrtDeviceAllocator};
host_accessible_memory_info_ = Ort::MemoryInfo{"ExampleEP GPU pinned",
OrtMemoryInfoDeviceType_GPU,
/*vendor*/ 0xBE57, /* device_id */ 0,
OrtDeviceMemoryType_HOST_ACCESSIBLE,
/*alignment*/ 0,
OrtAllocatorType::OrtDeviceAllocator};
// Custom Op Domains
custom_op_domains_[0] = Ort::CustomOpDomain{"test"};
custom_op_domains_[1] = Ort::CustomOpDomain{"test2"};
Expand Down Expand Up @@ -156,10 +157,11 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::GetSupportedDevicesImpl(OrtEpFactory*
}

// register the allocator info required by the EP.
// registering OrtMemoryInfo for host accessible memory would be done in an additional call.
// OrtReadOnlyAllocator + OrtDeviceMemoryType_DEFAULT allocator for use with initializers is optional.
// OrtDeviceMemoryType_HOST_ACCESSIBLE is also optional and exposes CPU-accessible memory on the EP device.
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->default_memory_info_));
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->readonly_memory_info_));
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->host_accessible_memory_info_));

ep_devices[num_ep_devices++] = ep_device;
}
Expand Down Expand Up @@ -244,8 +246,9 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::CreateAllocatorImpl(OrtEpFactory* this

bool is_default_allocator = memory_info == factory.default_memory_info_;
bool is_readonly_allocator = memory_info == factory.readonly_memory_info_;
bool is_host_accessible_allocator = memory_info == factory.host_accessible_memory_info_;

if (!is_default_allocator && !is_readonly_allocator) {
if (!is_default_allocator && !is_readonly_allocator && !is_host_accessible_allocator) {
return factory.ort_api.CreateStatus(ORT_INVALID_ARGUMENT,
"INTERNAL ERROR! Unknown memory info provided to CreateAllocator. "
"Value did not come directly from an OrtEpDevice returned by this factory.");
Expand All @@ -261,9 +264,10 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::CreateAllocatorImpl(OrtEpFactory* this
// You are of course free to have completely different settings.

// the read-only allocator is used for initializers. we don't need an arena for that.
if (is_readonly_allocator) {
auto read_only_allocator = std::make_unique<CustomAllocator>(memory_info, factory);
*allocator = read_only_allocator.release();
// host-accessible memory is also returned via a plain non-arena allocator.
if (is_readonly_allocator || is_host_accessible_allocator) {
auto simple_allocator = std::make_unique<CustomAllocator>(memory_info, factory);
*allocator = simple_allocator.release();
return nullptr;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class ExampleEpFactory : public OrtEpFactory, public ApiPtrs {
// CPU allocator so we can control the arena behavior. optional as ORT always provides a CPU allocator if needed.
Ort::MemoryInfo default_memory_info_;
Ort::MemoryInfo readonly_memory_info_; // used for initializers
Ort::MemoryInfo host_accessible_memory_info_;

bool arena_allocator_using_default_settings_{true};
std::unique_ptr<ArenaAllocator> arena_allocator_; // shared device allocator that uses an arena
Expand Down
142 changes: 142 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python_autoep.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections.abc import Sequence

import numpy as np
import onnx
from autoep_helper import AutoEpTestCase
from helper import get_name

Expand All @@ -23,6 +24,8 @@


class TestAutoEP(AutoEpTestCase):
EXAMPLE_EP_NAME = "example_ep"

def test_cuda_ep_register_and_inference(self):
"""
Test registration of CUDA EP, adding its OrtDevice to the SessionOptions, and running inference.
Expand Down Expand Up @@ -341,6 +344,145 @@ def test_copy_tensors(self):

self.unregister_execution_provider_library(ep_name)

def _register_example_plugin_ep_or_skip(self):
    """Register the example plugin EP and return its OrtEpDevice, or skip the test."""
    if sys.platform != "win32":
        self.skipTest("Skipping test because device discovery is only supported on Windows")

    try:
        library_path = get_name("example_plugin_ep.dll")
    except FileNotFoundError:
        self.skipTest("Skipping test because example_plugin_ep.dll cannot be found")

    self.register_execution_provider_library(self.EXAMPLE_EP_NAME, os.path.realpath(library_path))

    # Scan the discovered devices for the one advertised by the EP we just registered.
    found_device = None
    for candidate in onnxrt.get_ep_devices():
        if candidate.ep_name == self.EXAMPLE_EP_NAME:
            found_device = candidate
            break

    self.assertIsNotNone(found_device, f"Could not find OrtEpDevice for registered EP '{self.EXAMPLE_EP_NAME}'")
    return found_device

def test_ortvalue_from_shape_and_type_host_accessible_numpy_dtype(self):
    """Allocate a HOST_ACCESSIBLE OrtValue via a numpy dtype and read it back as numpy."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)
    self.assertIsNotNone(host_mem)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([3, 2], np.float32, memory_info=host_mem)

    self.assertEqual(value.shape(), [3, 2])
    self.assertEqual(value.data_type(), "tensor(float)")
    # The example EP advertises HOST_ACCESSIBLE on a fake GPU device, so the allocator
    # came from memory_info rather than the default CPU path.
    self.assertNotEqual(value.device_name().lower(), "cpu")

    as_numpy = value.numpy()
    self.assertEqual(as_numpy.shape, (3, 2))
    self.assertEqual(as_numpy.dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_host_accessible_onnx_int_type(self):
    """Allocate a HOST_ACCESSIBLE OrtValue via an ONNX TensorProto element type."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([4], onnx.TensorProto.FLOAT, memory_info=host_mem)

    self.assertEqual(value.shape(), [4])
    self.assertEqual(value.data_type(), "tensor(float)")
    self.assertEqual(value.numpy().dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_host_accessible_zero_copy_numpy_view(self):
    # Writing through view1 must be visible through view2 - if numpy() ever copies,
    # this test fails and the UsesCpuMemory() zero-copy guarantee has regressed.
    ep_device = self._register_example_plugin_ep_or_skip()
    mem_info = ep_device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    ort_value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32, memory_info=mem_info)
    # Hold two simultaneous views so a copy made by either numpy() call is detected
    # directly (the original formulation only kept one view alive at a time).
    view1 = ort_value.numpy()
    view2 = ort_value.numpy()
    view1.fill(7.5)
    np.testing.assert_array_equal(view2, np.full((2, 3), 7.5, dtype=np.float32))

    # Same cross-view check for the plain CPU allocation path.
    cpu_value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32)
    cpu_view1 = cpu_value.numpy()
    cpu_view2 = cpu_value.numpy()
    cpu_view1.fill(-1.25)
    np.testing.assert_array_equal(cpu_view2, np.full((2, 3), -1.25, dtype=np.float32))

    # Release the numpy views before the owning OrtValues so no array outlives its buffer.
    del view1, view2, cpu_view1, cpu_view2
    del ort_value
    del cpu_value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_memory_info_no_allocator(self):
    """A memory_info that matches no registered shared allocator must raise cleanly."""
    # Build an OrtMemoryInfo for a device/vendor combination no registered EP provides.
    bogus_args = (
        "Bogus",
        onnxrt.OrtMemoryInfoDeviceType.GPU,
        0xDEAD,
        0,
        onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE,
        0,
        onnxrt.OrtAllocatorType.ORT_DEVICE_ALLOCATOR,
    )
    bogus_mem_info = onnxrt.OrtMemoryInfo.create_v2(*bogus_args)

    with self.assertRaisesRegex(RuntimeError, "No shared allocator found"):
        onnxrt.OrtValue.ortvalue_from_shape_and_type([2], np.float32, memory_info=bogus_mem_info)

def test_ortvalue_from_shape_and_onnx_type_memory_info_string_rejected(self):
    """String tensors cannot be allocated through the memory_info path."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    with self.assertRaisesRegex(RuntimeError, "non-string numpy arrays"):
        onnxrt.OrtValue.ortvalue_from_shape_and_type([2], onnx.TensorProto.STRING, memory_info=host_mem)

    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_memory_info_overrides_device_args(self):
    ep_device = self._register_example_plugin_ep_or_skip()
    mem_info = ep_device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    # Bogus device args alongside a valid memory_info: if the wrapper ever stops ignoring
    # them, this would fail (unknown device) or silently allocate elsewhere.
    # The wrapper documents that it warns (warnings.warn -> UserWarning) when non-default
    # device args accompany memory_info, so pin that behavior as well.
    with self.assertWarns(UserWarning):
        ort_value = onnxrt.OrtValue.ortvalue_from_shape_and_type(
            [3],
            np.float32,
            device_type="cuda",
            device_id=99,
            vendor_id=0xFFFF,
            memory_info=mem_info,
        )

    ort_value_baseline = onnxrt.OrtValue.ortvalue_from_shape_and_type([3], np.float32, memory_info=mem_info)
    self.assertEqual(ort_value.device_name(), ort_value_baseline.device_name())

    del ort_value
    del ort_value_baseline
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_default_memory_info(self):
    # Pins the false-branch of UsesCpuMemory(): DEFAULT memory on a non-CPU device must
    # round-trip through data_transfer rather than the zero-copy view path.
    device = self._register_example_plugin_ep_or_skip()
    default_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.DEFAULT)
    self.assertIsNotNone(default_mem)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32, memory_info=default_mem)

    self.assertEqual(value.shape(), [2, 3])
    self.assertEqual(value.data_type(), "tensor(float)")
    self.assertNotEqual(value.device_name().lower(), "cpu")

    round_tripped = value.numpy()
    self.assertEqual(round_tripped.shape, (2, 3))
    self.assertEqual(round_tripped.dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)


if __name__ == "__main__":
unittest.main(verbosity=1)
Loading