Skip to content

Commit f2c28e2

Browse files
S390x test fixes (#27404)
### Description This PR contains fixes to various big-endian support issues in onnxruntime, both in libraries and tests. ### Motivation and Context Currently some tests from the onnxruntime test suite fail. This change fixes all tests from the onnxruntime test suite when it is built without training support. It also includes a fix for a linking issue. The following tests are fixed on s390x: OrtModelOnlyTests.ValidateOrtFormatModelDoesNotRunOptimizersInFullBuild FlatbufferUtilsTest.ExternalWriteReadWithLoadInitializers SparseTensorConversionTests.SparseTensorProtoToDense_Rank1Indices64 SparseTensorConversionTests.SparseTensorProtoToDense_Rank1Indices32 SparseTensorConversionTests.SparseTensorProtoToDense_Rank1Indices16 SparseTensorConversionTests.SparseTensorProtoToDense_Rank1Indices8 SparseTensorConversionTests.SparseTensorProtoToDense_Rank2Indices_COO SparseTensorConversionTests.TestConstantNodeConversion OrtModelOnlyTests.SparseInitializerHandling SparseTensorConversionTests.TestConstantNodeConversion SparseTensorConversionTests.TestDenseToSparseConversion ExecutionFrameTestInit.SparseInitializerAsOutput CApiTest.SparseOutputModel
1 parent 0f43e16 commit f2c28e2

25 files changed

Lines changed: 450 additions & 169 deletions

cmake/onnxruntime_unittests.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,14 @@ if(NOT IOS)
906906

907907
list(REMOVE_ITEM onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/main.cc)
908908

909+
# if training is disabled, endian_utils are still used in tests
910+
if (NOT onnxruntime_ENABLE_TRAINING)
911+
list(APPEND onnx_test_runner_common_srcs
912+
${ONNXRUNTIME_ROOT}/core/framework/endian_utils.cc
913+
${ONNXRUNTIME_ROOT}/core/framework/endian_utils.h
914+
)
915+
endif ()
916+
909917
onnxruntime_add_static_library(onnx_test_runner_common ${onnx_test_runner_common_srcs})
910918
if(MSVC)
911919
target_compile_options(onnx_test_runner_common PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"

onnxruntime/core/framework/endian_utils.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,5 +83,11 @@ common::Status ReadLittleEndian(size_t element_size,
8383
return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes);
8484
}
8585

86+
common::Status WriteLittleEndian(size_t element_size,
87+
gsl::span<const unsigned char> source_bytes,
88+
gsl::span<unsigned char> destination_bytes) {
89+
return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes);
90+
}
91+
8692
} // namespace utils
8793
} // namespace onnxruntime

onnxruntime/core/framework/endian_utils.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,21 @@ common::Status ReadLittleEndian(gsl::span<const unsigned char> source_bytes, gsl
7676
return ReadLittleEndian(sizeof(T), source_bytes, destination_bytes);
7777
}
7878

79+
/**
80+
* Writes to a little-endian destination.
81+
*/
82+
common::Status WriteLittleEndian(size_t element_size,
83+
gsl::span<const unsigned char> source_bytes,
84+
gsl::span<unsigned char> destination_bytes);
85+
7986
/**
8087
* Writes to a little-endian destination.
8188
*/
8289
template <typename T>
8390
common::Status WriteLittleEndian(gsl::span<const T> source, gsl::span<unsigned char> destination_bytes) {
8491
static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
8592
const auto source_bytes = gsl::make_span(reinterpret_cast<const unsigned char*>(source.data()), source.size_bytes());
86-
return detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes);
93+
return WriteLittleEndian(sizeof(T), source_bytes, destination_bytes);
8794
}
8895

8996
} // namespace utils

onnxruntime/core/framework/tensorprotoutils.cc

Lines changed: 133 additions & 48 deletions
Large diffs are not rendered by default.

onnxruntime/core/framework/tensorprotoutils.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto,
4848
*/
4949
void ConvertRawDataInTensorProto(ONNX_NAMESPACE::TensorProto& tensor_proto);
5050

51+
/**
52+
* This function is used to get element size of tensor data.
53+
*
54+
 * For complex types it returns the size of one element of the complex value.
55+
*
56+
 * It is mostly used to convert data on big-endian systems
57+
 * after unpacking the data.
58+
* @param tensor_data_type tensor data type to get element size from
59+
*/
60+
size_t GetElementSizeOfTensor(ONNX_NAMESPACE::TensorProto_DataType tensor_data_type);
61+
5162
/**
5263
* Wrapper function for set_raw_data.
5364
* First calls the set_raw_data and then calls ConvertRawDataInTensorProto
@@ -156,7 +167,7 @@ common::Status CreateTensorFromTensorProto(const Env& env, const std::filesystem
156167

157168
/// The threshold for small tensors. If the size of the tensor is LE to this value,
158169
/// The data will stay in the TensorProto. Otherwise, the data will be moved to a Tensor instance
159-
/// and TensorProto will contain a kTensorProtoMemoryAddressTag reference as a result of
170+
/// and TensorProto will contain a kTensorProtoNativeEndianMemoryAddressTag reference as a result of
160171
/// TensorToTensorProto() below. This is because shape inferencing code in onnx for
161172
/// like Reshape parses weights data and it needs to be in the TensorProto.
162173
/// The value of 127 was chosen empirically to be the smallest value that is required
@@ -177,7 +188,7 @@ constexpr const size_t kMaxEmbeddedInitializerSizeInBytes = size_t{2} * 1024 * 1
177188
* @param[in] tensor the Tensor whose data and shape will be used to create the TensorProto.
178189
* @param[in] tensor_proto_name the name of the TensorProto.
179190
* @param[in] use_tensor_buffer the tensor proto is set to use external location, with
180-
* 'location' set to onnxruntime::utils::kTensorProtoMemoryAddressTag
191+
* 'location' set to onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag
181192
* 'offset' set to tensor's memory location, and 'length' set to tensor's
182193
* memory size. The caller is responsible to maintain the lifetime of
183194
* the allocated memory buffer. Use with caution.
@@ -215,8 +226,19 @@ common::Status ValidateEmbeddedTensorProtoDataSizeAndShape(const ONNX_NAMESPACE:
215226
Special marker used to indicate an existing memory buffer contains the TensorProto external data.
216227
If the 'location' field of the external data info is set to this marker, the 'offset' field should contain the
217228
address of the memory containing the data.
229+
230+
This marker is used when the data is always in little-endian format.
231+
*/
232+
constexpr const ORTCHAR_T* kTensorProtoLittleEndianMemoryAddressTag = ORT_TSTR("*/_ORT_MEM_ADDR_/*");
233+
234+
/**
235+
Special marker used to indicate an existing memory buffer contains the TensorProto external data.
236+
If the 'location' field of the external data info is set to this marker, the 'offset' field should contain the
237+
address of the memory containing the data.
238+
239+
This marker is used when data is in native endian format, i.e. big endian on big endian systems.
218240
*/
219-
constexpr const ORTCHAR_T* kTensorProtoMemoryAddressTag = ORT_TSTR("*/_ORT_MEM_ADDR_/*");
241+
constexpr const ORTCHAR_T* kTensorProtoNativeEndianMemoryAddressTag = ORT_TSTR("*/_ORT_NATIVE_ENDIAN_MEM_ADDR_/*");
220242

221243
/// <summary>
222244
/// Creates a OrtValue with a tensor on top of the external data.

onnxruntime/core/graph/graph.cc

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,15 +1242,6 @@ Graph::Graph(const Model& owning_model,
12421242

12431243
const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
12441244
ORT_THROW_IF_ERROR(utils::ConstantNodeProtoToTensorProto(node, model_path, *tensor));
1245-
if constexpr (endian::native != endian::little) {
1246-
const AttributeProto& attrib = node.attribute(0);
1247-
if (attrib.type() == AttributeProto_AttributeType_SPARSE_TENSOR) {
1248-
const TensorProto& sparse_values = node.attribute(0).sparse_tensor().values();
1249-
if ((!(sparse_values.has_raw_data())) && utils::HasRawData(*tensor)) {
1250-
onnxruntime::utils::ConvertRawDataInTensorProto(*tensor);
1251-
}
1252-
}
1253-
}
12541245

12551246
// Ensure initializers are also graph inputs.
12561247
if (ir_version_ < 4) {
@@ -4964,6 +4955,18 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
49644955
std::vector<uint8_t> raw_data;
49654956
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
49664957
size_t tensor_bytes_size = raw_data.size();
4958+
4959+
// Convert the data to little endian before saving to file
4960+
if constexpr (endian::native != endian::little) {
4961+
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer.data_type()));
4962+
4963+
if (element_size > 1) {
4964+
onnxruntime::utils::SwapByteOrderInplace(
4965+
element_size,
4966+
gsl::make_span(reinterpret_cast<std::byte*>(raw_data.data()), tensor_bytes_size));
4967+
}
4968+
}
4969+
49674970
if (model_saving_options.force_embed_external_ini ||
49684971
tensor_bytes_size < model_saving_options.initializer_size_threshold) {
49694972
*output_proto = initializer;
@@ -6735,13 +6738,13 @@ Status Graph::LoadFromModelEditorApiModel(const OrtGraph& api_graph, bool updati
67356738
const void* data_offset = t.DataRaw(); // address of memory not offset into file
67366739
auto offset = narrow<ExternalDataInfo::OFFSET_TYPE>(reinterpret_cast<intptr_t>(data_offset));
67376740

6738-
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoMemoryAddressTag,
6741+
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag,
67396742
offset, t.SizeInBytes(), tensor_proto);
67406743

67416744
// add OrtValue to ortvalue_initializers_ to keep it alive and to store the deleter if provided.
67426745
ortvalue_initializers_.emplace(name, std::move(v));
67436746
} else {
6744-
tensor_proto.set_raw_data(t.DataRaw(), t.SizeInBytes());
6747+
onnxruntime::utils::SetRawDataInTensorProto(tensor_proto, t.DataRaw(), t.SizeInBytes());
67456748
}
67466749

67476750
TypeProto type_proto{utils::TypeProtoFromTensorProto(tensor_proto)};

onnxruntime/core/graph/graph_flatbuffers_utils.cc

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,19 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder,
5050
string_data = builder.CreateVectorOfStrings(string_data_vec);
5151
} else {
5252
std::vector<uint8_t> unpacked_tensor;
53-
// We can not convert this in place, because the session may be used
54-
// after the model was saved in ort format. If the session is continued to be used, then
55-
// we continue with initializers in memory with wrong endianess
53+
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));
54+
55+
// We cannot convert the data before unpacking, because
56+
// the ConvertRawDataInTensorProto function does not convert external data.
57+
// Instead, convert the data after unpacking it
5658
if constexpr (endian::native != endian::little) {
57-
auto be_copy{initializer};
58-
onnxruntime::utils::ConvertRawDataInTensorProto(be_copy);
59-
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(be_copy, model_path, unpacked_tensor));
60-
} else {
61-
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));
59+
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer.data_type()));
60+
61+
if (element_size > 1) {
62+
onnxruntime::utils::SwapByteOrderInplace(
63+
element_size,
64+
gsl::make_span(reinterpret_cast<std::byte*>(unpacked_tensor.data()), unpacked_tensor.size()));
65+
}
6266
}
6367

6468
if (external_writer && unpacked_tensor.size() >= kMinimumSizeForExternalData) {
@@ -316,7 +320,7 @@ Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& init
316320
// high bit, but that should be unlikely in a scenario where we care about memory usage enough to use this path.
317321
auto offset = narrow<ExternalDataInfo::OFFSET_TYPE>(reinterpret_cast<intptr_t>(data_offset));
318322

319-
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoMemoryAddressTag,
323+
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoLittleEndianMemoryAddressTag,
320324
offset, fbs_raw_data->size(), initializer);
321325

322326
} else {
@@ -473,9 +477,31 @@ Status SaveOrtTensorOrtFormat(
473477
// To avoid issues with vtable offsets, raw_data fbs::vector must be constructed before the TensorBuilder begins
474478
// building the tensor. See flatbuffer_builder.h's NotNested() function for more details.
475479
flatbuffers::Offset<flatbuffers::Vector<uint8_t>> raw_data;
480+
481+
auto unpack_tensor_data_be = [&ort_tensor](std::vector<uint8_t>& unpacked_tensor_data) -> Status {
482+
unpacked_tensor_data.resize(ort_tensor.SizeInBytes());
483+
484+
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(ort_tensor.GetElementType()));
485+
auto src_span = gsl::make_span(reinterpret_cast<const unsigned char*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
486+
auto dst_span = gsl::make_span(reinterpret_cast<unsigned char*>(unpacked_tensor_data.data()), unpacked_tensor_data.size());
487+
488+
// If element size is unknown, set it to 1 to disable byteswapping
489+
if (element_size < 1) element_size = 1;
490+
491+
return onnxruntime::utils::WriteLittleEndian(element_size, src_span, dst_span);
492+
};
493+
476494
if (!external_data_writer) {
477-
raw_data = builder.CreateVector(static_cast<const uint8_t*>(ort_tensor.DataRaw()),
478-
ort_tensor.SizeInBytes());
495+
if constexpr (endian::native != endian::little) {
496+
std::vector<uint8_t> unpacked_tensor;
497+
498+
ORT_RETURN_IF_ERROR(unpack_tensor_data_be(unpacked_tensor));
499+
500+
raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size());
501+
} else {
502+
raw_data = builder.CreateVector(static_cast<const uint8_t*>(ort_tensor.DataRaw()),
503+
ort_tensor.SizeInBytes());
504+
}
479505
}
480506

481507
fbs::TensorBuilder tb(builder);
@@ -485,8 +511,17 @@ Status SaveOrtTensorOrtFormat(
485511
tb.add_data_type(static_cast<fbs::TensorDataType>(ort_tensor.GetElementType()));
486512
if (external_data_writer) {
487513
uint64_t offset = 0;
488-
gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
489-
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
514+
if constexpr (endian::native != endian::little) {
515+
std::vector<uint8_t> unpacked_tensor;
516+
517+
ORT_RETURN_IF_ERROR(unpack_tensor_data_be(unpacked_tensor));
518+
519+
gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(unpacked_tensor.data()), unpacked_tensor.size());
520+
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
521+
} else {
522+
gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
523+
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
524+
}
490525
int64_t external_data_offset = onnxruntime::narrow<int64_t>(offset);
491526
tb.add_external_data_offset(external_data_offset);
492527
} else {
@@ -546,8 +581,21 @@ Status LoadOrtTensorOrtFormat(const fbs::Tensor& fbs_tensor, const AllocatorPtr
546581
const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(
547582
tensor_data_type)
548583
->GetElementType();
549-
ort_tensor = onnxruntime::Tensor(
550-
tensor_dtype, TensorShape(tensor_dims->data(), tensor_dims->size()), allocator);
584+
585+
if constexpr (endian::native != endian::little) {
586+
std::vector<typename std::remove_reference_t<decltype(*tensor_dims)>::return_type> byteswapped_data;
587+
byteswapped_data.resize(tensor_dims->size());
588+
589+
for (size_t i = 0; i < tensor_dims->size(); ++i) {
590+
byteswapped_data[i] = tensor_dims->Get(i);
591+
}
592+
593+
ort_tensor = onnxruntime::Tensor(
594+
tensor_dtype, TensorShape(byteswapped_data.data(), byteswapped_data.size()), allocator);
595+
} else {
596+
ort_tensor = onnxruntime::Tensor(
597+
tensor_dtype, TensorShape(tensor_dims->data(), tensor_dims->size()), allocator);
598+
}
551599

552600
if (fbs_tensor.raw_data() && fbs_tensor.raw_data()->size() == 0U) {
553601
// Empty tensor. Nothing to unpack.

onnxruntime/core/graph/graph_utils.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,8 @@ bool CheckInMemoryDataMatch(const ONNX_NAMESPACE::TensorProto& tensor_proto, con
378378
// Retrieve external data using ExternalData structure
379379
std::unique_ptr<ExternalDataInfo> external_data;
380380
ORT_THROW_IF_ERROR(ExternalDataInfo::Create(tensor_proto.external_data(), external_data));
381-
return (external_data->GetRelPath().compare(utils::kTensorProtoMemoryAddressTag) == 0) &&
381+
return ((external_data->GetRelPath().compare(utils::kTensorProtoLittleEndianMemoryAddressTag) == 0) ||
382+
(external_data->GetRelPath().compare(utils::kTensorProtoNativeEndianMemoryAddressTag) == 0)) &&
382383
(tensor.DataRaw() == reinterpret_cast<const void*>(external_data->GetOffset()));
383384
}
384385
return false;

onnxruntime/core/optimizer/qdq_transformer/where_dummy_dq.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,29 +63,29 @@ Status WhereDummyDq::InsertDummyDQ(Node& node, Graph& graph, bool& modified, con
6363
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
6464
int8_t zp = 0;
6565
int8_t dummy_data = 1;
66-
dummy_zp_proto.set_raw_data(&zp, 1);
67-
dummy_data_proto.set_raw_data(&dummy_data, 1);
66+
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 1);
67+
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 1);
6868
break;
6969
}
7070
case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
7171
uint8_t zp = 0;
7272
uint8_t dummy_data = 1;
73-
dummy_zp_proto.set_raw_data(&zp, 1);
74-
dummy_data_proto.set_raw_data(&dummy_data, 1);
73+
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 1);
74+
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 1);
7575
break;
7676
}
7777
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
7878
int16_t zp = 0;
7979
int16_t dummy_data = 1;
80-
dummy_zp_proto.set_raw_data(&zp, 2);
81-
dummy_data_proto.set_raw_data(&dummy_data, 2);
80+
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 2);
81+
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 2);
8282
break;
8383
}
8484
case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
8585
uint16_t zp = 0;
8686
uint16_t dummy_data = 1;
87-
dummy_zp_proto.set_raw_data(&zp, 2);
88-
dummy_data_proto.set_raw_data(&dummy_data, 2);
87+
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 2);
88+
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 2);
8989
break;
9090
}
9191
default:
@@ -110,7 +110,7 @@ Status WhereDummyDq::InsertDummyDQ(Node& node, Graph& graph, bool& modified, con
110110
switch (initializer.data_type()) {
111111
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
112112
float* where_const_scalar = initializer.data<float>();
113-
dummy_scale_proto.set_raw_data(where_const_scalar, sizeof(float));
113+
utils::SetRawDataInTensorProto(dummy_scale_proto, where_const_scalar, sizeof(float));
114114
break;
115115
}
116116
default:
@@ -167,4 +167,4 @@ Status WhereDummyDq::ApplyImpl(Graph& graph, bool& modified, int graph_level, co
167167

168168
return Status::OK();
169169
}
170-
} // namespace onnxruntime
170+
} // namespace onnxruntime

onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlGraphFusionHelper.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
#include "DmlGraphFusionHelper.h"
44
#include "DmlRuntimeFusedGraphKernel.h"
55

6+
#include "core/common/endian.h"
7+
#include "core/framework/endian_utils.h"
8+
69
using namespace Windows::AI::MachineLearning::Adapter;
710

811
namespace Dml
@@ -121,7 +124,31 @@ namespace DmlGraphFusionHelper
121124
onnxruntime::FileOffsetType fileOffset;
122125
SafeInt<size_t> safeTensorByteSize;
123126
THROW_IF_NOT_OK(onnxruntime::utils::GetExternalDataInfo(*initializer, graph.ModelPath(), /*out*/ externalFilePath, /*out*/ fileOffset, /*out*/ safeTensorByteSize));
124-
if (externalFilePath == onnxruntime::utils::kTensorProtoMemoryAddressTag)
127+
if (externalFilePath == onnxruntime::utils::kTensorProtoLittleEndianMemoryAddressTag)
128+
{
129+
if constexpr (onnxruntime::endian::native != onnxruntime::endian::little)
130+
{
131+
unpackedTensor.reset(new std::byte[safeTensorByteSize]);
132+
133+
auto src = gsl::make_span<const unsigned char>(reinterpret_cast<const unsigned char*>(fileOffset), safeTensorByteSize);
134+
auto dst = gsl::make_span<unsigned char>(reinterpret_cast<unsigned char*>(unpackedTensor.get()), safeTensorByteSize);
135+
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer->data_type()));
136+
137+
// If element size is unknown, set it to 1 to disable byteswapping
138+
if (element_size < 1) element_size = 1;
139+
140+
THROW_IF_NOT_OK(onnxruntime::utils::ReadLittleEndian(element_size, src, dst));
141+
142+
tensorPtr = unpackedTensor.get();
143+
tensorByteSize = safeTensorByteSize;
144+
}
145+
else
146+
{
147+
tensorPtr = reinterpret_cast<std::byte*>(fileOffset);
148+
tensorByteSize = safeTensorByteSize;
149+
}
150+
}
151+
else if (externalFilePath == onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)
125152
{
126153
tensorPtr = reinterpret_cast<std::byte*>(fileOffset);
127154
tensorByteSize = safeTensorByteSize;

0 commit comments

Comments
 (0)