Support input initializers in onnx runtime

awoll-bdai · exploy-bot · commit f6c4c22bef7e · 2026-06-04T12:48:23.000Z
### What change is being made Extend the `OnnxRuntime` wrapper to support [input initializers](https://onnx.ai/onnx/intro/python.html#initializer-default-value). On the first run, and on the first run after a reset, the model is evaluated without passing tensors for the inputs that have initializers, so that ONNX Runtime uses their default values. Subsequent runs pass all input buffers, which override the defaults. ### Why this change is being made Allows inputs (e.g. memory) to be initialized with default values stored in the model. ### Tested Extended the unit tests. GitOrigin-RevId: d630b9d9baf0dddff8da2e970673f71141b7fe6d
diff --git a/control/include/exploy/onnx_runtime.hpp b/control/include/exploy/onnx_runtime.hpp
@@ -173,7 +173,7 @@ class OnnxRuntime {
   Ort::RunOptions run_options_{nullptr};
 
   struct TensorData {
-    std::size_t size;
+    std::size_t size{0};
     std::vector<std::vector<int64_t>> shapes;
     std::vector<const char*> names;
     std::vector<Ort::Value> tensors;
@@ -184,6 +184,10 @@ class OnnxRuntime {
   TensorData input_;
   TensorData output_;
 
+  // Number of inputs without an overridable initializer.
+  std::size_t non_initializer_input_count_{0};
+  bool use_initializers_{true};
+
   std::unordered_map<std::string, int> input_names_to_index_{};
   std::unordered_map<std::string, int> output_names_to_index_{};
 };
diff --git a/control/src/onnx_runtime.cpp b/control/src/onnx_runtime.cpp
@@ -14,6 +14,46 @@ namespace exploy::control {
 
 namespace {
 
+enum class TensorKind { Input, Output, Initializer };
+
+std::size_t getTensorCount(const Ort::Session& session, TensorKind kind) {
+  switch (kind) {
+    case TensorKind::Input:
+      return session.GetInputCount();
+    case TensorKind::Output:
+      return session.GetOutputCount();
+    case TensorKind::Initializer:
+      return session.GetOverridableInitializerCount();
+  }
+  return 0;
+}
+
+Ort::AllocatedStringPtr getTensorNameAllocated(Ort::Session& session,
+                                               Ort::AllocatorWithDefaultOptions& allocator,
+                                               TensorKind kind, std::size_t index) {
+  switch (kind) {
+    case TensorKind::Input:
+      return session.GetInputNameAllocated(index, allocator);
+    case TensorKind::Output:
+      return session.GetOutputNameAllocated(index, allocator);
+    case TensorKind::Initializer:
+      return session.GetOverridableInitializerNameAllocated(index, allocator);
+  }
+  return Ort::AllocatedStringPtr{nullptr, Ort::detail::AllocatedFree{allocator}};
+}
+
+Ort::TypeInfo getTensorTypeInfo(Ort::Session& session, TensorKind kind, std::size_t index) {
+  switch (kind) {
+    case TensorKind::Input:
+      return session.GetInputTypeInfo(index);
+    case TensorKind::Output:
+      return session.GetOutputTypeInfo(index);
+    case TensorKind::Initializer:
+      return session.GetOverridableInitializerTypeInfo(index);
+  }
+  return Ort::TypeInfo{nullptr};
+}
+
 void resetTensorBuffer(Ort::Value& tensor, ONNXTensorElementDataType data_type) {
   const auto count = tensor.GetTensorTypeAndShapeInfo().GetElementCount();
   switch (data_type) {
@@ -36,34 +76,35 @@ void resetTensorBuffer(Ort::Value& tensor, ONNXTensorElementDataType data_type)
 }
 
 template <typename TensorDataType>
-void initializeTensorData(TensorDataType& tensor_data, std::unique_ptr<Ort::Session>& session,
-                          Ort::AllocatorWithDefaultOptions& allocator,
-                          std::unordered_map<std::string, int>& names_to_index, bool is_input) {
-  tensor_data.size = is_input ? session->GetInputCount() : session->GetOutputCount();
-
-  tensor_data.names.reserve(tensor_data.size);
-  tensor_data.shapes.reserve(tensor_data.size);
-  tensor_data.data_types.reserve(tensor_data.size);
-  tensor_data.tensors.reserve(tensor_data.size);
-  tensor_data.allocated_names.reserve(tensor_data.size);
-
-  for (std::size_t n = 0; n < tensor_data.size; n++) {
-    auto name_ptr = is_input ? session->GetInputNameAllocated(n, allocator)
-                             : session->GetOutputNameAllocated(n, allocator);
-    tensor_data.allocated_names.push_back(std::move(name_ptr));
+void appendTensorData(TensorDataType& tensor_data, std::unique_ptr<Ort::Session>& session,
+                      Ort::AllocatorWithDefaultOptions& allocator,
+                      std::unordered_map<std::string, int>& names_to_index, TensorKind kind) {
+  const std::size_t count = getTensorCount(*session, kind);
+
+  const std::size_t new_size = tensor_data.size + count;
+  tensor_data.names.reserve(new_size);
+  tensor_data.shapes.reserve(new_size);
+  tensor_data.data_types.reserve(new_size);
+  tensor_data.tensors.reserve(new_size);
+  tensor_data.allocated_names.reserve(new_size);
+
+  for (std::size_t n = 0; n < count; n++) {
+    tensor_data.allocated_names.push_back(getTensorNameAllocated(*session, allocator, kind, n));
     tensor_data.names.push_back(tensor_data.allocated_names.back().get());
 
-    auto type_info = is_input ? session->GetInputTypeInfo(n) : session->GetOutputTypeInfo(n);
+    auto type_info = getTensorTypeInfo(*session, kind, n);
     auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
-
     tensor_data.shapes.push_back(tensor_info.GetShape());
     tensor_data.data_types.push_back(tensor_info.GetElementType());
 
-    tensor_data.tensors.push_back(Ort::Value::CreateTensor(allocator, tensor_data.shapes[n].data(),
-                                                           tensor_data.shapes[n].size(),
-                                                           tensor_data.data_types[n]));
+    tensor_data.tensors.push_back(
+        Ort::Value::CreateTensor(allocator, tensor_data.shapes.back().data(),
+                                 tensor_data.shapes.back().size(), tensor_data.data_types.back()));
+
+    resetTensorBuffer(tensor_data.tensors.back(), tensor_data.data_types.back());
 
-    names_to_index[std::string(tensor_data.names.back())] = n;
+    names_to_index[std::string(tensor_data.names.back())] = static_cast<int>(tensor_data.size);
+    tensor_data.size++;
   }
 }
 
@@ -113,22 +154,37 @@ bool OnnxRuntime::initialize(const std::string& model_path, const OnnxRuntimeOpt
       break;
   }
 
-  initializeTensorData(input_, session_, allocator_, input_names_to_index_, /*is_input=*/true);
-  initializeTensorData(output_, session_, allocator_, output_names_to_index_, /*is_input=*/false);
+  input_ = TensorData{};
+  output_ = TensorData{};
+  input_names_to_index_.clear();
+  output_names_to_index_.clear();
+  non_initializer_input_count_ = 0;
 
+  // Append initializer-backed inputs after regular inputs so that we can optionally let ONNX
+  // Runtime use the model's default values for them after a reset.
+  appendTensorData(input_, session_, allocator_, input_names_to_index_, TensorKind::Input);
+  appendTensorData(input_, session_, allocator_, input_names_to_index_, TensorKind::Initializer);
+  appendTensorData(output_, session_, allocator_, output_names_to_index_, TensorKind::Output);
+  non_initializer_input_count_ = getTensorCount(*session_, TensorKind::Input);
+  use_initializers_ = true;
   metadata_ = session_->GetModelMetadata();
 
   return true;
 }
 
 bool OnnxRuntime::evaluate() {
+  // If use_initializers_ is true, we pass only the leading non-initializer inputs to let ONNX
+  // Runtime use the model's default values for the rest. After the first run, we always pass all
+  // inputs and ignore the model defaults.
+  const std::size_t input_count = use_initializers_ ? non_initializer_input_count_ : input_.size;
   try {
-    session_->Run(run_options_, input_.names.data(), input_.tensors.data(), input_.size,
+    session_->Run(run_options_, input_.names.data(), input_.tensors.data(), input_count,
                   output_.names.data(), output_.tensors.data(), output_.size);
   } catch (const Ort::Exception& e) {
     LOG_STREAM(ERROR, "ONNX Runtime evaluation failed: " << e.what());
     return false;
   }
+  use_initializers_ = false;
   return true;
 }
 
@@ -144,6 +200,7 @@ void OnnxRuntime::resetBuffers() {
   for (std::size_t n = 0; n < output_.size; n++) {
     resetTensorBuffer(output_.tensors[n], output_.data_types[n]);
   }
+  use_initializers_ = true;
 }
 
 std::unordered_set<std::string> OnnxRuntime::inputNames() const {
diff --git a/control/test/onnx_runtime_test.cpp b/control/test/onnx_runtime_test.cpp
@@ -4,6 +4,7 @@
 
 #include <gtest/gtest.h>
 
+#include <algorithm>
 #include <filesystem>
 #include <span>
 
@@ -79,6 +80,7 @@ TEST_F(OnnxRuntimeTest, InputTensorNames) {
   EXPECT_TRUE(input_names.contains("float_input"));
   EXPECT_TRUE(input_names.contains("int_input"));
   EXPECT_TRUE(input_names.contains("bool_input"));
+  EXPECT_TRUE(input_names.contains("init_float_input"));
 }
 
 TEST_F(OnnxRuntimeTest, OutputTensorNames) {
@@ -400,4 +402,122 @@ TEST_F(OnnxRuntimeTest, CopyOutputToInputTypeMismatch) {
   EXPECT_FALSE(runtime.copyOutputToInput("float_output", "bool_input"));
 }
 
+TEST_F(OnnxRuntimeTest, FirstRunUsesInitializerDefaults) {
+  OnnxRuntime runtime;
+  ASSERT_TRUE(runtime.initialize(simple_model_path_));
+  runtime.resetBuffers();
+
+  auto float_input = runtime.inputBuffer<float>("float_input");
+  ASSERT_TRUE(float_input.has_value());
+  float_input.value()[0] = 1.5f;
+  float_input.value()[1] = 2.5f;
+  float_input.value()[2] = 3.5f;
+
+  // Populate the ``init_float_input`` buffer with non-zero values that would clearly
+  // differ from the baked-in defaults [0, 0, 0]. The first run must IGNORE these because
+  // ``init_float_input`` is an overridable initializer.
+  auto init_float_input = runtime.inputBuffer<float>("init_float_input");
+  ASSERT_TRUE(init_float_input.has_value());
+  init_float_input.value()[0] = 100.0f;
+  init_float_input.value()[1] = 200.0f;
+  init_float_input.value()[2] = 300.0f;
+
+  ASSERT_TRUE(runtime.evaluate());
+
+  // float_output = float_input * 2 + 0 (init_float_input defaults to zeros).
+  auto float_output = runtime.outputBuffer<float>("float_output");
+  ASSERT_TRUE(float_output.has_value());
+  EXPECT_FLOAT_EQ(float_output.value()[0], 3.0f);
+  EXPECT_FLOAT_EQ(float_output.value()[1], 5.0f);
+  EXPECT_FLOAT_EQ(float_output.value()[2], 7.0f);
+}
+
+TEST_F(OnnxRuntimeTest, SubsequentRunsUseInitializerBuffer) {
+  OnnxRuntime runtime;
+  ASSERT_TRUE(runtime.initialize(simple_model_path_));
+  runtime.resetBuffers();
+
+  auto float_input = runtime.inputBuffer<float>("float_input");
+  ASSERT_TRUE(float_input.has_value());
+  float_input.value()[0] = 1.5f;
+  float_input.value()[1] = 2.5f;
+  float_input.value()[2] = 3.5f;
+
+  auto init_float_input = runtime.inputBuffer<float>("init_float_input");
+  ASSERT_TRUE(init_float_input.has_value());
+
+  // First run: defaults [0, 0, 0] used regardless of buffer contents.
+  std::ranges::fill(init_float_input.value(), 999.0f);
+  ASSERT_TRUE(runtime.evaluate());
+
+  auto float_output = runtime.outputBuffer<float>("float_output");
+  ASSERT_TRUE(float_output.has_value());
+  EXPECT_FLOAT_EQ(float_output.value()[0], 3.0f);  // 1.5 * 2 + 0
+  EXPECT_FLOAT_EQ(float_output.value()[1], 5.0f);  // 2.5 * 2 + 0
+  EXPECT_FLOAT_EQ(float_output.value()[2], 7.0f);  // 3.5 * 2 + 0
+
+  // Second run: now the buffer values must override the defaults.
+  init_float_input.value()[0] = 10.0f;
+  init_float_input.value()[1] = 20.0f;
+  init_float_input.value()[2] = 30.0f;
+  ASSERT_TRUE(runtime.evaluate());
+
+  EXPECT_FLOAT_EQ(float_output.value()[0], 13.0f);  // 1.5 * 2 + 10
+  EXPECT_FLOAT_EQ(float_output.value()[1], 25.0f);  // 2.5 * 2 + 20
+  EXPECT_FLOAT_EQ(float_output.value()[2], 37.0f);  // 3.5 * 2 + 30
+
+  // Third run: confirm the buffer keeps overriding (still not falling back).
+  init_float_input.value()[0] = -1.0f;
+  init_float_input.value()[1] = -2.0f;
+  init_float_input.value()[2] = -3.0f;
+  ASSERT_TRUE(runtime.evaluate());
+
+  EXPECT_FLOAT_EQ(float_output.value()[0], 2.0f);  // 1.5 * 2 - 1
+  EXPECT_FLOAT_EQ(float_output.value()[1], 3.0f);  // 2.5 * 2 - 2
+  EXPECT_FLOAT_EQ(float_output.value()[2], 4.0f);  // 3.5 * 2 - 3
+}
+
+TEST_F(OnnxRuntimeTest, ResetBuffersReArmsInitializerDefaults) {
+  OnnxRuntime runtime;
+  ASSERT_TRUE(runtime.initialize(simple_model_path_));
+  runtime.resetBuffers();
+
+  auto float_input = runtime.inputBuffer<float>("float_input");
+  auto init_float_input = runtime.inputBuffer<float>("init_float_input");
+  ASSERT_TRUE(float_input.has_value());
+  ASSERT_TRUE(init_float_input.has_value());
+
+  float_input.value()[0] = 1.5f;
+  float_input.value()[1] = 2.5f;
+  float_input.value()[2] = 3.5f;
+
+  // Run #1 uses defaults.
+  ASSERT_TRUE(runtime.evaluate());
+  // Run #2 uses the buffer.
+  init_float_input.value()[0] = 10.0f;
+  init_float_input.value()[1] = 20.0f;
+  init_float_input.value()[2] = 30.0f;
+  ASSERT_TRUE(runtime.evaluate());
+  auto float_output = runtime.outputBuffer<float>("float_output");
+  ASSERT_TRUE(float_output.has_value());
+  EXPECT_FLOAT_EQ(float_output.value()[0], 13.0f);  // 1.5 * 2 + 10
+
+  // After resetBuffers(), the next evaluate() must again fall back to the model's
+  // initializer defaults, regardless of buffer contents.
+  runtime.resetBuffers();
+  // resetBuffers() also zeroes the buffers, so set non-zero values to prove they are
+  // ignored on the post-reset run. Restore float_input as well (also zeroed by reset).
+  float_input.value()[0] = 1.5f;
+  float_input.value()[1] = 2.5f;
+  float_input.value()[2] = 3.5f;
+  init_float_input.value()[0] = 999.0f;
+  init_float_input.value()[1] = 999.0f;
+  init_float_input.value()[2] = 999.0f;
+
+  ASSERT_TRUE(runtime.evaluate());
+  EXPECT_FLOAT_EQ(float_output.value()[0], 3.0f);  // 1.5 * 2 + 0
+  EXPECT_FLOAT_EQ(float_output.value()[1], 5.0f);  // 2.5 * 2 + 0
+  EXPECT_FLOAT_EQ(float_output.value()[2], 7.0f);  // 3.5 * 2 + 0
+}
+
 }  // namespace exploy::control
diff --git a/control/test/testdata/test_onnx_generator.py b/control/test/testdata/test_onnx_generator.py
@@ -4,6 +4,7 @@
 import os
 import sys
 
+import numpy as np
 import onnx
 import torch
 
@@ -368,9 +369,8 @@ class SimpleTestModel(torch.nn.Module):
     def __init__(self):
         super().__init__()
 
-    def forward(self, float_input, int_input, bool_input):
-        # Simple pass-through model that just forwards inputs to outputs
-        float_output = float_input * 2.0  # Simple transformation
+    def forward(self, float_input, int_input, bool_input, init_float_input):
+        float_output = float_input * 2.0 + init_float_input
         int_output = int_input + 1  # Simple transformation
         bool_output = torch.logical_not(bool_input)  # Simple transformation
 
@@ -387,15 +387,24 @@ def export_simple_model(data_dir: str):
     float_input = torch.tensor([[1.5, 2.5, 3.5]], dtype=torch.float32)
     int_input = torch.tensor([[10, 20, 30]], dtype=torch.int32)
     bool_input = torch.tensor([[True, False, True]], dtype=torch.bool)
+    # Default values for the overridable initializer baked into the exported model.
+    default_init_float_input = np.zeros((1, 3), dtype=np.float32)
+    init_float_input = torch.from_numpy(default_init_float_input)
 
     torch.onnx.export(
         simple_model,
-        (float_input, int_input, bool_input),
+        (float_input, int_input, bool_input, init_float_input),
         output_path_simple,
-        input_names=["float_input", "int_input", "bool_input"],
+        input_names=["float_input", "int_input", "bool_input", "init_float_input"],
         output_names=["float_output", "int_output", "bool_output"],
     )
 
+    onnx_model = onnx.load(output_path_simple)
+    onnx_model.graph.initializer.append(
+        onnx.numpy_helper.from_array(default_init_float_input, name="init_float_input")
+    )
+    onnx.save(onnx_model, output_path_simple)
+
     # Add simple metadata to the simple test model
     simple_metadata = {
         "model_version": "1.0",