Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION_NUMBER
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.27.0
1.28.0
98 changes: 97 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
*
* This value is used by some API functions to behave as this version of the header expects.
*/
#define ORT_API_VERSION 27
#define ORT_API_VERSION 28

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -593,6 +593,61 @@ typedef OrtStatus*(ORT_API_CALL* OrtWriteBufferFunc)(_In_ void* state,
_In_ const void* buffer,
_In_ size_t buffer_num_bytes);

/** \brief Function called to write EPContext binary data during compilation.
*
* This function is called synchronously by the execution provider on the calling thread. ORT does not own or retain
* the buffer after the callback returns. ORT does not serialize invocations made by different EP instances or EP
* worker threads.
*
* Each callback invocation represents one complete write operation for file_name. The callback signature does not
* provide an offset, sequence number, or final-chunk marker, so EPs that need chunked streaming must define their own
* ordering and completion contract with the application. EPs should prefer a single callback invocation per EPContext
* binary unless chunking semantics are documented by that EP.
*
* The application's implementation can process the data in any way (e.g., encrypt and store, upload to cloud storage,
* or compress) before persisting it.
*
* \param[in] state Opaque pointer holding the user's state. ORT does not own or manage this pointer. The application
* must keep it valid for the duration of any compile operation that may invoke this callback and must
* provide any synchronization required if it can be used concurrently.
* \param[in] file_name The intended EPContext binary file name as a null-terminated UTF-8 string.
* \param[in] buffer The buffer containing EPContext binary data to write.
* \param[in] buffer_num_bytes The size of the buffer in bytes.
*
* \return OrtStatus* Write status. Return nullptr on success.
* Use CreateStatus to provide error info with ORT_FAIL as the error code.
* ORT will release the OrtStatus* if not null.
*
* \see OrtCompileApi::ModelCompilationOptions_SetEpContextDataWriteFunc
*/
typedef OrtStatus*(ORT_API_CALL* OrtWriteEpContextDataFunc)(_In_ void* state,
_In_ const char* file_name,
_In_ const void* buffer,
_In_ size_t buffer_num_bytes);

/** \brief Function called to read EPContext binary data during session load.
*
* The application reads, processes (e.g., decrypts, decompresses, downloads), and returns the EPContext binary data.
* ORT provides an allocator so the application can allocate the output buffer directly. The callback is called
* synchronously by the execution provider on the calling thread. ORT does not serialize invocations made by different
* EP instances or EP worker threads.
*
* \param[in] state Opaque pointer holding the user's state. ORT does not own or manage this pointer. The application
* must keep it valid while any session or EP created from the associated OrtSessionOptions may invoke
* this callback and must provide any synchronization required if it can be used concurrently.
* \param[in] file_name The EPContext binary file name as a null-terminated UTF-8 string.
* \param[in] allocator ORT-provided allocator. The application must use this to allocate the output buffer.
* \param[out] buffer Set by the implementation to the buffer, allocated with `allocator`, containing the output data.
* \param[out] data_size Set by the implementation to the size of the output data in bytes.
*
* \return OrtStatus* Read status. Return nullptr on success.
* Use CreateStatus to provide error info with ORT_FAIL as the error code.
* ORT will release the OrtStatus* if not null.
*
* \see OrtApi::SessionOptions_SetEpContextDataReadFunc
*/
typedef OrtStatus*(ORT_API_CALL* OrtReadEpContextDataFunc)(_In_ void* state,
_In_ const char* file_name,
_In_ OrtAllocator* allocator,
_Outptr_ void** buffer,
_Out_ size_t* data_size);

/** \brief Function called by ORT to allow user to specify how an initializer should be saved, that is, either
* written to an external file or stored within the model. ORT calls this function for every initializer when
* generating a model.
Expand Down Expand Up @@ -7471,6 +7526,26 @@ struct OrtApi {
* \see OrtApi::SetSessionExecutionMode
*/
ORT_API2_STATUS(GetSessionExecutionMode, _In_ const OrtSessionOptions* options, _Out_ ExecutionMode* out);

/** \brief Registers a callback to provide EPContext binary data during session load.
*
* When loading a compiled model with external (non-embedded) EPContext binary data, an execution provider can
* retrieve this callback from OrtEpContextConfig and call it instead of reading the binary data from disk.
*
* The state pointer is stored as-is and is not owned by ORT. It must remain valid while any session or EP created
* from these options may call the callback. If the same state may be used by multiple EPs or threads, the application
* is responsible for synchronization.
*
* \param[in] options The OrtSessionOptions instance.
* \param[in] read_func The OrtReadEpContextDataFunc callback.
* \param[in] state Opaque state passed to read_func. Can be NULL.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \see OrtReadEpContextDataFunc
* \see OrtEpApi::EpContextConfig_GetEpContextDataReadFunc
*
* \since Version 1.28.
*/
ORT_API2_STATUS(SessionOptions_SetEpContextDataReadFunc, _Inout_ OrtSessionOptions* options,
_In_ OrtReadEpContextDataFunc read_func, _In_opt_ void* state);
};

/*
Expand Down Expand Up @@ -8292,6 +8367,27 @@ struct OrtCompileApi {
ORT_API2_STATUS(ModelCompilationOptions_SetInputModel,
_In_ OrtModelCompilationOptions* model_compile_options,
_In_ const OrtModel* model);

/** \brief Sets a callback for writing EPContext binary data during compilation.
*
* When EPContext embed mode is disabled, execution providers can retrieve this callback from OrtEpContextConfig and
* call it instead of writing EPContext binary data directly to disk.
*
* The state pointer is stored as-is and is not owned by ORT. It must remain valid for the duration of the compile
* operation that may call the callback. If the same state may be used by multiple EPs or threads, the application is
* responsible for synchronization.
*
* \param[in] model_compile_options The OrtModelCompilationOptions instance.
* \param[in] write_func The OrtWriteEpContextDataFunc called to write EPContext bytes.
* \param[in] state Opaque state passed to write_func. Can be NULL.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \see OrtWriteEpContextDataFunc
* \see OrtEpApi::EpContextConfig_GetEpContextDataWriteFunc
*
* \since Version 1.28.
*/
ORT_API2_STATUS(ModelCompilationOptions_SetEpContextDataWriteFunc,
_In_ OrtModelCompilationOptions* model_compile_options,
_In_ OrtWriteEpContextDataFunc write_func, _In_opt_ void* state);
};

/**
Expand Down
22 changes: 22 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,7 @@ ORT_DEFINE_RELEASE(Value);
ORT_DEFINE_RELEASE(ValueInfo);

ORT_DEFINE_RELEASE_FROM_API_STRUCT(ModelCompilationOptions, GetCompileApi);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(EpContextConfig, GetEpApi);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(EpDevice, GetEpApi);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(KernelDef, GetEpApi);
ORT_DEFINE_RELEASE_FROM_API_STRUCT(KernelDefBuilder, GetEpApi);
Expand Down Expand Up @@ -786,6 +787,7 @@ struct AllocatedFree {

struct AllocatorWithDefaultOptions;
struct Env;
struct EpContextConfig;
struct EpDevice;
struct ExternalInitializerInfo;
struct Graph;
Expand Down Expand Up @@ -1185,6 +1187,21 @@ struct EpDevice : detail::EpDeviceImpl<OrtEpDevice> {
ConstKeyValuePairs ep_metadata = {}, ConstKeyValuePairs ep_options = {});
};

/** \brief Wrapper that owns an ::OrtEpContextConfig handle and exposes the EPContext data callbacks it carries. */
struct EpContextConfig : detail::Base<OrtEpContextConfig> {
  /// \brief Builds an empty wrapper; no underlying OrtEpContextConfig is created.
  explicit EpContextConfig(std::nullptr_t) {}

  /// \brief Adopts an existing handle; the wrapper takes ownership of \p p.
  explicit EpContextConfig(OrtEpContextConfig* p) : Base<OrtEpContextConfig>{p} {}

  /// \brief Obtains the config for \p session_options. Wraps OrtEpApi::SessionOptions_GetEpContextConfig
  explicit EpContextConfig(const OrtSessionOptions* session_options);

  /// \brief Returns the read callback and its opaque state as a pair.
  /// Wraps OrtEpApi::EpContextConfig_GetEpContextDataReadFunc
  std::pair<OrtReadEpContextDataFunc, void*> GetEpContextDataReadFunc() const;

  /// \brief Returns the write callback and its opaque state as a pair.
  /// Wraps OrtEpApi::EpContextConfig_GetEpContextDataWriteFunc
  std::pair<OrtWriteEpContextDataFunc, void*> GetEpContextDataWriteFunc() const;
};

/** \brief Validate a compiled model's compatibility for one or more EP devices.
*
* Throws on error. Returns the resulting compatibility status.
Expand Down Expand Up @@ -1668,6 +1685,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {
const std::vector<char*>& external_initializer_file_buffer_array,
const std::vector<size_t>& external_initializer_file_lengths); ///< Wraps OrtApi::AddExternalInitializersFromFilesInMemory

SessionOptionsImpl& SetEpContextDataReadFunc(OrtReadEpContextDataFunc read_func, void* state); ///< Wraps OrtApi::SessionOptions_SetEpContextDataReadFunc

SessionOptionsImpl& AppendExecutionProvider_CPU(int use_arena); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CPU
SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA
SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2
Expand Down Expand Up @@ -1769,6 +1788,9 @@ struct ModelCompilationOptions : detail::Base<OrtModelCompilationOptions> {
/// Wraps OrtApi::ModelCompilationOptions_SetOutputModelWriteFunc
ModelCompilationOptions& SetOutputModelWriteFunc(OrtWriteBufferFunc write_func, void* state);

/// Wraps OrtCompileApi::ModelCompilationOptions_SetEpContextDataWriteFunc
ModelCompilationOptions& SetEpContextDataWriteFunc(OrtWriteEpContextDataFunc write_func, void* state);

ModelCompilationOptions& SetEpContextBinaryInformation(const ORTCHAR_T* output_directory,
const ORTCHAR_T* model_name); ///< Wraps OrtApi::ModelCompilationOptions_SetEpContextBinaryInformation
ModelCompilationOptions& SetFlags(uint32_t flags); ///< Wraps OrtApi::ModelCompilationOptions_SetFlags
Expand Down
31 changes: 31 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,24 @@ inline EpDevice::EpDevice(OrtEpFactory& ep_factory, ConstHardwareDevice& hardwar
ThrowOnError(GetEpApi().CreateEpDevice(&ep_factory, hardware_device, ep_metadata, ep_options, &p_));
}

/// Requests the EPContext config for `session_options` from the EP API, storing the handle in this wrapper.
/// The status returned by the API call is handed to ThrowOnError.
inline EpContextConfig::EpContextConfig(const OrtSessionOptions* session_options) {
  auto* status = GetEpApi().SessionOptions_GetEpContextConfig(session_options, &this->p_);
  ThrowOnError(status);
}

inline std::pair<OrtReadEpContextDataFunc, void*> EpContextConfig::GetEpContextDataReadFunc() const {
OrtReadEpContextDataFunc read_func = nullptr;
void* state = nullptr;
ThrowOnError(GetEpApi().EpContextConfig_GetEpContextDataReadFunc(this->p_, &read_func, &state));
return {read_func, state};
}

inline std::pair<OrtWriteEpContextDataFunc, void*> EpContextConfig::GetEpContextDataWriteFunc() const {
OrtWriteEpContextDataFunc write_func = nullptr;
void* state = nullptr;
ThrowOnError(GetEpApi().EpContextConfig_GetEpContextDataWriteFunc(this->p_, &write_func, &state));
return {write_func, state};
}

namespace detail {
template <typename T>
inline std::string EpAssignedSubgraphImpl<T>::GetEpName() const {
Expand Down Expand Up @@ -1335,6 +1353,12 @@ inline ModelCompilationOptions& ModelCompilationOptions::SetOutputModelWriteFunc
return *this;
}

/// Forwards `write_func` and `state` to OrtCompileApi::ModelCompilationOptions_SetEpContextDataWriteFunc for the
/// wrapped options object, hands the resulting status to ThrowOnError, and returns *this so calls can be chained.
inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextDataWriteFunc(
    OrtWriteEpContextDataFunc write_func, void* state) {
  auto* status = GetCompileApi().ModelCompilationOptions_SetEpContextDataWriteFunc(this->p_, write_func, state);
  Ort::ThrowOnError(status);
  return *this;
}

inline ModelCompilationOptions& ModelCompilationOptions::SetEpContextEmbedMode(
bool embed_ep_context_in_model) {
Ort::ThrowOnError(GetCompileApi().ModelCompilationOptions_SetEpContextEmbedMode(
Expand Down Expand Up @@ -1574,6 +1598,13 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AddExternalInitializersFrom
return *this;
}

/// Forwards `read_func` and `state` to OrtApi::SessionOptions_SetEpContextDataReadFunc for the wrapped session
/// options, hands the resulting status to ThrowOnError, and returns *this so calls can be chained.
template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetEpContextDataReadFunc(OrtReadEpContextDataFunc read_func,
                                                                              void* state) {
  auto* status = GetApi().SessionOptions_SetEpContextDataReadFunc(this->p_, read_func, state);
  ThrowOnError(status);
  return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::AppendExecutionProvider_CPU(int use_arena) {
ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(this->p_, use_arena));
Expand Down
70 changes: 70 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_ep_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extern "C" {
* @{
*/
ORT_RUNTIME_CLASS(Ep);
ORT_RUNTIME_CLASS(EpContextConfig);
ORT_RUNTIME_CLASS(EpFactory);
ORT_RUNTIME_CLASS(EpGraphSupportInfo);
ORT_RUNTIME_CLASS(MemoryDevice); // opaque class to wrap onnxruntime::OrtDevice
Expand Down Expand Up @@ -2077,6 +2078,75 @@ struct OrtEpApi {
ORT_API2_STATUS(ProfilingEventsContainer_AddEvents, _In_ OrtProfilingEventsContainer* events_container,
_In_reads_(num_events) const OrtProfilingEvent* const* events,
_In_ size_t num_events);

/** \brief Get the EPContext configuration from session options.
*
* Extracts EPContext-related data I/O callbacks from the session options into an opaque OrtEpContextConfig handle.
* The EP should call this during CreateEp() while session_options is still valid, and store the returned handle for
* use during Compile(). The returned config is always non-NULL and must be released with
* OrtEpApi::ReleaseEpContextConfig.
*
* The returned handle owns only ORT's copy of callback function pointers and opaque state pointer values. It does not
* own the application-provided state. The application is responsible for keeping callback state valid and
* synchronized while an EP may call callbacks retrieved from this config.
*
* \param[in] session_options The OrtSessionOptions instance.
* \param[out] config The extracted OrtEpContextConfig.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \see OrtEpApi::EpContextConfig_GetEpContextDataReadFunc
* \see OrtEpApi::EpContextConfig_GetEpContextDataWriteFunc
*
* \since Version 1.28.
*/
ORT_API2_STATUS(SessionOptions_GetEpContextConfig,
_In_ const OrtSessionOptions* session_options,
_Outptr_ OrtEpContextConfig** config);

/** \brief Release an OrtEpContextConfig instance.
*
* \param[in] input The OrtEpContextConfig instance to release. May be NULL.
*
* \see OrtEpApi::SessionOptions_GetEpContextConfig
*
* \since Version 1.28.
*/
ORT_CLASS_RELEASE(EpContextConfig);

/** \brief Get the application-provided EPContext data read callback.
*
* Returns the OrtReadEpContextDataFunc and opaque state pointer registered via
* OrtApi::SessionOptions_SetEpContextDataReadFunc. If no callback was registered, *read_func and *state are set to
* NULL. The EP is responsible for calling the callback when present and for using its own normal read path when no
* callback is present.
*
* \param[in] config The OrtEpContextConfig from SessionOptions_GetEpContextConfig.
* \param[out] read_func The registered read callback, or NULL if none was registered.
* \param[out] state Opaque state pointer passed to read_func, or NULL if none was registered.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \see OrtReadEpContextDataFunc
*
* \since Version 1.28.
*/
ORT_API2_STATUS(EpContextConfig_GetEpContextDataReadFunc,
_In_ const OrtEpContextConfig* config,
_Out_ OrtReadEpContextDataFunc* read_func,
_Out_ void** state);

/** \brief Get the application-provided EPContext data write callback.
*
* Returns the OrtWriteEpContextDataFunc and opaque state pointer registered via
* OrtCompileApi::ModelCompilationOptions_SetEpContextDataWriteFunc. If no callback was registered, *write_func and
* *state are set to NULL. The EP is responsible for calling the callback when present and for using its own normal
* write path when no callback is present.
*
* \param[in] config The OrtEpContextConfig from SessionOptions_GetEpContextConfig.
* \param[out] write_func The registered write callback, or NULL if none was registered.
* \param[out] state Opaque state pointer passed to write_func, or NULL if none was registered.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \see OrtWriteEpContextDataFunc
*
* \since Version 1.28.
*/
ORT_API2_STATUS(EpContextConfig_GetEpContextDataWriteFunc,
_In_ const OrtEpContextConfig* config,
_Out_ OrtWriteEpContextDataFunc* write_func,
_Out_ void** state);
};

/**
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/framework/ep_context_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ const BufferWriteFuncHolder* ModelGenOptions::TryGetOutputModelWriteFunc() const
return std::get_if<BufferWriteFuncHolder>(&output_model_location);
}

// Returns a pointer to the stored EPContext data write-function holder, or nullptr when its write_func is null.
const EpContextDataWriteFuncHolder* ModelGenOptions::TryGetEpContextDataWriteFunc() const {
  if (ep_context_data_write_func.write_func == nullptr) {
    return nullptr;
  }
  return &ep_context_data_write_func;
}

bool ModelGenOptions::AreInitializersEmbeddedInOutputModel() const {
return std::holds_alternative<std::monostate>(initializers_location);
}
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/framework/ep_context_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ struct BufferWriteFuncHolder {
void* stream_state = nullptr; // Opaque pointer to user's stream state. Passed as first argument to write_func.
};

/// <summary>
/// Pairs the write function that EPs call to write EPContext binary data with the opaque state handed to it.
/// </summary>
struct EpContextDataWriteFuncHolder {
  OrtWriteEpContextDataFunc write_func{nullptr};  // Function that writes EPContext binary data.
  void* state{nullptr};                           // Opaque pointer passed as first argument to write_func.
};

/// <summary>
/// Holds path and size threshold used to write out initializers to an external file.
/// </summary>
Expand Down Expand Up @@ -84,10 +92,13 @@ struct ModelGenOptions {
InitializerHandler> // Custom function called for every initializer to determine location.
initializers_location = std::monostate{};

// Write function (plus opaque state) for EPContext binary data. A null write_func means no function is set.
EpContextDataWriteFuncHolder ep_context_data_write_func = {};

bool HasOutputModelLocation() const;
const std::filesystem::path* TryGetOutputModelPath() const;
const BufferHolder* TryGetOutputModelBuffer() const;
const BufferWriteFuncHolder* TryGetOutputModelWriteFunc() const;
// Returns the EPContext data write-function holder, or nullptr when no write function is set.
const EpContextDataWriteFuncHolder* TryGetEpContextDataWriteFunc() const;

bool AreInitializersEmbeddedInOutputModel() const;
const ExternalInitializerFileInfo* TryGetExternalInitializerFileInfo() const;
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/framework/session_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ struct SessionOptions {
bool has_explicit_ep_context_gen_options = false;
epctx::ModelGenOptions ep_context_gen_options = {};
epctx::ModelGenOptions GetEpContextGenerationOptions() const;

// Callback for reading EPContext binary data, plus the opaque state pointer that accompanies it.
// Both default to nullptr. NOTE(review): presumably populated by OrtApi::SessionOptions_SetEpContextDataReadFunc;
// confirm against the implementation.
OrtReadEpContextDataFunc ep_context_data_read_func = nullptr;
void* ep_context_data_read_state = nullptr;
};

inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) {
Expand Down
Loading
Loading