Skip to content

Commit 1fb6f12

Browse files
committed
Update base for Update on "[executorch] Propagate device metadata from partitioner result onto TensorSpecs"
Add end-to-end device type annotation support from export to runtime. Currently we only support one device per graph. The overall pipeline is: a. The partitioner uses `compile_spec` to determine which device the partitioned blob is running on; b. after the partitioned graph is lowered to the backend, the newly introduced propagate_device_pass annotates the input and output tensors of the delegate blob with the target device. Differential Revision: [D95842511](https://our.internmc.facebook.com/intern/diff/D95842511/) [ghstack-poisoned]
2 parents 2dc7e9d + 0bf3c51 commit 1fb6f12

179 files changed

Lines changed: 7534 additions & 1381 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/scripts/build-qnn-sdk.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ set_up_aot() {
4040
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
4141
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
4242
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
43+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
44+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
4345
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
4446
-DPYTHON_EXECUTABLE=python3
4547
cmake --build $PWD --target "PyQnnManagerAdaptor" -j$(nproc)

.github/workflows/android-release-artifacts.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ jobs:
5353
run: |
5454
VERSION="${{ inputs.version }}"
5555
FLAVOR="${{ inputs.flavor }}"
56+
if [ -z "$FLAVOR" ]; then
57+
FLAVOR="xnnpack"
58+
fi
5659
if [ -z "$VERSION" ]; then
5760
echo "No version name specified. Will create a snapshot AAR"
5861
echo "should-skip=false" >> $GITHUB_OUTPUT
@@ -115,9 +118,10 @@ jobs:
115118
fi
116119
117120
FLAVOR="${{ inputs.flavor }}"
118-
if [ ! -z "$FLAVOR" ]; then
119-
GRADLE_ARGS+=" -Dflavor=${FLAVOR}"
121+
if [ -z "$FLAVOR" ]; then
122+
FLAVOR="xnnpack"
120123
fi
124+
GRADLE_ARGS+=" -Dflavor=${FLAVOR}"
121125
122126
if [[ "$FLAVOR" == "vulkan" || "$FLAVOR" == "all" || -z "$FLAVOR" ]]; then
123127
curl -O https://sdk.lunarg.com/sdk/download/1.4.321.1/linux/vulkansdk-linux-x86_64-1.4.321.1.tar.xz

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ cmake-out*
1818
cmake-out-android/
1919
build-android/
2020
build-x86/
21+
build-hexagon/
2122
dist/
2223
arm-scratch/
2324
executorch.egg-info

CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ project(executorch)
5050

5151
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
5252

53+
# Hexagon toolchain with release build complains about code in third party
54+
# libraries.
55+
if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "Hexagon" AND "${CMAKE_BUILD_TYPE}"
56+
STREQUAL "Release"
57+
)
58+
add_compile_options(
59+
-Wno-error=format -Wno-error=implicit-int-conversion
60+
-Wno-error=unused-variable -Wno-error=unused-function
61+
)
62+
endif()
63+
5364
# --- ExecuTorch Version ---
5465
# Parse version from version.txt (single source of truth)
5566
file(READ "${EXECUTORCH_ROOT}/version.txt" ET_VERSION_STRING)

backends/aoti/aoti_delegate_handle.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ using AOTInductorModelContainerHandle = AOTInductorModelContainerOpaque*;
3131
using AOTInductorStreamHandle = void*;
3232
using AOTIProxyExecutorHandle = void*;
3333

34+
// Opaque types for AOTI constant management.
35+
// AtenTensorOpaque wraps at::Tensor* in the AOTI runtime — distinct from
36+
// AOTITensorHandle which wraps executorch::runtime::etensor::Tensor*.
37+
struct AtenTensorOpaque;
38+
using AtenTensorHandle = AtenTensorOpaque*;
39+
40+
struct AOTInductorConstantMap;
41+
using AOTInductorConstantMapHandle = AOTInductorConstantMap*;
42+
43+
struct AOTInductorConstantMapEntry {
44+
const char* name;
45+
AtenTensorHandle handle;
46+
};
47+
3448
// Function pointer types for AOT Inductor model container operations
3549
using AOTInductorModelContainerCreateWithDeviceFunc = AOTIRuntimeError (*)(
3650
AOTInductorModelContainerHandle* container_handle,
@@ -77,6 +91,37 @@ using AOTInductorModelUpdateConstantsFromBlobFunc = AOTIRuntimeError (*)(
7791
AOTInductorModelContainerHandle container_handle,
7892
const uint8_t* weight_blob_ptr);
7993

94+
// Retrieves a constant's AOTI internal name by index.
95+
using AOTInductorModelContainerGetConstantNameFunc = AOTIRuntimeError (*)(
96+
AOTInductorModelContainerHandle container_handle,
97+
size_t idx,
98+
const char** name);
99+
100+
// Retrieves a constant's original fully-qualified name by index.
101+
using AOTInductorModelContainerGetConstantOriginalFQNFunc =
102+
AOTIRuntimeError (*)(
103+
AOTInductorModelContainerHandle container_handle,
104+
size_t idx,
105+
const char** original_fqn);
106+
107+
// Extracts the constants map from the container (active or inactive buffer).
108+
// constant_map_handle should point to a
109+
// std::unordered_map<std::string, AtenTensorHandle>.
110+
using AOTInductorModelContainerExtractConstantsMapFunc = AOTIRuntimeError (*)(
111+
AOTInductorModelContainerHandle container_handle,
112+
AOTInductorConstantMapHandle constant_map_handle,
113+
bool use_inactive);
114+
115+
// Updates the container's constants with user-managed tensor handles.
116+
// DLL-boundary safe — uses a flat C array instead of std::unordered_map.
117+
using AOTInductorModelContainerUpdateUserManagedConstantBufferPairsFunc =
118+
AOTIRuntimeError (*)(
119+
AOTInductorModelContainerHandle container_handle,
120+
const AOTInductorConstantMapEntry* pairs,
121+
size_t num_pairs,
122+
bool use_inactive,
123+
bool validate_full_update);
124+
80125
} // extern "C"
81126

82127
// AOTI Delegate Handle structure
@@ -93,6 +138,14 @@ struct AOTIDelegateHandle {
93138
AOTInductorModelContainerGetNumOutputsFunc get_num_outputs;
94139
AOTInductorModelContainerRunFunc run;
95140
AOTInductorModelUpdateConstantsFromBlobFunc update_constants_from_blob;
141+
142+
// Constant management function pointers (for cross-method buffer sharing)
143+
AOTInductorModelContainerGetNumConstantsFunc get_num_constants;
144+
AOTInductorModelContainerGetConstantNameFunc get_constant_name;
145+
AOTInductorModelContainerGetConstantOriginalFQNFunc get_constant_original_fqn;
146+
AOTInductorModelContainerExtractConstantsMapFunc extract_constants_map;
147+
AOTInductorModelContainerUpdateUserManagedConstantBufferPairsFunc
148+
update_user_managed_constant_buffer_pairs;
96149
};
97150

98151
} // namespace aoti

backends/apple/coreml/runtime/delegate/backend_delegate.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
#include "model_logging_options.h"
1111

12+
#include <executorch/runtime/backend/options.h>
13+
#include <executorch/runtime/core/span.h>
14+
1215
#include <system_error>
1316
#include <unordered_map>
1417
#include <vector>
@@ -75,12 +78,15 @@ class BackendDelegate {
7578
/// @param method_name The ExecuTorch method name for metadata lookup (optional, may be nullptr).
7679
/// @param function_name The CoreML function name to invoke (optional, may be nullptr).
7780
/// If nullptr, method_name is used as the function name.
81+
/// @param runtime_specs Runtime options passed via BackendOptions (e.g., cache_dir).
7882
/// @retval An opaque handle to the initialized blob or `nullptr` if the
7983
/// initialization failed.
80-
virtual Handle* init(Buffer processed,
81-
const std::unordered_map<std::string, Buffer>& specs,
82-
const char* method_name = nullptr,
83-
const char* function_name = nullptr) const noexcept = 0;
84+
virtual Handle*
85+
init(Buffer processed,
86+
const std::unordered_map<std::string, Buffer>& specs,
87+
const char* method_name = nullptr,
88+
const char* function_name = nullptr,
89+
executorch::runtime::Span<const executorch::runtime::BackendOption> runtime_specs = {}) const noexcept = 0;
8490

8591
/// Must execute the CoreML model with the specified handle.
8692
///

backends/apple/coreml/runtime/delegate/backend_delegate.mm

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#import "ETCoreMLAssetManager.h"
1212
#import "ETCoreMLLogging.h"
1313
#import "ETCoreMLModel.h"
14+
#import "ETCoreMLModelCache.h"
1415
#import "ETCoreMLModelManager.h"
1516
#import "ETCoreMLStrings.h"
1617
#import "model_event_logger.h"
@@ -100,7 +101,14 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
100101
configuration:(MLModelConfiguration*)configuration
101102
methodName:(nullable NSString*)methodName
102103
functionName:(nullable NSString*)functionName
103-
error:(NSError* __autoreleasing*)error;
104+
error:(NSError* __autoreleasing*)error;
105+
106+
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
107+
configuration:(MLModelConfiguration*)configuration
108+
methodName:(nullable NSString*)methodName
109+
functionName:(nullable NSString*)functionName
110+
cachePath:(nullable NSString*)cachePath
111+
error:(NSError* __autoreleasing*)error;
104112

105113
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
106114
configuration:(MLModelConfiguration*)configuration
@@ -119,6 +127,7 @@ - (BOOL)purgeModelsCacheAndReturnError:(NSError * _Nullable __autoreleasing *)er
119127
@property (assign, readonly, nonatomic) BackendDelegate::Config config;
120128
@property (strong, readonly, nonatomic) dispatch_queue_t syncQueue;
121129
@property (strong, nonatomic, nullable) ETCoreMLModelManager *impl;
130+
@property (strong, nonatomic, nullable) ETCoreMLModelCache *defaultCache;
122131
@property (assign, readonly, nonatomic) BOOL isAvailable;
123132

124133
@end
@@ -157,6 +166,16 @@ - (BOOL)_loadAndReturnError:(NSError * _Nullable __autoreleasing *)error {
157166

158167
self.impl = modelManager;
159168

169+
// Create default filesystem cache at the same location as assets
170+
NSURL *defaultCacheURL = [NSURL fileURLWithPath:ETCoreMLStrings.assetsDirectoryPath isDirectory:YES];
171+
ETCoreMLModelCache *defaultCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:defaultCacheURL];
172+
if (defaultCache.isReady) {
173+
self.defaultCache = defaultCache;
174+
} else {
175+
ETCoreMLLogError(defaultCache.initializationError,
176+
"Default cache initialization failed, will use asset manager as fallback");
177+
}
178+
160179
if (self.config.should_prewarm_asset) {
161180
[modelManager prewarmRecentlyUsedAssetsWithMaxCount:1];
162181
}
@@ -191,22 +210,86 @@ - (ModelHandle*)loadModelFromAOTData:(NSData*)data
191210
configuration:configuration
192211
methodName:nil
193212
functionName:nil
213+
cachePath:nil
214+
error:error];
215+
}
216+
217+
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
218+
configuration:(MLModelConfiguration*)configuration
219+
methodName:(nullable NSString*)methodName
220+
functionName:(nullable NSString*)functionName
221+
error:(NSError* __autoreleasing*)error {
222+
return [self loadModelFromAOTData:data
223+
configuration:configuration
224+
methodName:methodName
225+
functionName:functionName
226+
cachePath:nil
194227
error:error];
195228
}
196229

197230
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
198231
configuration:(MLModelConfiguration*)configuration
199232
methodName:(nullable NSString*)methodName
200233
functionName:(nullable NSString*)functionName
234+
cachePath:(nullable NSString*)cachePath
235+
error:(NSError* __autoreleasing*)error {
236+
// Default to using the old cache (useNewCache = NO)
237+
return [self loadModelFromAOTData:data
238+
configuration:configuration
239+
methodName:methodName
240+
functionName:functionName
241+
cachePath:cachePath
242+
useNewCache:NO
243+
error:error];
244+
}
245+
246+
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
247+
configuration:(MLModelConfiguration*)configuration
248+
methodName:(nullable NSString*)methodName
249+
functionName:(nullable NSString*)functionName
250+
cachePath:(nullable NSString*)cachePath
251+
useNewCache:(BOOL)useNewCache
201252
error:(NSError* __autoreleasing*)error {
202253
if (![self loadAndReturnError:error]) {
203254
return nil;
204255
}
205256

257+
id<ETCoreMLCache> cache = nil;
258+
if (cachePath != nil) {
259+
// Use NEW filesystem cache at specified path
260+
NSURL *cacheURL = [NSURL fileURLWithPath:cachePath isDirectory:YES];
261+
ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
262+
if (!modelCache.isReady) {
263+
// Fallback error if initializationError is unexpectedly nil
264+
NSError *cacheError = modelCache.initializationError
265+
?: [NSError errorWithDomain:ETCoreMLModelCacheErrorDomain
266+
code:ETCoreMLModelCacheErrorCodeInitializationFailed
267+
userInfo:@{NSLocalizedDescriptionKey: @"Cache initialization failed"}];
268+
if (error) *error = cacheError;
269+
return nil;
270+
}
271+
cache = modelCache;
272+
} else if (useNewCache) {
273+
if (self.defaultCache != nil) {
274+
// Use default filesystem cache
275+
cache = self.defaultCache;
276+
} else {
277+
// Fallback: useNewCache requested but default cache unavailable
278+
NSError *fallbackError = [NSError errorWithDomain:ETCoreMLErrorDomain
279+
code:ETCoreMLErrorInternalError
280+
userInfo:@{NSLocalizedDescriptionKey: @"Default cache unavailable"}];
281+
ETCoreMLLogError(fallbackError,
282+
"useNewCache=YES but default cache is unavailable, falling back to asset manager");
283+
}
284+
}
285+
// If useNewCache is false or defaultCache is nil, cache remains nil
286+
// and loadModelFromAOTData will use the asset manager path
287+
206288
auto handle = [self.impl loadModelFromAOTData:data
207289
configuration:configuration
208290
methodName:methodName
209291
functionName:functionName
292+
cache:cache
210293
error:error];
211294
if ((handle != NULL) && self.config.should_prewarm_model) {
212295
[self.impl prewarmModelWithHandle:handle error:nil];
@@ -291,9 +374,10 @@ explicit BackendDelegateImpl(const Config& config) noexcept
291374
BackendDelegateImpl& operator=(BackendDelegateImpl const&) = delete;
292375

293376
Handle *init(Buffer processed,
294-
const std::unordered_map<std::string, Buffer>& specs,
295-
const char* method_name = nullptr,
296-
const char* function_name = nullptr) const noexcept override {
377+
const std::unordered_map<std::string, Buffer>& specs,
378+
const char* method_name = nullptr,
379+
const char* function_name = nullptr,
380+
executorch::runtime::Span<const executorch::runtime::BackendOption> runtime_specs = {}) const noexcept override {
297381
NSError *localError = nil;
298382
MLModelConfiguration *configuration = get_model_configuration(specs, &localError);
299383
if (configuration == nil) {
@@ -304,13 +388,31 @@ explicit BackendDelegateImpl(const Config& config) noexcept
304388
NSString *methodNameStr = method_name ? @(method_name) : nil;
305389
NSString *functionNameStr = function_name ? @(function_name) : nil;
306390

391+
// Parse cache_dir and _use_new_cache from runtime_specs
392+
NSString *cachePath = nil;
393+
BOOL useNewCache = NO; // Default to using the old cache (asset manager)
394+
for (size_t i = 0; i < runtime_specs.size(); ++i) {
395+
const auto& opt = runtime_specs[i];
396+
if (std::strcmp(opt.key, "cache_dir") == 0) {
397+
if (auto* arr = std::get_if<std::array<char, executorch::runtime::kMaxOptionValueLength>>(&opt.value)) {
398+
cachePath = @(arr->data());
399+
}
400+
} else if (std::strcmp(opt.key, "_use_new_cache") == 0) {
401+
if (auto* val = std::get_if<bool>(&opt.value)) {
402+
useNewCache = *val ? YES : NO;
403+
}
404+
}
405+
}
406+
307407
NSData *data = [NSData dataWithBytesNoCopy:const_cast<void *>(processed.data())
308408
length:processed.size()
309409
freeWhenDone:NO];
310410
ModelHandle *modelHandle = [model_manager_ loadModelFromAOTData:data
311411
configuration:configuration
312412
methodName:methodNameStr
313413
functionName:functionNameStr
414+
cachePath:cachePath
415+
useNewCache:useNewCache
314416
error:&localError];
315417
if (localError != nil) {
316418
ETCoreMLLogError(localError, "Model init failed");

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
336336

337337
std::error_code error;
338338
const char* function_name_cstr = functionName.empty() ? nullptr : functionName.c_str();
339-
auto handle = impl_->init(std::move(buffer), specs_map, method_name, function_name_cstr);
339+
auto handle = impl_->init(std::move(buffer), specs_map, method_name, function_name_cstr, runtime_specs);
340340
ET_CHECK_OR_RETURN_ERROR(handle != nullptr,
341341
InvalidProgram,
342342
"%s: Failed to init the model.", ETCoreMLStrings.delegateIdentifier.UTF8String);

backends/apple/coreml/runtime/include/coreml_backend/coreml_backend_options.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,41 @@ class LoadOptionsBuilder {
7575
return *this;
7676
}
7777

78+
/**
79+
* Sets a custom cache directory for compiled model storage.
80+
*
81+
* When specified, the backend uses the new filesystem-based cache
82+
* (ETCoreMLModelCache) at the given path instead of the default
83+
* SQLite-based cache (ETCoreMLAssetManager).
84+
*
85+
* This enables per-model cache selection for experimentation:
86+
* - Specify cache_dir to opt-in to the new filesystem cache
87+
* - Omit cache_dir to use the default (legacy) cache
88+
*
89+
* @param path The directory path for the cache. Must be a valid
90+
* filesystem path with write permissions.
91+
* @return Reference to this builder for chaining.
92+
*/
93+
LoadOptionsBuilder& setCacheDirectory(const char* path) {
94+
options_.set_option("cache_dir", path);
95+
return *this;
96+
}
97+
98+
/**
99+
* Controls whether to use the new filesystem cache (ETCoreMLModelCache).
100+
*
101+
* This is a temporary runtime option for A/B testing the new cache
102+
* implementation. It will be removed once the new cache is fully rolled out.
103+
*
104+
* @param enabled If true, uses the new filesystem cache.
105+
* If false (default), uses the legacy asset manager.
106+
* @return Reference to this builder for chaining.
107+
*/
108+
LoadOptionsBuilder& setUseNewCache(bool enabled) {
109+
options_.set_option("_use_new_cache", enabled);
110+
return *this;
111+
}
112+
78113
/**
79114
* Returns the backend identifier for this options builder.
80115
*/

0 commit comments

Comments
 (0)