diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h index 58026593462..e72f2369d23 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h @@ -37,17 +37,32 @@ __attribute__((objc_subclassing_restricted)) orderedOutputNames:(NSOrderedSet*)orderedOutputNames error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER; +/// Constructs an `ETCoreMLModel` instance from a compiled model URL (for cache-based loading). +/// +/// @param compiledModelURL The URL of the compiled model (.mlmodelc directory). +/// @param identifier The unique identifier for this model. +/// @param configuration The model configuration. +/// @param orderedInputNames The ordered input names of the model. +/// @param orderedOutputNames The ordered output names of the model. +/// @param error On failure, error is filled with the failure information. +- (nullable instancetype)initWithCompiledModelURL:(NSURL*)compiledModelURL + identifier:(NSString*)identifier + configuration:(MLModelConfiguration*)configuration + orderedInputNames:(NSOrderedSet*)orderedInputNames + orderedOutputNames:(NSOrderedSet*)orderedOutputNames + error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER; + /// The underlying MLModel. @property (strong, readonly, nonatomic) MLModel* mlModel; /// The model state. @property (strong, readonly, nonatomic, nullable) id state; -/// The asset from which the model is loaded. -@property (strong, readonly, nonatomic) ETCoreMLAsset* asset; +/// The asset from which the model is loaded (for asset-based loading). +@property (strong, readonly, nonatomic, nullable) ETCoreMLAsset* asset; -/// The asset identifier. -@property (strong, readonly, nonatomic) NSString* identifier; +/// The model identifier. +@property (copy, readonly, nonatomic) NSString* identifier; /// The ordered input names of the model. 
@property (copy, readonly, nonatomic) NSOrderedSet* orderedInputNames; diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm index 41f32c54a58..d4d2b584821 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm @@ -41,7 +41,7 @@ - (instancetype)initWithShape:(NSArray *)shape _shape = shape; _dataType = dataType; } - + return self; } @@ -49,11 +49,11 @@ - (BOOL)isEqual:(id)object { if (object == self) { return YES; } - + if (![object isKindOfClass:self.class]) { return NO; } - + ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object; return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType; } @@ -97,18 +97,18 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) { if (shape.size() == 0) { return {}; } - + if (shape.size() == 1) { return {1}; } - + std::vector strides(shape.size(), 1); size_t product = 1; for (size_t i = shape.size(); i > 0; i--) { strides[i - 1] = product; product *= shape[i - 1]; } - + return strides; } @@ -126,7 +126,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) { size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies{}); backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)]; } - + __weak NSCache *weakCache = cache; // Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage. 
MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes @@ -135,7 +135,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) { strides:to_array(calculate_strides(shape)) deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];} error:error]; - + return result; } @@ -145,7 +145,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) { [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) { result[key] = description.multiArrayConstraint; }]; - + return result; } @@ -178,6 +178,7 @@ @interface ETCoreMLModel () @property (strong, readonly, nonatomic) NSCache *cache; @property (copy, readonly, nonatomic) NSDictionary *inputConstraintsByName; @property (copy, readonly, nonatomic) NSDictionary *outputConstraintsByName; +@property (copy, readwrite, nonatomic, nullable) NSString *identifierStorage; @end @@ -192,35 +193,35 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset if (![asset keepAliveAndReturnError:error]) { return nil; } - + MLModel *mlModel = [MLModel modelWithContentsOfURL:asset.contentURL configuration:configuration error:error]; if (!mlModel) { return nil; } - + if (orderedInputNames == nil) { ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "orderedInputNames must not be nil"); return nil; } - + if (orderedOutputNames == nil) { ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "orderedOutputNames must not be nil"); return nil; } - + self = [super init]; if (self) { _mlModel = mlModel; _asset = asset; _orderedInputNames = [orderedInputNames copy]; _orderedOutputNames = [orderedOutputNames copy]; - + _cache = [[NSCache alloc] init]; _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription); _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription); @@ -230,12 +231,61 @@ - 
(nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset } #endif } - + return self; } - (NSString *)identifier { - return self.asset.identifier; + // For URL-based loading, identifierStorage is set directly + // For asset-based loading, derive from asset + return self.identifierStorage ?: self.asset.identifier; +} + +- (nullable instancetype)initWithCompiledModelURL:(NSURL *)compiledModelURL + identifier:(NSString *)identifier + configuration:(MLModelConfiguration *)configuration + orderedInputNames:(NSOrderedSet *)orderedInputNames + orderedOutputNames:(NSOrderedSet *)orderedOutputNames + error:(NSError * __autoreleasing *)error { + MLModel *mlModel = [MLModel modelWithContentsOfURL:compiledModelURL + configuration:configuration + error:error]; + if (!mlModel) { + return nil; + } + + if (orderedInputNames == nil) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "orderedInputNames must not be nil"); + return nil; + } + + if (orderedOutputNames == nil) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "orderedOutputNames must not be nil"); + return nil; + } + + self = [super init]; + if (self) { + _mlModel = mlModel; + _identifierStorage = [identifier copy]; + _orderedInputNames = [orderedInputNames copy]; + _orderedOutputNames = [orderedOutputNames copy]; + + _cache = [[NSCache alloc] init]; + _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription); + _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription); +#if MODEL_STATE_IS_SUPPORTED + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) { + _state = mlModel.modelDescription.stateDescriptionsByName.count > 0 ? 
[_mlModel newState] : nil; + } +#endif + } + + return self; } - (nullable NSArray *)prepareArgs:(const std::vector&)args @@ -249,7 +299,7 @@ - (NSString *)identifier { BOOL lCopyData = copyData; NSString *argName = [nameEnumerator nextObject]; MLMultiArrayConstraint *constraint = argConstraintsByName[argName]; - + if (constraint == nil) { ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, @@ -257,7 +307,7 @@ - (NSString *)identifier { argName); return nil; } - + const auto& layout = arg.layout(); auto dataType = to_ml_multiarray_data_type(layout.dataType()); MLMultiArray *multiArrayArg = nil; @@ -274,11 +324,11 @@ - (NSString *)identifier { // We can't use the same data storage, data types are not the same. multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error); } - + if (!multiArrayArg) { return nil; } - + if (multiArrayArg && lCopyData) { void (^copy_data)(void *, NSArray *) = ^(void *bytes, NSArray *strides) { MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(), @@ -298,10 +348,10 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr copy_data(multiArrayArg.dataPointer, multiArrayArg.strides); } } - + [result addObject:multiArrayArg]; } - + return result; } @@ -312,7 +362,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr argConstraintsByName:self.inputConstraintsByName copyData:YES error:error]; - + } - (nullable NSArray *)prepareOutputBackings:(const std::vector&)outputs @@ -322,7 +372,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr argConstraintsByName:self.outputConstraintsByName copyData:NO error:error]; - + } - (nullable id)predictionFromFeatures:(id)input diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm index a46c810a71f..4bf3f714f06 100644 --- 
a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm @@ -23,7 +23,7 @@ for (const auto& value : values) { [result addObject:@(value.c_str())]; } - + return result; } } // namespace @@ -37,13 +37,13 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled if (compiledAsset == nil) { return nil; } - + // Use the metadata's ordered input/output names. // For multifunction models, the caller should load the per-method metadata // which contains the correct input/output names for that method. NSOrderedSet *orderedInputNames = ::get_ordered_set(metadata.input_names); NSOrderedSet *orderedOutputNames = ::get_ordered_set(metadata.output_names); - + NSError *localError = nil; ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset configuration:configuration @@ -53,14 +53,14 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled if (model) { return model; } - + if (error) { *error = localError; } - + return nil; } - + + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL configuration:(MLModelConfiguration *)configuration @@ -75,7 +75,7 @@ + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL } else { asset = [assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:&localError]; } - + ETCoreMLModel *model; if (asset != nil) { model = [self loadModelWithCompiledAsset:asset configuration:configuration metadata:metadata error:&localError]; diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h index f47092aa9c4..8e33f0a49e3 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h @@ -19,6 +19,7 @@ class MultiArray; @class ETCoreMLModel; @class ETCoreMLAssetManager; +@protocol 
ETCoreMLCache; typedef void ModelHandle; @@ -71,6 +72,27 @@ __attribute__((objc_subclassing_restricted)) functionName:(nullable NSString*)functionName error:(NSError* __autoreleasing*)error; +/// Loads the model from the AOT data using a specific cache implementation. +/// +/// Same as loadModelFromAOTData:configuration:methodName:functionName:error: but allows +/// specifying a custom cache implementation for per-model cache selection. +/// When cache is nil, uses the default asset manager (existing behavior). +/// When cache is non-nil, uses the provided cache for model storage. +/// +/// @param data The AOT blob data. +/// @param configuration The model configuration that will be used to load the model. +/// @param methodName Optional method name (e.g., "forward", "prefill") for metadata lookup. +/// @param functionName Optional CoreML function name to invoke. If nil, methodName is used. +/// @param cache Optional cache to use. If nil, uses the default asset manager. +/// @param error On failure, error is filled with the failure information. +/// @retval An opaque handle that points to the loaded model. +- (ModelHandle*)loadModelFromAOTData:(NSData*)data + configuration:(MLModelConfiguration*)configuration + methodName:(nullable NSString*)methodName + functionName:(nullable NSString*)functionName + cache:(nullable id)cache + error:(NSError* __autoreleasing*)error; + /// Executes the loaded model. /// /// @param handle The handle to the loaded model. 
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm index 6c2565fa320..6eb848ec290 100644 --- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm +++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm @@ -8,6 +8,7 @@ #import "ETCoreMLModelManager.h" #import "ETCoreMLAsset.h" +#import "ETCoreMLCacheProtocol.h" #import "ETCoreMLAssetManager.h" #import "ETCoreMLDefaultModelExecutor.h" #import "ETCoreMLLogging.h" @@ -44,6 +45,14 @@ using namespace executorchcoreml; +NSOrderedSet *get_ordered_set(const std::vector& values) { + NSMutableOrderedSet *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()]; + for (const auto& value : values) { + [result addObject:@(value.c_str())]; + } + return result; +} + enum class ModelAssetType: uint8_t { CompiledModel, Model @@ -211,15 +220,6 @@ void set_outputs(std::vector& outputs, return std::nullopt; } -NSOrderedSet *get_ordered_set(const std::vector& values) { - NSMutableOrderedSet *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()]; - for (const auto& value : values) { - [result addObject:@(value.c_str())]; - } - - return result; -} - NSURL * _Nullable write_model_files(NSURL *dst_url, NSFileManager *fm, NSString *identifier, @@ -428,6 +428,9 @@ - (nullable ETCoreMLAsset *)assetWithIdentifier:(NSString *)identifier { return modelAsset; } +// TODO(asset-manager-deprecation): Remove modelURL parameter when asset manager path is removed. +// The modelURL parameter exists only to support the legacy asset manager path, which passes +// an existing model URL instead of extracting from inMemoryFS. The cache path always passes nil. 
- (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier modelURL:(nullable NSURL *)modelURL inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS @@ -443,6 +446,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier // If modelURL is not provided, write model files to the destination directory (dstURL) // and obtain a URL pointing to them. Otherwise, use the provided modelURL. + // TODO(asset-manager-deprecation): Simplify to always call write_model_files when asset manager is removed. modelURL = (modelURL == nil) ? ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error) : modelURL; if (!modelURL) { // Failed to generate or locate model files, return nil. @@ -469,6 +473,9 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier } } +// TODO(asset-manager-deprecation): Remove this method when asset manager path is removed. +// This method is only used by the legacy asset manager path. The new cache path uses +// compiledModelURLWithMetadata:inMemoryFS:cache:error: instead. 
- (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata&)metadata modelURL:(nullable NSURL *)modelURL inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS @@ -518,6 +525,56 @@ - (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata& return compiledModelAsset; } +- (nullable NSURL *)compiledModelURLWithMetadata:(const ModelMetadata&)metadata + inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS + cache:(id)cache + error:(NSError * __autoreleasing *)error { + NSString *identifier = @(metadata.identifier.c_str()); + NSFileManager *fm = [NSFileManager defaultManager]; + + // Check cache for existing compiled model + NSURL *cachedModelURL = [cache cachedModelURLForIdentifier:identifier error:nil]; + if (cachedModelURL) { + ETCoreMLLogInfo("Cache Hit: Successfully retrieved compiled model with identifier=%@ from the cache.", identifier); + return cachedModelURL; + } + + ETCoreMLLogInfo("Cache Miss: Compiled Model with identifier=%@ was not found in the cache.", identifier); + + // Get temp directory from cache (guaranteed same filesystem for atomic moves) + NSURL *tempDirURL = [cache temporaryDirectoryWithError:error]; + if (!tempDirURL) { + return nil; + } + + // Compile/extract model to temp directory + NSURL *compiledModelURL = [self compiledModelURLWithIdentifier:identifier + modelURL:nil + inMemoryFS:inMemoryFS + dstURL:tempDirURL + error:error]; + if (!compiledModelURL) { + [fm removeItemAtURL:tempDirURL error:nil]; + return nil; + } + + // Store compiled model in cache (moves from temp to models/) + ETCoreMLLogInfo("Successfully compiled model with identifier=%@. 
Storing in cache.", identifier); + NSURL *resultURL = [cache storeModelAtURL:compiledModelURL withIdentifier:identifier error:error]; + + // Clean up temp directory (storeModelAtURL moves the model, so just remove any leftovers) + [fm removeItemAtURL:tempDirURL error:nil]; + + if (!resultURL) { + ETCoreMLLogInfo("Failed to store model with identifier=%@ in cache.", identifier); + if (error && *error) { + ETCoreMLLogInfo("Cache store error: %@", (*error).localizedDescription); + } + } + + return resultURL; +} + #if ET_EVENT_TRACER_ENABLED - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadata inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS @@ -615,6 +672,40 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat } #endif +- (nullable id)modelExecutorWithMetadata:(const ModelMetadata&)metadata + inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS + configuration:(MLModelConfiguration *)configuration + cache:(id)cache + error:(NSError * __autoreleasing *)error { + // Get or compile the model URL using the provided cache + NSURL *compiledModelURL = [self compiledModelURLWithMetadata:metadata + inMemoryFS:inMemoryFS + cache:cache + error:error]; + if (!compiledModelURL) { + return nil; + } + + // Create model directly - no loader indirection needed for cache path + NSString *identifier = @(metadata.identifier.c_str()); + NSOrderedSet *orderedInputNames = get_ordered_set(metadata.input_names); + NSOrderedSet *orderedOutputNames = get_ordered_set(metadata.output_names); + + ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithCompiledModelURL:compiledModelURL + identifier:identifier + configuration:configuration + orderedInputNames:orderedInputNames + orderedOutputNames:orderedOutputNames + error:error]; + if (!model) { + // Remove corrupted cache entry so next load attempt will recompile + [cache removeCachedModelWithIdentifier:identifier error:nil]; + return nil; + } + + return 
[[ETCoreMLDefaultModelExecutor alloc] initWithModel:model]; +} + - (nullable id)_modelExecutorWithAOTData:(NSData *)data configuration:(MLModelConfiguration *)configuration @@ -655,7 +746,7 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat "Multifunction CoreML models require a methodName for metadata lookup."); return nil; } - + std::string method_name_str = [methodName UTF8String]; const MethodMetadata* method_metadata = metadataValue.get_method_metadata(method_name_str); if (method_metadata != nullptr) { @@ -685,7 +776,7 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat "Multifunction CoreML models require a functionName."); return nil; } - + #if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0) if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) { configuration.functionName = functionName; @@ -724,6 +815,100 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat return executor; } +- (nullable id)_modelExecutorWithAOTData:(NSData *)data + configuration:(MLModelConfiguration *)configuration + methodName:(nullable NSString *)methodName + functionName:(nullable NSString *)functionName + cache:(id)cache + error:(NSError * __autoreleasing *)error { + using namespace inmemoryfs; + + auto buffer = MemoryBuffer::make_unowned(const_cast(data.bytes), data.length); + std::unique_ptr inMemoryFS = inmemoryfs::make_from_buffer(std::move(buffer)); + if (!inMemoryFS) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Model data is corrupted."); + return nil; + } + + std::optional metadata = ::get_model_metadata(inMemoryFS.get()); + if (!metadata) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedMetadata, + "Metadata is invalid or missing."); + return nil; + } + + auto metadataValue = metadata.value(); + BOOL isMultifunction = metadataValue.is_multifunction(); + + if 
(isMultifunction) { + if (methodName == nil || methodName.length == 0) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Multifunction CoreML models require a methodName for metadata lookup."); + return nil; + } + + std::string method_name_str = [methodName UTF8String]; + const MethodMetadata* method_metadata = metadataValue.get_method_metadata(method_name_str); + if (method_metadata != nullptr) { + metadataValue.input_names = method_metadata->input_names; + metadataValue.output_names = method_metadata->output_names; + } else { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Method '%@' not found in multifunction model metadata.", + methodName); + return nil; + } + } + + if (isMultifunction) { + if (functionName == nil || functionName.length == 0) { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Multifunction CoreML models require a functionName."); + return nil; + } + +#if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0) + if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) { + configuration.functionName = functionName; + } else { + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+."); + return nil; + } +#else + ETCoreMLLogErrorAndSetNSError(error, + ETCoreMLErrorCorruptedModel, + "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+ SDK to build."); + return nil; +#endif + } + + // Note: We intentionally skip add_compute_unit for the cache path. + // The cache key is based on model identifier only, not compute unit. + // The same compiled model should be used regardless of compute unit. 
+ + NSString *identifier = @(metadataValue.identifier.c_str()); + __block id executor = nil; + dispatch_queue_t loadingQueue = [self queueForLoadingModelWithIdentifier:identifier]; + auto inMemoryFSPtr = inMemoryFS.get(); + dispatch_sync(loadingQueue, ^{ + executor = [self modelExecutorWithMetadata:metadataValue + inMemoryFS:inMemoryFSPtr + configuration:configuration + cache:cache + error:error]; + }); + + return executor; +} + - (dispatch_queue_t)queueForLoadingModelWithIdentifier:(NSString *)identifier { os_unfair_lock_lock(&_lock); dispatch_queue_t queue = [self.modelIdentifierToLoadingQueueMap objectForKey:identifier]; @@ -768,6 +953,40 @@ - (ModelHandle *)loadModelFromAOTData:(NSData*)data return (__bridge ModelHandle *)executor.model; } +- (ModelHandle *)loadModelFromAOTData:(NSData*)data + configuration:(MLModelConfiguration*)configuration + methodName:(nullable NSString*)methodName + functionName:(nullable NSString*)functionName + cache:(nullable id)cache + error:(NSError* __autoreleasing*)error { + // If cache is nil, use the existing asset-based path (status quo) + if (cache == nil) { + return [self loadModelFromAOTData:data + configuration:configuration + methodName:methodName + functionName:functionName + error:error]; + } + + // Use the cache-based path + id executor = [self _modelExecutorWithAOTData:data + configuration:configuration + methodName:methodName + functionName:functionName + cache:cache + error:error]; + { + os_unfair_lock_lock(&_lock); + if (executor) { + NSValue *key = [NSValue valueWithPointer:(__bridge void *)executor.model]; + self.handleToExecutorMap[key] = executor; + } + os_unfair_lock_unlock(&_lock); + } + + return (__bridge ModelHandle *)executor.model; +} + - (BOOL)prewarmModelWithHandle:(ModelHandle *)handle error:(NSError * __autoreleasing *)error { ETCoreMLModel *model = [self modelWithHandle:handle]; diff --git a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm 
b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm index 69e3fc09671..8b4a45af021 100644 --- a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm +++ b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm @@ -10,6 +10,7 @@ #import #import #import +#import #import #import #import @@ -97,7 +98,7 @@ - (void)testModelPrewarm { - (void)testAddModelExecution { NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"]; XCTAssertNotNil(modelURL); - + NSError *localError = nil; NSData *data = [NSData dataWithContentsOfURL:modelURL]; MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init]; @@ -108,7 +109,7 @@ - (void)testAddModelExecution { int y = 50; // add_coreml_all does the following operation. int z = x + y; - + NSArray *inputs = [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(x), @(y)] error:&localError]; XCTAssertNotNil(inputs); MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError]; @@ -127,7 +128,7 @@ - (void)testAddModelExecution { - (void)testMulModelExecution { NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"]; XCTAssertNotNil(modelURL); - + NSError *localError = nil; NSData *data = [NSData dataWithContentsOfURL:modelURL]; MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init]; @@ -151,6 +152,293 @@ - (void)testMulModelExecution { } } +#pragma mark - Cache-based Path Tests + +- (void)testModelLoadAndUnloadWithCache { + NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"]; + NSError *localError = nil; + XCTAssertNotNil(modelURL); + + // Create a separate cache for this test + NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"]; + ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL]; + 
XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError); + + NSData *data = [NSData dataWithContentsOfURL:modelURL]; + MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init]; + configuration.computeUnits = MLComputeUnitsAll; + + ModelHandle *handle = [self.modelManager loadModelFromAOTData:data + configuration:configuration + methodName:nil + functionName:nil + cache:cache + error:&localError]; + XCTAssertTrue(handle != NULL, @"Model should load successfully with cache: %@", localError); + XCTAssertTrue([self.modelManager unloadModelWithHandle:handle]); + XCTAssertFalse([self.modelManager unloadModelWithHandle:handle]); +} + +- (void)testModelHandleWithCache { + NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"]; + NSError *localError = nil; + XCTAssertNotNil(modelURL); + + NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"]; + ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL]; + XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError); + + NSData *data = [NSData dataWithContentsOfURL:modelURL]; + MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init]; + configuration.computeUnits = MLComputeUnitsAll; + + ModelHandle *handle = [self.modelManager loadModelFromAOTData:data + configuration:configuration + methodName:nil + functionName:nil + cache:cache + error:&localError]; + XCTAssertTrue(handle != NULL, @"Model should load with cache: %@", localError); + + ETCoreMLModel *model = [self.modelManager modelWithHandle:handle]; + XCTAssertNotNil(model.mlModel); + XCTAssertTrue(model.identifier.length > 0); + XCTAssertEqual(model.orderedInputNames.count, 2); + XCTAssertEqual(model.orderedOutputNames.count, 1); +} + +- (void)testModelPrewarmWithCache { + NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"]; + NSError 
*localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *data = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    ModelHandle *handle = [self.modelManager loadModelFromAOTData:data
+                                                    configuration:configuration
+                                                       methodName:nil
+                                                     functionName:nil
+                                                            cache:cache
+                                                            error:&localError];
+    XCTAssertTrue(handle != NULL, @"Model should load with cache: %@", localError);
+    XCTAssertTrue([self.modelManager prewarmModelWithHandle:handle error:&localError], @"Prewarm should succeed: %@", localError);
+}
+
+// Loads the bundled `add_coreml_all` model through a model cache, executes it with two
+// inputs filled with x and y, and expects every output element to equal x + y.
+- (void)testAddModelExecutionWithCache {
+    NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(modelURL);
+
+    NSError *localError = nil;
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *data = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    ModelHandle *handle = [self.modelManager loadModelFromAOTData:data
+                                                    configuration:configuration
+                                                       methodName:nil
+                                                     functionName:nil
+                                                            cache:cache
+                                                            error:&localError];
+    XCTAssertTrue(handle != NULL, @"Model should load with cache: %@", localError);
+    if (handle == NULL) {
+        // The failure is already recorded; a NULL handle cannot be executed.
+        return;
+    }
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:handle];
+    int x = 20;
+    int y = 50;
+    int z = x + y;
+
+    NSArray<MLMultiArray *> *inputs = [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertTrue(inputs.count > 0, @"Inputs should be created: %@", localError);
+    if (inputs.count == 0) {
+        return;
+    }
+    MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
+    XCTAssertNotNil(output, @"Output should be created: %@", localError);
+    if (output == nil) {
+        // -arrayByAddingObject: must not be handed nil.
+        return;
+    }
+    NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
+
+    BOOL executed = [self.modelManager executeModelWithHandle:handle
+                                                         args:args
+                                               loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                  eventLogger:nullptr
+                                                        error:&localError];
+    XCTAssertTrue(executed, @"Execution should succeed: %@", localError);
+    for (NSUInteger i = 0; i < output.count; i++) {
+        NSNumber *value = [output objectAtIndexedSubscript:i];
+        XCTAssertEqual(value.integerValue, z);
+    }
+}
+
+// Loads the bundled `mul_coreml_all` model through a model cache, executes it with two
+// inputs filled with x and y, and expects every output element to equal x * y.
+- (void)testMulModelExecutionWithCache {
+    NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(modelURL);
+
+    NSError *localError = nil;
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *data = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    ModelHandle *handle = [self.modelManager loadModelFromAOTData:data
+                                                    configuration:configuration
+                                                       methodName:nil
+                                                     functionName:nil
+                                                            cache:cache
+                                                            error:&localError];
+    XCTAssertTrue(handle != NULL, @"Model should load with cache: %@", localError);
+    if (handle == NULL) {
+        // The failure is already recorded; a NULL handle cannot be executed.
+        return;
+    }
+
+    ETCoreMLModel *model = [self.modelManager modelWithHandle:handle];
+    int x = 20;
+    int y = 50;
+
+    NSArray<MLMultiArray *> *inputs = [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertTrue(inputs.count > 0, @"Inputs should be created: %@", localError);
+    if (inputs.count == 0) {
+        return;
+    }
+    MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
+    XCTAssertNotNil(output, @"Output should be created: %@", localError);
+    if (output == nil) {
+        // -arrayByAddingObject: must not be handed nil.
+        return;
+    }
+    NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
+
+    BOOL executed = [self.modelManager executeModelWithHandle:handle
+                                                         args:args
+                                               loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                  eventLogger:nullptr
+                                                        error:&localError];
+    XCTAssertTrue(executed, @"Execution should succeed: %@", localError);
+    for (NSUInteger i = 0; i < output.count; i++) {
+        NSNumber *value = [output objectAtIndexedSubscript:i];
+        XCTAssertEqual(value.integerValue, x * y);
+    }
+}
+
+// Loads the same model twice with the same cache (unloading in between), then checks that
+// both loads report the same identifier and that the reloaded model still computes x + y.
+// NOTE(review): this does not directly observe that the second load was served from the
+// cache; it only verifies the identifier and the execution result.
+- (void)testCacheHitOnReload {
+    NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSError *localError = nil;
+    XCTAssertNotNil(modelURL);
+
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *data = [NSData dataWithContentsOfURL:modelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    // Load model first time (cold cache)
+    ModelHandle *handle1 = [self.modelManager loadModelFromAOTData:data
+                                                     configuration:configuration
+                                                        methodName:nil
+                                                      functionName:nil
+                                                             cache:cache
+                                                             error:&localError];
+    XCTAssertTrue(handle1 != NULL, @"First load should succeed: %@", localError);
+    if (handle1 == NULL) {
+        return;
+    }
+
+    ETCoreMLModel *model1 = [self.modelManager modelWithHandle:handle1];
+    NSString *identifier = model1.identifier;
+
+    // Unload
+    XCTAssertTrue([self.modelManager unloadModelWithHandle:handle1]);
+
+    // Load model second time (should hit cache)
+    ModelHandle *handle2 = [self.modelManager loadModelFromAOTData:data
+                                                     configuration:configuration
+                                                        methodName:nil
+                                                      functionName:nil
+                                                             cache:cache
+                                                             error:&localError];
+    XCTAssertTrue(handle2 != NULL, @"Second load should succeed from cache: %@", localError);
+    if (handle2 == NULL) {
+        return;
+    }
+
+    ETCoreMLModel *model2 = [self.modelManager modelWithHandle:handle2];
+    XCTAssertEqualObjects(model2.identifier, identifier, @"Identifier should match");
+
+    // Verify model still works
+    int x = 10;
+    int y = 20;
+    NSArray<MLMultiArray *> *inputs = [ETCoreMLTestUtils inputsForModel:model2 repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertTrue(inputs.count > 0, @"Inputs should be created: %@", localError);
+    if (inputs.count == 0) {
+        return;
+    }
+    MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
+    XCTAssertNotNil(output, @"Output should be created: %@", localError);
+    if (output == nil) {
+        // -arrayByAddingObject: must not be handed nil.
+        return;
+    }
+    NSArray<MLMultiArray *> *args = [inputs arrayByAddingObject:output];
+
+    BOOL executed = [self.modelManager executeModelWithHandle:handle2
+                                                         args:args
+                                               loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                  eventLogger:nullptr
+                                                        error:&localError];
+    XCTAssertTrue(executed, @"Execution should succeed: %@", localError);
+    for (NSUInteger i = 0; i < output.count; i++) {
+        NSNumber *value = [output objectAtIndexedSubscript:i];
+        XCTAssertEqual(value.integerValue, x + y);
+    }
+}
+
+// Loads the add and mul models with one shared cache instance and checks that each
+// produces its own result (x + y and x * y) in every output element.
+- (void)testMultipleModelsWithSameCache {
+    NSURL *addModelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSURL *mulModelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(addModelURL);
+    XCTAssertNotNil(mulModelURL);
+
+    NSError *localError = nil;
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *addData = [NSData dataWithContentsOfURL:addModelURL];
+    NSData *mulData = [NSData dataWithContentsOfURL:mulModelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    // Load both models using the same cache
+    ModelHandle *addHandle = [self.modelManager loadModelFromAOTData:addData
+                                                       configuration:configuration
+                                                          methodName:nil
+                                                        functionName:nil
+                                                               cache:cache
+                                                               error:&localError];
+    XCTAssertTrue(addHandle != NULL, @"Add model should load: %@", localError);
+
+    ModelHandle *mulHandle = [self.modelManager loadModelFromAOTData:mulData
+                                                       configuration:configuration
+                                                          methodName:nil
+                                                        functionName:nil
+                                                               cache:cache
+                                                               error:&localError];
+    XCTAssertTrue(mulHandle != NULL, @"Mul model should load: %@", localError);
+    if (addHandle == NULL || mulHandle == NULL) {
+        // Both failures (if any) are already recorded; NULL handles cannot be executed.
+        return;
+    }
+
+    // Verify both models work correctly
+    ETCoreMLModel *addModel = [self.modelManager modelWithHandle:addHandle];
+    ETCoreMLModel *mulModel = [self.modelManager modelWithHandle:mulHandle];
+
+    int x = 5;
+    int y = 3;
+
+    // Test add model
+    NSArray<MLMultiArray *> *addInputs = [ETCoreMLTestUtils inputsForModel:addModel repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertTrue(addInputs.count > 0, @"Add inputs should be created: %@", localError);
+    if (addInputs.count == 0) {
+        return;
+    }
+    MLMultiArray *addOutput = [ETCoreMLTestUtils filledMultiArrayWithShape:addInputs[0].shape dataType:addInputs[0].dataType repeatedValue:@(0) error:&localError];
+    XCTAssertNotNil(addOutput, @"Add output should be created: %@", localError);
+    if (addOutput == nil) {
+        // -arrayByAddingObject: must not be handed nil.
+        return;
+    }
+    NSArray<MLMultiArray *> *addArgs = [addInputs arrayByAddingObject:addOutput];
+
+    BOOL addExecuted = [self.modelManager executeModelWithHandle:addHandle
+                                                            args:addArgs
+                                                  loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                     eventLogger:nullptr
+                                                           error:&localError];
+    XCTAssertTrue(addExecuted, @"Add execution should succeed: %@", localError);
+    for (NSUInteger i = 0; i < addOutput.count; i++) {
+        NSNumber *value = [addOutput objectAtIndexedSubscript:i];
+        XCTAssertEqual(value.integerValue, x + y);
+    }
+
+    // Test mul model
+    NSArray<MLMultiArray *> *mulInputs = [ETCoreMLTestUtils inputsForModel:mulModel repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertTrue(mulInputs.count > 0, @"Mul inputs should be created: %@", localError);
+    if (mulInputs.count == 0) {
+        return;
+    }
+    MLMultiArray *mulOutput = [ETCoreMLTestUtils filledMultiArrayWithShape:mulInputs[0].shape dataType:mulInputs[0].dataType repeatedValue:@(0) error:&localError];
+    XCTAssertNotNil(mulOutput, @"Mul output should be created: %@", localError);
+    if (mulOutput == nil) {
+        // -arrayByAddingObject: must not be handed nil.
+        return;
+    }
+    NSArray<MLMultiArray *> *mulArgs = [mulInputs arrayByAddingObject:mulOutput];
+
+    BOOL mulExecuted = [self.modelManager executeModelWithHandle:mulHandle
+                                                            args:mulArgs
+                                                  loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                     eventLogger:nullptr
+                                                           error:&localError];
+    XCTAssertTrue(mulExecuted, @"Mul execution should succeed: %@", localError);
+    for (NSUInteger i = 0; i < mulOutput.count; i++) {
+        NSNumber *value = [mulOutput objectAtIndexedSubscript:i];
+        XCTAssertEqual(value.integerValue, x * y);
+    }
+}
+
+#pragma mark - Autorelease Pool Tests
+
 // See https://github.com/pytorch/executorch/pull/10465
 - (void)testAutoreleasepoolError {
     NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];