3 /x: Add cache-based model loading path to ETCoreMLModelManager

metascroy · facebook-github-bot · commit fec36c5e4630 · 2026-04-06T10:49:16.000-07:00
Summary: This is diff 3/9 to implement the new caching system for CoreML, following the design doc here: fbcode/executorch/backends/apple/coreml/docs/new_cache_design.md [ gdoc version: https://docs.google.com/document/d/1aKwTNj-L3-sAyBetL92RwJdSsSwA4KhvAQ3FO1zbguE/edit?tab=t.0#heading=h.3ecu6rvzx1z3 ] The design doc contains detailed plans for each diff in the stack, see https://fburl.com/gdoc/actp9mcs for what this diff does. Reviewed By: digantdesai Differential Revision: D93044161
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
@@ -37,17 +37,32 @@ __attribute__((objc_subclassing_restricted))
                     orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
                                  error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
 
+/// Constructs an `ETCoreMLModel` instance from a compiled model URL (for cache-based loading).
+///
+/// @param compiledModelURL The URL of the compiled model (.mlmodelc directory).
+/// @param identifier The unique identifier for this model; exposed through `identifier`.
+/// @param configuration The model configuration.
+/// @param orderedInputNames The ordered input names of the model; nil fails initialization.
+/// @param orderedOutputNames The ordered output names of the model; nil fails initialization.
+/// @param error On failure, error is filled with the failure information.
+- (nullable instancetype)initWithCompiledModelURL:(NSURL*)compiledModelURL
+                                       identifier:(NSString*)identifier
+                                    configuration:(MLModelConfiguration*)configuration
+                                orderedInputNames:(NSOrderedSet<NSString*>*)orderedInputNames
+                               orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
+                                            error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
+
 /// The underlying MLModel.
 @property (strong, readonly, nonatomic) MLModel* mlModel;
 
 /// The model state.
 @property (strong, readonly, nonatomic, nullable) id state;
 
-/// The asset from which the model is loaded.
-@property (strong, readonly, nonatomic) ETCoreMLAsset* asset;
+/// The asset from which the model is loaded; nil when created from a compiled model URL.
+@property (strong, readonly, nonatomic, nullable) ETCoreMLAsset* asset;
 
-/// The asset identifier.
-@property (strong, readonly, nonatomic) NSString* identifier;
+/// The model identifier: the one given to the URL-based initializer, else the asset's identifier.
+@property (copy, readonly, nonatomic) NSString* identifier;
 
 /// The ordered input names of the model.
 @property (copy, readonly, nonatomic) NSOrderedSet<NSString*>* orderedInputNames;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
@@ -41,19 +41,19 @@ - (instancetype)initWithShape:(NSArray<NSNumber *> *)shape
         _shape = shape;
         _dataType = dataType;
     }
-    
+
     return self;
 }
 
 - (BOOL)isEqual:(id)object {
     if (object == self) {
         return YES;
     }
-    
+
     if (![object isKindOfClass:self.class]) {
         return NO;
     }
-    
+
     ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object;
     return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType;
 }
@@ -97,18 +97,18 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
     if (shape.size() == 0) {
         return {};
     }
-    
+
     if (shape.size() == 1) {
         return {1};
     }
-    
+
     std::vector<size_t> strides(shape.size(), 1);
     size_t product = 1;
     for (size_t i = shape.size(); i > 0; i--) {
         strides[i - 1] = product;
         product *= shape[i - 1];
     }
-    
+
     return strides;
 }
 
@@ -126,7 +126,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
         size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<size_t>{});
         backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)];
     }
-    
+
     __weak NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *weakCache = cache;
     // Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage.
     MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
@@ -135,7 +135,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
                                                              strides:to_array(calculate_strides(shape))
                                                          deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];}
                                                                error:error];
-    
+
     return result;
 }
 
@@ -145,7 +145,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
     [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
         result[key] = description.multiArrayConstraint;
     }];
-    
+
     return result;
 }
 
@@ -178,6 +178,7 @@ @interface ETCoreMLModel ()
 @property (strong, readonly, nonatomic) NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *cache;
 @property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *inputConstraintsByName;
 @property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *outputConstraintsByName;
+@property (copy, readonly, nonatomic, nullable) NSString *identifierStorage; // nil if asset-backed
 
 @end
 
@@ -192,35 +193,35 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
     if (![asset keepAliveAndReturnError:error]) {
         return nil;
     }
-    
+
     MLModel *mlModel = [MLModel modelWithContentsOfURL:asset.contentURL
                                          configuration:configuration
                                                  error:error];
     if (!mlModel) {
         return nil;
     }
-    
+
     if (orderedInputNames == nil) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedModel,
                                       "orderedInputNames must not be nil");
         return nil;
     }
-    
+
     if (orderedOutputNames == nil) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedModel,
                                       "orderedOutputNames must not be nil");
         return nil;
     }
-    
+
     self = [super init];
     if (self) {
         _mlModel = mlModel;
         _asset = asset;
         _orderedInputNames = [orderedInputNames copy];
         _orderedOutputNames = [orderedOutputNames copy];
-        
+
         _cache = [[NSCache alloc] init];
         _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
         _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
@@ -230,12 +231,61 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
         }
 #endif
     }
-    
+
     return self;
 }
 
 - (NSString *)identifier {
-    return self.asset.identifier;
+    // Models created from a compiled model URL carry their own identifier in identifierStorage.
+    // Asset-based models leave it nil, so the lookup falls back to the asset's identifier.
+    return self.identifierStorage ?: self.asset.identifier;
+}
+
+- (nullable instancetype)initWithCompiledModelURL:(NSURL *)compiledModelURL
+                                       identifier:(NSString *)identifier
+                                    configuration:(MLModelConfiguration *)configuration
+                                orderedInputNames:(NSOrderedSet<NSString *> *)orderedInputNames
+                               orderedOutputNames:(NSOrderedSet<NSString *> *)orderedOutputNames
+                                            error:(NSError * __autoreleasing *)error {
+    // Cache-based path: loads straight from a compiled .mlmodelc URL, with no backing ETCoreMLAsset.
+    // The cheap name-set checks run first so that an invalid call never pays for a model load.
+    if (orderedInputNames == nil) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel,
+                                      "orderedInputNames must not be nil");
+        return nil;
+    }
+
+    if (orderedOutputNames == nil) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel,
+                                      "orderedOutputNames must not be nil");
+        return nil;
+    }
+
+    MLModel *mlModel = [MLModel modelWithContentsOfURL:compiledModelURL
+                                         configuration:configuration
+                                                 error:error];
+    if (!mlModel) {
+        return nil;
+    }
+
+    self = [super init];
+    if (self) {
+        _mlModel = mlModel;
+        _identifierStorage = [identifier copy]; // -identifier returns this; no asset is set here.
+        _orderedInputNames = [orderedInputNames copy];
+        _orderedOutputNames = [orderedOutputNames copy];
+
+        _cache = [[NSCache alloc] init];
+        _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
+        _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
+#if MODEL_STATE_IS_SUPPORTED
+        if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+            _state = mlModel.modelDescription.stateDescriptionsByName.count > 0 ? [_mlModel newState] : nil;
+        }
+#endif
+    }
+
+    return self;
 }
 
 - (nullable NSArray<MLMultiArray *> *)prepareArgs:(const std::vector<executorchcoreml::MultiArray>&)args
@@ -249,15 +299,15 @@ - (NSString *)identifier {
         BOOL lCopyData = copyData;
         NSString *argName = [nameEnumerator nextObject];
         MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
-        
+
         if (constraint == nil) {
             ETCoreMLLogErrorAndSetNSError(error,
                                           ETCoreMLErrorCorruptedModel,
                                           "No constraint found for arg '%@'. Model may have mismatched input/output names.",
                                           argName);
             return nil;
         }
-        
+
         const auto& layout = arg.layout();
         auto dataType = to_ml_multiarray_data_type(layout.dataType());
         MLMultiArray *multiArrayArg = nil;
@@ -274,11 +324,11 @@ - (NSString *)identifier {
             // We can't use the same data storage, data types are not the same.
             multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
         }
-        
+
         if (!multiArrayArg) {
             return nil;
         }
-        
+
         if (multiArrayArg && lCopyData) {
             void (^copy_data)(void *, NSArray<NSNumber *> *) = ^(void *bytes, NSArray<NSNumber *> *strides) {
                 MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
@@ -298,10 +348,10 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
                 copy_data(multiArrayArg.dataPointer, multiArrayArg.strides);
             }
         }
-        
+
         [result addObject:multiArrayArg];
     }
-    
+
     return result;
 }
 
@@ -312,7 +362,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
         argConstraintsByName:self.inputConstraintsByName
                     copyData:YES
                        error:error];
-    
+
 }
 
 - (nullable NSArray<MLMultiArray *> *)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
@@ -322,7 +372,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
         argConstraintsByName:self.outputConstraintsByName
                     copyData:NO
                        error:error];
-    
+
 }
 
 - (nullable id<MLFeatureProvider>)predictionFromFeatures:(id<MLFeatureProvider>)input
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
@@ -23,7 +23,7 @@
         for (const auto& value : values) {
             [result addObject:@(value.c_str())];
         }
-        
+
         return result;
     }
 } // namespace
@@ -37,13 +37,13 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
     if (compiledAsset == nil) {
         return nil;
     }
-    
+
     // Use the metadata's ordered input/output names.
     // For multifunction models, the caller should load the per-method metadata
     // which contains the correct input/output names for that method.
     NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
     NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
-    
+
     NSError *localError = nil;
     ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset
                                                   configuration:configuration
@@ -53,14 +53,14 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
     if (model) {
         return model;
     }
-    
+
     if (error) {
         *error = localError;
     }
-    
+
     return nil;
 }
-                                        
+
 
 + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
                                          configuration:(MLModelConfiguration *)configuration
@@ -75,7 +75,7 @@ + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
     } else {
         asset = [assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:&localError];
     }
-    
+
     ETCoreMLModel *model;
     if (asset != nil) {
         model = [self loadModelWithCompiledAsset:asset configuration:configuration metadata:metadata error:&localError];
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
@@ -19,6 +19,7 @@ class MultiArray;
 
 @class ETCoreMLModel;
 @class ETCoreMLAssetManager;
+@protocol ETCoreMLCache;
 
 typedef void ModelHandle;
 
@@ -71,6 +72,27 @@ __attribute__((objc_subclassing_restricted))
                         functionName:(nullable NSString*)functionName
                                error:(NSError* __autoreleasing*)error;
 
+/// Loads the model from the AOT data using a specific cache implementation.
+///
+/// Same as loadModelFromAOTData:configuration:methodName:functionName:error: but allows
+/// specifying a custom cache implementation for per-model cache selection.
+/// When cache is nil, uses the default asset manager (existing behavior).
+/// When cache is non-nil, uses the provided cache for model storage.
+///
+/// @param data The AOT blob data.
+/// @param configuration The model configuration that will be used to load the model.
+/// @param methodName Optional method name (e.g., "forward", "prefill") for metadata lookup.
+/// @param functionName Optional CoreML function name to invoke. If nil, methodName is used.
+/// @param cache Optional cache to use. If nil, uses the default asset manager.
+/// @param error On failure, error is filled with the failure information.
+/// @retval An opaque handle (`ModelHandle` is a typedef of `void`) that points to the loaded model.
+- (ModelHandle*)loadModelFromAOTData:(NSData*)data
+                       configuration:(MLModelConfiguration*)configuration
+                          methodName:(nullable NSString*)methodName
+                        functionName:(nullable NSString*)functionName
+                               cache:(nullable id<ETCoreMLCache>)cache
+                               error:(NSError* __autoreleasing*)error;
+
 /// Executes the loaded model.
 ///
 /// @param handle The handle to the loaded model.
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
diff --git a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm