Skip to content

Commit fec36c5

Browse files
metascroy authored and facebook-github-bot committed
3/x: Add cache-based model loading path to ETCoreMLModelManager
Summary: This is diff 3/9 in the stack that implements the new caching system for CoreML, following the design doc at fbcode/executorch/backends/apple/coreml/docs/new_cache_design.md (gdoc version: https://docs.google.com/document/d/1aKwTNj-L3-sAyBetL92RwJdSsSwA4KhvAQ3FO1zbguE/edit?tab=t.0#heading=h.3ecu6rvzx1z3). The design doc contains detailed plans for each diff in the stack; see https://fburl.com/gdoc/actp9mcs for what this diff does.
Reviewed By: digantdesai
Differential Revision: D93044161
1 parent 3d2c853 commit fec36c5

File tree

6 files changed

+643
-49
lines changed

6 files changed

+643
-49
lines changed

backends/apple/coreml/runtime/delegate/ETCoreMLModel.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,32 @@ __attribute__((objc_subclassing_restricted))
3737
orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
3838
error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
3939

40+
/// Constructs an `ETCoreMLModel` instance from a compiled model URL (for cache-based loading).
41+
///
42+
/// @param compiledModelURL The URL of the compiled model (.mlmodelc directory).
43+
/// @param identifier The unique identifier for this model.
44+
/// @param configuration The model configuration.
45+
/// @param orderedInputNames The ordered input names of the model.
46+
/// @param orderedOutputNames The ordered output names of the model.
47+
/// @param error On failure, error is filled with the failure information.
48+
- (nullable instancetype)initWithCompiledModelURL:(NSURL*)compiledModelURL
49+
identifier:(NSString*)identifier
50+
configuration:(MLModelConfiguration*)configuration
51+
orderedInputNames:(NSOrderedSet<NSString*>*)orderedInputNames
52+
orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
53+
error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
54+
4055
/// The underlying MLModel.
4156
@property (strong, readonly, nonatomic) MLModel* mlModel;
4257

4358
/// The model state.
4459
@property (strong, readonly, nonatomic, nullable) id state;
4560

46-
/// The asset from which the model is loaded.
47-
@property (strong, readonly, nonatomic) ETCoreMLAsset* asset;
61+
/// The asset from which the model is loaded (for asset-based loading).
62+
@property (strong, readonly, nonatomic, nullable) ETCoreMLAsset* asset;
4863

49-
/// The asset identifier.
50-
@property (strong, readonly, nonatomic) NSString* identifier;
64+
/// The model identifier.
65+
@property (copy, readonly, nonatomic) NSString* identifier;
5166

5267
/// The ordered input names of the model.
5368
@property (copy, readonly, nonatomic) NSOrderedSet<NSString*>* orderedInputNames;

backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm

Lines changed: 74 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,19 @@ - (instancetype)initWithShape:(NSArray<NSNumber *> *)shape
4141
_shape = shape;
4242
_dataType = dataType;
4343
}
44-
44+
4545
return self;
4646
}
4747

4848
- (BOOL)isEqual:(id)object {
4949
if (object == self) {
5050
return YES;
5151
}
52-
52+
5353
if (![object isKindOfClass:self.class]) {
5454
return NO;
5555
}
56-
56+
5757
ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object;
5858
return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType;
5959
}
@@ -97,18 +97,18 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
9797
if (shape.size() == 0) {
9898
return {};
9999
}
100-
100+
101101
if (shape.size() == 1) {
102102
return {1};
103103
}
104-
104+
105105
std::vector<size_t> strides(shape.size(), 1);
106106
size_t product = 1;
107107
for (size_t i = shape.size(); i > 0; i--) {
108108
strides[i - 1] = product;
109109
product *= shape[i - 1];
110110
}
111-
111+
112112
return strides;
113113
}
114114

@@ -126,7 +126,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
126126
size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<size_t>{});
127127
backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)];
128128
}
129-
129+
130130
__weak NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *weakCache = cache;
131131
// Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage.
132132
MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
@@ -135,7 +135,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
135135
strides:to_array(calculate_strides(shape))
136136
deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];}
137137
error:error];
138-
138+
139139
return result;
140140
}
141141

@@ -145,7 +145,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
145145
[feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
146146
result[key] = description.multiArrayConstraint;
147147
}];
148-
148+
149149
return result;
150150
}
151151

@@ -178,6 +178,7 @@ @interface ETCoreMLModel ()
178178
@property (strong, readonly, nonatomic) NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *cache;
179179
@property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *inputConstraintsByName;
180180
@property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *outputConstraintsByName;
181+
@property (copy, readwrite, nonatomic, nullable) NSString *identifierStorage;
181182

182183
@end
183184

@@ -192,35 +193,35 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
192193
if (![asset keepAliveAndReturnError:error]) {
193194
return nil;
194195
}
195-
196+
196197
MLModel *mlModel = [MLModel modelWithContentsOfURL:asset.contentURL
197198
configuration:configuration
198199
error:error];
199200
if (!mlModel) {
200201
return nil;
201202
}
202-
203+
203204
if (orderedInputNames == nil) {
204205
ETCoreMLLogErrorAndSetNSError(error,
205206
ETCoreMLErrorCorruptedModel,
206207
"orderedInputNames must not be nil");
207208
return nil;
208209
}
209-
210+
210211
if (orderedOutputNames == nil) {
211212
ETCoreMLLogErrorAndSetNSError(error,
212213
ETCoreMLErrorCorruptedModel,
213214
"orderedOutputNames must not be nil");
214215
return nil;
215216
}
216-
217+
217218
self = [super init];
218219
if (self) {
219220
_mlModel = mlModel;
220221
_asset = asset;
221222
_orderedInputNames = [orderedInputNames copy];
222223
_orderedOutputNames = [orderedOutputNames copy];
223-
224+
224225
_cache = [[NSCache alloc] init];
225226
_inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
226227
_outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
@@ -230,12 +231,61 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
230231
}
231232
#endif
232233
}
233-
234+
234235
return self;
235236
}
236237

237238
- (NSString *)identifier {
238-
return self.asset.identifier;
239+
// For URL-based loading, identifierStorage is set directly
240+
// For asset-based loading, derive from asset
241+
return self.identifierStorage ?: self.asset.identifier;
242+
}
243+
244+
/// Constructs an `ETCoreMLModel` directly from a compiled model URL (cache-based loading).
///
/// Unlike the asset-based initializer, this path never sets `_asset`, so the `identifier`
/// getter can only be satisfied from `identifierStorage`. The identifier is therefore
/// required to be non-nil here.
///
/// All arguments are validated before the `MLModel` is loaded, so a caller error is reported
/// without first loading the model.
///
/// @param compiledModelURL The URL of the compiled model (.mlmodelc directory).
/// @param identifier The unique identifier for this model. Must not be nil.
/// @param configuration The model configuration passed to `MLModel`.
/// @param orderedInputNames The ordered input names of the model. Must not be nil.
/// @param orderedOutputNames The ordered output names of the model. Must not be nil.
/// @param error On failure, error is filled with the failure information.
/// @return The initialized model, or `nil` if validation or model loading failed.
- (nullable instancetype)initWithCompiledModelURL:(NSURL *)compiledModelURL
                                       identifier:(NSString *)identifier
                                    configuration:(MLModelConfiguration *)configuration
                                orderedInputNames:(NSOrderedSet<NSString *> *)orderedInputNames
                               orderedOutputNames:(NSOrderedSet<NSString *> *)orderedOutputNames
                                            error:(NSError * __autoreleasing *)error {
    // Without an asset to fall back on, a nil identifier would make `-identifier` return nil.
    if (identifier == nil) {
        ETCoreMLLogErrorAndSetNSError(error,
                                      ETCoreMLErrorCorruptedModel,
                                      "identifier must not be nil");
        return nil;
    }

    if (orderedInputNames == nil) {
        ETCoreMLLogErrorAndSetNSError(error,
                                      ETCoreMLErrorCorruptedModel,
                                      "orderedInputNames must not be nil");
        return nil;
    }

    if (orderedOutputNames == nil) {
        ETCoreMLLogErrorAndSetNSError(error,
                                      ETCoreMLErrorCorruptedModel,
                                      "orderedOutputNames must not be nil");
        return nil;
    }

    // Load only after the cheap argument checks have passed.
    MLModel *mlModel = [MLModel modelWithContentsOfURL:compiledModelURL
                                         configuration:configuration
                                                 error:error];
    if (!mlModel) {
        return nil;
    }

    self = [super init];
    if (self) {
        _mlModel = mlModel;
        _identifierStorage = [identifier copy];
        _orderedInputNames = [orderedInputNames copy];
        _orderedOutputNames = [orderedOutputNames copy];

        _cache = [[NSCache alloc] init];
        _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
        _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
#if MODEL_STATE_IS_SUPPORTED
        if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
            // Only create a state object when the model description declares state.
            _state = mlModel.modelDescription.stateDescriptionsByName.count > 0 ? [_mlModel newState] : nil;
        }
#endif
    }

    return self;
}
240290

241291
- (nullable NSArray<MLMultiArray *> *)prepareArgs:(const std::vector<executorchcoreml::MultiArray>&)args
@@ -249,15 +299,15 @@ - (NSString *)identifier {
249299
BOOL lCopyData = copyData;
250300
NSString *argName = [nameEnumerator nextObject];
251301
MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
252-
302+
253303
if (constraint == nil) {
254304
ETCoreMLLogErrorAndSetNSError(error,
255305
ETCoreMLErrorCorruptedModel,
256306
"No constraint found for arg '%@'. Model may have mismatched input/output names.",
257307
argName);
258308
return nil;
259309
}
260-
310+
261311
const auto& layout = arg.layout();
262312
auto dataType = to_ml_multiarray_data_type(layout.dataType());
263313
MLMultiArray *multiArrayArg = nil;
@@ -274,11 +324,11 @@ - (NSString *)identifier {
274324
// We can't use the same data storage, data types are not the same.
275325
multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
276326
}
277-
327+
278328
if (!multiArrayArg) {
279329
return nil;
280330
}
281-
331+
282332
if (multiArrayArg && lCopyData) {
283333
void (^copy_data)(void *, NSArray<NSNumber *> *) = ^(void *bytes, NSArray<NSNumber *> *strides) {
284334
MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
@@ -298,10 +348,10 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
298348
copy_data(multiArrayArg.dataPointer, multiArrayArg.strides);
299349
}
300350
}
301-
351+
302352
[result addObject:multiArrayArg];
303353
}
304-
354+
305355
return result;
306356
}
307357

@@ -312,7 +362,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
312362
argConstraintsByName:self.inputConstraintsByName
313363
copyData:YES
314364
error:error];
315-
365+
316366
}
317367

318368
- (nullable NSArray<MLMultiArray *> *)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
@@ -322,7 +372,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
322372
argConstraintsByName:self.outputConstraintsByName
323373
copyData:NO
324374
error:error];
325-
375+
326376
}
327377

328378
- (nullable id<MLFeatureProvider>)predictionFromFeatures:(id<MLFeatureProvider>)input

backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
for (const auto& value : values) {
2424
[result addObject:@(value.c_str())];
2525
}
26-
26+
2727
return result;
2828
}
2929
} // namespace
@@ -37,13 +37,13 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
3737
if (compiledAsset == nil) {
3838
return nil;
3939
}
40-
40+
4141
// Use the metadata's ordered input/output names.
4242
// For multifunction models, the caller should load the per-method metadata
4343
// which contains the correct input/output names for that method.
4444
NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
4545
NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
46-
46+
4747
NSError *localError = nil;
4848
ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset
4949
configuration:configuration
@@ -53,14 +53,14 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
5353
if (model) {
5454
return model;
5555
}
56-
56+
5757
if (error) {
5858
*error = localError;
5959
}
60-
60+
6161
return nil;
6262
}
63-
63+
6464

6565
+ (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
6666
configuration:(MLModelConfiguration *)configuration
@@ -75,7 +75,7 @@ + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
7575
} else {
7676
asset = [assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:&localError];
7777
}
78-
78+
7979
ETCoreMLModel *model;
8080
if (asset != nil) {
8181
model = [self loadModelWithCompiledAsset:asset configuration:configuration metadata:metadata error:&localError];

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class MultiArray;
1919

2020
@class ETCoreMLModel;
2121
@class ETCoreMLAssetManager;
22+
@protocol ETCoreMLCache;
2223

2324
typedef void ModelHandle;
2425

@@ -71,6 +72,27 @@ __attribute__((objc_subclassing_restricted))
7172
functionName:(nullable NSString*)functionName
7273
error:(NSError* __autoreleasing*)error;
7374

75+
/// Loads the model from the AOT data using a specific cache implementation.
76+
///
77+
/// Same as loadModelFromAOTData:configuration:methodName:functionName:error: but allows
78+
/// specifying a custom cache implementation for per-model cache selection.
79+
/// When cache is nil, uses the default asset manager (existing behavior).
80+
/// When cache is non-nil, uses the provided cache for model storage.
81+
///
82+
/// @param data The AOT blob data.
83+
/// @param configuration The model configuration that will be used to load the model.
84+
/// @param methodName Optional method name (e.g., "forward", "prefill") for metadata lookup.
85+
/// @param functionName Optional CoreML function name to invoke. If nil, methodName is used.
86+
/// @param cache Optional cache to use. If nil, uses the default asset manager.
87+
/// @param error On failure, error is filled with the failure information.
88+
/// @retval An opaque handle that points to the loaded model.
89+
- (ModelHandle*)loadModelFromAOTData:(NSData*)data
90+
configuration:(MLModelConfiguration*)configuration
91+
methodName:(nullable NSString*)methodName
92+
functionName:(nullable NSString*)functionName
93+
cache:(nullable id<ETCoreMLCache>)cache
94+
error:(NSError* __autoreleasing*)error;
95+
7496
/// Executes the loaded model.
7597
///
7698
/// @param handle The handle to the loaded model.

0 commit comments

Comments
 (0)