diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
index 58026593462..e72f2369d23 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
@@ -37,17 +37,32 @@ __attribute__((objc_subclassing_restricted))
                     orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
                                  error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
 
+/// Constructs an `ETCoreMLModel` instance from a compiled model URL (for cache-based loading).
+///
+/// @param compiledModelURL The URL of the compiled model (.mlmodelc directory).
+/// @param identifier The unique identifier for this model.
+/// @param configuration The model configuration.
+/// @param orderedInputNames   The ordered input names of the model.
+/// @param orderedOutputNames   The ordered output names of the model.
+/// @param error   On failure, error is filled with the failure information.
+- (nullable instancetype)initWithCompiledModelURL:(NSURL*)compiledModelURL
+                                       identifier:(NSString*)identifier
+                                    configuration:(MLModelConfiguration*)configuration
+                                orderedInputNames:(NSOrderedSet<NSString*>*)orderedInputNames
+                               orderedOutputNames:(NSOrderedSet<NSString*>*)orderedOutputNames
+                                            error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
+
 /// The underlying MLModel.
 @property (strong, readonly, nonatomic) MLModel* mlModel;
 
 /// The model state.
 @property (strong, readonly, nonatomic, nullable) id state;
 
-/// The asset from which the model is loaded.
-@property (strong, readonly, nonatomic) ETCoreMLAsset* asset;
+/// The asset from which the model is loaded (for asset-based loading).
+@property (strong, readonly, nonatomic, nullable) ETCoreMLAsset* asset;
 
-/// The asset identifier.
-@property (strong, readonly, nonatomic) NSString* identifier;
+/// The model identifier.
+@property (copy, readonly, nonatomic) NSString* identifier;
 
 /// The ordered input names of the model.
 @property (copy, readonly, nonatomic) NSOrderedSet<NSString*>* orderedInputNames;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
index 41f32c54a58..d4d2b584821 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
@@ -41,7 +41,7 @@ - (instancetype)initWithShape:(NSArray<NSNumber *> *)shape
         _shape = shape;
         _dataType = dataType;
     }
-    
+
     return self;
 }
 
@@ -49,11 +49,11 @@ - (BOOL)isEqual:(id)object {
     if (object == self) {
         return YES;
     }
-    
+
     if (![object isKindOfClass:self.class]) {
         return NO;
     }
-    
+
     ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object;
     return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType;
 }
@@ -97,18 +97,18 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
     if (shape.size() == 0) {
         return {};
     }
-    
+
     if (shape.size() == 1) {
         return {1};
     }
-    
+
     std::vector<size_t> strides(shape.size(), 1);
     size_t product = 1;
     for (size_t i = shape.size(); i > 0; i--) {
         strides[i - 1] = product;
         product *= shape[i - 1];
     }
-    
+
     return strides;
 }
 
@@ -126,7 +126,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
         size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies<size_t>{});
         backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)];
     }
-    
+
     __weak NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *weakCache = cache;
     // Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage.
     MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
@@ -135,7 +135,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
                                                              strides:to_array(calculate_strides(shape))
                                                          deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];}
                                                                error:error];
-    
+
     return result;
 }
 
@@ -145,7 +145,7 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
     [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
         result[key] = description.multiArrayConstraint;
     }];
-    
+
     return result;
 }
 
@@ -178,6 +178,7 @@ @interface ETCoreMLModel ()
 @property (strong, readonly, nonatomic) NSCache<ETCoreMLMultiArrayDescriptor *, NSMutableData *> *cache;
 @property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *inputConstraintsByName;
 @property (copy, readonly, nonatomic) NSDictionary<NSString *, MLMultiArrayConstraint *> *outputConstraintsByName;
+@property (copy, readwrite, nonatomic, nullable) NSString *identifierStorage;
 
 @end
 
@@ -192,35 +193,35 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
     if (![asset keepAliveAndReturnError:error]) {
         return nil;
     }
-    
+
     MLModel *mlModel = [MLModel modelWithContentsOfURL:asset.contentURL
                                          configuration:configuration
                                                  error:error];
     if (!mlModel) {
         return nil;
     }
-    
+
     if (orderedInputNames == nil) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedModel,
                                       "orderedInputNames must not be nil");
         return nil;
     }
-    
+
     if (orderedOutputNames == nil) {
         ETCoreMLLogErrorAndSetNSError(error,
                                       ETCoreMLErrorCorruptedModel,
                                       "orderedOutputNames must not be nil");
         return nil;
     }
-    
+
     self = [super init];
     if (self) {
         _mlModel = mlModel;
         _asset = asset;
         _orderedInputNames = [orderedInputNames copy];
         _orderedOutputNames = [orderedOutputNames copy];
-        
+
         _cache = [[NSCache alloc] init];
         _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
         _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
@@ -230,12 +231,61 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
         }
 #endif
     }
-    
+
     return self;
 }
 
 - (NSString *)identifier {
-    return self.asset.identifier;
+    // URL-based loading sets identifierStorage in its initializer; use it when present.
+    // Otherwise (asset-based loading) fall back to the asset's identifier.
+    return self.identifierStorage ?: self.asset.identifier;
+}
+
+// Cache-path initializer: loads the MLModel directly from `compiledModelURL`. No ETCoreMLAsset is
+// assigned here; the identifier is kept in `identifierStorage`, which the `identifier` getter reads.
+- (nullable instancetype)initWithCompiledModelURL:(NSURL *)compiledModelURL
+                                       identifier:(NSString *)identifier
+                                    configuration:(MLModelConfiguration *)configuration
+                                orderedInputNames:(NSOrderedSet<NSString *> *)orderedInputNames
+                               orderedOutputNames:(NSOrderedSet<NSString *> *)orderedOutputNames
+                                            error:(NSError * __autoreleasing *)error {
+    MLModel *loadedModel = [MLModel modelWithContentsOfURL:compiledModelURL
+                                             configuration:configuration
+                                                     error:error];
+    if (loadedModel == nil) {
+        return nil;
+    }
+
+    if (orderedInputNames == nil) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "orderedInputNames must not be nil");
+        return nil;
+    }
+
+    if (orderedOutputNames == nil) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "orderedOutputNames must not be nil");
+        return nil;
+    }
+
+    self = [super init];
+    if (self == nil) {
+        return nil;
+    }
+
+    MLModelDescription *modelDescription = loadedModel.modelDescription;
+    _mlModel = loadedModel;
+    _identifierStorage = [identifier copy];
+    _orderedInputNames = [orderedInputNames copy];
+    _orderedOutputNames = [orderedOutputNames copy];
+    _cache = [[NSCache alloc] init];
+    _inputConstraintsByName = get_multi_array_input_constraints_by_name(modelDescription);
+    _outputConstraintsByName = get_multi_array_output_constraints_by_name(modelDescription);
+#if MODEL_STATE_IS_SUPPORTED
+    if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+        _state = modelDescription.stateDescriptionsByName.count > 0 ? [_mlModel newState] : nil;
+    }
+#endif
+
+    return self;
 }
 
 - (nullable NSArray<MLMultiArray *> *)prepareArgs:(const std::vector<executorchcoreml::MultiArray>&)args
@@ -249,7 +299,7 @@ - (NSString *)identifier {
         BOOL lCopyData = copyData;
         NSString *argName = [nameEnumerator nextObject];
         MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
-        
+
         if (constraint == nil) {
             ETCoreMLLogErrorAndSetNSError(error,
                                           ETCoreMLErrorCorruptedModel,
@@ -257,7 +307,7 @@ - (NSString *)identifier {
                                           argName);
             return nil;
         }
-        
+
         const auto& layout = arg.layout();
         auto dataType = to_ml_multiarray_data_type(layout.dataType());
         MLMultiArray *multiArrayArg = nil;
@@ -274,11 +324,11 @@ - (NSString *)identifier {
             // We can't use the same data storage, data types are not the same.
             multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
         }
-        
+
         if (!multiArrayArg) {
             return nil;
         }
-        
+
         if (multiArrayArg && lCopyData) {
             void (^copy_data)(void *, NSArray<NSNumber *> *) = ^(void *bytes, NSArray<NSNumber *> *strides) {
                 MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
@@ -298,10 +348,10 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
                 copy_data(multiArrayArg.dataPointer, multiArrayArg.strides);
             }
         }
-        
+
         [result addObject:multiArrayArg];
     }
-    
+
     return result;
 }
 
@@ -312,7 +362,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
         argConstraintsByName:self.inputConstraintsByName
                     copyData:YES
                        error:error];
-    
+
 }
 
 - (nullable NSArray<MLMultiArray *> *)prepareOutputBackings:(const std::vector<executorchcoreml::MultiArray>&)outputs
@@ -322,7 +372,7 @@ MultiArray buffer(bytes, MultiArray::MemoryLayout(to_multiarray_data_type(constr
         argConstraintsByName:self.outputConstraintsByName
                     copyData:NO
                        error:error];
-    
+
 }
 
 - (nullable id<MLFeatureProvider>)predictionFromFeatures:(id<MLFeatureProvider>)input
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
index a46c810a71f..4bf3f714f06 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.mm
@@ -23,7 +23,7 @@
         for (const auto& value : values) {
             [result addObject:@(value.c_str())];
         }
-        
+
         return result;
     }
 } // namespace
@@ -37,13 +37,13 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
     if (compiledAsset == nil) {
         return nil;
     }
-    
+
     // Use the metadata's ordered input/output names.
     // For multifunction models, the caller should load the per-method metadata
     // which contains the correct input/output names for that method.
     NSOrderedSet<NSString *> *orderedInputNames = ::get_ordered_set(metadata.input_names);
     NSOrderedSet<NSString *> *orderedOutputNames = ::get_ordered_set(metadata.output_names);
-    
+
     NSError *localError = nil;
     ETCoreMLModel *model = [[ETCoreMLModel alloc] initWithAsset:compiledAsset
                                                   configuration:configuration
@@ -53,14 +53,14 @@ + (nullable ETCoreMLModel *)loadModelWithCompiledAsset:(ETCoreMLAsset *)compiled
     if (model) {
         return model;
     }
-    
+
     if (error) {
         *error = localError;
     }
-    
+
     return nil;
 }
-                                        
+
 
 + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
                                          configuration:(MLModelConfiguration *)configuration
@@ -75,7 +75,7 @@ + (nullable ETCoreMLModel *)loadModelWithContentsOfURL:(NSURL *)compiledModelURL
     } else {
         asset = [assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:&localError];
     }
-    
+
     ETCoreMLModel *model;
     if (asset != nil) {
         model = [self loadModelWithCompiledAsset:asset configuration:configuration metadata:metadata error:&localError];
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
index f47092aa9c4..8e33f0a49e3 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
@@ -19,6 +19,7 @@ class MultiArray;
 
 @class ETCoreMLModel;
 @class ETCoreMLAssetManager;
+@protocol ETCoreMLCache;
 
 typedef void ModelHandle;
 
@@ -71,6 +72,27 @@ __attribute__((objc_subclassing_restricted))
                         functionName:(nullable NSString*)functionName
                                error:(NSError* __autoreleasing*)error;
 
+/// Loads the model from the AOT data using a specific cache implementation.
+///
+/// Same as loadModelFromAOTData:configuration:methodName:functionName:error: but allows
+/// specifying a custom cache implementation for per-model cache selection.
+/// When cache is nil, uses the default asset manager (existing behavior).
+/// When cache is non-nil, uses the provided cache for model storage.
+///
+/// @param data The AOT blob data.
+/// @param configuration The model configuration that will be used to load the model.
+/// @param methodName Optional method name (e.g., "forward", "prefill") for metadata lookup.
+/// @param functionName Optional CoreML function name to invoke. If nil, methodName is used.
+/// @param cache Optional cache to use. If nil, uses the default asset manager.
+/// @param error   On failure, error is filled with the failure information.
+/// @retval An opaque handle that points to the loaded model.
+- (ModelHandle*)loadModelFromAOTData:(NSData*)data
+                       configuration:(MLModelConfiguration*)configuration
+                          methodName:(nullable NSString*)methodName
+                        functionName:(nullable NSString*)functionName
+                               cache:(nullable id<ETCoreMLCache>)cache
+                               error:(NSError* __autoreleasing*)error;
+
 /// Executes the loaded model.
 ///
 /// @param handle The handle to the loaded model.
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
index 6c2565fa320..6eb848ec290 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
@@ -8,6 +8,7 @@
 #import "ETCoreMLModelManager.h"
 
 #import "ETCoreMLAsset.h"
+#import "ETCoreMLCacheProtocol.h"
 #import "ETCoreMLAssetManager.h"
 #import "ETCoreMLDefaultModelExecutor.h"
 #import "ETCoreMLLogging.h"
@@ -44,6 +45,14 @@
 
 using namespace executorchcoreml;
 
+NSOrderedSet<NSString *> *get_ordered_set(const std::vector<std::string>& values) {
+    NSMutableOrderedSet<NSString *> *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()];
+    for (const auto& value : values) {
+        [result addObject:@(value.c_str())];  // appended in input order; an ordered set stores a repeated value once
+    }
+    return result;
+}
+
 enum class ModelAssetType: uint8_t {
     CompiledModel,
     Model
@@ -211,15 +220,6 @@ void set_outputs(std::vector<executorchcoreml::MultiArray>& outputs,
     return std::nullopt;
 }
 
-NSOrderedSet<NSString *> *get_ordered_set(const std::vector<std::string>& values) {
-    NSMutableOrderedSet<NSString *> *result = [NSMutableOrderedSet orderedSetWithCapacity:values.size()];
-    for (const auto& value : values) {
-        [result addObject:@(value.c_str())];
-    }
-
-    return result;
-}
-
 NSURL * _Nullable write_model_files(NSURL *dst_url,
                                     NSFileManager *fm,
                                     NSString *identifier,
@@ -428,6 +428,9 @@ - (nullable ETCoreMLAsset *)assetWithIdentifier:(NSString *)identifier {
     return modelAsset;
 }
 
+// TODO(asset-manager-deprecation): Remove modelURL parameter when asset manager path is removed.
+// The modelURL parameter exists only to support the legacy asset manager path, which passes
+// an existing model URL instead of extracting from inMemoryFS. The cache path always passes nil.
 - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
                                           modelURL:(nullable NSURL *)modelURL
                                         inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
@@ -443,6 +446,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
 
     // If modelURL is not provided, write model files to the destination directory (dstURL)
     // and obtain a URL pointing to them. Otherwise, use the provided modelURL.
+    // TODO(asset-manager-deprecation): Simplify to always call write_model_files when asset manager is removed.
     modelURL = (modelURL == nil) ? ::write_model_files(dstURL, self.fileManager, identifier, modelAssetType.value(), inMemoryFS, error) : modelURL;
     if (!modelURL) {
         // Failed to generate or locate model files, return nil.
@@ -469,6 +473,9 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
     }
 }
 
+// TODO(asset-manager-deprecation): Remove this method when asset manager path is removed.
+// This method is only used by the legacy asset manager path. The new cache path uses
+// compiledModelURLWithMetadata:inMemoryFS:cache:error: instead.
 - (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata&)metadata
                                                   modelURL:(nullable NSURL *)modelURL
                                                 inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
@@ -518,6 +525,56 @@ - (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata&
     return compiledModelAsset;
 }
 
+// Returns the URL of the compiled model for `metadata.identifier`, consulting `cache` first.
+// On a miss the model is produced inside a temporary directory obtained from `cache`, handed to
+// `cache` for storage, and the temporary directory is removed afterwards. Returns nil on failure.
+- (nullable NSURL *)compiledModelURLWithMetadata:(const ModelMetadata&)metadata
+                                      inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
+                                           cache:(id<ETCoreMLCache>)cache
+                                           error:(NSError * __autoreleasing *)error {
+    NSString *modelIdentifier = @(metadata.identifier.c_str());
+    // Fast path: reuse an existing cache entry. A lookup error is deliberately not reported.
+    NSURL *cacheHitURL = [cache cachedModelURLForIdentifier:modelIdentifier error:nil];
+    if (cacheHitURL != nil) {
+        ETCoreMLLogInfo("Cache Hit: Successfully retrieved compiled model with identifier=%@ from the cache.", modelIdentifier);
+        return cacheHitURL;
+    }
+    ETCoreMLLogInfo("Cache Miss: Compiled Model with identifier=%@ was not found in the cache.", modelIdentifier);
+
+    // The temp directory comes from the cache (guaranteed same filesystem for atomic moves).
+    NSURL *scratchURL = [cache temporaryDirectoryWithError:error];
+    if (scratchURL == nil) {
+        return nil;
+    }
+
+    NSFileManager *fileManager = [NSFileManager defaultManager];
+    NSURL *freshModelURL = [self compiledModelURLWithIdentifier:modelIdentifier
+                                                       modelURL:nil
+                                                     inMemoryFS:inMemoryFS
+                                                         dstURL:scratchURL
+                                                          error:error];
+    if (freshModelURL == nil) {
+        // Nothing to store; drop the temp directory (best effort).
+        [fileManager removeItemAtURL:scratchURL error:nil];
+        return nil;
+    }
+
+    ETCoreMLLogInfo("Successfully compiled model with identifier=%@. Storing in cache.", modelIdentifier);
+    NSURL *storedURL = [cache storeModelAtURL:freshModelURL withIdentifier:modelIdentifier error:error];
+
+    // storeModelAtURL moves the model, so only leftovers remain to be removed (best effort).
+    [fileManager removeItemAtURL:scratchURL error:nil];
+    if (storedURL != nil) {
+        return storedURL;
+    }
+
+    ETCoreMLLogInfo("Failed to store model with identifier=%@ in cache.", modelIdentifier);
+    if (error != NULL && *error != nil) {
+        ETCoreMLLogInfo("Cache store error: %@", (*error).localizedDescription);
+    }
+    return nil;
+}
+
 #if ET_EVENT_TRACER_ENABLED
 - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadata
                                         inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
@@ -615,6 +672,40 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
 }
 #endif
 
+// Cache path: obtains the compiled model URL via `cache`, loads an ETCoreMLModel from it and wraps
+// the model in an ETCoreMLDefaultModelExecutor. If the load fails, the entry for the identifier is
+// removed from `cache` and nil is returned.
+- (nullable id<ETCoreMLModelExecutor>)modelExecutorWithMetadata:(const ModelMetadata&)metadata
+                                                     inMemoryFS:(const inmemoryfs::InMemoryFileSystem*)inMemoryFS
+                                                  configuration:(MLModelConfiguration *)configuration
+                                                          cache:(id<ETCoreMLCache>)cache
+                                                          error:(NSError * __autoreleasing *)error {
+    NSURL *modelLocation = [self compiledModelURLWithMetadata:metadata
+                                                   inMemoryFS:inMemoryFS
+                                                        cache:cache
+                                                        error:error];
+    if (modelLocation == nil) {
+        return nil;
+    }
+
+    NSString *modelIdentifier = @(metadata.identifier.c_str());
+    NSOrderedSet<NSString *> *inputNames = get_ordered_set(metadata.input_names);
+    NSOrderedSet<NSString *> *outputNames = get_ordered_set(metadata.output_names);
+    ETCoreMLModel *loadedModel = [[ETCoreMLModel alloc] initWithCompiledModelURL:modelLocation
+                                                                      identifier:modelIdentifier
+                                                                   configuration:configuration
+                                                               orderedInputNames:inputNames
+                                                              orderedOutputNames:outputNames
+                                                                           error:error];
+    if (loadedModel != nil) {
+        return [[ETCoreMLDefaultModelExecutor alloc] initWithModel:loadedModel];
+    }
+
+    // Remove the (possibly corrupted) cache entry so the next load attempt will recompile.
+    [cache removeCachedModelWithIdentifier:modelIdentifier error:nil];
+    return nil;
+}
+
 
 - (nullable id<ETCoreMLModelExecutor>)_modelExecutorWithAOTData:(NSData *)data
                                                    configuration:(MLModelConfiguration *)configuration
@@ -655,7 +746,7 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
                                           "Multifunction CoreML models require a methodName for metadata lookup.");
             return nil;
         }
-        
+
         std::string method_name_str = [methodName UTF8String];
         const MethodMetadata* method_metadata = metadataValue.get_method_metadata(method_name_str);
         if (method_metadata != nullptr) {
@@ -685,7 +776,7 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
                                           "Multifunction CoreML models require a functionName.");
             return nil;
         }
-        
+
 #if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0)
         if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
             configuration.functionName = functionName;
@@ -724,6 +815,100 @@ - (nullable ETCoreMLAsset *)modelAssetWithMetadata:(const ModelMetadata&)metadat
     return executor;
 }
 
+/// Creates a model executor from the AOT data, resolving the compiled model through `cache`.
+/// @param data The AOT blob data.
+/// @param configuration The model configuration; for multifunction models its functionName is set here.
+/// @param methodName The method name used for metadata lookup (required for multifunction models).
+/// @param functionName The CoreML function name to load (required for multifunction models).
+/// @param cache The cache used to look up and store the compiled model.
+/// @param error   On failure, error is filled with the failure information.
+- (nullable id<ETCoreMLModelExecutor>)_modelExecutorWithAOTData:(NSData *)data
+                                                    configuration:(MLModelConfiguration *)configuration
+                                                       methodName:(nullable NSString *)methodName
+                                                     functionName:(nullable NSString *)functionName
+                                                            cache:(id<ETCoreMLCache>)cache
+                                                            error:(NSError * __autoreleasing *)error {
+    using namespace inmemoryfs;
+
+    auto buffer = MemoryBuffer::make_unowned(const_cast<void *>(data.bytes), data.length);
+    std::unique_ptr<InMemoryFileSystem> inMemoryFS = inmemoryfs::make_from_buffer(std::move(buffer));
+    if (!inMemoryFS) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "Model data is corrupted.");
+        return nil;
+    }
+
+    std::optional<ModelMetadata> metadata = ::get_model_metadata(inMemoryFS.get());
+    if (!metadata) {
+        ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedMetadata, "Metadata is invalid or missing.");
+        return nil;
+    }
+
+    auto metadataValue = metadata.value();
+    if (metadataValue.is_multifunction()) {
+        if (methodName == nil || methodName.length == 0) {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                          ETCoreMLErrorCorruptedModel,
+                                          "Multifunction CoreML models require a methodName for metadata lookup.");
+            return nil;
+        }
+
+        std::string method_name_str = [methodName UTF8String];
+        const MethodMetadata* method_metadata = metadataValue.get_method_metadata(method_name_str);
+        if (method_metadata == nullptr) {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                          ETCoreMLErrorCorruptedModel,
+                                          "Method '%@' not found in multifunction model metadata.",
+                                          methodName);
+            return nil;
+        }
+        metadataValue.input_names = method_metadata->input_names;
+        metadataValue.output_names = method_metadata->output_names;
+
+        if (functionName == nil || functionName.length == 0) {
+            ETCoreMLLogErrorAndSetNSError(error, ETCoreMLErrorCorruptedModel, "Multifunction CoreML models require a functionName.");
+            return nil;
+        }
+
+#if defined(__IPHONE_18_0) || defined(__MAC_15_0) || defined(__TVOS_18_0) || defined(__WATCHOS_11_0)
+        if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+            configuration.functionName = functionName;
+        } else {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                          ETCoreMLErrorCorruptedModel,
+                                          "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+.");
+            return nil;
+        }
+#else
+        ETCoreMLLogErrorAndSetNSError(error,
+                                      ETCoreMLErrorCorruptedModel,
+                                      "Multifunction CoreML models require iOS 18.0+ / macOS 15.0+ SDK to build.");
+        return nil;
+#endif
+    }
+
+    // Note: We intentionally skip add_compute_unit for the cache path.
+    // The cache key is based on model identifier only, not compute unit.
+    // The same compiled model should be used regardless of compute unit.
+    NSString *identifier = @(metadataValue.identifier.c_str());
+    __block id<ETCoreMLModelExecutor> executor = nil;
+    // A block must not capture the autoreleasing `error` out-parameter; report through a strong local.
+    __block NSError *localError = nil;
+    dispatch_queue_t loadingQueue = [self queueForLoadingModelWithIdentifier:identifier];
+    auto inMemoryFSPtr = inMemoryFS.get();
+    dispatch_sync(loadingQueue, ^{
+        executor = [self modelExecutorWithMetadata:metadataValue
+                                        inMemoryFS:inMemoryFSPtr
+                                     configuration:configuration
+                                             cache:cache
+                                             error:&localError];
+    });
+    if (!executor && error) {
+        *error = localError;
+    }
+
+    return executor;
+}
+
 - (dispatch_queue_t)queueForLoadingModelWithIdentifier:(NSString *)identifier {
     os_unfair_lock_lock(&_lock);
     dispatch_queue_t queue = [self.modelIdentifierToLoadingQueueMap objectForKey:identifier];
@@ -768,6 +953,40 @@ - (ModelHandle *)loadModelFromAOTData:(NSData*)data
     return (__bridge ModelHandle *)executor.model;
 }
 
+// Cache-aware overload. A nil `cache` defers to the overload without a cache; otherwise the executor
+// is built through `cache` and registered in handleToExecutorMap under its model pointer.
+- (ModelHandle *)loadModelFromAOTData:(NSData*)data
+                        configuration:(MLModelConfiguration*)configuration
+                           methodName:(nullable NSString*)methodName
+                         functionName:(nullable NSString*)functionName
+                                cache:(nullable id<ETCoreMLCache>)cache
+                                error:(NSError* __autoreleasing*)error {
+    if (!cache) {
+        // No cache supplied: keep the existing asset-based path (status quo).
+        return [self loadModelFromAOTData:data
+                            configuration:configuration
+                               methodName:methodName
+                             functionName:functionName
+                                    error:error];
+    }
+
+    id<ETCoreMLModelExecutor> cachedExecutor = [self _modelExecutorWithAOTData:data
+                                                                 configuration:configuration
+                                                                    methodName:methodName
+                                                                  functionName:functionName
+                                                                         cache:cache
+                                                                         error:error];
+    ModelHandle *handle = (__bridge ModelHandle *)cachedExecutor.model;
+    // Register the executor under its model pointer while holding the lock.
+    os_unfair_lock_lock(&_lock);
+    if (cachedExecutor != nil) {
+        self.handleToExecutorMap[[NSValue valueWithPointer:handle]] = cachedExecutor;
+    }
+    os_unfair_lock_unlock(&_lock);
+
+    return handle;
+}
+
 - (BOOL)prewarmModelWithHandle:(ModelHandle *)handle
                          error:(NSError * __autoreleasing *)error {
     ETCoreMLModel *model = [self modelWithHandle:handle];
diff --git a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
index 69e3fc09671..8b4a45af021 100644
--- a/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
+++ b/backends/apple/coreml/runtime/test/ETCoreMLModelManagerTests.mm
@@ -10,6 +10,7 @@
 #import <ETCoreMLAsset.h>
 #import <ETCoreMLAssetManager.h>
 #import <ETCoreMLModel.h>
+#import <ETCoreMLModelCache.h>
 #import <ETCoreMLModelManager.h>
 #import <MLModel_Prewarm.h>
 #import <XCTest/XCTest.h>
@@ -97,7 +98,7 @@ - (void)testModelPrewarm {
 - (void)testAddModelExecution {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
     XCTAssertNotNil(modelURL);
-    
+
     NSError *localError = nil;
     NSData *data = [NSData dataWithContentsOfURL:modelURL];
     MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
@@ -108,7 +109,7 @@ - (void)testAddModelExecution {
     int y = 50;
     // add_coreml_all does the following operation.
     int z = x + y;
-    
+
     NSArray<MLMultiArray *> *inputs = [ETCoreMLTestUtils inputsForModel:model repeatedValues:@[@(x), @(y)] error:&localError];
     XCTAssertNotNil(inputs);
     MLMultiArray *output = [ETCoreMLTestUtils filledMultiArrayWithShape:inputs[0].shape dataType:inputs[0].dataType repeatedValue:@(0) error:&localError];
@@ -127,7 +128,7 @@ - (void)testAddModelExecution {
 - (void)testMulModelExecution {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"];
     XCTAssertNotNil(modelURL);
-    
+
     NSError *localError = nil;
     NSData *data = [NSData dataWithContentsOfURL:modelURL];
     MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
@@ -151,6 +152,293 @@ - (void)testMulModelExecution {
     }
 }
 
+#pragma mark - Cache-based Path Tests
+
+// Loads add_coreml_all through the cache path; asserts the first unload succeeds and a repeat fails.
+- (void)testModelLoadAndUnloadWithCache {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    // This test builds its own cache instance under the test directory.
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+    ModelHandle *loadedHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                          configuration:config
+                                                             methodName:nil
+                                                           functionName:nil
+                                                                  cache:modelCache
+                                                                  error:&error];
+    XCTAssertTrue(loadedHandle != NULL, @"Model should load successfully with cache: %@", error);
+    XCTAssertTrue([self.modelManager unloadModelWithHandle:loadedHandle]);
+    XCTAssertFalse([self.modelManager unloadModelWithHandle:loadedHandle]);
+}
+
+// Loads add_coreml_all through the cache path and checks the model behind the returned handle.
+- (void)testModelHandleWithCache {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+    ModelHandle *loadedHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                          configuration:config
+                                                             methodName:nil
+                                                           functionName:nil
+                                                                  cache:modelCache
+                                                                  error:&error];
+    XCTAssertTrue(loadedHandle != NULL, @"Model should load with cache: %@", error);
+
+    ETCoreMLModel *loadedModel = [self.modelManager modelWithHandle:loadedHandle];
+    XCTAssertNotNil(loadedModel.mlModel);
+    XCTAssertTrue(loadedModel.identifier.length > 0);
+    XCTAssertEqual(loadedModel.orderedInputNames.count, 2);
+    XCTAssertEqual(loadedModel.orderedOutputNames.count, 1);
+}
+
+// Loads add_coreml_all through the cache path and asserts that prewarming the handle succeeds.
+- (void)testModelPrewarmWithCache {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+    ModelHandle *loadedHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                          configuration:config
+                                                             methodName:nil
+                                                           functionName:nil
+                                                                  cache:modelCache
+                                                                  error:&error];
+    XCTAssertTrue(loadedHandle != NULL, @"Model should load with cache: %@", error);
+    XCTAssertTrue([self.modelManager prewarmModelWithHandle:loadedHandle error:&error], @"Prewarm should succeed: %@", error);
+}
+
+// Executes add_coreml_all loaded through the cache path and asserts every output element is the sum.
+- (void)testAddModelExecutionWithCache {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+    ModelHandle *loadedHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                          configuration:config
+                                                             methodName:nil
+                                                           functionName:nil
+                                                                  cache:modelCache
+                                                                  error:&error];
+    XCTAssertTrue(loadedHandle != NULL, @"Model should load with cache: %@", error);
+
+    // Operands fed to the model; the test expects each output element to equal lhs + rhs.
+    int lhs = 20;
+    int rhs = 50;
+    int expectedSum = lhs + rhs;
+    ETCoreMLModel *loadedModel = [self.modelManager modelWithHandle:loadedHandle];
+    NSArray<MLMultiArray *> *modelInputs = [ETCoreMLTestUtils inputsForModel:loadedModel repeatedValues:@[@(lhs), @(rhs)] error:&error];
+    XCTAssertNotNil(modelInputs);
+    MLMultiArray *outputBuffer = [ETCoreMLTestUtils filledMultiArrayWithShape:modelInputs[0].shape dataType:modelInputs[0].dataType repeatedValue:@(0) error:&error];
+    NSArray<MLMultiArray *> *callArgs = [modelInputs arrayByAddingObject:outputBuffer];
+
+    XCTAssertTrue([self.modelManager executeModelWithHandle:loadedHandle
+                                                       args:callArgs
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&error]);
+    // Walk the whole output buffer rather than sampling a single element.
+    for (NSUInteger idx = 0; idx < outputBuffer.count; idx++) {
+        XCTAssertEqual(outputBuffer[idx].integerValue, expectedSum);
+    }
+}
+
+// Executes mul_coreml_all loaded through the cache path and asserts every output element is the product.
+- (void)testMulModelExecutionWithCache {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+    ModelHandle *loadedHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                          configuration:config
+                                                             methodName:nil
+                                                           functionName:nil
+                                                                  cache:modelCache
+                                                                  error:&error];
+    XCTAssertTrue(loadedHandle != NULL, @"Model should load with cache: %@", error);
+
+    // Operands fed to the model; the test expects each output element to equal lhs * rhs.
+    int lhs = 20;
+    int rhs = 50;
+    ETCoreMLModel *loadedModel = [self.modelManager modelWithHandle:loadedHandle];
+    NSArray<MLMultiArray *> *modelInputs = [ETCoreMLTestUtils inputsForModel:loadedModel repeatedValues:@[@(lhs), @(rhs)] error:&error];
+    XCTAssertNotNil(modelInputs);
+    MLMultiArray *outputBuffer = [ETCoreMLTestUtils filledMultiArrayWithShape:modelInputs[0].shape dataType:modelInputs[0].dataType repeatedValue:@(0) error:&error];
+    NSArray<MLMultiArray *> *callArgs = [modelInputs arrayByAddingObject:outputBuffer];
+
+    XCTAssertTrue([self.modelManager executeModelWithHandle:loadedHandle
+                                                       args:callArgs
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&error]);
+    // Walk the whole output buffer rather than sampling a single element.
+    for (NSUInteger idx = 0; idx < outputBuffer.count; idx++) {
+        XCTAssertEqual(outputBuffer[idx].integerValue, lhs * rhs);
+    }
+}
+
+// Loads the same AOT data twice against one cache, unloading in between. Asserts the identifier is
+// stable and the reloaded model still executes; the cache hit itself is not observed directly here.
+- (void)testCacheHitOnReload {
+    NSError *error = nil;
+    NSURL *resourceURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(resourceURL);
+
+    NSURL *cacheRootURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *modelCache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheRootURL];
+    XCTAssertTrue(modelCache.isReady, @"Cache should be ready: %@", modelCache.initializationError);
+
+    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
+    config.computeUnits = MLComputeUnitsAll;
+    NSData *aotData = [NSData dataWithContentsOfURL:resourceURL];
+
+    // First load (the cache is expected to be cold at this point).
+    ModelHandle *coldHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                        configuration:config
+                                                           methodName:nil
+                                                         functionName:nil
+                                                                cache:modelCache
+                                                                error:&error];
+    XCTAssertTrue(coldHandle != NULL, @"First load should succeed: %@", error);
+    ETCoreMLModel *coldModel = [self.modelManager modelWithHandle:coldHandle];
+    NSString *coldIdentifier = coldModel.identifier;
+
+    // Drop the first handle before loading again.
+    XCTAssertTrue([self.modelManager unloadModelWithHandle:coldHandle]);
+
+    // Second load of identical data (expected to be served from the cache).
+    ModelHandle *warmHandle = [self.modelManager loadModelFromAOTData:aotData
+                                                        configuration:config
+                                                           methodName:nil
+                                                         functionName:nil
+                                                                cache:modelCache
+                                                                error:&error];
+    XCTAssertTrue(warmHandle != NULL, @"Second load should succeed from cache: %@", error);
+
+    ETCoreMLModel *warmModel = [self.modelManager modelWithHandle:warmHandle];
+    XCTAssertEqualObjects(warmModel.identifier, coldIdentifier, @"Identifier should match");
+
+    // The reloaded model must still compute lhs + rhs for every output element.
+    int lhs = 10;
+    int rhs = 20;
+    NSArray<MLMultiArray *> *modelInputs = [ETCoreMLTestUtils inputsForModel:warmModel repeatedValues:@[@(lhs), @(rhs)] error:&error];
+    XCTAssertNotNil(modelInputs);
+    MLMultiArray *outputBuffer = [ETCoreMLTestUtils filledMultiArrayWithShape:modelInputs[0].shape dataType:modelInputs[0].dataType repeatedValue:@(0) error:&error];
+    NSArray<MLMultiArray *> *callArgs = [modelInputs arrayByAddingObject:outputBuffer];
+
+    XCTAssertTrue([self.modelManager executeModelWithHandle:warmHandle
+                                                       args:callArgs
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&error]);
+    for (NSUInteger idx = 0; idx < outputBuffer.count; idx++) {
+        XCTAssertEqual(outputBuffer[idx].integerValue, lhs + rhs);
+    }
+}
+
+// Loads the add and mul models against one shared cache and asserts each computes its own result.
+- (void)testMultipleModelsWithSameCache {
+    NSURL *addModelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"bin"];
+    NSURL *mulModelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"bin"];
+    XCTAssertNotNil(addModelURL);
+    XCTAssertNotNil(mulModelURL);
+
+    NSError *localError = nil;
+    NSURL *cacheURL = [self.testDirectoryURL URLByAppendingPathComponent:@"model_cache"];
+    ETCoreMLModelCache *cache = [[ETCoreMLModelCache alloc] initWithCacheRootDirectory:cacheURL];
+    XCTAssertTrue(cache.isReady, @"Cache should be ready: %@", cache.initializationError);
+
+    NSData *addData = [NSData dataWithContentsOfURL:addModelURL];
+    NSData *mulData = [NSData dataWithContentsOfURL:mulModelURL];
+    MLModelConfiguration *configuration = [[MLModelConfiguration alloc] init];
+    configuration.computeUnits = MLComputeUnitsAll;
+
+    ModelHandle *addHandle = [self.modelManager loadModelFromAOTData:addData
+                                                       configuration:configuration
+                                                          methodName:nil
+                                                        functionName:nil
+                                                               cache:cache
+                                                               error:&localError];
+    XCTAssertTrue(addHandle != NULL, @"Add model should load: %@", localError);
+    ModelHandle *mulHandle = [self.modelManager loadModelFromAOTData:mulData
+                                                       configuration:configuration
+                                                          methodName:nil
+                                                        functionName:nil
+                                                               cache:cache
+                                                               error:&localError];
+    XCTAssertTrue(mulHandle != NULL, @"Mul model should load: %@", localError);
+
+    ETCoreMLModel *addModel = [self.modelManager modelWithHandle:addHandle];
+    ETCoreMLModel *mulModel = [self.modelManager modelWithHandle:mulHandle];
+    int x = 5;
+    int y = 3;
+    // Add model: inputs must exist, and every output element (not just the first) must equal x + y.
+    NSArray<MLMultiArray *> *addInputs = [ETCoreMLTestUtils inputsForModel:addModel repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertNotNil(addInputs);
+    MLMultiArray *addOutput = [ETCoreMLTestUtils filledMultiArrayWithShape:addInputs[0].shape dataType:addInputs[0].dataType repeatedValue:@(0) error:&localError];
+    NSArray<MLMultiArray *> *addArgs = [addInputs arrayByAddingObject:addOutput];
+    XCTAssertTrue([self.modelManager executeModelWithHandle:addHandle
+                                                       args:addArgs
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&localError]);
+    for (NSUInteger i = 0; i < addOutput.count; i++) {
+        XCTAssertEqual([addOutput objectAtIndexedSubscript:i].integerValue, x + y);
+    }
+
+    // Mul model: same checks, with every output element expected to equal x * y.
+    NSArray<MLMultiArray *> *mulInputs = [ETCoreMLTestUtils inputsForModel:mulModel repeatedValues:@[@(x), @(y)] error:&localError];
+    XCTAssertNotNil(mulInputs);
+    MLMultiArray *mulOutput = [ETCoreMLTestUtils filledMultiArrayWithShape:mulInputs[0].shape dataType:mulInputs[0].dataType repeatedValue:@(0) error:&localError];
+    NSArray<MLMultiArray *> *mulArgs = [mulInputs arrayByAddingObject:mulOutput];
+    XCTAssertTrue([self.modelManager executeModelWithHandle:mulHandle
+                                                       args:mulArgs
+                                             loggingOptions:executorchcoreml::ModelLoggingOptions()
+                                                eventLogger:nullptr
+                                                      error:&localError]);
+    for (NSUInteger i = 0; i < mulOutput.count; i++) {
+        XCTAssertEqual([mulOutput objectAtIndexedSubscript:i].integerValue, x * y);
+    }
+}
+
+#pragma mark - Autorelease Pool Tests
+
 // See https://github.com/pytorch/executorch/pull/10465
 - (void)testAutoreleasepoolError {
     NSURL *modelURL = [self.class bundledResourceWithName:@"add_coreml_all" extension:@"bin"];