pytorch
diff --git a/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/android-release-artifacts.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/android-release-artifacts.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.lintrunner.toml‎
Lines changed: 1 addition & 0 deletions b/‎.lintrunner.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/aoti/utils.h‎
Lines changed: 22 additions & 0 deletions b/‎backends/aoti/utils.h‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎backends/apple/coreml/BUCK‎
Lines changed: 1 addition & 0 deletions b/‎backends/apple/coreml/BUCK‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/apple/coreml/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎backends/apple/coreml/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLCacheProtocol.h‎
Lines changed: 61 additions & 0 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLCacheProtocol.h‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelCache.h‎
Lines changed: 141 additions & 0 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelCache.h‎
Lines changed: 141 additions & 0 deletions
@@ -358,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   STREAMING_ARG=""
   PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
   if [ "$USE_STREAMING" = "true" ]; then
-    STREAMING_ARG="--streaming"
+    STREAMING_ARG="--streaming --sliding-window 2048"
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
   else
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
 
@@ -165,7 +165,7 @@ jobs:
       contents: read
     steps:
       - name: configure aws credentials
-        uses: aws-actions/configure-aws-credentials@v1.7.0
+        uses: aws-actions/configure-aws-credentials@v4
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
           aws-region: us-east-1
 
@@ -239,7 +239,7 @@ jobs:
           python-version: '3.11'
           cache: pip
       - name: configure aws credentials
-        uses: aws-actions/configure-aws-credentials@v1.7.0
+        uses: aws-actions/configure-aws-credentials@v4
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
           aws-region: us-east-1
 
@@ -223,6 +223,7 @@ exclude_patterns = [
     '**/*.gif',
     'extension/llm/tokenizers',
     'extension/llm/tokenizers/**',
+    'backends/cadence/utils/FACTO',
     'examples/cuda',
     'kernels/portable',
     # File contains @generated
 
@@ -15,6 +15,7 @@
 #include <executorch/runtime/platform/log.h>
 #include <cstddef>
 #include <cstdint>
+#include <utility>
 #include <vector>
 
 namespace executorch {
@@ -163,6 +164,27 @@ inline bool is_contiguous_tensor(
   return true;
 }
 
+// Runs a noexcept cleanup callable when the guard goes out of scope. Stand-in
+// for std::experimental::scope_exit (Library Fundamentals TS v3).
+template <typename F>
+class ScopeGuard {
+  // Checked at compile time: the destructor also runs during stack unwinding.
+  static_assert(
+      noexcept(std::declval<F&>()()),
+      "ScopeGuard callable must be noexcept to avoid std::terminate "
+      "if it throws during stack unwinding");
+
+  F on_exit_;
+
+ public:
+  explicit ScopeGuard(F&& callable) : on_exit_(std::move(callable)) {}
+  ScopeGuard(const ScopeGuard&) = delete;
+  ScopeGuard& operator=(const ScopeGuard&) = delete;
+  ~ScopeGuard() noexcept {
+    on_exit_();
+  }
+};
+
 } // namespace aoti
 } // namespace backends
 } // namespace executorch
@@ -18,6 +18,7 @@ runtime.cxx_library(
         "runtime/delegate/ETCoreMLDefaultModelExecutor.mm",
         "runtime/delegate/ETCoreMLLogging.mm",
         "runtime/delegate/ETCoreMLModel.mm",
+        "runtime/delegate/ETCoreMLModelCache.mm",
         "runtime/delegate/ETCoreMLModelCompiler.mm",
         "runtime/delegate/ETCoreMLModelLoader.mm",
         "runtime/delegate/ETCoreMLModelManager.mm",
 
@@ -32,6 +32,7 @@ set(DELEGATE_SOURCES
     runtime/delegate/ETCoreMLAsset.mm
     runtime/delegate/ETCoreMLAssetManager.mm
     runtime/delegate/ETCoreMLDefaultModelExecutor.mm
+    runtime/delegate/ETCoreMLModelCache.mm
     runtime/delegate/ETCoreMLModelLoader.mm
     runtime/delegate/ETCoreMLModelCompiler.mm
     runtime/delegate/ETCoreMLLogging.mm
 
@@ -0,0 +1,61 @@
+//
+// ETCoreMLCacheProtocol.h
+//
+// Copyright © 2024 Apple Inc. All rights reserved.
+//
+// Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Protocol defining the interface for CoreML model caching.
+///
+/// This protocol abstracts the cache implementation from its callers.
+@protocol ETCoreMLCache <NSObject>
+
+/// Returns the URL of a cached model if it exists and is valid, otherwise nil.
+///
+/// @param identifier The unique identifier for the cached model.
+/// @param error On failure, error is filled with the failure information.
+/// @return The URL to the cached model bundle, or nil if not found or invalid.
+///
+/// @warning The returned URL may become invalid before the caller uses it if another
+/// process deletes or replaces the cached model. Callers MUST handle MLModel load
+/// failures gracefully by treating them as cache misses and recompiling.
+- (nullable NSURL*)cachedModelURLForIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Stores a compiled model in the cache. Returns the cached URL on success.
+///
+/// @param compiledModelURL The URL of the compiled model bundle to cache. Must exist.
+/// @param identifier The unique identifier for this model.
+/// @param error On failure, error is filled with the failure information.
+/// @return The URL of the cached model, or nil on failure.
+- (nullable NSURL*)storeModelAtURL:(NSURL*)compiledModelURL withIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Removes a specific cached model.
+///
+/// @param identifier The unique identifier for the cached model to remove.
+/// @param error On failure, error is filled with the failure information.
+/// @return YES if the model was removed or didn't exist, NO on failure (`error` then describes it).
+- (BOOL)removeCachedModelWithIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Clears the entire cache, including all cached models.
+///
+/// @param error On failure, error is filled with the failure information.
+/// @return YES if the cache was purged successfully, otherwise NO.
+- (BOOL)purgeAndReturnError:(NSError**)error;
+
+/// Returns a temp URL where intermediate files can be written during compilation.
+/// This is guaranteed to be on the same filesystem as the cache, ensuring atomic moves.
+///
+/// @param error On failure, error is filled with the failure information.
+/// @return A temp URL where intermediate files can be written, or nil on failure.
+///
+/// @note The temp URL is unique (UUID-based) to avoid conflicts.
+/// @note Temp entries that are left behind are cleaned up automatically after 24 hours.
+- (nullable NSURL*)temporaryDirectoryWithError:(NSError**)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
@@ -0,0 +1,141 @@
+//
+// ETCoreMLModelCache.h
+//
+// Copyright © 2024 Apple Inc. All rights reserved.
+//
+// Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+#import <Foundation/Foundation.h>
+
+#import "ETCoreMLCacheProtocol.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+extern NSErrorDomain const ETCoreMLModelCacheErrorDomain; ///< Domain for ETCoreMLModelCacheErrorCode values.
+
+typedef NS_ERROR_ENUM(ETCoreMLModelCacheErrorDomain, ETCoreMLModelCacheErrorCode) {
+    ETCoreMLModelCacheErrorCodeUnknown = 0,
+    ETCoreMLModelCacheErrorCodeInitializationFailed = 1,
+    ETCoreMLModelCacheErrorCodeInvalidIdentifier = 2, ///< Presumably an identifier containing '/' or '..' (confirm in the .mm).
+    ETCoreMLModelCacheErrorCodeSourceNotFound = 3,
+    ETCoreMLModelCacheErrorCodeDiskFull = 4, ///< Out of space; callers of storeModelAtURL:withIdentifier:error: may special-case it.
+    ETCoreMLModelCacheErrorCodeIOError = 5,
+    ETCoreMLModelCacheErrorCodeCorruptedCache = 6,
+};
+
+/// A simplified, filesystem-based cache for compiled CoreML models.
+///
+/// This class provides a cache implementation that stores compiled models as directories
+/// in a versioned cache structure. It uses atomic writes (rename) to ensure cache integrity
+/// even in the presence of crashes or concurrent access.
+///
+/// Directory structure:
+/// ```
+/// cache_root/
+/// ├── version.txt                         (cache format version)
+/// ├── models/
+/// │   ├── {identifier}.mlmodelc/          (compiled model bundle)
+/// │   ├── {identifier}.accessed           (last access time for LRU eviction)
+/// │   └── ...
+/// └── temp/
+///     └── {uuid}/                         (mlpackage files awaiting compilation)
+/// ```
+///
+/// ## Thread Safety and Concurrency Guarantees
+///
+/// This class provides **NO internal synchronization**. It is designed to be used in one of
+/// two ways:
+///
+/// 1. **Single-threaded access**: All calls to a single instance from one thread/queue.
+///
+/// 2. **External serialization**: When used via `ETCoreMLModelManager`, access is serialized
+///    by the manager's per-identifier loading queue. This is the expected usage pattern.
+///
+/// **Multi-process safety** is provided by:
+/// - Atomic filesystem operations (`rename()`)
+/// - Unique temp paths (UUID-based) to avoid conflicts
+/// - "Last writer wins" semantics (acceptable since all writers produce identical output)
+///
+/// **Multiple instances** pointing to the same directory are safe because:
+/// - Each write uses a unique temp path
+/// - Final placement uses atomic `-[NSFileManager moveItemAtURL:toURL:error:]` (POSIX `rename()`)
+/// - Concurrent writes result in "last writer wins" (both write identical data)
+/// - Cleanup only targets entries older than 24 hours
+///
+/// **Callers are responsible for**:
+/// - Handling `MLModel` load failures gracefully (cache entry may be replaced/deleted
+///   between URL retrieval and model load)
+/// - Not relying on returned URLs remaining valid indefinitely
+@interface ETCoreMLModelCache : NSObject <ETCoreMLCache>
+
+- (instancetype)init NS_UNAVAILABLE;
++ (instancetype)new NS_UNAVAILABLE;
+
+/// The root directory for all cache data (contains models/, temp/, version.txt).
+@property (nonatomic, readonly) NSURL* cacheRootDirectory;
+
+/// Whether the cache was initialized successfully and is ready for use.
+/// If NO, all operations will fail. Check this after initialization.
+@property (nonatomic, readonly, getter=isReady) BOOL ready;
+
+/// If `ready` is NO, this contains the error that occurred during initialization.
+@property (nonatomic, readonly, nullable) NSError* initializationError;
+
+/// Initializes the cache with the given root directory.
+/// Creates the directory structure if it doesn't exist.
+/// Check the `ready` property after initialization to verify success.
+/// If initialization fails, `initializationError` will contain the reason.
+///
+/// @param cacheRootDirectory The root directory for all cache data.
+- (instancetype)initWithCacheRootDirectory:(NSURL*)cacheRootDirectory NS_DESIGNATED_INITIALIZER;
+
+/// Returns the URL of a cached model if it exists and is valid, otherwise nil.
+///
+/// @param identifier The unique identifier for the cached model.
+/// @param error On failure, error is filled with the failure information.
+/// @return The URL to the cached model bundle, or nil if not found or invalid.
+///
+/// @warning The returned URL may become invalid before the caller uses it if another
+/// process deletes or replaces the cached model. Callers MUST handle MLModel load
+/// failures gracefully by treating them as cache misses and recompiling.
+- (nullable NSURL*)cachedModelURLForIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Stores a compiled model in the cache. Returns the cached URL on success.
+///
+/// @param compiledModelURL The URL of the compiled model bundle to cache. Must exist.
+/// @param identifier The unique identifier for this model. Must not contain '/' or '..'.
+/// @param error On failure, contains the error. Check for ETCoreMLModelCacheErrorCodeDiskFull
+///              to handle out-of-space conditions specially.
+/// @return The URL of the cached model, or nil on failure.
+- (nullable NSURL*)storeModelAtURL:(NSURL*)compiledModelURL withIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Removes a specific cached model. This is a best-effort operation that removes
+/// the model bundle and access time files for the given identifier.
+///
+/// @param identifier The unique identifier for the cached model to remove.
+/// @param error On failure, error is filled with the failure information.
+/// @return YES on success (including if the model didn't exist), NO on validation errors.
+- (BOOL)removeCachedModelWithIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Clears the entire cache, including all cached models.
+/// Recreates the empty directory structure after clearing.
+///
+/// @param error On failure, error is filled with the failure information.
+/// @return YES if the cache was purged successfully, otherwise NO.
+- (BOOL)purgeAndReturnError:(NSError**)error;
+
+#pragma mark - Temp Directory (for mlpackage extraction before compilation)
+
+/// Returns a temp URL where an mlpackage can be extracted before compilation.
+/// The caller is responsible for cleaning up this directory after compilation completes.
+///
+/// @param error On failure, error is filled with the failure information.
+/// @return A temp URL where the mlpackage can be extracted, or nil on failure.
+///
+/// @note The temp URL is unique and includes a UUID to avoid conflicts.
+/// @note Temp entries are automatically cleaned up after 24 hours if not removed.
+- (nullable NSURL*)temporaryDirectoryWithError:(NSError**)error;
+
+@end
+
+NS_ASSUME_NONNULL_END