Skip to content

Commit 3079417

Browse files
authored
Merge branch 'main' into recurrent-fla
2 parents 63c162e + 3466332 commit 3079417

49 files changed

Lines changed: 5298 additions & 79 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/scripts/test_model_e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ EOF
354354
fi
355355
;;
356356
qwen3_5_moe)
357-
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 32"
357+
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
358358
;;
359359
voxtral_realtime)
360360
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"

.github/workflows/cuda.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ jobs:
145145
# Run CUDA backend Python tests
146146
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
147147
148-
# Run quantize roundtrip tests (Qwen 3.5 MoE save/load prequantized)
149-
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py -v -o "addopts="
148+
# Run Qwen 3.5 MoE tests (quantize roundtrip + TurboQuant KV cache)
149+
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py examples/models/qwen3_5_moe/test_turboquant.py -v -o "addopts="
150150
151151
export-model-cuda-artifact:
152152
name: export-model-cuda-artifact

backends/aoti/common_shims_slim.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ int32_t aoti_torch_dtype_int8() {
134134
return 1; // ScalarType::Char
135135
}
136136

137+
// Returns the integer dtype code for uint8: 0, i.e. ScalarType::Byte.
int32_t aoti_torch_dtype_uint8() {
138+
return 0; // ScalarType::Byte
139+
}
140+
137141
int32_t aoti_torch_dtype_bool() {
138142
return 11; // ScalarType::Bool
139143
}

backends/aoti/common_shims_slim.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_int64();
7676
AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_int32();
7777
AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_int16();
7878
AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_int8();
79+
AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_uint8();
7980
AOTI_SHIM_EXPORT int32_t aoti_torch_dtype_bool();
8081

8182
// ============================================================

backends/aoti/slim/c10/core/ScalarType.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ using BFloat16 = ::executorch::runtime::etensor::BFloat16;
2323
/// Enum representing the scalar type (dtype) of tensor elements.
2424
/// Note: Enum values must match PyTorch's c10::ScalarType for compatibility.
2525
enum class ScalarType : int8_t {
26-
// Byte = 0, // uint8_t - not currently needed
26+
Byte = 0, // uint8_t
2727
Char = 1, // int8_t
2828
Short = 2, // int16_t
2929
Int = 3, // int32_t
@@ -43,6 +43,7 @@ enum class ScalarType : int8_t {
4343
};
4444

4545
// Type alias constants for convenience
46+
constexpr ScalarType kByte = ScalarType::Byte;
4647
constexpr ScalarType kChar = ScalarType::Char;
4748
constexpr ScalarType kShort = ScalarType::Short;
4849
constexpr ScalarType kInt = ScalarType::Int;
@@ -56,6 +57,8 @@ constexpr ScalarType kBFloat16 = ScalarType::BFloat16;
5657
/// @return The size in bytes of a single element.
5758
inline size_t elementSize(ScalarType t) {
5859
switch (t) {
60+
case ScalarType::Byte:
61+
return sizeof(uint8_t);
5962
case ScalarType::Char:
6063
return sizeof(int8_t);
6164
case ScalarType::Short:
@@ -80,6 +83,8 @@ inline size_t elementSize(ScalarType t) {
8083
/// @return The name of the scalar type.
8184
inline const char* toString(ScalarType t) {
8285
switch (t) {
86+
case ScalarType::Byte:
87+
return "Byte";
8388
case ScalarType::Char:
8489
return "Char";
8590
case ScalarType::Short:
@@ -114,6 +119,7 @@ inline bool isFloatingType(ScalarType t) {
114119
/// @return true if the scalar type is integral, false otherwise.
115120
inline bool isIntegralType(ScalarType t, bool includeBool) {
116121
switch (t) {
122+
case ScalarType::Byte:
117123
case ScalarType::Char:
118124
case ScalarType::Short:
119125
case ScalarType::Int:
@@ -138,6 +144,7 @@ inline bool isBoolType(ScalarType t) {
138144
/// @return true if the scalar type is valid, false otherwise.
139145
inline bool isValidScalarType(ScalarType t) {
140146
switch (t) {
147+
case ScalarType::Byte:
141148
case ScalarType::Char:
142149
case ScalarType::Short:
143150
case ScalarType::Int:

backends/apple/coreml/BUCK

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ runtime.cxx_library(
1818
"runtime/delegate/ETCoreMLDefaultModelExecutor.mm",
1919
"runtime/delegate/ETCoreMLLogging.mm",
2020
"runtime/delegate/ETCoreMLModel.mm",
21+
"runtime/delegate/ETCoreMLModelCache.mm",
2122
"runtime/delegate/ETCoreMLModelCompiler.mm",
2223
"runtime/delegate/ETCoreMLModelLoader.mm",
2324
"runtime/delegate/ETCoreMLModelManager.mm",

backends/apple/coreml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ set(DELEGATE_SOURCES
3232
runtime/delegate/ETCoreMLAsset.mm
3333
runtime/delegate/ETCoreMLAssetManager.mm
3434
runtime/delegate/ETCoreMLDefaultModelExecutor.mm
35+
runtime/delegate/ETCoreMLModelCache.mm
3536
runtime/delegate/ETCoreMLModelLoader.mm
3637
runtime/delegate/ETCoreMLModelCompiler.mm
3738
runtime/delegate/ETCoreMLLogging.mm
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
//
2+
// ETCoreMLModelCache.h
3+
//
4+
// Copyright © 2024 Apple Inc. All rights reserved.
5+
//
6+
// Please refer to the license found in the LICENSE file in the root directory of the source tree.
7+
8+
#import <Foundation/Foundation.h>
9+
10+
#import "ETCoreMLCacheProtocol.h"
11+
12+
NS_ASSUME_NONNULL_BEGIN
13+
14+
extern NSString* const ETCoreMLModelCacheErrorDomain;
15+
16+
/// Error codes for failures reported by the model cache.
/// NOTE(review): presumably used together with ETCoreMLModelCacheErrorDomain — confirm in the implementation.
typedef NS_ENUM(NSInteger, ETCoreMLModelCacheErrorCode) {
17+
ETCoreMLModelCacheErrorCodeUnknown = 0,
18+
ETCoreMLModelCacheErrorCodeInitializationFailed = 1,
19+
/// NOTE(review): presumably returned when an identifier contains '/' or '..' — confirm.
ETCoreMLModelCacheErrorCodeInvalidIdentifier = 2,
20+
ETCoreMLModelCacheErrorCodeSourceNotFound = 3,
21+
/// Out-of-space condition; callers of the store operation are told to check for
/// this code so they can handle a full disk specially.
ETCoreMLModelCacheErrorCodeDiskFull = 4,
22+
ETCoreMLModelCacheErrorCodeIOError = 5,
23+
ETCoreMLModelCacheErrorCodeCorruptedCache = 6,
24+
};
25+
26+
/// A simplified, filesystem-based cache for compiled CoreML models.
27+
///
28+
/// This class provides a cache implementation that stores compiled models as directories
29+
/// in a versioned cache structure. It uses atomic writes (rename) to ensure cache integrity
30+
/// even in the presence of crashes or concurrent access.
31+
///
32+
/// Directory structure:
33+
/// ```
34+
/// cache_root/
35+
/// ├── version.txt (cache format version)
36+
/// ├── models/
37+
/// │ ├── {identifier}.mlmodelc/ (compiled model bundle)
38+
/// │ ├── {identifier}.accessed (last access time for LRU eviction)
39+
/// │ └── ...
40+
/// └── temp/
41+
/// └── {uuid}/ (mlpackage files awaiting compilation)
42+
/// ```
43+
///
44+
/// ## Thread Safety and Concurrency Guarantees
45+
///
46+
/// This class provides **NO internal synchronization**. It is designed to be used in one of
47+
/// two ways:
48+
///
49+
/// 1. **Single-threaded access**: All calls to a single instance from one thread/queue.
50+
///
51+
/// 2. **External serialization**: When used via `ETCoreMLModelManager`, access is serialized
52+
/// by the manager's per-identifier loading queue. This is the expected usage pattern.
53+
///
54+
/// **Multi-process safety** is provided by:
55+
/// - Atomic filesystem operations (`rename()`)
56+
/// - Unique temp paths (UUID-based) to avoid conflicts
57+
/// - "Last writer wins" semantics (acceptable since all writers produce identical output)
58+
///
59+
/// **Multiple instances** pointing to the same directory are safe because:
60+
/// - Each write uses a unique temp path
61+
/// - Final placement uses atomic `moveItemAtURL:toURL:error:` (POSIX `rename()`)
62+
/// - Concurrent writes result in "last writer wins" (both write identical data)
63+
/// - Cleanup only targets entries older than 24 hours
64+
///
65+
/// **Callers are responsible for**:
66+
/// - Handling `MLModel` load failures gracefully (cache entry may be replaced/deleted
67+
/// between URL retrieval and model load)
68+
/// - Not relying on returned URLs remaining valid indefinitely
69+
@interface ETCoreMLModelCache : NSObject <ETCoreMLCache>
70+
71+
- (instancetype)init NS_UNAVAILABLE;
72+
+ (instancetype)new NS_UNAVAILABLE;
73+
74+
/// The root directory for all cache data (contains models/, temp/, version.txt).
75+
@property (nonatomic, readonly) NSURL* cacheRootDirectory;
76+
77+
/// Whether the cache was initialized successfully and is ready for use.
78+
/// If NO, all operations will fail. Check this after initialization.
79+
@property (nonatomic, readonly, getter=isReady) BOOL ready;
80+
81+
/// If `ready` is NO, this contains the error that occurred during initialization.
82+
@property (nonatomic, readonly, nullable) NSError* initializationError;
83+
84+
/// Initializes the cache with the given root directory.
85+
/// Creates the directory structure if it doesn't exist.
86+
/// Check the `ready` property after initialization to verify success.
87+
/// If initialization fails, `initializationError` will contain the reason.
88+
///
89+
/// @param cacheRootDirectory The root directory for all cache data.
90+
- (instancetype)initWithCacheRootDirectory:(NSURL*)cacheRootDirectory NS_DESIGNATED_INITIALIZER;
91+
92+
/// Returns the URL of a cached model if it exists and is valid, otherwise nil.
93+
///
94+
/// @param identifier The unique identifier for the cached model.
95+
/// @param error On failure, error is filled with the failure information.
96+
/// @return The URL to the cached model bundle, or nil if not found or invalid.
97+
///
98+
/// @warning The returned URL may become invalid before the caller uses it if another
99+
/// process deletes or replaces the cached model. Callers MUST handle MLModel load
100+
/// failures gracefully by treating them as cache misses and recompiling.
101+
- (nullable NSURL*)cachedModelURLForIdentifier:(NSString*)identifier error:(NSError**)error;
102+
103+
/// Stores a compiled model in the cache. Returns the cached URL on success.
104+
///
105+
/// @param compiledModelURL The URL of the compiled model bundle to cache. Must exist.
106+
/// @param identifier The unique identifier for this model. Must not contain '/' or '..'.
107+
/// @param error On failure, contains the error. Check for ETCoreMLModelCacheErrorCodeDiskFull
108+
/// to handle out-of-space conditions specially.
109+
/// @return The URL of the cached model, or nil on failure.
110+
- (nullable NSURL*)storeModelAtURL:(NSURL*)compiledModelURL withIdentifier:(NSString*)identifier error:(NSError**)error;
111+
112+
/// Removes a specific cached model. This is a best-effort operation that removes
113+
/// the model bundle and access time files for the given identifier.
114+
///
115+
/// @param identifier The unique identifier for the cached model to remove.
116+
/// @param error On failure, error is filled with the failure information.
117+
/// @return YES on success (including if the model didn't exist), NO on validation errors.
118+
- (BOOL)removeCachedModelWithIdentifier:(NSString*)identifier error:(NSError**)error;
119+
120+
/// Clears the entire cache, including all cached models.
121+
/// Recreates the empty directory structure after clearing.
122+
///
123+
/// @param error On failure, error is filled with the failure information.
124+
/// @return YES if the cache was purged successfully, otherwise NO.
125+
- (BOOL)purgeAndReturnError:(NSError**)error;
126+
127+
#pragma mark - Temp Directory (for mlpackage extraction before compilation)
128+
129+
/// Returns a temp URL where an mlpackage can be extracted before compilation.
130+
/// The caller is responsible for cleaning up this directory after compilation completes.
131+
///
132+
/// @param error On failure, error is filled with the failure information.
133+
/// @return A temp URL where the mlpackage can be extracted, or nil on failure.
134+
///
135+
/// @note The temp URL is unique and includes a UUID to avoid conflicts.
136+
/// @note Temp entries are automatically cleaned up after 24 hours if not removed.
137+
- (nullable NSURL*)temporaryDirectoryWithError:(NSError**)error;
138+
139+
@end
140+
141+
NS_ASSUME_NONNULL_END

0 commit comments

Comments
 (0)