Skip to content

Commit f62b9be

Browse files
authored
Merge branch 'main' into ov_windows_support
2 parents 69db4d5 + 28f3cf3 commit f62b9be

18 files changed

Lines changed: 1669 additions & 34 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//
2+
// ETCoreMLCacheProtocol.h
3+
//
4+
// Copyright © 2024 Apple Inc. All rights reserved.
5+
//
6+
// Please refer to the license found in the LICENSE file in the root directory of the source tree.
7+
8+
#import <Foundation/Foundation.h>
9+
10+
NS_ASSUME_NONNULL_BEGIN
11+
12+
/// Protocol defining the interface for CoreML model caching.
13+
///
14+
/// This protocol abstracts the cache implementation, so callers depend only on the
/// methods declared here rather than on a concrete cache class.
15+
@protocol ETCoreMLCache <NSObject>
16+
17+
/// Returns the URL of a cached model if it exists and is valid, otherwise nil.
18+
///
19+
/// @param identifier The unique identifier for the cached model.
20+
/// @param error On failure, error is filled with the failure information.
21+
/// @return The URL to the cached model bundle, or nil if not found or invalid.
/// NOTE(review): this header does not say whether a plain cache miss (nil returned
/// with no underlying failure) also populates `error` — confirm against the
/// conforming implementations before relying on `error` to distinguish the two.
22+
///
23+
/// @warning The returned URL may become invalid before the caller uses it if another
24+
/// process deletes or replaces the cached model. Callers MUST handle MLModel load
25+
/// failures gracefully by treating them as cache misses and recompiling.
26+
- (nullable NSURL*)cachedModelURLForIdentifier:(NSString*)identifier error:(NSError**)error;
27+
28+
/// Stores a compiled model in the cache. Returns the cached URL on success.
29+
///
30+
/// @param compiledModelURL The URL of the compiled model bundle to cache. Must exist.
31+
/// @param identifier The unique identifier for this model.
32+
/// @param error On failure, error is filled with the failure information.
33+
/// @return The URL of the cached model, or nil on failure.
34+
- (nullable NSURL*)storeModelAtURL:(NSURL*)compiledModelURL withIdentifier:(NSString*)identifier error:(NSError**)error;
35+
36+
/// Removes a specific cached model.
37+
///
38+
/// @param identifier The unique identifier for the cached model to remove.
39+
/// @param error On failure, error is filled with the failure information.
40+
/// @return YES if the model was removed or didn't exist. Returns NO only on I/O errors.
41+
- (BOOL)removeCachedModelWithIdentifier:(NSString*)identifier error:(NSError**)error;
42+
43+
/// Clears the entire cache, including all cached models.
44+
///
45+
/// @param error On failure, error is filled with the failure information.
46+
/// @return YES if the cache was purged successfully, otherwise NO.
47+
- (BOOL)purgeAndReturnError:(NSError**)error;
48+
49+
/// Returns a temp URL where intermediate files can be written during compilation.
50+
/// This is guaranteed to be on the same filesystem as the cache, ensuring atomic moves.
51+
///
52+
/// @param error On failure, error is filled with the failure information.
53+
/// @return A temp URL where intermediate files can be written, or nil on failure.
54+
///
55+
/// @note The temp URL is unique (UUID-based) to avoid conflicts.
56+
/// @note Temp entries are cleaned up automatically after 24 hours.
/// NOTE(review): the selector says "directory" while the text says "temp URL"; this
/// header does not state whether the directory already exists on return or must be
/// created by the caller — confirm against the conforming implementations.
57+
- (nullable NSURL*)temporaryDirectoryWithError:(NSError**)error;
58+
59+
@end
60+
61+
NS_ASSUME_NONNULL_END

backends/cadence/aot/export_example.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def export_and_run_model(
104104
file_name: str = "CadenceDemoModel",
105105
eps_error: float = 1e-1,
106106
eps_warn: float = 1e-5,
107+
force_rebuild: bool = False,
107108
):
108109
# create work directory for outputs and model binary
109110
working_dir = tempfile.mkdtemp(dir="/tmp")
@@ -118,4 +119,5 @@ def export_and_run_model(
118119
eps_error=eps_error,
119120
eps_warn=eps_warn,
120121
file_name=file_name,
122+
force_rebuild=force_rebuild,
121123
)

backends/cadence/runtime/executor.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,22 @@ def __init__(
105105
self,
106106
working_dir: str = "",
107107
file_name: str = "CadenceDemoModel",
108+
force_rebuild: bool = False,
108109
):
109110
self.working_dir = working_dir
110111
self.executor_builder = "./backends/cadence/build_cadence_runner.sh"
111112
self.execute_runner = "./cmake-out/backends/cadence/cadence_runner"
112113
self.bundled_program_path: str = f"{file_name}.bpte"
114+
self.force_rebuild = force_rebuild
113115

114116
def __call__(self) -> None:
115-
# build executor
116-
args = self.get_bash_command(self.executor_builder)
117-
logging.info(f"\33[33m{' '.join(args)}\33[0m")
118-
execute(args)
117+
# build executor if not already built or force rebuild requested
118+
if self.force_rebuild or not os.path.isfile(self.execute_runner):
119+
args = self.get_bash_command(self.executor_builder)
120+
logging.info(f"\33[33m{' '.join(args)}\33[0m")
121+
execute(args)
122+
else:
123+
logging.info("Reusing existing runner at %s", self.execute_runner)
119124

120125
# run executor
121126
cmd_args = {

backends/cadence/runtime/runtime.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def run(
5757
ref_outputs: Optional[Sequence[torch.Tensor]] = None,
5858
working_dir: Optional[str] = None,
5959
file_name: str = "CadenceDemoModel",
60+
force_rebuild: bool = False,
6061
) -> Any:
6162
# Get the Program
6263
program = executorch_prog.executorch_program
@@ -70,7 +71,7 @@ def run(
7071
working_dir = tempfile.mkdtemp(dir="/tmp")
7172

7273
# initialize e2e Executor with executorch_cfg.
73-
executor = Executor(working_dir, file_name=file_name)
74+
executor = Executor(working_dir, file_name=file_name, force_rebuild=force_rebuild)
7475

7576
# run Executor
7677
executor()
@@ -138,9 +139,15 @@ def run_and_compare(
138139
eps_error: float = 1e-1,
139140
eps_warn: float = 1e-5,
140141
file_name: str = "CadenceDemoModel",
142+
force_rebuild: bool = False,
141143
) -> Any:
142144
outputs = run(
143-
executorch_prog, inputs, ref_outputs, working_dir, file_name=file_name
145+
executorch_prog,
146+
inputs,
147+
ref_outputs,
148+
working_dir,
149+
file_name=file_name,
150+
force_rebuild=force_rebuild,
144151
)
145152
compare(outputs, ref_outputs, eps_error=eps_error, eps_warn=eps_warn)
146153

backends/xnnpack/_passes/decompose_cat.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,11 @@ def call(self, graph_module: torch.fx.GraphModule):
6969
# if quantized we need to enforce the q/dq pattern for the newly inserted
7070
# concat node
7171
q_params = nodes_to_concat[0].args[1:]
72-
q_kwargs = nodes_to_concat[0].kwargs
72+
dq_kwargs = nodes_to_concat[0].kwargs
73+
# quantize_per_tensor does not accept out_dtype, so exclude
74+
# it from kwargs passed to the quantize node. out_dtype is
75+
# only valid for dequantize_per_tensor (e.g. fp16 models).
76+
q_kwargs = {k: v for k, v in dq_kwargs.items() if k != "out_dtype"}
7377
# Quantizer enforces all the inputs and output to a concat node must share
7478
# the same qparams, this means the newly inserted q/dq pair must share the
7579
# same qparams as the first quantized input in the concat node.
@@ -89,7 +93,7 @@ def call(self, graph_module: torch.fx.GraphModule):
8993
"call_function",
9094
target=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
9195
args=(q_node,) + q_params,
92-
kwargs=q_kwargs,
96+
kwargs=dq_kwargs,
9397
)
9498
tag_as_implicit_q_dq(dq_node)
9599
remainder_concat_node.args = (

docs/source/index.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,64 @@
1111

1212
---
1313

14+
## 🗺️ Find Your Path
15+
16+
Not sure where to start? Use the guided pathways to navigate ExecuTorch based on your experience level, goal, and target platform.
17+
18+
::::{grid} 3
19+
:gutter: 3
20+
21+
:::{grid-item-card} 🟢 New to ExecuTorch
22+
:class-header: bg-success text-white
23+
:link: pathway-beginner
24+
:link-type: doc
25+
26+
Step-by-step learning sequence from installation to your first on-device deployment. Includes concept explanations and worked examples.
27+
28+
+++
29+
**Beginner Pathway →**
30+
:::
31+
32+
:::{grid-item-card} 🟡 Get Running Fast
33+
:class-header: bg-warning text-dark
34+
:link: pathway-quickstart
35+
:link-type: doc
36+
37+
Skip the theory — get a model running in 15 minutes. Includes export cheat sheets, backend selection tables, and platform quick starts.
38+
39+
+++
40+
**Quick Start Pathway →**
41+
:::
42+
43+
:::{grid-item-card} 🔴 Production & Advanced
44+
:class-header: bg-danger text-white
45+
:link: pathway-advanced
46+
:link-type: doc
47+
48+
Quantization, custom backends, C++ runtime, LLM deployment, and compiler internals for production-grade systems.
49+
50+
+++
51+
**Advanced Pathway →**
52+
:::
53+
54+
::::
55+
56+
::::{grid} 1
57+
58+
:::{grid-item-card} 🔀 Decision Matrix — Route by Goal, Platform & Model
59+
:link: user-pathways
60+
:link-type: doc
61+
62+
Not sure which pathway fits? The decision matrix routes you by experience level, target platform, model status, and developer role to the exact documentation you need.
63+
64+
+++
65+
**View Decision Matrix →**
66+
:::
67+
68+
::::
69+
70+
---
71+
1472
## 🎯 Wins & Success Stories
1573

1674
::::{grid} 1
@@ -186,6 +244,7 @@ Profile, debug, and inspect your models with comprehensive tooling.
186244
187245
intro-section
188246
quick-start-section
247+
user-pathways
189248
edge-platforms-section
190249
backends-section
191250
llm/working-with-llms

docs/source/intro-section.md

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,39 @@ Overview, architecture, and core concepts of ExecuTorch.
66

77
ExecuTorch is PyTorch's solution for training and inference on the Edge, providing portability, productivity, and performance for edge computing platforms.
88

9+
---
10+
11+
## New to ExecuTorch? Start Here
12+
13+
::::{grid} 2
14+
:gutter: 2
15+
16+
:::{grid-item-card} 🟢 Beginner Pathway
17+
:link: pathway-beginner
18+
:link-type: doc
19+
20+
A structured, step-by-step learning sequence for engineers new to on-device deployment. Covers concepts, setup, export, and your first device deployment.
21+
:::
22+
23+
:::{grid-item-card} 🔀 Decision Matrix
24+
:link: user-pathways
25+
:link-type: doc
26+
27+
Route yourself to the right documentation based on your experience level, target platform, model status, and developer role.
28+
:::
29+
30+
::::
31+
32+
---
33+
934
## Getting Started with ExecuTorch
1035

1136
New to ExecuTorch? Start with these foundational topics:
1237

13-
- **{doc}`intro-overview`** - High-level overview of ExecuTorch capabilities
14-
- **{doc}`intro-how-it-works`** - Technical overview of the ExecuTorch workflow
15-
- **{doc}`getting-started-architecture`** - System architecture and components
16-
- **{doc}`concepts`** - Core concepts and terminology
38+
- **{doc}`intro-overview`** — High-level overview of ExecuTorch capabilities
39+
- **{doc}`intro-how-it-works`** — Technical overview of the ExecuTorch workflow
40+
- **{doc}`getting-started-architecture`** — System architecture and components
41+
- **{doc}`concepts`** — Core concepts and terminology
1742

1843
```{toctree}
1944
:hidden:

0 commit comments

Comments
 (0)