pytorch
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLCacheProtocol.h‎
Lines changed: 61 additions & 0 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLCacheProtocol.h‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎backends/cadence/aot/export_example.py‎
Lines changed: 2 additions & 0 deletions b/‎backends/cadence/aot/export_example.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/cadence/runtime/executor.py‎
Lines changed: 9 additions & 4 deletions b/‎backends/cadence/runtime/executor.py‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎backends/cadence/runtime/runtime.py‎
Lines changed: 9 additions & 2 deletions b/‎backends/cadence/runtime/runtime.py‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎backends/xnnpack/_passes/decompose_cat.py‎
Lines changed: 6 additions & 2 deletions b/‎backends/xnnpack/_passes/decompose_cat.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎docs/source/index.md‎
Lines changed: 59 additions & 0 deletions b/‎docs/source/index.md‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎docs/source/intro-section.md‎
Lines changed: 29 additions & 4 deletions b/‎docs/source/intro-section.md‎
Lines changed: 29 additions & 4 deletions
@@ -0,0 +1,61 @@
+//
+// ETCoreMLCacheProtocol.h
+//
+// Copyright © 2024 Apple Inc. All rights reserved.
+//
+// Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Protocol defining the interface for a cache of compiled CoreML models.
+///
+/// This protocol abstracts the cache implementation behind a single interface.
+@protocol ETCoreMLCache <NSObject>
+
+/// Looks up a cached model; returns its URL if it exists and is valid, otherwise nil.
+///
+/// @param identifier The unique identifier for the cached model.
+/// @param error On failure, populated with an NSError describing the failure.
+/// @return The URL to the cached model bundle, or nil if not found or invalid.
+///
+/// @warning The returned URL may become invalid before the caller uses it if another
+/// process deletes or replaces the cached model. Callers MUST handle MLModel load
+/// failures gracefully by treating them as cache misses and recompiling.
+- (nullable NSURL*)cachedModelURLForIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Stores a compiled model in the cache and returns the cached URL on success.
+///
+/// @param compiledModelURL The URL of the compiled model bundle to cache. The bundle must exist.
+/// @param identifier The unique identifier under which this model is stored.
+/// @param error On failure, populated with an NSError describing the failure.
+/// @return The URL of the cached model, or nil on failure.
+- (nullable NSURL*)storeModelAtURL:(NSURL*)compiledModelURL withIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Removes a single cached model, identified by its identifier.
+///
+/// @param identifier The unique identifier for the cached model to remove.
+/// @param error On failure, populated with an NSError describing the failure.
+/// @return YES if the model was removed or did not exist; NO only on I/O errors.
+- (BOOL)removeCachedModelWithIdentifier:(NSString*)identifier error:(NSError**)error;
+
+/// Clears the entire cache, including all cached models.
+///
+/// @param error On failure, populated with an NSError describing the failure.
+/// @return YES if the cache was purged successfully, otherwise NO.
+- (BOOL)purgeAndReturnError:(NSError**)error;
+
+/// Returns a temporary URL where intermediate files can be written during compilation.
+/// It is guaranteed to be on the same filesystem as the cache, ensuring atomic moves.
+///
+/// @param error On failure, populated with an NSError describing the failure.
+/// @return A temporary URL where intermediate files can be written, or nil on failure.
+///
+/// @note The temporary URL is unique (UUID-based) to avoid conflicts.
+/// @note Temporary entries are cleaned up automatically after 24 hours.
+- (nullable NSURL*)temporaryDirectoryWithError:(NSError**)error;
+
+@end
+
+NS_ASSUME_NONNULL_END
@@ -104,6 +104,7 @@ def export_and_run_model(
     file_name: str = "CadenceDemoModel",
     eps_error: float = 1e-1,
     eps_warn: float = 1e-5,
+    force_rebuild: bool = False,
 ):
     # create work directory for outputs and model binary
     working_dir = tempfile.mkdtemp(dir="/tmp")
@@ -118,4 +119,5 @@ def export_and_run_model(
         eps_error=eps_error,
         eps_warn=eps_warn,
         file_name=file_name,
+        force_rebuild=force_rebuild,
     )
@@ -105,17 +105,22 @@ def __init__(
         self,
         working_dir: str = "",
         file_name: str = "CadenceDemoModel",
+        force_rebuild: bool = False,
     ):
         self.working_dir = working_dir
         self.executor_builder = "./backends/cadence/build_cadence_runner.sh"
         self.execute_runner = "./cmake-out/backends/cadence/cadence_runner"
         self.bundled_program_path: str = f"{file_name}.bpte"
+        self.force_rebuild = force_rebuild
 
     def __call__(self) -> None:
-        # build executor
-        args = self.get_bash_command(self.executor_builder)
-        logging.info(f"\33[33m{' '.join(args)}\33[0m")
-        execute(args)
+        # build executor if not already built or force rebuild requested
+        if self.force_rebuild or not os.path.isfile(self.execute_runner):
+            args = self.get_bash_command(self.executor_builder)
+            logging.info(f"\33[33m{' '.join(args)}\33[0m")
+            execute(args)
+        else:
+            logging.info("Reusing existing runner at %s", self.execute_runner)
 
         # run executor
         cmd_args = {
 
@@ -57,6 +57,7 @@ def run(
     ref_outputs: Optional[Sequence[torch.Tensor]] = None,
     working_dir: Optional[str] = None,
     file_name: str = "CadenceDemoModel",
+    force_rebuild: bool = False,
 ) -> Any:
     # Get the Program
     program = executorch_prog.executorch_program
@@ -70,7 +71,7 @@ def run(
         working_dir = tempfile.mkdtemp(dir="/tmp")
 
     # initialize e2e Executor with executorch_cfg.
-    executor = Executor(working_dir, file_name=file_name)
+    executor = Executor(working_dir, file_name=file_name, force_rebuild=force_rebuild)
 
     # run Executor
     executor()
@@ -138,9 +139,15 @@ def run_and_compare(
     eps_error: float = 1e-1,
     eps_warn: float = 1e-5,
     file_name: str = "CadenceDemoModel",
+    force_rebuild: bool = False,
 ) -> Any:
     outputs = run(
-        executorch_prog, inputs, ref_outputs, working_dir, file_name=file_name
+        executorch_prog,
+        inputs,
+        ref_outputs,
+        working_dir,
+        file_name=file_name,
+        force_rebuild=force_rebuild,
     )
     compare(outputs, ref_outputs, eps_error=eps_error, eps_warn=eps_warn)
 
 
@@ -69,7 +69,11 @@ def call(self, graph_module: torch.fx.GraphModule):
                     # if quantized we need to enforce the q/dq pattern for the newly inserted
                     # concat node
                     q_params = nodes_to_concat[0].args[1:]
-                    q_kwargs = nodes_to_concat[0].kwargs
+                    dq_kwargs = nodes_to_concat[0].kwargs
+                    # quantize_per_tensor does not accept out_dtype, so exclude
+                    # it from kwargs passed to the quantize node. out_dtype is
+                    # only valid for dequantize_per_tensor (e.g. fp16 models).
+                    q_kwargs = {k: v for k, v in dq_kwargs.items() if k != "out_dtype"}
                     # Quantizer enforces all the inputs and output to a concat node must share
                     # the same qparams, this means the newly inserted q/dq pair must share the
                     # same qparams as the first quantized input in the concat node.
@@ -89,7 +93,7 @@ def call(self, graph_module: torch.fx.GraphModule):
                             "call_function",
                             target=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
                             args=(q_node,) + q_params,
-                            kwargs=q_kwargs,
+                            kwargs=dq_kwargs,
                         )
                         tag_as_implicit_q_dq(dq_node)
                     remainder_concat_node.args = (
 
@@ -11,6 +11,64 @@
 
 ---
 
+## 🗺️ Find Your Path
+
+Not sure where to start? Use the guided pathways to navigate ExecuTorch based on your experience level, goal, and target platform.
+
+::::{grid} 3
+:gutter: 3
+
+:::{grid-item-card} 🟢 New to ExecuTorch
+:class-header: bg-success text-white
+:link: pathway-beginner
+:link-type: doc
+
+Step-by-step learning sequence from installation to your first on-device deployment. Includes concept explanations and worked examples.
+
++++
+**Beginner Pathway →**
+:::
+
+:::{grid-item-card} 🟡 Get Running Fast
+:class-header: bg-warning text-dark
+:link: pathway-quickstart
+:link-type: doc
+
+Skip the theory — get a model running in 15 minutes. Includes export cheat sheets, backend selection tables, and platform quick starts.
+
++++
+**Quick Start Pathway →**
+:::
+
+:::{grid-item-card} 🔴 Production & Advanced
+:class-header: bg-danger text-white
+:link: pathway-advanced
+:link-type: doc
+
+Quantization, custom backends, C++ runtime, LLM deployment, and compiler internals for production-grade systems.
+
++++
+**Advanced Pathway →**
+:::
+
+::::
+
+::::{grid} 1
+
+:::{grid-item-card} 🔀 Decision Matrix — Route by Goal, Platform & Model
+:link: user-pathways
+:link-type: doc
+
+Not sure which pathway fits? The decision matrix routes you by experience level, target platform, model status, and developer role to the exact documentation you need.
+
++++
+**View Decision Matrix →**
+:::
+
+::::
+
+---
+
 ## 🎯 Wins & Success Stories
 
 ::::{grid} 1
@@ -186,6 +244,7 @@ Profile, debug, and inspect your models with comprehensive tooling.
 
 intro-section
 quick-start-section
+user-pathways
 edge-platforms-section
 backends-section
 llm/working-with-llms
 
@@ -6,14 +6,39 @@ Overview, architecture, and core concepts of ExecuTorch.
 
 ExecuTorch is PyTorch's solution for training and inference on the Edge, providing portability, productivity, and performance for edge computing platforms.
 
+---
+
+## New to ExecuTorch? Start Here
+
+::::{grid} 2
+:gutter: 2
+
+:::{grid-item-card} 🟢 Beginner Pathway
+:link: pathway-beginner
+:link-type: doc
+
+A structured, step-by-step learning sequence for engineers new to on-device deployment. Covers concepts, setup, export, and your first device deployment.
+:::
+
+:::{grid-item-card} 🔀 Decision Matrix
+:link: user-pathways
+:link-type: doc
+
+Route yourself to the right documentation based on your experience level, target platform, model status, and developer role.
+:::
+
+::::
+
+---
+
 ## Getting Started with ExecuTorch
 
 New to ExecuTorch? Start with these foundational topics:
 
-- **{doc}`intro-overview`** - High-level overview of ExecuTorch capabilities
-- **{doc}`intro-how-it-works`** - Technical overview of the ExecuTorch workflow
-- **{doc}`getting-started-architecture`** - System architecture and components
-- **{doc}`concepts`** - Core concepts and terminology
+- **{doc}`intro-overview`** — High-level overview of ExecuTorch capabilities
+- **{doc}`intro-how-it-works`** — Technical overview of the ExecuTorch workflow
+- **{doc}`getting-started-architecture`** — System architecture and components
+- **{doc}`concepts`** — Core concepts and terminology
 
 ```{toctree}
 :hidden: