Pass share_mutable_buffers through llm_config

lucylq · web-flow · commit bcfb551015d1 · 2026-03-02T19:48:34.000Z
Differential Revision: D94456166
Pull Request resolved: pytorch#17763
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
@@ -1306,7 +1306,10 @@ def _export_llama_multimethod(llm_config: LlmConfig) -> LLMEdgeManager:
 
     # Convert to executorch and save
     first_builder.edge_manager = edge_manager
-    first_builder = first_builder.to_executorch(passes=additional_passes)
+    first_builder = first_builder.to_executorch(
+        passes=additional_passes,
+        share_mutable_buffers=llm_config.multimethod_lora.share_mutable_buffers,
+    )
 
     output_file = _get_output_filename(
         llm_config,
diff --git a/examples/models/qwen3/config/qwen3_multimethod.yaml b/examples/models/qwen3/config/qwen3_multimethod.yaml
@@ -30,3 +30,4 @@ multimethod_lora:
       adapter_config: ${oc.env:LORA_ADAPTER_CONFIG}
     # Base method - no LoRA
     base_forward: null
+  share_mutable_buffers: True
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
@@ -480,6 +480,7 @@ def to_executorch(
         external_constants_tag: Optional[
             Callable[[torch.fx.Node], Optional[str]]
         ] = None,
+        share_mutable_buffers: bool = False,
     ) -> "LLMEdgeManager":
         """
         Lower the model to executorch and get an ExecutorchProgram.
@@ -510,7 +511,10 @@ def to_executorch(
                 # QuantFusionPass]]`.
                 passes=to_executorch_passes,
                 do_quant_fusion_and_const_prop=True,
-                memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
+                memory_planning_pass=MemoryPlanningPass(
+                    alloc_graph_input=False,
+                    share_mutable_buffers=share_mutable_buffers,
+                ),
                 sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
                 external_constants=external_constants_tag,
             )
diff --git a/extension/llm/export/config/llm_config.py b/extension/llm/export/config/llm_config.py
@@ -304,6 +304,9 @@ class MultimethodLoraConfig:
     Attributes:
         methods: Dict mapping method names to optional LoRA configs.
             Empty dict disables multimethod_lora export.
+        share_mutable_buffers: Whether to share mutable buffers across methods.
+            If True, sets all mutable buffers to mem_id=2. Mutable buffers with
+            the same FQN (fully qualified name) will have the same offset.
 
     Example:
         MultimethodLoraConfig(methods={
@@ -313,6 +316,7 @@ class MultimethodLoraConfig:
     """
 
     methods: Dict[str, Optional[LoraConfig]] = field(default_factory=dict)
+    share_mutable_buffers: bool = False
 
     @property
     def enabled(self) -> bool: