File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -1306,7 +1306,10 @@ def _export_llama_multimethod(llm_config: LlmConfig) -> LLMEdgeManager:
13061306
13071307 # Convert to executorch and save
13081308 first_builder .edge_manager = edge_manager
1309- first_builder = first_builder .to_executorch (passes = additional_passes )
1309+ first_builder = first_builder .to_executorch (
1310+ passes = additional_passes ,
1311+ share_mutable_buffers = llm_config .multimethod_lora .share_mutable_buffers ,
1312+ )
13101313
13111314 output_file = _get_output_filename (
13121315 llm_config ,
Original file line number Diff line number Diff line change @@ -30,3 +30,4 @@ multimethod_lora:
3030 adapter_config : ${oc.env:LORA_ADAPTER_CONFIG}
3131 # Base method - no LoRA
3232 base_forward : null
33+ share_mutable_buffers : True
Original file line number Diff line number Diff line change @@ -480,6 +480,7 @@ def to_executorch(
480480 external_constants_tag : Optional [
481481 Callable [[torch .fx .Node ], Optional [str ]]
482482 ] = None ,
483+ share_mutable_buffers : bool = False ,
483484 ) -> "LLMEdgeManager" :
484485 """
485486 Lower the model to executorch and get an ExecutorchProgram.
@@ -510,7 +511,10 @@ def to_executorch(
510511 # QuantFusionPass]]`.
511512 passes = to_executorch_passes ,
512513 do_quant_fusion_and_const_prop = True ,
513- memory_planning_pass = MemoryPlanningPass (alloc_graph_input = False ),
514+ memory_planning_pass = MemoryPlanningPass (
515+ alloc_graph_input = False ,
516+ share_mutable_buffers = share_mutable_buffers ,
517+ ),
514518 sym_shape_eval_pass = ConstraintBasedSymShapeEvalPass (),
515519 external_constants = external_constants_tag ,
516520 )
Original file line number Diff line number Diff line change @@ -304,6 +304,9 @@ class MultimethodLoraConfig:
304304 Attributes:
305305 methods: Dict mapping method names to optional LoRA configs.
306306 Empty dict disables multimethod_lora export.
307+ share_mutable_buffers: Whether to share mutable buffers across methods.
308+ If True, sets all mutable buffers to mem_id=2. Mutable buffers with
309+ the same FQN (fully qualified name) will have the same offset.
307310
308311 Example:
309312 MultimethodLoraConfig(methods={
@@ -313,6 +316,7 @@ class MultimethodLoraConfig:
313316 """
314317
315318 methods : Dict [str , Optional [LoraConfig ]] = field (default_factory = dict )
319+ share_mutable_buffers : bool = False
316320
317321 @property
318322 def enabled (self ) -> bool :
You can’t perform that action at this time.
0 commit comments