Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/models/llama/export_llama_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,7 +1306,10 @@ def _export_llama_multimethod(llm_config: LlmConfig) -> LLMEdgeManager:

# Convert to executorch and save
first_builder.edge_manager = edge_manager
first_builder = first_builder.to_executorch(passes=additional_passes)
first_builder = first_builder.to_executorch(
passes=additional_passes,
share_mutable_buffers=llm_config.multimethod_lora.share_mutable_buffers,
)

output_file = _get_output_filename(
llm_config,
Expand Down
1 change: 1 addition & 0 deletions examples/models/qwen3/config/qwen3_multimethod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ multimethod_lora:
adapter_config: ${oc.env:LORA_ADAPTER_CONFIG}
# Base method - no LoRA
base_forward: null
share_mutable_buffers: True
6 changes: 5 additions & 1 deletion extension/llm/export/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ def to_executorch(
external_constants_tag: Optional[
Callable[[torch.fx.Node], Optional[str]]
] = None,
share_mutable_buffers: bool = False,
) -> "LLMEdgeManager":
"""
Lower the model to executorch and get an ExecutorchProgram.
Expand Down Expand Up @@ -510,7 +511,10 @@ def to_executorch(
# QuantFusionPass]]`.
passes=to_executorch_passes,
do_quant_fusion_and_const_prop=True,
memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
memory_planning_pass=MemoryPlanningPass(
alloc_graph_input=False,
share_mutable_buffers=share_mutable_buffers,
),
sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(),
external_constants=external_constants_tag,
)
Expand Down
4 changes: 4 additions & 0 deletions extension/llm/export/config/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ class MultimethodLoraConfig:
Attributes:
methods: Dict mapping method names to optional LoRA configs.
Empty dict disables multimethod_lora export.
share_mutable_buffers: Whether to share mutable buffers across methods.
If True, all mutable buffers are placed in mem_id=2, and mutable buffers
with the same FQN (fully qualified name) get the same offset in each method.

Example:
MultimethodLoraConfig(methods={
Expand All @@ -313,6 +316,7 @@ class MultimethodLoraConfig:
"""

methods: Dict[str, Optional[LoraConfig]] = field(default_factory=dict)
share_mutable_buffers: bool = False

@property
def enabled(self) -> bool:
Expand Down
Loading