Commit d83636d: Ability to specify full file configs for export_llm (#11809)

1 parent 7f2fcb0

6 files changed: +327 -5 lines

examples/models/llama/config/llm_config.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -65,7 +65,9 @@ class BaseConfig:
         params: Model parameters, such as n_layers, hidden_size, etc.
             If left empty will use defaults specified in model_args.py.
         checkpoint: Path to the checkpoint file.
-            If left empty, the model will be initialized with random weights.
+            If left empty, the model will either be initialized with random weights
+            if it is a Llama model or the weights will be downloaded from HuggingFace
+            if it is a non-Llama model.
         checkpoint_dir: Path to directory containing sharded checkpoint files.
         tokenizer_path: Path to the tokenizer file.
         metadata: Json string containing metadata information.
```
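As a hedged illustration of the documented behavior (the model name is borrowed from the README added later in this commit, not from this file): leaving `checkpoint` unset for a non-Llama model pulls the weights from HuggingFace, whereas a Llama model would be randomly initialized instead.

```yaml
base:
  model_class: qwen3-0_6b  # non-Llama: weights are downloaded from HuggingFace
  # checkpoint: deliberately omitted
```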

examples/models/llama/export_llama_lib.py

Lines changed: 6 additions & 2 deletions

```diff
@@ -53,6 +53,8 @@
 )
 from executorch.util.activation_memory_profiler import generate_memory_trace
 
+from omegaconf import DictConfig
+
 from ..model_factory import EagerModelFactory
 from .source_transformation.apply_spin_quant_r1_r2 import (
     fuse_layer_norms,
@@ -571,12 +573,14 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str:
 
 
 def export_llama(
-    export_options: Union[argparse.Namespace, LlmConfig],
+    export_options: Union[argparse.Namespace, LlmConfig, DictConfig],
 ) -> str:
     if isinstance(export_options, argparse.Namespace):
         # Legacy CLI.
         llm_config = LlmConfig.from_args(export_options)
-    elif isinstance(export_options, LlmConfig):
+    elif isinstance(export_options, LlmConfig) or isinstance(
+        export_options, DictConfig
+    ):
         # Hydra CLI.
         llm_config = export_options
     else:
```
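Why the wider signature: merging an `LlmConfig` default with a loaded YAML file via OmegaConf yields an `omegaconf.DictConfig`, not an `LlmConfig` instance, so the old `isinstance` check would have rejected configs coming through `--config`. A minimal sketch of that fact (`ExportCfg` is a hypothetical stand-in for `LlmConfig`, not part of this commit):

```python
from dataclasses import dataclass

from omegaconf import DictConfig, OmegaConf


@dataclass
class ExportCfg:
    # Hypothetical stand-in for LlmConfig.
    max_seq_length: int = 128


defaults = OmegaConf.structured(ExportCfg())           # DictConfig backed by the dataclass
overrides = OmegaConf.create({"max_seq_length": 512})  # as if loaded from a YAML file

merged = OmegaConf.merge(defaults, overrides)
assert isinstance(merged, DictConfig)     # what export_llama() now accepts
assert not isinstance(merged, ExportCfg)  # merge does not return the dataclass type
assert merged.max_seq_length == 512
```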

extension/llm/export/README.md

Lines changed: 137 additions & 0 deletions

# LLM Export API

This directory contains the unified API for exporting Large Language Models (LLMs) to ExecuTorch. The `export_llm` module provides a streamlined interface to convert various LLM architectures to optimized `.pte` files for on-device inference.

## Overview

The LLM export process transforms a model from its original format to an optimized representation suitable for mobile and edge devices. This involves several key steps, sketched in code after the list:

1. **Model Instantiation**: Load the model architecture and weights from sources like Hugging Face
2. **Source Transformations**: Apply model-specific optimizations and quantization
3. **IR Export**: Convert to intermediate representations (EXIR, Edge dialect)
4. **Graph Transformations**: Apply backend-specific optimizations and PT2E quantization
5. **Backend Delegation**: Partition operations to hardware-specific backends (XNNPACK, CoreML, QNN, etc.)
6. **Serialization**: Export to final ExecuTorch `.pte` format
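A hedged, toy-sized sketch of steps 3 to 6 using a stand-in `Toy` module instead of an LLM; `export_llm` drives a much richer version of this flow, and the API names reflect recent ExecuTorch releases rather than anything defined in this commit:

```python
import torch
from torch.export import export

from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge


class Toy(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.relu(x)


# IR export: eager module -> ExportedProgram -> Edge dialect.
ep = export(Toy(), (torch.randn(1, 8),))
edge = to_edge(ep)

# Backend delegation: partition supported ops to XNNPACK.
edge = edge.to_backend(XnnpackPartitioner())

# Serialization: write the final .pte flatbuffer.
with open("toy.pte", "wb") as f:
    f.write(edge.to_executorch().buffer)
```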
## Supported Models

- **Llama**: Llama 2, Llama 3, Llama 3.1, Llama 3.2 (1B, 3B, 8B variants)
- **Qwen**: Qwen 2.5, Qwen 3 (0.6B, 1.7B, 4B variants)
- **Phi**: Phi-3-Mini, Phi-4-Mini
- **Stories**: Stories110M (educational model)
- **SmolLM**: SmolLM2
## Usage

The export API supports two configuration approaches:

### Option 1: Hydra CLI Arguments

Use structured configuration arguments directly on the command line:

```bash
python -m extension.llm.export.export_llm \
    base.model_class=llama3 \
    model.use_sdpa_with_kv_cache=True \
    model.use_kv_cache=True \
    export.max_seq_length=128 \
    debug.verbose=True \
    backend.xnnpack.enabled=True \
    backend.xnnpack.extended_ops=True \
    quantization.qmode=8da4w
```

### Option 2: Configuration File

Create a YAML configuration file and reference it:

```bash
python -m extension.llm.export.export_llm --config my_config.yaml
```

Example `my_config.yaml`:

```yaml
base:
  model_class: llama3
  tokenizer_path: /path/to/tokenizer.json

model:
  use_kv_cache: true
  use_sdpa_with_kv_cache: true
  enable_dynamic_shape: true

export:
  max_seq_length: 512
  output_dir: ./exported_models
  output_name: llama3_optimized.pte

quantization:
  qmode: 8da4w
  group_size: 32

backend:
  xnnpack:
    enabled: true
    extended_ops: true

debug:
  verbose: true
```

**Important**: You cannot mix both approaches. Use either CLI arguments OR a config file, not both.
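Both approaches ultimately hand a config object to `export_llama`. For completeness, a hedged sketch of driving the same export from Python rather than the CLI; the import paths are inferred from this commit's diffs and should be treated as assumptions:

```python
from executorch.examples.models.llama.config.llm_config import LlmConfig
from executorch.examples.models.llama.export_llama_lib import export_llama
from omegaconf import OmegaConf

# Start from the structured defaults, then overlay the YAML file,
# mirroring what export_llm.py does for --config.
defaults = LlmConfig()
overrides = OmegaConf.load("my_config.yaml")
llm_config = OmegaConf.merge(defaults, overrides)

pte_path = export_llama(llm_config)  # returns the output path as a string
print(pte_path)
```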
## Example Commands

### Export Qwen3 0.6B with XNNPACK backend and quantization

```bash
python -m extension.llm.export.export_llm \
    base.model_class=qwen3-0_6b \
    base.params=examples/models/qwen3/0_6b_config.json \
    base.metadata='{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
    model.use_kv_cache=true \
    model.use_sdpa_with_kv_cache=true \
    model.dtype_override=FP32 \
    export.max_seq_length=512 \
    export.output_name=qwen3_0_6b.pte \
    quantization.qmode=8da4w \
    backend.xnnpack.enabled=true \
    backend.xnnpack.extended_ops=true \
    debug.verbose=true
```

### Export Phi-4-Mini with custom checkpoint

```bash
python -m extension.llm.export.export_llm \
    base.model_class=phi_4_mini \
    base.checkpoint=/path/to/phi4_checkpoint.pth \
    base.params=examples/models/phi-4-mini/config.json \
    base.metadata='{"get_bos_id":151643, "get_eos_ids":[151643]}' \
    model.use_kv_cache=true \
    model.use_sdpa_with_kv_cache=true \
    export.max_seq_length=256 \
    export.output_name=phi4_mini.pte \
    backend.xnnpack.enabled=true \
    debug.verbose=true
```

### Export with CoreML backend (iOS optimization)

```bash
python -m extension.llm.export.export_llm \
    base.model_class=llama3 \
    model.use_kv_cache=true \
    export.max_seq_length=128 \
    backend.coreml.enabled=true \
    backend.coreml.compute_units=ALL \
    quantization.pt2e_quantize=coreml_c4w \
    debug.verbose=true
```

## Configuration Options

For a complete reference of all available configuration options, see the [LlmConfig class definition](../../../examples/models/llama/config/llm_config.py) which documents all supported parameters for base, model, export, quantization, backend, and debug configurations.

## Further Reading

- [Llama Examples](../../../examples/models/llama/README.md) - Comprehensive Llama export guide
- [LLM Runner](../runner/) - Running exported models
- [ExecuTorch Documentation](https://pytorch.org/executorch/) - Framework overview

extension/llm/export/export_llm.py

Lines changed: 50 additions & 2 deletions

```diff
@@ -23,8 +23,16 @@
     backend.xnnpack.enabled=True \
     backend.xnnpack.extended_ops=True \
     quantization.qmode="8da4w"
+
+Example usage using config file:
+python -m extension.llm.export.export_llm \
+    --config example_llm_config.yaml
 """
 
+import argparse
+import sys
+from typing import Any, List, Tuple
+
 import hydra
 
 from executorch.examples.models.llama.config.llm_config import LlmConfig
@@ -36,10 +44,50 @@
 cs.store(name="llm_config", node=LlmConfig)
 
 
-@hydra.main(version_base=None, config_path=None, config_name="llm_config")
-def main(llm_config: LlmConfig) -> None:
+def parse_config_arg() -> Tuple[str, List[Any]]:
+    """First parse out the arg for whether to use Hydra or the old CLI."""
+    parser = argparse.ArgumentParser(add_help=True)
+    parser.add_argument("--config", type=str, help="Path to the LlmConfig file")
+    args, remaining = parser.parse_known_args()
+    return args.config, remaining
+
+
+def pop_config_arg() -> str:
+    """
+    Removes '--config' and its value from sys.argv.
+    Assumes --config is specified and argparse has already validated the args.
+    """
+    idx = sys.argv.index("--config")
+    value = sys.argv[idx + 1]
+    del sys.argv[idx : idx + 2]
+    return value
+
+
+@hydra.main(version_base=None, config_name="llm_config")
+def hydra_main(llm_config: LlmConfig) -> None:
     export_llama(OmegaConf.to_object(llm_config))
 
 
+def main() -> None:
+    config, remaining_args = parse_config_arg()
+    if config:
+        # Check if there are any remaining hydra CLI args when --config is specified
+        # This might change in the future to allow overriding config file values
+        if remaining_args:
+            raise ValueError(
+                "Cannot specify additional CLI arguments when using --config. "
+                f"Found: {remaining_args}. Use either --config file or hydra CLI args, not both."
+            )
+
+        config_file_path = pop_config_arg()
+        default_llm_config = LlmConfig()
+        llm_config_from_file = OmegaConf.load(config_file_path)
+        # Override defaults with values specified in the .yaml provided by --config.
+        merged_llm_config = OmegaConf.merge(default_llm_config, llm_config_from_file)
+        export_llama(merged_llm_config)
+    else:
+        hydra_main()
+
+
 if __name__ == "__main__":
     main()
```
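The `OmegaConf.merge` call above gives the `--config` path its semantics: values from the YAML file override the `LlmConfig` defaults, while unspecified fields keep their defaults (and `pop_config_arg` strips `--config` from `sys.argv` before anything else consumes it). A self-contained sketch of the merge behavior, using toy keys rather than the real config schema:

```python
from omegaconf import OmegaConf

# Toy stand-ins for the LlmConfig defaults and a user-provided YAML file.
defaults = OmegaConf.create({"export": {"max_seq_length": 128, "output_dir": "."}})
from_file = OmegaConf.create({"export": {"max_seq_length": 512}})

merged = OmegaConf.merge(defaults, from_file)
assert merged.export.max_seq_length == 512  # file value wins
assert merged.export.output_dir == "."      # unspecified field keeps its default
```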

New test file for `export_llm`

Lines changed: 129 additions & 0 deletions
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
import sys
import tempfile
import unittest
from unittest.mock import MagicMock, patch

from executorch.extension.llm.export.export_llm import (
    main,
    parse_config_arg,
    pop_config_arg,
)


class TestExportLlm(unittest.TestCase):
    def test_parse_config_arg_with_config(self) -> None:
        """Test parse_config_arg when --config is provided."""
        # Mock sys.argv to include --config
        test_argv = ["script.py", "--config", "test_config.yaml", "extra", "args"]
        with patch.object(sys, "argv", test_argv):
            config_path, remaining = parse_config_arg()
            self.assertEqual(config_path, "test_config.yaml")
            self.assertEqual(remaining, ["extra", "args"])

    def test_parse_config_arg_without_config(self) -> None:
        """Test parse_config_arg when --config is not provided."""
        test_argv = ["script.py", "debug.verbose=True"]
        with patch.object(sys, "argv", test_argv):
            config_path, remaining = parse_config_arg()
            self.assertIsNone(config_path)
            self.assertEqual(remaining, ["debug.verbose=True"])

    def test_pop_config_arg(self) -> None:
        """Test pop_config_arg removes --config and its value from sys.argv."""
        test_argv = ["script.py", "--config", "test_config.yaml", "other", "args"]
        with patch.object(sys, "argv", test_argv):
            config_path = pop_config_arg()
            self.assertEqual(config_path, "test_config.yaml")
            self.assertEqual(sys.argv, ["script.py", "other", "args"])

    @patch("executorch.extension.llm.export.export_llm.export_llama")
    def test_with_config(self, mock_export_llama: MagicMock) -> None:
        """Test main function with --config file and no hydra args."""
        # Create a temporary config file
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
            f.write(
                """
base:
  tokenizer_path: /path/to/tokenizer.json
export:
  max_seq_length: 256
"""
            )
            config_file = f.name

        try:
            test_argv = ["script.py", "--config", config_file]
            with patch.object(sys, "argv", test_argv):
                main()

            # Verify export_llama was called with config
            mock_export_llama.assert_called_once()
            called_config = mock_export_llama.call_args[0][0]
            self.assertEqual(
                called_config["base"]["tokenizer_path"], "/path/to/tokenizer.json"
            )
            self.assertEqual(called_config["export"]["max_seq_length"], 256)
        finally:
            os.unlink(config_file)

    def test_with_cli_args(self) -> None:
        """Test main function with only hydra CLI args."""
        test_argv = ["script.py", "debug.verbose=True"]
        with patch.object(sys, "argv", test_argv):
            with patch(
                "executorch.extension.llm.export.export_llm.hydra_main"
            ) as mock_hydra:
                main()
                mock_hydra.assert_called_once()

    def test_config_with_cli_args_error(self) -> None:
        """Test that --config rejects additional CLI arguments to prevent mixing approaches."""
        # Create a temporary config file
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
            f.write("base:\n checkpoint: /path/to/checkpoint.pth")
            config_file = f.name

        try:
            test_argv = ["script.py", "--config", config_file, "debug.verbose=True"]
            with patch.object(sys, "argv", test_argv):
                with self.assertRaises(ValueError) as cm:
                    main()

            error_msg = str(cm.exception)
            self.assertIn(
                "Cannot specify additional CLI arguments when using --config",
                error_msg,
            )
        finally:
            os.unlink(config_file)

    def test_config_rejects_multiple_cli_args(self) -> None:
        """Test that --config rejects multiple CLI arguments (not just single ones)."""
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
            f.write("export:\n max_seq_length: 128")
            config_file = f.name

        try:
            test_argv = [
                "script.py",
                "--config",
                config_file,
                "debug.verbose=True",
                "export.output_dir=/tmp",
            ]
            with patch.object(sys, "argv", test_argv):
                with self.assertRaises(ValueError):
                    main()
        finally:
            os.unlink(config_file)


if __name__ == "__main__":
    unittest.main()
```
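These tests patch `sys.argv` and mock out `export_llama` and `hydra_main`, so they exercise only the argument-routing logic and never perform a real export. Assuming the file is saved as `test_export_llm.py` (its path is not captured above), they should run under pytest or unittest:

```bash
python -m pytest test_export_llm.py -v
```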

requirements-dev.txt

Lines changed: 2 additions & 0 deletions

```diff
@@ -9,3 +9,5 @@ wheel # For building the pip package archive.
 zstd # Imported by resolve_buck.py.
 lintrunner==0.12.7
 lintrunner-adapters==0.12.4
+hydra-core>=1.3.0
+omegaconf>=2.3.0
```
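For development environments set up before this commit, the two new dependencies can also be installed directly, matching the pins above:

```bash
pip install "hydra-core>=1.3.0" "omegaconf>=2.3.0"
```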
