#!/usr/bin/env bash
set -euo pipefail
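# The flags above abort the script on any command failure, unset variable, or pipeline error.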

# -------------------------
# Args / flags
# -------------------------
TEST_WITH_RUNNER=0
MODEL_NAME=""

# Parse args
if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <model_name> [--test_with_runner]" >&2
  echo "Supported model_name values: qwen3_4b, phi_4_mini" >&2
  exit 1
fi

MODEL_NAME="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --test_with_runner)
      TEST_WITH_RUNNER=1
      ;;
    -h|--help)
      echo "Usage: $0 <model_name> [--test_with_runner]"
      echo "  model_name: qwen3_4b | phi_4_mini"
      echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
  shift
done
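# Example invocations (script name illustrative):
#   bash <this_script>.sh qwen3_4b
#   bash <this_script>.sh phi_4_mini --test_with_runner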

# Allow callers to override the Python interpreter used for the export steps.
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

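# Every export below writes a single PTE artifact to this path; it is removed in the cleanup step.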
MODEL_OUT=model.pte

case "$MODEL_NAME" in
  qwen3_4b)
    echo "Running Qwen3-4B export..."
    # `hf download` prints the local snapshot directory of the quantized checkpoint.
    HF_MODEL_DIR=$(hf download pytorch/Qwen3-4B-INT8-INT4)
    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
    $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \
      "$HF_MODEL_DIR" \
      pytorch_model_converted.bin

    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
      --model "qwen3_4b" \
      --checkpoint pytorch_model_converted.bin \
      --params examples/models/qwen3/config/4b_config.json \
      --output_name "$MODEL_OUT" \
      -kv \
      --use_sdpa_with_kv_cache \
      -X \
      --xnnpack-extended-ops \
      --max_context_length 1024 \
      --max_seq_length 1024 \
      --dtype fp32 \
      --metadata '{"get_bos_id":151644, "get_eos_ids":[151645]}' # Qwen3 chat-template tokens: <|im_start|> / <|im_end|>
    ;;
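
  # Phi-4-mini follows the same export flow with its own converter module, params file, and tokenizer IDs.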

  phi_4_mini)
    echo "Running Phi-4-mini export..."
    HF_MODEL_DIR=$(hf download pytorch/Phi-4-mini-instruct-INT8-INT4)
    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
    $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \
      "$HF_MODEL_DIR" \
      pytorch_model_converted.bin

    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
      --model "phi_4_mini" \
      --checkpoint pytorch_model_converted.bin \
      --params examples/models/phi_4_mini/config/config.json \
      --output_name "$MODEL_OUT" \
      -kv \
      --use_sdpa_with_kv_cache \
      -X \
      --xnnpack-extended-ops \
      --max_context_length 1024 \
      --max_seq_length 1024 \
      --dtype fp32 \
      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
    ;;

  *)
    echo "Error: unsupported model_name '$MODEL_NAME'" >&2
    echo "Supported values: qwen3_4b, phi_4_mini" >&2
    exit 1
    ;;
esac
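# At this point the selected case arm has set HF_MODEL_DIR and EXPECTED_MODEL_SIZE_UPPER_BOUND
# and written the exported program to $MODEL_OUT.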

# Check file size: GNU stat (Linux) first, falling back to BSD stat (macOS).
MODEL_SIZE=$(stat --printf="%s" "$MODEL_OUT" 2>/dev/null || stat -f%z "$MODEL_OUT")
if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
  echo "Error: model size $MODEL_SIZE is greater than expected upper bound $EXPECTED_MODEL_SIZE_UPPER_BOUND" >&2
  exit 1
fi

# Install ET with CMake
if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
  echo "[runner] Building and testing llama_main ..."
  cmake -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DEXECUTORCH_ENABLE_LOGGING=1 \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
    -Bcmake-out .
  cmake --build cmake-out -j16 --config Release --target install
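
  # The install target stages the ExecuTorch libraries and headers under cmake-out,
  # so the runner build below can find and link against them.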

  # Build the llama_main runner example
  cmake -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -DCMAKE_BUILD_TYPE=Release \
    -Bcmake-out/examples/models/llama \
    examples/models/llama
  cmake --build cmake-out/examples/models/llama -j16 --config Release

  # Run the exported model end-to-end as a sanity check
  ./cmake-out/examples/models/llama/llama_main \
    --model_path="$MODEL_OUT" \
    --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" \
    --prompt="Once upon a time,"
fi

# Clean up
rm -f pytorch_model_converted.bin "$MODEL_OUT"