Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions examples/models/llama/export_llama_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
get_openvino_partitioner,
get_qnn_partitioner,
get_tosa_partitioner,
get_vgf_partitioner,
get_vulkan_partitioner,
get_xnnpack_partitioner,
)
Expand All @@ -50,6 +51,7 @@
get_pt2e_quantizers,
get_qnn_quantizer,
get_tosa_quantizer,
get_vgf_quantizer,
get_vulkan_quantizer,
)
from executorch.util.activation_memory_profiler import generate_memory_trace
Expand Down Expand Up @@ -824,6 +826,13 @@ def get_quantizer_and_quant_params(llm_config):
llm_config.quantization.pt2e_quantize.value,
)
quantizers.append(ethosu_quantizer)
if llm_config.backend.vgf.enabled and llm_config.quantization.pt2e_quantize:
vgf_quantizer = get_vgf_quantizer(
llm_config.backend.vgf.compile_spec,
llm_config.backend.vgf.compiler_flags,
llm_config.quantization.pt2e_quantize.value,
)
quantizers.append(vgf_quantizer)
if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize:
assert (
len(quantizers) == 0
Expand Down Expand Up @@ -1013,6 +1022,14 @@ def _to_edge_and_lower_llama_arm(
)
)
modelname = f"ethosu_{modelname}"
elif llm_config.backend.vgf.enabled:
partitioners.append(
get_vgf_partitioner(
llm_config.backend.vgf.compile_spec,
llm_config.backend.vgf.compiler_flags,
)
)
modelname = f"vgf_{modelname}"
elif llm_config.backend.tosa.enabled:
partitioners.append(get_tosa_partitioner(llm_config.backend.tosa.version))
modelname = f"tosa_{modelname}"
Expand Down Expand Up @@ -1336,7 +1353,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901

# export_to_edge
builder_manager = _prepare_for_llama_export(llm_config)
if llm_config.backend.tosa.enabled:
if (
llm_config.backend.tosa.enabled
or llm_config.backend.vgf.enabled
or llm_config.backend.ethosu.enabled
):
builder_manager.skip_dim_order = False
builder_exported = builder_manager.export()
builder_exported.run_canonical_optimizations()
Expand Down Expand Up @@ -1383,7 +1404,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
openvino_device=llm_config.backend.openvino.device,
verbose=llm_config.debug.verbose,
)
elif llm_config.backend.tosa.enabled or llm_config.backend.ethosu.enabled:
elif (
llm_config.backend.tosa.enabled
or llm_config.backend.ethosu.enabled
or llm_config.backend.vgf.enabled
):
builder = _to_edge_and_lower_llama_arm(
builder_exported,
modelname,
Expand Down
18 changes: 18 additions & 0 deletions examples/models/llama/tests/test_export_llama_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
from executorch.backends.arm.quantizer.arm_quantizer import (
EthosUQuantizer,
TOSAQuantizer,
VgfQuantizer,
)

HAS_ARM_BACKEND = True
except ImportError:
HAS_ARM_BACKEND = False
EthosUQuantizer = None
TOSAQuantizer = None
VgfQuantizer = None

from executorch.examples.models.llama.export_llama_lib import (
_export_llama,
Expand Down Expand Up @@ -93,3 +95,19 @@ def test_get_quantizer_and_quant_params_returns_ethosu_quantizer(self):
self.assertIsNone(quant_dtype)
self.assertEqual(len(quantizers), 1)
self.assertIsInstance(quantizers[0], EthosUQuantizer)

@unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
def test_get_quantizer_and_quant_params_returns_vgf_quantizer(self):
    """Enabling the VGF backend with vgf_8a8w yields exactly one VgfQuantizer.

    No PT2E quantization params or quant dtype should be produced in this mode.
    """
    # Arrange: a default config with only the VGF backend switched on.
    config = LlmConfig()
    config.backend.vgf.enabled = True
    config.backend.vgf.compile_spec = "TOSA-1.0+INT"
    config.quantization.pt2e_quantize = Pt2eQuantize.vgf_8a8w

    # Act.
    pt2e_params, quantizer_list, dtype = get_quantizer_and_quant_params(config)

    # Assert: a single VGF quantizer and nothing else.
    self.assertIsNone(pt2e_params)
    self.assertIsNone(dtype)
    self.assertEqual(len(quantizer_list), 1)
    self.assertIsInstance(quantizer_list[0], VgfQuantizer)
13 changes: 13 additions & 0 deletions extension/llm/export/config/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ class Pt2eQuantize(str, Enum):
vulkan_8w = "vulkan_8w"
tosa_8a8w = "tosa_8a8w"
ethosu_8a8w = "ethosu_8a8w"
vgf_8a8w = "vgf_8a8w"


class SpinQuant(str, Enum):
Expand Down Expand Up @@ -558,6 +559,17 @@ class EthosUConfig:
system_config: str = "default"


@dataclass
class VgfConfig:
    """
    Configures the VGF backend.

    Consumed by the llama export flow, which forwards ``compile_spec`` and
    ``compiler_flags`` to the VGF partitioner/quantizer builders when
    ``enabled`` is set.
    """

    # Whether to lower the model through the VGF backend.
    enabled: bool = False
    # TOSA compile-spec string passed to VgfCompileSpec; defaults to the
    # integer-only TOSA 1.0 profile.
    compile_spec: Optional[str] = "TOSA-1.0+INT"
    # Extra compiler flags passed through to VgfCompileSpec.
    compiler_flags: List[str] = field(default_factory=list)


@dataclass
class BackendConfig:
"""
Expand All @@ -574,6 +586,7 @@ class BackendConfig:
torchao: TorchAOKernelsConfig = field(default_factory=TorchAOKernelsConfig)
tosa: TosaConfig = field(default_factory=TosaConfig)
ethosu: EthosUConfig = field(default_factory=EthosUConfig)
vgf: VgfConfig = field(default_factory=VgfConfig)


################################################################################
Expand Down
13 changes: 12 additions & 1 deletion extension/llm/export/partitioner_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Optional
from typing import List, Optional


def get_xnnpack_partitioner(dynamic_quant_only_partitioner: bool = True):
Expand Down Expand Up @@ -255,3 +255,14 @@ def get_ethosu_partitioner(target: str):
compile_spec = EthosUCompileSpec(target)

return EthosUPartitioner(compile_spec)


def get_vgf_partitioner(
    compile_spec: Optional[str], compiler_flags: Optional[List[str]]
):
    """Build a VGF partitioner from a TOSA compile-spec string and flags.

    Args:
        compile_spec: TOSA specification string (e.g. "TOSA-1.0+INT").
        compiler_flags: Extra flags forwarded to the VGF compile spec.

    Returns:
        A VgfPartitioner configured with the resulting VgfCompileSpec.
    """
    # Imported lazily so the ARM/VGF backend is only required when selected.
    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
    from executorch.backends.arm.vgf.partitioner import VgfPartitioner

    return VgfPartitioner(VgfCompileSpec(compile_spec, compiler_flags))
23 changes: 23 additions & 0 deletions extension/llm/export/quantizer_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,26 @@ def get_ethosu_quantizer(
raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")

return quantizer


def get_vgf_quantizer(
    compile_spec: Optional[str],
    compiler_flags: Optional[List[str]],
    pt2e_quantize: str,
):
    """Build a VgfQuantizer for the given PT2E quantization mode.

    Args:
        compile_spec: TOSA specification string forwarded to VgfCompileSpec.
        compiler_flags: Extra flags forwarded to VgfCompileSpec.
        pt2e_quantize: Quantization mode name; only "vgf_8a8w" is supported.

    Returns:
        A VgfQuantizer with the symmetric quantization config applied globally.

    Raises:
        ValueError: If ``pt2e_quantize`` is not "vgf_8a8w".
    """
    # Imported lazily so the ARM backend is only required when selected.
    from executorch.backends.arm.quantizer.arm_quantizer import (
        get_symmetric_quantization_config,
        VgfQuantizer,
    )
    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec

    quantizer = VgfQuantizer(VgfCompileSpec(compile_spec, compiler_flags))

    if pt2e_quantize == "vgf_8a8w":
        quantizer.set_global(get_symmetric_quantization_config())
        return quantizer

    raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
Loading