
Commit cc8dc0c

Add vgf to extension LLM
Signed-off-by: Sebastian Larsson <sebastian.larsson@arm.com>
Change-Id: Ide55a1928215b21689a9732b61b00b9eaf4e952b
Parent: b778155

5 files changed

Lines changed: 93 additions & 3 deletions

examples/models/llama/export_llama_lib.py

Lines changed: 27 additions & 2 deletions
@@ -39,6 +39,7 @@
     get_openvino_partitioner,
     get_qnn_partitioner,
     get_tosa_partitioner,
+    get_vgf_partitioner,
     get_vulkan_partitioner,
     get_xnnpack_partitioner,
 )
@@ -50,6 +51,7 @@
     get_pt2e_quantizers,
     get_qnn_quantizer,
     get_tosa_quantizer,
+    get_vgf_quantizer,
     get_vulkan_quantizer,
 )
 from executorch.util.activation_memory_profiler import generate_memory_trace
@@ -824,6 +826,13 @@ def get_quantizer_and_quant_params(llm_config):
             llm_config.quantization.pt2e_quantize.value,
         )
         quantizers.append(ethosu_quantizer)
+    if llm_config.backend.vgf.enabled and llm_config.quantization.pt2e_quantize:
+        vgf_quantizer = get_vgf_quantizer(
+            llm_config.backend.vgf.compile_spec,
+            llm_config.backend.vgf.compiler_flags,
+            llm_config.quantization.pt2e_quantize.value,
+        )
+        quantizers.append(vgf_quantizer)
     if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize:
         assert (
             len(quantizers) == 0
@@ -1013,6 +1022,14 @@ def _to_edge_and_lower_llama_arm(
             )
         )
         modelname = f"ethosu_{modelname}"
+    elif llm_config.backend.vgf.enabled:
+        partitioners.append(
+            get_vgf_partitioner(
+                llm_config.backend.vgf.compile_spec,
+                llm_config.backend.vgf.compiler_flags,
+            )
+        )
+        modelname = f"vgf_{modelname}"
     elif llm_config.backend.tosa.enabled:
         partitioners.append(get_tosa_partitioner(llm_config.backend.tosa.version))
         modelname = f"tosa_{modelname}"
@@ -1336,7 +1353,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
 
     # export_to_edge
     builder_manager = _prepare_for_llama_export(llm_config)
-    if llm_config.backend.tosa.enabled:
+    if (
+        llm_config.backend.tosa.enabled
+        or llm_config.backend.vgf.enabled
+        or llm_config.backend.ethosu.enabled
+    ):
         builder_manager.skip_dim_order = False
     builder_exported = builder_manager.export()
     builder_exported.run_canonical_optimizations()
@@ -1383,7 +1404,11 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
             openvino_device=llm_config.backend.openvino.device,
             verbose=llm_config.debug.verbose,
         )
-    elif llm_config.backend.tosa.enabled or llm_config.backend.ethosu.enabled:
+    elif (
+        llm_config.backend.tosa.enabled
+        or llm_config.backend.ethosu.enabled
+        or llm_config.backend.vgf.enabled
+    ):
         builder = _to_edge_and_lower_llama_arm(
             builder_exported,
             modelname,
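
A minimal sketch (not part of the commit) of how a VGF-enabled config reaches the new branches above. The module paths are assumed from the file locations in this commit, and the config values are illustrative only:

from executorch.examples.models.llama.export_llama_lib import (
    get_quantizer_and_quant_params,
)
from executorch.extension.llm.export.config.llm_config import LlmConfig, Pt2eQuantize

llm_config = LlmConfig()
llm_config.backend.vgf.enabled = True  # routes export through the new vgf branches
llm_config.quantization.pt2e_quantize = Pt2eQuantize.vgf_8a8w

# get_quantizer_and_quant_params appends a VgfQuantizer built from the vgf
# compile_spec/compiler_flags; _to_edge_and_lower_llama_arm later picks the
# VGF partitioner and prefixes the model name with "vgf_".
pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(llm_config)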

examples/models/llama/tests/test_export_llama_lib.py

Lines changed: 18 additions & 0 deletions
@@ -13,13 +13,15 @@
     from executorch.backends.arm.quantizer.arm_quantizer import (
         EthosUQuantizer,
         TOSAQuantizer,
+        VgfQuantizer,
     )
 
     HAS_ARM_BACKEND = True
 except ImportError:
     HAS_ARM_BACKEND = False
     EthosUQuantizer = None
     TOSAQuantizer = None
+    VgfQuantizer = None
 
 from executorch.examples.models.llama.export_llama_lib import (
     _export_llama,
@@ -93,3 +95,19 @@ def test_get_quantizer_and_quant_params_returns_ethosu_quantizer(self):
         self.assertIsNone(quant_dtype)
         self.assertEqual(len(quantizers), 1)
         self.assertIsInstance(quantizers[0], EthosUQuantizer)
+
+    @unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
+    def test_get_quantizer_and_quant_params_returns_vgf_quantizer(self):
+        llm_config = LlmConfig()
+        llm_config.backend.vgf.enabled = True
+        llm_config.backend.vgf.compile_spec = "TOSA-1.0+INT"
+        llm_config.quantization.pt2e_quantize = Pt2eQuantize.vgf_8a8w
+
+        pt2e_quant_params, quantizers, quant_dtype = get_quantizer_and_quant_params(
+            llm_config
+        )
+
+        self.assertIsNone(pt2e_quant_params)
+        self.assertIsNone(quant_dtype)
+        self.assertEqual(len(quantizers), 1)
+        self.assertIsInstance(quantizers[0], VgfQuantizer)
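
Assuming the repository root is on PYTHONPATH and the Arm backend is installed (otherwise the skipUnless guard skips the case), the new test can be run with the standard unittest runner, for example:

python -m unittest examples.models.llama.tests.test_export_llama_lib -k vgf_quantizer -v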

extension/llm/export/config/llm_config.py

Lines changed: 13 additions & 0 deletions
@@ -349,6 +349,7 @@ class Pt2eQuantize(str, Enum):
     vulkan_8w = "vulkan_8w"
     tosa_8a8w = "tosa_8a8w"
     ethosu_8a8w = "ethosu_8a8w"
+    vgf_8a8w = "vgf_8a8w"
 
 
 class SpinQuant(str, Enum):
@@ -558,6 +559,17 @@ class EthosUConfig:
     system_config: str = "default"
 
 
+@dataclass
+class VgfConfig:
+    """
+    Configures the VGF backend.
+    """
+
+    enabled: bool = False
+    compile_spec: Optional[str] = "TOSA-1.0+INT"
+    compiler_flags: List[str] = field(default_factory=list)
+
+
 @dataclass
 class BackendConfig:
     """
@@ -574,6 +586,7 @@ class BackendConfig:
     torchao: TorchAOKernelsConfig = field(default_factory=TorchAOKernelsConfig)
     tosa: TosaConfig = field(default_factory=TosaConfig)
     ethosu: EthosUConfig = field(default_factory=EthosUConfig)
+    vgf: VgfConfig = field(default_factory=VgfConfig)
 
 
 ################################################################################
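
A minimal sketch (not part of the commit) of how the new config surfaces to callers; the module path is assumed from the file location, and the compiler flag value is hypothetical:

from executorch.extension.llm.export.config.llm_config import BackendConfig

backend = BackendConfig()
# VgfConfig defaults: enabled=False, compile_spec="TOSA-1.0+INT", compiler_flags=[]
backend.vgf.enabled = True
backend.vgf.compiler_flags = ["--example-flag"]  # hypothetical flag, forwarded to VgfCompileSpec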

extension/llm/export/partitioner_lib.py

Lines changed: 12 additions & 1 deletion
@@ -5,7 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Optional
+from typing import List, Optional
 
 
 def get_xnnpack_partitioner(dynamic_quant_only_partitioner: bool = True):
@@ -255,3 +255,14 @@ def get_ethosu_partitioner(target: str):
     compile_spec = EthosUCompileSpec(target)
 
     return EthosUPartitioner(compile_spec)
+
+
+def get_vgf_partitioner(
+    compile_spec: Optional[str], compiler_flags: Optional[List[str]]
+):
+    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
+    from executorch.backends.arm.vgf.partitioner import VgfPartitioner
+
+    compile_spec_obj = VgfCompileSpec(compile_spec, compiler_flags)
+
+    return VgfPartitioner(compile_spec_obj)
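
A minimal usage sketch (not part of the commit) for the new helper; it assumes the Arm VGF backend (executorch.backends.arm.vgf) is importable and that the module path below matches the file location:

from executorch.extension.llm.export.partitioner_lib import get_vgf_partitioner

# "TOSA-1.0+INT" matches the VgfConfig default compile_spec; the (empty) list of
# compiler flags is passed through to VgfCompileSpec unchanged.
partitioner = get_vgf_partitioner("TOSA-1.0+INT", [])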

extension/llm/export/quantizer_lib.py

Lines changed: 23 additions & 0 deletions
@@ -361,3 +361,26 @@ def get_ethosu_quantizer(
         raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
 
     return quantizer
+
+
+def get_vgf_quantizer(
+    compile_spec: Optional[str],
+    compiler_flags: Optional[List[str]],
+    pt2e_quantize: str,
+):
+    from executorch.backends.arm.quantizer.arm_quantizer import (
+        get_symmetric_quantization_config,
+        VgfQuantizer,
+    )
+    from executorch.backends.arm.vgf.compile_spec import VgfCompileSpec
+
+    compile_spec_obj = VgfCompileSpec(compile_spec, compiler_flags)
+
+    quantizer = VgfQuantizer(compile_spec_obj)
+
+    if pt2e_quantize == "vgf_8a8w":
+        quantizer.set_global(get_symmetric_quantization_config())
+    else:
+        raise ValueError(f"Unsupported quantizer specification {pt2e_quantize}")
+
+    return quantizer
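
A minimal usage sketch (not part of the commit) for the new quantizer helper, under the same assumptions as above (Arm backend installed, module path inferred from the file location):

from executorch.extension.llm.export.quantizer_lib import get_vgf_quantizer

# Only "vgf_8a8w" is accepted; any other pt2e_quantize value raises ValueError,
# mirroring get_ethosu_quantizer.
quantizer = get_vgf_quantizer("TOSA-1.0+INT", [], "vgf_8a8w")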
